|
- import jittor as jt
- import jclip as clip
- import jittor.nn as nn
- import os
- from PIL import Image
- from tqdm import tqdm
- import argparse
- import random
- import pandas as pd
- import numpy as np
- from finetune.Tipadapter import Tip_adapter
- from finetune.cache_module import cache_module
- from sklearn.linear_model import LogisticRegression
- from utils import generate_prompt, normalize_tensor, count_parameters_in_mb, get_val_text_features, get_date_format
- from sklearn.multiclass import OneVsRestClassifier
-
-
# Seed Python's `random` module and Jittor's global RNG with the same fixed value.
_GLOBAL_SEED = 20
random.seed(_GLOBAL_SEED)
jt.misc.set_global_seed(_GLOBAL_SEED)
-
# Column names of the five ranked predictions written to the result file.
_PRED_COLUMNS = ['pred_1', 'pred_2', 'pred_3', 'pred_4', 'pred_5']

# Every value accepted for `method_name` (None means plain CLIP inference).
_SUPPORTED_METHODS = (
    None, 'Tip-Adapter', 'Tip-Adapter-F', 'Linear_Probe', 'cross_modal_Adapter',
    'cross_modal+tip_adapter', 'FD-Align', 'WiSE-FT', 'fusion', 'fusion_2',
)


def _frange(start, stop, step):
    """Return the inclusive grid start, start + step, ..., stop (rounded to 2 decimals)."""
    steps = int(round((stop - start) / step))
    return [round(start + i * step, 2) for i in range(steps + 1)]


def _to_numpy(values):
    """Convert a Jittor Var (anything exposing `.numpy()`) or array-like to a NumPy array."""
    return values.numpy() if hasattr(values, 'numpy') else np.asarray(values)


def _count_correct(labels, predictions):
    """Return, as a plain int, how many predictions equal their label."""
    return int(np.sum(np.asarray(labels) == _to_numpy(predictions)))


def _accuracy(correct, total):
    """Return correct / total rounded to 4 decimals (0.0 when there are no samples)."""
    return round(correct / total, 4) if total else 0.0


def _dataset_name(test_data_path):
    """Return the last folder name of a path, ignoring trailing '/' or '\\' separators."""
    trimmed = test_data_path.rstrip('/\\')
    return trimmed.replace('\\', '/').rsplit('/', 1)[-1]


def _save_top5(imgs, top5, result_name):
    """Write one 'img_name pred_1 ... pred_5' line per image to `result_name`."""
    frame = pd.DataFrame(_to_numpy(top5), columns=_PRED_COLUMNS)
    frame.insert(0, 'img_name', imgs)
    out_dir = os.path.dirname(result_name)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    frame.to_csv(result_name, sep=' ', index=False, header=False)


def _grid_search(count_for, alphas, betas, total):
    """Try every (alpha, beta) pair and return the best number of correct predictions.

    `count_for(alpha, beta)` must return the number of correct top-1 predictions.
    Each improvement of the (rounded) accuracy is printed as it is found.
    """
    best_acc, best_num = 0, 0
    for alpha in alphas:
        for beta in betas:
            correct = count_for(alpha, beta)
            acc = _accuracy(correct, total)
            if acc > best_acc:
                best_acc, best_num = acc, correct
                print(f'The best acc is {best_acc}, alpha is {alpha}, beta is {beta}')
    return best_num


def compute_acc_TestSetZ(root_TrainSet, pkl_path, FD_Align_pkl_path, Tip_Adapter_F_pkl_path, cross_modal_pkl_path, classes_path, TestData_path, TestSetZ_label_path, ALPHA=2.4, BETA=0.7, method_name=None, WiSE_FT_pkl_path='another_pkl_path'):
    """Run a (fine-tuned) CLIP model on a test folder and score or export its predictions.

    When `TestData_path` contains 'TestSetA' or 'TestSetB' the top-5 predictions are
    written to '../results/result<date>.txt' and no accuracy is computed. Otherwise
    the top-1 predictions are compared with the labels in `TestSetZ_label_path`;
    methods with tunable weights grid-search them and print each improvement.

    Args:
        root_TrainSet: Root directory of the training set.
        pkl_path: Path of the CLIP weights.
        FD_Align_pkl_path: Weights produced by FD-Align training.
        Tip_Adapter_F_pkl_path: Weights produced by Tip-Adapter-F training.
        cross_modal_pkl_path: Weights produced by cross-modal adapter training.
        classes_path: Path of the classes.txt file.
        TestData_path: Test image folder (TestSetZ, TestSetA or TestSetB).
        TestSetZ_label_path: Tab separated 'image name<TAB>label' file for TestSetZ.
        ALPHA, BETA: Mixing weights used by 'Tip-Adapter' and 'cross_modal_Adapter'.
        method_name: None for plain CLIP inference, or one of 'Tip-Adapter',
            'Tip-Adapter-F', 'Linear_Probe', 'cross_modal_Adapter',
            'cross_modal+tip_adapter', 'FD-Align', 'WiSE-FT', 'fusion', 'fusion_2'.
        WiSE_FT_pkl_path: Second checkpoint interpolated with `pkl_path` by 'WiSE-FT'.

    Returns:
        int: 0 for TestSetA/TestSetB; otherwise the number of correct top-1
        predictions (for grid-searched methods, the count at the best setting).

    Raises:
        ValueError: If `method_name` is not a supported method.
    """
    # Fail fast, before any model is loaded.
    if method_name not in _SUPPORTED_METHODS:
        raise ValueError(f'Unknown method_name {method_name!r}; expected one of {_SUPPORTED_METHODS}')

    model, preprocess = clip.load(pkl_path)  # trained CLIP model
    text_features = get_val_text_features(classes_path, model=model)  # class text features

    num = 0  # number of correctly predicted images
    class_4_path = f'{root_TrainSet}/train_4class.txt'
    df_label = pd.read_csv(TestSetZ_label_path, delimiter='\t', encoding='utf-8', header=None)
    result_name = f'../results/result{get_date_format()}.txt'
    is_submission = 'TestSetA' in TestData_path or 'TestSetB' in TestData_path

    print(f'Test Dataset is: {_dataset_name(TestData_path)}')
    imgs = os.listdir(TestData_path)
    # Labels aligned with `imgs`; images missing from the label file become NaN.
    test_label = df_label.set_index(0).reindex(imgs)[1].tolist()
    total = len(test_label)

    def compute_probs(path_of_img, model=model, preprocess=preprocess):
        """Return the L2-normalised image feature of one image file."""
        with Image.open(path_of_img) as handle:  # close the file once decoded
            image = handle.convert('RGB')
        image = preprocess(image).unsqueeze(0)
        image_features = model.encode_image(image)
        image_features /= image_features.norm(dim=-1, keepdim=True)
        return image_features

    def get_test_fea(model=model, preprocess=preprocess):
        """Return the concatenated features of every test image, in `imgs` order."""
        print('loading test data...')
        with jt.no_grad():
            features = [compute_probs(os.path.join(TestData_path, img), model, preprocess) for img in tqdm(imgs)]
        return jt.cat(features)

    def zero_shot_probs(features, text):
        """Softmax over the scaled similarity between image features and `text`."""
        return (100.0 * features @ text.transpose(0, 1)).softmax(dim=-1)

    def top5_of(logits):
        """Return the indices of the five largest logits per row."""
        _, labels = logits.topk(5)
        return labels

    def load_tip_adapter_f(weights_path):
        """Build the cache model and load the trained Tip-Adapter-F linear layer."""
        cached_keys, cached_values = Tip_adapter(root_TrainSet, class_4_path, classes_path, model, preprocess)
        adapter = nn.Linear(cached_keys.shape[1], cached_keys.shape[0], bias=False)
        adapter.load_state_dict(jt.load(weights_path))
        return adapter, cached_values

    def tip_f_logits(adapter, cached_values, features, alpha, beta):
        """Tip-Adapter-F logits: alpha * cache logits + zero-shot probabilities."""
        affinity = adapter(features)
        cache_logits = ((-1) * (beta - beta * affinity)).exp() @ cached_values
        return alpha * cache_logits + zero_shot_probs(features, text_features)

    # ------------------------------ plain CLIP inference ------------------------------
    if method_name is None:
        top5_result = top5_of(zero_shot_probs(get_test_fea(), text_features))
        if is_submission:
            _save_top5(imgs, top5_result, result_name)
            return num
        num = _count_correct(test_label, top5_result[:, 0])
        print(f"共{len(df_label)}张图片,预测完成!")
        return num

    # ------------------------------ Tip-Adapter ------------------------------
    if method_name == 'Tip-Adapter':
        print(f'Utilized the {method_name} method!')
        cached_keys, cached_values = Tip_adapter(root_TrainSet, class_4_path, classes_path, model, preprocess)
        test_features = get_test_fea()
        text_probs = zero_shot_probs(test_features, text_features)  # independent of alpha/beta

        def tip_test(alpha=ALPHA, beta=BETA):
            cache_logits = ((-1) * (beta - beta * test_features @ cached_keys.t())).exp() @ cached_values
            cache_logits = normalize_tensor(cache_logits)
            return top5_of(alpha * cache_logits + text_probs)

        if is_submission:
            _save_top5(imgs, tip_test(alpha=2.0, beta=3.0), result_name)
            return num
        return _grid_search(
            lambda alpha, beta: _count_correct(test_label, tip_test(alpha, beta)[:, 0]),
            _frange(0.2, 6.6, 0.2), _frange(0.1, 5.1, 0.1), total)

    # ------------------------------ Tip-Adapter-F (trained adapter) ------------------------------
    if method_name == 'Tip-Adapter-F':
        print(f'Utilized the {method_name} method!')
        adapter, cached_values = load_tip_adapter_f(Tip_Adapter_F_pkl_path)
        test_features = get_test_fea()

        def tip_F_test(alpha=0.4, beta=5.1):
            return top5_of(tip_f_logits(adapter, cached_values, test_features, alpha, beta))

        if is_submission:
            _save_top5(imgs, tip_F_test(alpha=0.22, beta=4.5), result_name)
            return num
        alphas = [0.1, 0.2] + _frange(0.22, 0.38, 0.02) + _frange(0.4, 6.6, 0.2)
        return _grid_search(
            lambda alpha, beta: _count_correct(test_label, tip_F_test(alpha, beta)[:, 0]),
            alphas, _frange(0.1, 5.1, 0.1), total)

    # ------------------------------ Linear probe: frozen backbone + linear classifier ------------------------------
    # Results on TestSetZ were poor, so this method is never run on the submission sets.
    if method_name == 'Linear_Probe':
        print(f'Utilized the {method_name} method!')
        # The Tip-Adapter cache is reused only as a source of training features/labels.
        train_features, train_labels = Tip_adapter(root_TrainSet, class_4_path, classes_path, model, preprocess)
        train_features = train_features.numpy()
        train_labels = jt.argmax(train_labels, dim=1)[0].numpy().astype('float32')
        if is_submission:
            return num

        classifier = LogisticRegression(random_state=0, C=8.960, max_iter=3000, verbose=1)
        classifier.fit(train_features, train_labels)

        test_features = get_test_fea().numpy()
        print('start predicting...')
        predictions = classifier.predict_proba(test_features)
        # predict_proba columns follow classifier.classes_, so map column -> class label.
        predicted = classifier.classes_[predictions.argmax(axis=1)]
        return _count_correct(test_label, predicted)

    # ------------------------------ cross-modal adapter ------------------------------
    if method_name == 'cross_modal_Adapter':
        print(f'Utilized the {method_name} method!')
        from finetune.Cross_modal_Adapter import logitHead

        model.add_module('cross_logit', logitHead(get_val_text_features(classes_path, model)))
        # NOTE(review): this loads pkl_path (not cross_modal_pkl_path) into the model that
        # owns cross_logit, unlike the 'fusion' branch — confirm this is intended.
        model.load_state_dict(jt.load(pkl_path))

        model1, preprocess1 = clip.load(cross_modal_pkl_path)
        test_features_clip = get_test_fea(model1, preprocess=preprocess1)
        text_features = get_val_text_features(classes_path, model=model1)

        test_features = get_test_fea()
        # Both terms are independent of alpha/beta, so compute them once.
        logits1 = (100.0 * model.cross_logit(test_features)).softmax(dim=-1)
        logits2 = zero_shot_probs(test_features_clip, text_features)

        def cross_modal_Adapter_test(alpha=ALPHA, beta=BETA):
            return top5_of(alpha * logits2 + logits1 * beta)

        if is_submission:
            _save_top5(imgs, cross_modal_Adapter_test(alpha=ALPHA, beta=BETA), result_name)
            return num
        return _grid_search(
            lambda alpha, beta: _count_correct(test_label, cross_modal_Adapter_test(alpha, beta)[:, 0]),
            _frange(0.2, 6.6, 0.2), _frange(0.1, 5.1, 0.1), total)

    # ------------------------------ cross-modal training + cache model ------------------------------
    if method_name == 'cross_modal+tip_adapter':
        print(f'Utilized the {method_name} method!')
        train_img_features, train_txt_features, train_labels = cache_module(root_TrainSet, class_4_path, classes_path, model, preprocess)
        train_img_features, train_txt_features = train_img_features.numpy(), train_txt_features.numpy()
        train_features = np.concatenate((train_img_features, train_txt_features), axis=0)
        train_labels = np.concatenate((train_labels, train_labels), axis=0)
        classifier = OneVsRestClassifier(LogisticRegression(random_state=0, C=8.960, max_iter=6000, verbose=1))
        classifier.fit(train_features, train_labels)

        print('loading test data...')
        test_features = get_test_fea().numpy()
        # Neither term depends on alpha/beta: evaluate them once instead of per grid point.
        predictions = classifier.predict_proba(test_features)
        pre_similitys = zero_shot_probs(jt.array(test_features), text_features).numpy()

        def cmta_test(alpha=0.5, beta=0.1):
            logits = alpha * predictions + beta * pre_similitys
            return np.flip(np.argsort(logits, axis=1)[:, -5:], axis=1)

        if is_submission:
            _save_top5(imgs, cmta_test(alpha=7.0, beta=1.2), result_name)
            return num
        # The original grid has no 0.9 entry; it is kept as it was.
        alphas = _frange(0.2, 0.8, 0.1) + _frange(1.0, 2.0, 0.1) + _frange(2.2, 15.2, 0.2)
        return _grid_search(
            lambda alpha, beta: _count_correct(test_label, cmta_test(alpha, beta)[:, 0]),
            alphas, _frange(0.1, 5.0, 0.1), total)

    # ------------------------------ FD-Align ------------------------------
    if method_name == 'FD-Align':
        print(f'Utilized the {method_name} method!')
        from finetune.FD_Align import Prototype
        class_prototype, _ = Prototype(classes_path)

        top5_result = top5_of(zero_shot_probs(get_test_fea(), class_prototype))
        if is_submission:
            _save_top5(imgs, top5_result, result_name)
            return num
        num = _count_correct(test_label, top5_result[:, 0])
        print(f"共{len(df_label)}张图片,预测完成!")
        return num

    # ------------------------------ WiSE-FT: weight-space interpolation ------------------------------
    if method_name == 'WiSE-FT':
        print(f'Utilized the {method_name} method!')
        from jclip.model import build_model
        from jclip.clip import _transform

        model_1, _ = clip.load(pkl_path)
        model_2, _ = clip.load(WiSE_FT_pkl_path)
        weights_1 = model_1.state_dict()
        weights_2 = model_2.state_dict()

        def wise_test(alpha):
            """Return top-5 labels of the model alpha * weights_1 + (1 - alpha) * weights_2."""
            added_dict = {key: alpha * weights_1.get(key, 0) + (1 - alpha) * weights_2.get(key, 0)
                          for key in set(weights_1) | set(weights_2)}
            model_12 = build_model(added_dict)
            preprocess_12 = _transform(model_12.visual.input_resolution)
            model_12.eval()
            mixed_text = get_val_text_features(classes_path, model_12)
            mixed_images = get_test_fea(model=model_12, preprocess=preprocess_12)
            return top5_of(zero_shot_probs(mixed_images, mixed_text))

        if is_submission:
            _save_top5(imgs, wise_test(0.2), result_name)
            return num

        best_acc, best_alpha, best_num = 0, 0, num
        for mix in np.arange(0.1, 1, 0.01):
            correct = _count_correct(test_label, wise_test(mix)[:, 0])
            acc = _accuracy(correct, total)
            if acc > best_acc:
                best_acc, best_alpha, best_num = acc, mix, correct
                print(f'The best acc is {best_acc} ; best x is : {best_alpha}')
        print(f"共{len(df_label)}张图片,预测完成!")
        print("Best alpha is : ", best_alpha)
        return best_num

    # ------------------------------ fusion: CLIP + Tip-Adapter-F + FD-Align + cross-modal ------------------------------
    if method_name == 'fusion':
        print(f'Utilized the {method_name} method!')
        from finetune.FD_Align import Prototype
        from finetune.Cross_modal_Adapter import logitHead
        class_prototype, _ = Prototype(classes_path)

        # FD-Align model scored against the class prototypes.
        model_FD_Align, preprocess_FD_Align = clip.load(FD_Align_pkl_path)
        logits_FD_Align = zero_shot_probs(get_test_fea(model_FD_Align, preprocess_FD_Align), class_prototype)
        parameters_0 = count_parameters_in_mb(model_FD_Align)

        # Tip-Adapter-F and plain CLIP share the same image features.
        adapter, cached_values = load_tip_adapter_f(Tip_Adapter_F_pkl_path)
        base_features = get_test_fea()
        logits_Tip_Adapter_F = tip_f_logits(adapter, cached_values, base_features, alpha=1.4, beta=1.3)
        parameters_1 = count_parameters_in_mb(adapter)

        logits_clip = zero_shot_probs(base_features, text_features)
        parameters_2 = count_parameters_in_mb(model)

        # Must run last: it adds a head to `model` and overwrites its weights.
        model.add_module('cross_logit', logitHead(get_val_text_features(classes_path, model)))
        model.load_state_dict(jt.load(cross_modal_pkl_path))
        logits_cross_modal = (100.0 * model.cross_logit(get_test_fea(model))).softmax(dim=-1)
        parameters_3 = count_parameters_in_mb(model)

        total_parameters = parameters_0 + parameters_1 + parameters_2 + parameters_3
        print(f'The total number of parameters used by the model is: {total_parameters:.2f} Mb')

        def fusion_test(first_logits, X, Y, Z):
            return top5_of(X * first_logits + Y * logits_FD_Align + Z * logits_cross_modal)

        if is_submission:
            _save_top5(imgs, fusion_test(logits_Tip_Adapter_F, X=0.04, Y=0.53, Z=0.43), result_name)
            return num

        best_acc, best_num = 0, num
        for X in np.arange(0.00, 1.00, 0.02):
            for Y in np.arange(0.00, 1 - X, 0.02):
                Z = round(1 - X - Y, 2)
                # NOTE(review): the search weights the plain CLIP logits with X, whereas the
                # submission path above weights the Tip-Adapter-F logits — confirm intended.
                correct = _count_correct(test_label, fusion_test(logits_clip, X=X, Y=Y, Z=Z)[:, 0])
                acc = _accuracy(correct, total)
                if acc > best_acc:
                    best_acc, best_num = acc, correct
                    print(f'The best acc is {best_acc} ; best x is : {X}, best y is : {Y}, best z is : {Z}')
        return best_num

    # ------------------------------ fusion_2: trained CLIP + Tip-Adapter-F ------------------------------
    print(f'Utilized the {method_name} method!')
    test_features = get_test_fea()

    top5_result_clip = _to_numpy(top5_of(zero_shot_probs(test_features, text_features)))
    parameters_1 = count_parameters_in_mb(model)

    adapter, cached_values = load_tip_adapter_f(Tip_Adapter_F_pkl_path)
    top5_result_TAF = _to_numpy(top5_of(tip_f_logits(adapter, cached_values, test_features, alpha=2.4, beta=0.7)))
    parameters_2 = count_parameters_in_mb(adapter)

    total_parameters = parameters_1 + parameters_2
    print(f'The total number of parameters used by the model is: {total_parameters:.2f} Mb')

    # Keep CLIP's prediction when the class index is >= 374 (per the original note: classes
    # not seen in training); otherwise use the Tip-Adapter-F prediction at that position.
    top5_result = np.where(top5_result_clip < 374, top5_result_TAF, top5_result_clip)
    _save_top5(imgs, top5_result, result_name)

    if not is_submission:
        num = _count_correct(test_label, top5_result[:, 0])
    return num
-
-
-
-
if __name__ == "__main__":
    # Script entry point: evaluate one fine-tuning method on one test folder.

    jt.flags.use_cuda = 1

    # Raw strings keep the Windows backslashes literal (no invalid-escape warnings).
    root_TrainSet = r'E:\Competition1'

    pkl_path = r"E:\Competition1\Weights\CLIP-0820.pkl"
    Tip_Adapter_F_pkl_path = r"E:\Competition1\Weights\Tip_adapter_F-0820.pkl"

    TestData_path = rf"{root_TrainSet}\TestSetB/"  # TestSetA | TestSetB | TestSetZ
    label_path = rf"{root_TrainSet}\TestSetZ-label.txt"
    classes_path = rf"{root_TrainSet}\classes_b.txt"

    FD_Align_pkl_path = ""
    os.environ['ROOT_PATH'] = root_TrainSet
    cross_modal_pkl_path = ""

    method_name = 'fusion_2'
    num = compute_acc_TestSetZ(root_TrainSet, pkl_path, FD_Align_pkl_path, Tip_Adapter_F_pkl_path, cross_modal_pkl_path, classes_path, TestData_path, label_path, method_name=method_name)

    dataset_name = TestData_path[-9:-1]
    # Accuracy is only meaningful for the labelled set and when a count was returned.
    total_images = len(os.listdir(TestData_path)) if 'TestSetZ' in TestData_path else 0
    if total_images and num is not None:
        print(f"该模型在{dataset_name}数据集上的分类准确率为:{float(num) / total_images:.4f}")
    else:
        print(f'该模型在{dataset_name}数据集上预测完成!')
|