|
- import random
- import yaml
- import jittor as jt
- import numpy as np
- import pandas as pd
- from PIL import Image
- import jclip as clip
- from jittor.transform import ImageNormalize, Compose
- from datetime import datetime
- from jittor.optim import Optimizer
-
- # 类别到idx的映射
- def class_2_idx(path):
- '''
- path : classes.txt文件路径
- return : dict: {'Bear': 0,'Bee': 1, ..., 'papillon': 373,...}
- '''
- df_classes = pd.read_csv(path, delimiter = ' ', header = None, index_col = False)
- list_classes = list(map(lambda x: x.split('_',1)[-1], df_classes[0]))
- res = {k:v for k,v in zip(list_classes,range(len(list_classes)))}
-
- return res
-
-
- def generate_prompt(name :str):
- '''
- 用于生成提示语句
- '''
- li = [name,
- f"{name} , {len(name)}",
- f"A photo of a {name}",
- f"A photo of a {name} with {len(name)}",
- f"A photo of {name} and the length of the prompt is {len(name)}",
- f"A photo of {name} and the length of the {name} is {len(name)}",
- f"A photo of {name.replace('_', ' ')} and the length of the name is {len(name.replace('_', ' '))}",
- f"A photo of {name} with {len(name)} and _ in {name}" if '_' in name else f"A photo of {name} with {len(name)} and _ not in {name}",
- f"A photo of {name}, {name}, {name}"]
-
-
- return li[4]
-
-
- def normalize_tensor(tensor, a=0.0, b=1.0):
- '''
- 归一化
- '''
- tensor_min = tensor.min()
- tensor_max = tensor.max()
- normalized_tensor = a + (tensor - tensor_min) * (b - a) / (tensor_max - tensor_min)
-
- return normalized_tensor
-
-
- def get_val_text_features(classes_path, model):
- '''
- 用于获取模型在待预测类别上的文本特征
- '''
- df_classes = pd.read_csv(classes_path , delimiter = ' ' , header = None , index_col = False)
- classes = list(map(lambda x: generate_prompt(x.split('_',1)[-1]), df_classes[0]))
- text = clip.tokenize(classes)
- with jt.no_grad():
- text_features = model.encode_text(text)
- text_features /= text_features.norm(dim=-1, keepdim=True) # shape -> (374, 512) 包含了所有类别的文本特征
- return text_features
-
-
- # 自定义的随机擦除函数:类似PyTorch中的torchvision.transforms.RandomErasing函数
- class RandomErasing:
- def __init__(self, p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0):
- self.p = p
- self.scale = scale
- self.ratio = ratio
- self.value = value
-
- def __call__(self, img):
- if random.uniform(0, 1) > self.p:
- return img
-
- img = np.array(img)
- h, w, c = img.shape
- area = h * w
-
- for attempt in range(100):
- target_area = random.uniform(*self.scale) * area
- aspect_ratio = random.uniform(*self.ratio)
-
- erase_h = int(round(np.sqrt(target_area * aspect_ratio)))
- erase_w = int(round(np.sqrt(target_area / aspect_ratio)))
-
- if erase_h < h and erase_w < w:
- x1 = random.randint(0, h - erase_h)
- y1 = random.randint(0, w - erase_w)
-
- if self.value == 'random':
- img[x1:x1+erase_h, y1:y1+erase_w, :] = np.random.randint(0, 256, (erase_h, erase_w, c), dtype=np.uint8)
- else:
- img[x1:x1+erase_h, y1:y1+erase_w, :] = self.value
-
- return jt.array(img)
-
- return jt.array(img)
-
-
-
-
-
- # 计算模型的参数量 : 根据赛题要求不能超过 500Mb
- def count_parameters_in_mb(model):
- total_params = 0
- for param in model.parameters():
- param_count = param.numel()
- total_params += param_count
-
- total_params_in_mb = total_params / 1e6 # 转换为百万参数(Mb)
-
- assert total_params_in_mb <= 500, "Model {} has too many parameters: {:.2f} Mb. The limit is 500 Mb.".format(model.__class__.__name__, total_params_in_mb)
- # print("Model {} : {:.2f} Mb".format(model.__class__.__name__, total_params_in_mb))
-
- return total_params_in_mb
-
-
-
- # 格式化输出args对象中除了 exclude_keys 以外的键值对
- def format_args(args, exclude_keys=['root_TrainSet', 'train_path', 'save_path', 'TestSetZ_path', 'label_path', 'classes_path', 'classes_b_path', 'class_4_path']):
-
- max_key_length = max(len(key) for key in vars(args).keys()) # 计算所有键的最大长度
- formatted_args = "\n".join([f"{key.ljust(max_key_length)} : {value}" for key, value in vars(args).items() if key not in exclude_keys]) # 格式化键值对
-
- return formatted_args
-
-
-
-
-
- # 重写clip.py文件中【_transform方法】,,主要更新了对图像的裁剪的过程
- class Image_Transform():
- def __init__(self, img:Image, clip_init):
- self.img = clip.clip.Resize(224, mode=Image.BICUBIC)(img)
- self.size = self.img.size
- self.clip = clip_init
-
- def choose_best_img(self):
-
- # 若图像的长宽比在指定的阈值以内,则直接缩放;否则就裁剪多张图片送入CLIP中,选取预测类别的众数中预测概率值最大的那张
- # 比如同一张图片裁剪了10张图片,预测到了8个A,1个B和1个C,那么选择8个A中概率值最大的那张
- if self.is_direct_scaling(self.size):
- img = self.img.resize((224,224))
- img = self.transform()(img)
- return img
-
- else:
- self.class_path= r'F:\jittor_comprtition\Competition1\classes.txt'
- classes = list(map(lambda x:generate_prompt(x.split('_',1)[-1]),list(class_2_idx(self.class_path).keys())))
- text = clip.tokenize(classes)
- with jt.no_grad():
- text_features = self.clip.encode_text(text)
- text_features /= text_features.norm(dim=1, keepdim=True)
- self.text_features = text_features
-
- crop_list = self.crop_image_sliding(self.img, 10)
-
- img_tensor = []
- for img in crop_list:
- img_tensor.append(self.transform()(img))
- img_tensor = jt.array(img_tensor) # [crop_num, 3, 224 224]
- with jt.no_grad():
- image_features = self.clip.encode_image(img_tensor)
- image_features /= image_features.norm(dim=1, keepdim=True)
- text_probs = (100.0 * image_features @ self.text_features.transpose(0, 1)).softmax(dim=-1)
- pred_probs, top_labels = text_probs.topk(1)
-
- # 选取预测类别的众数且预测概率值最大的那张图片,如果有多个众数,则随机选择一个
- idx, _, idx_counts = jt.unique(top_labels, return_inverse=True, return_counts=True) # 统计众数及其数量
- idx_mode = idx[jt.argmax(idx_counts, dim=0)[0]]
- index_mode = jt.equal(idx_mode, top_labels.flatten()).nonzero().flatten()
- best_img_index = index_mode[jt.argmax(pred_probs[index_mode],dim=0)[0]].item()
- best_img = img_tensor[best_img_index]
- return best_img
-
-
-
- def is_direct_scaling(self, img_size:tuple, threshold=1.05):
- aspect_ratio = max(img_size) / 224.0
- return aspect_ratio <= threshold
-
- def transform(self):
- return Compose([clip.clip._convert_image_to_rgb,
- ImageNormalize((0.48145466, 0.4578275, 0.40821073),
- (0.26862954, 0.26130258, 0.27577711)),
- clip.clip.ImageToTensor()
- ])
-
- def predict(self, img_fea):
- text_probs = (100.0 * img_fea @ self.text_features.transpose(0, 1)).softmax(dim=-1)
-
-
- def crop_image_sliding(self, img, crop_num, step=None):
-
- width, height = img.size
-
- # 检查维度,确定沿哪个维度滑动裁剪
- if width > 224:
- slide_dim = 'width'
- fixed_dim = 224
- else:
- slide_dim = 'height'
- fixed_dim = 224
-
-
- # 计算默认步长,如果没有指定
- if step is None:
- if slide_dim == 'width':
- step = (width - 224) // (crop_num - 1)
- else:
- step = (height - 224) // (crop_num - 1)
-
- cropped_images = []
-
- for i in range(crop_num):
- if slide_dim == 'width':
- left = i * step
- if left + 224 > width:
- left = width - 224
- box = (left, 0, left + 224, 224)
- else:
- top = i * step
- if top + 224 > height:
- top = height - 224
- box = (0, top, 224, top + 224)
-
- cropped_img = img.crop(box)
- cropped_images.append(cropped_img)
-
- return cropped_images
-
-
- # 获取当天日期
- def get_date_format():
- today = datetime.now()
- formatted_date = str(today.month).zfill(2) + str(today.day).zfill(2)
- return formatted_date
-
-
- # 从yaml文件中读取参数
- def load_yaml_params(yaml_path, args):
- with open(yaml_path, 'rb') as f:
- config = yaml.safe_load(f)
- for key in config.keys():
- assert hasattr(args, key), f"The 'args' object does not have the attribute: {key}"
- setattr(args, key, config[key])
- return args
-
-
|