"""Copy the unlabeled images whose two inference scores disagree the most.

Two CSV files, each with ``Image_Name`` and ``score`` columns, are loaded.
For every image present in both files the absolute score difference is used
as a "hard score".  The images with the largest hard scores are copied,
together with their label files when those exist, from ``source_path`` into
``dist_path_01``.

NOTE(review): the names "atss" and "cascade" come from the original variable
names; presumably they refer to two different detectors -- confirm.
"""

import os
import shutil

import pandas as pd

source_path = '/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/unlabel_11_12/'
dist_path_01 = '/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_11_12_hard_score_03/train/'

# Input CSVs, resolved relative to the current working directory.
ATSS_CSV = './test_unlabel_11_12.csv'
CASCADE_CSV = './test_cascade_unlabel_11_12.csv'

# Maximum number of images copied into the destination set.
NUM_SELECT = 2750


def scores_by_image(frame):
    """Return a ``{Image_Name: score}`` dict built from *frame*.

    When an image name occurs more than once, the last row wins, which is
    what a row-by-row dict assignment would produce as well.
    """
    return dict(zip(frame['Image_Name'], frame['score']))


def compute_hard_scores(atss_score, cascade_score):
    """Return ``{image_name: abs(atss - cascade)}`` for images in both dicts.

    Images missing from *cascade_score* are left out instead of raising
    ``KeyError``.  The insertion order of *atss_score* is preserved so that a
    later stable sort breaks ties the same way on every run.
    """
    return {
        name: abs(score - cascade_score[name])
        for name, score in atss_score.items()
        if name in cascade_score
    }


def select_hardest(hard_score, limit=NUM_SELECT):
    """Return up to *limit* image names, largest hard score first."""
    ranked = sorted(hard_score.items(), key=lambda item: item[1], reverse=True)
    return [name for name, _ in ranked[:max(limit, 0)]]


def label_name_for(image_name):
    """Return the label file name for *image_name* (extension -> ``.txt``).

    Only the final extension is swapped, so names that contain ``.jpg``
    elsewhere, or that use another image extension, map correctly.
    """
    return os.path.splitext(image_name)[0] + '.txt'


def copy_selected(names, src_root, dst_root):
    """Copy the images in *names*, and their labels if present.

    Files are read from ``<src_root>/images`` and ``<src_root>/labels`` and
    written to the same sub-directories under *dst_root*, which are created
    when missing.

    Returns:
        ``(count_img, count_label)``: the number of images and labels copied.

    Raises:
        OSError: if an image listed in *names* cannot be copied.
    """
    src_images = os.path.join(src_root, 'images')
    src_labels = os.path.join(src_root, 'labels')
    dst_images = os.path.join(dst_root, 'images')
    dst_labels = os.path.join(dst_root, 'labels')

    # shutil.copy does not create parent directories.
    os.makedirs(dst_images, exist_ok=True)
    os.makedirs(dst_labels, exist_ok=True)

    count_img = 0
    count_label = 0
    for name in names:
        shutil.copy(os.path.join(src_images, name),
                    os.path.join(dst_images, name))
        count_img += 1

        # A label is optional: copy it only when the source file exists.
        label = label_name_for(name)
        src_label = os.path.join(src_labels, label)
        if os.path.exists(src_label):
            shutil.copy(src_label, os.path.join(dst_labels, label))
            count_label += 1
    return count_img, count_label


def main():
    """Load both score files, rank images by disagreement and copy the top."""
    infer_data = pd.read_csv(ATSS_CSV)
    print(infer_data.shape)
    infer_data_cascade = pd.read_csv(CASCADE_CSV)
    print(infer_data_cascade.shape)

    atss_score = scores_by_image(infer_data)
    cascade_score = scores_by_image(infer_data_cascade)

    # Report images that cannot be scored instead of failing on them.
    missing = [name for name in atss_score if name not in cascade_score]
    if missing:
        print('skipped', len(missing), 'image(s) without a cascade score')

    hard_score = compute_hard_scores(atss_score, cascade_score)
    select_01 = select_hardest(hard_score, NUM_SELECT)
    print(len(select_01))

    count_img, count_label = copy_selected(select_01, source_path, dist_path_01)
    print(count_img, count_label)


if __name__ == '__main__':
    main()