"""Pick the unlabeled images that lie farthest from the training features.

A mean and a Ledoit-Wolf shrunk covariance are estimated from the feature
vectors in the training CSV.  Every row of the unlabeled CSV is then scored
by its Mahalanobis distance to that distribution, and the highest-scoring
images (plus their label files, when present) are copied from
``source_path`` into ``dist_path_01``.

Both CSV files are expected to provide a ``feature`` column holding a
comma-separated vector serialized as text (e.g. "[0.1, 0.2]"); the unlabeled
CSV must also provide an ``Image_Name`` column.
"""

import os
import shutil
from operator import itemgetter

import numpy as np
import pandas as pd
from scipy.spatial.distance import mahalanobis

source_path = '/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/unlabel_11_12/'
dist_path_01 = '/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_11_12_hard_score_04/train/'

UNLABEL_CSV = './test_unlabel_11_12.csv'
TRAIN_CSV = './test_baseline_06_10.csv'
NUM_SELECT = 2750  # number of highest-distance images to copy


def parse_feature(text):
    """Convert a serialized feature vector into a list of floats.

    Args:
        text: Comma-separated numbers, optionally wrapped in square
            brackets, e.g. "[0.1, 0.2]".

    Returns:
        The parsed values as a list of floats.

    Raises:
        ValueError: If any element is not a valid float (including an
            empty vector such as "[]").
    """
    # Strip only real brackets: blindly dropping the first and last
    # character would silently corrupt values that are not bracketed.
    return [float(token) for token in text.strip().strip("[]").split(",")]


def load_features(frame):
    """Parse the 'feature' column of *frame* into one array row per record."""
    return np.array([parse_feature(text) for text in frame['feature']])


def fit_gaussian(train_feats):
    """Estimate the mean, covariance and inverse covariance of the features.

    Args:
        train_feats: 2-D array with one feature vector per row.

    Returns:
        Tuple ``(mean, covariance, inverse_covariance)``; the covariance is
        the Ledoit-Wolf shrinkage estimate.

    Raises:
        ValueError: If *train_feats* contains no data.
    """
    # Imported lazily so the parsing/selection helpers remain importable
    # on machines where scikit-learn is not installed.
    from sklearn.covariance import LedoitWolf

    if train_feats.size == 0:
        raise ValueError("no training features to fit")
    train_mean = np.mean(train_feats, axis=0)
    train_cov = LedoitWolf().fit(train_feats).covariance_
    train_cov_inv = np.linalg.inv(train_cov)
    return train_mean, train_cov, train_cov_inv


def mahalanobis_scores(image_names, features, mean, cov_inv):
    """Map each image name to its Mahalanobis distance from *mean*.

    Args:
        image_names: Iterable of image file names.
        features: Iterable of serialized feature vectors, aligned with
            *image_names*.
        mean: Mean feature vector of the reference distribution.
        cov_inv: Inverse covariance matrix of the reference distribution.

    Returns:
        Dict of ``{image_name: distance}``.  When a name occurs more than
        once, the distance of its last occurrence is kept.
    """
    return {
        name: mahalanobis(parse_feature(text), mean, cov_inv)
        for name, text in zip(image_names, features)
    }


def select_top_k(scores, k):
    """Return the *k* keys of *scores* with the largest values, best first.

    Fewer than *k* keys are returned when *scores* is smaller than *k*.
    Ties keep the insertion order of *scores*.
    """
    ranked = sorted(scores.items(), key=itemgetter(1), reverse=True)
    return [name for name, _ in ranked[:max(k, 0)]]


def label_name_for(image_name):
    """Return the label file name (``.txt``) matching *image_name*."""
    # splitext swaps only the real extension, so non-.jpg images also map
    # to a label and a ".jpg" inside the stem is left untouched.
    return os.path.splitext(image_name)[0] + ".txt"


def copy_selected(image_names, source_dir, dest_dir):
    """Copy the given images, and any existing label files, to *dest_dir*.

    Images are read from ``<source_dir>/images`` and labels from
    ``<source_dir>/labels``; they are written to the same sub-directories
    of *dest_dir*, which are created if missing.

    Args:
        image_names: Image file names to copy.
        source_dir: Directory containing ``images/`` and ``labels/``.
        dest_dir: Directory receiving the copies.

    Returns:
        Tuple ``(images_copied, labels_copied)``.
    """
    src_images = os.path.join(source_dir, 'images')
    src_labels = os.path.join(source_dir, 'labels')
    dst_images = os.path.join(dest_dir, 'images')
    dst_labels = os.path.join(dest_dir, 'labels')
    # shutil.copy does not create parent directories.
    os.makedirs(dst_images, exist_ok=True)
    os.makedirs(dst_labels, exist_ok=True)

    count_img = 0
    count_label = 0
    for name in image_names:
        shutil.copy(os.path.join(src_images, name),
                    os.path.join(dst_images, name))
        count_img += 1

        label = label_name_for(name)
        label_src = os.path.join(src_labels, label)
        # Not every image has a label file; skip the missing ones.
        if os.path.exists(label_src):
            shutil.copy(label_src, os.path.join(dst_labels, label))
            count_label += 1
    return count_img, count_label


def main():
    """Score the unlabeled set against the training set and copy the top hits."""
    infer_data_unlabel = pd.read_csv(UNLABEL_CSV)
    print(infer_data_unlabel.shape)
    infer_data_train = pd.read_csv(TRAIN_CSV)
    print(infer_data_train.shape)

    train_feats = load_features(infer_data_train)
    print(train_feats.shape)

    train_mean, train_cov, train_cov_inv = fit_gaussian(train_feats)
    print(train_mean.shape)
    print(train_cov.shape)
    print(train_cov_inv.shape)

    feat_dist = mahalanobis_scores(
        infer_data_unlabel['Image_Name'],
        infer_data_unlabel['feature'],
        train_mean,
        train_cov_inv,
    )
    select_01 = select_top_k(feat_dist, NUM_SELECT)
    print(len(select_01))

    count_img, count_label = copy_selected(select_01, source_path, dist_path_01)
    print(count_img, count_label)


if __name__ == "__main__":
    main()