
eval.py

# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Eval Retinaface_resnet50."""
from __future__ import print_function
import os
import time
import datetime

import numpy as np
import cv2
from mindspore import Tensor, context
from mindspore.train.serialization import load_checkpoint, load_param_into_net

from src.config import cfg_res50
from src.network import RetinaFace, resnet50
from src.utils import decode_bbox, prior_box

class Timer():
    def __init__(self):
        self.start_time = 0.
        self.diff = 0.

    def start(self):
        self.start_time = time.time()

    def end(self):
        self.diff = time.time() - self.start_time

class DetectionEngine:
    """Collect RetinaFace predictions and evaluate AP on the WIDER FACE val set."""

    def __init__(self, cfg):
        self.results = {}
        self.nms_thresh = cfg['val_nms_threshold']
        self.conf_thresh = cfg['val_confidence_threshold']
        self.iou_thresh = cfg['val_iou_threshold']
        self.var = cfg['variance']
        self.save_prefix = cfg['val_predict_save_folder']
        self.gt_dir = cfg['val_gt_dir']

    def _iou(self, a, b):
        A = a.shape[0]
        B = b.shape[0]
        max_xy = np.minimum(
            np.broadcast_to(np.expand_dims(a[:, 2:4], 1), [A, B, 2]),
            np.broadcast_to(np.expand_dims(b[:, 2:4], 0), [A, B, 2]))
        min_xy = np.maximum(
            np.broadcast_to(np.expand_dims(a[:, 0:2], 1), [A, B, 2]),
            np.broadcast_to(np.expand_dims(b[:, 0:2], 0), [A, B, 2]))
        inter = np.maximum((max_xy - min_xy + 1), np.zeros_like(max_xy - min_xy))
        inter = inter[:, :, 0] * inter[:, :, 1]

        area_a = np.broadcast_to(
            np.expand_dims(
                (a[:, 2] - a[:, 0] + 1) * (a[:, 3] - a[:, 1] + 1), 1),
            np.shape(inter))
        area_b = np.broadcast_to(
            np.expand_dims(
                (b[:, 2] - b[:, 0] + 1) * (b[:, 3] - b[:, 1] + 1), 0),
            np.shape(inter))
        union = area_a + area_b - inter
        return inter / union

    def _nms(self, boxes, threshold=0.5):
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]
        scores = boxes[:, 4]

        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = scores.argsort()[::-1]

        reserved_boxes = []
        while order.size > 0:
            i = order[0]
            reserved_boxes.append(i)
            max_x1 = np.maximum(x1[i], x1[order[1:]])
            max_y1 = np.maximum(y1[i], y1[order[1:]])
            min_x2 = np.minimum(x2[i], x2[order[1:]])
            min_y2 = np.minimum(y2[i], y2[order[1:]])

            intersect_w = np.maximum(0.0, min_x2 - max_x1 + 1)
            intersect_h = np.maximum(0.0, min_y2 - max_y1 + 1)
            intersect_area = intersect_w * intersect_h

            ovr = intersect_area / (areas[i] + areas[order[1:]] - intersect_area)
            indexs = np.where(ovr <= threshold)[0]
            order = order[indexs + 1]

        return reserved_boxes

    def write_result(self):
        # save result to file.
        import json
        t = datetime.datetime.now().strftime('_%Y_%m_%d_%H_%M_%S')
        try:
            if not os.path.isdir(self.save_prefix):
                os.makedirs(self.save_prefix)

            self.file_path = self.save_prefix + '/predict' + t + '.json'
            f = open(self.file_path, 'w')
            json.dump(self.results, f)
        except IOError as e:
            raise RuntimeError("Unable to open json file to dump. What(): {}".format(str(e)))
        else:
            f.close()
            return self.file_path

    def detect(self, boxes, confs, resize, scale, image_path, priors):
        if boxes.shape[0] == 0:
            # add an empty entry to the result
            event_name, img_name = image_path.split('/')
            if event_name not in self.results.keys():
                self.results[event_name] = {}
            self.results[event_name][img_name[:-4]] = {'img_path': image_path,
                                                       'bboxes': []}
            return

        boxes = decode_bbox(np.squeeze(boxes.asnumpy(), 0), priors, self.var)
        boxes = boxes * scale / resize

        scores = np.squeeze(confs.asnumpy(), 0)[:, 1]
        # ignore low scores
        inds = np.where(scores > self.conf_thresh)[0]
        boxes = boxes[inds]
        scores = scores[inds]

        # keep top-K before NMS
        order = scores.argsort()[::-1]
        boxes = boxes[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = self._nms(dets, self.nms_thresh)
        dets = dets[keep, :]

        # x1y1x2y2 -> x1y1wh, truncated to integer pixel coordinates
        dets[:, 2:4] = (dets[:, 2:4].astype(np.int) - dets[:, 0:2].astype(np.int)).astype(np.float)
        dets[:, 0:4] = dets[:, 0:4].astype(np.int).astype(np.float)

        # add to result
        event_name, img_name = image_path.split('/')
        if event_name not in self.results.keys():
            self.results[event_name] = {}
        self.results[event_name][img_name[:-4]] = {'img_path': image_path,
                                                   'bboxes': dets[:, :5].astype(np.float).tolist()}

    def _get_gt_boxes(self):
        from scipy.io import loadmat
        gt = loadmat(os.path.join(self.gt_dir, 'wider_face_val.mat'))
        hard = loadmat(os.path.join(self.gt_dir, 'wider_hard_val.mat'))
        medium = loadmat(os.path.join(self.gt_dir, 'wider_medium_val.mat'))
        easy = loadmat(os.path.join(self.gt_dir, 'wider_easy_val.mat'))

        faceboxes = gt['face_bbx_list']
        events = gt['event_list']
        files = gt['file_list']

        hard_gt_list = hard['gt_list']
        medium_gt_list = medium['gt_list']
        easy_gt_list = easy['gt_list']

        return faceboxes, events, files, hard_gt_list, medium_gt_list, easy_gt_list

    def _norm_pre_score(self):
        max_score = 0
        min_score = 1

        for event in self.results:
            for name in self.results[event].keys():
                bbox = np.array(self.results[event][name]['bboxes']).astype(np.float)
                if bbox.shape[0] == 0:
                    continue
                max_score = max(max_score, np.max(bbox[:, -1]))
                min_score = min(min_score, np.min(bbox[:, -1]))

        length = max_score - min_score
        for event in self.results:
            for name in self.results[event].keys():
                bbox = np.array(self.results[event][name]['bboxes']).astype(np.float)
                if bbox.shape[0] == 0:
                    continue
                bbox[:, -1] -= min_score
                bbox[:, -1] /= length
                self.results[event][name]['bboxes'] = bbox.tolist()

    def _image_eval(self, predict, gt, keep, iou_thresh, section_num):
        _predict = predict.copy()
        _gt = gt.copy()

        image_p_right = np.zeros(_predict.shape[0])
        image_gt_right = np.zeros(_gt.shape[0])
        proposal = np.ones(_predict.shape[0])

        # x1y1wh -> x1y1x2y2
        _predict[:, 2:4] = _predict[:, 0:2] + _predict[:, 2:4]
        _gt[:, 2:4] = _gt[:, 0:2] + _gt[:, 2:4]

        ious = self._iou(_predict[:, 0:4], _gt[:, 0:4])
        for i in range(_predict.shape[0]):
            gt_ious = ious[i, :]
            max_iou, max_index = gt_ious.max(), gt_ious.argmax()
            if max_iou >= iou_thresh:
                if keep[max_index] == 0:
                    image_gt_right[max_index] = -1
                    proposal[i] = -1
                elif image_gt_right[max_index] == 0:
                    image_gt_right[max_index] = 1

            right_index = np.where(image_gt_right == 1)[0]
            image_p_right[i] = len(right_index)

        image_pr = np.zeros((section_num, 2), dtype=np.float)
        for section in range(section_num):
            _thresh = 1 - (section + 1) / section_num
            over_score_index = np.where(predict[:, 4] >= _thresh)[0]
            if over_score_index.shape[0] == 0:
                image_pr[section, 0] = 0
                image_pr[section, 1] = 0
            else:
                index = over_score_index[-1]
                p_num = len(np.where(proposal[0:(index + 1)] == 1)[0])
                image_pr[section, 0] = p_num
                image_pr[section, 1] = image_p_right[index]

        return image_pr

    def get_eval_result(self):
        self._norm_pre_score()
        facebox_list, event_list, file_list, hard_gt_list, medium_gt_list, easy_gt_list = self._get_gt_boxes()
        section_num = 1000
        sets = ['easy', 'medium', 'hard']
        set_gts = [easy_gt_list, medium_gt_list, hard_gt_list]
        ap_key_dict = {0: "Easy Val AP : ", 1: "Medium Val AP : ", 2: "Hard Val AP : ",}
        ap_dict = {}
        for _set in range(len(sets)):
            gt_list = set_gts[_set]
            count_gt = 0
            pr_curve = np.zeros((section_num, 2), dtype=np.float)
            for i, _ in enumerate(event_list):
                event = str(event_list[i][0][0])
                image_list = file_list[i][0]
                event_predict_dict = self.results[event]
                event_gt_index_list = gt_list[i][0]
                event_gt_box_list = facebox_list[i][0]

                for j, _ in enumerate(image_list):
                    predict = np.array(event_predict_dict[str(image_list[j][0][0])]['bboxes']).astype(np.float)
                    gt_boxes = event_gt_box_list[j][0].astype('float')
                    keep_index = event_gt_index_list[j][0]
                    count_gt += len(keep_index)

                    if gt_boxes.shape[0] == 0 or predict.shape[0] == 0:
                        continue
                    keep = np.zeros(gt_boxes.shape[0])
                    if keep_index.shape[0] > 0:
                        keep[keep_index - 1] = 1

                    image_pr = self._image_eval(predict, gt_boxes, keep,
                                                iou_thresh=self.iou_thresh,
                                                section_num=section_num)
                    pr_curve += image_pr

            precision = pr_curve[:, 1] / pr_curve[:, 0]
            recall = pr_curve[:, 1] / count_gt

            # AP: monotone precision envelope integrated over recall changes
            precision = np.concatenate((np.array([0.]), precision, np.array([0.])))
            recall = np.concatenate((np.array([0.]), recall, np.array([1.])))
            for i in range(precision.shape[0] - 1, 0, -1):
                precision[i - 1] = np.maximum(precision[i - 1], precision[i])
            index = np.where(recall[1:] != recall[:-1])[0]
            ap = np.sum((recall[index + 1] - recall[index]) * precision[index + 1])

            ap_dict[sets[_set]] = ap
            print(ap_key_dict[_set] + '{:.4f}'.format(ap))

        return ap_dict

def val():
    context.set_context(mode=context.GRAPH_MODE, device_target='GPU', save_graphs=False)

    cfg = cfg_res50

    backbone = resnet50(1001)
    network = RetinaFace(phase='predict', backbone=backbone)
    backbone.set_train(False)
    network.set_train(False)

    # load checkpoint
    assert cfg['val_model'] is not None, 'val_model is None.'
    param_dict = load_checkpoint(cfg['val_model'])
    print('Load trained model done. {}'.format(cfg['val_model']))
    network.init_parameters_data()
    load_param_into_net(network, param_dict)

    # testing dataset
    testset_folder = cfg['val_dataset_folder']
    testset_label_path = cfg['val_dataset_folder'] + "label.txt"
    with open(testset_label_path, 'r') as f:
        _test_dataset = f.readlines()
        test_dataset = []
        for im_path in _test_dataset:
            if im_path.startswith('# '):
                test_dataset.append(im_path[2:-1])  # delete '# ...\n'

    num_images = len(test_dataset)

    timers = {'forward_time': Timer(), 'misc': Timer()}

    if cfg['val_origin_size']:
        h_max, w_max = 0, 0
        for img_name in test_dataset:
            image_path = os.path.join(testset_folder, 'images', img_name)
            _img = cv2.imread(image_path, cv2.IMREAD_COLOR)
            if _img.shape[0] > h_max:
                h_max = _img.shape[0]
            if _img.shape[1] > w_max:
                w_max = _img.shape[1]

        h_max = (int(h_max / 32) + 1) * 32
        w_max = (int(w_max / 32) + 1) * 32

        priors = prior_box(image_sizes=(h_max, w_max),
                           min_sizes=[[16, 32], [64, 128], [256, 512]],
                           steps=[8, 16, 32],
                           clip=False)
    else:
        target_size = 1600
        max_size = 2176
        priors = prior_box(image_sizes=(max_size, max_size),
                           min_sizes=[[16, 32], [64, 128], [256, 512]],
                           steps=[8, 16, 32],
                           clip=False)

    # init detection engine
    detection = DetectionEngine(cfg)

    # testing begin
    print('Predict box starting')
    for i, img_name in enumerate(test_dataset):
        image_path = os.path.join(testset_folder, 'images', img_name)

        img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
        img = np.float32(img_raw)

        # testing scale
        if cfg['val_origin_size']:
            resize = 1
            assert img.shape[0] <= h_max and img.shape[1] <= w_max
            image_t = np.empty((h_max, w_max, 3), dtype=img.dtype)
            image_t[:, :] = (104.0, 117.0, 123.0)
            image_t[0:img.shape[0], 0:img.shape[1]] = img
            img = image_t
        else:
            im_size_min = np.min(img.shape[0:2])
            im_size_max = np.max(img.shape[0:2])
            resize = float(target_size) / float(im_size_min)
            # prevent bigger axis from being more than max_size:
            if np.round(resize * im_size_max) > max_size:
                resize = float(max_size) / float(im_size_max)

            img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR)

            assert img.shape[0] <= max_size and img.shape[1] <= max_size
            image_t = np.empty((max_size, max_size, 3), dtype=img.dtype)
            image_t[:, :] = (104.0, 117.0, 123.0)
            image_t[0:img.shape[0], 0:img.shape[1]] = img
            img = image_t

        scale = np.array([img.shape[1], img.shape[0], img.shape[1], img.shape[0]], dtype=img.dtype)
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = np.expand_dims(img, 0)
        img = Tensor(img)  # [1, c, h, w]

        timers['forward_time'].start()
        boxes, confs, _ = network(img)  # forward pass
        timers['forward_time'].end()
        timers['misc'].start()
        detection.detect(boxes, confs, resize, scale, img_name, priors)
        timers['misc'].end()

        print('im_detect: {:d}/{:d} forward_pass_time: {:.4f}s misc: {:.4f}s'.format(i + 1, num_images,
                                                                                     timers['forward_time'].diff,
                                                                                     timers['misc'].diff))
    print('Predict box done.')
    print('Eval starting')

    if cfg['val_save_result']:
        # Save the predict result if you want.
        predict_result_path = detection.write_result()
        print('predict result path is {}'.format(predict_result_path))

    # # TEST
    # import json
    # with open('./widerface_result/predict_2020_09_08_11_07_25.json', 'r') as f:
    #     result = json.load(f)
    # detection.results = result

    detection.get_eval_result()
    print('Eval done.')


if __name__ == '__main__':
    val()
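

# --- Usage sketch ------------------------------------------------------------
# A minimal, illustrative sketch of re-scoring previously saved predictions
# without re-running inference, following the commented-out TEST block in val().
# It assumes cfg_res50['val_gt_dir'] points at the WIDER FACE validation .mat
# files; the JSON filename below is only a sample name produced by
# DetectionEngine.write_result() and should be replaced with a real output file.
#
#     import json
#     detection = DetectionEngine(cfg_res50)
#     with open('./widerface_result/predict_2020_09_08_11_07_25.json', 'r') as f:
#         detection.results = json.load(f)   # reuse saved per-image bboxes
#     detection.get_eval_result()            # prints Easy/Medium/Hard Val AP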