From 5dc7ecf630db85defff370a1b8729bdb2a087355 Mon Sep 17 00:00:00 2001
From: jiangzhenguang <jiangzhenguang1@huawei.com>
Date: Tue, 27 Apr 2021 11:13:01 +0800
Subject: [PATCH] add train and val in yolov4

---
 .../cv/yolov4/ascend310_infer/src/main.cc     |   3 -
 model_zoo/official/cv/yolov4/eval.py          | 257 +-------------
 model_zoo/official/cv/yolov4/export.py        |   6 +-
 model_zoo/official/cv/yolov4/src/config.py    |   3 +
 .../official/cv/yolov4/src/eval_utils.py      | 323 ++++++++++++++++++
 model_zoo/official/cv/yolov4/src/util.py      |   1 +
 model_zoo/official/cv/yolov4/src/yolo.py      |  18 +-
 model_zoo/official/cv/yolov4/test.py          |   6 +-
 model_zoo/official/cv/yolov4/train.py         |  89 +++--
 9 files changed, 416 insertions(+), 290 deletions(-)
 create mode 100644 model_zoo/official/cv/yolov4/src/eval_utils.py
diff --git a/model_zoo/official/cv/yolov4/ascend310_infer/src/main.cc b/model_zoo/official/cv/yolov4/ascend310_infer/src/main.cc
index ff9c6a1a7c..c16e74d75f 100644
--- a/model_zoo/official/cv/yolov4/ascend310_infer/src/main.cc
+++ b/model_zoo/official/cv/yolov4/ascend310_infer/src/main.cc
@@ -111,13 +111,10 @@ int main(int argc, char **argv) {
       std::cout << "preprocess " << all_files[i] << " failed." << std::endl;
       return 1;
     }
-    std::vector<float> input_shape = {608, 608};
 
     inputs.clear();
     inputs.emplace_back(model_inputs[0].Name(), model_inputs[0].DataType(), model_inputs[0].Shape(),
         img.Data().get(), img.DataSize());
-    inputs.emplace_back(model_inputs[1].Name(), model_inputs[1].DataType(), model_inputs[1].Shape(),
-        input_shape.data(), input_shape.size() * sizeof(float));
 
     gettimeofday(&start, NULL);
     ret = model.Predict(inputs, &outputs);
diff --git a/model_zoo/official/cv/yolov4/eval.py b/model_zoo/official/cv/yolov4/eval.py
index 5f4aa24e3b..75b9ad89bc 100644
--- a/model_zoo/official/cv/yolov4/eval.py
+++ b/model_zoo/official/cv/yolov4/eval.py
@@ -17,12 +17,6 @@ import os
 import argparse
 import datetime
 import time
-import sys
-from collections import defaultdict
-
-import numpy as np
-from pycocotools.coco import COCO
-from pycocotools.cocoeval import COCOeval
 
 from mindspore import Tensor
 from mindspore.context import ParallelMode
@@ -34,6 +28,7 @@ from src.yolo import YOLOV4CspDarkNet53
 from src.logger import get_logger
 from src.yolo_dataset import create_yolo_dataset
 from src.config import ConfigYOLOV4CspDarkNet53
+from src.eval_utils import apply_eval
 
 parser = argparse.ArgumentParser('mindspore coco testing')
 
@@ -52,220 +47,16 @@ parser.add_argument('--pretrained', default='', type=str, help='model_path, loca
 parser.add_argument('--log_path', type=str, default='outputs/', help='checkpoint save location')
 
 # detect_related
-parser.add_argument('--nms_thresh', type=float, default=0.5, help='threshold for NMS')
-parser.add_argument('--ann_file', type=str, default='', help='path to annotation')
+parser.add_argument('--ann_val_file', type=str, default='', help='path to annotation')
 parser.add_argument('--testing_shape', type=str, default='', help='shape for test ')
-parser.add_argument('--ignore_threshold', type=float, default=0.001, help='threshold to throw low quality boxes')
 
 args, _ = parser.parse_known_args()
 
+config = ConfigYOLOV4CspDarkNet53()
+args.nms_thresh = config.nms_thresh
+args.ignore_threshold = config.eval_ignore_threshold
 args.data_root = os.path.join(args.data_dir, 'val2017')
-args.ann_file = os.path.join(args.data_dir, 'annotations/instances_val2017.json')
-
-class Redirct:
-    def __init__(self):
-        self.content = ""
-
-    def write(self, content):
-        self.content += content
-
-    def flush(self):
-        self.content = ""
-
-
-class DetectionEngine:
-    """Detection engine."""
-    def __init__(self, args_detection):
-        self.ignore_threshold = args_detection.ignore_threshold
-        self.labels = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
-                       'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
-                       'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
-                       'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
-                       'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
-                       'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
-                       'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
-                       'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
-                       'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
-                       'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
-        self.num_classes = len(self.labels)
-        self.results = {}
-        self.file_path = ''
-        self.save_prefix = args_detection.outputs_dir
-        self.ann_file = args_detection.ann_file
-        self._coco = COCO(self.ann_file)
-        self._img_ids = list(sorted(self._coco.imgs.keys()))
-        self.det_boxes = []
-        self.nms_thresh = args_detection.nms_thresh
-        self.coco_catids = self._coco.getCatIds()
-
-    def do_nms_for_results(self):
-        """Get result boxes."""
-        for img_id in self.results:
-            for clsi in self.results[img_id]:
-                dets = self.results[img_id][clsi]
-                dets = np.array(dets)
-                keep_index = self._diou_nms(dets, thresh=0.6)
-
-                keep_box = [{'image_id': int(img_id),
-                             'category_id': int(clsi),
-                             'bbox': list(dets[i][:4].astype(float)),
-                             'score': dets[i][4].astype(float)}
-                            for i in keep_index]
-                self.det_boxes.extend(keep_box)
-
-    def _nms(self, predicts, threshold):
-        """Calculate NMS."""
-        # convert xywh -> xmin ymin xmax ymax
-        x1 = predicts[:, 0]
-        y1 = predicts[:, 1]
-        x2 = x1 + predicts[:, 2]
-        y2 = y1 + predicts[:, 3]
-        scores = predicts[:, 4]
-
-        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
-        order = scores.argsort()[::-1]
-
-        reserved_boxes = []
-        while order.size > 0:
-            i = order[0]
-            reserved_boxes.append(i)
-            max_x1 = np.maximum(x1[i], x1[order[1:]])
-            max_y1 = np.maximum(y1[i], y1[order[1:]])
-            min_x2 = np.minimum(x2[i], x2[order[1:]])
-            min_y2 = np.minimum(y2[i], y2[order[1:]])
-
-            intersect_w = np.maximum(0.0, min_x2 - max_x1 + 1)
-            intersect_h = np.maximum(0.0, min_y2 - max_y1 + 1)
-            intersect_area = intersect_w * intersect_h
-            ovr = intersect_area / (areas[i] + areas[order[1:]] - intersect_area)
-
-            indexes = np.where(ovr <= threshold)[0]
-            order = order[indexes + 1]
-        return reserved_boxes
-
-    def _diou_nms(self, dets, thresh=0.5):
-        """
-        convert xywh -> xmin ymin xmax ymax
-        """
-        x1 = dets[:, 0]
-        y1 = dets[:, 1]
-        x2 = x1 + dets[:, 2]
-        y2 = y1 + dets[:, 3]
-        scores = dets[:, 4]
-        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
-        order = scores.argsort()[::-1]
-        keep = []
-        while order.size > 0:
-            i = order[0]
-            keep.append(i)
-            xx1 = np.maximum(x1[i], x1[order[1:]])
-            yy1 = np.maximum(y1[i], y1[order[1:]])
-            xx2 = np.minimum(x2[i], x2[order[1:]])
-            yy2 = np.minimum(y2[i], y2[order[1:]])
-
-            w = np.maximum(0.0, xx2 - xx1 + 1)
-            h = np.maximum(0.0, yy2 - yy1 + 1)
-            inter = w * h
-            ovr = inter / (areas[i] + areas[order[1:]] - inter)
-            center_x1 = (x1[i] + x2[i]) / 2
-            center_x2 = (x1[order[1:]] + x2[order[1:]]) / 2
-            center_y1 = (y1[i] + y2[i]) / 2
-            center_y2 = (y1[order[1:]] + y2[order[1:]]) / 2
-            inter_diag = (center_x2 - center_x1) ** 2 + (center_y2 - center_y1) ** 2
-            out_max_x = np.maximum(x2[i], x2[order[1:]])
-            out_max_y = np.maximum(y2[i], y2[order[1:]])
-            out_min_x = np.minimum(x1[i], x1[order[1:]])
-            out_min_y = np.minimum(y1[i], y1[order[1:]])
-            outer_diag = (out_max_x - out_min_x) ** 2 + (out_max_y - out_min_y) ** 2
-            diou = ovr - inter_diag / outer_diag
-            diou = np.clip(diou, -1, 1)
-            inds = np.where(diou <= thresh)[0]
-            order = order[inds + 1]
-        return keep
-
-
-    def write_result(self):
-        """Save result to file."""
-        import json
-        t = datetime.datetime.now().strftime('_%Y_%m_%d_%H_%M_%S')
-        try:
-            self.file_path = self.save_prefix + '/predict' + t + '.json'
-            f = open(self.file_path, 'w')
-            json.dump(self.det_boxes, f)
-        except IOError as e:
-            raise RuntimeError("Unable to open json file to dump. What(): {}".format(str(e)))
-        else:
-            f.close()
-            return self.file_path
-
-    def get_eval_result(self):
-        """Get eval result."""
-        coco_gt = COCO(self.ann_file)
-        coco_dt = coco_gt.loadRes(self.file_path)
-        coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
-        coco_eval.evaluate()
-        coco_eval.accumulate()
-        rdct = Redirct()
-        stdout = sys.stdout
-        sys.stdout = rdct
-        coco_eval.summarize()
-        sys.stdout = stdout
-        return rdct.content
-
-    def detect(self, outputs, batch, image_shape, image_id):
-        """Detect boxes."""
-        outputs_num = len(outputs)
-        # output [|32, 52, 52, 3, 85| ]
-        for batch_id in range(batch):
-            for out_id in range(outputs_num):
-                # 32, 52, 52, 3, 85
-                out_item = outputs[out_id]
-                # 52, 52, 3, 85
-                out_item_single = out_item[batch_id, :]
-                # get number of items in one head, [B, gx, gy, anchors, 5+80]
-                dimensions = out_item_single.shape[:-1]
-                out_num = 1
-                for d in dimensions:
-                    out_num *= d
-                ori_w, ori_h = image_shape[batch_id]
-                img_id = int(image_id[batch_id])
-                x = out_item_single[..., 0] * ori_w
-                y = out_item_single[..., 1] * ori_h
-                w = out_item_single[..., 2] * ori_w
-                h = out_item_single[..., 3] * ori_h
-
-                conf = out_item_single[..., 4:5]
-                cls_emb = out_item_single[..., 5:]
-
-                cls_argmax = np.expand_dims(np.argmax(cls_emb, axis=-1), axis=-1)
-                x = x.reshape(-1)
-                y = y.reshape(-1)
-                w = w.reshape(-1)
-                h = h.reshape(-1)
-                cls_emb = cls_emb.reshape(-1, self.num_classes)
-                conf = conf.reshape(-1)
-                cls_argmax = cls_argmax.reshape(-1)
-
-                x_top_left = x - w / 2.
-                y_top_left = y - h / 2.
-                # create all False
-                flag = np.random.random(cls_emb.shape) > sys.maxsize
-                for i in range(flag.shape[0]):
-                    c = cls_argmax[i]
-                    flag[i, c] = True
-                confidence = cls_emb[flag] * conf
-                for x_lefti, y_lefti, wi, hi, confi, clsi in zip(x_top_left, y_top_left, w, h, confidence, cls_argmax):
-                    if confi < self.ignore_threshold:
-                        continue
-                    if img_id not in self.results:
-                        self.results[img_id] = defaultdict(list)
-                    x_lefti = max(0, x_lefti)
-                    y_lefti = max(0, y_lefti)
-                    wi = min(wi, ori_w)
-                    hi = min(hi, ori_h)
-                    # transform catId to match coco
-                    coco_clsi = self.coco_catids[clsi]
-                    self.results[img_id][coco_clsi].append([x_lefti, y_lefti, wi, hi, confi])
+args.ann_val_file = os.path.join(args.data_dir, 'annotations/instances_val2017.json')
 
 
 def convert_testing_shape(args_testing_shape):
@@ -290,7 +81,7 @@ if __name__ == "__main__":
     context.set_auto_parallel_context(parallel_mode=parallel_mode, gradients_mean=True, device_num=1)
 
     args.logger.info('Creating Network....')
-    network = YOLOV4CspDarkNet53(is_training=False)
+    network = YOLOV4CspDarkNet53()
 
     args.logger.info(args.pretrained)
     if os.path.isfile(args.pretrained):
@@ -311,49 +102,25 @@ if __name__ == "__main__":
         exit(1)
 
     data_root = args.data_root
-    ann_file = args.ann_file
+    ann_val_file = args.ann_val_file
 
-    config = ConfigYOLOV4CspDarkNet53()
     if args.testing_shape:
         config.test_img_shape = convert_testing_shape(args.testing_shape)
 
-    ds, data_size = create_yolo_dataset(data_root, ann_file, is_training=False, batch_size=args.per_batch_size,
+    ds, data_size = create_yolo_dataset(data_root, ann_val_file, is_training=False, batch_size=args.per_batch_size,
                                         max_epoch=1, device_num=1, rank=rank_id, shuffle=False,
                                         config=config)
 
     args.logger.info('testing shape : {}'.format(config.test_img_shape))
     args.logger.info('totol {} images to eval'.format(data_size))
-
     network.set_train(False)
 
     # init detection engine
-    detection = DetectionEngine(args)
-
     input_shape = Tensor(tuple(config.test_img_shape), ms.float32)
     args.logger.info('Start inference....')
-    for index, data in enumerate(ds.create_dict_iterator(num_epochs=1)):
-        image = data["image"]
-
-        image_shape_ = data["image_shape"]
-        image_id_ = data["img_id"]
-
-        prediction = network(image, input_shape)
-        output_big, output_me, output_small = prediction
-        output_big = output_big.asnumpy()
-        output_me = output_me.asnumpy()
-        output_small = output_small.asnumpy()
-        image_id_ = image_id_.asnumpy()
-        image_shape_ = image_shape_.asnumpy()
-
-        detection.detect([output_small, output_me, output_big], args.per_batch_size, image_shape_, image_id_)
-        if index % 1000 == 0:
-            args.logger.info('Processing... {:.2f}% '.format(index * args.per_batch_size / data_size * 100))
-
-    args.logger.info('Calculating mAP...')
-    detection.do_nms_for_results()
-    result_file_path = detection.write_result()
-    args.logger.info('result file path: {}'.format(result_file_path))
-    eval_result = detection.get_eval_result()
+    eval_param_dict = {"net": network, "dataset": ds, "data_size": data_size,
+                       "anno_json": args.ann_val_file, "input_shape": input_shape, "args": args}
+    eval_result, _ = apply_eval(eval_param_dict)
 
     cost_time = time.time() - start_time
     args.logger.info('\n=============coco eval reulst=========\n' + eval_result)
diff --git a/model_zoo/official/cv/yolov4/export.py b/model_zoo/official/cv/yolov4/export.py
index 4743672d44..ba04686410 100644
--- a/model_zoo/official/cv/yolov4/export.py
+++ b/model_zoo/official/cv/yolov4/export.py
@@ -39,12 +39,12 @@ if args.device_target == "Ascend":
 if __name__ == "__main__":
     ts_shape = args.testing_shape
 
-    network = YOLOV4CspDarkNet53(is_training=False)
+    network = YOLOV4CspDarkNet53()
+    network.set_train(False)
 
     param_dict = load_checkpoint(args.ckpt_file)
     load_param_into_net(network, param_dict)
 
-    input_shape = Tensor(tuple([ts_shape, ts_shape]), mindspore.float32)
     input_data = Tensor(np.zeros([args.batch_size, 3, ts_shape, ts_shape]), mindspore.float32)
 
-    export(network, input_data, input_shape, file_name=args.file_name, file_format=args.file_format)
+    export(network, input_data, file_name=args.file_name, file_format=args.file_format)
diff --git a/model_zoo/official/cv/yolov4/src/config.py b/model_zoo/official/cv/yolov4/src/config.py
index 357e722c6b..cc18774c2a 100644
--- a/model_zoo/official/cv/yolov4/src/config.py
+++ b/model_zoo/official/cv/yolov4/src/config.py
@@ -52,6 +52,9 @@ class ConfigYOLOV4CspDarkNet53:
 
     # confidence under ignore_threshold means no object when training
     ignore_threshold = 0.7
+    # threshold to throw low quality boxes when eval
+    eval_ignore_threshold = 0.001
+    nms_thresh = 0.5
 
     # h->w
     anchor_scales = [(12, 16),
diff --git a/model_zoo/official/cv/yolov4/src/eval_utils.py b/model_zoo/official/cv/yolov4/src/eval_utils.py
new file mode 100644
index 0000000000..6dc7d177ba
--- /dev/null
+++ b/model_zoo/official/cv/yolov4/src/eval_utils.py
@@ -0,0 +1,323 @@
+import os
+import sys
+import datetime
+import stat
+from collections import defaultdict
+import numpy as np
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+from mindspore.train.callback import Callback
+from mindspore import log as logger
+from mindspore import save_checkpoint
+
+class Redirct:  # minimal writable object; get_eval_result() swaps it in for sys.stdout to capture prints
+    def __init__(self):
+        self.content = ""  # all text written since construction or the last flush()
+
+    def write(self, content):
+        self.content += content
+
+    def flush(self):
+        self.content = ""  # NOTE: discards the captured text instead of flushing it anywhere
+
+class DetectionEngine:
+    """Detection engine."""
+    def __init__(self, args_detection):
+        self.ignore_threshold = args_detection.ignore_threshold  # detect() skips boxes scoring below this
+        self.labels = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
+                       'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
+                       'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
+                       'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
+                       'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
+                       'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+                       'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
+                       'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
+                       'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
+                       'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
+        self.num_classes = len(self.labels)  # detect() reshapes the class scores to this width
+        self.results = {}  # img_id -> {category id -> [[x, y, w, h, score], ...]}, filled by detect()
+        self.file_path = ''  # set by write_result()
+        self.save_prefix = args_detection.outputs_dir  # directory write_result() saves into
+        self.ann_file = args_detection.ann_val_file
+        self._coco = COCO(self.ann_file)
+        self._img_ids = list(sorted(self._coco.imgs.keys()))
+        self.det_boxes = []  # detections kept by do_nms_for_results()
+        self.nms_thresh = args_detection.nms_thresh  # NOTE(review): stored but never read in this class
+        self.coco_catids = self._coco.getCatIds()  # detect() indexes this list by class index
+
+    def do_nms_for_results(self):
+        """Apply DIoU-NMS to every (image, category) group in self.results; extend self.det_boxes."""
+        for img_id in self.results:
+            for clsi in self.results[img_id]:
+                dets = self.results[img_id][clsi]
+                dets = np.array(dets)  # rows of [x, y, w, h, score] appended by detect()
+                keep_index = self._diou_nms(dets, thresh=0.6)  # NOTE(review): fixed 0.6; self.nms_thresh unused
+
+                keep_box = [{'image_id': int(img_id),
+                             'category_id': int(clsi),
+                             'bbox': list(dets[i][:4].astype(float)),
+                             'score': dets[i][4].astype(float)}
+                            for i in keep_index]
+                self.det_boxes.extend(keep_box)
+
+    def _nms(self, predicts, threshold):
+        """Greedy IoU-based NMS over rows of [x, y, w, h, score]; returns the indices of the rows kept."""
+        # convert xywh -> xmin ymin xmax ymax
+        x1 = predicts[:, 0]
+        y1 = predicts[:, 1]
+        x2 = x1 + predicts[:, 2]
+        y2 = y1 + predicts[:, 3]
+        scores = predicts[:, 4]
+
+        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+        order = scores.argsort()[::-1]  # row indices, highest score first
+
+        reserved_boxes = []
+        while order.size > 0:
+            i = order[0]  # the best remaining box is always kept
+            reserved_boxes.append(i)
+            max_x1 = np.maximum(x1[i], x1[order[1:]])
+            max_y1 = np.maximum(y1[i], y1[order[1:]])
+            min_x2 = np.minimum(x2[i], x2[order[1:]])
+            min_y2 = np.minimum(y2[i], y2[order[1:]])
+
+            intersect_w = np.maximum(0.0, min_x2 - max_x1 + 1)
+            intersect_h = np.maximum(0.0, min_y2 - max_y1 + 1)
+            intersect_area = intersect_w * intersect_h
+            ovr = intersect_area / (areas[i] + areas[order[1:]] - intersect_area)
+
+            indexes = np.where(ovr <= threshold)[0]  # boxes that do not overlap box i too much
+            order = order[indexes + 1]  # +1 because ovr was computed against order[1:]
+        return reserved_boxes
+
+    def _diou_nms(self, dets, thresh=0.5):
+        """
+        Greedy DIoU-NMS over rows of [x, y, w, h, score]; returns the indices of the rows kept.
+        """
+        x1 = dets[:, 0]
+        y1 = dets[:, 1]
+        x2 = x1 + dets[:, 2]  # xywh -> xmin ymin xmax ymax
+        y2 = y1 + dets[:, 3]
+        scores = dets[:, 4]
+        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+        order = scores.argsort()[::-1]  # row indices, highest score first
+        keep = []
+        while order.size > 0:
+            i = order[0]  # the best remaining box is always kept
+            keep.append(i)
+            xx1 = np.maximum(x1[i], x1[order[1:]])
+            yy1 = np.maximum(y1[i], y1[order[1:]])
+            xx2 = np.minimum(x2[i], x2[order[1:]])
+            yy2 = np.minimum(y2[i], y2[order[1:]])
+
+            w = np.maximum(0.0, xx2 - xx1 + 1)
+            h = np.maximum(0.0, yy2 - yy1 + 1)
+            inter = w * h
+            ovr = inter / (areas[i] + areas[order[1:]] - inter)  # plain IoU of box i vs. the rest
+            center_x1 = (x1[i] + x2[i]) / 2
+            center_x2 = (x1[order[1:]] + x2[order[1:]]) / 2
+            center_y1 = (y1[i] + y2[i]) / 2
+            center_y2 = (y1[order[1:]] + y2[order[1:]]) / 2
+            inter_diag = (center_x2 - center_x1) ** 2 + (center_y2 - center_y1) ** 2  # squared centre distance
+            out_max_x = np.maximum(x2[i], x2[order[1:]])
+            out_max_y = np.maximum(y2[i], y2[order[1:]])
+            out_min_x = np.minimum(x1[i], x1[order[1:]])
+            out_min_y = np.minimum(y1[i], y1[order[1:]])
+            outer_diag = (out_max_x - out_min_x) ** 2 + (out_max_y - out_min_y) ** 2  # squared enclosing diagonal
+            diou = ovr - inter_diag / outer_diag  # NOTE(review): outer_diag is not guarded against 0
+            diou = np.clip(diou, -1, 1)
+            inds = np.where(diou <= thresh)[0]
+            order = order[inds + 1]  # +1 because diou was computed against order[1:]
+        return keep
+
+
+    def write_result(self):
+        """Dump self.det_boxes as JSON to <save_prefix>/predict_<timestamp>.json and return that path."""
+        import json
+        t = datetime.datetime.now().strftime('_%Y_%m_%d_%H_%M_%S')
+        self.file_path = self.save_prefix + '/predict' + t + '.json'
+        try:
+            # `with` closes the handle even when json.dump fails part-way (the old code leaked it).
+            with open(self.file_path, 'w') as f:
+                json.dump(self.det_boxes, f)
+        except IOError as e:
+            # Callers still see RuntimeError for I/O failures; the original error stays chained.
+            raise RuntimeError("Unable to open json file to dump. What(): {}".format(str(e))) from e
+        return self.file_path
+
+    def get_eval_result(self):
+        """Run COCO bbox evaluation; return (summary text, mAP), or ("", 0.0) if nothing was detected."""
+        up_path = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
+        self.file_path = os.path.join(up_path, self.file_path)  # re-roots a relative path under up_path
+        if not self.results:
+            print("[WARNING] result is {}")
+            return "", 0.0  # first item must stay a str: eval.py concatenates it to a log message
+        coco_gt = COCO(self.ann_file)
+        coco_dt = coco_gt.loadRes(self.file_path)
+        coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
+        coco_eval.evaluate()
+        coco_eval.accumulate()
+        rdct, stdout = Redirct(), sys.stdout
+        sys.stdout = rdct  # capture what summarize() prints
+        try:
+            coco_eval.summarize()
+        finally:
+            sys.stdout = stdout  # restore the real stdout even if summarize() raises
+        return rdct.content, float(coco_eval.stats[0])
+
+    def detect(self, outputs, batch, image_shape, image_id):
+        """
+        Decode raw head outputs into candidate boxes and collect them in self.results.
+
+        Args:
+            outputs (list): one numpy array per detection head, indexed [sample, ..., channel]; channels
+                are read as x, y, w, h, objectness, then self.num_classes class scores.
+            batch (int): number of leading samples to process from every head.
+            image_shape (array): per-sample (width, height) that x/w and y/h are multiplied by.
+            image_id (array): per-sample image id, used as the key into self.results.
+
+        Boxes whose score (best class score * objectness) is below self.ignore_threshold are dropped.
+        The rest are stored as [x_left, y_top, w, h, score] under self.results[img_id][coco category id].
+        Nothing is returned; self.results is updated in place.
+        """
+        for batch_id in range(batch):
+            for out_item in outputs:
+                # one sample of one head, e.g. 52 x 52 x 3 x 85
+                out_item_single = out_item[batch_id, :]
+                ori_w, ori_h = image_shape[batch_id]
+                img_id = int(image_id[batch_id])
+                # box centre and size are multiplied by the original image size, then flattened
+                x = (out_item_single[..., 0] * ori_w).reshape(-1)
+                y = (out_item_single[..., 1] * ori_h).reshape(-1)
+                w = (out_item_single[..., 2] * ori_w).reshape(-1)
+                h = (out_item_single[..., 3] * ori_h).reshape(-1)
+
+                # objectness and per-class scores
+                conf = out_item_single[..., 4:5].reshape(-1)
+                cls_emb = out_item_single[..., 5:]
+                cls_argmax = np.argmax(cls_emb, axis=-1).reshape(-1)
+                cls_emb = cls_emb.reshape(-1, self.num_classes)
+
+                # x, y are box centres; shift to the top-left corner
+                x_top_left = x - w / 2.
+                y_top_left = y - h / 2.
+                # Pick each row's best class score by direct indexing. The previous code built an
+                # all-False mask via np.random.random(...) > sys.maxsize and filled it row by row in
+                # Python, which also drew from numpy's global RNG on every call for no reason.
+                confidence = cls_emb[np.arange(cls_emb.shape[0]), cls_argmax] * conf
+                for x_lefti, y_lefti, wi, hi, confi, clsi in zip(x_top_left, y_top_left, w, h, confidence, cls_argmax):
+                    # drop low-score boxes before touching self.results
+                    if confi < self.ignore_threshold:
+                        continue
+                    if img_id not in self.results:
+                        self.results[img_id] = defaultdict(list)
+                    # clamp the corner at 0 and the size at the original image size
+                    x_lefti = max(0, x_lefti)
+                    y_lefti = max(0, y_lefti)
+                    wi = min(wi, ori_w)
+                    hi = min(hi, ori_h)
+                    # transform catId to match coco
+                    coco_clsi = self.coco_catids[clsi]
+                    self.results[img_id][coco_clsi].append([x_lefti, y_lefti, wi, hi, confi])
+
+
+
+class EvalCallBack(Callback):
+    """
+    Callback that runs evaluation at epoch end during training and keeps the best checkpoint.
+
+    Runs at `eval_start_epoch` and then every `interval` epochs; a metric >= the best so far is a new best.
+
+    Args:
+        eval_function (function): called as eval_function(eval_param_dict); must return (report, metric).
+        eval_param_dict (dict): argument passed to eval_function.
+        interval (int): evaluation interval in epochs; ValueError is raised if < 1. Default: 1.
+        eval_start_epoch (int): first epoch at which evaluation runs. Default: 1.
+        save_best_ckpt (bool): whether to save a checkpoint on every new best metric. Default: True.
+        ckpt_directory (str): directory for the best checkpoint, created if missing. Default: "./".
+        besk_ckpt_name (str): file name of the best checkpoint. Default: "best.ckpt".
+        metrics_name (str): metric label used in the printed messages. Default: "acc".
+
+    Examples:
+        >>> EvalCallBack(eval_function, eval_param_dict)
+    """
+
+    def __init__(self, eval_function, eval_param_dict, interval=1, eval_start_epoch=1, save_best_ckpt=True,
+                 ckpt_directory="./", besk_ckpt_name="best.ckpt", metrics_name="acc"):
+        super(EvalCallBack, self).__init__()
+        self.eval_param_dict = eval_param_dict
+        self.eval_function = eval_function
+        self.eval_start_epoch = eval_start_epoch
+        if interval < 1:
+            raise ValueError("interval should >= 1.")
+        self.interval = interval
+        self.save_best_ckpt = save_best_ckpt
+        self.best_res = 0  # best metric value seen so far
+        self.best_epoch = 0  # epoch at which best_res was obtained
+        if not os.path.isdir(ckpt_directory):
+            os.makedirs(ckpt_directory)
+        self.bast_ckpt_path = os.path.join(ckpt_directory, besk_ckpt_name)  # full path of the best checkpoint
+        self.metrics_name = metrics_name
+
+    def remove_ckpoint_file(self, file_name):
+        """Make `file_name` writable and delete it; failures are logged as warnings, not raised."""
+        try:
+            os.chmod(file_name, stat.S_IWRITE)
+            os.remove(file_name)
+        except OSError:
+            logger.warning("OSError, failed to remove the older ckpt file %s.", file_name)
+        except ValueError:
+            logger.warning("ValueError, failed to remove the older ckpt file %s.", file_name)
+
+    def epoch_end(self, run_context):
+        """Evaluate on scheduled epochs and, on a new best metric, replace the best checkpoint."""
+        cb_params = run_context.original_args()
+        cur_epoch = cb_params.cur_epoch_num
+        if cur_epoch >= self.eval_start_epoch and (cur_epoch - self.eval_start_epoch) % self.interval == 0:
+            res, res_map = self.eval_function(self.eval_param_dict)  # (printable report, numeric metric)
+            print("epoch: {}, {}:\n {}".format(cur_epoch, self.metrics_name, res), flush=True)
+            if res_map >= self.best_res:  # ties also count as a new best
+                self.best_res = res_map
+                self.best_epoch = cur_epoch
+                print("update best result: {}".format(res_map), flush=True)
+                if self.save_best_ckpt:
+                    if os.path.exists(self.bast_ckpt_path):
+                        self.remove_ckpoint_file(self.bast_ckpt_path)
+                    save_checkpoint(cb_params.train_network, self.bast_ckpt_path)
+                    print("update best checkpoint at: {}".format(self.bast_ckpt_path), flush=True)
+
+    def end(self, run_context):  # prints the best metric and the epoch it was reached
+        print("End training, the best {0} is: {1}, the best {0} epoch is {2}".format(self.metrics_name,
+                                                                                     self.best_res,
+                                                                                     self.best_epoch), flush=True)
+
+
+def apply_eval(eval_param_dict):
+    """Run `net` over `dataset`, post-process with DetectionEngine and return its (summary text, mAP) pair.
+
+    `eval_param_dict` must provide "net", "dataset", "data_size" and "args" (the DetectionEngine settings).
+    """
+    network = eval_param_dict["net"]
+    network.set_train(False)
+    ds = eval_param_dict["dataset"]
+    data_size = eval_param_dict["data_size"]
+    args = eval_param_dict["args"]
+    detection = DetectionEngine(args)
+    for index, data in enumerate(ds.create_dict_iterator(num_epochs=1)):
+        image_id_ = data["img_id"].asnumpy()
+        image_shape_ = data["image_shape"].asnumpy()
+        output_big, output_me, output_small = network(data["image"])
+        heads = [output_small.asnumpy(), output_me.asnumpy(), output_big.asnumpy()]
+        # Use the real batch length: a batch whose size differs from args.per_batch_size used to raise
+        # IndexError (shorter batch) or silently drop samples (longer batch).
+        detection.detect(heads, len(image_id_), image_shape_, image_id_)
+        if index % 100 == 0:
+            print('Processing... {:.2f}% '.format(index * args.per_batch_size / data_size * 100))
+
+    print('Calculating mAP...')
+    detection.do_nms_for_results()
+    result_file_path = detection.write_result()
+    print('result file path: {}'.format(result_file_path))
+    # NOTE(review): `network` is left in eval mode (set_train(False)); confirm callers restore training mode.
+    eval_result = detection.get_eval_result()
+    return eval_result
diff --git a/model_zoo/official/cv/yolov4/src/util.py b/model_zoo/official/cv/yolov4/src/util.py
index de89635433..40381914bc 100644
--- a/model_zoo/official/cv/yolov4/src/util.py
+++ b/model_zoo/official/cv/yolov4/src/util.py
@@ -57,6 +57,7 @@ class AverageMeter:
 def load_backbone(net, ckpt_path, args):
     """Load cspdarknet53 backbone checkpoint."""
     param_dict = load_checkpoint(ckpt_path)
+    param_dict = {key.split("network.")[-1]: value for key, value in param_dict.items()}
     yolo_backbone_prefix = 'feature_map.backbone'
     darknet_backbone_prefix = 'backbone'
     find_param = []
diff --git a/model_zoo/official/cv/yolov4/src/yolo.py b/model_zoo/official/cv/yolov4/src/yolo.py
index b9c9c422df..52176f87cf 100644
--- a/model_zoo/official/cv/yolov4/src/yolo.py
+++ b/model_zoo/official/cv/yolov4/src/yolo.py
@@ -220,7 +220,7 @@ class DetectionBlock(nn.Cell):
          DetectionBlock(scale='l',stride=32)
      """
 
-    def __init__(self, scale, config=ConfigYOLOV4CspDarkNet53(), is_training=True):
+    def __init__(self, scale, config=ConfigYOLOV4CspDarkNet53()):
         super(DetectionBlock, self).__init__()
         self.config = config
         if scale == 's':
@@ -246,7 +246,6 @@ class DetectionBlock(nn.Cell):
         self.reshape = P.Reshape()
         self.tile = P.Tile()
         self.concat = P.Concat(axis=-1)
-        self.conf_training = is_training
 
     def construct(self, x, input_shape):
         """construct method"""
@@ -286,7 +285,7 @@ class DetectionBlock(nn.Cell):
         box_confidence = self.sigmoid(box_confidence)
         box_probs = self.sigmoid(box_probs)
 
-        if self.conf_training:
+        if self.training:
             return prediction, box_xy, box_wh
         return self.concat((box_xy, box_wh, box_confidence, box_probs))
 
@@ -430,9 +429,10 @@ class YOLOV4CspDarkNet53(nn.Cell):
         YOLOV4CspDarkNet53(True)
     """
 
-    def __init__(self, is_training):
+    def __init__(self):
         super(YOLOV4CspDarkNet53, self).__init__()
         self.config = ConfigYOLOV4CspDarkNet53()
+        self.test_img_shape = Tensor(tuple(self.config.test_img_shape), ms.float32)
 
         # YOLOv4 network
         self.feature_map = YOLOv4(backbone=CspDarkNet53(ResidualBlock, detect=True),
@@ -440,11 +440,13 @@ class YOLOV4CspDarkNet53(nn.Cell):
                                   out_channel=self.config.out_channel)
 
         # prediction on the default anchor boxes
-        self.detect_1 = DetectionBlock('l', is_training=is_training)
-        self.detect_2 = DetectionBlock('m', is_training=is_training)
-        self.detect_3 = DetectionBlock('s', is_training=is_training)
+        self.detect_1 = DetectionBlock('l')
+        self.detect_2 = DetectionBlock('m')
+        self.detect_3 = DetectionBlock('s')
 
-    def construct(self, x, input_shape):
+    def construct(self, x, input_shape=None):
+        if input_shape is None:
+            input_shape = self.test_img_shape
         big_object_output, medium_object_output, small_object_output = self.feature_map(x)
         output_big = self.detect_1(big_object_output, input_shape)
         output_me = self.detect_2(medium_object_output, input_shape)
diff --git a/model_zoo/official/cv/yolov4/test.py b/model_zoo/official/cv/yolov4/test.py
index 743edfd911..8d350bca03 100644
--- a/model_zoo/official/cv/yolov4/test.py
+++ b/model_zoo/official/cv/yolov4/test.py
@@ -27,7 +27,6 @@ from mindspore import Tensor
 from mindspore.context import ParallelMode
 from mindspore.communication.management import init, get_rank, get_group_size
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
-import mindspore as ms
 
 from src.yolo import YOLOV4CspDarkNet53
 from src.logger import get_logger
@@ -271,7 +270,7 @@ def test():
     context.set_auto_parallel_context(parallel_mode=parallel_mode, gradients_mean=True, device_num=1)
 
     args.logger.info('Creating Network....')
-    network = YOLOV4CspDarkNet53(is_training=False)
+    network = YOLOV4CspDarkNet53()
 
     args.logger.info(args.pretrained)
     if os.path.isfile(args.pretrained):
@@ -311,7 +310,6 @@ def test():
     # init detection engine
     detection = DetectionEngine(args)
 
-    input_shape = Tensor(tuple(config.test_img_shape), ms.float32)
     args.logger.info('Start inference....')
     for i, data in enumerate(ds.create_dict_iterator()):
         image = Tensor(data["image"])
@@ -319,7 +317,7 @@ def test():
         image_shape = Tensor(data["image_shape"])
         image_id = Tensor(data["img_id"])
 
-        prediction = network(image, input_shape)
+        prediction = network(image)
         output_big, output_me, output_small = prediction
         output_big = output_big.asnumpy()
         output_me = output_me.asnumpy()
diff --git a/model_zoo/official/cv/yolov4/train.py b/model_zoo/official/cv/yolov4/train.py
index adb4c08463..15662f9585 100644
--- a/model_zoo/official/cv/yolov4/train.py
+++ b/model_zoo/official/cv/yolov4/train.py
@@ -41,10 +41,10 @@ from src.yolo_dataset import create_yolo_dataset
 from src.initializer import default_recurisive_init, load_yolov4_params
 from src.config import ConfigYOLOV4CspDarkNet53
 from src.util import keep_loss_fp32
+from src.eval_utils import apply_eval, EvalCallBack
 
 set_seed(1)
 
-
 parser = argparse.ArgumentParser('mindspore coco training')
 
 # device related
@@ -109,7 +109,18 @@ parser.add_argument('--training_shape', type=str, default="", help='Fix training
 parser.add_argument('--resize_rate', type=int, default=10,
                     help='Resize rate for multi-scale training. Default: None')
 
+parser.add_argument("--run_eval", type=ast.literal_eval, default=False,
+                    help="Run evaluation when training, default is False.")
+parser.add_argument("--save_best_ckpt", type=ast.literal_eval, default=True,
+                    help="Save best checkpoint when run_eval is True, default is True.")
+parser.add_argument("--eval_start_epoch", type=int, default=200,
+                    help="Evaluation start epoch when run_eval is True, default is 200.")
+parser.add_argument("--eval_interval", type=int, default=1,
+                    help="Evaluation interval when run_eval is True, default is 1.")
+parser.add_argument('--ann_file', type=str, default='', help='path to annotation')
+
 args, _ = parser.parse_known_args()
+
 if args.lr_scheduler == 'cosine_annealing' and args.max_epoch > args.t_max:
     args.t_max = args.max_epoch
 
@@ -117,6 +128,13 @@ args.lr_epochs = list(map(int, args.lr_epochs.split(',')))
 args.data_root = os.path.join(args.data_dir, 'train2017')
 args.annFile = os.path.join(args.data_dir, 'annotations/instances_train2017.json')
 
+args.data_val_root = os.path.join(args.data_dir, 'val2017')
+args.ann_val_file = os.path.join(args.data_dir, 'annotations/instances_val2017.json')
+
+config = ConfigYOLOV4CspDarkNet53()
+args.nms_thresh = config.nms_thresh
+args.ignore_threshold = config.eval_ignore_threshold
+
 device_id = int(os.getenv('DEVICE_ID', '0'))
 context.set_context(mode=context.GRAPH_MODE, enable_auto_mixed_precision=True,
                     device_target=args.device_target, save_graphs=False, device_id=device_id)
@@ -141,7 +159,7 @@ if args.is_save_on_master:
 else:
     args.rank_save_ckpt_flag = 1
 
- # logger
+# logger
 args.outputs_dir = os.path.join(args.ckpt_path,
                                 datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
 args.logger = get_logger(args.outputs_dir, args.rank)
@@ -176,9 +194,9 @@ if __name__ == "__main__":
         degree = get_group_size()
     context.set_auto_parallel_context(parallel_mode=parallel_mode, gradients_mean=True, device_num=degree)
 
-    network = YOLOV4CspDarkNet53(is_training=True)
+    network = YOLOV4CspDarkNet53()
+    network_eval = network
     # default is kaiming-normal
-    config = ConfigYOLOV4CspDarkNet53()
     args.checkpoint_filter_list = config.checkpoint_filter_list
     default_recurisive_init(network)
     load_yolov4_params(args, network)
@@ -222,27 +240,44 @@ if __name__ == "__main__":
         network = TrainingWrapper(network, opt)
         network.set_train()
 
-    if args.rank_save_ckpt_flag:
-        # checkpoint save
-        ckpt_max_num = 10
-        ckpt_config = CheckpointConfig(save_checkpoint_steps=args.ckpt_interval,
-                                       keep_checkpoint_max=ckpt_max_num)
+    # checkpoint save
+    ckpt_max_num = 10
+    ckpt_config = CheckpointConfig(save_checkpoint_steps=args.ckpt_interval,
+                                   keep_checkpoint_max=ckpt_max_num)
+    save_ckpt_path = os.path.join(args.outputs_dir, 'ckpt_' + str(args.rank) + '/')
+    ckpt_cb = ModelCheckpoint(config=ckpt_config,
+                              directory=save_ckpt_path,
+                              prefix='{}'.format(args.rank))
+    cb_params = _InternalCallbackParam()
+    cb_params.train_network = network
+    cb_params.epoch_num = args.max_epoch * args.steps_per_epoch // args.ckpt_interval
+    cb_params.cur_epoch_num = 1
+    run_context = RunContext(cb_params)
+    ckpt_cb.begin(run_context)
+
+    if args.run_eval:
+        rank_id = int(os.environ.get('RANK_ID')) if os.environ.get('RANK_ID') else 0
+        data_val_root = args.data_val_root
+        ann_val_file = args.ann_val_file
         save_ckpt_path = os.path.join(args.outputs_dir, 'ckpt_' + str(args.rank) + '/')
-        ckpt_cb = ModelCheckpoint(config=ckpt_config,
-                                  directory=save_ckpt_path,
-                                  prefix='{}'.format(args.rank))
-        cb_params = _InternalCallbackParam()
-        cb_params.train_network = network
-        cb_params.epoch_num = args.max_epoch * args.steps_per_epoch // args.ckpt_interval
-        cb_params.cur_epoch_num = 1
-        run_context = RunContext(cb_params)
-        ckpt_cb.begin(run_context)
+        input_val_shape = Tensor(tuple(config.test_img_shape), ms.float32)
+        # build the validation dataset used for in-training evaluation
+        eval_dataset, eval_data_size = create_yolo_dataset(data_val_root, ann_val_file, is_training=False,
+                                                           batch_size=args.per_batch_size, max_epoch=1, device_num=1,
+                                                           rank=0, shuffle=False, config=config)
+        eval_param_dict = {"net": network_eval, "dataset": eval_dataset, "data_size": eval_data_size,
+                           "anno_json": ann_val_file, "input_shape": input_val_shape, "args": args}
+        # Honor --save_best_ckpt (default True) instead of hard-coding best-checkpoint saving on.
+        eval_cb = EvalCallBack(apply_eval, eval_param_dict, interval=args.eval_interval,
+                               eval_start_epoch=args.eval_start_epoch, save_best_ckpt=args.save_best_ckpt,
+                               ckpt_directory=save_ckpt_path, besk_ckpt_name="best_map.ckpt", metrics_name="mAP")
 
     old_progress = -1
     t_end = time.time()
     data_loader = ds.create_dict_iterator(output_numpy=True, num_epochs=1)
 
     for i, data in enumerate(data_loader):
+        network.set_train()
         images = data["image"]
         input_shape = images.shape[2:4]
         args.logger.info('iter[{}], shape{}'.format(i, input_shape[0]))
@@ -261,24 +296,24 @@ if __name__ == "__main__":
                        batch_gt_box2, input_shape)
         loss_meter.update(loss.asnumpy())
 
-        if args.rank_save_ckpt_flag:
-            # ckpt progress
-            cb_params.cur_step_num = i + 1  # current step number
-            cb_params.batch_num = i + 2
-            ckpt_cb.step_end(run_context)
+        # ckpt progress
+        cb_params.cur_step_num = i + 1  # current step number
+        cb_params.batch_num = i + 2
+        ckpt_cb.step_end(run_context)
 
         if i % args.log_interval == 0:
             time_used = time.time() - t_end
             epoch = int(i / args.steps_per_epoch)
             fps = args.per_batch_size * (i - old_progress) * args.group_size / time_used
-            if args.rank == 0:
-                args.logger.info(
-                    'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{}'.format(epoch, i, loss_meter, fps, lr[i]))
+            print('epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{}'.format(epoch, i, loss_meter, fps, lr[i]))
             t_end = time.time()
             loss_meter.reset()
             old_progress = i
 
-        if (i + 1) % args.steps_per_epoch == 0 and args.rank_save_ckpt_flag:
+        if args.run_eval and (i + 1) % args.steps_per_epoch == 0:
+            eval_cb.epoch_end(run_context)
+
+        if (i + 1) % args.steps_per_epoch == 0:
             cb_params.cur_epoch_num += 1
 
         if args.need_profiler: