You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

YOLOv3.cs 13 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. using NumSharp;
  2. using System;
  3. using System.Collections.Generic;
  4. using System.Text;
  5. using Tensorflow;
  6. using static Tensorflow.Binding;
  7. namespace TensorFlowNET.Examples.ImageProcessing.YOLO
  8. {
  /// <summary>
  /// Builds the YOLOv3 detection graph on top of the darknet53 backbone:
  /// three raw convolutional heads (large / medium / small), their decoded
  /// predictions, and the GIoU / confidence / class-probability losses.
  /// </summary>
  public class YOLOv3
  {
      Config cfg;
      Tensor trainable;
      Tensor input_data;
      Dictionary<int, string> classes;
      int num_class;
      NDArray strides;
      NDArray anchors;
      int anchor_per_scale;
      float iou_loss_thresh;
      string upsample_method;

      // Raw (un-decoded) outputs of the three detection heads.
      Tensor conv_lbbox;
      Tensor conv_mbbox;
      Tensor conv_sbbox;

      // Decoded predictions, one per head. Index 0 of anchors/strides is paired
      // with the "s" head, index 1 with "m" and index 2 with "l".
      Tensor pred_sbbox;
      Tensor pred_mbbox;
      Tensor pred_lbbox;

      /// <summary>
      /// Reads the model settings from <paramref name="cfg_"/>, builds the network
      /// and decodes each of the three heads inside its own variable scope.
      /// </summary>
      /// <param name="cfg_">Configuration; only the <c>cfg.YOLO.*</c> entries are read here.</param>
      /// <param name="input_data_">Input tensor that is fed to the backbone.</param>
      /// <param name="trainable_">Tensor forwarded to every <c>common.convolutional</c> call and to the backbone.</param>
      public YOLOv3(Config cfg_, Tensor input_data_, Tensor trainable_)
      {
          cfg = cfg_;
          input_data = input_data_;
          trainable = trainable_;

          classes = Utils.read_class_names(cfg.YOLO.CLASSES);
          num_class = len(classes);
          strides = np.array(cfg.YOLO.STRIDES);
          anchors = Utils.get_anchors(cfg.YOLO.ANCHORS);
          anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE;
          iou_loss_thresh = cfg.YOLO.IOU_LOSS_THRESH;
          upsample_method = cfg.YOLO.UPSAMPLE_METHOD;

          (conv_lbbox, conv_mbbox, conv_sbbox) = __build_nework(input_data);

          tf_with(tf.variable_scope("pred_sbbox"), scope =>
          {
              pred_sbbox = decode(conv_sbbox, anchors[0], strides[0]);
          });

          // Each scope must decode its OWN head with its own anchors/stride;
          // previously all three scopes re-decoded the small head.
          tf_with(tf.variable_scope("pred_mbbox"), scope =>
          {
              pred_mbbox = decode(conv_mbbox, anchors[1], strides[1]);
          });

          tf_with(tf.variable_scope("pred_lbbox"), scope =>
          {
              pred_lbbox = decode(conv_lbbox, anchors[2], strides[2]);
          });
      }

      /// <summary>
      /// Builds the three detection heads on top of <c>backbone.darknet53</c>.
      /// After the first head, the feature map is upsampled and concatenated with
      /// a backbone route before the next head is built.
      /// </summary>
      /// <param name="input_data">Input tensor passed to the backbone.</param>
      /// <returns>The raw head outputs in the order (large, medium, small).</returns>
      private (Tensor, Tensor, Tensor) __build_nework(Tensor input_data)
      {
          Tensor route_1, route_2;
          (route_1, route_2, input_data) = backbone.darknet53(input_data, trainable);

          // ---- large-object head ----
          input_data = common.convolutional(input_data, new[] { 1, 1, 1024, 512 }, trainable, "conv52");
          input_data = common.convolutional(input_data, new[] { 3, 3, 512, 1024 }, trainable, "conv53");
          input_data = common.convolutional(input_data, new[] { 1, 1, 1024, 512 }, trainable, "conv54");
          input_data = common.convolutional(input_data, new[] { 3, 3, 512, 1024 }, trainable, "conv55");
          input_data = common.convolutional(input_data, new[] { 1, 1, 1024, 512 }, trainable, "conv56");

          var conv_lobj_branch = common.convolutional(input_data, new[] { 3, 3, 512, 1024 }, trainable, name: "conv_lobj_branch");
          var lbbox = common.convolutional(conv_lobj_branch, new[] { 1, 1, 1024, 3 * (num_class + 5) },
              trainable: trainable, name: "conv_lbbox", activate: false, bn: false);

          input_data = common.convolutional(input_data, new[] { 1, 1, 512, 256 }, trainable, "conv57");
          input_data = common.upsample(input_data, name: "upsample0", method: upsample_method);

          tf_with(tf.variable_scope("route_1"), delegate
          {
              input_data = tf.concat(new[] { input_data, route_2 }, axis: -1);
          });

          // ---- medium-object head ----
          input_data = common.convolutional(input_data, new[] { 1, 1, 768, 256 }, trainable, "conv58");
          input_data = common.convolutional(input_data, new[] { 3, 3, 256, 512 }, trainable, "conv59");
          input_data = common.convolutional(input_data, new[] { 1, 1, 512, 256 }, trainable, "conv60");
          input_data = common.convolutional(input_data, new[] { 3, 3, 256, 512 }, trainable, "conv61");
          input_data = common.convolutional(input_data, new[] { 1, 1, 512, 256 }, trainable, "conv62");

          var conv_mobj_branch = common.convolutional(input_data, new[] { 3, 3, 256, 512 }, trainable, name: "conv_mobj_branch");
          var mbbox = common.convolutional(conv_mobj_branch, new[] { 1, 1, 512, 3 * (num_class + 5) },
              trainable: trainable, name: "conv_mbbox", activate: false, bn: false);

          input_data = common.convolutional(input_data, new[] { 1, 1, 256, 128 }, trainable, "conv63");
          input_data = common.upsample(input_data, name: "upsample1", method: upsample_method);

          tf_with(tf.variable_scope("route_2"), delegate
          {
              input_data = tf.concat(new[] { input_data, route_1 }, axis: -1);
          });

          // ---- small-object head ----
          input_data = common.convolutional(input_data, new[] { 1, 1, 384, 128 }, trainable, "conv64");
          input_data = common.convolutional(input_data, new[] { 3, 3, 128, 256 }, trainable, "conv65");
          input_data = common.convolutional(input_data, new[] { 1, 1, 256, 128 }, trainable, "conv66");
          input_data = common.convolutional(input_data, new[] { 3, 3, 128, 256 }, trainable, "conv67");
          input_data = common.convolutional(input_data, new[] { 1, 1, 256, 128 }, trainable, "conv68");

          var conv_sobj_branch = common.convolutional(input_data, new[] { 3, 3, 128, 256 }, trainable, name: "conv_sobj_branch");
          var sbbox = common.convolutional(conv_sobj_branch, new[] { 1, 1, 256, 3 * (num_class + 5) },
              trainable: trainable, name: "conv_sbbox", activate: false, bn: false);

          // Results are handed back to the caller; fields are assigned only in the constructor.
          return (lbbox, mbbox, sbbox);
      }

      /// <summary>
      /// Turns a raw head output into predictions. The output is reshaped to
      /// (batch, size, size, anchors, 5 + num_class); along the last axis, slices
      /// 0:2 / 2:4 / 4:5 / 5: are treated as xy offsets, wh offsets, confidence
      /// logits and class logits respectively.
      /// </summary>
      /// <param name="conv_output">Raw output of one detection head.</param>
      /// <param name="anchors">Anchors for this head; their count sets the anchor axis length.</param>
      /// <param name="stride">Multiplier applied to both the xy and the wh predictions.</param>
      /// <returns>Concatenation of (xywh, confidence, class probabilities) on the last axis.</returns>
      private Tensor decode(Tensor conv_output, NDArray anchors, int stride)
      {
          var conv_shape = tf.shape(conv_output);
          var batch_size = conv_shape[0];
          var output_size = conv_shape[1];
          // Kept local on purpose: decoding must not overwrite the configured
          // anchor_per_scale field that loss_layer reads.
          var anchor_count = len(anchors);

          conv_output = tf.reshape(conv_output, new object[] { batch_size, output_size, output_size, anchor_count, 5 + num_class });

          var conv_raw_dxdy = conv_output[":", ":", ":", ":", "0:2"];
          var conv_raw_dwdh = conv_output[":", ":", ":", ":", "2:4"];
          var conv_raw_conf = conv_output[":", ":", ":", ":", "4:5"];
          var conv_raw_prob = conv_output[":", ":", ":", ":", "5:"];

          // Build the (x, y) cell-index grid and tile it over batch and anchor axes.
          var y = tf.tile(tf.range(output_size, dtype: tf.int32)[":", tf.newaxis], new object[] { 1, output_size });
          var x = tf.tile(tf.range(output_size, dtype: tf.int32)[tf.newaxis, ":"], new object[] { output_size, 1 });

          var xy_grid = tf.concat(new[] { x[":", ":", tf.newaxis], y[":", ":", tf.newaxis] }, axis: -1);
          xy_grid = tf.tile(xy_grid[tf.newaxis, ":", ":", tf.newaxis, ":"], new object[] { batch_size, 1, 1, anchor_count, 1 });
          xy_grid = tf.cast(xy_grid, tf.float32);

          var pred_xy = (tf.sigmoid(conv_raw_dxdy) + xy_grid) * stride;
          var pred_wh = (tf.exp(conv_raw_dwdh) * anchors) * stride;
          var pred_xywh = tf.concat(new[] { pred_xy, pred_wh }, axis: -1);

          var pred_conf = tf.sigmoid(conv_raw_conf);
          var pred_prob = tf.sigmoid(conv_raw_prob);

          return tf.concat(new[] { pred_xywh, pred_conf, pred_prob }, axis: -1);
      }

      /// <summary>
      /// Computes the total loss by running <see cref="loss_layer"/> on each of the
      /// three heads and summing the results component-wise.
      /// </summary>
      /// <param name="label_sbbox">Label tensor for the small head.</param>
      /// <param name="label_mbbox">Label tensor for the medium head.</param>
      /// <param name="label_lbbox">Label tensor for the large head.</param>
      /// <param name="true_sbbox">Ground-truth boxes for the small head.</param>
      /// <param name="true_mbbox">Ground-truth boxes for the medium head.</param>
      /// <param name="true_lbbox">Ground-truth boxes for the large head.</param>
      /// <returns>(giou_loss, conf_loss, prob_loss), each summed over the three heads.</returns>
      public (Tensor, Tensor, Tensor) compute_loss(Tensor label_sbbox, Tensor label_mbbox, Tensor label_lbbox,
          Tensor true_sbbox, Tensor true_mbbox, Tensor true_lbbox)
      {
          Tensor giou_loss = null, conf_loss = null, prob_loss = null;
          (Tensor, Tensor, Tensor) loss_sbbox = (null, null, null);
          (Tensor, Tensor, Tensor) loss_mbbox = (null, null, null);
          (Tensor, Tensor, Tensor) loss_lbbox = (null, null, null);

          tf_with(tf.name_scope("smaller_box_loss"), delegate
          {
              loss_sbbox = loss_layer(conv_sbbox, pred_sbbox, label_sbbox, true_sbbox,
                  anchors: anchors[0], stride: strides[0]);
          });

          tf_with(tf.name_scope("medium_box_loss"), delegate
          {
              loss_mbbox = loss_layer(conv_mbbox, pred_mbbox, label_mbbox, true_mbbox,
                  anchors: anchors[1], stride: strides[1]);
          });

          tf_with(tf.name_scope("bigger_box_loss"), delegate
          {
              loss_lbbox = loss_layer(conv_lbbox, pred_lbbox, label_lbbox, true_lbbox,
                  anchors: anchors[2], stride: strides[2]);
          });

          tf_with(tf.name_scope("giou_loss"), delegate
          {
              giou_loss = loss_sbbox.Item1 + loss_mbbox.Item1 + loss_lbbox.Item1;
          });

          tf_with(tf.name_scope("conf_loss"), delegate
          {
              conf_loss = loss_sbbox.Item2 + loss_mbbox.Item2 + loss_lbbox.Item2;
          });

          tf_with(tf.name_scope("prob_loss"), delegate
          {
              prob_loss = loss_sbbox.Item3 + loss_mbbox.Item3 + loss_lbbox.Item3;
          });

          return (giou_loss, conf_loss, prob_loss);
      }

      /// <summary>
      /// Computes the three loss components for a single detection head.
      /// </summary>
      /// <param name="conv">Raw head output; reshaped here to (batch, size, size, anchor_per_scale, 5 + num_class).</param>
      /// <param name="pred">Decoded predictions; last-axis slices 0:4 and 4:5 are read as xywh and confidence.</param>
      /// <param name="label">Labels; last-axis slices 0:4, 4:5 and 5: are read as xywh, object mask and class targets.</param>
      /// <param name="bboxes">Ground-truth boxes compared against every prediction via <see cref="bbox_iou"/>.</param>
      /// <param name="anchors">Not referenced by this method; kept so the signature stays unchanged.</param>
      /// <param name="stride">Multiplied by the head's output size to obtain the input size.</param>
      /// <returns>(giou_loss, conf_loss, prob_loss): each summed over axes 1-4 and averaged over the remaining axis.</returns>
      public (Tensor, Tensor, Tensor) loss_layer(Tensor conv, Tensor pred, Tensor label, Tensor bboxes, NDArray anchors, int stride)
      {
          var conv_shape = tf.shape(conv);
          var batch_size = conv_shape[0];
          var output_size = conv_shape[1];
          var input_size = stride * output_size;
          conv = tf.reshape(conv, new object[] {batch_size, output_size, output_size,
              anchor_per_scale, 5 + num_class });

          var conv_raw_conf = conv[":", ":", ":", ":", "4:5"];
          var conv_raw_prob = conv[":", ":", ":", ":", "5:"];

          var pred_xywh = pred[":", ":", ":", ":", "0:4"];
          var pred_conf = pred[":", ":", ":", ":", "4:5"];

          var label_xywh = label[":", ":", ":", ":", "0:4"];
          var respond_bbox = label[":", ":", ":", ":", "4:5"];
          var label_prob = label[":", ":", ":", ":", "5:"];

          var giou = tf.expand_dims(bbox_giou(pred_xywh, label_xywh), axis: -1);
          input_size = tf.cast(input_size, tf.float32);

          // The w*h term is normalised by the input AREA (input_size squared),
          // not by sqrt(input_size), so that smaller boxes receive a larger weight.
          var bbox_loss_scale = 2.0 - 1.0 * label_xywh[":", ":", ":", ":", "2:3"] * label_xywh[":", ":", ":", ":", "3:4"] / (input_size * input_size);
          var giou_loss = respond_bbox * bbox_loss_scale * (1 - giou);

          // IoU of every prediction against every ground-truth box; a cell counts as
          // background only when its best IoU stays below the threshold.
          var iou = bbox_iou(pred_xywh[":", ":", ":", ":", tf.newaxis, ":"], bboxes[":", tf.newaxis, tf.newaxis, tf.newaxis, ":", ":"]);
          var max_iou = tf.expand_dims(tf.reduce_max(iou, axis: new[] { -1 }), axis: -1);
          var respond_bgd = (1.0 - respond_bbox) * tf.cast(max_iou < iou_loss_thresh, tf.float32);

          var conf_focal = focal(respond_bbox, pred_conf);

          var conf_loss = conf_focal * (
              respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels: respond_bbox, logits: conv_raw_conf) +
              respond_bgd * tf.nn.sigmoid_cross_entropy_with_logits(labels: respond_bbox, logits: conv_raw_conf));

          var prob_loss = respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels: label_prob, logits: conv_raw_prob);

          giou_loss = tf.reduce_mean(tf.reduce_sum(giou_loss, axis: new[] { 1, 2, 3, 4 }));
          conf_loss = tf.reduce_mean(tf.reduce_sum(conf_loss, axis: new[] { 1, 2, 3, 4 }));
          prob_loss = tf.reduce_mean(tf.reduce_sum(prob_loss, axis: new[] { 1, 2, 3, 4 }));

          return (giou_loss, conf_loss, prob_loss);
      }

      /// <summary>
      /// Focal weighting factor: <c>alpha * |target - actual| ^ gamma</c>.
      /// </summary>
      /// <param name="target">Target values.</param>
      /// <param name="actual">Predicted values.</param>
      /// <param name="alpha">Scalar multiplier (default 1).</param>
      /// <param name="gamma">Exponent applied to the absolute difference (default 2).</param>
      /// <returns>The element-wise weighting tensor.</returns>
      public Tensor focal(Tensor target, Tensor actual, int alpha = 1, int gamma = 2)
      {
          var focal_loss = alpha * tf.pow(tf.abs(target - actual), gamma);
          return focal_loss;
      }

      /// <summary>
      /// Generalised IoU between two sets of boxes given as (center x, center y, width, height)
      /// on the last axis: <c>iou - (enclose_area - union_area) / enclose_area</c>.
      /// </summary>
      /// <param name="boxes1">First set of boxes in xywh form.</param>
      /// <param name="boxes2">Second set of boxes in xywh form.</param>
      /// <returns>GIoU with the last (coordinate) axis removed.</returns>
      public Tensor bbox_giou(Tensor boxes1, Tensor boxes2)
      {
          // Convert center/size form to corner form (x1, y1, x2, y2).
          boxes1 = tf.concat(new[] { boxes1["...", ":2"] - boxes1["...", "2:"] * 0.5,
              boxes1["...", ":2"] + boxes1["...", "2:"] * 0.5}, axis: -1);
          boxes2 = tf.concat(new[] { boxes2["...", ":2"] - boxes2["...", "2:"] * 0.5,
              boxes2["...", ":2"] + boxes2["...", "2:"] * 0.5}, axis: -1);

          // Re-order corners so the first pair is the minimum and the second the maximum.
          boxes1 = tf.concat(new[] { tf.minimum(boxes1["...", ":2"], boxes1["...", "2:"]),
              tf.maximum(boxes1["...", ":2"], boxes1["...", "2:"])}, axis: -1);
          boxes2 = tf.concat(new[] { tf.minimum(boxes2["...", ":2"], boxes2["...", "2:"]),
              tf.maximum(boxes2["...", ":2"], boxes2["...", "2:"])}, axis: -1);

          var boxes1_area = (boxes1["...", "2"] - boxes1["...", "0"]) * (boxes1["...", "3"] - boxes1["...", "1"]);
          var boxes2_area = (boxes2["...", "2"] - boxes2["...", "0"]) * (boxes2["...", "3"] - boxes2["...", "1"]);

          var left_up = tf.maximum(boxes1["...", ":2"], boxes2["...", ":2"]);
          var right_down = tf.minimum(boxes1["...", "2:"], boxes2["...", "2:"]);

          // Clamp at zero so non-overlapping boxes yield an empty intersection.
          var inter_section = tf.maximum(right_down - left_up, 0.0f);
          var inter_area = inter_section["...", "0"] * inter_section["...", "1"];
          var union_area = boxes1_area + boxes2_area - inter_area;
          var iou = inter_area / union_area;

          // Smallest axis-aligned box that encloses both inputs.
          var enclose_left_up = tf.minimum(boxes1["...", ":2"], boxes2["...", ":2"]);
          var enclose_right_down = tf.maximum(boxes1["...", "2:"], boxes2["...", "2:"]);
          var enclose = tf.maximum(enclose_right_down - enclose_left_up, 0.0);
          var enclose_area = enclose["...", "0"] * enclose["...", "1"];
          var giou = iou - 1.0 * (enclose_area - union_area) / enclose_area;

          return giou;
      }

      /// <summary>
      /// Plain IoU between two sets of boxes given as (center x, center y, width, height)
      /// on the last axis.
      /// </summary>
      /// <param name="boxes1">First set of boxes in xywh form.</param>
      /// <param name="boxes2">Second set of boxes in xywh form.</param>
      /// <returns>Intersection area divided by union area, with the last axis removed.</returns>
      public Tensor bbox_iou(Tensor boxes1, Tensor boxes2)
      {
          // Areas are taken from width * height before converting to corner form.
          var boxes1_area = boxes1["...", "2"] * boxes1["...", "3"];
          var boxes2_area = boxes2["...", "2"] * boxes2["...", "3"];

          boxes1 = tf.concat(new[] { boxes1["...", ":2"] - boxes1["...", "2:"] * 0.5,
              boxes1["...", ":2"] + boxes1["...", "2:"] * 0.5}, axis: -1);
          boxes2 = tf.concat(new[] { boxes2["...", ":2"] - boxes2["...", "2:"] * 0.5,
              boxes2["...", ":2"] + boxes2["...", "2:"] * 0.5}, axis: -1);

          var left_up = tf.maximum(boxes1["...", ":2"], boxes2["...", ":2"]);
          var right_down = tf.minimum(boxes1["...", "2:"], boxes2["...", "2:"]);

          var inter_section = tf.maximum(right_down - left_up, 0.0);
          var inter_area = inter_section["...", "0"] * inter_section["...", "1"];
          var union_area = boxes1_area + boxes2_area - inter_area;
          var iou = 1.0 * inter_area / union_area;

          return iou;
      }
  }
  207. }