
opr_footprint.cpp

  1. #include "megbrain/opr/basic_arith.h"
  2. #include "megbrain/opr/basic_arith_wrapper.h"
  3. #include "megbrain/opr/blas.h"
  4. #include "megbrain/opr/dnn/convolution.h"
  5. #include "megbrain/opr/dnn/pooling.h"
  6. #include "megbrain/opr/io.h"
  7. #include "megbrain/opr/tensor_manip.h"
  8. #include "megbrain/plugin/profiler.h"
  9. #include "megbrain/test/helper.h"
  10. #include "megbrain/utils/json.h"
  11. using namespace mgb;
  12. namespace {
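
// look up `key` in a JSON object and cast the value to a nested object,
// asserting that the key is present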
json::Object& visit_json_obj(json::Object& obj, const std::string& key) {
    auto&& val = obj[key];
    mgb_assert(val, "key %s not found", key.c_str());
    return static_cast<json::Object&>(*val);
}
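
// compile the graph with a GraphProfiler attached, execute it once, and check
// that the profiler's "opr_footprint" entry for the operator producing `out`
// reports the expected computation (in ops) and memory (in bytes)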
void compile_and_run(
        std::shared_ptr<ComputingGraph> graph, SymbolVar& out, HostTensorND& host_out,
        uint64_t computation, uint64_t memory) {
    graph->options().graph_opt_level = 0;
    auto func = graph->compile({make_callback_copy(out, host_out)});
    auto profiler = std::make_shared<GraphProfiler>(graph.get());
    func->execute();
    host_out.sync();
    auto&& opr = out.node()->owner_opr();
    auto root_ptr = profiler->to_json();
    auto&& json_rst = *root_ptr;
    auto&& opr_fp_rst = visit_json_obj(json_rst, "opr_footprint");
    auto&& opr_fp_item = visit_json_obj(opr_fp_rst, opr->id_str());
    uint64_t mem_rst =
            static_cast<json::NumberInt*>(opr_fp_item["memory"].get())->get_impl();
    uint64_t comp_rst =
            static_cast<json::NumberInt*>(opr_fp_item["computation"].get())->get_impl();
    ASSERT_EQ(memory, mem_rst);
    ASSERT_EQ(computation, comp_rst);
}
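
// build a three-input graph from `func` and verify its footprint; `nr_elems`
// is the expected total number of elements touched, so the expected memory
// footprint is `dtype.size(nr_elems)` bytes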
template <typename Func, typename DType, typename Param>
void run_test(
        Func func, std::initializer_list<size_t>&& host_x_shape,
        std::initializer_list<size_t>&& host_y_shape,
        std::initializer_list<size_t>&& host_z_shape, uint64_t computation,
        uint64_t nr_elems, DType dtype, const Param& param,
        CompNode cn = CompNode::load("xpux")) {
    HostTensorGenerator<DType> gen;
    auto host_x = gen(host_x_shape, cn);
    auto host_y = gen(host_y_shape, cn);
    auto host_z = gen(host_z_shape, cn);
    auto graph = ComputingGraph::make();
    SymbolVar x = opr::SharedDeviceTensor::make(*graph, *host_x.get()).rename("x"),
              y = opr::SharedDeviceTensor::make(*graph, *host_y.get()).rename("y"),
              z = opr::SharedDeviceTensor::make(*graph, *host_z.get()).rename("z"),
              f = func(x, y, z, param);
    HostTensorND host_f;
    compile_and_run(graph, f, host_f, computation, dtype.size(nr_elems));
}
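
// dense convolution footprint: computation is n * ic * oc * oh * ow * fh * fw * 2
// (one multiply plus one add per MAC); memory is the element count of the
// input, filter and output tensors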
template <class Param, typename Func>
void test_conv_group(
        size_t n, size_t ic, size_t oc, size_t ih, size_t iw, size_t fh, size_t fw,
        size_t ph, size_t pw, size_t sh, size_t sw, Func func) {
    Param param;
    size_t ow = (iw + 2 * pw - fw) / sw + 1;
    size_t oh = (ih + 2 * ph - fh) / sh + 1;
    uint64_t computation = n * ic * oc * ow * oh * fw * fh * 2;
    uint64_t memory = n * ic * ih * iw + oc * ic * fw * fh + n * oc * oh * ow;
    param.stride_h = sh;
    param.stride_w = sw;
    param.pad_h = ph;
    param.pad_w = pw;
    run_test(
            func, {n, ic, ih, iw}, {oc, ic, fh, fw}, {n, oc, oh, ow}, computation,
            memory, dtype::Float32(), param);
}
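
// NCHW4 ConvBias footprint: per group, conv MACs (x2) plus one add per output
// element for the bias; the tensors are QuantizedS8 (1 byte/element), but the
// bias is QuantizedS32, hence the 4 * oc bytes per group in the memory term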
template <class Param, typename Func>
void test_conv_bias_group_nchw4(
        size_t n, size_t ic, size_t oc, size_t ih, size_t iw, size_t fh, size_t fw,
        size_t ph, size_t pw, size_t sh, size_t sw, Func func, size_t group) {
    Param param;
    size_t ow = (iw + 2 * pw - fw) / sw + 1;
    size_t oh = (ih + 2 * ph - fh) / sh + 1;
    uint64_t computation =
            (n * ic * oc * ow * oh * fw * fh * 2 + n * oc * ow * oh) * group;
    uint64_t memory =
            (n * ic * ih * iw + oc * ic * fw * fh + n * oc * oh * ow + 4 * oc) * group;
    param.stride_h = sh;
    param.stride_w = sw;
    param.pad_h = ph;
    param.pad_w = pw;
    param.format = Param::Format::NCHW4;
    if (group == 1) {
        run_test(
                func, {n, group * ic / 4, ih, iw, 4}, {oc, ic / 4, fh, fw, 4},
                {1, oc * group / 4, 1, 1, 4}, computation, memory,
                dtype::QuantizedS8(1.0f), param, CompNode::load("cpux"));
    } else {
        param.sparse = Param::Sparse::GROUP;
        run_test(
                func, {n, group * ic / 4, ih, iw, 4}, {group, oc, ic / 4, fh, fw, 4},
                {1, oc * group / 4, 1, 1, 4}, computation, memory,
                dtype::QuantizedS8(1.0f), param, CompNode::load("cpux"));
    }
}

}  // namespace
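
// elemwise footprint: `k` is the number of ops the footprint model charges per
// output element (1 for SIGMOID and ADD, 2 for FUSE_MUL_ADD3); memory covers
// every input plus the output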
TEST(TestOprFootprint, Elemwise) {
    using Param = opr::Elemwise::Param;
    auto test_elemwise_group = [](Param::Mode mode, size_t nr_inputs, size_t k) {
        auto func = [&nr_inputs](
                            SymbolVar x, SymbolVar y, SymbolVar z,
                            const Param& param = {}) {
            SymbolVarArray inputs{x, y, z};
            inputs.resize(nr_inputs);
            return opr::Elemwise::make(inputs, param);
        };
        Param param;
        param.mode = mode;
        run_test(
                func, {2, 3, 3}, {2, 3, 3}, {2, 3, 3}, 18 * k, 18 * (nr_inputs + 1),
                dtype::Float32(), param);
        auto mem = 30 * (nr_inputs + 1);
        if (nr_inputs == 3) {
            // the third input {2, 1, 3} is broadcast: it holds 6 elements
            // rather than 30, so discount the difference (2 * 3 * 4 == 24)
            mem -= 2 * 3 * 4;
        }
        run_test(
                func, {2, 5, 3}, {2, 5, 3}, {2, 1, 3}, 30 * k, mem, dtype::Int32(),
                param);
    };
    test_elemwise_group(Param::Mode::SIGMOID, 1, 1);
    test_elemwise_group(Param::Mode::ADD, 2, 1);
    test_elemwise_group(Param::Mode::FUSE_MUL_ADD3, 3, 2);
}
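
// AddUpdate (dest := dest * alpha + delta * beta + bias): the footprint model
// charges three ops per updated element, and memory counts three tensor
// accesses (dest in, delta in, dest out)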
TEST(TestOprFootprint, AddUpdate) {
    using Param = opr::AddUpdate::Param;
    auto func = [](SymbolVar x, SymbolVar y, SymbolVar z, const Param& param = {}) {
        return opr::AddUpdate::make(x, y, param);
    };
    Param param;
    run_test(func, {2, 3, 3}, {2, 3, 3}, {0}, 18 * 3, 18 * 3, dtype::Float32(), param);
    run_test(func, {2, 3, 5}, {2, 3, 5}, {0}, 30 * 3, 30 * 3, dtype::Int16(), param);
}
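
// the three convolution tests below share test_conv_group: forward, backward
// data and backward filter have the same theoretical footprint, only the roles
// of src / filter / diff differ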
TEST(TestOprFootprint, ConvolutionForward) {
    using OprType = opr::ConvolutionForward;
    using Param = OprType::Param;
    auto func = [](SymbolVar x, SymbolVar y, SymbolVar z, const Param& param) {
        return OprType::make(x, y, param);
    };
    REQUIRE_GPU(1);
    // n, ic, oc, ih, iw, fh, fw, ph, pw, sh, sw
    test_conv_group<Param, decltype(func)>(10, 3, 2, 24, 24, 3, 3, 1, 1, 3, 3, func);
    test_conv_group<Param, decltype(func)>(20, 4, 3, 48, 24, 3, 5, 2, 2, 2, 2, func);
}

TEST(TestOprFootprint, ConvolutionBackwardData) {
    using OprType = opr::ConvolutionBackwardData;
    using Param = OprType::Param;
    auto func = [](SymbolVar src_for_shp, SymbolVar filter, SymbolVar diff,
                   const Param& param) {
        return OprType::make(filter, diff, src_for_shp, param);
    };
    // n, ic, oc, ih, iw, fh, fw, ph, pw, sh, sw
    test_conv_group<opr::ConvolutionForward::Param, decltype(func)>(
            10, 3, 2, 24, 24, 3, 3, 1, 1, 3, 3, func);
    test_conv_group<opr::ConvolutionForward::Param, decltype(func)>(
            20, 4, 3, 48, 24, 3, 5, 2, 2, 2, 2, func);
}

TEST(TestOprFootprint, ConvolutionBackwardFilter) {
    using OprType = opr::ConvolutionBackwardFilter;
    using Param = OprType::Param;
    auto func = [](SymbolVar src, SymbolVar filter, SymbolVar diff,
                   const Param& param) {
        return OprType::make(src, diff, filter, param);
    };
    // n, ic, oc, ih, iw, fh, fw, ph, pw, sh, sw
    test_conv_group<Param, decltype(func)>(10, 3, 2, 24, 24, 3, 3, 1, 1, 3, 3, func);
    test_conv_group<Param, decltype(func)>(20, 4, 3, 48, 24, 3, 5, 2, 2, 2, 2, func);
}
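
// MatrixMul footprint: M * K * N * 2 ops (multiply plus add per MAC); memory
// is the element count of A, B and C; Param{true, true} transposes both
// operands, so {7, 3} x {8, 7} is effectively a {3, 7} x {7, 8} product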
TEST(TestOprFootprint, MatrixMul) {
    using OprType = opr::MatrixMul;
    using Param = OprType::Param;
    auto func = [](SymbolVar x, SymbolVar y, SymbolVar z, const Param& param) {
        return OprType::make(x, y, param);
    };
    run_test(
            func, {3, 5}, {5, 7}, {0}, 3 * 5 * 7 * 2, 3 * 5 + 5 * 7 + 3 * 7,
            dtype::Float32(), Param{});
    run_test(
            func, {7, 3}, {8, 7}, {0}, 3 * 7 * 8 * 2, 3 * 7 + 8 * 7 + 3 * 8,
            dtype::Float32(), Param{true, true});
}
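
// pooling footprint: one op per window element per output element; memory is
// input plus output; with the default Param (2x2 window, 2x2 stride) the
// {10, 7, 8, 6} input yields a {10, 7, 4, 3} output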
TEST(TestOprFootprint, PoolingForward) {
    using OprType = opr::PoolingForward;
    using Param = OprType::Param;
    auto func = [](SymbolVar x, SymbolVar y, SymbolVar z, const Param& param) {
        return OprType::make(x, param);
    };
    // note: a default-constructed Param{} reaches the operator; the expected
    // numbers (window 2 * 2, output 4 * 3 per channel) assume it
    run_test(
            func, {10, 7, 8, 6}, {0}, {0}, 10 * 7 * 8 * 6, 10 * 7 * (8 * 6 + 4 * 3),
            dtype::Float32(), Param{});
}
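
// Concat only moves data: computation equals the output element count, and
// memory is inputs plus output (twice the output size)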
TEST(TestOprFootprint, Concat) {
    using OprType = opr::Concat;
    using Param = OprType::Param;
    auto func = [](SymbolVar x, SymbolVar y, SymbolVar z, const Param& param) {
        return OprType::make({x, y, z}, param.axis);
    };
    Param param;
    run_test(
            func, {1, 3, 5}, {2, 3, 5}, {3, 3, 5}, 6 * 3 * 5, 6 * 3 * 5 * 2,
            dtype::Float32(), param);
}
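
// Reduce over axis 1: computation is the input element count; memory is the
// input {5, 3, 3} plus the reduced output {5, 1, 3}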
TEST(TestOprFootprint, Reduce) {
    using OprType = opr::Reduce;
    using Param = OprType::Param;
    auto func = [](SymbolVar x, SymbolVar y, SymbolVar z, const Param& param) {
        return OprType::make(x, param);
    };
    Param param;
    param.axis = 1;
    run_test(
            func, {5, 3, 3}, {0}, {0}, 5 * 3 * 3, 5 * 3 * 3 + 5 * 3, dtype::Float32(),
            param);
}
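
// Dimshuffle with pattern {1, 2, 0} permutes {2, 3, 5} into {3, 5, 2}: one
// copy per element, memory is input plus output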
TEST(TestOprFootprint, Dimshuffle) {
    using OprType = opr::Dimshuffle;
    using Param = OprType::Param;
    auto func = [](SymbolVar x, SymbolVar y, SymbolVar z, const Param& param) {
        return OprType::make(x, {1, 2, 0}, 0);
    };
    run_test(
            func, {2, 3, 5}, {3, 5, 2}, {0}, 2 * 3 * 5, 2 * 3 * 5 * 2,
            dtype::Float32(), Param());
}
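
// Host2DeviceCopy: one copy per element; memory is the byte size of the
// transferred tensor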
TEST(TestOprFootprint, Host2DeviceCopy) {
    using OprType = opr::Host2DeviceCopy;
    REQUIRE_GPU(1);
    auto&& cpu = CompNode::load("cpu1");
    auto float32 = dtype::Float32();
    auto data = std::make_shared<HostTensorND>(HostTensorND(cpu, {2, 3, 5}, float32));
    auto graph = ComputingGraph::make();
    auto out_var = OprType::make_no_value_infer(*graph.get(), data);
    HostTensorND host_out(cpu, float32);
    compile_and_run(graph, out_var, host_out, 2 * 3 * 5, float32.size(2 * 3 * 5));
}
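
// quantized NCHW4 ConvBias: inputs are converted to QuantizedS8 / QuantizedS32
// before the fused conv + bias; exercised with dense (group == 1) and grouped
// convolution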
TEST(TestOprFootprint, NCHW4Convolution) {
    using OprType = opr::ConvBias;
    using Param = OprType::Param;
    auto func = [](SymbolVar x, SymbolVar y, SymbolVar z, const Param& param) {
        x = opr::TypeCvt::make(x, dtype::QuantizedS8(1.3f));
        y = opr::TypeCvt::make(y, dtype::QuantizedS8(1.4f));
        z = opr::TypeCvt::make(z, dtype::QuantizedS32(1.3f * 1.4f));
        return OprType::make(
                x, y, z, param, {}, OperatorNodeConfig{dtype::QuantizedS8(0.6f)});
    };
    test_conv_bias_group_nchw4<Param, decltype(func)>(
            10, 4, 8, 24, 24, 3, 3, 1, 1, 3, 3, func, 1);
    test_conv_bias_group_nchw4<Param, decltype(func)>(
            20, 4, 4, 48, 24, 3, 5, 2, 3, 2, 1, func, 4);
}

  260. // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}