
comp_node_helper.cpp 17 kB

/**
 * \file src/core/test/comp_node_helper.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "./comp_node_helper.h"

#include "megbrain/opr/basic_arith_wrapper.h"
#include "megbrain/opr/io.h"
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/opr/utility.h"
#include "megbrain/serialization/serializer.h"

using namespace mgb;
using namespace comp_node_test;

namespace {
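
// Runs a grouped convolution under comp_node_seq_record_level = 1 and uses a
// CallbackInjector to check in which iterations the operator chain actually
// executes: once for warmup, once when the recorder is created, and twice more
// after the input shape change at `change` invalidates and recreates the
// recorder (hence executed.size() == 4).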
void run_comp_seq_rec_basic(CompNode cn, bool fake_first) {
    using ConvParam = opr::Convolution::Param;
    ConvParam param;
    param.sparse = ConvParam::Sparse::GROUP;
    HostTensorGenerator<> gen;
    auto host_x = gen({3, 4, 10, 8}, cn), host_y = gen({2, 3, 2, 3, 3}, cn);
    int iter = 0;
    std::vector<int> executed;
    HostTensorND host_z;
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         y = opr::Host2DeviceCopy::make(*graph, host_y),
         z = opr::CallbackInjector::make(
                 opr::Convolution::make(x, y, param),
                 [&](DeviceTensorND& dv) { executed.push_back(iter); });
    graph->options().comp_node_seq_record_level = 1;
    if (fake_first) {
        graph->options().fake_next_exec = true;
        graph->options().var_sanity_check_first_run = false;
    }
    auto func = graph->compile({make_callback_copy(z, host_z)});
    if (fake_first) {
        func->execute();  // first exec
    }
    int change = 5;
    for (; iter < 10; ++iter) {
        if (iter == change) {
            *host_x = *gen({2, 4, 15, 13}, cn);
        }
        host_x->copy_from_fixlayout(*gen(host_x->shape(), cn));
        func->execute();
        auto expect = eval_conv_cpu<opr::Convolution>(*host_x, *host_y, param);
        MGB_ASSERT_TENSOR_NEAR(expect, host_z, 1e-3) << "iter " << iter;
    }
    ASSERT_EQ(executed.size(), 4u);
    // if fake_first, both the warmup exec and the exec with recorder happen in
    // iter 0; otherwise the normal exec happens in iter 0 and the exec with
    // recorder in iter 1
    ASSERT_EQ(executed[0], 0);
    ASSERT_EQ(executed[1], fake_first ? 0 : 1);
    // the recorder is reset by the shape change, so a normal exec happens
    ASSERT_EQ(executed[2], change);
    // a new recorder is created; exec with recorder
    ASSERT_EQ(executed[3], change + 1);
}
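
// Same idea at comp_node_seq_record_level = 2: the graph must be released via
// ComputingGraph::assert_destroy() right after compilation, and the operator
// is expected to execute only twice (warmup + recording) across ten iterations.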
void run_comp_seq_rec_basic_level2(CompNode cn) {
    using ConvParam = opr::ConvBias::Param;
    ConvParam param;
    param.sparse = ConvParam::Sparse::GROUP;
    HostTensorGenerator<> gen;
    auto host_x = gen({3, 4, 10, 8}, cn), host_y = gen({2, 3, 2, 3, 3}, cn);
    int iter = 0;
    std::vector<int> executed;
    HostTensorND host_z;
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         y = opr::Host2DeviceCopy::make(*graph, host_y),
         z = opr::CallbackInjector::make(
                 opr::ConvBias::make(x, y, param),
                 [&](DeviceTensorND& dv) { executed.push_back(iter); });
    graph->options().comp_node_seq_record_level = 2;
    graph->options().var_sanity_check_first_run = false;
    auto func = graph->compile({make_callback_copy(z, host_z)});
    ComputingGraph::assert_destroy(graph);
    for (; iter < 10; ++iter) {
        host_x->copy_from_fixlayout(*gen(host_x->shape(), cn));
        func->execute();
        auto expect = eval_conv_cpu<opr::ConvBias>(*host_x, *host_y, param);
        MGB_ASSERT_TENSOR_NEAR(expect, host_z, 1e-3) << "iter " << iter;
    }
    ASSERT_EQ(executed.size(), 2u);
}
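
// Records a FUSE_MUL_ADD3 elemwise graph whose temporary memory is allocated
// dynamically, and verifies against a hand-computed reference that the
// recorded sequence still produces correct results after the input shapes
// change mid-loop.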
void run_comp_seq_rec_dyn_elemwise(CompNode cn, bool fake_first) {
    // dynamic memory is allocated in elemwise
    HostTensorGenerator<> gen;
    auto host_x = gen({3, 3}, cn), host_y = gen({1, 3}, cn),
         host_z = gen({3, 1}, cn);
    auto check = [&]() {
        HostTensorND ret(CompNode::load("cpux"), host_x->shape());
        auto px = host_x->ptr<float>(), py = host_y->ptr<float>(),
             pz = host_z->ptr<float>(), pw = ret.ptr<float>();
        auto sz0 = host_x->shape()[0], sz1 = host_x->shape()[1];
        for (size_t i = 0; i < sz0; ++i) {
            for (size_t j = 0; j < sz1; ++j) {
                pw[i * sz1 + j] = px[i * sz1 + j] * py[j] + pz[i];
            }
        }
        return ret;
    };
    auto graph = ComputingGraph::make();
    // test record on first run
    graph->options().var_sanity_check_first_run = false;
    graph->options().graph_opt_level = 0;
    graph->options().comp_node_seq_record_level = 1;
    if (fake_first) {
        graph->options().fake_next_exec = true;
    }
    auto x = opr::Host2DeviceCopy::make(*graph, host_x),
         y = opr::Host2DeviceCopy::make(*graph, host_y),
         z = opr::Host2DeviceCopy::make(*graph, host_z),
         w = opr::Elemwise::make({x, y, z}, opr::Elemwise::Mode::FUSE_MUL_ADD3);
    HostTensorND host_w;
    auto func = graph->compile({make_callback_copy(w, host_w)});
    if (fake_first) {
        func->execute();
    }
    for (int i = 0; i < 10; ++i) {
        if (i == 5) {
            *host_x = *gen({10, 8}, cn);
            *host_y = *gen({1, 8}, cn);
            *host_z = *gen({10, 1}, cn);
        }
        host_x->copy_from(*gen(host_x->shape(), cn));
        func->execute();
        auto expect = check();
        MGB_ASSERT_TENSOR_EQ(expect, host_w) << "iter " << i;
    }
}
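
// Builds a larger graph (convolution, reshape, concat, reduction, immutable
// tensors and static value inference) and compares a level-2 recorded
// executable against a non-recorded reference built from the same graph;
// finally checks that executing the recorded function with a changed input
// shape raises MegBrainError.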
void run_level2(CompNode cn, bool use_multi_holder) {
    HostTensorGenerator<> gen;
    auto host_x = gen({4, 3, 6, 7}, cn), host_w = gen({2, 3, 2, 3}, cn),
         host_y = gen({1, 25}, cn), host_z = gen({8, 1}, cn),
         host_large = gen({8, 25}, cn);
    auto make_func = [&](bool enable) -> thin_function<const HostTensorND&()> {
        auto graph = ComputingGraph::make();
        graph->options().graph_opt_level = 0;
        if (enable) {
            graph->options().var_sanity_check_first_run = false;
            graph->options().comp_node_seq_record_level = 2;
        }
        auto repeat2 = [](SymbolVar x) { return opr::Concat::make({x, x}, 0); };
        SymbolVar w;
        auto dev_w = std::make_shared<DeviceTensorND>();
        // test shared dev tensor with 1 refcnt
        if (use_multi_holder) {
            dev_w->copy_from(*host_w).sync();
            w = opr::MultipleDeviceTensorHolder::make(*graph, {dev_w})[0];
        } else {
            w = opr::SharedDeviceTensor::make(*graph, *host_w);
        }
        auto x = opr::Host2DeviceCopy::make(*graph, host_x),
             // test shared dev tensor with 1 refcnt
             c = opr::Convolution::make(x, w).reshape({8, 25}),
             y = opr::Host2DeviceCopy::make(*graph, host_y),
             large = opr::ImmutableTensor::make(*graph, *host_large),
             z = opr::Host2DeviceCopy::make(*graph, host_z),
             // elemwise with larger tmp storage
             t0 = opr::Elemwise::make({c, y, z},
                                      opr::Elemwise::Mode::FUSE_MUL_ADD3) +
                  large,
             // t1 shape is {8, 1}
             t1 = opr::reduce_sum(t0, z.symshape()),
             t2 = opr::Elemwise::make({repeat2(c), y, repeat2(t1)},
                                      opr::Elemwise::Mode::FUSE_MUL_ADD3),
             large1 = opr::ImmutableTensor::make(*graph, *host_large);
        t2 * 2;  // unused opr
        // used large static infer
        graph->static_infer_manager().infer_value(large.node());
        // unused large static infer
        graph->static_infer_manager().infer_value(large1.node());
        // static infer value
        graph->static_infer_manager().infer_value((t1.symshape() + 1).node());
        auto result = std::make_shared<HostTensorND>();
        auto func = graph->compile({make_callback_copy(t2, *result)});
        std::shared_ptr<cg::AsyncExecutable> sh_func(func.release());
        if (enable) {
            ComputingGraph::assert_destroy(graph);
        }
        auto exec = [result, sh_func]() -> const HostTensorND& {
            sh_func->execute();
            return *result;
        };
        return exec;
    };
    auto f0 = make_func(false), f1 = make_func(true);
    for (int i = 0; i < 3; ++i) {
        host_x->copy_from(*gen(host_x->shape(), cn));
        host_y->copy_from(*gen(host_y->shape(), cn));
        host_z->copy_from(*gen(host_z->shape(), cn));
        auto&& expect = f0();
        auto&& get = f1();
        MGB_ASSERT_TENSOR_EQ(expect, get);
    }
    host_x->resize({1});
    ASSERT_THROW(f1(), MegBrainError);
}
}  // anonymous namespace

namespace mgb {
namespace comp_node_test {
namespace seq_rec {
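
// Explicit specializations that dispatch each test tag to the helpers above.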
template <>
void run<basic>(CompNode cn) {
    run_comp_seq_rec_basic(cn, false);
}
template <>
void run<basic_level2>(CompNode cn) {
    run_comp_seq_rec_basic_level2(cn);
}
template <>
void run<basic_fake_exec>(CompNode cn) {
    run_comp_seq_rec_basic(cn, true);
}
template <>
void run<dyn_elemwise>(CompNode cn) {
    run_comp_seq_rec_dyn_elemwise(cn, false);
}
template <>
void run<dyn_elemwise_fake_exec>(CompNode cn) {
    run_comp_seq_rec_dyn_elemwise(cn, true);
}
template <>
void run<level2>(CompNode cn) {
    run_level2(cn, false);
}
template <>
void run<level2_multi_holder>(CompNode cn) {
    run_level2(cn, true);
}
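
// Two graphs sharing device memory via share_device_memory_with(), both
// compiled at record level 2; their results must match those of the
// non-recorded reference functions.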
template <>
void run<level2_share_storage>(CompNode cn) {
    HostTensorGenerator<> gen;
    auto host_x = gen({1}, cn), host_y = gen({1}, cn), host_z = gen({10}, cn);
    auto make_func = [&](bool enable)
            -> thin_function<std::array<const HostTensorND*, 2>()> {
        auto g0 = ComputingGraph::make(), g1 = ComputingGraph::make();
        if (enable) {
            g0->options().var_sanity_check_first_run = false;
            g0->options().comp_node_seq_record_level = 2;
            g1->options().var_sanity_check_first_run = false;
            g1->options().comp_node_seq_record_level = 2;
            g0->share_device_memory_with(*g1);
        }
        auto x0 = opr::Host2DeviceCopy::make(*g0, host_x),
             x1 = opr::Host2DeviceCopy::make(*g1, host_x),
             y = opr::Host2DeviceCopy::make(*g0, host_y),
             z = opr::Host2DeviceCopy::make(*g1, host_z);
        auto t0 = x0 + y, t1 = x1 + z;
        auto host_t0 = std::make_shared<HostTensorND>(),
             host_t1 = std::make_shared<HostTensorND>();
        auto f0 = g0->compile({make_callback_copy(t0, *host_t0)});
        auto f1 = g1->compile({make_callback_copy(t1, *host_t1)});
        std::shared_ptr<cg::AsyncExecutable> sh_f0(f0.release()),
                sh_f1(f1.release());
        if (enable) {
            ComputingGraph::assert_destroy(g0);
            ComputingGraph::assert_destroy(g1);
        }
        auto exec = [host_t0, host_t1, sh_f0,
                     sh_f1]() -> std::array<const HostTensorND*, 2> {
            sh_f0->execute();
            sh_f1->execute();
            return {host_t0.get(), host_t1.get()};
        };
        return exec;
    };
    auto f0 = make_func(false), f1 = make_func(true);
    for (int i = 0; i < 3; ++i) {
        host_x->copy_from(*gen(host_x->shape(), cn));
        host_y->copy_from(*gen(host_y->shape(), cn));
        host_z->copy_from(*gen(host_z->shape(), cn));
        auto&& expect = f0();
        auto&& get = f1();
        MGB_ASSERT_TENSOR_EQ(*expect[0], *get[0]);
        MGB_ASSERT_TENSOR_EQ(*expect[1], *get[1]);
    }
}
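
// Checks the usage constraints of record level 2: execution succeeds after
// assert_destroy(), throws MegBrainError if the graph is still alive, and
// destroying the function before the graph must also be safe.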
template <>
void run<level2_exec_check>(CompNode cn) {
    HostTensorGenerator<> gen;
    auto host_x = gen({1}, cn);
    for (int testcase = 0; testcase < 3; ++testcase) {
        host_x->copy_from(*gen({1}));
        auto graph = ComputingGraph::make();
        auto x = opr::Host2DeviceCopy::make(*graph, host_x), y = x * 2;
        HostTensorND host_y;
        graph->options().var_sanity_check_first_run = false;
        graph->options().comp_node_seq_record_level = 2;
        auto func = graph->compile({make_callback_copy(y, host_y)});
        ASSERT_EQ(host_y.shape(), host_x->shape());
        auto expect = host_x->ptr<float>()[0] * 2;
        ASSERT_NE(expect, host_y.ptr<float>()[0]);
        if (testcase == 0) {
            ComputingGraph::assert_destroy(graph);
            func->execute();
            ASSERT_EQ(expect, host_y.ptr<float>()[0]);
        } else if (testcase == 1) {
            ASSERT_THROW(func->execute(), MegBrainError);
        } else {
            // it should be OK to destroy func and then graph
            func.reset();
            graph.reset();
        }
    }
}
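
// The output callback copies from the device asynchronously; verify that
// either an explicit host_y.sync() inside the callback or func->wait() after
// execute() is sufficient to observe the result, for record levels 1 and 2.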
template <>
void run<sync_from_func>(CompNode cn) {
    REQUIRE_THREAD();
    HostTensorGenerator<> gen;
    auto host_x = gen({1}, cn);
    for (int level : {1, 2}) {
        for (bool sync : {false, true}) {
            auto graph = ComputingGraph::make();
            auto x = opr::Host2DeviceCopy::make(*graph, host_x),
                 y = opr::Sleep::make(x, 0.15) * 2;
            HostTensorND host_y;
            graph->options().var_sanity_check_first_run = false;
            graph->options().comp_node_seq_record_level = level;
            auto cb = [&](const DeviceTensorND& dv) {
                host_y.copy_from(dv);
                if (sync) {
                    host_y.sync();
                }
            };
            auto func = graph->compile({{y, cb}});
            if (level == 2) {
                ComputingGraph::assert_destroy(graph);
            }
            for (int i = 0; i < 3; ++i) {
                host_x->ptr<float>()[0] = i + 0.3;
                func->execute();
                if (!sync) {
                    func->wait();
                }
                auto got = host_y.ptr<float>()[0];
                MGB_ASSERT_FLOAT_EQ((i + 0.3) * 2, got)
                        << "level=" << level << " i=" << i;
            }
        }
    }
}
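
// Here the callback receives a non-contiguous tensor (the output of a
// Dimshuffle transpose); verify that copy_from() inside the callback yields
// the expected transposed values for record levels 1 and 2.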
template <>
void run<cb_non_contig>(CompNode cn) {
    REQUIRE_THREAD();
    HostTensorGenerator<> gen;
    auto host_x = gen({4, 5}, cn);
    for (int level : {1, 2}) {
        for (bool sync : {false, true}) {
            auto graph = ComputingGraph::make();
            auto x = opr::Host2DeviceCopy::make(*graph, host_x),
                 y = opr::Dimshuffle::make(x, {1, 0});
            HostTensorND host_y;
            graph->options().var_sanity_check_first_run = false;
            graph->options().comp_node_seq_record_level = level;
            auto cb = [&](const DeviceTensorND& dv) {
                host_y.copy_from(dv);
                if (sync) {
                    host_y.sync();
                }
            };
            auto func = graph->compile({{y, cb}});
            if (level == 2) {
                ComputingGraph::assert_destroy(graph);
            }
            for (int i = 0; i < 3; ++i) {
                host_x->copy_from(*gen(host_x->shape()));
                HostTensorND expect{host_x->comp_node(), {5, 4}};
                auto px = host_x->ptr<float>(), py = expect.ptr<float>();
                for (int i = 0; i < 5; ++i) {
                    for (int j = 0; j < 4; ++j) {
                        py[i * 4 + j] = px[j * 5 + i];
                    }
                }
                func->execute();
                if (!sync) {
                    func->wait();
                }
                MGB_ASSERT_TENSOR_EQ(expect, host_y);
            }
        }
    }
}
template <>
void run<shape_dep_const_shape>(CompNode cn) {
    // load model using const var shape to work around shape dependencies
    using namespace serialization;
    HostTensorGenerator<> gen;
    auto host_x = gen({4, 5}, cn);
    auto fname = output_file("test_comp_node_record_shape_dep_const_shape");
    HostTensorND y_expect;
    {
        // dump graph
        auto graph = ComputingGraph::make();
        auto x = opr::Host2DeviceCopy::make(*graph, host_x,
                                            OperatorNodeConfig{"x"}),
             y = x.flatten() +
                 opr::reduce_sum(opr::GetVarShape::make(x), x.make_scalar(1));
        graph->compile({make_callback_copy(y, y_expect)})->execute();
        auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()));
        dumper->dump({y});
    }
    HostTensorND host_y;
    {
        GraphLoadConfig config;
        config.const_var_shape = true;
        auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()));
        auto load_rst = loader->load(config);
        load_rst.graph->options().comp_node_seq_record_level = 2;
        load_rst.graph->options().var_sanity_check_first_run = false;
        auto x_inp = load_rst.tensor_map.at("x");
        auto y = load_rst.output_var_list.at(0);
        auto func = load_rst.graph_compile({make_callback_copy(y, host_y)});
        x_inp->copy_from(*host_x);
        func->execute();
    }
    MGB_ASSERT_TENSOR_EQ(y_expect, host_y);
}
template <>
void run<void>(CompNode) {}

}  // namespace seq_rec
}  // namespace comp_node_test
}  // namespace mgb

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
