You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

serializer_oss.cpp 29 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801
  1. /**
  2. * \file src/serialization/test/serializer_oss.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #if MGB_ENABLE_FBS_SERIALIZATION
  12. #include "megbrain/serialization/serializer.h"
  13. #include "megbrain/opr/io.h"
  14. #include "megbrain/opr/tensor_manip.h"
  15. #include "megbrain/opr/utility.h"
  16. #include "megbrain/opr/basic_arith_wrapper.h"
  17. #include "megbrain/opr/dnn/convolution.h"
  18. #include "megbrain/test/helper.h"
  19. using namespace mgb;
  20. using namespace serialization;
  21. #define GET_OUTPUT_FILE() output_file(ssprintf("TestSerializer2.%d", __LINE__))
  22. TEST(TestSerializer2, GraphDumpLoad) {
  23. auto fname = GET_OUTPUT_FILE();
  24. auto orig_id = -1;
  25. auto dump = [&]() {
  26. auto cn = CompNode::load("cpu0");
  27. auto graph = ComputingGraph::make();
  28. auto x = opr::ImmutableTensor::make(*graph, 1926.0817f, {cn});
  29. x.rename("varz");
  30. auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()),
  31. GraphDumpFormat::FLATBUFFERS);
  32. auto rst = dumper->dump({x});
  33. ASSERT_EQ(rst.nr_opr, 1);
  34. ASSERT_EQ(rst.inputs.size(), 0);
  35. ASSERT_EQ(rst.outputs.size(), 1);
  36. ASSERT_EQ(rst.params.size(), 0);
  37. orig_id = x.node()->id();
  38. mgb_log("%zu of %zu", rst.tensor_value_bytes, rst.tot_bytes);
  39. };
  40. auto load = [&]() {
  41. auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()),
  42. GraphDumpFormat::FLATBUFFERS);
  43. auto rst = loader->load();
  44. ASSERT_EQ(rst.tensor_map.size(), 0);
  45. ASSERT_EQ(rst.output_var_list.size(), 1);
  46. ASSERT_EQ(rst.output_var_map.size(), 1);
  47. ASSERT_EQ(rst.output_var_map_id.size(), 1);
  48. ASSERT_EQ(rst.output_var_map.count("varz"), 1);
  49. ASSERT_EQ(rst.output_var_map_id.count(orig_id), 1);
  50. HostTensorND host_x;
  51. auto func = rst.graph_compile(
  52. {make_callback_copy(rst.output_var_list[0], host_x)});
  53. func->execute().wait();
  54. EXPECT_NEAR(*host_x.ptr<float>(), 1926.0817f, 1e-6);
  55. };
  56. dump();
  57. load();
  58. }
  59. TEST(TestSerializer2, MultiGraphDumpLoad) {
  60. auto fname = GET_OUTPUT_FILE();
  61. auto dump = [&]() {
  62. auto cn = CompNode::load("cpu0");
  63. auto graph = ComputingGraph::make();
  64. auto x = opr::ImmutableTensor::make(*graph, 1926.0817f, {cn});
  65. x.rename("varz");
  66. auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()),
  67. GraphDumpFormat::FLATBUFFERS);
  68. // dump twice
  69. dumper->dump({x});
  70. dumper->dump({x});
  71. };
  72. auto load = [&]() {
  73. GraphLoader::LoadConfig load_config = {};
  74. auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()),
  75. GraphDumpFormat::FLATBUFFERS);
  76. // load twice
  77. loader->load(load_config, false);
  78. loader = GraphLoader::make(loader->reset_file(), loader->format());
  79. loader->load(load_config, false);
  80. };
  81. dump();
  82. load();
  83. }
  84. TEST(TestSerializer2, APlusB) {
  85. auto fname = GET_OUTPUT_FILE();
  86. TensorShape shape{2, 3};
  87. auto dump = [&]() {
  88. auto cn = CompNode::load("xpu0");
  89. auto host_x = std::make_shared<HostTensorND>(cn, shape),
  90. host_y = std::make_shared<HostTensorND>(cn, shape);
  91. auto graph = ComputingGraph::make();
  92. auto x = opr::Host2DeviceCopy::make(*graph, host_x, {"x"}),
  93. y = opr::Host2DeviceCopy::make(*graph, host_y, {"y"});
  94. auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()),
  95. GraphDumpFormat::FLATBUFFERS);
  96. // test dump duplicated
  97. auto rst = dumper->dump({(x + y).rename("z"), x + y});
  98. ASSERT_EQ(2u, rst.outputs.size());
  99. };
  100. auto load = [&]() {
  101. HostTensorGenerator<> gen;
  102. auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()),
  103. GraphDumpFormat::FLATBUFFERS);
  104. auto rst = loader->load();
  105. auto xv = rst.tensor_map.at("x");
  106. auto yv = rst.tensor_map.at("y");
  107. ASSERT_EQ(shape, xv->shape());
  108. ASSERT_EQ(shape, yv->shape());
  109. *xv = *gen(shape);
  110. *yv = *gen(shape);
  111. HostTensorND host_z, host_z_expect;
  112. host_z_expect.copy_from(*xv);
  113. for (size_t i = 0, it = shape.total_nr_elems(); i < it; ++ i)
  114. host_z_expect.ptr<float>()[i] += yv->ptr<float>()[i];
  115. auto func = rst.graph_compile(
  116. {make_callback_copy(rst.output_var_map.at("z"), host_z)});
  117. func->execute();
  118. MGB_ASSERT_TENSOR_EQ(host_z_expect, host_z);
  119. };
  120. dump();
  121. load();
  122. }
TEST(TestSerializer2, APlusBParam) {
    // Tests sharing of a SharedDeviceTensor param across repeated loads
    // from the same GraphLoader when keep_param_name is enabled: loads
    // mapped to the same memory node should reuse one device copy.
    auto cns = load_multiple_xpus(2);
    auto fname = GET_OUTPUT_FILE();
    TensorShape shape{2, 3};
    HostTensorGenerator<> gen;
    auto bias = std::make_shared<DeviceTensorND>();
    auto bias_hv = gen(shape, cns[0]);
    bias->copy_from(*bias_hv);
    {
        // dump z = x + y, where y is a named param holding `bias`
        auto host_x = std::make_shared<HostTensorND>(cns[0], shape);
        auto graph = ComputingGraph::make();
        auto x = opr::Host2DeviceCopy::make(*graph, host_x, {"x"}),
             y = opr::SharedDeviceTensor::make(*graph, bias, {"y"});
        auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()),
                GraphDumpFormat::FLATBUFFERS);
        GraphDumper::DumpConfig config;
        // keep_param_name so "y" is visible in shared_tensor_name_map()
        config.keep_param_name = true;
        dumper->dump({(x + y).rename("z")}, config);
    }
    auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()),
            GraphDumpFormat::FLATBUFFERS);
    // load with every comp node remapped to dest_cn, then check
    // z == x + bias numerically
    auto load = [&](CompNode dest_cn) {
        auto dest_cn_loc = dest_cn.locator_logical();
        auto rst = loader->load({
                [&](CompNode::Locator &loc){ loc = dest_cn_loc;}});
        auto xv = rst.tensor_map.at("x");
        // only the non-param input "x" appears in tensor_map
        ASSERT_EQ(1u, rst.tensor_map.size());
        ASSERT_EQ(shape, xv->shape());
        *xv = *gen(shape, cns[0]);
        HostTensorND host_z, host_z_expect;
        host_z_expect.copy_from(*xv);
        for (size_t i = 0, it = shape.total_nr_elems(); i < it; ++ i)
            host_z_expect.ptr<float>()[i] += bias_hv->ptr<float>()[i];
        auto func = rst.graph_compile(
                {make_callback_copy(rst.output_var_map.at("z"), host_z)});
        func->execute();
        MGB_ASSERT_TENSOR_EQ(host_z_expect, host_z);
    };
    load(cns[0]);
    auto &&shmap = loader->shared_tensor_name_map();
    // one device copy of the param after loading on cns[0]
    ASSERT_EQ(1u, shmap.at("y")->size());
    // a different stream on the same memory node reuses that copy
    load(cns[0].change_stream(1));
    ASSERT_EQ(1u, shmap.at("y")->size());
    load(cns[1]);
    // a second copy appears only if cns[1] is on another memory node
    ASSERT_EQ(1u + (cns[1].mem_node() != cns[0].mem_node()),
            shmap.at("y")->size());
}
  171. TEST(TestSerializer2, Immutable) {
  172. auto fname = GET_OUTPUT_FILE();
  173. TensorShape shape{2, 3};
  174. auto dump = [&]() {
  175. auto cn = CompNode::load("xpu0");
  176. auto host_x = std::make_shared<HostTensorND>(cn, shape);
  177. auto graph = ComputingGraph::make();
  178. auto x = opr::Host2DeviceCopy::make(*graph, host_x, {"x"});
  179. auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()),
  180. GraphDumpFormat::FLATBUFFERS);
  181. dumper->dump({(x + 1.f).rename("y")});
  182. };
  183. auto load = [&]() {
  184. HostTensorGenerator<> gen;
  185. auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()),
  186. GraphDumpFormat::FLATBUFFERS);
  187. auto rst = loader->load();
  188. auto xv = rst.tensor_map.at("x");
  189. ASSERT_EQ(shape, xv->shape());
  190. *xv = *gen(shape);
  191. HostTensorND host_y, host_y_expect;
  192. host_y_expect.copy_from(*xv);
  193. for (size_t i = 0, it = shape.total_nr_elems(); i < it; ++ i)
  194. host_y_expect.ptr<float>()[i] += 1;
  195. auto func = rst.graph_compile(
  196. {make_callback_copy(rst.output_var_map.at("y"), host_y)});
  197. func->execute();
  198. MGB_ASSERT_TENSOR_EQ(host_y_expect, host_y);
  199. };
  200. dump();
  201. load();
  202. }
TEST(TestSerializer2, CustomLoader) {
    // Tests user-supplied tensor_value_dumper / tensor_value_loader hooks:
    // tensor values are stashed in `saved_val` at dump time and only an
    // index is written into the file; the load hook restores values.
    auto fname = GET_OUTPUT_FILE();
    TensorShape shape{2, 3};
    int load_nr_null_ptr = 0, load_nr_call = 0;
    std::vector<HostTensorND> saved_val;
    // dump hook: record the tensor, write only its index to the stream
    auto tensor_value_dumper = [&saved_val](
            OutputFile &fout, const cg::OperatorNodeBase &opr,
            const HostTensorND &tensor) {
        size_t idx = saved_val.size();
        saved_val.emplace_back();
        saved_val.back().copy_from(tensor);
        fout.write(&idx, sizeof(idx));
    };
    // load hook: NOTE(review) a null `ptr` appears to mean the value is
    // not needed for this load (the assertions below suggest the shared
    // tensor is reused on reload) — the stored index must still be skipped
    auto tensor_value_loader = [&saved_val, &load_nr_null_ptr, &load_nr_call](
            void *ptr, const TensorLayout &layout, InputFile &fin) {
        ++ load_nr_call;
        size_t idx;
        if (!ptr) {
            load_nr_null_ptr ++;
            fin.skip(sizeof(idx));
            return;
        }
        fin.read(&idx, sizeof(idx));
        auto &&val = saved_val.at(idx);
        ASSERT_TRUE(val.layout().eq_layout(layout));
        memcpy(ptr, val.raw_ptr(), layout.span().high_byte);
    };
    // dump z = (x + 1) * y with the custom value dumper installed
    auto dump = [&]() {
        auto cn = CompNode::load("xpu0");
        auto host_x = std::make_shared<HostTensorND>(cn, shape);
        HostTensorND y_val(cn, {1});
        y_val.ptr<float>()[0] = 2.3f;
        auto graph = ComputingGraph::make();
        auto x = opr::Host2DeviceCopy::make(*graph, host_x, {"x"}),
             y = opr::SharedDeviceTensor::make(*graph, y_val),
             z = ((x + 1.f) * y).rename("z");
        auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()),
                GraphDumpFormat::FLATBUFFERS);
        GraphDumpConfig config;
        config.tensor_value_dumper = tensor_value_dumper;
        dumper->dump({z}, config);
    };
    dump();
    GraphLoadConfig config;
    config.tensor_value_loader = tensor_value_loader;
    auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()),
            GraphDumpFormat::FLATBUFFERS);
    // load and verify z == (x + 1) * 2.3 numerically
    auto load = [&]() {
        HostTensorGenerator<> gen;
        auto rst = loader->load(config);
        auto xv = rst.tensor_map.at("x");
        ASSERT_EQ(shape, xv->shape());
        *xv = *gen(shape);
        HostTensorND host_y, host_y_expect;
        host_y_expect.copy_from(*xv);
        auto py = host_y_expect.ptr<float>();
        for (size_t i = 0, it = shape.total_nr_elems(); i < it; ++ i) {
            py[i] = (py[i] + 1.f) * 2.3f;
        }
        auto func = rst.graph_compile(
                {make_callback_copy(rst.output_var_map.at("z"), host_y)});
        func->execute();
        MGB_ASSERT_TENSOR_EQ(host_y_expect, host_y);
    };
    load();
    load();
    // two values were recorded at dump time
    ASSERT_EQ(2u, saved_val.size());
    ASSERT_EQ(1, load_nr_null_ptr); // immutable tensor is not shared
    // the loader hook ran twice per load (2 tensors x 2 loads)
    ASSERT_EQ(4, load_nr_call);
}
  273. TEST(TestSerializer2, ManyIOVars) {
  274. auto fname = GET_OUTPUT_FILE();
  275. constexpr size_t NR_VARS = 32;
  276. auto dump = [&]() {
  277. auto graph = ComputingGraph::make();
  278. SymbolVarArray xs;
  279. cg::OperatorNodeConfig::CompNodeArray y_comp_nodes;
  280. for (size_t i = 0; i < NR_VARS; ++ i) {
  281. CompNode::Locator loc;
  282. loc.type = CompNode::DeviceType::CPU;
  283. loc.device = 0;
  284. loc.stream = i;
  285. auto cn = CompNode::load(loc);
  286. auto host_x = std::make_shared<HostTensorND>(cn, TensorShape{1});
  287. xs.push_back(opr::Host2DeviceCopy::make(
  288. *graph, host_x, std::to_string(i)));
  289. loc.device = 1;
  290. y_comp_nodes.push_back(CompNode::load(loc));
  291. }
  292. auto con = opr::Concat::make(xs, 0, CompNode::load("cpu2")) * 2 + 1;
  293. auto ys = opr::Split::make(con,
  294. opr::Split::Options::make_partition(
  295. con, 0, std::vector<size_t>(NR_VARS, 1)),
  296. OperatorNodeConfig{}.comp_node_arr(y_comp_nodes));
  297. auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()),
  298. GraphDumpFormat::FLATBUFFERS);
  299. auto rst = dumper->dump(ys);
  300. };
  301. auto load = [&]() {
  302. HostTensorGenerator<> gen;
  303. auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()),
  304. GraphDumpFormat::FLATBUFFERS);
  305. auto rst = loader->load();
  306. ASSERT_EQ(NR_VARS, rst.output_var_list.size());
  307. ComputingGraph::OutputSpec out_spec(NR_VARS);
  308. std::vector<HostTensorND> host_ys(NR_VARS);
  309. for (size_t i = 0; i < NR_VARS; ++ i) {
  310. auto y = rst.output_var_list[i];
  311. auto loc = y.node()->comp_node().locator_logical();
  312. ASSERT_EQ(1, loc.device);
  313. ASSERT_EQ(static_cast<int>(i), loc.stream);
  314. out_spec[i] = make_callback_copy(y, host_ys[i]);
  315. auto &&inp = rst.tensor_map.at(std::to_string(i));
  316. inp->resize({1}).ptr<float>()[0] = i;
  317. }
  318. auto func = rst.graph_compile(out_spec);
  319. func->execute();
  320. for (size_t i = 0; i < NR_VARS; ++ i) {
  321. auto &&val = host_ys[i];
  322. ASSERT_EQ(TensorShape{1}, val.shape());
  323. ASSERT_EQ(static_cast<float>(i * 2 + 1), val.ptr<float>()[0]);
  324. }
  325. };
  326. dump();
  327. load();
  328. }
  329. TEST(TestSerializer2, RemoveSetGrad) {
  330. auto fname = GET_OUTPUT_FILE();
  331. TensorShape shape{2, 3};
  332. auto dump = [&]() {
  333. auto cn = CompNode::load("xpu0");
  334. auto host_x = std::make_shared<HostTensorND>(cn, shape),
  335. host_y = std::make_shared<HostTensorND>(cn, shape);
  336. auto graph = ComputingGraph::make();
  337. auto x = opr::Host2DeviceCopy::make(*graph, host_x, {"x"}),
  338. y = opr::Host2DeviceCopy::make(*graph, host_y, {"y"});
  339. auto sg = [](SymbolVar var) {
  340. return opr::SetGrad::make(var, opr::SetGrad::zero_grad);
  341. };
  342. // SetGrad as output
  343. auto z0 = sg(x + y);
  344. // SetGrad as internal
  345. auto z1 = sg(x) + sg(sg(y));
  346. auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()),
  347. GraphDumpFormat::FLATBUFFERS);
  348. dumper->dump({z0, z1});
  349. };
  350. auto load = [&]() {
  351. HostTensorGenerator<> gen;
  352. auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()),
  353. GraphDumpFormat::FLATBUFFERS);
  354. auto rst = loader->load();
  355. auto xv = rst.tensor_map.at("x");
  356. auto yv = rst.tensor_map.at("y");
  357. ASSERT_EQ(shape, xv->shape());
  358. ASSERT_EQ(shape, yv->shape());
  359. *xv = *gen(shape);
  360. *yv = *gen(shape);
  361. HostTensorND host_z0, host_z1, host_z_expect;
  362. host_z_expect.copy_from(*xv);
  363. for (size_t i = 0, it = shape.total_nr_elems(); i < it; ++ i)
  364. host_z_expect.ptr<float>()[i] += yv->ptr<float>()[i];
  365. ASSERT_EQ(2u, rst.output_var_list.size());
  366. auto func = rst.graph_compile({
  367. {make_callback_copy(rst.output_var_list[0], host_z0)},
  368. {make_callback_copy(rst.output_var_list[1], host_z1)}});
  369. func->execute();
  370. MGB_ASSERT_TENSOR_EQ(host_z_expect, host_z0);
  371. MGB_ASSERT_TENSOR_EQ(host_z_expect, host_z1);
  372. };
  373. dump();
  374. load();
  375. }
TEST(TestSerializer2, MultipleParamNDIMDTypeCompNode) {
    // Dumps SharedDeviceTensor params covering every computing dtype,
    // ndim 1..MAX_NDIM and multiple cpu0 streams, then verifies each value
    // round-trips exactly via shared_tensor_id_map() after loading.
    auto fname = GET_OUTPUT_FILE();
    std::vector<std::shared_ptr<DeviceTensorND>> values;
    // create a device tensor on cpu0:<stream> with shape {1,2,...,ndim},
    // fill it with a deterministic byte pattern and remember it in `values`
    auto add_value = [&](int stream, int ndim, DType dtype) {
        CompNode::Locator loc;
        loc.type = CompNode::DeviceType::CPU;
        loc.device = 0;
        loc.stream = stream;
        auto cn = CompNode::load(loc);
        TensorShape shp;
        shp.ndim = ndim;
        for (int i = 0; i < ndim; ++ i)
            shp[i] = i + 1;
        auto cur = std::make_shared<DeviceTensorND>(cn, shp, dtype);
        uint8_t *ptr = reinterpret_cast<uint8_t*>(cur->raw_ptr());
        for (size_t i = 0, it = cur->layout().span().dist_byte();
                i < it; ++ i) {
            ptr[i] = i;
        }
        values.push_back(cur);
        return cur;
    };
    auto dump = [&]() {
        auto graph = ComputingGraph::make();
        int stream = 0;
        // reduce each param to a size-1 Int32 var copied to cpu1, so vars
        // of any dtype/shape can be summed into one output
        auto mkvar = [&](int ndim, DType dtype) {
            auto dv = add_value(stream ++, ndim, dtype);
            auto var = opr::SharedDeviceTensor::make(*graph, dv);
            var = opr::TypeCvt::make(
                    opr::reduce_sum(var, var.make_scalar(1)),
                    dtype::Int32());
            var = opr::Copy::make(var, CompNode::load("cpu1"));
            return var;
        };
        auto x = mkvar(1, dtype::Float32());
        // one param per (ndim, dtype) combination
        for (size_t ndim = 1; ndim <= TensorShape::MAX_NDIM; ++ ndim) {
#define cb(_dt) x = x + mkvar(ndim, _dt());
            MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
#undef cb
        }
        ASSERT_GT(values.size(), 8u);
        auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()),
                GraphDumpFormat::FLATBUFFERS);
        dumper->dump({x});
    };
    auto load = [&]() {
        auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()),
                GraphDumpFormat::FLATBUFFERS);
        // the id map must be unavailable before the first load
        ASSERT_THROW(loader->shared_tensor_id_map(), MegBrainError);
        loader->load();
        auto &&got = loader->shared_tensor_id_map();
        ASSERT_EQ(values.size(), got.size());
        for (size_t i = 0; i < values.size(); ++ i) {
            ASSERT_EQ(1u, got[i].second.size());
            auto &&vi = *values[i], &&gi = *got[i].second.begin()->second;
            // shape, comp node, dtype and raw bytes must all round-trip
            ASSERT_EQ(vi.shape(), gi.shape());
            ASSERT_EQ(vi.comp_node(), gi.comp_node());
            ASSERT_EQ(vi.dtype(), gi.dtype());
            ASSERT_EQ(0, memcmp(vi.raw_ptr(), gi.raw_ptr(),
                        vi.layout().span().dist_byte()));
        }
    };
    dump();
    load();
}
TEST(TestSerializer2, ConstVarShape) {
    // Tests GraphLoadConfig::const_var_shape: with it enabled the input
    // shape becomes CONST for static inference, and tensor_modifier can
    // supply shapes for inputs whose values are not stored in the file.
    auto fname = GET_OUTPUT_FILE();
    TensorShape shape{2, 3};
    HostTensorGenerator<> gen;
    auto host_x = gen({2, 3});
    {
        // dump y = x + 1
        auto graph = ComputingGraph::make();
        auto x = opr::Host2DeviceCopy::make(*graph, host_x, {"x"});
        auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()),
                GraphDumpFormat::FLATBUFFERS);
        dumper->dump({x + 1.f});
    }
    // load with the given config and verify both the numeric result and
    // the shape-inference type of the output var
    auto run_and_check = [&](const GraphLoadConfig& config) {
        auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()),
                GraphDumpFormat::FLATBUFFERS);
        auto rst = loader->load(config);
        rst.tensor_map.at("x")->copy_from(*host_x);
        auto y = rst.output_var_list[0];
        ASSERT_EQ(shape, y.shape());
        // CONST inference with const_var_shape, RT_STATIC otherwise
        auto infer_type = y.node()->owner_graph()->static_infer_manager().
                get_infer_type(y.node()).shape;
        if (config.const_var_shape) {
            ASSERT_EQ(cg::static_infer::InferType::CONST, infer_type);
        } else {
            ASSERT_EQ(cg::static_infer::InferType::RT_STATIC, infer_type);
        }
        HostTensorND host_y, host_y_expect;
        host_y_expect.copy_from(*host_x);
        for (size_t i = 0, it = shape.total_nr_elems(); i < it; ++ i)
            host_y_expect.ptr<float>()[i] += 1;
        auto func = rst.graph_compile({make_callback_copy(y, host_y)});
        func->execute();
        MGB_ASSERT_TENSOR_EQ(host_y_expect, host_y);
        if (config.const_var_shape) {
            // with a const shape, feeding a differently-shaped input
            // must fail at execution time
            rst.tensor_map.at("x")->resize({4, 5});
            ASSERT_THROW(func->execute(), MegBrainError);
        }
    };
    for (bool const_shape: {false, true}) {
        GraphLoadConfig config;
        config.const_var_shape = const_shape;
        run_and_check(config);
    };
    // test const shape with tensor modifier
    {
        int nr_tensor = 0, nr_mod = 0;
        shape = {7, 6};
        *host_x = *gen(shape);
        GraphLoadConfig config;
        config.const_var_shape = true;
        // invoked once per tensor in the file; only "x" arrives without a
        // stored value here, so the modifier provides its new shape
        config.tensor_modifier = [&](const std::string& name, bool has_value,
                HostTensorND& tensor) {
            ++nr_tensor;
            if (!has_value) {
                ASSERT_EQ("x", name);
                tensor.resize(shape);
                ++nr_mod;
            }
        };
        run_and_check(config);
        // two tensors seen (presumably x and the immutable 1.f), one modified
        ASSERT_EQ(2, nr_tensor);
        ASSERT_EQ(1, nr_mod);
    }
}
  506. TEST(TestSerializer2, ConstVarShapeOutputName) {
  507. auto fname = GET_OUTPUT_FILE();
  508. TensorShape shape{2, 3};
  509. HostTensorGenerator<> gen;
  510. auto host_x = gen({2, 3});
  511. {
  512. // dump
  513. auto graph = ComputingGraph::make();
  514. auto x = opr::Host2DeviceCopy::make(*graph, host_x, {"x"}),
  515. y = opr::GetVarShape::make(x) + 1;
  516. y.rename("out");
  517. auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()),
  518. GraphDumpFormat::FLATBUFFERS);
  519. dumper->dump({y});
  520. }
  521. {
  522. // load
  523. auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()),
  524. GraphDumpFormat::FLATBUFFERS);
  525. GraphLoadConfig config;
  526. config.const_var_shape = true;
  527. auto rst = loader->load(config);
  528. ASSERT_EQ(1u, rst.tensor_map.count("x"));
  529. auto y = rst.output_var_map.at("out");
  530. ASSERT_TRUE(y.node()->owner_opr()->same_type<opr::ImmutableTensor>());
  531. }
  532. }
  533. TEST(TestSerializer2, Priority) {
  534. auto fname = GET_OUTPUT_FILE();
  535. TensorShape shape{2, 3};
  536. auto dump = [&](bool keep_pri) {
  537. auto cn = CompNode::load("xpu0");
  538. auto host_x = std::make_shared<HostTensorND>(cn, shape),
  539. host_y = std::make_shared<HostTensorND>(cn, shape);
  540. auto graph = ComputingGraph::make();
  541. auto x = opr::Host2DeviceCopy::make(*graph, host_x, {"x"}) + 1,
  542. y = opr::Host2DeviceCopy::make(*graph, host_y, {"y"}) + 1;
  543. set_priority(x, 1);
  544. set_priority(y, 2);
  545. auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()),
  546. GraphDumpFormat::FLATBUFFERS);
  547. GraphDumper::DumpConfig config;
  548. if (keep_pri) {
  549. config.keep_opr_priority = true;
  550. }
  551. dumper->dump({x * y}, config);
  552. };
  553. auto load = [&](bool has_pri) {
  554. auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()),
  555. GraphDumpFormat::FLATBUFFERS);
  556. auto rst = loader->load();
  557. VarNode* x, *y;
  558. unpack_vector(rst.output_var_list.front().node()->owner_opr()->input(),
  559. x, y);
  560. auto get_pri = [](VarNode *var) {
  561. return var->owner_opr()->node_prop().attribute().priority;
  562. };
  563. int xpri = get_pri(x), ypri = get_pri(y);
  564. if (has_pri) {
  565. ASSERT_EQ(1, xpri);
  566. ASSERT_EQ(2, ypri);
  567. } else {
  568. ASSERT_EQ(0, xpri);
  569. ASSERT_EQ(0, ypri);
  570. }
  571. };
  572. dump(false);
  573. load(false);
  574. dump(true);
  575. load(true);
  576. }
  577. TEST(TestSerializer2, MultipleParams) {
  578. auto fname = GET_OUTPUT_FILE();
  579. HostTensorGenerator<> gen;
  580. std::vector<std::shared_ptr<HostTensorND>> tensors{
  581. gen({2, 3}), gen({1}), gen({3, 2}), gen({1, 1})};
  582. auto dump = [&]() {
  583. auto graph = ComputingGraph::make();
  584. SymbolVarArray outputs;
  585. for (auto&& i : tensors) {
  586. outputs.push_back(opr::SharedDeviceTensor::make(*graph, *i));
  587. }
  588. GraphDumper::make(OutputFile::make_fs(fname.c_str()),
  589. GraphDumpFormat::FLATBUFFERS)
  590. ->dump(outputs);
  591. };
  592. auto load = [&]() {
  593. auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()),
  594. GraphDumpFormat::FLATBUFFERS);
  595. auto rst = loader->load();
  596. ASSERT_EQ(tensors.size(), rst.output_var_list.size());
  597. for (size_t i = 0; i < tensors.size(); ++i) {
  598. HostTensorND got;
  599. got.copy_from(rst.output_var_list[i]
  600. .node()
  601. ->owner_opr()
  602. ->cast_final_safe<opr::SharedDeviceTensor>()
  603. .get_dev_tensor())
  604. .sync();
  605. MGB_ASSERT_TENSOR_EQ(*tensors[i], got);
  606. }
  607. };
  608. dump();
  609. load();
  610. }
  611. TEST(TestSerializer2, ParamerizedDType) {
  612. auto fname = GET_OUTPUT_FILE();
  613. TensorShape shape{2, 3, 3};
  614. dtype::Quantized8Asymm dtype(0.01f, (uint8_t) 123);
  615. auto dump = [&]() {
  616. auto cn = CompNode::load("cpu0");
  617. auto host_x = std::make_shared<HostTensorND>(cn, shape, dtype);
  618. for (size_t i = 0; i < host_x->layout().span().dist_elem(); i++) {
  619. host_x->ptr<dt_quint8>()[i] =
  620. dt_quint8(static_cast<uint8_t>(i & 255));
  621. }
  622. auto graph = ComputingGraph::make();
  623. auto x = opr::Host2DeviceCopy::make(*graph, host_x, {"x"});
  624. auto rst = opr::Dimshuffle::make(x, {1, 2, 0});
  625. auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()),
  626. GraphDumpFormat::FLATBUFFERS);
  627. dumper->dump({rst});
  628. };
  629. auto load = [&]() {
  630. auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()),
  631. GraphDumpFormat::FLATBUFFERS);
  632. auto rst = loader->load();
  633. ASSERT_EQ(rst.output_var_list.size(), 1u);
  634. EXPECT_EQ(rst.output_var_list.front().node()->dtype(), dtype);
  635. };
  636. dump();
  637. load();
  638. }
  639. TEST(TestSerializer2, HasOutputDtype) {
  640. auto fname = GET_OUTPUT_FILE();
  641. HostTensorGenerator<> gen;
  642. auto graph = ComputingGraph::make();
  643. auto gen_tensor = [&](const TensorShape& shape, const DType& dtype) {
  644. return opr::TypeCvt::make(
  645. opr::Host2DeviceCopy::make(*graph, gen(shape)), dtype);
  646. };
  647. auto dump = [&]() {
  648. auto x = gen_tensor({20, 4, 56, 56}, dtype::QuantizedS8(0.5f));
  649. auto w = gen_tensor({4, 4, 1, 1}, dtype::QuantizedS8(0.1f));
  650. auto b = gen_tensor({1, 4, 1, 1}, dtype::QuantizedS32(0.05f));
  651. opr::ConvBias::Param param;
  652. auto y0 = opr::ConvBias::make(
  653. x, w, b, param, {},
  654. OperatorNodeConfig{dtype::QuantizedS32(0.05f)});
  655. auto y1 = opr::ConvBias::make(
  656. x, w, b, param, {},
  657. OperatorNodeConfig{dtype::QuantizedS8(0.3f)});
  658. auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()),
  659. GraphDumpFormat::FLATBUFFERS);
  660. dumper->dump({y0, y1});
  661. };
  662. auto check = [](const serialization::GraphLoader::LoadResult& rst,
  663. size_t idx, const DType& expected_dtype) {
  664. auto&& dtype = rst.output_var_list[idx]
  665. .node()->owner_opr()->config().output_dtype();
  666. ASSERT_TRUE(dtype.valid());
  667. ASSERT_EQ(dtype, expected_dtype);
  668. };
  669. auto load = [&]() {
  670. auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()),
  671. GraphDumpFormat::FLATBUFFERS);
  672. auto rst = loader->load();
  673. ASSERT_EQ(rst.output_var_list.size(), 2u);
  674. check(rst, 0, dtype::QuantizedS32(0.05f));
  675. check(rst, 1, dtype::QuantizedS8(0.3f));
  676. };
  677. dump();
  678. load();
  679. }
  680. TEST(TestSerializer2, LOGEXP) {
  681. auto fname = GET_OUTPUT_FILE();
  682. TensorShape shape{2, 3};
  683. using Mode = opr::Elemwise::Mode;
  684. bool inplace_opt = true;
  685. auto dump = [&]() {
  686. auto cn = CompNode::load("xpu0");
  687. auto host_x = std::make_shared<HostTensorND>(cn, shape);
  688. for (size_t i = 0, it = shape.total_nr_elems(); i < it; ++i)
  689. host_x->ptr<float>()[i] = 0.0; // To avoid NAN
  690. auto graph = ComputingGraph::make();
  691. if (!inplace_opt)
  692. graph->options().graph_opt_level = 0;
  693. auto x = opr::Host2DeviceCopy::make(*graph, host_x, {"x"});
  694. auto y = opr::Elemwise::make({x}, Mode::EXP);
  695. auto z = opr::Elemwise::make({y}, Mode::LOG);
  696. auto dumper = GraphDumper::make(OutputFile::make_fs(fname.c_str()),
  697. GraphDumpFormat::FLATBUFFERS);
  698. auto rst = dumper->dump({z.rename("z"), z});
  699. size_t expected_nr_opr = inplace_opt? 1: 3;
  700. ASSERT_EQ(expected_nr_opr, rst.nr_opr);
  701. };
  702. auto load = [&]() {
  703. auto loader = GraphLoader::make(InputFile::make_fs(fname.c_str()),
  704. GraphDumpFormat::FLATBUFFERS);
  705. auto rst = loader->load();
  706. };
  707. dump();
  708. load();
  709. inplace_opt = !inplace_opt;
  710. dump();
  711. load();
  712. }
  713. #endif

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台