You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

misc.cpp 15 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419
  1. /**
  2. * \file src/opr/test/misc.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
  9. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "megbrain/opr/misc.h"
  12. #include "megbrain/opr/basic_arith_wrapper.h"
  13. #include "megbrain/opr/blas.h"
  14. #include "megbrain/opr/io.h"
  15. #include "megbrain/opr/tensor_manip.h"
  16. #include "megbrain/opr/utility.h"
  17. #include "megbrain/test/autocheck.h"
  18. #include "megbrain/test/helper.h"
  19. #include "megbrain/test/megdnn_helper.h"
  20. #include <numeric>
  21. #include <random>
  22. using namespace mgb;
  23. namespace {
  24. void shape_abc(const TensorShape &shape, size_t axis,
  25. size_t &A, size_t &B, size_t &C) {
  26. auto acc_mul = [](const size_t *first, const size_t *last) {
  27. return std::accumulate(
  28. first, last, 1u, std::multiplies<size_t>());
  29. };
  30. A = acc_mul(shape.shape, shape.shape+axis);
  31. B = shape.shape[axis];
  32. C = acc_mul(shape.shape+axis+1, shape.shape+shape.ndim);
  33. }
  34. void argsort_data_gen(HostTensorND& dest) {
  35. mgb_assert(dest.layout().ndim == 2 && dest.layout().is_contiguous());
  36. size_t m = dest.layout()[0], n = dest.layout()[1];
  37. auto ptr = dest.ptr<float>();
  38. RNGxorshf rng{next_rand_seed()};
  39. std::uniform_real_distribution<float> dist_base{-10.f, 10.f},
  40. dist_delta{0.1f, 1.2f};
  41. for (size_t i = 0; i < m; ++i) {
  42. auto v = dist_base(rng);
  43. for (size_t j = 0; j < n; ++j) {
  44. ptr[j] = v;
  45. v += dist_delta(rng);
  46. }
  47. std::shuffle(ptr, ptr + n, rng);
  48. ptr += n;
  49. }
  50. }
  51. }
  52. TEST(TestOprMisc, Argmxx) {
  53. auto run = [](bool is_max, int32_t axis, TensorShape sshape) {
  54. auto dshape = sshape;
  55. dshape.shape[axis] = 1;
  56. using Checker = AutoOprChecker<1, 1>;
  57. auto make_graph = [&](const Checker::SymInpArray &inputs) ->
  58. Checker::SymOutArray {
  59. if (is_max)
  60. return {opr::Argmax::make(inputs[0], {axis})};
  61. else
  62. return {opr::Argmin::make(inputs[0], {axis})};
  63. };
  64. auto better_than = [&](float curr, float best)
  65. {
  66. if (is_max)
  67. return curr > best;
  68. else
  69. return curr < best;
  70. };
  71. auto fwd = [&](Checker::NumOutArray &out, Checker::NumInpArray inp) {
  72. out[0].dtype(dtype::Int32()).resize(dshape);
  73. size_t A, B, C;
  74. shape_abc(sshape, axis, A, B, C);
  75. for (size_t a = 0; a < A; ++a) for (size_t c = 0; c < C; ++c) {
  76. float best_val;
  77. size_t best_arg = -1;
  78. if (is_max)
  79. best_val = std::numeric_limits<float>::lowest();
  80. else
  81. best_val = std::numeric_limits<float>::max();
  82. for (size_t b = 0; b < B; ++b) {
  83. float curr_val = inp[0]->ptr<float>()[(a*B+b)*C+c];
  84. if (better_than(curr_val, best_val)) {
  85. best_val = curr_val;
  86. best_arg = b;
  87. }
  88. }
  89. out[0].ptr<int>()[a*C+c] = best_arg;
  90. }
  91. };
  92. Checker{make_graph, fwd}.
  93. set_input_allow_grad(0, false).
  94. set_output_allow_grad(0, false).
  95. run({sshape}).
  96. run({sshape}).
  97. run({sshape});
  98. };
  99. run(true, 0, {5});
  100. run(true, 1, {2, 3, 4, 5});
  101. run(true, 2, {2, 3, 4, 5});
  102. run(true, 3, {2, 3, 4, 5});
  103. run(false, 0, {3, 4, 5});
  104. run(false, 1, {2, 3, 4, 5});
  105. run(false, 2, {2, 3, 4, 5});
  106. run(false, 3, {2, 3, 4, 5});
  107. }
  108. TEST(TestOprMisc, Argsort) {
  109. using Order = opr::Argsort::Param::Order;
  110. auto run = [](Order order) {
  111. using Checker = AutoOprChecker<1, 2>;
  112. auto make_graph = [&](const Checker::SymInpArray& inputs)
  113. -> Checker::SymOutArray {
  114. return opr::Argsort::make(inputs[0], order);
  115. };
  116. auto fwd = [&](Checker::NumOutArray& out, Checker::NumInpArray inp) {
  117. size_t m = inp[0]->shape()[0], n = inp[0]->shape()[1];
  118. auto pi = inp[0]->ptr<float>();
  119. auto poval = out[0].resize({m, n}).ptr<float>();
  120. auto poidx = out[1].resize({m, n}).ptr<int>();
  121. using KV = std::pair<float, int>;
  122. std::vector<KV> row(n);
  123. for (size_t i = 0; i < m; ++i) {
  124. for (size_t j = 0; j < n; ++j) {
  125. row[j].first = pi[i * n + j];
  126. row[j].second = j;
  127. }
  128. if (order == Order::ASCENDING) {
  129. std::sort(row.begin(), row.end());
  130. } else {
  131. std::sort(row.begin(), row.end(), std::greater<KV>{});
  132. }
  133. for (size_t j = 0; j < n; ++j) {
  134. poval[i * n + j] = row[j].first;
  135. poidx[i * n + j] = row[j].second;
  136. }
  137. }
  138. };
  139. Checker::RunOptions opt;
  140. opt.numdiff_eps = 0.045;
  141. Checker{make_graph, fwd}
  142. .set_input_generator(0, argsort_data_gen)
  143. .set_output_allow_grad(1, false)
  144. .run({TensorShape{1, 1}}, opt)
  145. .run({TensorShape{5, 3}}, opt)
  146. .run({TensorShape{10, 24}}, opt);
  147. };
  148. run(Order::ASCENDING);
  149. run(Order::DESCENDING);
  150. }
  151. TEST(TestOprMisc, Cumsum) {
  152. using Param = opr::Cumsum::Param;
  153. auto run = [](const Param &param) {
  154. using Checker = AutoOprChecker<1, 1>;
  155. auto make_graph = [&](const Checker::SymInpArray &inputs) ->
  156. Checker::SymOutArray {
  157. return {opr::Cumsum::make(inputs[0], param)};
  158. };
  159. auto fwd = [&](Checker::NumOutArray &out, Checker::NumInpArray inp) {
  160. out[0].resize(inp[0]->shape());
  161. auto pin = inp[0]->ptr<float>(), pout = out[0].ptr<float>();
  162. size_t A, B, C;
  163. int real_axis = param.axis;
  164. if (real_axis < 0) real_axis += 3;
  165. shape_abc(inp[0]->shape(), real_axis, A, B, C);
  166. ptrdiff_t stride = C;
  167. if (param.reverse)
  168. stride = -stride;
  169. for (size_t i = 0; i < A; ++ i) {
  170. for (size_t k = 0; k < C; ++ k) {
  171. auto pi = pin + i * B * C + k,
  172. po = pout + i * B * C + k;
  173. if (param.reverse) {
  174. pi += (B - 1) * C;
  175. po += (B - 1) * C;
  176. }
  177. if (param.exclusive) {
  178. *po = 0;
  179. po += stride;
  180. }
  181. float sum = 0;
  182. for (size_t j = 0; j < B - 1; ++ j) {
  183. sum += pi[j * stride];
  184. po[j * stride] = sum;
  185. }
  186. if (!param.exclusive) {
  187. po[(B - 1) * stride] = sum + pi[(B - 1) * stride];
  188. }
  189. }
  190. }
  191. };
  192. Checker{make_graph, fwd}.
  193. run({TensorShape{2, 3, 4}}).
  194. run({TensorShape{3, 1, 2}}).
  195. run({TensorShape{4, 2, 3}});
  196. };
  197. // test negative axis
  198. for (int32_t axis = -3; axis < 3; ++axis)
  199. for (int mask = 0; mask < 4; ++mask)
  200. run({axis, bool(mask >> 1), bool(mask & 1)});
  201. }
  202. TEST(TestOprMisc, CondTake) {
  203. using Param = opr::CondTake::Param;
  204. using Checker = AutoOprChecker<2, 1>;
  205. auto make_graph = [&](const Checker::SymInpArray &inputs) ->
  206. Checker::SymOutArray {
  207. return {opr::CondTake::make(
  208. inputs[0], inputs[1], {Param::Mode::LT})[0]};
  209. };
  210. auto fwd = [&](Checker::NumOutArray &out, Checker::NumInpArray inp) {
  211. std::vector<float> values;
  212. auto data = inp[0]->ptr<float>(), mask = inp[1]->ptr<float>();
  213. auto isize = inp[0]->shape().total_nr_elems();
  214. for (size_t i = 0; i < isize; ++ i) {
  215. if (mask[i] < 0) {
  216. values.push_back(data[i]);
  217. }
  218. }
  219. out[0].resize({values.size()});
  220. memcpy(out[0].ptr<float>(),
  221. values.data(), sizeof(float) * values.size());
  222. };
  223. auto ensure_nonempty = [](Checker::NumInpArray inp) {
  224. auto mask = inp[1]->ptr<float>();
  225. auto isize = inp[1]->shape().total_nr_elems();
  226. for (size_t i = 0; i < isize; ++ i) {
  227. if (mask[i] < 0)
  228. return;
  229. }
  230. mask[isize - 1] = -1;
  231. };
  232. auto mki = [](const TensorShape &shp) -> Checker::ShapeInpArray {
  233. return {shp, shp};
  234. };
  235. Checker{make_graph, fwd}.
  236. set_input_allow_grad(1, false).
  237. set_input_coordinator(ensure_nonempty).
  238. run(mki({2})).
  239. run(mki({3, 5, 8})).
  240. run(mki({100}));
  241. }
  242. TEST(TestOprMisc, CondTakeEmptyOut) {
  243. using Param = opr::CondTake::Param;
  244. HostTensorGenerator<> gen;
  245. auto host_x = gen({1});
  246. host_x->ptr<float>()[0] = 1;
  247. auto graph = ComputingGraph::make();
  248. auto x = opr::Host2DeviceCopy::make(*graph, host_x);
  249. auto out = opr::CondTake::make(x, x, {Param::Mode::LT});
  250. HostTensorND host_out0, host_out1;
  251. auto func = graph->compile({make_callback_copy(out[0], host_out0),
  252. make_callback_copy(out[1], host_out1)});
  253. func->execute();
  254. ASSERT_EQ(TensorShape{0}, host_out0.shape());
  255. ASSERT_EQ(TensorShape{0}, host_out1.shape());
  256. }
  257. TEST(TestOprMisc, TopKValueOnly) {
  258. auto run = [](bool dyn_k, bool non_contig) {
  259. using Checker = AutoOprChecker<1, 1>;
  260. std::shared_ptr<HostTensorND> host_k;
  261. SymbolVar var_x0, var_x1;
  262. auto make_graph = [&](const Checker::SymInpArray& inputs)
  263. -> Checker::SymOutArray {
  264. auto k = opr::Host2DeviceCopy::make(
  265. *inputs[0].node()->owner_graph(), host_k);
  266. if (dyn_k) {
  267. k = opr::MarkDynamicVar::make(k);
  268. }
  269. auto x = inputs[0];
  270. if (non_contig) {
  271. var_x0 = x;
  272. x = opr::Subtensor::make(
  273. x, {opr::Subtensor::AxisIndexer::make_interval(
  274. 1, None, opr::GetVarShape::make(x, 1) / 2,
  275. None)});
  276. var_x1 = x;
  277. }
  278. auto outs = opr::TopK::make(x, k, opr::TopK::Param::Mode::KTH_ONLY);
  279. return {outs[0]};
  280. };
  281. auto fwd = [&](Checker::NumOutArray& out, Checker::NumInpArray inp) {
  282. auto opr = megdnn_naive_handle()->create_operator<megdnn::TopK>();
  283. int k = host_k->ptr<int>()[0];
  284. HostTensorND x = *inp[0];
  285. if (non_contig) {
  286. auto layout = x.layout();
  287. layout.shape[1] /= 2;
  288. x = x.sub(SubTensorSpec::make_from_layout(layout));
  289. }
  290. TensorLayout outl0, outl1;
  291. opr->deduce_layout(k, x.layout(), outl0, outl1);
  292. size_t wk_size =
  293. opr->get_workspace_in_bytes(k, x.layout(), outl0, outl1);
  294. std::unique_ptr<dt_byte[]> wk_store{new dt_byte[wk_size]};
  295. opr->exec(k, x.as_megdnn(), out[0].resize(outl0).as_megdnn(), {},
  296. {wk_store.get(), wk_size});
  297. };
  298. Checker checker{make_graph, fwd};
  299. checker.set_input_generator(0, argsort_data_gen);
  300. host_k = std::make_shared<HostTensorND>(checker.comp_node(),
  301. TensorShape{1}, dtype::Int32{});
  302. host_k->ptr<int>()[0] = 1;
  303. Checker::RunOptions opt;
  304. opt.numdiff_eps = 0.047;
  305. auto invoke = [&](int k, size_t m, size_t n) {
  306. host_k->ptr<int>()[0] = k;
  307. checker.run({TensorShape{m, n}}, opt);
  308. };
  309. if (!non_contig) {
  310. invoke(1, 1, 1);
  311. }
  312. invoke(-2, 3, 2);
  313. invoke(-1, 4, 5);
  314. invoke(3, 10, 33);
  315. invoke(-8, 23, 35);
  316. if (non_contig) {
  317. ASSERT_EQ(prev_dev_ptr(var_x0), prev_dev_ptr(var_x1));
  318. }
  319. };
  320. for (auto i : {false, true}) {
  321. for (auto j : {false, true}) {
  322. run(i, j);
  323. }
  324. }
  325. }
  326. TEST(TestOprMisc, TopKSorted) {
  327. using Checker = AutoOprChecker<1, 2>;
  328. std::shared_ptr<HostTensorND> host_k;
  329. auto constexpr mode = opr::TopK::Param::Mode::VALUE_IDX_SORTED;
  330. auto make_graph =
  331. [&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
  332. auto k = opr::Host2DeviceCopy::make(*inputs[0].node()->owner_graph(),
  333. host_k);
  334. auto x = inputs[0];
  335. return opr::TopK::make(x, k, mode);
  336. };
  337. auto fwd = [&](Checker::NumOutArray& out, Checker::NumInpArray inp) {
  338. auto opr = megdnn_naive_handle()->create_operator<megdnn::TopK>();
  339. opr->param().mode = mode;
  340. int k = host_k->ptr<int>()[0];
  341. TensorLayout outl0, outl1;
  342. opr->deduce_layout(k, inp[0]->layout(), outl0, outl1);
  343. size_t wk_size =
  344. opr->get_workspace_in_bytes(k, inp[0]->layout(), outl0, outl1);
  345. std::unique_ptr<dt_byte[]> wk_store{new dt_byte[wk_size]};
  346. opr->exec(k, inp[0]->as_megdnn(), out[0].resize(outl0).as_megdnn(),
  347. out[1].resize(outl1).as_megdnn(), {wk_store.get(), wk_size});
  348. };
  349. Checker checker{make_graph, fwd};
  350. checker.set_input_generator(0, argsort_data_gen)
  351. .set_output_allow_grad(1, false);
  352. host_k = std::make_shared<HostTensorND>(checker.comp_node(), TensorShape{1},
  353. dtype::Int32{});
  354. host_k->ptr<int>()[0] = 1;
  355. Checker::RunOptions opt;
  356. opt.numdiff_eps = 0.047;
  357. auto invoke = [&](int k, size_t m, size_t n) {
  358. host_k->ptr<int>()[0] = k;
  359. checker.run({TensorShape{m, n}}, opt);
  360. };
  361. invoke(1, 1, 1);
  362. invoke(-1, 3, 5);
  363. invoke(5, 13, 23);
  364. invoke(-8, 35, 4);
  365. }
  366. TEST(TestOprMisc, TopKSortedIdxOnly) {
  367. HostTensorGenerator<> gen;
  368. auto graph = ComputingGraph::make();
  369. std::shared_ptr<HostTensorND> host_x = gen({2, 5});
  370. std::shared_ptr<HostTensorND> host_y = gen({2, 5});
  371. for (size_t i = 0; i < 10; ++i) {
  372. host_y->ptr<float>()[i] = 0.0f;
  373. }
  374. auto x = opr::Host2DeviceCopy::make(*graph, host_x),
  375. idx = opr::TopK::make(x, x.make_scalar(3),
  376. opr::TopK::Param::Mode::VALUE_IDX_SORTED)[1],
  377. y = opr::TypeCvt::make(idx, dtype::Float32{}),
  378. gx = cg::grad(opr::reduce_sum(y, y.make_scalar(1)), x);
  379. HostTensorND host_gx;
  380. auto func = graph->compile({make_callback_copy(gx, host_gx)});
  381. func->execute();
  382. MGB_ASSERT_TENSOR_EQ(host_gx, *host_y);
  383. }
  384. // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台