

/**
 * \file src/core/test/tensor.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */
#include "megbrain/test/helper.h"
#include "megbrain/comp_node_env.h"
#include "megbrain/tensor.h"
#include "megbrain/opr/utility.h"
#include "megbrain/utils/timer.h"
#include "megbrain/utils/debug.h"
#include "megbrain/exception.h"
#include "megdnn/tensor_format.h"

#include <cmath>

using namespace mgb;

constexpr double ASYNC_SLEEP_TIME = 0.15, ASYNC_MAX_ISSUE_TIME = 0.07;

namespace {

template <class Src, class Dst>
void run_noncontig_test() {
    // use a relatively large size so synchronization problems can be detected
    constexpr size_t S0 = 200, S1 = 500;
    HostTensorND hv_init{CompNode::load("xpu0"), dtype::Float32()};
    hv_init.resize({S0, S1});
    for (size_t i = 0; i < S0 * S1; ++i)
        hv_init.ptr<float>()[i] = i;
    Src src;
    src.copy_from(hv_init);

    bool failed = false;
    auto check = [&](size_t begin, size_t end) {
        ASSERT_FALSE(failed);
        failed = true;
        Src src_sub;
        Dst dst;
        src_sub = src.sub(Slice(begin, end).apply(src.layout(), 1));
        dst.copy_from(src_sub).sync();
        HostTensorND rst;
        rst.copy_from(dst).sync();
        auto ptr = rst.ptr<float>();
        for (size_t i = 0; i < S0; ++i)
            for (size_t j = begin; j < end; ++j) {
                ASSERT_EQ(float(i * S1 + j), *ptr);
                ++ptr;
            }

        HostTensorND hv_zero{hv_init.comp_node(), dtype::Float32()};
        hv_zero.resize({S0, end - begin});
        memset(hv_zero.ptr<float>(), 0, hv_zero.layout().span().dist_byte());
        Dst dst_zero;
        dst_zero.copy_from(hv_zero);
        src_sub.copy_from_fixlayout(dst_zero);
        HostTensorND src_hv;
        src_hv.copy_from(src).sync();
        ptr = src_hv.ptr<float>();
        for (size_t i = 0; i < S0; ++i)
            for (size_t j = begin; j < end; ++j) {
                ASSERT_EQ(0.f, ptr[i * S1 + j]);
            }

        src_sub.copy_from_fixlayout(dst).sync();
        failed = false;
    };
    check(0, 1);
    check(S1 - 1, S1);
    check(0, S1 - 1);
    check(1, S1);
    check(12, 21);
}

} // anonymous namespace
TEST(TestTensorStorage, InvalidAlloc) {
    {
        TensorStorage<HostTensorStorageTrait> storage;
        EXPECT_THROW(storage.ensure_size(100), MegBrainError);
    }
    {
        TensorStorage<DeviceTensorStorageTrait> storage;
        EXPECT_THROW(storage.ensure_size(100), MegBrainError);
    }
}

TEST(TestTensorStorage, CopyFromFixLayoutImage2DPack4TensorFormat) {
    CompNode cn = CompNode::load("xpu0");
    HostTensorND dst(
            cn, TensorLayout(TensorShape{1, 1, 1, 1, 4}, dtype::Float32{},
                             megdnn::DefaultTensorFormat::make()));
    HostTensorGenerator<> gen;
    auto src_default = gen({1, 1, 1, 1, 4});
    HostTensorND src(
            cn, TensorLayout(TensorShape{1, 1, 1, 1, 4}, dtype::Float32{},
                             megdnn::Image2DPack4TensorFormat::make_raw(2, 64)));
    EXPECT_NO_THROW(src.copy_from_fixlayout(*src_default).sync());
    EXPECT_NO_THROW(dst.copy_from_fixlayout(src).sync());
    MGB_ASSERT_TENSOR_EQ(src, dst);
}

TEST(TestTensorStorage, H2HCopy) {
    HostTensorGenerator<> gen;
    HostTensorND t1;
    auto t0 = gen({123, 456});
    t1.copy_from(*t0);
    MGB_ASSERT_TENSOR_EQ(*t0, t1);
}

TEST(TestTensorStorage, H2DCopy) {
    HostTensorGenerator<> gen;
    auto t0 = gen({123, 456});
    DeviceTensorND t1;
    t1.copy_from(*t0);
    HostTensorND t2;
    t2.copy_from(t1).sync();
    MGB_ASSERT_TENSOR_EQ(*t0, t2);
}

TEST(TestTensorStorage, D2DGPU2DefaultCPU) {
    REQUIRE_GPU(1);
    HostTensorGenerator<> gen;
    HostTensorND host_get;
    auto host_val = gen({123});
    auto cn0 = CompNode::load("gpu0");
    DeviceTensorND t0{cn0}, t1{CompNode::default_cpu()};
    opr::Sleep::sleep(cn0, 0.1);
    t0.copy_from(*host_val);
    t1.copy_from(t0);
    host_get.copy_from(t1);
    MGB_ASSERT_TENSOR_EQ(*host_val, host_get);
}

TEST(TestTensorStorage, D2DCopyNoSync) {
    auto cns = load_multiple_xpus(2);
    HostTensorND t0(cns[0], {1}), t3(cns[1], {1});
    DeviceTensorND t1(cns[0]), t2(cns[1]);
    t0.ptr<float>()[0] = 1;
    t3.ptr<float>()[0] = -1;
    t1.copy_from(t3).sync();
    t2.copy_from(t3).sync();
    RealTimer timer;
    opr::Sleep::sleep(t1.comp_node(), ASYNC_SLEEP_TIME);
    t1.copy_from(t0);
    t2.copy_from(t1);
    t3.copy_from(t2);
    // the sleep kernel in cuda is easily affected by GPU frequency changes,
    // so we just print a warning log instead of asserting; see XPU-226
    auto use_time = timer.get_secs();
    if (use_time >= ASYNC_MAX_ISSUE_TIME) {
        mgb_log_warn("expect time < %f, got %f", ASYNC_MAX_ISSUE_TIME,
                     use_time);
    }
    t1.sync();
    use_time = timer.get_secs();
    if (use_time <= ASYNC_SLEEP_TIME) {
        mgb_log_warn("expect time > %f, got %f", ASYNC_SLEEP_TIME, use_time);
    }
    // after the copy chain t0 -> t1 -> t2 -> t3 completes, t3 must equal t0
    ASSERT_LT(fabs(t3.sync().ptr<float>()[0] - t0.ptr<float>()[0]), 0.1);
}

TEST(TestTensorStorage, TensorSub) {
    HostTensorND t0(CompNode::load("xpu0"), {123, 456});
    auto t0_sub = t0[{{0, 5}, {1, 9}}];
    ASSERT_EQ(TensorShape({5, 8}), t0_sub.shape());
}

TEST(TestTensorStorage, D2DCopyNonCont) {
    auto cns = load_multiple_xpus(2);
    constexpr size_t S0 = 12, S1 = 8, S2 = 9;
    auto cn0 = cns[0], cn1 = cns[1];
    auto event = cn0.create_event();
    HostTensorND hv(cn0, {S0, S1, S2});
    for (size_t i = 0, it = hv.layout().total_nr_elems(); i < it; i++)
        hv.ptr<float>()[i] = i;
    DeviceTensorND dv, dv_sub0(cn1), dv_sub1;
    dv.copy_from(hv);
    event->record();
    cn1.device_wait_event(*event);
    dv_sub0.copy_from(dv[{{}, {2, 4}}]);
    dv_sub1.copy_from(dv[{{None, None, 4}, {None, None, 2}, {None, None, 3}}]);
    HostTensorND hv_sub0, hv_sub1;
    hv_sub0.copy_from(dv_sub0);
    hv_sub1.copy_from(dv_sub1);
    auto idx = [](size_t i, size_t j, size_t k) {
        return i * S1 * S2 + j * S2 + k;
    };
    {
        auto ptr = hv_sub0.sync().ptr<float>();
        ASSERT_EQ(TensorShape({S0, 2, S2}), hv_sub0.shape());
        for (size_t i = 0; i < S0; i++)
            for (size_t j = 0; j < 2; j++)
                for (size_t k = 0; k < S2; k++) {
                    MGB_ASSERT_FLOAT_EQ(idx(i, j + 2, k), *(ptr++))
                            << ssprintf("sub0: failed at (%zu, %zu, %zu)",
                                        i, j, k);
                }
    }
    {
        auto ptr = hv_sub1.sync().ptr<float>();
        ASSERT_EQ(TensorShape({S0 / 4, S1 / 2, S2 / 3}), hv_sub1.shape());
        for (size_t i = 0; i < S0 / 4; i++)
            for (size_t j = 0; j < S1 / 2; j++)
                for (size_t k = 0; k < S2 / 3; k++) {
                    MGB_ASSERT_FLOAT_EQ(idx(i * 4, j * 2, k * 3), *(ptr++))
                            << ssprintf("sub1: failed at (%zu, %zu, %zu)",
                                        i, j, k);
                }
    }
}

TEST(TestTensorStorage, CrossCNCopy2D) {
    auto cns = load_multiple_xpus(2);
    constexpr size_t S0 = 200, S1 = 500;
    HostTensorND hv{cns[0], dtype::Float32()};
    hv.resize({S0, S1});
    for (size_t i = 0; i < S0 * S1; ++i)
        hv.ptr<float>()[i] = i;
    DeviceTensorND dev0;
    dev0.copy_from(hv).sync();
    bool failed = false;
    auto check = [&](size_t begin, size_t end) {
        ASSERT_FALSE(failed);
        failed = true;
        DeviceTensorND dev0_sub, dev1(cns[1]);
        dev0_sub = dev0.sub(Slice(begin, end).apply(dev0.layout(), 1));
        dev1.copy_from(dev0_sub);
        HostTensorND rst;
        rst.copy_from(dev1).sync();
        auto ptr = rst.ptr<float>();
        for (size_t i = 0; i < S0; ++i)
            for (size_t j = begin; j < end; ++j) {
                ASSERT_EQ(float(i * S1 + j), *ptr);
                ++ptr;
            }
        failed = false;
    };
    check(0, 1);
    check(S1 - 1, S1);
    check(0, S1 - 1);
    check(1, S1);
    check(12, 21);
}

TEST(TestTensor, LayoutSlice) {
    TensorLayout ly0({4, 4, 4, 4}, dtype::Int32());
    auto ly = ly0;
    ly[1] = 2;
    auto sub = Slice(1, 3, 1).apply(ly0, 1);
    ASSERT_EQ(16u, sub.offset_elem());
    ASSERT_EQ(ly, sub.layout());

    ly0.init_contiguous_stride({1, 4, 4, 4});
    ly = ly0;
    ly[1] = 2;
    ly.stride[0] = 32;
    ly.stride[1] = 16;
    sub = Slice(1, 3, 1).apply(ly0, 1);
    ASSERT_EQ(16u, sub.offset_elem());
    ASSERT_EQ(ly, sub.layout());

    ly = ly0;
    ly[1] = 2;
    ly.stride[0] = -32;
    ly.stride[1] = -16;
    sub = Slice(3, 1, -1).apply(ly0, 1);
    ASSERT_EQ(48u, sub.offset_elem());
    ASSERT_EQ(ly, sub.layout());

    ly0.init_contiguous_stride({1, 4, 4, 4});
    ly = ly0;
    ly[1] = 1;
    ly.stride[0] = 16;
    ly.stride[1] = 16;
    sub = Slice(3, 4, 1).apply(ly0, 1);
    ASSERT_EQ(48u, sub.offset_elem());
    ASSERT_EQ(ly, sub.layout());
}

TEST(TestTensor, NoncontigCopyH2H) {
    run_noncontig_test<HostTensorND, HostTensorND>();
}

TEST(TestTensor, NoncontigCopyD2D) {
    run_noncontig_test<DeviceTensorND, DeviceTensorND>();
}

TEST(TestTensor, NoncontigCopyD2H) {
    run_noncontig_test<DeviceTensorND, HostTensorND>();
}

TEST(TestTensor, NoncontigCopyH2D) {
    run_noncontig_test<HostTensorND, DeviceTensorND>();
}

TEST(TestTensor, EmptyCheck) {
    HostTensorGenerator<> gen;
    auto hv = *gen({23});
    ASSERT_FALSE(hv.empty());
    hv.resize({});
    ASSERT_TRUE(hv.empty());
    hv.resize({2});
    ASSERT_FALSE(hv.empty());
    hv.resize({0});
    ASSERT_TRUE(hv.empty());
}

TEST(TestTensor, ValueDump) {
    HostTensorGenerator<> gen;
    auto val = debug::dump_tensor(*gen({23, 45}), "test");
    debug::write_to_file(output_file("TestTensor.ValueDump.bin").c_str(), val);
}

template <class Src, class Dst>
void run_negative_index_test() {
    constexpr size_t S0 = 200, S1 = 200;
    HostTensorND hv_init{CompNode::load("xpu0"), dtype::Float32()};
    hv_init.resize({S0, S1});
    for (size_t i = 0; i < S0 * S1; ++i)
        hv_init.ptr<float>()[i] = i;
    Src src;
    Src src_sub;
    Dst dst;
    auto check = [&](size_t begin, size_t end, int axis) {
        src.copy_from(hv_init).sync();
        src_sub = src.sub(Slice(begin, end).apply(src.layout(), axis));
        dst.copy_from(src_sub).sync();
        if (axis < 0)
            axis += 2;
        ASSERT_EQ(dst.layout().ndim, 2u);
        for (int i = 0; i < 2; i++) {
            if (i == axis)
                ASSERT_EQ(dst.layout()[i], end - begin);
            else
                ASSERT_EQ(dst.layout()[i], 200u);
        }
    };
    check(100, 200, -1);
    check(10, 20, -1);
    check(100, 200, -2);
    check(10, 20, -2);
    EXPECT_THROW(check(100, 200, -3), MegBrainError);
    EXPECT_THROW(check(10, 20, -3), MegBrainError);
    EXPECT_THROW(check(100, 200, 2), MegBrainError);
    EXPECT_THROW(check(10, 20, 2), MegBrainError);
}

TEST(TestTensor, NegativeIndex) {
    run_negative_index_test<HostTensorND, HostTensorND>();
    run_negative_index_test<DeviceTensorND, DeviceTensorND>();
    run_negative_index_test<DeviceTensorND, HostTensorND>();
    run_negative_index_test<HostTensorND, DeviceTensorND>();
}

TEST(TestTensor, CpuCudaD2DCopy) {
    REQUIRE_GPU(1);
    auto cn_cpu = CompNode::load("cpu0"), cn_gpu = CompNode::load("gpu0");
    HostTensorGenerator<> gen;
    constexpr size_t length = 233333;
    auto a = gen({length});
    for (auto config : {true, false}) {
        DeviceTensorND dev_a{cn_cpu}, dev_b{cn_gpu, a->shape(), a->dtype()};
        dev_a.copy_from(*a).sync();
        if (!config) {
            auto subspec = Slice(0, length, 3).apply(a->layout(), 0);
            dev_a = dev_a.sub(subspec);
            dev_b = dev_b.sub(subspec);
        }
        auto iadd = [ptr = dev_a.ptr<float>(), length = dev_a.shape()[0],
                     stride = dev_a.layout().stride[0]]() {
            for (size_t i = 0; i < length; ++i) {
                ptr[i * stride] += 1;
            }
        };
        CompNodeEnv::from_comp_node(cn_cpu).cpu_env().dispatch(iadd);
        auto event = cn_cpu.create_event();
        event->record();
        cn_gpu.device_wait_event(*event);
        dev_b.copy_from_fixlayout(dev_a);
        HostTensorND res;
        res.copy_from(dev_b).sync();
        MGB_ASSERT_TENSOR_EQ(HostTensorND::make_proxy(dev_a), res);
    }
}

TEST(TestTensor, ProxyToDefaultCPU) {
    auto cn = CompNode::load("xpux");
    auto x = HostTensorND(cn, TensorLayout({1, 2, 3}, dtype::Float32{}));
    auto y = x.proxy_to_default_cpu();
    ASSERT_EQ(y.comp_node(), CompNode::default_cpu());
    ASSERT_EQ(x.layout(), y.layout());
    ASSERT_EQ(x.raw_ptr(), y.raw_ptr());
}

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

The MegEngine installation package ships with the CUDA environment needed to run code on a GPU, so there is no separate CPU or GPU build. To run GPU programs, make sure the machine has a GPU installed along with a working driver. If you would like to try deep-learning development on a cloud GPU computing platform, check out the MegStudio platform.
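Because the single build supports both backends, GPU use hinges only on a device and driver being present at runtime, and code can probe for that before picking a comp node. Below is a minimal C++ sketch in the style of the tests above; it assumes the megbrain headers used in this file and that CompNode::get_device_count is available for this query (this is an assumption about the API, not something shown in the file itself):

    #include "megbrain/comp_node.h"

    // Sketch: return true when at least one CUDA device is visible;
    // callers can fall back to CPU comp nodes ("cpu0" / "xpu0") otherwise.
    static bool cuda_device_available() {
        using mgb::CompNode;
        return CompNode::get_device_count(CompNode::DeviceType::CUDA) > 0;
    }

A test or application would then load "gpu0" only when cuda_device_available() returns true, which is the same spirit as the REQUIRE_GPU(1) guard used in the GPU tests above.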