
memory_swap.cpp 3.1 kB

#include "megbrain/test/helper.h"

#include "megbrain/opr/basic_arith_wrapper.h"
#include "megbrain/opr/blas.h"
#include "megbrain/opr/dnn/convolution.h"
#include "megbrain/opr/io.h"

#include <cmath>    // sqrt
#include <cstdlib>  // getenv / setenv / unsetenv

using namespace mgb;
using Elemwise = opr::Elemwise;
using Mode = Elemwise::Mode;

#if MGB_ENABLE_MEMORY_SWAP
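
// Build a deep chain of conv + relu layers, take a scalar loss, and check that
// the kernel gradients computed with the ComputingGraph option
// enable_memory_swap match the gradients computed without it.
// `flag` selects the value written to MGB_MEMORY_SWAP_PARAM_BUCKET_IMPLEMENT
// (0 for the Serial test case below, 1 for the Parallel one).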
auto run = [](const int flag) {
    auto KEY = "MGB_MEMORY_SWAP_PARAM_BUCKET_IMPLEMENT";
    auto old_value = getenv(KEY);
    if (flag)
        setenv(KEY, "1", 1);
    else
        setenv(KEY, "0", 1);

    HostTensorGenerator<> gen_;
    auto gen = [&](const TensorShape& shp) { return gen_(shp, "gpu0"); };
    constexpr size_t batch_size = 5, C = 8, H = 100, W = 128;
    constexpr size_t limit = 200;
    auto host_data = gen({batch_size, C, H, W});
    auto graph = ComputingGraph::make();
    SymbolVarArray kernels;
    SymbolVarArray conv_res;
    auto data = opr::Host2DeviceCopy::make(*graph, host_data).rename("data");
    conv_res.push_back(data);
    size_t out_chl = host_data->shape(1), layer_count = 0;

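    // Append one padded conv + ReLU layer on top of the current output;
    // kernel values are drawn with std = sqrt(2 / fan_in), where
    // fan_in = out_chl * kernal_shape * kernal_shape.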
    auto add_layer = [&](size_t oc, size_t kernal_shape, size_t padding) {
        gen_.std(sqrt(2.0 / (out_chl * kernal_shape * kernal_shape)));
        auto host_kern = gen({oc, out_chl, kernal_shape, kernal_shape});
        auto dev_kern = std::make_shared<DeviceTensorND>();
        dev_kern->copy_from(*host_kern);
        auto current_param = opr::Convolution::Param();
        kernels.emplace_back(opr::SharedDeviceTensor::make(*graph, dev_kern));
        current_param.pad_h = current_param.pad_w = padding;
        conv_res.push_back(opr::relu(opr::Convolution::make(
                conv_res[layer_count],
                kernels.back().rename(ssprintf("param%zu", layer_count)),
                current_param)));
        layer_count++;
        out_chl = oc;
    };

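    // Stack `limit` identical layers; the loss is the squared L2 norm of the
    // final feature map (its dot product with itself).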
    for (size_t i = 1; i <= limit; ++i)
        add_layer(30, 5, 2);
    auto loss = opr::Dot::make(conv_res[limit].flatten(), conv_res[limit].flatten());

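    // Request the gradient of the loss w.r.t. every kernel, copied back to host.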
    std::vector<HostTensorND> grad_kernels_get(kernels.size());
    ComputingGraph::OutputSpec out_spec;
    for (size_t i = 0; i < kernels.size(); ++i) {
        out_spec.emplace_back(
                make_callback_copy(cg::grad(loss, kernels[i]), grad_kernels_get[i]));
    }

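    // First run with memory swap disabled to record reference gradients, then
    // run again with it enabled and compare the two results below.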
    std::vector<HostTensorND> grad_kernels_expect(grad_kernels_get.size());
    for (bool swap : {false, true}) {
        graph->options().enable_memory_swap = swap;
        auto func = graph->compile(out_spec);
        func->execute();
        if (!swap) {
            for (size_t i = 0; i < grad_kernels_get.size(); ++i)
                grad_kernels_expect[i].copy_from(grad_kernels_get[i]);
        }
    }
    for (size_t i = 0; i < grad_kernels_get.size(); ++i)
        MGB_ASSERT_TENSOR_NEAR(grad_kernels_get[i], grad_kernels_expect[i], 1e-3);

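    // Restore the original environment variable.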
    if (old_value) {
        setenv(KEY, old_value, 1);
    } else {
        unsetenv(KEY);
    }
};

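// The two cases exercise the serial and parallel variants of the bucket
// implementation selected by MGB_MEMORY_SWAP_PARAM_BUCKET_IMPLEMENT.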
TEST(TestMemorySwap, FullConvSerial) {
    REQUIRE_GPU(1);
    run(0);
}

TEST(TestMemorySwap, FullConvParallel) {
    REQUIRE_GPU(1);
    run(1);
}

#endif  // MGB_ENABLE_MEMORY_SWAP

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}