
backward_graph.cpp

/**
 * \file imperative/src/test/backward_graph.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */
#include "./helper.h"
#include "megbrain/imperative/backward_graph_opt.h"
#include "megbrain/imperative/ops/autogen.h"
#include "megbrain/imperative/ops/opr_attr.h"
#include "megbrain/opr/basic_arith.h"
#include "megbrain/opr/dnn/batch_norm.h"

using namespace mgb;
using namespace cg;
using namespace imperative;
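
// Convention used throughout this file: the backward graph returned by
// OpDef::make_backward_graph() takes as its inputs the subsequence of
// [forward inputs, forward outputs, output grads] selected by the
// save_for_backward bitmask. The helpers below pack tensors into that
// layout and scatter the resulting grads back to per-input slots.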
// Select, from [inputs, outputs, grads], the tensors flagged by
// bg.save_for_backward; these form the input list of the backward graph.
template <typename T>
T prepare_backward_graph_inputs(const BackwardGraphResult& bg, const T& inputs,
                                const T& outputs, const T& grads) {
    T ret;
    size_t i = 0;
    for (auto&& t : inputs) {
        if (bg.save_for_backward[i++]) {
            ret.push_back(t);
        }
    }
    for (auto&& t : outputs) {
        if (bg.save_for_backward[i++]) {
            ret.push_back(t);
        }
    }
    for (auto&& t : grads) {
        if (bg.save_for_backward[i++]) {
            ret.push_back(t);
        }
    }
    return ret;
}
// Scatter the dense backward-graph outputs back to one slot per forward
// input; slots whose input does not require grad are left empty.
template <typename T, typename U>
T expand_grads(const U& bg, const T& outputs) {
    T ret(bg.input_has_grad.size());
    for (size_t i = 0, j = 0; i < bg.input_has_grad.size(); ++i) {
        if (bg.input_has_grad[i]) {
            ret[i] = outputs[j++];
        }
    }
    return ret;
}
// Same as prepare_backward_graph_inputs, but for the optimized backward
// graph, whose input list is prefixed by the precomputed tensors.
template <typename T>
T prepare_optimized_backward_inputs(const OptimizedBackwardGraphResult& bg,
                                    const T& precomp, const T& inputs,
                                    const T& outputs, const T& grads) {
    T ret = precomp;
    size_t i = 0;
    for (auto&& t : inputs) {
        if (bg.save_for_backward[i++]) {
            ret.push_back(t);
        }
    }
    for (auto&& t : outputs) {
        if (bg.save_for_backward[i++]) {
            ret.push_back(t);
        }
    }
    for (auto&& t : grads) {
        if (bg.save_for_backward[i++]) {
            ret.push_back(t);
        }
    }
    return ret;
}
// Executor callback passed to backward.apply(): runs each op of the
// backward graph directly on physical tensors.
SmallVector<TensorPtr> apply_shared_on_physical_tensor(
        std::shared_ptr<OpDef> def, SmallVector<TensorPtr> inputs) {
    return OpDef::apply_on_physical_tensor(*def, inputs);
}
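
// Build the backward graph for elementwise MUL (c = a * b) and run it on
// physical tensors. After the forward pass, hvs holds {a, b, dc}; tensors
// not flagged in save_for_backward are dropped before the backward graph
// runs. The final loop checks the produced grad against the expected value
// (for c = a * b, db = dc * a).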
TEST(TestImperative, BackwardGraphBasic) {
    HostTensorGenerator<> gen;
    SmallVector<HostTensorND> hvs;
    SmallVector<TensorPtr> inputs;
    for (size_t i = 0; i < 2; ++i) {
        hvs.push_back(*gen({42}));
        inputs.push_back(Tensor::make(hvs.back()));
    }
    using Param = opr::Elemwise::Param;
    Param param{Param::Mode::MUL};
    auto attr = OprAttr::make("Elemwise");
    attr->cast_final_safe<OprAttr>().param.write_pod(param);

    SmallVector<LogicalTensorDesc> input_descs;
    for (auto&& i : inputs) {
        input_descs.push_back({i->layout(), i->comp_node()});
    }
    auto result = OpDef::make_backward_graph(*attr, input_descs, {true, true},
                                             {true});
    auto&& save_for_backward = result.save_for_backward;
    auto&& input_has_grad = result.input_has_grad;

    auto outputs = OpDef::apply_on_physical_tensor(*attr, inputs);
    inputs.push_back(outputs[0]);
    hvs.push_back(*gen({42}));
    inputs.push_back(Tensor::make(hvs.back()));
    mgb_assert(save_for_backward.size() == inputs.size());
    for (size_t i = 0; i < inputs.size(); ++i) {
        if (!save_for_backward[i]) {
            inputs[i].reset();  // drop unused tensor
        }
    }
    SmallVector<TensorPtr> backward_graph_inputs;
    for (auto&& i : inputs) {
        if (i) {
            backward_graph_inputs.push_back(i);
        }
    }
    inputs.clear();
    auto input_grads = result.backward.apply(backward_graph_inputs,
                                             apply_shared_on_physical_tensor,
                                             [&](auto&& x) { return x; });
    mgb_assert(input_grads.size() == input_has_grad.size());
    for (size_t i = 0; i < input_has_grad.size(); ++i) {
        mgb_assert(input_has_grad[i] == static_cast<bool>(input_grads[i]));
    }
    SmallVector<HostTensorND> res;
    for (auto&& i : input_grads) {
        res.emplace_back();
        res.back().copy_from(i->dev_tensor()).sync();
    }
    for (size_t i = 0; i < 42; ++i) {
        for (size_t j = 0; j < 1; ++j) {
            ASSERT_EQ(hvs[2].ptr<float>()[i] * hvs[j].ptr<float>()[i],
                      res[j ^ 1].ptr<float>()[i]);
        }
    }
}
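
// Same flow for the Identity op: its backward graph should pass the output
// grad through unchanged, so the grad of `a` must equal host_dc elementwise.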
TEST(TestImperative, BackwardGraphIdentity) {
    HostTensorGenerator<> gen;
    auto host_a = gen({42}), host_dc = gen({42});
    auto a = Tensor::make(*host_a), dc = Tensor::make(*host_dc);
    SmallVector<TensorPtr> inputs;
    inputs.push_back(a);

    auto attr = OprAttr::make("Identity");
    attr->cast_final_safe<OprAttr>().param.write_pod<megdnn::param::Empty>({});
    SmallVector<LogicalTensorDesc> input_descs;
    input_descs.push_back({a->layout(), a->comp_node()});
    auto result =
            OpDef::make_backward_graph(*attr, input_descs, {true}, {true});
    auto&& save_for_backward = result.save_for_backward;
    auto&& input_has_grad = result.input_has_grad;

    auto outputs = OpDef::apply_on_physical_tensor(*attr, inputs);
    inputs.push_back(outputs[0]);
    inputs.push_back(dc);
    mgb_assert(save_for_backward.size() == inputs.size());
    for (size_t i = 0; i < inputs.size(); ++i) {
        if (!save_for_backward[i]) {
            inputs[i].reset();  // drop unused tensor
        }
    }
    SmallVector<TensorPtr> backward_graph_inputs;
    for (auto&& i : inputs) {
        if (i) {
            backward_graph_inputs.push_back(i);
        }
    }
    inputs.clear();
    auto input_grads = result.backward.apply(backward_graph_inputs,
                                             apply_shared_on_physical_tensor,
                                             [&](auto&& x) { return x; });
    mgb_assert(input_grads.size() == input_has_grad.size());
    for (size_t i = 0; i < input_has_grad.size(); ++i) {
        mgb_assert(input_has_grad[i] == static_cast<bool>(input_grads[i]));
    }
    HostTensorND hv;
    hv.copy_from(input_grads[0]->dev_tensor()).sync();
    for (size_t i = 0; i < 42; ++i) {
        ASSERT_EQ(host_dc->ptr<float>()[i], hv.ptr<float>()[i]);
    }
}
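
// BatchNorm in training mode: this only checks that a backward graph can be
// constructed for both the 5-input signature (with running mean/variance
// statistics) and the 3-input signature; no numerical result is verified.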
TEST(TestImperative, BatchNormGrad) {
    auto cn = CompNode::load("xpux");
    using Param = opr::BatchNorm::Param;
    size_t N = 2, C = 3, H = 5, W = 5;
    LogicalTensorDesc inp{TensorLayout{{N, C, H, W}, dtype::Float32()}, cn};
    LogicalTensorDesc stat{TensorLayout{{C}, dtype::Float32()}, cn};
    {
        auto op = OprAttr::make("BatchNorm");
        auto&& attr = op->cast_final_safe<OprAttr>();
        Param param;
        param.fwd_mode = Param::FwdMode::TRAINING;
        attr.param.write_pod(param);
        OpDef::make_backward_graph(attr, {inp, stat, stat, stat, stat},
                                   {true, true, true, false, false},
                                   {false, false, false, false, true});
    }
    {
        auto op = OprAttr::make("BatchNorm");
        auto&& attr = op->cast_final_safe<OprAttr>();
        Param param;
        param.fwd_mode = Param::FwdMode::TRAINING;
        attr.param.write_pod(param);
        OpDef::make_backward_graph(attr, {inp, stat, stat}, {true, true, true},
                                   {false, false, true});
    }
}
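
// Broadcasting ADD (a{42} + b{5, 42}): build the plain backward graph, then
// the optimized variant, which is split into a `precomp` part (run at
// forward time) and a `backward` part. The asserts check that none of a, b,
// or the output c needs to be saved verbatim (the precomp outputs here are
// small 1-D tensors, presumably shape metadata used to reduce the broadcast
// grad), and that both paths yield identical gradients.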
TEST(TestImperative, OptimizedBackwardGraphBasic) {
    auto cn = CompNode::load("xpux");
    LogicalTensorDesc desc = {TensorLayout(dtype::Float32()), cn};
    HostTensorGenerator<> gen;
    auto op = std::shared_ptr<OpDef>(Elemwise::make(Elemwise::Mode::ADD));
    auto bg =
            OpDef::make_backward_graph(*op, {desc, desc}, {true, true}, {true});
    auto obg = OptimizedBackwardGraphResult(bg);
    ASSERT_EQ(obg.save_for_backward.size(), 4);
    ASSERT_FALSE(obg.save_for_backward[0]);
    ASSERT_FALSE(obg.save_for_backward[1]);
    ASSERT_FALSE(obg.save_for_backward[2]);

    auto a_hv = gen({42});
    auto b_hv = gen({5, 42});
    auto dc_hv = gen({5, 42});
    auto a_tn = Tensor::make(*a_hv);
    auto b_tn = Tensor::make(*b_hv);
    auto dc_tn = Tensor::make(*dc_hv);
    auto c_tn = OpDef::apply_on_physical_tensor(*op, {a_tn, b_tn})[0];

    auto backward_graph_inputs =
            prepare_backward_graph_inputs<SmallVector<TensorPtr>>(
                    bg, {a_tn, b_tn}, {c_tn}, {dc_tn});
    auto grads =
            expand_grads(bg, bg.backward.apply(backward_graph_inputs,
                                               apply_shared_on_physical_tensor,
                                               [&](auto&& x) { return x; }));

    auto precomp = obg.precomp.apply(SmallVector<TensorPtr>{a_tn, b_tn, c_tn},
                                     apply_shared_on_physical_tensor,
                                     [&](auto&& x) { return x; });
    ASSERT_EQ(precomp.size(), 2);
    ASSERT_EQ(precomp[0]->shape().ndim, 1);
    ASSERT_LE(precomp[0]->shape()[0], 2);
    ASSERT_EQ(precomp[1]->shape().ndim, 1);
    ASSERT_LE(precomp[1]->shape()[0], 2);

    auto backward_inputs =
            prepare_optimized_backward_inputs<SmallVector<TensorPtr>>(
                    obg, precomp, {a_tn, b_tn}, {c_tn}, {dc_tn});
    auto grads2 = expand_grads(
            obg,
            obg.backward.apply(backward_inputs, apply_shared_on_physical_tensor,
                               [&](auto&& x) { return x; }));

    ASSERT_EQ(grads2.size(), 2);
    MGB_ASSERT_TENSOR_EQ(grads[0]->get_value(), grads2[0]->get_value());
    MGB_ASSERT_TENSOR_EQ(grads[1]->get_value(), grads2[1]->get_value());
}
