
subgraph_extractor.cpp

/**
 * \file src/gopt/test/subgraph_extractor.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */
#include "./helper.h"

#include "megbrain/gopt/subgraph_extractor.h"
#include "megbrain/opr/basic_arith.h"
#include "megbrain/opr/blas.h"
#include "megbrain/opr/dnn/convolution.h"
#include "megbrain/opr/dnn/pooling.h"
#include "megbrain/opr/imgproc.h"
#include "megbrain/opr/internal/identical_fwd.h"
#include "megbrain/opr/nn_int.h"
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/serialization/serializer.h"

using namespace mgb;
using namespace gopt;
using namespace serialization;
namespace {
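// Mock operator with an arbitrary number of inputs and outputs, used to test
// SubGraphExtractor against oprs whose outputs may be only partially consumed.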
// clang-format off
MGB_DEFINE_OPR_CLASS(MultipleInputOutput,
                     cg::SingleCNOperatorNodeBase) // {
public:
    MultipleInputOutput(const VarNodeArray& inputs,
                        const OperatorNodeConfig& config);

    static SymbolVarArray make(const SymbolVarArray& inputs,
                               const OperatorNodeConfig& config = {});

private:
    void scn_do_execute() override {}
    void init_output_static_infer_desc() override {}
};
// clang-format on
MGB_DYN_TYPE_OBJ_FINAL_IMPL(MultipleInputOutput);

MultipleInputOutput::MultipleInputOutput(const VarNodeArray& inputs,
                                         const OperatorNodeConfig& config)
        : Super(inputs[0]->owner_graph(), config, "multiple_input_output",
                inputs) {
    for (auto&& i : inputs)
        add_input({i});
    if (inputs.size() == 1) {
        add_output(None);
    } else {
        for (size_t i = 0; i < inputs.size(); ++i)
            add_output(ssprintf("o%zu", i));
    }
    cg::add_workspace_output(this);
}
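// make() inserts the opr into the owning graph and converts its outputs back
// to symbolic vars; the pop_back() drops the trailing workspace var added by
// cg::add_workspace_output(), so callers only see the real outputs.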
SymbolVarArray MultipleInputOutput::make(const SymbolVarArray& inputs,
                                         const OperatorNodeConfig& config) {
    auto src = cg::to_var_node_array(inputs);
    auto multiple_io = std::make_unique<MultipleInputOutput>(src, config);
    auto ret = cg::to_symbol_var_array(
            src[0]->owner_graph()->insert_opr(std::move(multiple_io))->output());
    ret.pop_back();
    return ret;
}
}  // namespace
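// Extraction across an opr with multiple outputs: an output consumed inside
// the partition (sym_var_arr[1]) is internal, while an unconsumed output
// (sym_var_arr[0]) still counts as a partition output.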
TEST(TestSubGraphExtractor, MultipleOutputs) {
    HostTensorGenerator<> gen;
    auto graph = ComputingGraph::make();
    auto mkvar = [&](const char* name, const TensorShape& shp) {
        return opr::Host2DeviceCopy::make(*graph, gen(shp)).rename(name);
    };
    auto mkcvar = [&](const char* name, const TensorShape& shp) {
        return opr::SharedDeviceTensor::make(*graph, *gen(shp)).rename(name);
    };
    graph->options().graph_opt_level = 0;
    auto x = mkvar("x", {8, 8, 8, 8}), w1 = mkcvar("w1", {4, 8, 3, 3});
    auto y = mkvar("y", {1, 8, 1, 1});
    auto add = x + y;
    opr::Convolution::Param param;
    param.pad_h = param.pad_w = 1;
    auto c1 = opr::Convolution::make(add, w1, param);
    auto w2 = mkcvar("w2", {8, 4, 3, 3});
    auto c2 = opr::ConvolutionBackwardData::make(w2, add, param, {}, {});
    auto sym_var_arr = MultipleInputOutput::make({c1, c2});
    auto z = sym_var_arr[1];
    z = z + (-128);
    using OprList = SubGraphExtractor::OprList;
    static const OprList opr_list = {
            opr::ConvolutionForward::typeinfo(),
            opr::Elemwise::typeinfo(),
            opr::TypeCvt::typeinfo(),
            MultipleInputOutput::typeinfo(),
    };
    SubGraphExtractor extractor(opr_list);
    auto partitions = extractor.extract({z});
    ASSERT_EQ(partitions.size(), 1u);
    // outputs: sym_var_arr[0], z, add
    ASSERT_EQ(partitions[0].output().size(), 3u);
    ASSERT_TRUE(partitions[0].output().count(add.node()) > 0);
    ASSERT_TRUE(partitions[0].output().count(z.node()) > 0);
    ASSERT_TRUE(partitions[0].output().count(sym_var_arr[0].node()) > 0);
    ASSERT_TRUE(partitions[0].output().count(sym_var_arr[1].node()) == 0);
    // inputs: x, y, w1, c2, (-128)
    ASSERT_EQ(partitions[0].input().size(), 5u);
    ASSERT_TRUE(partitions[0].input().count(x.node()) > 0);
    ASSERT_TRUE(partitions[0].input().count(c2.node()) > 0);
    // oprs: (x + y), conv1, multi_io, (z + (-128))
    ASSERT_EQ(partitions[0].opr_set().size(), 4u);
    ASSERT_TRUE(partitions[0].opr_set().count(add.node()->owner_opr()) > 0);
    ASSERT_TRUE(partitions[0].opr_set().count(c1.node()->owner_opr()) > 0);
    ASSERT_TRUE(partitions[0].opr_set().count(
            sym_var_arr[0].node()->owner_opr()) > 0);
    ASSERT_TRUE(partitions[0].opr_set().count(z.node()->owner_opr()) > 0);
}
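// Extraction with multiple readers of one var: `add` is read both by conv1
// (inside the partition) and by the ConvolutionBackwardData (outside, since
// it is not in opr_list), so it must appear among the partition outputs.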
TEST(TestSubGraphExtractor, MultipleReaders) {
    HostTensorGenerator<> gen;
    auto graph = ComputingGraph::make();
    auto mkvar = [&](const char* name, const TensorShape& shp) {
        return opr::Host2DeviceCopy::make(*graph, gen(shp)).rename(name);
    };
    auto mkcvar = [&](const char* name, const TensorShape& shp) {
        return opr::SharedDeviceTensor::make(*graph, *gen(shp)).rename(name);
    };
    graph->options().graph_opt_level = 0;
    auto x = mkvar("x", {8, 8, 8, 8}), w1 = mkcvar("w1", {4, 8, 3, 3});
    auto y = mkvar("y", {1, 8, 1, 1});
    auto add = x + y;
    opr::Convolution::Param param;
    param.pad_h = param.pad_w = 1;
    auto c1 = opr::Convolution::make(add, w1, param);
    auto w2 = mkcvar("w2", {8, 4, 3, 3});
    auto c2 = opr::ConvolutionBackwardData::make(w2, add, param, {}, {});
    auto z = c1 + c2;
    using OprList = SubGraphExtractor::OprList;
    static const OprList opr_list = {
            opr::ConvolutionForward::typeinfo(),
            opr::Elemwise::typeinfo(),
            opr::TypeCvt::typeinfo(),
    };
    SubGraphExtractor extractor(opr_list);
    auto partitions = extractor.extract({z});
    ASSERT_EQ(partitions.size(), 1u);
    // outputs: add (also read by c2, outside the partition), z
    ASSERT_EQ(partitions[0].output().size(), 2u);
    ASSERT_TRUE(partitions[0].output().count(add.node()) > 0);
    ASSERT_TRUE(partitions[0].output().count(z.node()) > 0);
    // inputs: x, y, w1, c2
    ASSERT_EQ(partitions[0].input().size(), 4u);
    ASSERT_TRUE(partitions[0].input().count(x.node()) > 0);
    partitions[0].to_json()->writeto_fpath(
            output_file("TestSubGraphExtractor.MultipleReaders.json"));
}
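// A larger quantized graph (diagrammed below); both endpoints handed to
// extract() lie in one connected region, so a single partition is expected,
// and the result is serialized to JSON for inspection.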
TEST(TestSubGraphExtractor, Complicated) {
    const size_t N = 16, C = 3, H = 768, W = 1280;
    HostTensorGenerator<dtype::Uint8> gen;
    auto graph = ComputingGraph::make();
    /*   h2d
          |
          v
      astype(f32)
          |
          v
      add(-128)
          |
          v
      astype(q8)
          |
          v
        conv1
          |
          v
      astype(u4)
         /   \
     conv2   conv3 -> astype(f32) -> output
         \   /
         qadd
          |
          v
      astype(q8)
         /   \
    deconv   conv4
         \   /
        concat -> output */
    auto h2d = opr::Host2DeviceCopy::make(*graph, gen({N, C, H, W}));
    auto data = opr::TypeCvt::make(h2d, dtype::Float32());
    auto sub_128 = data + (-128);
    auto x = opr::TypeCvt::make(sub_128, dtype::QuantizedS8(1.f));
    auto mkcvar = [&](const char* name, const TensorShape& shp,
                      const DType& dtype) {
        return opr::TypeCvt::make(
                opr::SharedDeviceTensor::make(*graph, *gen(shp)).rename(name),
                dtype);
    };
    auto w1 = mkcvar("w1", {16, 3, 3, 3}, dtype::QuantizedS8(1.f));
    auto b1 = mkcvar("b1", {1, 16, 1, 1}, dtype::QuantizedS32(1.f));
    opr::ConvBias::Param param;
    param.stride_h = param.stride_w = 2;
    param.pad_h = param.pad_w = 1;
    auto conv1 = opr::ConvBias::make(
            x, w1, b1, param, {}, OperatorNodeConfig(dtype::QuantizedS8(1.f)));
    conv1 = opr::TypeCvt::make(
            conv1, dtype::Quantized4Asymm(1.f, static_cast<uint8_t>(8)));
    auto w2 = mkcvar("w2", {16, 16, 3, 3}, dtype::QuantizedS4(1.f));
    auto b2 = mkcvar("b2", {1, 16, 1, 1}, dtype::QuantizedS32(1.f));
    auto conv2 = opr::ConvBias::make(conv1, w2, b2, param, {},
                                     OperatorNodeConfig(dtype::Quantized4Asymm(
                                             1.f, static_cast<uint8_t>(8))));
    param.pad_h = param.pad_w = 0;
    auto w3 = mkcvar("w3", {16, 16, 1, 1}, dtype::QuantizedS4(1.f));
    auto b3 = mkcvar("b3", {1, 16, 1, 1}, dtype::QuantizedS32(1.f));
    auto conv3 = opr::ConvBias::make(conv1, w3, b3, param, {},
                                     OperatorNodeConfig(dtype::Quantized4Asymm(
                                             1.f, static_cast<uint8_t>(8))));
    auto conv3f = opr::TypeCvt::make(conv3, dtype::Float32());
    auto qadd = opr::ElemwiseMultiType::make(
            {conv2, conv3}, {opr::ElemwiseMultiType::Mode::QADD},
            OperatorNodeConfig(
                    dtype::Quantized4Asymm(1.f, static_cast<uint8_t>(8))));
    auto q8 = opr::TypeCvt::make(qadd, dtype::QuantizedS8(1.f));
    auto w4 = mkcvar("w4", {16, 16, 3, 3}, dtype::QuantizedS8(1.f));
    param.stride_h = param.stride_w = 1;
    param.pad_h = param.pad_w = 1;
    auto conv4 = opr::ConvBiasForward::make(
            q8, w4, param, {}, OperatorNodeConfig(dtype::QuantizedS8(1.f)));
    conv4 = opr::TypeCvt::make(conv4, dtype::Float32());
    opr::Convolution::Param conv_param;
    conv_param.stride_h = conv_param.stride_w = 1;
    conv_param.pad_h = conv_param.pad_w = 0;
    auto w5 = mkcvar("w5", {16, 16, 1, 1}, dtype::QuantizedS8(1.f));
    auto deconv = opr::ConvolutionBackwardData::make(
            w5, q8, conv_param, {},
            OperatorNodeConfig(dtype::QuantizedS8(1.f)));
    deconv = opr::TypeCvt::make(deconv, dtype::Float32());
    auto z = opr::Concat::make({conv4, deconv}, 1);
    using OprList = SubGraphExtractor::OprList;
    static const OprList opr_list = {
            opr::ConvBiasForward::typeinfo(),
            opr::ConvolutionForward::typeinfo(),
            opr::ConvolutionBackwardData::typeinfo(),
            opr::ElemwiseMultiType::typeinfo(),
            opr::Elemwise::typeinfo(),
            opr::TypeCvt::typeinfo(),
            opr::PoolingForward::typeinfo(),
            opr::WarpPerspectiveForward::typeinfo(),
    };
    SubGraphExtractor extractor(opr_list);
    auto partitions = extractor.extract({conv3f.node(), z.node()});
    ASSERT_EQ(partitions.size(), 1u);
    const char* prefix = "TestSubGraphExtractor.Complicated";
    partitions[0].to_json()->writeto_fpath(
            output_file(ssprintf("%s.json", prefix).c_str()));
}
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
