
gopt_old_model.cpp

#include "megbrain/opr/dnn/local.h"
#include "megbrain/test/helper.h"

#include "megbrain/gopt/basic_arith.h"
#include "megbrain/gopt/gtrans.h"
#include "megbrain/gopt/inference.h"
#include "megbrain/opr/basic_arith_wrapper.h"
#include "megbrain/opr/blas.h"
#include "megbrain/opr/dnn/adaptive_pooling.h"
#include "megbrain/opr/io.h"
#include "megbrain/opr/nn_int.h"
#include "megbrain/opr/tensor_gen.h"
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/opr/utility.h"

#include "./helper.h"
#include "megbrain/comp_node_env.h"
#include "megdnn/tensor_format.h"

#include <random>
#include <vector>

using namespace mgb;
namespace {
//! find the first operator of a specific type; raise an exception if not found
template <typename T>
T& find_opr(SymbolVar endpoint) {
    T* found = nullptr;
    auto cb = [&found](cg::OperatorNodeBase* opr) {
        if (!found && opr->same_type<T>()) {
            found = &opr->cast_final_safe<T>();
        }
    };
    cg::DepOprIter{cb}.add(endpoint.node()->owner_opr());
    mgb_assert(found, "not found opr from %s", endpoint.node()->name().c_str());
    return *found;
}
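
//! find the first operator of a specific type whose node name equals
//! \p node_name; raise an exception if not found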
template <typename T>
T& find_opr(SymbolVar endpoint, const std::string& node_name) {
    T* found = nullptr;
    auto cb = [&found, &node_name](cg::OperatorNodeBase* opr) {
        if (!found && opr->same_type<T>() && opr->name() == node_name) {
            found = &opr->cast_final_safe<T>();
        }
    };
    cg::DepOprIter{cb}.add(endpoint.node()->owner_opr());
    mgb_assert(
            found, "not found opr %s from %s", node_name.c_str(),
            endpoint.node()->name().c_str());
    return *found;
}
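
//! count the operators of a specific type reachable from \p endpoint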
template <typename T>
size_t find_opr_num(SymbolVar endpoint) {
    size_t opr_num = 0;
    auto cb = [&opr_num](cg::OperatorNodeBase* opr) {
        if (opr->same_type<T>()) {
            opr_num++;
        }
    };
    cg::DepOprIter{cb}.add(endpoint.node()->owner_opr());
    return opr_num;
}
}  // namespace
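
//! build conv -> reshape(N, C, H*W) -> Reduce(SUM, axis=2) -> remove axis ->
//! divide by H*W -> add back the two trailing axes, then check that
//! optimize_for_inference folds the pattern into AdaptivePooling with mode AVERAGE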
TEST(TestGoptOldModel, FoldingGlobalPooling) {
    HostTensorGenerator<> gen;
    auto cn = CompNode::load("cpu0");
    auto graph = ComputingGraph::make();
    graph->options().graph_opt_level = 0;
    auto mkcvar = [&](const char* name, const TensorShape& shp) {
        return opr::SharedDeviceTensor::make(*graph, *gen(shp, cn)).rename(name);
    };

    auto host_x = gen({2, 3, 16, 16}, cn);
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);

    opr::Convolution::Param param_conv;
    param_conv.stride_h = param_conv.stride_w = 1;
    param_conv.pad_h = param_conv.pad_w = 1;
    auto w1 = mkcvar("w1", {8, 3, 3, 3});
    auto conv1 =
            opr::Convolution::make(x, w1, param_conv, {}, OperatorNodeConfig("conv1"));

    auto conv_n = opr::GetVarShape::make(conv1, 0);
    auto conv_c = opr::GetVarShape::make(conv1, 1);
    auto conv_h = opr::GetVarShape::make(conv1, 2);
    auto conv_w = opr::GetVarShape::make(conv1, 3);
    auto hxw = conv_h * conv_w;
    auto reshape_shape = opr::Concat::make({conv_n, conv_c, hxw}, 0);
    auto reshape1 = opr::Reshape::make(conv1, reshape_shape);

    opr::Reduce::Param param_reduce;
    param_reduce.axis = 2;
    param_reduce.mode = opr::Reduce::Mode::SUM;
    auto reduce = opr::Reduce::make(reshape1, param_reduce);
    auto reduce_remove_axis = opr::AxisAddRemove::make(
            reduce, {opr::AxisAddRemove::AxisDesc::make_remove(2)});

    auto hw_count = opr::GetVarShape::make(reshape1, 2);
    auto fp32_hw_count = opr::TypeCvt::make(hw_count, dtype::Float32());
    auto true_div = reduce_remove_axis / fp32_hw_count;
    auto y = opr::AxisAddRemove::make(
            true_div, {opr::AxisAddRemove::AxisDesc::make_add(2),
                       opr::AxisAddRemove::AxisDesc::make_add(3)});

    SymbolVar y_opt = y;
    {
        auto options = gopt::OptimizeForInferenceOptions{};
        options.fuse_grain = true;
        unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);
    }
    ASSERT_EQ(
            opr::AdaptivePooling::Param::Mode::AVERAGE,
            find_opr<opr::AdaptivePooling>(y_opt).param().mode);

    graph->compile({{y_opt, {}}})
            ->to_json()
            ->writeto_fpath(output_file("TestGoptOldModel.FoldingGlobalPooling.json"));

    HostTensorND host_y_opt, host_y;
    auto func = graph->compile(
            {make_callback_copy(y, host_y), make_callback_copy(y_opt, host_y_opt)});
    func->execute();
    MGB_ASSERT_TENSOR_NEAR(host_y, host_y_opt, 1e-3);
}
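
//! same pattern as FoldingGlobalPooling, but the two trailing spatial axes are
//! restored with Dimshuffle({0, 1, -1, -1}) instead of AxisAddRemove; this
//! variant should also be folded into AdaptivePooling with mode AVERAGE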
TEST(TestGoptOldModel, FoldingGlobalPooling2) {
    HostTensorGenerator<> gen;
    auto cn = CompNode::load("cpu0");
    auto graph = ComputingGraph::make();
    graph->options().graph_opt_level = 0;
    auto mkcvar = [&](const char* name, const TensorShape& shp) {
        return opr::SharedDeviceTensor::make(*graph, *gen(shp, cn)).rename(name);
    };

    auto host_x = gen({2, 3, 16, 16}, cn);
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);

    opr::Convolution::Param param_conv;
    param_conv.stride_h = param_conv.stride_w = 1;
    param_conv.pad_h = param_conv.pad_w = 1;
    auto w1 = mkcvar("w1", {8, 3, 3, 3});
    auto conv1 =
            opr::Convolution::make(x, w1, param_conv, {}, OperatorNodeConfig("conv1"));

    auto conv_n = opr::GetVarShape::make(conv1, 0);
    auto conv_c = opr::GetVarShape::make(conv1, 1);
    auto conv_h = opr::GetVarShape::make(conv1, 2);
    auto conv_w = opr::GetVarShape::make(conv1, 3);
    auto hxw = conv_h * conv_w;
    auto reshape_shape = opr::Concat::make({conv_n, conv_c, hxw}, 0);
    auto reshape1 = opr::Reshape::make(conv1, reshape_shape);

    opr::Reduce::Param param_reduce;
    param_reduce.axis = 2;
    param_reduce.mode = opr::Reduce::Mode::SUM;
    auto reduce = opr::Reduce::make(reshape1, param_reduce);
    auto reduce_remove_axis = opr::AxisAddRemove::make(
            reduce, {opr::AxisAddRemove::AxisDesc::make_remove(2)});

    auto hw_count = opr::GetVarShape::make(reshape1, 2);
    auto fp32_hw_count = opr::TypeCvt::make(hw_count, dtype::Float32());
    auto true_div = reduce_remove_axis / fp32_hw_count;
    auto y = opr::Dimshuffle::make(true_div, {0, 1, -1, -1});

    SymbolVar y_opt = y;
    {
        auto options = gopt::OptimizeForInferenceOptions{};
        options.fuse_grain = true;
        unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);
    }
    ASSERT_EQ(
            opr::AdaptivePooling::Param::Mode::AVERAGE,
            find_opr<opr::AdaptivePooling>(y_opt).param().mode);

    graph->compile({{y_opt, {}}})
            ->to_json()
            ->writeto_fpath(output_file("TestGoptOldModel.FoldingGlobalPooling2.json"));

    HostTensorND host_y_opt, host_y;
    auto func = graph->compile(
            {make_callback_copy(y, host_y), make_callback_copy(y_opt, host_y_opt)});
    func->execute();
    MGB_ASSERT_TENSOR_NEAR(host_y, host_y_opt, 1e-3);
}
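
//! Reduce(SUM, axis=2) over the flattened H*W axis divided by the element count
//! should be rewritten as a Reduce with mode MEAN; the compiled function is
//! executed a second time with a refreshed input to verify numerical correctness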
TEST(TestGoptOldModel, FoldingReduceMean) {
    HostTensorGenerator<> gen;
    auto cn = CompNode::load("cpu0");
    auto graph = ComputingGraph::make();
    graph->options().graph_opt_level = 0;
    auto mkcvar = [&](const char* name, const TensorShape& shp) {
        return opr::SharedDeviceTensor::make(*graph, *gen(shp, cn)).rename(name);
    };

    auto host_x = gen({2, 3, 16, 16}, cn);
    auto x = opr::Host2DeviceCopy::make(*graph, host_x);

    opr::Convolution::Param param_conv;
    param_conv.stride_h = param_conv.stride_w = 1;
    param_conv.pad_h = param_conv.pad_w = 1;
    auto w1 = mkcvar("w1", {8, 3, 3, 3});
    auto conv1 =
            opr::Convolution::make(x, w1, param_conv, {}, OperatorNodeConfig("conv1"));

    auto conv_n = opr::GetVarShape::make(conv1, 0);
    auto conv_c = opr::GetVarShape::make(conv1, 1);
    auto conv_h = opr::GetVarShape::make(conv1, 2);
    auto conv_w = opr::GetVarShape::make(conv1, 3);
    auto hxw = conv_h * conv_w;
    auto reshape_shape = opr::Concat::make({conv_n, conv_c, hxw}, 0);
    auto reshape1 = opr::Reshape::make(conv1, reshape_shape);

    opr::Reduce::Param param_reduce;
    param_reduce.axis = 2;
    param_reduce.mode = opr::Reduce::Mode::SUM;
    auto reduce = opr::Reduce::make(reshape1, param_reduce);

    auto hw_count = opr::GetVarShape::make(reshape1, 2);
    auto y = reduce / hw_count;

    SymbolVar y_opt = y;
    {
        auto options = gopt::OptimizeForInferenceOptions{};
        options.fuse_grain = true;
        unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);
    }
    ASSERT_EQ(
            opr::Reduce::Param::Mode::MEAN, find_opr<opr::Reduce>(y_opt).param().mode);

    graph->compile({{y_opt, {}}})
            ->to_json()
            ->writeto_fpath(output_file("TestGoptOldModel.FoldingReduceMean.json"));

    HostTensorND host_y_opt, host_y;
    auto func = graph->compile(
            {make_callback_copy(y, host_y), make_callback_copy(y_opt, host_y_opt)});
    func->execute();
    MGB_ASSERT_TENSOR_NEAR(host_y, host_y_opt, 1e-3);

    *host_x = *gen({2, 3, 16, 16}, cn);
    func->execute();
    MGB_ASSERT_TENSOR_NEAR(host_y, host_y_opt, 1e-3);
}