You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they may include dashes ('-') and can be up to 35 characters long.

autocheck.cpp 14 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. /**
  2. * \file test/src/autocheck.cpp
  3. *
  4. * This file is part of MegBrain, a deep learning framework developed by Megvii.
  5. *
  6. * \copyright Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
  7. *
  8. */
  9. #include "megbrain/test/autocheck.h"
  10. #include "megbrain/opr/basic_arith.h"
  11. #include "megbrain/opr/blas.h"
  12. #include "megbrain/opr/internal/megdnn_opr_wrapper.h"
  13. #include "megbrain/opr/io.h"
  14. #include "megbrain/opr/utility.h"
  15. #include "megbrain/test/numerical_diff.h"
  16. #include <cmath>
  17. using namespace mgb;
// Helper macros for out-of-class member definitions of the AutoOprChecker
// class template (declared in megbrain/test/autocheck.h).
// DEF_IMPL(_ret) expands to the template header plus the qualified class
// name, with _ret as the return type (pass nothing for ctor/dtor).
#define DEF_IMPL(_ret) \
template <size_t nr_inp, size_t nr_out, class dtype> \
_ret AutoOprChecker<nr_inp, nr_out, dtype>
// DEF_IMPL_CHAIN() is the variant for the chainable setters that return
// AutoOprChecker& so calls can be fluently chained.
#define DEF_IMPL_CHAIN() \
template <size_t nr_inp, size_t nr_out, class dtype> \
AutoOprChecker<nr_inp, nr_out, dtype>& \
AutoOprChecker<nr_inp, nr_out, dtype>
  25. DEF_IMPL()::AutoOprChecker(GraphMaker maker, FwdNumeric fwd, CompNode comp_node)
  26. : m_fwd(fwd), m_maker(maker), m_comp_node{comp_node} {
  27. InputGenerator default_input_gen = [this](HostTensorND& dest) {
  28. dest = *m_gen(dest.shape(), m_comp_node);
  29. };
  30. for (size_t i = 0; i < nr_inp; ++i) {
  31. m_inputs[i] = std::make_shared<HostTensorND>(m_comp_node, dtype());
  32. m_inputs_generator[i] = default_input_gen;
  33. }
  34. for (size_t i = 0; i < nr_inp; ++i) {
  35. m_inputs_allow_grad[i] = true;
  36. }
  37. for (size_t i = 0; i < nr_out; ++i) {
  38. m_outputs_allow_grad[i] = true;
  39. }
  40. for (size_t i = 0; i < nr_out; ++i) {
  41. m_outputs_allow_check[i] = true;
  42. }
  43. }
//! Build the computing graph once, on first run():
//!  1. wrap each host input as Host2DeviceCopy and rewrite it as x*1+0 so
//!     graph transformations get exercised;
//!  2. call the user maker to get the symbolic outputs and register
//!     callbacks that copy them into m_outputs;
//!  3. if grad checking is enabled, build loss = sum_i dot(flatten(out_i),
//!     loss_p_i) plus a second loss (2*loss) used to verify grad
//!     consistency across different loss vars.
DEF_IMPL(void)::build_graph() {
    mgb_assert(!m_built);
    m_built = true;
    m_graph = ComputingGraph::make();
    auto&& graph = m_graph;
    if (m_disable_graph_opt) {
        graph->options().graph_opt_level = 0;
    }
    SymInpArray sym_in;
    // scalar constants 1 and 0 stored on device, used for the x*1+0 trick
    SymbolVar one, zero;
    {
        HostTensorND tmp{m_comp_node, mgb::dtype::Float32()};
        auto p = tmp.resize({1}).ptr<float>();
        p[0] = 1;
        one = opr::SharedDeviceTensor::make(*graph, tmp, {"one"});
        p[0] = 0;
        zero = opr::SharedDeviceTensor::make(*graph, tmp, {"zero"});
    }
    for (size_t i = 0; i < nr_inp; ++i) {
        // to trigger graph trans
        sym_in[i] = opr::Host2DeviceCopy::make(*graph, m_inputs[i],
                                               ssprintf("inp%zu", i));
        auto dt = sym_in[i].dtype();
        // cast the constants to the input dtype before the identity rewrite
        auto a = opr::TypeCvt::make(one, dt), b = opr::TypeCvt::make(zero, dt);
        sym_in[i] = sym_in[i] * a + b;
    }
    // mark failed while running user code, so the dtor check is skipped if
    // the maker throws
    m_failed = true;
    auto sym_out = m_maker(sym_in);
    m_failed = false;
    for (size_t i = 0; i < nr_out; ++i) {
        m_outputs_truth[i].comp_node(m_comp_node).dtype(sym_out[i].dtype());
        m_outspec_fwd_grad.push_back(
                make_callback_copy(sym_out[i], m_outputs[i]));
    }
    if (!m_need_grad_check)
        return;
    // accumulate per-output dot-product losses into a single scalar loss
    SymbolVar loss;
    bool first_loss = true;
    for (size_t i = 0; i < nr_out; ++i) {
        if (m_outputs_allow_grad[i]) {
            m_loss_p[i] = std::make_shared<HostTensorND>(m_comp_node, dtype());
            auto cur = opr::Dot::make(
                    sym_out[i].flatten(),
                    opr::Host2DeviceCopy::make(*graph, m_loss_p[i],
                                               ssprintf("lossp%zu", i)));
            if (first_loss) {
                loss = cur;
            } else {
                loss = loss + cur;
            }
            first_loss = false;
        }
    }
    // no output allows grad: nothing to differentiate, disable grad check
    if (first_loss) {
        m_need_grad_check = false;
        return;
    }
    auto make_grad = [&](SymbolVar target, SymbolVar wrt) {
        if (m_use_virtual_grad)
            return opr::VirtualGrad::make(target, wrt);
        else
            return cg::grad(target, wrt);
    };
    // second loss var for the 2*grad consistency check below
    auto loss2 = loss * 2;
    m_outspec_loss.push_back({make_callback_copy(loss, m_loss)});
    for (size_t i = 0; i < nr_inp; ++i)
        if (m_inputs_allow_grad[i]) {
            SymbolVar g = make_grad(loss, sym_in[i]);
            // only copy grads during checked forward runs (m_should_copy_grad
            // is toggled in do_run around m_func->execute())
            auto cb = [this, i](DeviceTensorND& dev) {
                if (m_should_copy_grad)
                    m_grads[i].copy_from(dev).sync();
            };
            m_outspec_fwd_grad.push_back({g, cb});
            // test grad with a different loss var
            if (m_need_multi_loss_check) {
                auto g2 = make_grad(loss2, sym_in[i]);
                auto cb2 = [this, i](DeviceTensorND& dev) {
                    if (m_should_copy_grad)
                        m_grads_mul2[i].copy_from(dev).sync();
                };
                m_outspec_fwd_grad.push_back({g2, cb2});
            }
        }
}
  128. DEF_IMPL()::~AutoOprChecker() {
  129. mgb_assert(m_failed || m_run_cnt >= 3,
  130. "less than 3 runs for autocheker; some paths not taken");
  131. }
//! Replace the default random generator for input `idx`; only valid before
//! the first run() (i.e. before the graph is built).
DEF_IMPL_CHAIN()::set_input_generator(size_t idx, const InputGenerator& gen) {
    mgb_assert(!m_built, "cannot set_input_generator after the first run");
    mgb_assert(idx < nr_inp);
    m_inputs_generator[idx] = gen;
    return *this;
}
  138. DEF_IMPL_CHAIN()::set_input_coordinator(const InputCoordinator& coord) {
  139. mgb_assert(!m_built, "cannot set_input_generator after the first run");
  140. m_input_coordinator = coord;
  141. return *this;
  142. }
//! Enable/disable gradient checking w.r.t. input `idx`; only valid before
//! the first run().
DEF_IMPL_CHAIN()::set_input_allow_grad(size_t idx, bool allowed) {
    mgb_assert(!m_built, "cannot set_input_allow_grad after the first run");
    mgb_assert(idx < nr_inp);
    m_inputs_allow_grad[idx] = allowed;
    return *this;
}
  149. DEF_IMPL_CHAIN()::set_input_default_shape(size_t idx,
  150. const TensorShape& shape) {
  151. mgb_assert(!m_built, "cannot set_input_allow_grad after the first run");
  152. mgb_assert(idx < nr_inp);
  153. m_inputs[idx]->resize(shape);
  154. return *this;
  155. }
//! Enable/disable including output `idx` in the loss used for gradient
//! checking; only valid before the first run().
DEF_IMPL_CHAIN()::set_output_allow_grad(size_t idx, bool allowed) {
    mgb_assert(!m_built, "cannot set_output_allow_grad after the first run");
    mgb_assert(idx < nr_out);
    m_outputs_allow_grad[idx] = allowed;
    return *this;
}
//! Enable/disable the value comparison of output `idx` against the numeric
//! groundtruth; only valid before the first run().
DEF_IMPL_CHAIN()::set_output_allow_check(size_t idx, bool allowed) {
    mgb_assert(!m_built, "cannot set_output_allow_check after the first run");
    mgb_assert(idx < nr_out);
    m_outputs_allow_check[idx] = allowed;
    return *this;
}
//! Execute one checked run: generate inputs with the given shapes, compute
//! the numeric groundtruth, run the compiled graph, compare outputs, and
//! (if enabled) compare symbolic grads against numerical differentiation.
DEF_IMPL(void)::do_run(const ShapeInpArray& shapes, const RunOptions& opt) {
    mgb_assert(m_built);
    // build a diagnostic message including input shapes and any extra
    // user-provided context; used by the assertion macros below
    auto failstr = [&](const std::string& type) {
        std::string ishp_str;
        for (auto&& i : shapes) {
            if (!ishp_str.empty())
                ishp_str.append(", ");
            ishp_str.append(i.to_string());
        }
        std::string msg = ssprintf("%s failed: input shapes: [%s]",
                                   type.c_str(), ishp_str.c_str());
        if (m_inp_dump_on_error) {
            std::string extra_msg = m_inp_dump_on_error(m_inputs);
            if (!extra_msg.empty()) {
                msg.append("\nextra message:\n");
                msg.append(extra_msg);
            }
        }
        if (!m_extra_err_msg.empty()) {
            msg.append("\nextra message: ");
            msg.append(m_extra_err_msg);
        }
        return msg;
    };
    // mark failed until the whole run completes, so the dtor run-count
    // check is skipped when an assertion aborts this function early
    m_failed = true;
    // gen input data
    for (size_t i = 0; i < nr_inp; ++i) {
        m_inputs[i]->resize(shapes[i]);
        m_inputs_generator[i](*m_inputs[i]);
        // custom generators must not change the requested shape
        mgb_assert(m_inputs[i]->shape().eq_shape(shapes[i]));
    }
    // optional debug dump of all input tensors, one file per run
    if (MGB_GETENV("MGB_AUTOCHECK_DUMP_INPUT")) {
        static size_t run_id;
        auto fname = output_file(ssprintf("autocheck-inp-%zu.bin", run_id++));
        for (size_t i = 0; i < nr_inp; ++i) {
            // 'w' truncates on the first tensor, 'a' appends the rest
            write_tensor_to_file(*m_inputs[i], fname.c_str(), i ? 'a' : 'w');
        }
        mgb_log("autocheck: %zu input tensors written to %s", nr_inp,
                fname.c_str());
    }
    if (m_input_coordinator)
        m_input_coordinator(m_inputs);
    // forward for groundtruth
    m_fwd(m_outputs_truth, m_inputs);
    for (auto&& i : m_outputs_truth) {
        i.comp_node().sync();
    }
    // gen loss_p: the per-output coefficient vectors for the dot-product
    // loss built in build_graph()
    if (m_need_grad_check) {
        float cur_loss_v = 0;
        for (size_t i = 0; i < nr_out; ++i) {
            if (m_outputs_allow_grad[i]) {
                auto nr = m_outputs_truth[i].shape().total_nr_elems();
                mgb_assert(nr, "got empty output");
                if (opt.cont_loss_p) {
                    // deterministic increasing coefficients 1, 2, 3, ...
                    m_loss_p[i]->resize({nr});
                    auto ptr = m_loss_p[i]->template ptr<float>();
                    for (size_t j = 0; j < nr; ++j)
                        ptr[j] = ++cur_loss_v;
                } else {
                    // random coefficients, pushed away from zero (|v| >= 0.1)
                    // while keeping their sign
                    *m_loss_p[i] = *m_gen({nr}, m_comp_node);
                    auto ptr = m_loss_p[i]->template ptr<float>();
                    for (size_t j = 0; j < nr; ++j) {
                        auto v = ptr[j];
                        bool vsign = v > 0;
                        v = std::abs(v) + 0.1;
                        ptr[j] = vsign ? v : -v;
                    }
                }
            }
        }
    }
    /*
     * for each 3 consecutive runs:
     *  0 and 1: m_func generates loss and grads
     *  2: m_func generates only grads in fwd, and loss in numdiff
     *
     * This scheme is used for recompiling the function a few times, so more
     * problems can be exposed.
     */
    if (m_run_cnt % 3 == 0) {
        auto spec = m_outspec_loss;
        spec.insert(spec.end(), m_outspec_fwd_grad.begin(),
                    m_outspec_fwd_grad.end());
        m_func = m_graph->compile(spec);
    } else if (!m_disable_check_loss_grad_seperate_compile &&
               m_run_cnt % 3 == 2)
        m_func = m_graph->compile(m_outspec_fwd_grad);
    // grad callbacks registered in build_graph() only copy while this flag
    // is set
    m_should_copy_grad = true;
    m_func->execute();
    m_should_copy_grad = false;
    if (m_on_grad_computed)
        m_on_grad_computed(m_graph.get(), m_func.get());
    // compare forward outputs against the numeric groundtruth
    for (size_t i = 0; i < nr_out; ++i) {
        if (m_outputs_allow_check[i]) {
            MGB_ASSERT_TENSOR_NEAR(m_outputs_truth[i], m_outputs[i],
                                   opt.outputs_max_err)
                    << failstr(ssprintf("output[%zu]", i));
        }
    }
    if (!m_need_grad_check) {
        m_failed = false;
        return;
    }
    // numerical differentiation: inputs without grad are passed as nullptr
    std::vector<HostTensorND*> numgrad_inp(nr_inp);
    // on run 2 of each cycle, recompile for loss-only evaluation used by
    // the numdiff cost function
    if (!m_disable_check_loss_grad_seperate_compile && m_run_cnt % 3 == 2)
        m_func = m_graph->compile(m_outspec_loss);
    for (size_t i = 0; i < nr_inp; ++i)
        if (m_inputs_allow_grad[i])
            numgrad_inp[i] = m_inputs[i].get();
        else
            numgrad_inp[i] = nullptr;
    // cost function for numerical diff: run the graph and read scalar loss
    auto cost_f = [this] {
        m_func->execute();
        mgb_assert(m_loss.shape().is_scalar());
        return m_loss.ptr<float>()[0];
    };
    // per-input eps: the single-input override wins over the global value;
    // None for inputs with grad check disabled
    std::vector<Maybe<float>> numdiff_eps;
    for (size_t i = 0; i < nr_inp; ++i) {
        if (m_inputs_allow_grad[i]) {
            float v = opt.numdiff_eps;
            auto&& sv = opt.numdiff_eps_single_inp[i];
            if (sv.valid())
                v = sv.val();
            numdiff_eps.push_back(v);
        } else {
            numdiff_eps.push_back(None);
        }
    }
    auto numgrad = numerical_diff_pt2(numgrad_inp, cost_f, numdiff_eps);
    // double every element of t in place; used to compare grads of loss2
    // (== 2*loss) against 2 * grads of loss
    auto mul2_inplace = [](HostTensorND& t) -> HostTensorND& {
        auto ptr = t.ptr<typename DTypeTrait<dtype>::ctype>();
        for (size_t j = 0, jt = t.layout().total_nr_elems(); j < jt; ++j) {
            ptr[j] *= 2;
        }
        return t;
    };
    for (size_t i = 0; i < nr_inp; ++i) {
        if (m_inputs_allow_grad[i]) {
            auto err = opt.numdiff_max_err;
            {
                auto&& se = opt.numdiff_max_err_single_inp[i];
                if (se.valid())
                    err = se.val();
            }
            MGB_ASSERT_TENSOR_NEAR(numgrad.at(i), m_grads[i], err)
                    << failstr(ssprintf("grad[%zu]", i));
            // check that grad2 == 2 * grad
            if (m_need_multi_loss_check) {
                MGB_ASSERT_TENSOR_NEAR(mul2_inplace(m_grads[i]),
                                       m_grads_mul2[i], err)
                        << failstr(ssprintf(
                                "2 * grad[%zu] (grad with another loss var)",
                                i));
            }
        }
    }
    m_failed = false;
}
  327. DEF_IMPL_CHAIN()::run(const ShapeInpArray& shapes, const RunOptions& opt) {
  328. if (!m_built)
  329. build_graph();
  330. if (m_failed) {
  331. mgb_log_error("testcase not executed due to previous error");
  332. return *this;
  333. }
  334. do_run(shapes, opt);
  335. ++m_run_cnt;
  336. return *this;
  337. }
namespace mgb {
// explicit instantiation of AutoOprChecker for every (nr_inp, nr_out)
// combination used by the test suite, for both Float32 and Int32 dtypes
#define I(a, b) \
template class AutoOprChecker<a, b, dtype::Float32>; \
template class AutoOprChecker<a, b, dtype::Int32>;
I(1, 1);
I(1, 2);
I(1, 3);
I(1, 4);
I(2, 1);
I(2, 2);
I(2, 4);
I(3, 1);
I(3, 2);
I(3, 3);
I(4, 1);
I(5, 1);
I(6, 1);
#undef I
}
  358. TEST(TestAutoCheck, APlusB) {
  359. using Checker = AutoOprChecker<2, 1>;
  360. auto make_graph =
  361. [](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
  362. return {inputs[0] + inputs[1] * inputs[1]};
  363. };
  364. auto fwd = [](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
  365. DeviceTensorND i0, i1, tmp, out;
  366. i0.copy_from(*inp[0]);
  367. i1.copy_from(*inp[1]);
  368. auto opr = opr::intl::create_megdnn_opr<megdnn::Elemwise>(
  369. dest[0].comp_node());
  370. using Mode = opr::Elemwise::Mode;
  371. opr::Elemwise::perform(Mode::MUL, tmp, {i1, i1}, opr);
  372. opr::Elemwise::perform(Mode::ADD, out, {tmp, i0}, opr);
  373. dest[0].copy_from(out).sync();
  374. };
  375. Checker(make_graph, fwd)
  376. .run({TensorShape{2, 3}, TensorShape{2, 3}})
  377. .run({TensorShape{5, 2, 3}, TensorShape{5, 1, 1}})
  378. .run({TensorShape{2, 3, 4, 5}, TensorShape{1}});
  379. }
  380. #undef DEF_IMPL
  381. #undef DEF_IMPL_CHAIN
  382. // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台