You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

magicmind_runtime_opr.cpp 15 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385
  1. /**
  2. * \file src/cambricon/impl/magicmind_runtime_opr.cpp
  3. * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  4. *
  5. * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  6. *
  7. * Unless required by applicable law or agreed to in writing,
  8. * software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. */
  11. #include "megbrain/cambricon/magicmind_runtime_opr.h"
  12. #include "megbrain/common.h"
  13. #include "megbrain/comp_node_env.h"
  14. #if MGB_CAMBRICON
  15. using namespace mgb;
  16. using namespace opr;
  17. using namespace magicmind;
  18. namespace {
  19. Dims mgb_shape_to_mm_dims(TensorShape mgb_shp) {
  20. size_t ndim = mgb_shp.ndim;
  21. std::vector<int64_t> dimensions(ndim);
  22. for (size_t i = 0; i < ndim; ++i) {
  23. dimensions[i] = mgb_shp[i];
  24. }
  25. return Dims{dimensions};
  26. }
  27. TensorShape mm_dims_to_mgb_shape(const Dims& dims) {
  28. TensorShape ret;
  29. ret.ndim = dims.GetDimsNum();
  30. auto&& dimensions = dims.GetDims();
  31. for (size_t i = 0; i < ret.ndim; ++i) {
  32. ret[i] = dimensions[i];
  33. }
  34. return ret;
  35. }
//! Map a MagicMind DataType onto the corresponding MegEngine DType.
//! Quantized results use a placeholder scale of 1.f (and zero point 8 for
//! UINT4); the TODO below notes that the real scale still needs checking.
//! Throws MegBrainError for any DataType MegEngine cannot represent.
DType mm_dtype_to_mgb_dtype(DataType data_type) {
    switch (data_type) {
        case DataType::FLOAT16:
#if !MEGDNN_DISABLE_FLOAT16
            return dtype::Float16();
#else
            //! fp16 was compiled out: fail loudly rather than mis-type the tensor
            mgb_throw(MegBrainError, "Float16 support is disabled at compile time.");
#endif
        case DataType::FLOAT32:
            return dtype::Float32();
        case DataType::INT8:
            //! NOTE(review): plain INT8 maps to QuantizedS8(1.f), same as QINT8
            //! below — presumably intentional, but confirm against callers.
            return dtype::QuantizedS8(1.f);
        case DataType::INT16:
            return dtype::Int16();
        case DataType::INT32:
            return dtype::Int32();
        case DataType::UINT8:
            return dtype::Uint8();
        //! TODO: check scale
        case DataType::QINT8:
            return dtype::QuantizedS8(1.f);
        case DataType::INT4:
            return dtype::QuantizedS4(1.f);
        case DataType::UINT4:
            return dtype::Quantized4Asymm(1.f, static_cast<uint8_t>(8));
        default:
            mgb_throw(
                    MegBrainError, "DataType %u is not supported by MegEngine.",
                    static_cast<uint32_t>(data_type));
    }
}
//! Map a MegEngine DType onto the corresponding MagicMind DataType
//! (inverse direction of mm_dtype_to_mgb_dtype; quantization parameters are
//! dropped since MagicMind's DataType enum carries none).
//! Throws MegBrainError for dtypes MagicMind does not support.
DataType mgb_dtype_to_mm_dtype(DType data_type) {
    switch (data_type.enumv()) {
#if !MEGDNN_DISABLE_FLOAT16
        case DTypeEnum::Float16:
            return DataType::FLOAT16;
#endif
        case DTypeEnum::Float32:
            return DataType::FLOAT32;
        case DTypeEnum::QuantizedS8:
            return DataType::QINT8;
        case DTypeEnum::Int8:
            return DataType::INT8;
        case DTypeEnum::Int32:
            return DataType::INT32;
        case DTypeEnum::Uint8:
            return DataType::UINT8;
        case DTypeEnum::QuantizedS4:
            return DataType::INT4;
        case DTypeEnum::Quantized4Asymm:
            return DataType::UINT4;
        default:
            mgb_throw(
                    MegBrainError,
                    "megengine data type %s is not supported by magicmind.",
                    data_type.name());
    }
}
  94. }; // namespace
  95. /* =========== MagicMindRuntimeOpr::CambriconAllocator =========== */
//! Device-memory allocator handed to MagicMind; forwards all allocations to
//! the MegEngine comp node and tracks live pointers to detect leaks.
class MagicMindRuntimeOpr::CambriconAllocator final : public IAllocator {
    CompNode m_cn;                       //!< cambricon comp node backing all allocations
    std::mutex m_ptr2size_mtx;           //!< guards m_ptr2size
    ThinHashMap<void*, size_t> m_ptr2size;  //!< live allocations: ptr -> size
public:
    explicit CambriconAllocator(CompNode cn);
    ~CambriconAllocator() noexcept;
    //! IAllocator interface: allocate/free raw device memory
    void* AllocateRaw(size_t size, size_t alignment) override;
    void DeallocateRaw(void* ptr) override;
    //! the comp node this allocator draws memory from
    CompNode comp_node() const { return m_cn; }
};
//! \param cn comp node backing the allocator; must be a cambricon device
MagicMindRuntimeOpr::CambriconAllocator::CambriconAllocator(CompNode cn) : m_cn{cn} {
    mgb_assert(
            cn.device_type() == CompNode::DeviceType::CAMBRICON,
            "invalid comp node %s for CambriconAllocator", cn.to_string().c_str());
}
  112. MagicMindRuntimeOpr::CambriconAllocator::~CambriconAllocator() noexcept {
  113. MGB_LOCK_GUARD(m_ptr2size_mtx);
  114. if (!m_ptr2size.empty()) {
  115. std::string msg{"there are unreleased magicmind mem buffers:\n"};
  116. for (auto&& i : m_ptr2size) {
  117. msg.append(ssprintf(" %p: %zu\n", i.first, i.second));
  118. }
  119. mgb_log_error("%sabort now", msg.c_str());
  120. mgb_trap();
  121. }
  122. }
//! Allocate raw device memory for MagicMind.
//! \param size number of bytes requested
//! \param alignment required alignment; must be a power of two
//! \return pointer to device memory satisfying the alignment
void* MagicMindRuntimeOpr::CambriconAllocator::AllocateRaw(
        size_t size, size_t alignment) {
    //! opt-in allocation logging via env var; evaluated once per process
    static bool enable_log = getenv("MGE_LOG_MAGICMIND_MEM_ALLOC");
    mgb_assert(!(alignment & (alignment - 1)), "invalid alignment(%zu)", alignment);
    auto ret = m_cn.alloc_device(size);
    //! comp-node allocations are expected to already satisfy the alignment;
    //! this only verifies, it does not over-align
    mgb_assert(
            !(reinterpret_cast<uintptr_t>(ret) & (alignment - 1)),
            "alignment not required(ptr:%p,alignment:%zu)", ret, alignment);
    if (enable_log) {
        mgb_log("magicmind mem alloc on %s: size=%zu, align=%zu, ptr=%p",
                m_cn.to_string().c_str(), size, alignment, ret);
    }
    {
        //! record the allocation so the destructor can detect leaks
        MGB_LOCK_GUARD(m_ptr2size_mtx);
        m_ptr2size[ret] = size;
    }
    return ret;
}
  141. void MagicMindRuntimeOpr::CambriconAllocator::DeallocateRaw(void* ptr) {
  142. {
  143. auto iter = m_ptr2size.find(ptr);
  144. mgb_assert(iter != m_ptr2size.end(), "ptr %p not found", ptr);
  145. m_ptr2size.erase(iter);
  146. }
  147. m_cn.free_device(ptr);
  148. }
  149. /* ====================== MagicMindRuntimeOpr ==================== */
  150. MGB_DYN_TYPE_OBJ_FINAL_IMPL(MagicMindRuntimeOpr);
  151. MagicMindRuntimeOpr::MagicMindRuntimeOpr(
  152. IModelPtr model, CambriconAllocatorPtr allocator, const VarNodeArray& inputs,
  153. const OperatorNodeConfig& config)
  154. : Super(inputs[0]->owner_graph(), config, "magic_runtime", inputs),
  155. m_allocator{std::move(allocator)},
  156. m_context{nullptr},
  157. m_engine{nullptr},
  158. m_model{std::move(model)} {
  159. mgb_assert(
  160. inputs[0]->comp_node().device_type() == CompNode::DeviceType::CAMBRICON,
  161. "MagicMindRuntimeOpr can only be used on cambricon comp node; "
  162. "got %s",
  163. inputs[0]->comp_node().to_string().c_str());
  164. size_t nr_inputs = m_model->GetInputNum();
  165. mgb_assert(
  166. nr_inputs == inputs.size(), "input number mismatch(got:%zu,expected:%zu)",
  167. inputs.size(), nr_inputs);
  168. for (auto i : inputs) {
  169. add_input({i});
  170. }
  171. size_t nr_outputs = m_model->GetOutputNum();
  172. for (size_t i = 0; i < nr_outputs; ++i) {
  173. add_output(m_model->GetOutputName(i));
  174. }
  175. IModel::EngineConfig engine_config;
  176. engine_config.device_type = "MLU";
  177. engine_config.allocator = m_allocator.get();
  178. auto&& cnrt_env = CompNodeEnv::from_comp_node(m_allocator->comp_node()).cnrt_env();
  179. cnrt_env.activate();
  180. m_engine = {
  181. m_model->CreateIEngine(engine_config),
  182. magicmind_intl::MagicMindDeleter<IEngine>()};
  183. mgb_assert(
  184. m_engine != nullptr,
  185. "create IEngine failed, corresponding MagicMindRuntimeOpr(%s)", cname());
  186. cg::add_workspace_output(this);
  187. add_equivalence_component<mgb::ScalarHash<void*>>(m_model.get());
  188. };
  189. void MagicMindRuntimeOpr::scn_do_execute() {
  190. mgb_assert(m_engine != nullptr);
  191. mgb_assert(m_context != nullptr);
  192. auto&& cnrt_env = CompNodeEnv::from_comp_node(input(0)->comp_node()).cnrt_env();
  193. cnrt_env.activate();
  194. std::vector<IRTTensor*> inputs, outputs;
  195. MM_CHECK(CreateInputTensors(m_context.get(), &inputs));
  196. MM_CHECK(CreateInputTensors(m_context.get(), &outputs));
  197. size_t nr_inputs = input().size();
  198. mgb_assert(nr_inputs == inputs.size());
  199. for (size_t i = 0; i < nr_inputs; ++i) {
  200. auto&& iname = m_model->GetInputName(i);
  201. auto tensor = FindIRTTensorByName(inputs, iname);
  202. mgb_assert(
  203. tensor != nullptr, "failed to find input tensor(name:%s)",
  204. iname.c_str());
  205. MM_CHECK(tensor->SetDimensions(mgb_shape_to_mm_dims(input(i)->shape())));
  206. MM_CHECK(tensor->SetData(input(i)->dev_tensor().raw_ptr()));
  207. }
  208. size_t nr_outputs = output().size();
  209. mgb_assert(nr_outputs == outputs.size() + 1);
  210. for (size_t i = 0; i < nr_outputs - 1; ++i) {
  211. auto&& oname = m_model->GetOutputName(i);
  212. auto tensor = FindIRTTensorByName(outputs, oname);
  213. mgb_assert(
  214. tensor != nullptr, "failed to find output tensor(name:%s)",
  215. oname.c_str());
  216. MM_CHECK(tensor->SetDimensions(mgb_shape_to_mm_dims(output(i)->shape())));
  217. MM_CHECK(tensor->SetData(output(i)->dev_tensor().raw_ptr()));
  218. }
  219. auto size = output().back()->dev_tensor().layout().span().dist_byte();
  220. MM_CHECK(m_context->SetWorkspace(output().back()->dev_tensor().raw_ptr(), size));
  221. MM_CHECK(m_context->Enqueue(inputs, outputs, cnrt_env.queue));
  222. for (auto&& i : inputs) {
  223. i->SetData(nullptr);
  224. i->Destroy();
  225. }
  226. for (auto&& o : outputs) {
  227. o->SetData(nullptr);
  228. o->Destroy();
  229. }
  230. }
  231. void MagicMindRuntimeOpr::get_output_var_shape(
  232. const TensorShapeArray& inp_shape, TensorShapeArray& out_shape) const {
  233. mgb_assert(m_engine != nullptr);
  234. mgb_assert(input().size() == inp_shape.size());
  235. auto&& cnrt_env = CompNodeEnv::from_comp_node(input(0)->comp_node()).cnrt_env();
  236. cnrt_env.activate();
  237. if (m_context == nullptr) {
  238. m_context = {
  239. m_engine->CreateIContext(),
  240. magicmind_intl::MagicMindDeleter<IContext>()};
  241. mgb_assert(
  242. m_context != nullptr,
  243. "failed to create IContext, corresponding MagicMindRuntimeOpr(%s)",
  244. cname());
  245. }
  246. std::vector<IRTTensor*> inputs, outputs;
  247. MM_CHECK(CreateInputTensors(m_context.get(), &inputs));
  248. MM_CHECK(CreateInputTensors(m_context.get(), &outputs));
  249. size_t nr_inputs = input().size();
  250. mgb_assert(nr_inputs == inputs.size());
  251. for (size_t i = 0; i < nr_inputs; ++i) {
  252. auto&& iname = m_model->GetInputName(i);
  253. auto tensor = FindIRTTensorByName(inputs, iname);
  254. mgb_assert(
  255. tensor != nullptr, "failed to find input tensor(name:%s)",
  256. iname.c_str());
  257. MM_CHECK(tensor->SetDimensions(mgb_shape_to_mm_dims(input(i)->shape())));
  258. }
  259. if (Status::OK() == m_context->InferOutputShape(inputs, outputs)) {
  260. size_t nr_outputs = output().size();
  261. mgb_assert(nr_outputs == outputs.size() + 1);
  262. for (size_t i = 0; i < nr_outputs - 1; ++i) {
  263. auto&& oname = m_model->GetOutputName(i);
  264. auto tensor = FindIRTTensorByName(outputs, oname);
  265. mgb_assert(
  266. tensor != nullptr, "failed to find output tensor(name:%s)",
  267. oname.c_str());
  268. auto&& dims = tensor->GetDimensions();
  269. out_shape[i] = mm_dims_to_mgb_shape(dims);
  270. }
  271. std::vector<Dims> shape(inp_shape.size());
  272. for (size_t i = 0; i < nr_inputs; ++i) {
  273. shape[i] = mgb_shape_to_mm_dims(input(i)->shape());
  274. }
  275. size_t wk_size = 0;
  276. MM_CHECK(m_engine->QueryContextMaxWorkspaceSize(shape, &wk_size));
  277. out_shape.back() = {wk_size};
  278. } else {
  279. mgb_assert(
  280. false, "static shape infer for MagicMindRuntimeOpr(%s) failed",
  281. cname());
  282. }
  283. for (auto&& i : inputs) {
  284. i->SetData(nullptr);
  285. i->Destroy();
  286. }
  287. for (auto&& o : outputs) {
  288. o->SetData(nullptr);
  289. o->Destroy();
  290. }
  291. }
  292. void MagicMindRuntimeOpr::add_input_layout_constraint() {
  293. //! default contiguous
  294. for (auto i : input()) {
  295. i->add_layout_constraint_contiguous();
  296. }
  297. }
  298. void MagicMindRuntimeOpr::init_output_dtype() {
  299. std::vector<DataType> inp_dtypes = m_model->GetInputDataTypes();
  300. mgb_assert(
  301. inp_dtypes.size() == input().size(),
  302. "input size mismatch(got:%zu,expected:%zu)", inp_dtypes.size(),
  303. input().size());
  304. size_t nr_inputs = input().size();
  305. for (size_t i = 0; i < nr_inputs; ++i) {
  306. auto dt_mm = mm_dtype_to_mgb_dtype(inp_dtypes[i]);
  307. auto dt_inp = input(i)->dtype();
  308. MGB_MARK_USED_VAR(dt_mm);
  309. MGB_MARK_USED_VAR(dt_inp);
  310. mgb_assert(
  311. dt_mm.valid() && dt_inp.valid() && dt_mm.enumv() == dt_inp.enumv(),
  312. "input %zu's data type mismatch with that in "
  313. "IModel: expected %s, got %s",
  314. i, dt_mm.name(), dt_inp.name());
  315. }
  316. std::vector<DataType> out_dtypes = m_model->GetOutputDataTypes();
  317. mgb_assert(
  318. out_dtypes.size() == output().size(),
  319. "output size mismatch(got:%zu,expected:%zu)", out_dtypes.size(),
  320. output().size());
  321. size_t nr_outputs = output().size();
  322. for (size_t i = 0; i < nr_outputs; ++i) {
  323. auto dt_mm = mm_dtype_to_mgb_dtype(out_dtypes[i]);
  324. mgb_assert(
  325. dt_mm.valid(), "output dtype checking failed: invalid dtype returned.");
  326. if (dt_mm.enumv() == DTypeEnum::QuantizedS8) {
  327. mgb_assert(
  328. output(i)->dtype().valid(),
  329. "user should specify scale of output tensor of "
  330. "MagicMindRuntimeOpr.");
  331. }
  332. if (!output(i)->dtype().valid())
  333. output(i)->dtype(dt_mm);
  334. }
  335. }
  336. SymbolVarArray MagicMindRuntimeOpr::make(
  337. IModelPtr model, CambriconAllocatorPtr allocator, const SymbolVarArray& src,
  338. const OperatorNodeConfig& config) {
  339. VarNodeArray var_node_array = cg::to_var_node_array(src);
  340. auto magicmind_runtime_opr = std::make_unique<MagicMindRuntimeOpr>(
  341. std::move(model), std::move(allocator), var_node_array, config);
  342. auto ret = cg::to_symbol_var_array(
  343. src[0].node()
  344. ->owner_graph()
  345. ->insert_opr(std::move(magicmind_runtime_opr))
  346. ->output());
  347. ret.pop_back(); // remove workspace
  348. return ret;
  349. }
  350. SymbolVarArray MagicMindRuntimeOpr::make(
  351. const void* buf, size_t size, const SymbolVarArray& src,
  352. const OperatorNodeConfig& config) {
  353. mgb_throw_if(
  354. !CompNode::get_device_count(CompNode::DeviceType::CAMBRICON), SystemError,
  355. "can not create MagicMindRuntimeOpr when MagicMind is not "
  356. "available");
  357. auto cambricon_allocator =
  358. std::make_shared<CambriconAllocator>(src[0].node()->comp_node());
  359. IModelPtr model = make_model_ptr(CreateIModel());
  360. model->DeserializeFromMemory(const_cast<void*>(buf), size);
  361. return make(std::move(model), std::move(cambricon_allocator), src, config);
  362. }
  363. #endif // MGB_CAMBRICON
  364. // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}