GitOrigin-RevId: 0fa3a534af
tags/v1.2.0
| @@ -153,7 +153,7 @@ R"__usage__( | |||||
| Print PID and wait for a line from stdin before starting execution. Useful | Print PID and wait for a line from stdin before starting execution. Useful | ||||
| for waiting for gdb attach. | for waiting for gdb attach. | ||||
| --c-opr-lib <path> | --c-opr-lib <path> | ||||
| Load external operator library. It must implement `mgb_c_opr_init` as the | |||||
| Load external operator library. It must implement MGB_C_OPR_INIT_FUNC_STR as the | |||||
| entry point. | entry point. | ||||
| --thread <num> | --thread <num> | ||||
| Number of threads to run concurrently. All threads perform the same work of | Number of threads to run concurrently. All threads perform the same work of | ||||
| @@ -1223,7 +1223,7 @@ Args Args::from_argv(int argc, char **argv) { | |||||
| auto handle = dlopen(argv[i], RTLD_LAZY); | auto handle = dlopen(argv[i], RTLD_LAZY); | ||||
| mgb_assert(handle, "failed to open c opr lib %s: %s", | mgb_assert(handle, "failed to open c opr lib %s: %s", | ||||
| argv[i], dlerror()); | argv[i], dlerror()); | ||||
| const char* entry = "mgb_c_opr_init"; | |||||
| const char* entry = MGB_C_OPR_INIT_FUNC_STR; | |||||
| auto func = dlsym(handle, entry); | auto func = dlsym(handle, entry); | ||||
| mgb_assert(func, "can not resolve %s: %s", entry, dlerror()); | mgb_assert(func, "can not resolve %s: %s", entry, dlerror()); | ||||
| typedef void (*entry_f_t)(void*); | typedef void (*entry_f_t)(void*); | ||||
| @@ -0,0 +1,29 @@ | |||||
| /** | |||||
| * \file src/core/include/megbrain/graph/extern_copr_api.h | |||||
| * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
| * | |||||
| * Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, | |||||
| * software distributed under the License is distributed on an | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
| * implied. | |||||
| */ | |||||
| #pragma once | |||||
| #include "megbrain/graph/bases.h" | |||||
| #include "megbrain/serialization/extern_c_opr.h" | |||||
| namespace mgb { | |||||
| /*! | |||||
| * \brief attach a dynamic ExternCOprParam to an ExternCOprRunner operator in \p func | |||||
| * | |||||
| * The operator sequence of \p func is searched for an ExternCOprRunner whose dump | |||||
| * name matches param->extern_c_opr_dump_name; when that name is nullptr the first | |||||
| * ExternCOprRunner found is configured. | |||||
| * | |||||
| * \param func function whose operator sequence is searched | |||||
| * \param param dynamic param to attach; must not be null | |||||
| * \throw MegBrainError if \p param is null or no operator was configured | |||||
| */ | |||||
| void config_extern_c_opr_dynamic_param( | |||||
| std::unique_ptr<cg::AsyncExecutable>& func, | |||||
| std::shared_ptr<ExternCOprParam> param); | |||||
| } // namespace mgb | |||||
| // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | |||||
| @@ -9,8 +9,9 @@ | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| */ | */ | ||||
| #include "megbrain/comp_node_env.h" | |||||
| #include "megbrain/serialization/extern_c_opr.h" | #include "megbrain/serialization/extern_c_opr.h" | ||||
| #include "megbrain/comp_node_env.h" | |||||
| #include "megbrain/graph/extern_copr_api.h" | |||||
| #include "megbrain/serialization/extern_c_opr_io.h" | #include "megbrain/serialization/extern_c_opr_io.h" | ||||
| #include "megbrain/serialization/opr_load_dump.h" | #include "megbrain/serialization/opr_load_dump.h" | ||||
| @@ -280,11 +281,14 @@ void PlaceholderMGBOprDesc::dump(OprDumpContext& ctx, MGBOprDesc* desc) { | |||||
| /* ===================== ExternCOprRunner ===================== */ | /* ===================== ExternCOprRunner ===================== */ | ||||
| MGB_DYN_TYPE_OBJ_FINAL_IMPL(ExternCOprRunner); | MGB_DYN_TYPE_OBJ_FINAL_IMPL(ExternCOprRunner); | ||||
| ExternCOprRunner::ExternCOprRunner(const VarNodeArray& inputs, | |||||
| ExternCOprRunner::ExternCOprRunner(std::string& name, | |||||
| const VarNodeArray& inputs, | |||||
| std::shared_ptr<MGBOprDesc> desc, | std::shared_ptr<MGBOprDesc> desc, | ||||
| const OperatorNodeConfig& config) | const OperatorNodeConfig& config) | ||||
| : Super{inputs[0]->owner_graph(), config, desc->type_name, inputs}, | : Super{inputs[0]->owner_graph(), config, desc->type_name, inputs}, | ||||
| m_desc{std::move(desc)} { | |||||
| m_desc{std::move(desc)}, | |||||
| m_dump_name{name}, | |||||
| m_param{nullptr} { | |||||
| mgb_assert(m_desc->size == sizeof(MGBOprDesc), | mgb_assert(m_desc->size == sizeof(MGBOprDesc), | ||||
| "invalid MGBOprDesc size: expect=%zu got=%u", sizeof(MGBOprDesc), | "invalid MGBOprDesc size: expect=%zu got=%u", sizeof(MGBOprDesc), | ||||
| m_desc->size); | m_desc->size); | ||||
| @@ -332,10 +336,61 @@ void ExternCOprRunner::init_output_dtype() { | |||||
| output(i)->dtype(dtype_c2cpp(out_dtypes[i])); | output(i)->dtype(dtype_c2cpp(out_dtypes[i])); | ||||
| } | } | ||||
| } | } | ||||
| void ExternCOprRunner::check_param() { | |||||
| //! check extern dynamic param validity | |||||
| //! nr_input=0 or nr_output=0 means do not provide input/output | |||||
| //! ExternDeviceTensor for some case, ExternCOprParam may only config | |||||
| //! device_id, extra_info, etc. so we need consider nr_input=0 or | |||||
| //! nr_output=0 | |||||
| auto check = [](size_t nr_config_tensor, size_t var_node_size, | |||||
| ExternDeviceTensor* e_tensor, | |||||
| const VarNodeArray& var_node_array, const char* msg) { | |||||
| mgb_assert(e_tensor, "%s ExternDeviceTensor should not be null!!", msg); | |||||
| mgb_assert( | |||||
| nr_config_tensor == var_node_size, | |||||
| "param %s size provided by `config_extern_c_opr_dynamic_param` " | |||||
| "mismatch with the number of %s, got %zu, expected %zu", | |||||
| msg, msg, nr_config_tensor, var_node_size); | |||||
| for (size_t i = 0; i < nr_config_tensor; i++) { | |||||
| mgb_assert(e_tensor[i].device_ptr, | |||||
| "%s ExternDeviceTensor(index: %zu) device_ptr should " | |||||
| "not be null!!", | |||||
| msg, i); | |||||
| auto param_shape = e_tensor[i].layout.shape; | |||||
| auto shape = var_node_array.at(i)->shape(); | |||||
| auto param_dtype = e_tensor[i].layout.dtype; | |||||
| auto dtype = dtype_cpp2c(var_node_array.at(i)->dtype()); | |||||
| mgb_assert(param_dtype == dtype, | |||||
| "%s dtype provided mismatch, expected: %u, got: %d", msg, | |||||
| param_dtype, dtype); | |||||
| mgb_assert(shape.ndim == param_shape.ndim, | |||||
| "%s ndim provided mismatch got: %u, expect: %zu of " | |||||
| "index: %zu", | |||||
| msg, param_shape.ndim, shape.ndim, i); | |||||
| for (size_t j = 0; j < shape.ndim; j++) { | |||||
| mgb_assert(param_shape.shape[j] == shape.shape[j], | |||||
| "config %s shape should same with c opr %s shape: " | |||||
| "(got: %u expect: %zu) of index: %zu", | |||||
| msg, msg, param_shape.shape[j], shape.shape[j], j); | |||||
| } | |||||
| } | |||||
| }; | |||||
| if (m_param && m_param->nr_input > 0) { | |||||
| check(m_param->nr_input, input().size(), m_param->input, input(), | |||||
| "input"); | |||||
| } | |||||
| if (m_param && m_param->nr_output > 0) { | |||||
| check(m_param->nr_output, output().size(), m_param->output, output(), | |||||
| "output"); | |||||
| } | |||||
| } | |||||
| void ExternCOprRunner::scn_do_execute() { | void ExternCOprRunner::scn_do_execute() { | ||||
| SmallVector<MGBTensor> c_inp(input().size()), c_out(output().size()); | SmallVector<MGBTensor> c_inp(input().size()), c_out(output().size()); | ||||
| SmallVector<HostTensorND> cpu_inp, cpu_out; | SmallVector<HostTensorND> cpu_inp, cpu_out; | ||||
| check_param(); | |||||
| bool need_copy = false; | bool need_copy = false; | ||||
| if (comp_node().device_type() == CompNode::DeviceType::CPU) { | if (comp_node().device_type() == CompNode::DeviceType::CPU) { | ||||
| @@ -399,27 +454,31 @@ cg::OperatorNodeBase* ExternCOprRunner::make_placeholder( | |||||
| var_inp[i] = inputs[i].node(); | var_inp[i] = inputs[i].node(); | ||||
| } | } | ||||
| return make_from_desc(var_inp, desc, config); | |||||
| auto dump_name = std::string{name}; | |||||
| return make_from_desc(dump_name, var_inp, desc, config); | |||||
| } | } | ||||
| cg::OperatorNodeBase* ExternCOprRunner::make_from_desc( | cg::OperatorNodeBase* ExternCOprRunner::make_from_desc( | ||||
| const VarNodeArray& inputs, MGBOprDesc* desc, | |||||
| std::string& name, const VarNodeArray& inputs, MGBOprDesc* desc, | |||||
| const OperatorNodeConfig& config) { | const OperatorNodeConfig& config) { | ||||
| auto desc_del = [](MGBOprDesc* ptr) { ptr->release(ptr); }; | auto desc_del = [](MGBOprDesc* ptr) { ptr->release(ptr); }; | ||||
| return make_from_desc_shared(inputs, {desc, desc_del}, config); | |||||
| return make_from_desc_shared(name, inputs, {desc, desc_del}, config); | |||||
| } | } | ||||
| cg::OperatorNodeBase* ExternCOprRunner::make_from_desc_shared( | cg::OperatorNodeBase* ExternCOprRunner::make_from_desc_shared( | ||||
| const VarNodeArray& inputs, std::shared_ptr<MGBOprDesc> desc, | |||||
| const OperatorNodeConfig& config) { | |||||
| std::string& name, const VarNodeArray& inputs, | |||||
| std::shared_ptr<MGBOprDesc> desc, const OperatorNodeConfig& config) { | |||||
| mgb_assert(!inputs.empty() && desc->nr_output); | mgb_assert(!inputs.empty() && desc->nr_output); | ||||
| #define CHECK(name) mgb_assert(desc->name, #name " is not given"); | #define CHECK(name) mgb_assert(desc->name, #name " is not given"); | ||||
| MGB_OPR_DESC_FOREACH_MEM_FN(CHECK); | MGB_OPR_DESC_FOREACH_MEM_FN(CHECK); | ||||
| #undef CHECK | #undef CHECK | ||||
| if (!config.name().valid()) | |||||
| const_cast<OperatorNodeConfig&>(config).name(name); | |||||
| auto opr = inputs[0]->owner_graph()->insert_opr( | auto opr = inputs[0]->owner_graph()->insert_opr( | ||||
| std::make_unique<ExternCOprRunner>(inputs, std::move(desc), | |||||
| std::make_unique<ExternCOprRunner>(name, inputs, std::move(desc), | |||||
| config)); | config)); | ||||
| return &opr->cast_final_safe<ExternCOprRunner>(); | return &opr->cast_final_safe<ExternCOprRunner>(); | ||||
| } | } | ||||
| @@ -437,7 +496,11 @@ void ExternCOprRunner::dump(OprDumpContext& ctx, | |||||
| cg::OperatorNodeBase* ExternCOprRunner::load(OprLoadContext& ctx, | cg::OperatorNodeBase* ExternCOprRunner::load(OprLoadContext& ctx, | ||||
| const cg::VarNodeArray& inputs, | const cg::VarNodeArray& inputs, | ||||
| const OperatorNodeConfig& config) { | const OperatorNodeConfig& config) { | ||||
| auto name = ctx.load_buf_with_len(); | |||||
| auto dump_name = ctx.load_buf_with_len(); | |||||
| auto name = dump_name; | |||||
| //! use to compat dump ExternCOprRunner with more info | |||||
| if (auto index = name.find(":")) | |||||
| name = name.substr(0, index); | |||||
| auto&& map = loader_map(); | auto&& map = loader_map(); | ||||
| auto iter = map.find(name); | auto iter = map.find(name); | ||||
| mgb_assert(iter != map.end(), | mgb_assert(iter != map.end(), | ||||
| @@ -448,7 +511,7 @@ cg::OperatorNodeBase* ExternCOprRunner::load(OprLoadContext& ctx, | |||||
| if (auto trans = iter->second.second) { | if (auto trans = iter->second.second) { | ||||
| desc = trans(desc); | desc = trans(desc); | ||||
| } | } | ||||
| return make_from_desc(inputs, desc, config); | |||||
| return make_from_desc(dump_name, inputs, desc, config); | |||||
| } | } | ||||
| cg::OperatorNodeBase* ExternCOprRunner::shallow_copy( | cg::OperatorNodeBase* ExternCOprRunner::shallow_copy( | ||||
| @@ -456,7 +519,8 @@ cg::OperatorNodeBase* ExternCOprRunner::shallow_copy( | |||||
| const cg::OperatorNodeBase& opr_, const VarNodeArray& inputs, | const cg::OperatorNodeBase& opr_, const VarNodeArray& inputs, | ||||
| const OperatorNodeConfig& config) { | const OperatorNodeConfig& config) { | ||||
| auto&& opr = opr_.cast_final_safe<ExternCOprRunner>(); | auto&& opr = opr_.cast_final_safe<ExternCOprRunner>(); | ||||
| return make_from_desc_shared(inputs, opr.m_desc, config); | |||||
| auto dump_name = opr.m_dump_name; | |||||
| return make_from_desc_shared(dump_name, inputs, opr.m_desc, config); | |||||
| } | } | ||||
| MGBTensorShape ExternCOprRunner::tensor_shape_to_c(const TensorShape& shape) { | MGBTensorShape ExternCOprRunner::tensor_shape_to_c(const TensorShape& shape) { | ||||
| @@ -481,6 +545,36 @@ TensorShape ExternCOprRunner::tensor_shape_from_c(const MGBTensorShape& shape) { | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| void mgb::config_extern_c_opr_dynamic_param( | |||||
| std::unique_ptr<cg::AsyncExecutable>& func, | |||||
| std::shared_ptr<ExternCOprParam> param) { | |||||
| mgb_throw_if(!param, MegBrainError, "invalid ExternCOprParam param!!"); | |||||
| auto find_config_opr = false; | |||||
| auto cb = [&](cg::OperatorNodeBase* opr) { | |||||
| if (auto c_opr = opr->try_cast_final<opr::ExternCOprRunner>()) { | |||||
| auto dump_name = c_opr->get_dump_name().c_str(); | |||||
| if (!param->extern_c_opr_dump_name || | |||||
| !strncmp(param->extern_c_opr_dump_name, dump_name, | |||||
| strlen(dump_name))) { | |||||
| c_opr->set_param(param); | |||||
| find_config_opr = true; | |||||
| mgb_log_debug("config dynamic param for extern c opr: %s", | |||||
| dump_name); | |||||
| } | |||||
| } | |||||
| return !find_config_opr; | |||||
| }; | |||||
| func->iter_opr_seq(cb); | |||||
| mgb_throw_if(!find_config_opr, MegBrainError, | |||||
| "graph do not include a ExternCOprRunner opr or error config " | |||||
| "extern_c_opr_dump_name!!"); | |||||
| } | |||||
| /* ===================== public APIs ===================== */ | /* ===================== public APIs ===================== */ | ||||
| const MGBExternCOprApi* mgb_get_extern_c_opr_api_versioned(int version) { | const MGBExternCOprApi* mgb_get_extern_c_opr_api_versioned(int version) { | ||||
| auto unreg = [](const char* name) -> int { | auto unreg = [](const char* name) -> int { | ||||
| @@ -26,6 +26,10 @@ extern "C" { | |||||
| #define MGB_C_OPR_INIT_FUNC mgb_c_opr_init | #define MGB_C_OPR_INIT_FUNC mgb_c_opr_init | ||||
| #endif | #endif | ||||
//! INIT_FUNCS stringifies its argument as written; INIT_FUNC adds one level of
//! indirection so the argument is macro-expanded before being stringified.
//! MGB_C_OPR_INIT_FUNC_STR is thereby the expansion of MGB_C_OPR_INIT_FUNC as
//! a string literal.
#define INIT_FUNCS(x) #x
#define INIT_FUNC(x) INIT_FUNCS(x)
#define MGB_C_OPR_INIT_FUNC_STR INIT_FUNC(MGB_C_OPR_INIT_FUNC)
| #define MGB_EXTERN_C_OPR_VERSION 0x24 | #define MGB_EXTERN_C_OPR_VERSION 0x24 | ||||
| #define MGB_TENSOR_MAX_NDIM 8 | #define MGB_TENSOR_MAX_NDIM 8 | ||||
| @@ -54,6 +58,51 @@ typedef struct MGBTensor { | |||||
| void* data; //!< the tensor value, accessible by caller CPU thread | void* data; //!< the tensor value, accessible by caller CPU thread | ||||
| } MGBTensor; | } MGBTensor; | ||||
| //! extern device tensor: a layout plus an opaque pointer to the device-side buffer | |||||
| typedef struct ExternDeviceTensor { | |||||
| //! layout of the extern device tensor, used for validity checks against MGBTensor | |||||
| MGBTensorLayout layout; | |||||
| //! different NPU APIs define different buffer types, so an opaque void* is | |||||
| //! used to stay compatible with all of them; the loader and SDK must reinterpret_cast it. | |||||
| //! example for NNIE, device_ptr may be defined as | |||||
| //! struct MemoryInfo { | |||||
| //! HI_U64 phy_addr; | |||||
| //! void* vir_addr; | |||||
| //! size_t size = 0; | |||||
| //! } | |||||
| void* device_ptr; | |||||
| } ExternDeviceTensor; | |||||
| //! dynamic param of an extern c opr | |||||
| typedef struct ExternCOprParam { | |||||
| //! dump name of the target extern c opr in the graph | |||||
| //! example graph: | |||||
| //! ExternCOpr1(3516:preprocess)->opr->ExternCOpr2(3559)->opr->ExternCOpr3(3516:det_face)... | |||||
| //! which opr is configured for each extern_c_opr_dump_name value: | |||||
| //! 3516:preprocess -> ExternCOpr1 is configured. | |||||
| //! 3559 -> ExternCOpr2 is configured. | |||||
| //! 3516:det_face -> ExternCOpr3 is configured. | |||||
| //! nullptr -> the first ExternCOpr is configured automatically. | |||||
| const char* extern_c_opr_dump_name; | |||||
| //! number of input/output tensors, used for indexing and validity checks | |||||
| //! nr_input = 0 means no input ExternDeviceTensor is provided | |||||
| //! nr_output = 0 means no output ExternDeviceTensor is provided | |||||
| size_t nr_input, nr_output; | |||||
| //! pointers to the input/output ExternDeviceTensor arrays | |||||
| ExternDeviceTensor* input; | |||||
| ExternDeviceTensor* output; | |||||
| //! device id | |||||
| size_t device_id; | |||||
| //! extra info for miscellaneous dynamic config | |||||
| uint8_t* extra_info; | |||||
| //! size of extra_info | |||||
| size_t extra_info_size; | |||||
| } ExternCOprParam; | |||||
| /*! | /*! | ||||
| * \brief operator descriptor | * \brief operator descriptor | ||||
| * | * | ||||
| @@ -93,6 +142,9 @@ typedef struct MGBOprDesc { | |||||
| //! custom user data to be associated with this descriptor | //! custom user data to be associated with this descriptor | ||||
| void* user_data; | void* user_data; | ||||
| //! dynamic extern c opr param | |||||
| ExternCOprParam* dynamic_param; | |||||
| } MGBOprDesc; | } MGBOprDesc; | ||||
| //! foreach member function of MGBOprDesc to help initialization | //! foreach member function of MGBOprDesc to help initialization | ||||
| @@ -22,25 +22,30 @@ namespace opr { | |||||
| MGB_DEFINE_OPR_CLASS(ExternCOprRunner, | MGB_DEFINE_OPR_CLASS(ExternCOprRunner, | ||||
| cg::SingleCNOutshapePureByInshapeOprBase) // { | cg::SingleCNOutshapePureByInshapeOprBase) // { | ||||
| std::shared_ptr<MGBOprDesc> m_desc; | std::shared_ptr<MGBOprDesc> m_desc; | ||||
| //! store ExternCOprRunner opr full dump name | |||||
| std::string m_dump_name; | |||||
| //! store dynamic store param | |||||
| std::shared_ptr<ExternCOprParam> m_param; | |||||
| void get_output_var_shape(const TensorShapeArray& inp_shape, | void get_output_var_shape(const TensorShapeArray& inp_shape, | ||||
| TensorShapeArray& out_shape) const override; | TensorShapeArray& out_shape) const override; | ||||
| void scn_do_execute() override; | void scn_do_execute() override; | ||||
| void add_input_layout_constraint() override; | void add_input_layout_constraint() override; | ||||
| void init_output_dtype() override; | void init_output_dtype() override; | ||||
| void check_param(); | |||||
| static cg::OperatorNodeBase* make_from_desc_shared( | static cg::OperatorNodeBase* make_from_desc_shared( | ||||
| const VarNodeArray& inputs, std::shared_ptr<MGBOprDesc> desc, | |||||
| const OperatorNodeConfig& config); | |||||
| std::string& name, const VarNodeArray& inputs, | |||||
| std::shared_ptr<MGBOprDesc> desc, const OperatorNodeConfig& config); | |||||
| public: | public: | ||||
| ExternCOprRunner(const VarNodeArray& inputs, | |||||
| ExternCOprRunner(std::string& name, const VarNodeArray& inputs, | |||||
| std::shared_ptr<MGBOprDesc> desc, | std::shared_ptr<MGBOprDesc> desc, | ||||
| const OperatorNodeConfig& config); | const OperatorNodeConfig& config); | ||||
| //! create from MGBOprDesc and steal its reference | //! create from MGBOprDesc and steal its reference | ||||
| static cg::OperatorNodeBase* make_from_desc( | static cg::OperatorNodeBase* make_from_desc( | ||||
| const VarNodeArray& inputs, MGBOprDesc* desc, | |||||
| std::string& name, const VarNodeArray& inputs, MGBOprDesc* desc, | |||||
| const OperatorNodeConfig& config = {}); | const OperatorNodeConfig& config = {}); | ||||
| /*! | /*! | ||||
| @@ -87,6 +92,15 @@ public: | |||||
| //! helper for converting MGBTensorShape to TensorShape | //! helper for converting MGBTensorShape to TensorShape | ||||
| static TensorShape tensor_shape_from_c(const MGBTensorShape& shape); | static TensorShape tensor_shape_from_c(const MGBTensorShape& shape); | ||||
| const std::string& get_dump_name() { | |||||
| return m_dump_name; | |||||
| } | |||||
| void set_param(const std::shared_ptr<ExternCOprParam>& param) { | |||||
| m_param = param; | |||||
| m_desc->dynamic_param = m_param.get(); | |||||
| } | |||||
| }; | }; | ||||
| } // namespace opr | } // namespace opr | ||||
| @@ -9,6 +9,8 @@ | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| */ | */ | ||||
| #include <memory> | |||||
| #include "megbrain/graph/extern_copr_api.h" | |||||
| #include "megbrain/opr/io.h" | #include "megbrain/opr/io.h" | ||||
| #include "megbrain/opr/utility.h" | #include "megbrain/opr/utility.h" | ||||
| #include "megbrain/serialization/extern_c_opr_io.h" | #include "megbrain/serialization/extern_c_opr_io.h" | ||||
| @@ -68,31 +70,50 @@ class MGBOprDescImpl { | |||||
| static void execute(const MGBOprDesc* self, const MGBTensor* input, | static void execute(const MGBOprDesc* self, const MGBTensor* input, | ||||
| const MGBTensor* output) { | const MGBTensor* output) { | ||||
| if (self->dynamic_param) { | |||||
| auto device_id = self->dynamic_param->device_id; | |||||
| mgb_assert(0 == device_id || 8 == device_id); | |||||
| } | |||||
| bool use_extern_input = | |||||
| (self->dynamic_param && self->dynamic_param->nr_input > 0) | |||||
| ? true | |||||
| : false; | |||||
| bool use_extern_output = | |||||
| (self->dynamic_param && self->dynamic_param->nr_output > 0) | |||||
| ? true | |||||
| : false; | |||||
| auto&& i = input[0].layout; | auto&& i = input[0].layout; | ||||
| auto&& o = output[0].layout; | auto&& o = output[0].layout; | ||||
| mgb_assert(i.shape.ndim == 1 && o.shape.ndim == 1 && | mgb_assert(i.shape.ndim == 1 && o.shape.ndim == 1 && | ||||
| i.shape.shape[0] == o.shape.shape[0]); | i.shape.shape[0] == o.shape.shape[0]); | ||||
| mgb_assert(i.dtype == MGB_DTYPE_FLOAT32 && o.dtype == out_dtype); | mgb_assert(i.dtype == MGB_DTYPE_FLOAT32 && o.dtype == out_dtype); | ||||
| auto pi = static_cast<float*>(input[0].data); | |||||
| auto input_p = static_cast<float*>(input[0].data); | |||||
| if (use_extern_input) | |||||
| input_p = static_cast<float*>( | |||||
| self->dynamic_param->input[0].device_ptr); | |||||
| auto bias = user_data(self)->bias; | auto bias = user_data(self)->bias; | ||||
| if (out_dtype == MGB_DTYPE_FLOAT32) { | if (out_dtype == MGB_DTYPE_FLOAT32) { | ||||
| auto po = static_cast<float*>(output[0].data); | |||||
| auto output_p = static_cast<float*>(output[0].data); | |||||
| if (use_extern_output) | |||||
| output_p = static_cast<float*>( | |||||
| self->dynamic_param->output[0].device_ptr); | |||||
| for (size_t x = 0; x < i.shape.shape[0]; ++x) { | for (size_t x = 0; x < i.shape.shape[0]; ++x) { | ||||
| po[x] = pi[x] + bias; | |||||
| output_p[x] = input_p[x] + bias; | |||||
| } | } | ||||
| } else if (MEGDNN_FLOAT16_SELECT(out_dtype == MGB_DTYPE_FLOAT16, | } else if (MEGDNN_FLOAT16_SELECT(out_dtype == MGB_DTYPE_FLOAT16, | ||||
| false)) { | false)) { | ||||
| #if !MEGDNN_DISABLE_FLOAT16 | #if !MEGDNN_DISABLE_FLOAT16 | ||||
| auto po = static_cast<dt_float16*>(output[0].data); | |||||
| auto output_p = static_cast<dt_float16*>(output[0].data); | |||||
| for (size_t x = 0; x < i.shape.shape[0]; ++x) { | for (size_t x = 0; x < i.shape.shape[0]; ++x) { | ||||
| po[x] = pi[x] + bias; | |||||
| output_p[x] = input_p[x] + bias; | |||||
| } | } | ||||
| #endif | #endif | ||||
| } else { | } else { | ||||
| mgb_assert(out_dtype == MGB_DTYPE_INT32); | mgb_assert(out_dtype == MGB_DTYPE_INT32); | ||||
| auto po = static_cast<int32_t*>(output[0].data); | |||||
| auto output_p = static_cast<int32_t*>(output[0].data); | |||||
| for (size_t x = 0; x < i.shape.shape[0]; ++x) { | for (size_t x = 0; x < i.shape.shape[0]; ++x) { | ||||
| po[x] = pi[x] + bias; | |||||
| output_p[x] = input_p[x] + bias; | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -185,7 +206,7 @@ std::vector<uint8_t> create_graph_dump(float bias, float extra_scale, | |||||
| x = opr::ExternCOprRunner::make_placeholder( | x = opr::ExternCOprRunner::make_placeholder( | ||||
| {x}, {TensorShape{1}}, | {x}, {TensorShape{1}}, | ||||
| dtype == MGB_DTYPE_FLOAT32 | dtype == MGB_DTYPE_FLOAT32 | ||||
| ? "bias_adder_dump" | |||||
| ? "bias_adder_dump:test" | |||||
| : (dtype == MGB_DTYPE_INT32 ? "bias_adder_dump_i32" | : (dtype == MGB_DTYPE_INT32 ? "bias_adder_dump_i32" | ||||
| : "bias_adder_dump_f16"), | : "bias_adder_dump_f16"), | ||||
| &bias, sizeof(bias), {}, {dtype_c2cpp(dtype)}) | &bias, sizeof(bias), {}, {dtype_c2cpp(dtype)}) | ||||
| @@ -238,6 +259,65 @@ void check_dump_by_compute(std::unique_ptr<serialization::InputFile> input_file, | |||||
| MGB_ASSERT_TENSOR_EQ(y_expect, host_y); | MGB_ASSERT_TENSOR_EQ(y_expect, host_y); | ||||
| } | } | ||||
| void check_dump_by_compute_with_param( | |||||
| std::unique_ptr<serialization::InputFile> input_file, CompNode cn, | |||||
| MGBDType dtype, float bias, std::shared_ptr<ExternCOprParam> param) { | |||||
| GraphLoadConfig config; | |||||
| config.comp_node_mapper = [loc = cn.locator()](CompNode::Locator& t) { | |||||
| t = loc; | |||||
| }; | |||||
| auto loader = GraphLoader::make(std::move(input_file)); | |||||
| auto load_ret = loader->load(config); | |||||
| load_ret.graph->options().var_sanity_check_first_run = false; | |||||
| SymbolVar y; | |||||
| unpack_vector(load_ret.output_var_list, y); | |||||
| HostTensorGenerator<> gen; | |||||
| auto host_x = load_ret.tensor_map.begin()->second; | |||||
| *host_x = *gen({23}, cn); | |||||
| HostTensorND y_expect; | |||||
| y_expect.copy_from(*host_x); | |||||
| { | |||||
| auto py = y_expect.ptr<float>(); | |||||
| float* extern_input_device_ptr = nullptr; | |||||
| if (param->nr_input && param->input && param->input->device_ptr) { | |||||
| extern_input_device_ptr = | |||||
| static_cast<float*>(param->input->device_ptr); | |||||
| } | |||||
| for (int i = 0; i < 23; ++i) { | |||||
| float t = 0; | |||||
| //! this test code is run before config_extern_c_opr_dynamic_param | |||||
| //! so we need double child member ptr is valid or not | |||||
| if (param->nr_input && param->input && param->input->device_ptr) { | |||||
| t = extern_input_device_ptr[i] + bias; | |||||
| } else { | |||||
| t = py[i] + bias; | |||||
| } | |||||
| if (dtype == MGB_DTYPE_INT32) { | |||||
| t = int(t); | |||||
| #if !MEGDNN_DISABLE_FLOAT16 | |||||
| } else if (dtype == MGB_DTYPE_FLOAT16) { | |||||
| t = dt_float16(t); | |||||
| #endif | |||||
| } | |||||
| py[i] = t; | |||||
| } | |||||
| } | |||||
| HostTensorND host_y; | |||||
| auto func = load_ret.graph->compile({make_callback_copy(y, host_y)}); | |||||
| config_extern_c_opr_dynamic_param(func, param); | |||||
| func->execute(); | |||||
| if (param->nr_output) { | |||||
| auto ph = host_y.ptr<float>(); | |||||
| auto outp = static_cast<float*>(param->output->device_ptr); | |||||
| for (int i = 0; i < 23; ++i) { | |||||
| ph[i] = outp[i]; | |||||
| } | |||||
| } | |||||
| MGB_ASSERT_TENSOR_EQ(y_expect, host_y); | |||||
| } | |||||
| void run_compute_test(CompNode cn, MGBDType dtype) { | void run_compute_test(CompNode cn, MGBDType dtype) { | ||||
| float bias = 1.2, scale = -2.1; | float bias = 1.2, scale = -2.1; | ||||
| auto graph_dump = create_graph_dump(bias, scale, 0.3, dtype); | auto graph_dump = create_graph_dump(bias, scale, 0.3, dtype); | ||||
| @@ -245,8 +325,138 @@ void run_compute_test(CompNode cn, MGBDType dtype) { | |||||
| InputFile::make_mem_proxy(graph_dump.data(), graph_dump.size()), cn, | InputFile::make_mem_proxy(graph_dump.data(), graph_dump.size()), cn, | ||||
| dtype, bias, scale); | dtype, bias, scale); | ||||
| } | } | ||||
| void run_compute_test_with_param(CompNode cn, MGBDType dtype, | |||||
| std::shared_ptr<ExternCOprParam> param) { | |||||
| float bias = 1.2, scale = 0; | |||||
| auto graph_dump = create_graph_dump(bias, scale, 0.3, dtype); | |||||
| check_dump_by_compute_with_param( | |||||
| InputFile::make_mem_proxy(graph_dump.data(), graph_dump.size()), cn, | |||||
| dtype, bias, param); | |||||
| } | |||||
| } // namespace | } // namespace | ||||
| TEST(TestExternCOpr, ExternCOprParam) { | |||||
| //! same with check_dump_by_compute_with_param | |||||
| constexpr int input_output_size = 23; | |||||
| auto c_opr_param = std::make_shared<ExternCOprParam>(); | |||||
| MGBTensorLayout input_layput, output_layput; | |||||
| ExternDeviceTensor input, output; | |||||
| float* input_device_ptr = (float*)malloc(input_output_size * sizeof(float)); | |||||
| float* output_device_ptr = | |||||
| (float*)malloc(input_output_size * sizeof(float)); | |||||
| auto reset = [&] { | |||||
| memset(c_opr_param.get(), 0, sizeof(ExternCOprParam)); | |||||
| memset(&input_layput, 0, sizeof(MGBTensorLayout)); | |||||
| memset(&input, 0, sizeof(ExternDeviceTensor)); | |||||
| memset(&output_layput, 0, sizeof(MGBTensorLayout)); | |||||
| memset(&output, 0, sizeof(ExternDeviceTensor)); | |||||
| memset(input_device_ptr, 0, input_output_size * sizeof(float)); | |||||
| memset(output_device_ptr, 0, input_output_size * sizeof(float)); | |||||
| for (size_t i = 0; i < input_output_size; i++) { | |||||
| input_device_ptr[i] = i; | |||||
| } | |||||
| }; | |||||
| auto run_test = [&] { | |||||
| run_compute_test_with_param(CompNode::load("cpux"), MGB_DTYPE_FLOAT32, | |||||
| c_opr_param); | |||||
| }; | |||||
| auto init_param = [&] { | |||||
| reset(); | |||||
| c_opr_param->nr_input = 1; | |||||
| input_layput.shape = {1, {input_output_size}}; | |||||
| input.layout = input_layput; | |||||
| input.device_ptr = input_device_ptr; | |||||
| c_opr_param->input = &input; | |||||
| c_opr_param->nr_output = 1; | |||||
| output_layput.shape = {1, {input_output_size}}; | |||||
| output.layout = output_layput; | |||||
| output.device_ptr = output_device_ptr; | |||||
| c_opr_param->output = &output; | |||||
| }; | |||||
| //! run with null param | |||||
| reset(); | |||||
| run_test(); | |||||
| //! run with full param | |||||
| init_param(); | |||||
| run_test(); | |||||
| //! run with a right index | |||||
| init_param(); | |||||
| c_opr_param->extern_c_opr_dump_name = "bias_adder_dump:test"; | |||||
| run_test(); | |||||
| //! set a wrong index | |||||
| init_param(); | |||||
| c_opr_param->extern_c_opr_dump_name = "bias_adder_dump"; | |||||
| ASSERT_THROW(run_test(), MegBrainError); | |||||
| //! set a wrong index | |||||
| init_param(); | |||||
| c_opr_param->extern_c_opr_dump_name = "sdfsdfs"; | |||||
| ASSERT_THROW(run_test(), MegBrainError); | |||||
| //! set wrong input | |||||
| init_param(); | |||||
| c_opr_param->input = nullptr; | |||||
| ASSERT_THROW(run_test(), MegBrainError); | |||||
| //! set wrong nr_input | |||||
| init_param(); | |||||
| c_opr_param->nr_input = 3; | |||||
| ASSERT_THROW(run_test(), MegBrainError); | |||||
| //! set wrong input device_ptr | |||||
| init_param(); | |||||
| c_opr_param->input->device_ptr = nullptr; | |||||
| ASSERT_THROW(run_test(), MegBrainError); | |||||
| //! set wrong input shape | |||||
| init_param(); | |||||
| c_opr_param->input->layout.shape.shape[0] = input_output_size - 2; | |||||
| ASSERT_THROW(run_test(), MegBrainError); | |||||
| //! set wrong output | |||||
| init_param(); | |||||
| c_opr_param->output = nullptr; | |||||
| ASSERT_THROW(run_test(), MegBrainError); | |||||
| //! set wrong nr_output | |||||
| init_param(); | |||||
| c_opr_param->nr_output = 3; | |||||
| ASSERT_THROW(run_test(), MegBrainError); | |||||
| //! set wrong output device_ptr | |||||
| init_param(); | |||||
| c_opr_param->output->device_ptr = nullptr; | |||||
| ASSERT_THROW(run_test(), MegBrainError); | |||||
| //! set wrong output shape | |||||
| init_param(); | |||||
| c_opr_param->output->layout.shape.shape[0] = input_output_size - 2; | |||||
| ASSERT_THROW(run_test(), MegBrainError); | |||||
| //! set wrong dtype(test MGB_DTYPE_FLOAT32) | |||||
| init_param(); | |||||
| c_opr_param->input[0].layout.dtype = MGB_DTYPE_INT32; | |||||
| ASSERT_THROW(run_test(), MegBrainError); | |||||
| //! test only device_id | |||||
| reset(); | |||||
| c_opr_param->device_id = 8; | |||||
| run_test(); | |||||
| //! free | |||||
| free(input_device_ptr); | |||||
| free(output_device_ptr); | |||||
| } | |||||
| TEST(TestExternCOpr, CPUCompute) { | TEST(TestExternCOpr, CPUCompute) { | ||||
| run_compute_test(CompNode::load("cpux"), MGB_DTYPE_FLOAT32); | run_compute_test(CompNode::load("cpux"), MGB_DTYPE_FLOAT32); | ||||
| } | } | ||||
| @@ -280,8 +490,9 @@ TEST(TestExternCOpr, Dedup) { | |||||
| auto graph = ComputingGraph::make(); | auto graph = ComputingGraph::make(); | ||||
| auto x = opr::Host2DeviceCopy::make(*graph, host_x); | auto x = opr::Host2DeviceCopy::make(*graph, host_x); | ||||
| auto make_opr = [x](float bias) { | auto make_opr = [x](float bias) { | ||||
| std::string name = "test"; | |||||
| return opr::ExternCOprRunner::make_from_desc( | return opr::ExternCOprRunner::make_from_desc( | ||||
| {x.node()}, MGBOprDescImpl<>::make(bias)); | |||||
| name, {x.node()}, MGBOprDescImpl<>::make(bias)); | |||||
| }; | }; | ||||
| auto y0 = make_opr(0.5), y1 = make_opr(0.6), y2 = make_opr(0.5); | auto y0 = make_opr(0.5), y1 = make_opr(0.6), y2 = make_opr(0.5); | ||||
| ASSERT_EQ(y0, y2); | ASSERT_EQ(y0, y2); | ||||
| @@ -42,6 +42,10 @@ typedef struct MGBTensor { | |||||
| void* data; //!< the tensor value, accessible by caller CPU thread | void* data; //!< the tensor value, accessible by caller CPU thread | ||||
| } MGBTensor; | } MGBTensor; | ||||
| typedef struct ExternCOprParam { | |||||
| //! dummy member, present just so that this struct definition builds | |||||
| size_t _; | |||||
| } ExternCOprParam; | |||||
| /*! | /*! | ||||
| * \brief operator descriptor | * \brief operator descriptor | ||||
| * | * | ||||
| @@ -74,6 +78,9 @@ typedef struct MGBOprDesc { | |||||
| //! custom user data to be associated with this descriptor | //! custom user data to be associated with this descriptor | ||||
| void* user_data; | void* user_data; | ||||
| //! dynamic extern c opr param | |||||
| ExternCOprParam* dynamic_param; | |||||
| } MGBOprDesc; | } MGBOprDesc; | ||||
| //! foreach member function of MGBOprDesc to help initialization | //! foreach member function of MGBOprDesc to help initialization | ||||