| @@ -108,11 +108,11 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector< | |||
| } | |||
| break; | |||
| } | |||
| if (index == in_tensors.size()) { | |||
| MS_LOG(ERROR) << "Can't find corresponding ms lite tensor of " << i << " input tensor for npu executor " | |||
| << model_name_; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| if (index == in_tensors.size()) { | |||
| MS_LOG(ERROR) << "Can't find corresponding ms lite tensor of " << i << " input tensor for npu executor " | |||
| << model_name_; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| context.AddPara("model_name", model_name_); | |||
| @@ -27,9 +27,7 @@ | |||
| #include "include/HiAiModelManagerService.h" | |||
| namespace mindspore::lite { | |||
| static std::set<mindspore::schema::PrimitiveType> npu_trans_nodes = { | |||
| schema::PrimitiveType_Conv2DFusion, schema::PrimitiveType_Conv2dTransposeFusion, schema::PrimitiveType_Resize, | |||
| schema::PrimitiveType_MaxPoolFusion, schema::PrimitiveType_AvgPoolFusion}; | |||
| struct SubGraphModel { | |||
| public: | |||
| SubGraphModel(int index, std::string model_name, std::shared_ptr<domi::ModelBufferData> model_buffer_data) | |||
| @@ -117,7 +117,7 @@ int NPUInsertTransformPass::InsertNode(kernel::LiteKernel *kernel, kernel::LiteK | |||
| std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}; | |||
| auto nh2nc_name = kernel_name + "_nh2nc_" + std::to_string(total++); | |||
| auto nh2nc_tensor = new (std::nothrow) Tensor(in_tensor->data_type(), nchw_shape, schema::Format_NHWC, Tensor::VAR); | |||
| auto nh2nc_tensor = new (std::nothrow) Tensor(in_tensor->data_type(), nchw_shape, schema::Format_NCHW, Tensor::VAR); | |||
| if (nh2nc_tensor == nullptr) { | |||
| MS_LOG(ERROR) << "New nchw tensor failed when inserting nchw2nhwc kernel."; | |||
| return RET_ERROR; | |||
| @@ -127,7 +127,7 @@ int NPUInsertTransformPass::InsertNode(kernel::LiteKernel *kernel, kernel::LiteK | |||
| all_tensors_->push_back(nh2nc_tensors[0]); | |||
| auto nc2nh_name = kernel_name + "_nc2nh_" + std::to_string(total++); | |||
| auto nc2nh_tensor = new (std::nothrow) Tensor(in_tensor->data_type(), nhwc_shape, schema::Format_NCHW, Tensor::VAR); | |||
| auto nc2nh_tensor = new (std::nothrow) Tensor(in_tensor->data_type(), nhwc_shape, schema::Format_NHWC, Tensor::VAR); | |||
| if (nc2nh_tensor == nullptr) { | |||
| MS_LOG(ERROR) << "New nhwc tensor failed when inserting nhwc2nchw kernel."; | |||
| return RET_ERROR; | |||
| @@ -15,8 +15,10 @@ | |||
| */ | |||
| #include "src/runtime/agent/npu/optimizer/npu_pass_utils.h" | |||
| #include <algorithm> | |||
| #include "src/runtime/agent/npu/npu_manager.h" | |||
| #include "nnacl/transpose.h" | |||
| #include "nnacl/scale.h" | |||
| #include "src/ops/populate/populate_register.h" | |||
| #include "src/runtime/kernel/arm/fp32/transpose_fp32.h" | |||
| @@ -47,6 +49,7 @@ kernel::LiteKernel *NPUPassUtils::CreateNchw2NhwcKernel(const std::vector<Tensor | |||
| kernel->set_desc(key); | |||
| } else { | |||
| MS_LOG(ERROR) << "New Nchw2Nhwc Kernel failed."; | |||
| free(transpose_param); | |||
| return nullptr; | |||
| } | |||
| @@ -106,8 +109,9 @@ void NPUPassUtils::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *pre_kernel, | |||
| pre_kernel->set_out_kernels(out_kernels); | |||
| } | |||
| void NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *pre_kernel, kernel::LiteKernel *trans_kernel, | |||
| std::vector<kernel::LiteKernel *> kernels) { | |||
| void NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *pre_kernel, | |||
| const std::vector<kernel::LiteKernel *> &trans_kernels, | |||
| const std::vector<kernel::LiteKernel *> &kernels) { | |||
| // For kernel before trans, there may be multiple outputs. | |||
| auto cur_out_kernels = pre_kernel->out_kernels(); | |||
| for (size_t i = 0; i < kernels.size(); i++) { | |||
| @@ -116,11 +120,11 @@ void NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *pre_kernel, | |||
| cur_out_kernels.erase(itr); | |||
| } | |||
| } | |||
| cur_out_kernels.push_back(trans_kernel); | |||
| std::copy(trans_kernels.begin(), trans_kernels.end(), std::back_inserter(cur_out_kernels)); | |||
| pre_kernel->set_out_kernels(cur_out_kernels); | |||
| // For kernel before trans, the output tensor is used for output tensor of trans, so replace the output tensor with | |||
| // the input tensor of trans. | |||
| pre_kernel->set_out_tensors(trans_kernel->in_tensors()); | |||
| pre_kernel->set_out_tensors({trans_kernels.at(0)->in_tensors().at(0)}); | |||
| } | |||
| void NPUPassUtils::UpdateNH2NCTransNodePostKernel(kernel::LiteKernel *trans_kernel, kernel::LiteKernel *post_kernel) { | |||
| @@ -230,4 +234,11 @@ kernel::LiteKernel *NPUPassUtils::KernelInputFromKernel(const kernel::LiteKernel | |||
| } | |||
| return *it; | |||
| } | |||
| bool NPUPassUtils::Scale4dCase(const kernel::LiteKernel *kernel) { | |||
| MS_ASSERT(kernel != nullptr && kernel->op_parameter() != nullptr); | |||
| auto scale_param = reinterpret_cast<ScaleParameter *>(kernel->op_parameter()); | |||
| auto in_tensor = kernel->in_tensors().at(1); | |||
| return in_tensor->shape().size() == 1 && (scale_param->axis_ == 3 || scale_param->axis_ == -1); | |||
| } | |||
| } // namespace mindspore::lite | |||
| @@ -37,8 +37,9 @@ class NPUPassUtils { | |||
| static void UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *pre_kernel, kernel::LiteKernel *trans_kernel, | |||
| kernel::LiteKernel *kernel); | |||
| static void UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *pre_kernel, kernel::LiteKernel *trans_kernel, | |||
| std::vector<kernel::LiteKernel *> kernels); | |||
| static void UpdateNC2NHTransNodePreKernel(kernel::LiteKernel *pre_kernel, | |||
| const std::vector<kernel::LiteKernel *> &trans_kernels, | |||
| const std::vector<kernel::LiteKernel *> &kernels); | |||
| static void UpdateNH2NCTransNodePostKernel(kernel::LiteKernel *trans_kernel, kernel::LiteKernel *post_kernel); | |||
| @@ -52,6 +53,7 @@ class NPUPassUtils { | |||
| static bool IsNchw2Nhwc(const kernel::LiteKernel *kernel); | |||
| static kernel::LiteKernel *KernelInputFromKernel(const kernel::LiteKernel *kernel, size_t in_tensor_index); | |||
| static bool Scale4dCase(const kernel::LiteKernel *kernel); | |||
| }; | |||
| } // namespace mindspore::lite | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_OPTIMIZER_NPU_PASS_UTILS_H_ | |||
| @@ -14,12 +14,18 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/agent/npu/optimizer/npu_transform_pass.h" | |||
| #include <set> | |||
| #include <vector> | |||
| #include "src/lite_kernel.h" | |||
| #include "src/runtime/agent/npu/npu_manager.h" | |||
| #include "src/runtime/agent/npu/optimizer/npu_pass_utils.h" | |||
| namespace mindspore::lite { | |||
| using kernel::KERNEL_ARCH::kNPU; | |||
| static std::set<mindspore::schema::PrimitiveType> npu_trans_nodes = { | |||
| schema::PrimitiveType_Conv2DFusion, schema::PrimitiveType_Conv2dTransposeFusion, schema::PrimitiveType_Resize, | |||
| schema::PrimitiveType_MaxPoolFusion, schema::PrimitiveType_AvgPoolFusion, schema::PrimitiveType_ScaleFusion}; | |||
| int NPUTransformPass::InsertPreNodes(kernel::LiteKernel *kernel, std::vector<kernel::LiteKernel *> *trans_kernels) { | |||
| bool is_input_kernel = kernel->in_kernels().empty(); | |||
| // single input | |||
| @@ -80,57 +86,93 @@ int NPUTransformPass::InsertPostNodes(kernel::LiteKernel *kernel, std::vector<ke | |||
| // Get the post kernel that need insert trans kernel. | |||
| // If no need for inserting trans kernel, the post kernel must be npu and in trans_nodes. | |||
| std::vector<kernel::LiteKernel *> post_insert_kernels; | |||
| std::vector<kernel::LiteKernel *> post_non_insert_kernels; | |||
| for (int i = 0; i < kernel->out_kernels().size(); i++) { | |||
| auto post_kernel = kernel->out_kernels()[i]; | |||
| if (post_kernel->desc().arch != kNPU || npu_trans_nodes.find(post_kernel->Type()) == npu_trans_nodes.end()) { | |||
| post_insert_kernels.push_back(post_kernel); | |||
| } else { | |||
| post_non_insert_kernels.push_back(post_kernel); | |||
| } | |||
| } | |||
| if (is_output_kernel || !post_insert_kernels.empty()) { | |||
| // Create post transform kernel's in tensor. | |||
| auto nhwc_shape = kernel->out_tensors()[0]->shape(); | |||
| std::vector<int> nchw_shape = {nhwc_shape[0], nhwc_shape[3], nhwc_shape[1], nhwc_shape[2]}; | |||
| auto tensor = | |||
| auto nc2nh_tensor = | |||
| new (std::nothrow) Tensor(kernel->out_tensors()[0]->data_type(), nchw_shape, schema::Format_NCHW, Tensor::VAR); | |||
| if (tensor == nullptr) { | |||
| if (nc2nh_tensor == nullptr) { | |||
| MS_LOG(ERROR) << "New nchw tensor failed when inserting post nchw2nhwc kernel."; | |||
| return RET_ERROR; | |||
| } | |||
| std::vector<Tensor *> post_trans_in_tensors = {tensor}; | |||
| all_tensors_->push_back(tensor); | |||
| all_tensors_->push_back(nc2nh_tensor); | |||
| auto name = kernel->name() + "_post_trans" + "_Nchw2Nhwc" + std::to_string(total++); | |||
| tensor->set_tensor_name(name + "/input0"); | |||
| nc2nh_tensor->set_tensor_name(name + "/input0"); | |||
| auto nc2nh_perm_tensor = new Tensor(kNumberTypeInt32, {4}, schema::Format_NHWC, Tensor::CONST_TENSOR); | |||
| auto nc2nh_data = nc2nh_perm_tensor->MutableData(); | |||
| if (nc2nh_data == nullptr) { | |||
| return RET_ERROR; | |||
| if (is_output_kernel) { | |||
| // perm tensor | |||
| auto nc2nh_perm_tensor = new Tensor(kNumberTypeInt32, {4}, schema::Format_NHWC, Tensor::CONST_TENSOR); | |||
| auto nc2nh_data = nc2nh_perm_tensor->MutableData(); | |||
| if (nc2nh_data == nullptr) { | |||
| return RET_ERROR; | |||
| } | |||
| std::vector<int> nc2nh_perm_vector = {0, 2, 3, 1}; | |||
| memcpy(nc2nh_data, nc2nh_perm_vector.data(), 4 * sizeof(int)); | |||
| all_tensors_->push_back(nc2nh_perm_tensor); | |||
| std::vector<lite::Tensor *> nc2nh_out_tensors{kernel->out_tensors().at(0)}; | |||
| // Create post transform kernel: Nchw2Nhwc | |||
| auto *post_trans_kernel = | |||
| NPUPassUtils::CreateNchw2NhwcKernel({nc2nh_tensor, nc2nh_perm_tensor}, nc2nh_out_tensors, context_, name); | |||
| // Set in_kernels, out_kernels, in_tensors, out_tensors for transform kernel | |||
| NPUPassUtils::UpdateKernel(post_trans_kernel, {kernel}, {}, post_trans_kernel->in_tensors(), | |||
| post_trans_kernel->out_tensors()); | |||
| trans_kernels->push_back(post_trans_kernel); | |||
| } | |||
| // for each to-be-insert out kernel, create one transpose kernel, one perm tensor, one out tensor | |||
| // but using same one in_tensor. | |||
| for (auto i = 0; i < post_insert_kernels.size(); ++i) { | |||
| auto post_insert_kernel = post_insert_kernels.at(i); | |||
| // perm tensor | |||
| auto nc2nh_perm_tensor = new Tensor(kNumberTypeInt32, {4}, schema::Format_NHWC, Tensor::CONST_TENSOR); | |||
| auto nc2nh_data = nc2nh_perm_tensor->MutableData(); | |||
| if (nc2nh_data == nullptr) { | |||
| return RET_ERROR; | |||
| } | |||
| std::vector<int> nc2nh_perm_vector = {0, 2, 3, 1}; | |||
| memcpy(nc2nh_data, nc2nh_perm_vector.data(), 4 * sizeof(int)); | |||
| all_tensors_->push_back(nc2nh_perm_tensor); | |||
| // nc2nh kernel out tensor: 1st kernel uses original out_tensor, remaining kernels use newly created out tensor. | |||
| std::vector<lite::Tensor *> nc2nh_out_tensors{nullptr}; | |||
| std::vector<int> nc2nh_perm_vector = {0, 2, 3, 1}; | |||
| memcpy(nc2nh_data, nc2nh_perm_vector.data(), 4 * sizeof(int)); | |||
| all_tensors_->push_back(nc2nh_perm_tensor); | |||
| // Create post transform kernel: Nchw2Nhwc | |||
| auto *post_trans_kernel = NPUPassUtils::CreateNchw2NhwcKernel({post_trans_in_tensors[0], nc2nh_perm_tensor}, | |||
| kernel->out_tensors(), context_, name); | |||
| // Set in_kernels, out_kernels, in_tensors, out_tensors for transform kernel | |||
| NPUPassUtils::UpdateKernel(post_trans_kernel, {kernel}, post_insert_kernels, post_trans_kernel->in_tensors(), | |||
| kernel->out_tensors()); | |||
| trans_kernels->push_back(post_trans_kernel); | |||
| if (!is_output_kernel) { | |||
| for (int i = 0; i < kernel->out_kernels().size(); i++) { | |||
| auto post_kernel = kernel->out_kernels()[i]; | |||
| if (find(post_insert_kernels.begin(), post_insert_kernels.end(), post_kernel) != post_insert_kernels.end()) { | |||
| NPUPassUtils::UpdateNC2NHTransNodePostKernel(kernel, post_trans_kernel, post_kernel); | |||
| } else { | |||
| NPUPassUtils::UpdateNC2NHPostKernelInTensors(kernel, post_trans_kernel, post_kernel); | |||
| } | |||
| auto origin_out_tensor = kernel->out_tensors().at(0); | |||
| auto out_tensor = lite::Tensor::CopyTensor(*origin_out_tensor, false); | |||
| if (out_tensor == nullptr) { | |||
| MS_LOG(ERROR) << "New nhwc tensor failed when inserting post nchw2nhwc kernel."; | |||
| return RET_ERROR; | |||
| } | |||
| all_tensors_->push_back(out_tensor); | |||
| auto out_tensor_name = kernel->name() + "_post_trans" + "_Nchw2Nhwc_" + std::to_string(i) + "_out_tensor"; | |||
| out_tensor->set_tensor_name(out_tensor_name); | |||
| nc2nh_out_tensors[0] = out_tensor; | |||
| // Create post transform kernel: Nchw2Nhwc | |||
| auto *post_trans_kernel = | |||
| NPUPassUtils::CreateNchw2NhwcKernel({nc2nh_tensor, nc2nh_perm_tensor}, nc2nh_out_tensors, context_, name); | |||
| // Set in_kernels, out_kernels, in_tensors, out_tensors for transform kernel | |||
| NPUPassUtils::UpdateKernel(post_trans_kernel, {kernel}, {post_insert_kernel}, post_trans_kernel->in_tensors(), | |||
| post_trans_kernel->out_tensors()); | |||
| trans_kernels->push_back(post_trans_kernel); | |||
| // update post kernel in_tensors in_kernels | |||
| NPUPassUtils::UpdateNC2NHTransNodePostKernel(kernel, post_trans_kernel, post_insert_kernel); | |||
| } | |||
| NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel, post_trans_kernel, post_insert_kernels); | |||
| // for those non-insert post kernels, update their in_tensor | |||
| for (auto non_insert_kernel : post_non_insert_kernels) { | |||
| auto in_tensors = non_insert_kernel->in_tensors(); | |||
| std::replace(in_tensors.begin(), in_tensors.end(), kernel->out_tensors().at(0), nc2nh_tensor); | |||
| non_insert_kernel->set_in_tensors(in_tensors); | |||
| } | |||
| // update origin kernel's out tensor and out kernel | |||
| NPUPassUtils::UpdateNC2NHTransNodePreKernel(kernel, *trans_kernels, post_insert_kernels); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -142,6 +184,10 @@ int NPUTransformPass::Run() { | |||
| i++; | |||
| continue; | |||
| } | |||
| if (kernel->Type() == schema::PrimitiveType_ScaleFusion && !NPUPassUtils::Scale4dCase(kernel)) { | |||
| i++; | |||
| continue; | |||
| } | |||
| if (kernel->Type() == schema::PrimitiveType_Resize && | |||
| kernel->in_tensors()[0]->Height() > kernel->out_tensors()[0]->Height()) { | |||
| i++; | |||
| @@ -33,6 +33,10 @@ namespace mindspore::kernel { | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| static std::set<mindspore::schema::PrimitiveType> npu_specific_weight_nodes = { | |||
| schema::PrimitiveType_Conv2DFusion, schema::PrimitiveType_Conv2dTransposeFusion, schema::PrimitiveType_ScaleFusion, | |||
| schema::PrimitiveType_BatchNorm, schema::PrimitiveType_FullConnection, schema::PrimitiveType_InstanceNorm}; | |||
| SubGraphNpuKernel::~SubGraphNpuKernel() { | |||
| subgraph_input_op_.clear(); | |||
| subgraph_output_op_.clear(); | |||
| @@ -125,7 +129,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() { | |||
| // weight tensor | |||
| if (is_weight_tensor) { | |||
| if (lite::npu_trans_nodes.find(node->Type()) == lite::npu_trans_nodes.end()) { | |||
| if (npu_specific_weight_nodes.find(node->Type()) == npu_specific_weight_nodes.end()) { | |||
| auto name = node->name() + "_" + std::to_string(count++); | |||
| auto weight_const = new (std::nothrow) hiai::op::Const(node->name() + "_" + std::to_string(count++)); | |||
| if (weight_const == nullptr) { | |||
| @@ -15,10 +15,13 @@ | |||
| */ | |||
| #include "src/runtime/kernel/npu/scale_npu.h" | |||
| #include <memory> | |||
| #include "src/kernel_registry.h" | |||
| #include "src/runtime/agent/npu/npu_converter_utils.h" | |||
| using mindspore::kernel::KERNEL_ARCH::kNPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::schema::Format_NHWC; | |||
| using mindspore::schema::PrimitiveType_ScaleFusion; | |||
| namespace mindspore::kernel { | |||
| @@ -27,6 +30,13 @@ int ScaleNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const s | |||
| if (scale_parameter_->axis_ < 0) { | |||
| scale_parameter_->axis_ = scale_parameter_->axis_ + inputs[0]->shape().size(); | |||
| } | |||
| if (inputs.size() > 1 && inputs[0]->shape().size() == 4 && inputs[0]->format() == schema::Format_NHWC) { | |||
| if (scale_parameter_->axis_ != 3) { | |||
| MS_LOG(ERROR) << "Npu scale axis attr only support on channel, now is " << scale_parameter_->axis_; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| if (scale_parameter_->axis_ != 1) { | |||
| MS_LOG(ERROR) << "Npu scale axis attr only support 1, now is " << scale_parameter_->axis_; | |||
| return RET_ERROR; | |||
| @@ -41,22 +51,102 @@ int ScaleNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, cons | |||
| MS_LOG(ERROR) << name_ << " op is nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| op_->set_attr_axis(scale_parameter_->axis_); | |||
| op_->set_input_x(*npu_inputs[0]); | |||
| op_->set_input_scale(*npu_inputs[1]); | |||
| if (npu_inputs[2] != nullptr) { | |||
| op_->set_input_bias(*npu_inputs[2]); | |||
| op_->set_attr_axis(1); // only support axis 1 now | |||
| op_->set_input_x(*npu_inputs.at(0)); | |||
| MS_ASSERT(inputs.size() > 1); | |||
| auto scale_shape = inputs.at(1)->shape(); | |||
| std::shared_ptr<ge::Tensor> scale_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor()); | |||
| if (scale_tensor == nullptr) { | |||
| MS_LOG(ERROR) << "new scale_tensor failed."; | |||
| return RET_ERROR; | |||
| } | |||
| ge::TensorDesc scale_tensor_desc(lite::ConverterToNPUShape({1, scale_shape[0], 1, 1}), ge::FORMAT_NCHW, | |||
| lite::ConverterToNPUDataType(inputs[1]->data_type())); | |||
| scale_tensor->SetTensorDesc(scale_tensor_desc); | |||
| scale_tensor->SetData(reinterpret_cast<const uint8_t *>(inputs[1]->data_c()), inputs[1]->Size()); | |||
| scale_ = new (std::nothrow) hiai::op::Const(name_ + "_scale"); | |||
| if (scale_ == nullptr) { | |||
| MS_LOG(ERROR) << "New scale_ const failed."; | |||
| return RET_ERROR; | |||
| } | |||
| scale_->set_attr_value(scale_tensor); | |||
| op_->set_input_scale(*scale_); | |||
| if (inputs.size() > 2 && inputs[2] != nullptr) { | |||
| auto bias_shape = inputs[2]->shape(); | |||
| std::shared_ptr<ge::Tensor> bias_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor()); | |||
| if (bias_tensor == nullptr) { | |||
| MS_LOG(ERROR) << "new bias_tensor failed."; | |||
| return RET_ERROR; | |||
| } | |||
| ge::TensorDesc bias_tensor_desc(lite::ConverterToNPUShape({1, bias_shape[0], 1, 1}), ge::FORMAT_NCHW, | |||
| lite::ConverterToNPUDataType(inputs[2]->data_type())); | |||
| bias_tensor->SetTensorDesc(bias_tensor_desc); | |||
| bias_tensor->SetData(reinterpret_cast<const uint8_t *>(inputs[2]->data_c()), inputs[2]->Size()); | |||
| bias_ = new (std::nothrow) hiai::op::Const(name_ + "_beta"); | |||
| if (bias_ == nullptr) { | |||
| MS_LOG(ERROR) << "New beta_ const failed."; | |||
| return RET_ERROR; | |||
| } | |||
| bias_->set_attr_value(bias_tensor); | |||
| op_->set_input_bias(*bias_); | |||
| } | |||
| if (scale_parameter_->activation_type_ != schema::ActivationType_NO_ACTIVATION) { | |||
| auto ret = SetActivation(op_, scale_parameter_->activation_type_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "New activation npu operator for op " << name_ << " failed."; | |||
| return ret; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| ge::Operator *mindspore::kernel::ScaleNPUKernel::GetNPUOp() { return this->op_; } | |||
| ge::Operator *mindspore::kernel::ScaleNPUKernel::GetNPUOp() { | |||
| if (scale_parameter_->activation_type_ == schema::ActivationType_NO_ACTIVATION) { | |||
| return op_; | |||
| } else { | |||
| return act_; | |||
| } | |||
| } | |||
| int ScaleNPUKernel::SetActivation(const ge::Operator *input, int act_type) { | |||
| act_ = new (std::nothrow) hiai::op::Activation(name_ + "_act"); | |||
| if (act_ == nullptr) { | |||
| MS_LOG(ERROR) << "New activation npu operator for op " << name_ << " failed."; | |||
| return RET_ERROR; | |||
| } | |||
| act_->set_input_x(*input); | |||
| if (act_type == schema::ActivationType_RELU) { | |||
| act_->set_attr_mode(1); | |||
| } else if (act_type == schema::ActivationType_RELU6) { | |||
| act_->set_attr_mode(14); | |||
| } else { | |||
| MS_LOG(ERROR) << "Unsupported activation type for scale."; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| ScaleNPUKernel::~ScaleNPUKernel() { | |||
| if (op_ != nullptr) { | |||
| delete op_; | |||
| op_ = nullptr; | |||
| } | |||
| if (scale_ != nullptr) { | |||
| delete scale_; | |||
| scale_ = nullptr; | |||
| } | |||
| if (bias_ != nullptr) { | |||
| delete bias_; | |||
| bias_ = nullptr; | |||
| } | |||
| if (act_ != nullptr) { | |||
| delete act_; | |||
| act_ = nullptr; | |||
| } | |||
| } | |||
| REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_ScaleFusion, NPUKernelCreator<ScaleNPUKernel>) | |||
| @@ -19,6 +19,7 @@ | |||
| #include <vector> | |||
| #include "nnacl/scale.h" | |||
| #include "src/runtime/kernel/npu/npu_kernel.h" | |||
| #include "include/graph/op/all_ops.h" | |||
| #include "include/graph/op/nn_defs.h" | |||
| namespace mindspore::kernel { | |||
| class ScaleNPUKernel : public NPUKernel { | |||
| @@ -36,8 +37,14 @@ class ScaleNPUKernel : public NPUKernel { | |||
| const std::vector<ge::Operator *> &npu_inputs) override; | |||
| ge::Operator *GetNPUOp() override; | |||
| protected: | |||
| int SetActivation(const ge::Operator *input, int act_type); | |||
| private: | |||
| hiai::op::Scale *op_ = nullptr; | |||
| hiai::op::Const *scale_ = nullptr; | |||
| hiai::op::Const *bias_ = nullptr; | |||
| hiai::op::Activation *act_ = nullptr; | |||
| ScaleParameter *scale_parameter_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -69,3 +69,4 @@ ml_video_edit_v10_best_model_nomean_20200723 8 | |||
| #ml_edu_kit_hand_detection.onnx 1 | |||
| ml_edu_kit_hand_key_position.onnx 2 | |||
| #ml_video_edit_oneclick_adaptis.pb #too many subgraphs | |||
| densenet.tflite 3 | |||