@@ -0,0 +1,125 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/runtime/agent/npu/npu_add_transform_pass.h"
#include "src/runtime/agent/npu/npu_pass_utils.h"
namespace mindspore::lite {
using kernel::KERNEL_ARCH::kNPU;
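// Redirect the Add kernel's output edge: wherever `kernel` lists `after_kernel` as a consumer,
// point that edge at the newly inserted Nhwc2Nchw kernel instead.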
int NPUAddTransformPass::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                                        kernel::LiteKernel *after_kernel) {
  std::vector<kernel::LiteKernel *> out_kernels;
  for (auto out_kernel : kernel->out_kernels()) {
    if (out_kernel == after_kernel) {
      out_kernels.push_back(trans_kernel);
    } else {
      out_kernels.push_back(out_kernel);
    }
  }
  NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), out_kernels, kernel->in_tensors(), kernel->out_tensors());
  return RET_OK;
}
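// For every consumer of the Add kernel that is not already a Nhwc2Nchw kernel, insert a
// Nhwc2Nchw -> Nchw2Nhwc transpose pair between the Add output and that consumer, appending the
// intermediate tensors to all_tensors and the new kernels to all_kernels.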
int NPUAddTransformPass::InsertNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
                                    std::vector<kernel::LiteKernel *> *all_kernels,
                                    std::vector<Tensor *> *all_tensors) {
  auto kernel = *it;
  for (auto out_kernel : kernel->out_kernels()) {
    if (out_kernel->Type() == schema::PrimitiveType_Nhwc2Nchw) {
      continue;
    }
    std::vector<int> nh2nc_shape = {kernel->out_tensors()[0]->shape()[0], kernel->out_tensors()[0]->shape()[3],
                                    kernel->out_tensors()[0]->shape()[1], kernel->out_tensors()[0]->shape()[2]};
    auto nh2nc_tensor =
      new Tensor(kernel->out_tensors()[0]->data_type(), nh2nc_shape, schema::Format_NHWC, Tensor::VAR);
    std::vector<Tensor *> nh2nc_tensors = {nh2nc_tensor};
    all_tensors->push_back(nh2nc_tensors[0]);
    std::vector<int> nc2nh_shape = {nh2nc_shape[0], nh2nc_shape[2], nh2nc_shape[3], nh2nc_shape[1]};
    auto nc2nh_tensor = new Tensor(nh2nc_tensor->data_type(), nc2nh_shape, schema::Format_NCHW, Tensor::VAR);
    std::vector<Tensor *> nc2nh_tensors = {nc2nh_tensor};
    all_tensors->push_back(nc2nh_tensors[0]);
    auto nh2nc_name = kernel->name() + "_nh2nc_" + std::to_string(total++);
    auto *nh2nc_kernel =
      NPUPassUtils::CreateNhwc2NchwKernel(kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name);
    all_kernels->push_back(nh2nc_kernel);
    auto nc2nh_name = kernel->name() + "_nc2nh_" + std::to_string(total++);
    auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name);
    all_kernels->push_back(nc2nh_kernel);
    NPUPassUtils::UpdateKernel(nh2nc_kernel, {kernel}, {nc2nh_kernel}, kernel->out_tensors(), nh2nc_tensors);
    NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {out_kernel}, nh2nc_tensors, nc2nh_tensors);
    UpdateNH2NCTransNodePreKernel(kernel, nh2nc_kernel, out_kernel);
    UpdateNC2NHTransNodeAfterKernel(kernel, nc2nh_kernel, out_kernel);
  }
  return RET_OK;
}
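// Rewire the downstream kernel: replace the Add output tensor with the Nchw2Nhwc output tensor in its
// input tensors, and replace the Add kernel with the Nchw2Nhwc kernel in its input kernels.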
int NPUAddTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                                         kernel::LiteKernel *next_kernel) {
  std::vector<Tensor *> next_in_tensors;
  for (auto next_in_tensor : next_kernel->in_tensors()) {
    if (next_in_tensor != kernel->out_tensors()[0]) {
      next_in_tensors.push_back(next_in_tensor);
    } else {
      next_in_tensors.push_back(trans_kernel->out_tensors()[0]);
    }
  }
  next_kernel->set_in_tensors(next_in_tensors);
  std::vector<kernel::LiteKernel *> next_in_kernels;
  for (auto in_kernel : next_kernel->in_kernels()) {
    if (in_kernel == kernel) {
      next_in_kernels.push_back(trans_kernel);
    } else {
      next_in_kernels.push_back(in_kernel);
    }
  }
  NPUPassUtils::UpdateKernel(next_kernel, next_in_kernels, next_kernel->out_kernels(), next_in_tensors,
                             next_kernel->out_tensors());
  return RET_OK;
}
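// Walk the kernel list. For every NPU Add kernel with two or more consumers, insert transpose pairs
// after the Add when at least one consumer is not already a Nhwc2Nchw kernel, then rebuild the list.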
int NPUAddTransformPass::Run(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
                             std::vector<Tensor *> *all_tensors) {
  if (context->IsNpuEnabled()) {
    std::vector<kernel::LiteKernel *> new_kernels;
    for (auto it = all_kernels->begin(); it != all_kernels->end(); it++) {
      auto kernel = *it;
      new_kernels.push_back(kernel);
      if (kernel->desc().arch != kNPU) {
        continue;
      }
      if (kernel->Type() == schema::PrimitiveType_Add && kernel->out_kernels().size() >= 2) {
        size_t sum = 0;
        for (auto out_kernel : kernel->out_kernels()) {
          if (out_kernel->Type() == schema::PrimitiveType_Nhwc2Nchw) {
            sum++;
          }
        }
        if (kernel->out_kernels().size() != sum) {
          InsertNode(context, it, &new_kernels, all_tensors);
        }
      }
    }
    all_kernels->clear();
    for (size_t i = 0; i < new_kernels.size(); i++) {
      all_kernels->push_back(new_kernels[i]);
    }
  }
  return RET_OK;
}
}  // namespace mindspore::lite
@@ -0,0 +1,42 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_ADD_TRANSFORM_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_ADD_TRANSFORM_PASS_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/ops/primitive_c.h"
namespace mindspore::lite {
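// Inserts a Nhwc2Nchw -> Nchw2Nhwc transpose pair after NPU Add kernels (with two or more consumers)
// for each consumer that is not already a Nhwc2Nchw kernel.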
class NPUAddTransformPass {
 public:
  int Run(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
          std::vector<Tensor *> *all_tensors);

 private:
  int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                    kernel::LiteKernel *after_kernel);
  int UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                      kernel::LiteKernel *next_kernel);
  int InsertNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
                 std::vector<kernel::LiteKernel *> *all_kernels, std::vector<Tensor *> *all_tensors);

 private:
  int total = 0;
};
}  // namespace mindspore::lite
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_ADD_TRANSFORM_PASS_H_
@@ -0,0 +1,125 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/runtime/agent/npu/npu_concat_transform_pass.h"
#include "src/runtime/agent/npu/npu_pass_utils.h"
namespace mindspore::lite {
using kernel::KERNEL_ARCH::kNPU;
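// Redirect the producer's output edge: wherever `kernel` lists the Concat (`after_kernel`) as a
// consumer, point that edge at the newly inserted Nhwc2Nchw kernel instead.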
int NPUConcatTransformPass::UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                                          kernel::LiteKernel *after_kernel) {
  std::vector<kernel::LiteKernel *> out_kernels;
  for (auto out_kernel : kernel->out_kernels()) {
    if (out_kernel == after_kernel) {
      out_kernels.push_back(trans_kernel);
    } else {
      out_kernels.push_back(out_kernel);
    }
  }
  NPUPassUtils::UpdateKernel(kernel, kernel->in_kernels(), out_kernels, kernel->in_tensors(), kernel->out_tensors());
  return RET_OK;
}
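// For every producer of the Concat kernel that is not itself a Nchw2Nhwc kernel, insert a
// Nhwc2Nchw -> Nchw2Nhwc transpose pair between that producer and the Concat, appending the
// intermediate tensors to all_tensors and the new kernels to all_kernels.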
int NPUConcatTransformPass::InsertNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
                                       std::vector<kernel::LiteKernel *> *all_kernels,
                                       std::vector<Tensor *> *all_tensors) {
  for (auto kernel : (*it)->in_kernels()) {
    if (kernel->Type() == schema::PrimitiveType_Nchw2Nhwc) {
      continue;
    }
    auto out_kernel = (*it);
    std::vector<int> nh2nc_shape = {kernel->out_tensors()[0]->shape()[0], kernel->out_tensors()[0]->shape()[3],
                                    kernel->out_tensors()[0]->shape()[1], kernel->out_tensors()[0]->shape()[2]};
    auto nh2nc_tensor =
      new Tensor(kernel->out_tensors()[0]->data_type(), nh2nc_shape, schema::Format_NHWC, Tensor::VAR);
    std::vector<Tensor *> nh2nc_tensors = {nh2nc_tensor};
    all_tensors->push_back(nh2nc_tensors[0]);
    std::vector<int> nc2nh_shape = {nh2nc_shape[0], nh2nc_shape[2], nh2nc_shape[3], nh2nc_shape[1]};
    auto nc2nh_tensor = new Tensor(nh2nc_tensor->data_type(), nc2nh_shape, schema::Format_NCHW, Tensor::VAR);
    std::vector<Tensor *> nc2nh_tensors = {nc2nh_tensor};
    all_tensors->push_back(nc2nh_tensors[0]);
    auto nh2nc_name = kernel->name() + "_nh2nc_" + std::to_string(total++);
    auto *nh2nc_kernel =
      NPUPassUtils::CreateNhwc2NchwKernel(kernel->out_tensors(), nh2nc_tensors, context, nh2nc_name);
    all_kernels->push_back(nh2nc_kernel);
    auto nc2nh_name = kernel->name() + "_nc2nh_" + std::to_string(total++);
    auto *nc2nh_kernel = NPUPassUtils::CreateNchw2NhwcKernel(nh2nc_tensors, nc2nh_tensors, context, nc2nh_name);
    all_kernels->push_back(nc2nh_kernel);
    NPUPassUtils::UpdateKernel(nh2nc_kernel, {kernel}, {nc2nh_kernel}, kernel->out_tensors(), nh2nc_tensors);
    NPUPassUtils::UpdateKernel(nc2nh_kernel, {nh2nc_kernel}, {out_kernel}, nh2nc_tensors, nc2nh_tensors);
    UpdateNH2NCTransNodePreKernel(kernel, nh2nc_kernel, out_kernel);
    UpdateNC2NHTransNodeAfterKernel(kernel, nc2nh_kernel, out_kernel);
  }
  return RET_OK;
}
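// Rewire the Concat kernel: replace the producer's output tensor with the Nchw2Nhwc output tensor in
// its input tensors, and replace the producer with the Nchw2Nhwc kernel in its input kernels.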
int NPUConcatTransformPass::UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel,
                                                            kernel::LiteKernel *trans_kernel,
                                                            kernel::LiteKernel *next_kernel) {
  std::vector<Tensor *> next_in_tensors;
  for (auto next_in_tensor : next_kernel->in_tensors()) {
    if (next_in_tensor != kernel->out_tensors()[0]) {
      next_in_tensors.push_back(next_in_tensor);
    } else {
      next_in_tensors.push_back(trans_kernel->out_tensors()[0]);
    }
  }
  next_kernel->set_in_tensors(next_in_tensors);
  std::vector<kernel::LiteKernel *> next_in_kernels;
  for (auto in_kernel : next_kernel->in_kernels()) {
    if (in_kernel == kernel) {
      next_in_kernels.push_back(trans_kernel);
    } else {
      next_in_kernels.push_back(in_kernel);
    }
  }
  NPUPassUtils::UpdateKernel(next_kernel, next_in_kernels, next_kernel->out_kernels(), next_in_tensors,
                             next_kernel->out_tensors());
  return RET_OK;
}
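// Walk the kernel list. For every NPU Concat kernel with two or more producers, insert transpose pairs
// in front of the Concat when at least one producer is not already a Nchw2Nhwc kernel, then rebuild the list.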
int NPUConcatTransformPass::Run(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
                                std::vector<Tensor *> *all_tensors) {
  if (context->IsNpuEnabled()) {
    std::vector<kernel::LiteKernel *> new_kernels;
    for (auto it = all_kernels->begin(); it != all_kernels->end(); it++) {
      auto kernel = *it;
      if (kernel->desc().arch != kNPU) {
        new_kernels.push_back(kernel);
        continue;
      }
      if (kernel->Type() == schema::PrimitiveType_Concat && kernel->in_kernels().size() >= 2) {
        size_t sum = 0;
        for (auto in_kernel : kernel->in_kernels()) {
          if (in_kernel->Type() == schema::PrimitiveType_Nchw2Nhwc) {
            sum++;
          }
        }
        if (kernel->in_kernels().size() != sum) {
          InsertNode(context, it, &new_kernels, all_tensors);
        }
      }
      new_kernels.push_back(kernel);
    }
    all_kernels->clear();
    for (size_t i = 0; i < new_kernels.size(); i++) {
      all_kernels->push_back(new_kernels[i]);
    }
  }
  return RET_OK;
}
}  // namespace mindspore::lite
@@ -0,0 +1,42 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_CONCAT_TRANSFORM_PASS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_CONCAT_TRANSFORM_PASS_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/ops/primitive_c.h"
namespace mindspore::lite {
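// Inserts a Nhwc2Nchw -> Nchw2Nhwc transpose pair in front of NPU Concat kernels (with two or more
// producers) for each producer that is not already a Nchw2Nhwc kernel.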
class NPUConcatTransformPass {
 public:
  int Run(const InnerContext *context, std::vector<kernel::LiteKernel *> *all_kernels,
          std::vector<Tensor *> *all_tensors);

 private:
  int UpdateNH2NCTransNodePreKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                    kernel::LiteKernel *after_kernel);
  int UpdateNC2NHTransNodeAfterKernel(kernel::LiteKernel *kernel, kernel::LiteKernel *trans_kernel,
                                      kernel::LiteKernel *next_kernel);
  int InsertNode(const InnerContext *context, std::vector<kernel::LiteKernel *>::iterator it,
                 std::vector<kernel::LiteKernel *> *all_kernels, std::vector<Tensor *> *all_tensors);

 private:
  int total = 0;
};
}  // namespace mindspore::lite
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_CONCAT_TRANSFORM_PASS_H_
@@ -106,24 +106,19 @@ void UpdatePreTensors(kernel::LiteKernel *cur_kernel) {
 }
 void UpdatePostTensors(kernel::LiteKernel *cur_kernel) {
-  auto tensors_vec = cur_kernel->out_tensors();
+  auto tensor = cur_kernel->out_tensors()[0];
   for (auto out_kernel : cur_kernel->out_kernels()) {
-    auto in_tensor = out_kernel->in_tensors()[0];
     auto out_tensor = out_kernel->out_tensors()[0];
-    auto post_kernel = out_kernel->out_kernels()[0];
-    lite::Tensor *cur_tensor = nullptr;
-    for (size_t i = 0; i < post_kernel->in_tensors().size(); i++) {
-      if (post_kernel->in_tensors()[i] == out_tensor) {
-        cur_tensor = post_kernel->in_tensors()[i];
-      }
-    }
-    for (size_t i = 0; i < tensors_vec.size(); i++) {
-      if (tensors_vec[i] == in_tensor) {
-        tensors_vec[i] = cur_tensor;
+    for (auto post_kernel : out_kernel->out_kernels()) {
+      auto tensors_vec = post_kernel->in_tensors();
+      for (int i = 0; i < tensors_vec.size(); i++) {
+        if (tensors_vec[i] == out_tensor) {
+          tensors_vec[i] = tensor;
+        }
       }
+      post_kernel->set_in_tensors(tensors_vec);
     }
   }
-  cur_kernel->set_out_tensors(tensors_vec);
 }
 int TransFormAxis(int axis) {
@@ -31,7 +31,8 @@ namespace mindspore::kernel {
 int ConcatBaseCPUKernel::Init() { return RET_OK; }
 int ConcatBaseCPUKernel::ReSize() {
-  axis_ = concat_param_->axis_ >= 0 ? concat_param_->axis_ : in_tensors_.front()->shape().size() + concat_param_->axis_;
+  concat_param_->axis_ =
+    concat_param_->axis_ >= 0 ? concat_param_->axis_ : in_tensors_.front()->shape().size() + concat_param_->axis_;
   return RET_OK;
 }
@@ -43,7 +43,6 @@ class ConcatBaseCPUKernel : public LiteKernel {
   int Run() override { return 0; }
  protected:
-  int axis_ = 0;
   const InnerContext *ctx_ = nullptr;
   int thread_count_ = 1;
   ConcatParameter *concat_param_ = nullptr;
@@ -120,8 +120,8 @@ int ConcatFp16CPUKernel::Run() {
     fp16_output_ = reinterpret_cast<float16_t *>(out_tensors_.at(0)->MutableData());
   }
   int dtype_len = in_tensors_.at(0)->data_type() == kNumberTypeInt32 ? sizeof(int32_t) : sizeof(float16_t);
-  ConcatFp16(reinterpret_cast<void **>(fp16_inputs_.data()), input_num, axis_, inputs_output_shape.data(),
-             output_shape.size(), reinterpret_cast<void *>(fp16_output_), dtype_len);
+  ConcatFp16(reinterpret_cast<void **>(fp16_inputs_.data()), input_num, concat_param_->axis_,
+             inputs_output_shape.data(), output_shape.size(), reinterpret_cast<void *>(fp16_output_), dtype_len);
   if (out_tensors_.at(0)->data_type() == kNumberTypeFloat32 || out_tensors_.at(0)->data_type() == kNumberTypeFloat) {
     Float16ToFloat32(fp16_output_, reinterpret_cast<float *>(output_addr), out_tensors_.at(0)->ElementsNum());
@@ -59,8 +59,8 @@ int ConcatCPUKernel::DoConcat(int task_id) {
   inputs_output_shape[input_num] = output_shape.data();
   auto output_addr = out_tensors_.at(0)->MutableData();
-  Concat(inputs_addr.data(), input_num, axis_, inputs_output_shape.data(), output_shape.size(), output_addr, task_id,
-         thread_count_);
+  Concat(inputs_addr.data(), input_num, concat_param_->axis_, inputs_output_shape.data(), output_shape.size(),
+         output_addr, task_id, thread_count_);
   return RET_OK;
 }
@@ -88,7 +88,7 @@ int ConcatInt8CPUKernel::ReSize() {
   }
   before_axis_size = 1;
-  for (int i = 0; i < axis_; i++) {
+  for (int i = 0; i < concat_param_->axis_; i++) {
     before_axis_size *= out_tensors_.at(kOutputIndex)->DimensionSize(i);
   }
@@ -104,7 +104,7 @@ int ConcatInt8CPUKernel::ReSize() {
   memcpy(reinterpret_cast<void *>(concat_param_->output_shapes_), output_tensor->shape().data(),
          sizeof(int) * output_dim);
-  for (size_t i = axis_ + 1; i < output_dim; i++) {
+  for (size_t i = concat_param_->axis_ + 1; i < output_dim; i++) {
     after_axis_size *= concat_param_->output_shapes_[i];
   }
   concat_param_->after_axis_size = after_axis_size;
@@ -137,7 +137,7 @@ int ConcatInt8CPUKernel::DoExecute(int task_id) {
   if (real_dst_count <= 0) {
     return lite::RET_OK;
   }
-  Int8Concat(input_data_, output_data_, concat_param_, axis_, real_dst_count, task_id);
+  Int8Concat(input_data_, output_data_, concat_param_, concat_param_->axis_, real_dst_count, task_id);
   return lite::RET_OK;
 }
@@ -24,7 +24,7 @@ using mindspore::schema::PrimitiveType_Concat;
 namespace mindspore::kernel {
 int ConcatNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                                OpParameter *opParameter) {
-  return RET_ERROR;
+  return RET_OK;
 }
 int ConcatNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
@@ -34,7 +34,7 @@ int ConcatNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, con
     MS_LOG(ERROR) << name_ << " op is nullptr";
     return RET_ERROR;
   }
-  op_->set_attr_concat_dim(axis_);
+  op_->set_attr_concat_dim(concat_param_->axis_);
   op_->set_attr_N(npu_inputs.size());
   op_->create_dynamic_input_x(npu_inputs.size());
   for (int i = 0; i < npu_inputs.size(); ++i) {
@@ -20,6 +20,7 @@
 #include "nnacl/concat_parameter.h"
 #include "src/runtime/kernel/npu/npu_kernel.h"
 #include "include/graph/op/all_ops.h"
 namespace mindspore::kernel {
 class ConcatNPUKernel : public NPUKernel {
  public:
@@ -27,8 +28,7 @@ class ConcatNPUKernel : public NPUKernel {
                   const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                   const mindspore::lite::PrimitiveC *primitive)
       : NPUKernel(parameter, inputs, outputs, ctx, primitive) {
-    auto concat_parameter = reinterpret_cast<ConcatParameter *>(parameter);
-    axis_ = concat_parameter->axis_;
+    concat_param_ = reinterpret_cast<ConcatParameter *>(parameter);
   }
   ~ConcatNPUKernel() override;
@@ -40,7 +40,7 @@ class ConcatNPUKernel : public NPUKernel {
  private:
   hiai::op::ConcatD *op_ = nullptr;
-  int axis_;
+  ConcatParameter *concat_param_;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_CONCAT_NPU_H_
@@ -24,7 +24,7 @@ using mindspore::schema::PrimitiveType_Pooling;
 namespace mindspore::kernel {
 int PoolingNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                                 OpParameter *opParameter) {
-  return RET_ERROR;
+  return RET_OK;
 }
 int PoolingNPUKernel::SetPoolingParam() {
@@ -35,6 +35,8 @@
 #include "src/runtime/agent/npu/npu_manager.h"
 #include "src/runtime/agent/npu/npu_transform_pass.h"
 #include "src/runtime/agent/npu/npu_fusion_pass.h"
+#include "src/runtime/agent/npu/npu_add_transform_pass.h"
+#include "src/runtime/agent/npu/npu_concat_transform_pass.h"
 #endif
 namespace mindspore::lite {
 using kernel::KERNEL_ARCH::kCPU;
@@ -532,6 +534,23 @@ int Scheduler::RunPass(std::vector<kernel::LiteKernel *> *dst_kernels) {
     MS_LOG(ERROR) << "Run npu format transform pass failed.";
     return ret;
   }
+  auto add_format_pass = new NPUAddTransformPass;
+  ret = add_format_pass->Run(context_, dst_kernels, &src_tensors_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Run npu add op insert transform pass failed.";
+    return ret;
+  }
+  delete add_format_pass;
+  auto concat_format_pass = new NPUConcatTransformPass;
+  ret = concat_format_pass->Run(context_, dst_kernels, &src_tensors_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Run npu concat op insert transform pass failed.";
+    return ret;
+  }
+  delete concat_format_pass;
   auto fusion_pass = new NPUFusionPass(dst_kernels);
   ret = fusion_pass->Fusion();
   if (ret != RET_OK) {