Merge pull request !4549 from zhaozhenlong/lite/op/fp16/concat
@@ -108,6 +108,7 @@ kernel::LiteKernel *CpuConcatFp32KernelCreator(const std::vector<lite::tensor::T
  return kernel;
}
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Concat, CpuConcatInt8KernelCreator)
REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Concat, CpuConcatInt32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Concat, CpuConcatFp32KernelCreator)
@@ -0,0 +1,130 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <vector>
#include "nnacl/fp16/concat_fp16.h"
#include "src/runtime/kernel/arm/fp16/concat_fp16.h"
#include "src/kernel_registry.h"
#include "schema/model_generated.h"
#include "include/errorcode.h"
#include "nnacl/fp16/cast_fp16.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Concat;

namespace mindspore::kernel {
int ConcatFp16CPUKernel::Init() {
  auto ret = ConcatBaseCPUKernel::Init();
  if (ret != RET_OK) {
    return ret;
  }
  if (!InferShapeDone()) {
    return RET_OK;
  }
  return ReSize();
}

int ConcatFp16CPUKernel::ReSize() {
  // Release fp16 scratch buffers from a previous ReSize before re-allocating.
  for (auto &ptr : fp16_inputs_) {
    if (ptr != nullptr) {
      free(ptr);
      ptr = nullptr;
    }
  }
  fp16_inputs_.clear();
  for (size_t i = 0; i < in_tensors_.size(); ++i) {
    float16_t *ptr = reinterpret_cast<float16_t *>(malloc(sizeof(float16_t) * in_tensors_[i]->ElementsNum()));
    if (ptr == nullptr) {
      MS_LOG(ERROR) << "malloc failed";
      return RET_ERROR;
    }
    fp16_inputs_.push_back(ptr);
  }
  if (fp16_output_ != nullptr) {
    free(fp16_output_);
    fp16_output_ = nullptr;
  }
  fp16_output_ = reinterpret_cast<float16_t *>(malloc(sizeof(float16_t) * out_tensors_[0]->ElementsNum()));
  if (fp16_output_ == nullptr) {
    MS_LOG(ERROR) << "malloc failed";
    return RET_ERROR;
  }
  return ConcatBaseCPUKernel::ReSize();
}

int ConcatFp16CPUKernel::Run() {
  auto prepare_ret = Prepare();
  if (prepare_ret != RET_OK) {
    MS_LOG(ERROR) << "Prepare fail! ret: " << prepare_ret;
    return prepare_ret;
  }
  auto input_num = in_tensors_.size();
  std::vector<float *> inputs_addr(input_num, nullptr);
  // Holds the shape of every input plus, in the last slot, the shape of the output.
  std::vector<int *> inputs_output_shape(input_num + 1, nullptr);
  std::vector<std::vector<int>> shapes;
  for (size_t i = 0; i < input_num; ++i) {
    inputs_addr[i] = reinterpret_cast<float *>(in_tensors_[i]->Data());
    if (inputs_addr[i] == nullptr) {
      MS_LOG(ERROR) << "got nullptr when casting in_tensor to float ptr";
      return RET_ERROR;
    }
    // Convert each fp32 input into its pre-allocated fp16 scratch buffer.
    Float32ToFloat16(inputs_addr[i], fp16_inputs_[i], in_tensors_[i]->ElementsNum());
    shapes.push_back(in_tensors_[i]->shape());
    inputs_output_shape[i] = shapes[i].data();
  }
  auto output_shape = out_tensors_.at(0)->shape();
  inputs_output_shape[input_num] = output_shape.data();
  auto output_addr = out_tensors_.at(0)->Data();
  // Concatenate in fp16, then convert the result back into the fp32 output tensor.
  ConcatFp16(reinterpret_cast<void **>(fp16_inputs_.data()), input_num, axis_, inputs_output_shape.data(),
             output_shape.size(), reinterpret_cast<void *>(fp16_output_));
  Float16ToFloat32(fp16_output_, reinterpret_cast<float *>(output_addr), out_tensors_.at(0)->ElementsNum());
  return RET_OK;
}

kernel::LiteKernel *CpuConcatFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                               const std::vector<lite::tensor::Tensor *> &outputs,
                                               OpParameter *opParameter, const Context *ctx,
                                               const kernel::KernelKey &desc, const lite::Primitive *primitive) {
  if (opParameter == nullptr) {
    MS_LOG(ERROR) << "Input opParameter is nullptr!";
    return nullptr;
  }
  MS_ASSERT(desc.type == schema::PrimitiveType_Concat);
  auto *kernel = new (std::nothrow) ConcatFp16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "new ConcatFp16CPUKernel fail!";
    return nullptr;
  }
  auto ret = kernel->Init();
  if (ret != RET_OK) {
    delete kernel;
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
    return nullptr;
  }
  return kernel;
}

REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Concat, CpuConcatFp16KernelCreator)
}  // namespace mindspore::kernel
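Note (not part of this patch): Run() round-trips fp32 tensor data through the fp16 scratch buffers — convert inputs with Float32ToFloat16, concatenate in fp16, then convert back with Float16ToFloat32. The minimal standalone sketch below mirrors that flow with hypothetical buffers and plain scalar casts instead of the nnacl cast helpers; it assumes an aarch64 toolchain where <arm_neon.h> defines float16_t.

#include <arm_neon.h>
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // Hypothetical fp32 inputs (stand-ins for in_tensors_ data).
  std::vector<float> in0 = {1.0f, 2.0f};
  std::vector<float> in1 = {3.0f, 4.0f, 5.0f};
  // 1. fp32 -> fp16 scratch buffers (the role of Float32ToFloat16 per input).
  std::vector<float16_t> h0(in0.size()), h1(in1.size());
  for (size_t i = 0; i < in0.size(); ++i) h0[i] = static_cast<float16_t>(in0[i]);
  for (size_t i = 0; i < in1.size(); ++i) h1[i] = static_cast<float16_t>(in1[i]);
  // 2. Concatenate the fp16 buffers (ConcatFp16 does this with strided memcpy).
  std::vector<float16_t> hout;
  hout.insert(hout.end(), h0.begin(), h0.end());
  hout.insert(hout.end(), h1.begin(), h1.end());
  // 3. fp16 -> fp32 into the output buffer (the role of Float16ToFloat32).
  std::vector<float> out(hout.size());
  for (size_t i = 0; i < hout.size(); ++i) out[i] = static_cast<float>(hout[i]);
  for (float v : out) std::cout << v << " ";  // prints: 1 2 3 4 5
  std::cout << std::endl;
  return 0;
}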
@@ -0,0 +1,54 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONCAT_FP16_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONCAT_FP16_H_

#include <arm_neon.h>
#include <vector>
#include "src/lite_kernel.h"
#include "include/context.h"
#include "src/runtime/kernel/arm/base/concat_base.h"

using mindspore::lite::Context;

namespace mindspore::kernel {
class ConcatFp16CPUKernel : public ConcatBaseCPUKernel {
 public:
  ConcatFp16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                      const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                      const lite::Primitive *primitive)
      : ConcatBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
  ~ConcatFp16CPUKernel() {
    for (auto ptr : fp16_inputs_) {
      if (ptr != nullptr) {
        free(ptr);
      }
    }
    // Also release the fp16 output scratch buffer allocated in ReSize().
    if (fp16_output_ != nullptr) {
      free(fp16_output_);
    }
  }

  int Init() override;
  int ReSize() override;
  int Run() override;

 private:
  std::vector<float16_t *> fp16_inputs_;
  float16_t *fp16_output_ = nullptr;
};
}  // namespace mindspore::kernel

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONCAT_FP16_H_
@@ -0,0 +1,43 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "nnacl/fp16/concat_fp16.h"
#include <string.h>

void ConcatFp16(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output) {
  // Number of independent slices before the concat axis.
  int before_axis_size = 1;
  for (int i = 0; i < axis; ++i) {
    before_axis_size *= inputs_output_shape[0][i];
  }
  // after_axis_size is accumulated in bytes: start from sizeof(float16_t) == 2 so the
  // per-slice strides below can be fed straight into memcpy.
  int after_axis_size = 2;
  for (size_t i = axis + 1; i < shape_size; ++i) {
    after_axis_size *= inputs_output_shape[0][i];
  }
  int axis_offset = 0;
  uint8_t *dst_base = (uint8_t *)output;
  size_t output_stride = after_axis_size * inputs_output_shape[input_num][axis];
  for (int i = 0; i < input_num; ++i) {
    uint8_t *src_base = (uint8_t *)input[i];
    size_t input_stride = after_axis_size * inputs_output_shape[i][axis];
    // Copy this input's contribution into every slice of the output along the concat axis.
    for (int j = 0; j < before_axis_size; ++j) {
      uint8_t *src = src_base + j * input_stride;
      uint8_t *dst = dst_base + j * output_stride + axis_offset * after_axis_size;
      memcpy(dst, src, input_stride);
    }
    axis_offset += inputs_output_shape[i][axis];
  }
}
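Note (not part of this patch): ConcatFp16 does all of its stride arithmetic in bytes. The short illustration below works the numbers for made-up shapes — two inputs of shape [2, 3] and [2, 2] concatenated on axis 1 — so the before/after-axis bookkeeping above is easy to follow.

#include <cstdio>

int main() {
  const int elem_size = 2;  // sizeof(float16_t) in bytes
  int shape0[2] = {2, 3};
  int shape1[2] = {2, 2};
  int out_shape[2] = {2, 5};
  const int axis = 1;
  int before_axis_size = shape0[0];                       // 2 rows, each copied independently
  int after_axis_size = elem_size;                        // no dims after axis, so just 2 bytes
  int input_stride0 = after_axis_size * shape0[axis];     // 6 bytes per row from input 0
  int input_stride1 = after_axis_size * shape1[axis];     // 4 bytes per row from input 1
  int output_stride = after_axis_size * out_shape[axis];  // 10 bytes per output row
  std::printf("%d rows: %d bytes at offset 0, then %d bytes at offset %d, in each %d-byte output row\n",
              before_axis_size, input_stride0, input_stride1, input_stride0, output_stride);
  return 0;
}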
@@ -0,0 +1,30 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP16_CONCAT_FP16_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP16_CONCAT_FP16_H_

#include "nnacl/op_base.h"

#ifdef __cplusplus
extern "C" {
#endif

void ConcatFp16(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output);

#ifdef __cplusplus
}
#endif

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP16_CONCAT_FP16_H_
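Note (not part of this patch): a hypothetical caller sketch showing how this C entry point is wired up, mirroring what ConcatFp16CPUKernel::Run() does with its inputs_output_shape array. It assumes an aarch64 build that links nnacl and gets float16_t from <arm_neon.h>; the shapes and data are made up for illustration.

#include <arm_neon.h>
#include "nnacl/fp16/concat_fp16.h"

int main() {
  float16_t in0[2] = {static_cast<float16_t>(1.0f), static_cast<float16_t>(2.0f)};
  float16_t in1[3] = {static_cast<float16_t>(3.0f), static_cast<float16_t>(4.0f), static_cast<float16_t>(5.0f)};
  float16_t out[5] = {};
  int shape0[2] = {1, 2};
  int shape1[2] = {1, 3};
  int out_shape[2] = {1, 5};
  void *inputs[2] = {in0, in1};
  // The last slot carries the output shape, matching how Run() fills inputs_output_shape.
  int *inputs_output_shape[3] = {shape0, shape1, out_shape};
  ConcatFp16(inputs, 2, 1, inputs_output_shape, 2, out);  // concat along axis 1 -> out = {1, 2, 3, 4, 5}
  return 0;
}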