From 220ec451817b8e573c5c93af65669af33c220afd Mon Sep 17 00:00:00 2001
From: zhaozhenlong
Date: Sun, 16 Aug 2020 16:11:56 +0800
Subject: [PATCH] fp16 concat

---
 .../runtime/kernel/arm/base/concat_base.cc    |   1 +
 .../runtime/kernel/arm/fp16/concat_fp16.cc    | 130 ++++++++++++++++++
 .../src/runtime/kernel/arm/fp16/concat_fp16.h |  54 ++++++++
 .../kernel/arm/nnacl/fp16/concat_fp16.c       |  43 ++++++
 .../kernel/arm/nnacl/fp16/concat_fp16.h       |  30 ++++
 5 files changed, 258 insertions(+)
 create mode 100644 mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc
 create mode 100644 mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.h
 create mode 100644 mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/concat_fp16.c
 create mode 100644 mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/concat_fp16.h

diff --git a/mindspore/lite/src/runtime/kernel/arm/base/concat_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/concat_base.cc
index e28d88b771..e5d7576454 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/concat_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/concat_base.cc
@@ -108,6 +108,7 @@ kernel::LiteKernel *CpuConcatFp32KernelCreator(const std::vector

diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc
new file mode 100644
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc
@@ -0,0 +1,130 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <arm_neon.h>
#include "nnacl/fp16/concat_fp16.h"
#include "src/runtime/kernel/arm/fp16/concat_fp16.h"
#include "src/kernel_registry.h"
#include "schema/model_generated.h"
#include "include/errorcode.h"
#include "nnacl/fp16/cast_fp16.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Concat;

namespace mindspore::kernel {
int ConcatFp16CPUKernel::Init() {
  auto ret = ConcatBaseCPUKernel::Init();
  if (ret != RET_OK) {
    return ret;
  }
  if (!InferShapeDone()) {
    return RET_OK;
  }

  return ReSize();
}

int ConcatFp16CPUKernel::ReSize() {
  // Release buffers left over from a previous ReSize; the element is taken by
  // reference so the stored pointer itself is reset, not a local copy.
  for (auto &ptr : fp16_inputs_) {
    if (ptr != nullptr) {
      free(ptr);
      ptr = nullptr;
    }
  }
  fp16_inputs_.clear();
  for (size_t i = 0; i < in_tensors_.size(); ++i) {
    auto *ptr = reinterpret_cast<float16_t *>(malloc(sizeof(float16_t) * in_tensors_[i]->ElementsNum()));
    if (ptr == nullptr) {
      MS_LOG(ERROR) << "malloc failed";
      return RET_ERROR;
    }
    fp16_inputs_.push_back(ptr);
  }

  if (fp16_output_ != nullptr) {
    free(fp16_output_);
    fp16_output_ = nullptr;
  }
  fp16_output_ = reinterpret_cast<float16_t *>(malloc(sizeof(float16_t) * out_tensors_[0]->ElementsNum()));
  if (fp16_output_ == nullptr) {
    MS_LOG(ERROR) << "malloc failed";
    return RET_ERROR;
  }
  return ConcatBaseCPUKernel::ReSize();
}

int ConcatFp16CPUKernel::Run() {
  auto prepare_ret = Prepare();
  if (prepare_ret != RET_OK) {
    MS_LOG(ERROR) << "Prepare failed! ret: " << prepare_ret;
    return prepare_ret;
  }
  auto input_num = in_tensors_.size();
  std::vector<float *> inputs_addr(input_num, nullptr);
  std::vector<int *> inputs_output_shape(input_num + 1, nullptr);

  std::vector<std::vector<int>> shapes;
  for (size_t i = 0; i < input_num; ++i) {
    inputs_addr[i] = reinterpret_cast<float *>(in_tensors_[i]->Data());
    if (inputs_addr[i] == nullptr) {
      MS_LOG(ERROR) << "got nullptr when casting in_tensor data to a float pointer";
      return RET_ERROR;
    }

    // Convert each fp32 input into its preallocated fp16 staging buffer.
    Float32ToFloat16(inputs_addr[i], fp16_inputs_[i], in_tensors_[i]->ElementsNum());
    shapes.push_back(in_tensors_[i]->shape());
    inputs_output_shape[i] = shapes[i].data();
  }
  auto output_shape = out_tensors_.at(0)->shape();
  inputs_output_shape[input_num] = output_shape.data();
  auto output_addr = out_tensors_.at(0)->Data();

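  // ConcatFp16 concatenates raw fp16 bytes, so it is handed every input shape
  // plus the output shape; the fp16 result is widened back to fp32 afterwards.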
  ConcatFp16(reinterpret_cast<void **>(fp16_inputs_.data()), input_num, axis_, inputs_output_shape.data(),
             output_shape.size(), reinterpret_cast<void *>(fp16_output_));
  Float16ToFloat32(fp16_output_, reinterpret_cast<float *>(output_addr), out_tensors_.at(0)->ElementsNum());
  return RET_OK;
}

kernel::LiteKernel *CpuConcatFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                               const std::vector<lite::tensor::Tensor *> &outputs,
                                               OpParameter *opParameter, const Context *ctx,
                                               const kernel::KernelKey &desc, const lite::Primitive *primitive) {
  if (opParameter == nullptr) {
    MS_LOG(ERROR) << "Input opParameter is nullptr!";
    return nullptr;
  }
  MS_ASSERT(desc.type == schema::PrimitiveType_Concat);
  auto *kernel = new (std::nothrow) ConcatFp16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "new ConcatFp16CPUKernel failed!";
    return nullptr;
  }
  auto ret = kernel->Init();
  if (ret != RET_OK) {
    delete kernel;
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
    return nullptr;
  }
  return kernel;
}
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Concat, CpuConcatFp16KernelCreator)
}  // namespace mindspore::kernel

diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.h
new file mode 100644
index 0000000000..3f12f1998a
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.h
@@ -0,0 +1,54 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONCAT_FP16_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONCAT_FP16_H_

#include <arm_neon.h>
#include <vector>
#include "src/lite_kernel.h"
#include "include/context.h"
#include "src/runtime/kernel/arm/base/concat_base.h"

using mindspore::lite::Context;

namespace mindspore::kernel {
class ConcatFp16CPUKernel : public ConcatBaseCPUKernel {
 public:
  ConcatFp16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                      const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                      const lite::Primitive *primitive)
      : ConcatBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}

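  // Release the fp16 staging buffers allocated in ReSize(); the output buffer
  // must be freed here as well, or it would leak.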
  ~ConcatFp16CPUKernel() {
    for (auto ptr : fp16_inputs_) {
      if (ptr != nullptr) {
        free(ptr);
      }
    }
    if (fp16_output_ != nullptr) {
      free(fp16_output_);
    }
  }

  int Init() override;

  int ReSize() override;

  int Run() override;

 private:
  std::vector<float16_t *> fp16_inputs_;
  float16_t *fp16_output_ = nullptr;
};
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONCAT_FP16_H_

diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/concat_fp16.c b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/concat_fp16.c
new file mode 100644
index 0000000000..25984f82fe
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/concat_fp16.c
@@ -0,0 +1,43 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "nnacl/fp16/concat_fp16.h"
#include <string.h>

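// Each tensor is viewed as (before_axis, axis, after_axis): before_axis_size
// multiplies the dims in front of the concat axis, after_axis_size is the byte
// width of one slice behind it. Every input then contributes one contiguous
// run of input_stride bytes per outer index, placed at a running axis_offset
// inside the output row.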
void ConcatFp16(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output) {
  int before_axis_size = 1;
  for (int i = 0; i < axis; ++i) {
    before_axis_size *= inputs_output_shape[0][i];
  }
  // after_axis_size is counted in bytes, so it starts at sizeof(float16_t) == 2.
  int after_axis_size = 2;
  for (size_t i = axis + 1; i < shape_size; ++i) {
    after_axis_size *= inputs_output_shape[0][i];
  }
  int axis_offset = 0;
  uint8_t *dst_base = (uint8_t *)output;
  size_t output_stride = after_axis_size * inputs_output_shape[input_num][axis];
  for (int i = 0; i < input_num; ++i) {
    uint8_t *src_base = (uint8_t *)input[i];
    size_t input_stride = after_axis_size * inputs_output_shape[i][axis];
    for (int j = 0; j < before_axis_size; ++j) {
      uint8_t *src = src_base + j * input_stride;
      uint8_t *dst = dst_base + j * output_stride + axis_offset * after_axis_size;
      memcpy(dst, src, input_stride);
    }
    axis_offset += inputs_output_shape[i][axis];
  }
}

diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/concat_fp16.h b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/concat_fp16.h
new file mode 100644
index 0000000000..81d272a2f5
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/concat_fp16.h
@@ -0,0 +1,30 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP16_CONCAT_FP16_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP16_CONCAT_FP16_H_

#include "nnacl/op_base.h"

#ifdef __cplusplus
extern "C" {
#endif
void ConcatFp16(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output);
#ifdef __cplusplus
}
#endif

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP16_CONCAT_FP16_H_
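
For reference, a minimal standalone sketch of how the nnacl entry point could be
driven (not part of the patch; main(), the literal tensor values, and the
assumption of an ARM toolchain where <arm_neon.h> provides float16_t are all
illustrative):

#include <arm_neon.h>
#include <stdio.h>
#include "nnacl/fp16/concat_fp16.h"

int main(void) {
  // Two 2x2 fp16 inputs concatenated along axis 0 into a 4x2 output.
  float16_t in0[4] = {1, 2, 3, 4};
  float16_t in1[4] = {5, 6, 7, 8};
  float16_t out[8] = {0};
  int shape0[2] = {2, 2};
  int shape1[2] = {2, 2};
  int out_shape[2] = {4, 2};
  void *inputs[2] = {in0, in1};
  // All input shapes first, the output shape last, matching the kernel's
  // inputs_output_shape layout.
  int *shapes[3] = {shape0, shape1, out_shape};
  ConcatFp16(inputs, 2, 0, shapes, 2, out);
  for (int i = 0; i < 8; ++i) {
    printf("%.1f ", (float)out[i]);  // expected: 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0
  }
  printf("\n");
  return 0;
}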