From: @fuzhiye Reviewed-by: @zhang_xue_tong,@zhang_xue_tong,@hangangqiang Signed-off-by: @zhang_xue_tong,@zhang_xue_tong tags/v1.2.0-rc1
| @@ -136,6 +136,10 @@ int ArithmeticFP16CPUKernel::Execute(const void *input0, const void *input1, voi | |||||
| } | } | ||||
| int ArithmeticFP16CPUKernel::Run() { | int ArithmeticFP16CPUKernel::Run() { | ||||
| if (CheckDataType() != RET_OK) { | |||||
| MS_LOG(ERROR) << "ArithmeticFP16CPUKernel check dataType failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| if (!input0_broadcast_) { | if (!input0_broadcast_) { | ||||
| input0_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(0), context_); | input0_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(0), context_); | ||||
| } | } | ||||
| @@ -31,11 +31,9 @@ class Convolution1x1FP16CPUKernel : public ConvolutionBaseFP16CPUKernel { | |||||
| Convolution1x1FP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | Convolution1x1FP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx, void *origin_weight, | const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx, void *origin_weight, | ||||
| void *origin_bias, TypeId origin_weight_data_type, TypeId origin_bias_data_type) | void *origin_bias, TypeId origin_weight_data_type, TypeId origin_bias_data_type) | ||||
| : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx), | |||||
| : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type), | |||||
| origin_weight_(origin_weight), | origin_weight_(origin_weight), | ||||
| origin_bias_(origin_bias), | |||||
| origin_weight_data_type_(origin_weight_data_type), | |||||
| origin_bias_data_type_(origin_bias_data_type) {} | |||||
| origin_bias_(origin_bias) {} | |||||
| ~Convolution1x1FP16CPUKernel() override; | ~Convolution1x1FP16CPUKernel() override; | ||||
| int Init() override; | int Init() override; | ||||
| @@ -64,8 +62,6 @@ class Convolution1x1FP16CPUKernel : public ConvolutionBaseFP16CPUKernel { | |||||
| float16_t *pack_input_ = nullptr; | float16_t *pack_input_ = nullptr; | ||||
| float16_t *output_ptr_ = nullptr; | float16_t *output_ptr_ = nullptr; | ||||
| MatMulParameter *matmul_param_ = nullptr; | MatMulParameter *matmul_param_ = nullptr; | ||||
| TypeId origin_weight_data_type_; | |||||
| TypeId origin_bias_data_type_; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -39,9 +39,8 @@ int ConvolutionBaseFP16CPUKernel::GetExecuteTensor() { | |||||
| } | } | ||||
| int ConvolutionBaseFP16CPUKernel::GetExecuteFilter(lite::Tensor *weight_tensor, void *origin_data) { | int ConvolutionBaseFP16CPUKernel::GetExecuteFilter(lite::Tensor *weight_tensor, void *origin_data) { | ||||
| auto weight_data_type = weight_tensor->data_type(); | |||||
| MS_ASSERT(weight_data_type == kNumberTypeFloat32 || weight_data_type == kNumberTypeFloat16); | |||||
| if (weight_data_type == kNumberTypeFloat32) { | |||||
| MS_ASSERT(origin_weight_data_type_ == kNumberTypeFloat32 || origin_weight_data_type_ == kNumberTypeFloat16); | |||||
| if (origin_weight_data_type_ == kNumberTypeFloat32) { | |||||
| float *origin_weight = reinterpret_cast<float *>(origin_data); | float *origin_weight = reinterpret_cast<float *>(origin_data); | ||||
| size_t fp16_weight_size = weight_tensor->Channel() * weight_tensor->Batch() * weight_tensor->Height() * | size_t fp16_weight_size = weight_tensor->Channel() * weight_tensor->Batch() * weight_tensor->Height() * | ||||
| weight_tensor->Width() * sizeof(float16_t); | weight_tensor->Width() * sizeof(float16_t); | ||||
| @@ -27,8 +27,11 @@ namespace mindspore::kernel { | |||||
| class ConvolutionBaseFP16CPUKernel : public ConvolutionBaseCPUKernel { | class ConvolutionBaseFP16CPUKernel : public ConvolutionBaseCPUKernel { | ||||
| public: | public: | ||||
| ConvolutionBaseFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ConvolutionBaseFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx) | |||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} | |||||
| const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx, | |||||
| TypeId origin_weight_data_type, TypeId origin_bias_data_type) | |||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx), | |||||
| origin_weight_data_type_(origin_weight_data_type), | |||||
| origin_bias_data_type_(origin_bias_data_type) {} | |||||
| ~ConvolutionBaseFP16CPUKernel() override; | ~ConvolutionBaseFP16CPUKernel() override; | ||||
| int Init() override { return mindspore::lite::RET_OK; } | int Init() override { return mindspore::lite::RET_OK; } | ||||
| @@ -46,6 +49,8 @@ class ConvolutionBaseFP16CPUKernel : public ConvolutionBaseCPUKernel { | |||||
| float16_t *execute_input_ = nullptr; | float16_t *execute_input_ = nullptr; | ||||
| float16_t *execute_weight_ = nullptr; | float16_t *execute_weight_ = nullptr; | ||||
| float16_t *execute_output_ = nullptr; | float16_t *execute_output_ = nullptr; | ||||
| TypeId origin_weight_data_type_; | |||||
| TypeId origin_bias_data_type_; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -69,17 +69,13 @@ int ConvolutionDelegateFP16CPUKernel::Init() { | |||||
| if (in_tensors_.size() == 3) { | if (in_tensors_.size() == 3) { | ||||
| origin_bias_ = CopyData(in_tensors_.at(kBiasIndex)); | origin_bias_ = CopyData(in_tensors_.at(kBiasIndex)); | ||||
| need_free_ = need_free_ | BIAS_NEED_FREE; | need_free_ = need_free_ | BIAS_NEED_FREE; | ||||
| origin_bias_data_type_ = in_tensors_.at(kBiasIndex)->data_type(); | |||||
| } | } | ||||
| origin_weight_data_type_ = in_tensors_.at(kWeightIndex)->data_type(); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| origin_weight_ = in_tensors_.at(kWeightIndex)->data_c(); | origin_weight_ = in_tensors_.at(kWeightIndex)->data_c(); | ||||
| if (in_tensors_.size() == 3) { | if (in_tensors_.size() == 3) { | ||||
| origin_bias_ = in_tensors_.at(kBiasIndex)->data_c(); | origin_bias_ = in_tensors_.at(kBiasIndex)->data_c(); | ||||
| origin_bias_data_type_ = in_tensors_.at(kBiasIndex)->data_type(); | |||||
| } | } | ||||
| origin_weight_data_type_ = in_tensors_.at(kWeightIndex)->data_type(); | |||||
| return ReSize(); | return ReSize(); | ||||
| } | } | ||||
| @@ -110,6 +106,28 @@ ConvParameter *CreateNewConvParameterFp16(ConvParameter *parameter) { | |||||
| return conv_parameter; | return conv_parameter; | ||||
| } | } | ||||
| kernel::LiteKernel *CpuConvDwFp16KernelCreator(const std::vector<lite::Tensor *> &inputs, | |||||
| const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter, | |||||
| const InnerContext *ctx, void *origin_weight, void *origin_bias, | |||||
| TypeId origin_weight_data_type, TypeId origin_bias_data_type) { | |||||
| MS_ASSERT(opParameter != nullptr); | |||||
| auto conv_param = reinterpret_cast<ConvParameter *>(opParameter); | |||||
| kernel::LiteKernel *kernel; | |||||
| if (conv_param->input_channel_ < 32) { | |||||
| kernel = new (std::nothrow) kernel::ConvolutionDepthwiseSWFp16CPUKernel( | |||||
| opParameter, inputs, outputs, ctx, origin_weight, origin_bias, origin_weight_data_type, origin_bias_data_type); | |||||
| } else { | |||||
| kernel = new (std::nothrow) kernel::ConvolutionDepthwiseFp16CPUKernel( | |||||
| opParameter, inputs, outputs, ctx, origin_weight, origin_bias, origin_weight_data_type, origin_bias_data_type); | |||||
| } | |||||
| if (kernel == nullptr) { | |||||
| MS_LOG(ERROR) << "kernel is nullptr."; | |||||
| free(opParameter); | |||||
| return nullptr; | |||||
| } | |||||
| return kernel; | |||||
| } | |||||
| kernel::LiteKernel *CpuConvFp16KernelSelect(const std::vector<lite::Tensor *> &inputs, | kernel::LiteKernel *CpuConvFp16KernelSelect(const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter, | const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter, | ||||
| const lite::InnerContext *ctx, void *origin_weight, void *origin_bias, | const lite::InnerContext *ctx, void *origin_weight, void *origin_bias, | ||||
| @@ -119,12 +137,17 @@ kernel::LiteKernel *CpuConvFp16KernelSelect(const std::vector<lite::Tensor *> &i | |||||
| int out_unit; | int out_unit; | ||||
| CheckIfUseWinogradFp16(&use_winograd, &out_unit, conv_param); | CheckIfUseWinogradFp16(&use_winograd, &out_unit, conv_param); | ||||
| kernel::LiteKernel *kernel = nullptr; | kernel::LiteKernel *kernel = nullptr; | ||||
| if (conv_param->kernel_h_ == 1 && conv_param->kernel_w_ == 1) { | |||||
| if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) { | |||||
| kernel = CpuConvDwFp16KernelCreator(inputs, outputs, op_parameter, ctx, origin_weight, origin_bias, | |||||
| origin_weight_data_type, origin_bias_data_type); | |||||
| } else if (conv_param->kernel_h_ == 1 && conv_param->kernel_w_ == 1) { | |||||
| kernel = new (std::nothrow) kernel::Convolution1x1FP16CPUKernel( | kernel = new (std::nothrow) kernel::Convolution1x1FP16CPUKernel( | ||||
| op_parameter, inputs, outputs, ctx, origin_weight, origin_bias, origin_weight_data_type, origin_bias_data_type); | op_parameter, inputs, outputs, ctx, origin_weight, origin_bias, origin_weight_data_type, origin_bias_data_type); | ||||
| } else if (use_winograd) { | } else if (use_winograd) { | ||||
| kernel = new (std::nothrow) kernel::ConvolutionWinogradFP16CPUKernel( | |||||
| op_parameter, inputs, outputs, ctx, out_unit, origin_weight, origin_bias, origin_bias_data_type); | |||||
| kernel = new (std::nothrow) | |||||
| kernel::ConvolutionWinogradFP16CPUKernel(op_parameter, inputs, outputs, ctx, out_unit, origin_weight, origin_bias, | |||||
| origin_weight_data_type, origin_bias_data_type); | |||||
| } else { | } else { | ||||
| kernel = new (std::nothrow) kernel::ConvolutionFP16CPUKernel( | kernel = new (std::nothrow) kernel::ConvolutionFP16CPUKernel( | ||||
| op_parameter, inputs, outputs, ctx, origin_weight, origin_bias, origin_weight_data_type, origin_bias_data_type); | op_parameter, inputs, outputs, ctx, origin_weight, origin_bias, origin_weight_data_type, origin_bias_data_type); | ||||
| @@ -211,7 +234,13 @@ static lite::Tensor *CreateOutputTensorFp16(const std::vector<int> &out_shape, | |||||
| kernel::LiteKernel *CreateDelegateConvFp16(const std::vector<lite::Tensor *> &inputs, | kernel::LiteKernel *CreateDelegateConvFp16(const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter, | const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter, | ||||
| const InnerContext *ctx) { | const InnerContext *ctx) { | ||||
| return new (std::nothrow) kernel::ConvolutionDelegateFP16CPUKernel(op_parameter, inputs, outputs, ctx); | |||||
| auto weight_data_type = inputs.at(1)->data_type(); | |||||
| TypeId bias_data_type = kTypeUnknown; | |||||
| if (inputs.size() == 3) { | |||||
| bias_data_type = inputs.at(2)->data_type(); | |||||
| } | |||||
| return new (std::nothrow) | |||||
| kernel::ConvolutionDelegateFP16CPUKernel(op_parameter, inputs, outputs, ctx, weight_data_type, bias_data_type); | |||||
| } | } | ||||
| kernel::LiteKernel *CpuGroupConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs, | kernel::LiteKernel *CpuGroupConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs, | ||||
| @@ -302,33 +331,6 @@ kernel::LiteKernel *CpuGroupConvFp16KernelCreator(const std::vector<lite::Tensor | |||||
| GroupConvolutionFP16CPUKernel(op_parameter, inputs, outputs, ctx, group_convs, conv_param->group_); | GroupConvolutionFP16CPUKernel(op_parameter, inputs, outputs, ctx, group_convs, conv_param->group_); | ||||
| } | } | ||||
| kernel::LiteKernel *CpuConvDwFp16KernelCreator(const std::vector<lite::Tensor *> &inputs, | |||||
| const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter, | |||||
| const InnerContext *ctx, const kernel::KernelKey &desc) { | |||||
| MS_ASSERT(opParameter != nullptr); | |||||
| auto conv_param = reinterpret_cast<ConvParameter *>(opParameter); | |||||
| kernel::LiteKernel *kernel; | |||||
| if (conv_param->input_channel_ < 32) { | |||||
| kernel = new (std::nothrow) kernel::ConvolutionDepthwiseSWFp16CPUKernel(opParameter, inputs, outputs, ctx); | |||||
| } else { | |||||
| kernel = new (std::nothrow) kernel::ConvolutionDepthwiseFp16CPUKernel(opParameter, inputs, outputs, ctx); | |||||
| } | |||||
| if (kernel == nullptr) { | |||||
| MS_LOG(ERROR) << "kernel is nullptr."; | |||||
| free(opParameter); | |||||
| return nullptr; | |||||
| } | |||||
| auto ret = kernel->Init(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " | |||||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); | |||||
| delete kernel; | |||||
| return nullptr; | |||||
| } | |||||
| return kernel; | |||||
| } | |||||
| kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs, | kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter, | const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter, | ||||
| const InnerContext *ctx, const kernel::KernelKey &desc) { | const InnerContext *ctx, const kernel::KernelKey &desc) { | ||||
| @@ -337,12 +339,13 @@ kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::Tensor *> & | |||||
| auto conv_param = reinterpret_cast<ConvParameter *>(opParameter); | auto conv_param = reinterpret_cast<ConvParameter *>(opParameter); | ||||
| kernel::LiteKernel *kernel = nullptr; | kernel::LiteKernel *kernel = nullptr; | ||||
| if (conv_param->group_ == 1) { | |||||
| kernel = CreateDelegateConvFp16(inputs, outputs, opParameter, ctx); | |||||
| } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) { | |||||
| kernel = CpuConvDwFp16KernelCreator(inputs, outputs, opParameter, ctx, desc); | |||||
| } else { | |||||
| bool is_depthwise = | |||||
| (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_); | |||||
| if (conv_param->group_ > 1 && !is_depthwise) { | |||||
| kernel = CpuGroupConvFp16KernelCreator(inputs, outputs, opParameter, ctx); | kernel = CpuGroupConvFp16KernelCreator(inputs, outputs, opParameter, ctx); | ||||
| } else { | |||||
| kernel = CreateDelegateConvFp16(inputs, outputs, opParameter, ctx); | |||||
| } | } | ||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| @@ -29,8 +29,11 @@ namespace mindspore::kernel { | |||||
| class ConvolutionDelegateFP16CPUKernel : public LiteKernel { | class ConvolutionDelegateFP16CPUKernel : public LiteKernel { | ||||
| public: | public: | ||||
| ConvolutionDelegateFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ConvolutionDelegateFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx) {} | |||||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | |||||
| TypeId origin_weight_data_type, TypeId origin_bias_data_type) | |||||
| : LiteKernel(parameter, inputs, outputs, ctx), | |||||
| origin_weight_data_type_(origin_weight_data_type), | |||||
| origin_bias_data_type_(origin_bias_data_type) {} | |||||
| ~ConvolutionDelegateFP16CPUKernel() override { | ~ConvolutionDelegateFP16CPUKernel() override { | ||||
| FreeCopiedData(); | FreeCopiedData(); | ||||
| if (fp16_conv_kernel_ != nullptr) { | if (fp16_conv_kernel_ != nullptr) { | ||||
| @@ -42,7 +42,7 @@ int ConvolutionDepthwiseFp16CPUKernel::InitWeightBias() { | |||||
| MS_LOG(ERROR) << "Malloc buffer failed."; | MS_LOG(ERROR) << "Malloc buffer failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteFilter(weight_tensor, weight_tensor->data_c()); | |||||
| auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteFilter(weight_tensor, origin_weight_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "get execute filter data failed."; | MS_LOG(ERROR) << "get execute filter data failed."; | ||||
| return ret; | return ret; | ||||
| @@ -63,8 +63,8 @@ int ConvolutionDepthwiseFp16CPUKernel::InitWeightBias() { | |||||
| auto bias_fp16 = reinterpret_cast<float16_t *>(bias_data_); | auto bias_fp16 = reinterpret_cast<float16_t *>(bias_data_); | ||||
| if (in_tensors_.size() == kInputSize2) { | if (in_tensors_.size() == kInputSize2) { | ||||
| auto bias_tensor = in_tensors_.at(kBiasIndex); | auto bias_tensor = in_tensors_.at(kBiasIndex); | ||||
| auto ori_bias = reinterpret_cast<float *>(bias_tensor->MutableData()); | |||||
| MS_ASSERT(ori_bias); | |||||
| MS_ASSERT(origin_bias_); | |||||
| auto ori_bias = reinterpret_cast<float *>(origin_bias_); | |||||
| for (int i = 0; i < bias_tensor->ElementsNum(); i++) { | for (int i = 0; i < bias_tensor->ElementsNum(); i++) { | ||||
| bias_fp16[i] = (float16_t)ori_bias[i]; | bias_fp16[i] = (float16_t)ori_bias[i]; | ||||
| } | } | ||||
| @@ -35,8 +35,12 @@ namespace mindspore::kernel { | |||||
| class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { | class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { | ||||
| public: | public: | ||||
| ConvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ConvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx) | |||||
| : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx) {} | |||||
| const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx, | |||||
| void *origin_weight, void *origin_bias, TypeId origin_weight_data_type, | |||||
| TypeId origin_bias_data_type) | |||||
| : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type), | |||||
| origin_weight_(origin_weight), | |||||
| origin_bias_(origin_bias) {} | |||||
| ~ConvolutionDepthwiseFp16CPUKernel() override; | ~ConvolutionDepthwiseFp16CPUKernel() override; | ||||
| int Init() override; | int Init() override; | ||||
| @@ -47,6 +51,8 @@ class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { | |||||
| int Execute(int task_id); | int Execute(int task_id); | ||||
| private: | private: | ||||
| void *origin_weight_; // do not free | |||||
| void *origin_bias_; // do not free | |||||
| float16_t *packed_weight_ = nullptr; | float16_t *packed_weight_ = nullptr; | ||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -61,7 +61,6 @@ int ConvolutionDepthwiseSWFp16CPUKernel::InitWeightBias() { | |||||
| // init weight: o, h, w, i; o == group, i == 1 | // init weight: o, h, w, i; o == group, i == 1 | ||||
| auto weight_tensor = in_tensors_.at(kWeightIndex); | auto weight_tensor = in_tensors_.at(kWeightIndex); | ||||
| int OC8 = UP_DIV(weight_tensor->Batch(), C8NUM); | int OC8 = UP_DIV(weight_tensor->Batch(), C8NUM); | ||||
| auto origin_weight = reinterpret_cast<float *>(weight_tensor->MutableData()); | |||||
| int pack_weight_size = C8NUM * OC8 * weight_tensor->Height() * weight_tensor->Width(); | int pack_weight_size = C8NUM * OC8 * weight_tensor->Height() * weight_tensor->Width(); | ||||
| packed_weight_ = reinterpret_cast<float16_t *>(malloc(pack_weight_size * sizeof(float16_t))); | packed_weight_ = reinterpret_cast<float16_t *>(malloc(pack_weight_size * sizeof(float16_t))); | ||||
| @@ -69,8 +68,8 @@ int ConvolutionDepthwiseSWFp16CPUKernel::InitWeightBias() { | |||||
| MS_LOG(ERROR) << "Malloc buffer failed."; | MS_LOG(ERROR) << "Malloc buffer failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| PackNCHWFp32ToNC8HW8Fp16(origin_weight, packed_weight_, 1, weight_tensor->Height() * weight_tensor->Width(), | |||||
| weight_tensor->Batch()); | |||||
| PackNCHWFp32ToNC8HW8Fp16(reinterpret_cast<float *>(origin_weight_), packed_weight_, 1, | |||||
| weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch()); | |||||
| bias_data_ = reinterpret_cast<float16_t *>(malloc(C8NUM * OC8 * sizeof(float16_t))); | bias_data_ = reinterpret_cast<float16_t *>(malloc(C8NUM * OC8 * sizeof(float16_t))); | ||||
| if (bias_data_ == nullptr) { | if (bias_data_ == nullptr) { | ||||
| @@ -81,8 +80,8 @@ int ConvolutionDepthwiseSWFp16CPUKernel::InitWeightBias() { | |||||
| auto bias_fp16 = reinterpret_cast<float16_t *>(bias_data_); | auto bias_fp16 = reinterpret_cast<float16_t *>(bias_data_); | ||||
| if (in_tensors_.size() == kInputSize2) { | if (in_tensors_.size() == kInputSize2) { | ||||
| auto bias_tensor = in_tensors_.at(kBiasIndex); | auto bias_tensor = in_tensors_.at(kBiasIndex); | ||||
| auto ori_bias = reinterpret_cast<float *>(bias_tensor->MutableData()); | |||||
| MS_ASSERT(ori_bias); | |||||
| MS_ASSERT(origin_bias_); | |||||
| auto ori_bias = reinterpret_cast<float *>(origin_bias_); | |||||
| for (int i = 0; i < bias_tensor->ElementsNum(); i++) { | for (int i = 0; i < bias_tensor->ElementsNum(); i++) { | ||||
| bias_fp16[i] = (float16_t)ori_bias[i]; | bias_fp16[i] = (float16_t)ori_bias[i]; | ||||
| } | } | ||||
| @@ -36,8 +36,12 @@ namespace mindspore::kernel { | |||||
| class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { | class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { | ||||
| public: | public: | ||||
| ConvolutionDepthwiseSWFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ConvolutionDepthwiseSWFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx) | |||||
| : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx) {} | |||||
| const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx, | |||||
| void *origin_weight, void *origin_bias, TypeId origin_weight_data_type, | |||||
| TypeId origin_bias_data_type) | |||||
| : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type), | |||||
| origin_weight_(origin_weight), | |||||
| origin_bias_(origin_bias) {} | |||||
| ~ConvolutionDepthwiseSWFp16CPUKernel() override; | ~ConvolutionDepthwiseSWFp16CPUKernel() override; | ||||
| int Init() override; | int Init() override; | ||||
| @@ -50,6 +54,8 @@ class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseFP16CPUKernel | |||||
| private: | private: | ||||
| void FreePackedInputOutput(); | void FreePackedInputOutput(); | ||||
| void *origin_weight_; // do not free | |||||
| void *origin_bias_; // do not free | |||||
| SlidingWindowParam *sliding_ = nullptr; | SlidingWindowParam *sliding_ = nullptr; | ||||
| float16_t *packed_weight_ = nullptr; | float16_t *packed_weight_ = nullptr; | ||||
| float16_t *packed_input_ = nullptr; | float16_t *packed_input_ = nullptr; | ||||
| @@ -28,11 +28,9 @@ class ConvolutionFP16CPUKernel : public ConvolutionBaseFP16CPUKernel { | |||||
| ConvolutionFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ConvolutionFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx, void *origin_weight, | const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx, void *origin_weight, | ||||
| void *origin_bias, TypeId origin_weight_data_type, TypeId origin_bias_data_type) | void *origin_bias, TypeId origin_weight_data_type, TypeId origin_bias_data_type) | ||||
| : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx), | |||||
| : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type), | |||||
| origin_weight_(origin_weight), | origin_weight_(origin_weight), | ||||
| origin_bias_(origin_bias), | |||||
| origin_weight_data_type_(origin_weight_data_type), | |||||
| origin_bias_data_type_(origin_bias_data_type) {} | |||||
| origin_bias_(origin_bias) {} | |||||
| ~ConvolutionFP16CPUKernel() override { | ~ConvolutionFP16CPUKernel() override { | ||||
| if (packed_weight_ != nullptr) { | if (packed_weight_ != nullptr) { | ||||
| free(packed_weight_); | free(packed_weight_); | ||||
| @@ -64,8 +62,6 @@ class ConvolutionFP16CPUKernel : public ConvolutionBaseFP16CPUKernel { | |||||
| float16_t *packed_input_ = nullptr; | float16_t *packed_input_ = nullptr; | ||||
| float16_t *packed_weight_ = nullptr; | float16_t *packed_weight_ = nullptr; | ||||
| float16_t *col_major_input_ = nullptr; | float16_t *col_major_input_ = nullptr; | ||||
| TypeId origin_weight_data_type_; | |||||
| TypeId origin_bias_data_type_; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -31,12 +31,12 @@ class ConvolutionWinogradFP16CPUKernel : public ConvolutionBaseFP16CPUKernel { | |||||
| public: | public: | ||||
| ConvolutionWinogradFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ConvolutionWinogradFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx, int out_unit, | const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx, int out_unit, | ||||
| void *origin_weight, void *origin_bias, TypeId origin_bias_data_type) | |||||
| : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx), | |||||
| void *origin_weight, void *origin_bias, TypeId origin_weight_data_type, | |||||
| TypeId origin_bias_data_type) | |||||
| : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type), | |||||
| output_unit_(out_unit), | output_unit_(out_unit), | ||||
| origin_weight_(origin_weight), | origin_weight_(origin_weight), | ||||
| origin_bias_(origin_bias), | |||||
| origin_bias_data_type_(origin_bias_data_type) {} | |||||
| origin_bias_(origin_bias) {} | |||||
| ~ConvolutionWinogradFP16CPUKernel() override { | ~ConvolutionWinogradFP16CPUKernel() override { | ||||
| if (trans_weight_ != nullptr) { | if (trans_weight_ != nullptr) { | ||||
| free(trans_weight_); | free(trans_weight_); | ||||
| @@ -86,7 +86,6 @@ class ConvolutionWinogradFP16CPUKernel : public ConvolutionBaseFP16CPUKernel { | |||||
| TmpBufferAddressFp16 tmp_buffer_address_list_[4]; | TmpBufferAddressFp16 tmp_buffer_address_list_[4]; | ||||
| InputTransFp16Func in_func_; | InputTransFp16Func in_func_; | ||||
| OutputTransFp16Func out_func_; | OutputTransFp16Func out_func_; | ||||
| TypeId origin_bias_data_type_; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -37,8 +37,9 @@ namespace mindspore::kernel { | |||||
| class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { | class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { | ||||
| public: | public: | ||||
| DeconvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | DeconvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx) | |||||
| : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx) {} | |||||
| const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx, | |||||
| TypeId origin_weight_data_type, TypeId origin_bias_data_type) | |||||
| : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type) {} | |||||
| ~DeconvolutionDepthwiseFp16CPUKernel() override; | ~DeconvolutionDepthwiseFp16CPUKernel() override; | ||||
| int Init() override; | int Init() override; | ||||
| @@ -216,21 +216,25 @@ kernel::LiteKernel *CpuDeConvFp16KernelCreator(const std::vector<lite::Tensor *> | |||||
| MS_ASSERT(op_parameter != nullptr); | MS_ASSERT(op_parameter != nullptr); | ||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Conv2dTransposeFusion); | MS_ASSERT(desc.type == schema::PrimitiveType_Conv2dTransposeFusion); | ||||
| auto weight_data_type = inputs.at(1)->data_type(); | |||||
| TypeId bias_data_type = kTypeUnknown; | |||||
| if (inputs.size() == 3) { | |||||
| bias_data_type = inputs.at(2)->data_type(); | |||||
| } | |||||
| kernel::LiteKernel *kernel = nullptr; | kernel::LiteKernel *kernel = nullptr; | ||||
| auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter); | auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter); | ||||
| if (conv_param->group_ == 1) { | if (conv_param->group_ == 1) { | ||||
| if ((conv_param->stride_h_ != 1 || conv_param->stride_w_ != 1) && | if ((conv_param->stride_h_ != 1 || conv_param->stride_w_ != 1) && | ||||
| (conv_param->dilation_w_ == 1 && conv_param->dilation_h_ == 1)) { | |||||
| kernel = new (std::nothrow) kernel::DeConvWinogradFp16CPUKernel(op_parameter, inputs, outputs, ctx); | |||||
| (conv_param->dilation_h_ == 1 && conv_param->dilation_w_ == 1)) { | |||||
| kernel = new (std::nothrow) | |||||
| kernel::DeConvWinogradFp16CPUKernel(op_parameter, inputs, outputs, ctx, weight_data_type, bias_data_type); | |||||
| } else { | } else { | ||||
| kernel = new (std::nothrow) kernel::DeConvolutionFp16CPUKernel(op_parameter, inputs, outputs, ctx); | |||||
| kernel = new (std::nothrow) | |||||
| kernel::DeConvolutionFp16CPUKernel(op_parameter, inputs, outputs, ctx, weight_data_type, bias_data_type); | |||||
| } | } | ||||
| } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) { | } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) { | ||||
| kernel = new (std::nothrow) DeconvolutionDepthwiseFp16CPUKernel(op_parameter, inputs, outputs, ctx); | |||||
| } else { | |||||
| MS_LOG(ERROR) << "deconv do not support group deconv!"; | |||||
| kernel = nullptr; | |||||
| kernel = new (std::nothrow) | |||||
| DeconvolutionDepthwiseFp16CPUKernel(op_parameter, inputs, outputs, ctx, weight_data_type, bias_data_type); | |||||
| } | } | ||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| @@ -27,8 +27,9 @@ namespace mindspore::kernel { | |||||
| class DeConvolutionFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { | class DeConvolutionFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { | ||||
| public: | public: | ||||
| DeConvolutionFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | DeConvolutionFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) | |||||
| : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx) {} | |||||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | |||||
| TypeId origin_weight_data_type, TypeId origin_bias_data_type) | |||||
| : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type) {} | |||||
| ~DeConvolutionFp16CPUKernel() override; | ~DeConvolutionFp16CPUKernel() override; | ||||
| int Init() override; | int Init() override; | ||||
| int Run() override; | int Run() override; | ||||
| @@ -28,8 +28,9 @@ namespace mindspore::kernel { | |||||
| class DeConvWinogradFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { | class DeConvWinogradFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { | ||||
| public: | public: | ||||
| DeConvWinogradFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | DeConvWinogradFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) | |||||
| : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx) {} | |||||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | |||||
| TypeId origin_weight_data_type, TypeId origin_bias_data_type) | |||||
| : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type) {} | |||||
| ~DeConvWinogradFp16CPUKernel() override; | ~DeConvWinogradFp16CPUKernel() override; | ||||
| int Init() override; | int Init() override; | ||||
| int Run() override; | int Run() override; | ||||
| @@ -49,11 +49,6 @@ int ArithmeticCPUKernel::Init() { | |||||
| } | } | ||||
| int ArithmeticCPUKernel::ReSize() { | int ArithmeticCPUKernel::ReSize() { | ||||
| if (CheckDataType() != RET_OK) { | |||||
| MS_LOG(ERROR) << "ArithmeticCPUKernel resize failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| CalcMultiplesAndStrides(param_); | CalcMultiplesAndStrides(param_); | ||||
| if (param_->broadcasting_) { | if (param_->broadcasting_) { | ||||
| outside_ = 1; | outside_ = 1; | ||||
| @@ -359,6 +354,10 @@ int ArithmeticsRun(void *cdata, int task_id) { | |||||
| } | } | ||||
| int ArithmeticCPUKernel::Run() { | int ArithmeticCPUKernel::Run() { | ||||
| if (CheckDataType() != RET_OK) { | |||||
| MS_LOG(ERROR) << "ArithmeticCPUKernel check dataType failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| if (!input0_broadcast_) { | if (!input0_broadcast_) { | ||||
| input0_ptr_ = in_tensors_[0]->data_c(); | input0_ptr_ = in_tensors_[0]->data_c(); | ||||
| } | } | ||||