Merge pull request !7686 from fuzhiye/tmp
@@ -62,9 +62,9 @@ void Conv3x3Int8Gemm(int32_t *dst, const int16_t *src, const int16_t *weight, in
 #endif
 }
-void ConvInt8Opt(int8_t *input_data, int8_t *packed_input, int8_t *matmul_input, int8_t *packed_weight,
-                 const int32_t *bias_data, int8_t *output_data, int32_t *filter_zp, int32_t *input_sum, int task_id,
-                 ConvParameter *conv_param, MATMUL_OPT_R_FUNC matmul_func, bool is_optimize) {
+void ConvInt8(int8_t *input_data, int8_t *packed_input, int8_t *matmul_input, int8_t *packed_weight,
+              const int32_t *bias_data, int8_t *output_data, int32_t *filter_zp, int32_t *input_sum, int task_id,
+              ConvParameter *conv_param, MATMUL_OPT_R_FUNC matmul_func, bool is_optimize) {
   int kernel_h = conv_param->kernel_h_;
   int kernel_w = conv_param->kernel_w_;
   int in_batch = conv_param->input_batch_;
@@ -32,9 +32,9 @@
 extern "C" {
 #endif
 // int8 conv common
-void ConvInt8Opt(int8_t *input_data, int8_t *packed_input, int8_t *matmul_input, int8_t *packed_weight,
-                 const int32_t *bias_data, int8_t *output_data, int32_t *filter_zp, int32_t *input_sum, int task_id,
-                 ConvParameter *conv_param, MATMUL_OPT_R_FUNC matmul_func, bool is_optimize);
+void ConvInt8(int8_t *input_data, int8_t *packed_input, int8_t *matmul_input, int8_t *packed_weight,
+              const int32_t *bias_data, int8_t *output_data, int32_t *filter_zp, int32_t *input_sum, int task_id,
+              ConvParameter *conv_param, MATMUL_OPT_R_FUNC matmul_func, bool is_optimize);
 // int8 convolution 1x1
 void Conv1x1PreOptPeroc(const int8_t *src_input, int8_t *packed_input, int32_t *input_sum, size_t input_channel,
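Note for downstream callers: only the symbol name changes here; the parameter list of ConvInt8 is identical to the old ConvInt8Opt. If out-of-tree code still references the old name, a thin forwarding wrapper like the hypothetical one below (not part of this change, and assuming the updated conv_int8 header is included) would keep it compiling during migration.

```cpp
// Hypothetical migration shim, not part of this change set: keeps the old
// ConvInt8Opt name alive by forwarding to the renamed ConvInt8 entry point.
static inline void ConvInt8Opt(int8_t *input_data, int8_t *packed_input, int8_t *matmul_input,
                               int8_t *packed_weight, const int32_t *bias_data, int8_t *output_data,
                               int32_t *filter_zp, int32_t *input_sum, int task_id,
                               ConvParameter *conv_param, MATMUL_OPT_R_FUNC matmul_func, bool is_optimize) {
  ConvInt8(input_data, packed_input, matmul_input, packed_weight, bias_data, output_data, filter_zp, input_sum,
           task_id, conv_param, matmul_func, is_optimize);
}
```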
@@ -141,34 +141,6 @@ int ConvolutionBaseCPUKernel::SetIfPerChannel() {
   return RET_OK;
 }
-int ConvolutionBaseCPUKernel::SetIfAsymmetric() {
-  uint8_t asymmetric = 0b0;
-  auto filter_tensor = in_tensors_.at(kWeightIndex);
-  auto filter_ele_num = filter_tensor->ElementsNum();
-  auto filter_data = reinterpret_cast<int8_t *>(filter_tensor->MutableData());
-  int min_value = INT8_MAX;
-  int max_value = INT8_MIN;
-  for (int i = 0; i < filter_ele_num; ++i) {
-    min_value = min_value < filter_data[i] ? min_value : filter_data[i];
-    max_value = max_value > filter_data[i] ? max_value : filter_data[i];
-  }
-  if (conv_quant_arg_->filter_arg_num_ == kPerTensor) {
-    auto filter_zp = conv_quant_arg_->filter_quant_args_[0].zp_;
-    if (filter_zp != 0 && min_value >= -128 && max_value <= 127) {
-      asymmetric = asymmetric | FILTER_ASYMMETRIC;
-    }
-  } else {
-    auto filter_arg = conv_quant_arg_->filter_quant_args_;
-    for (int i = 0; i < conv_param_->output_channel_; ++i) {
-      if (filter_arg[i].zp_ != 0 && min_value >= -128 && max_value <= 127) {
-        asymmetric = asymmetric | FILTER_ASYMMETRIC;
-      }
-    }
-  }
-  conv_quant_arg_->asymmetric_ = asymmetric;
-  return RET_OK;
-}
 int ConvolutionBaseCPUKernel::MallocQuantParam() {
   conv_quant_arg_ = &conv_param_->conv_quant_arg_;
   auto input_tensor = in_tensors_.at(kInputIndex);
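For context on the deletion: the removed SetIfAsymmetric() scans the int8 weights for their min/max and ORs FILTER_ASYMMETRIC into the quant flags whenever any filter zero point is non-zero while the weights stay in [-128, 127]. A standalone restatement is sketched below for illustration only; the free-function form and the concrete flag value are assumptions, not code that remains in the tree.

```cpp
// Illustrative restatement of the removed SetIfAsymmetric() logic.
// Assumptions: free-function form, kFilterAsymmetric standing in for FILTER_ASYMMETRIC.
#include <cstdint>

constexpr uint8_t kFilterAsymmetric = 0b10;  // placeholder flag value

uint8_t DetectFilterAsymmetry(const int8_t *filter_data, int ele_num, const int32_t *filter_zp, int zp_num) {
  int min_value = INT8_MAX;
  int max_value = INT8_MIN;
  for (int i = 0; i < ele_num; ++i) {
    min_value = (filter_data[i] < min_value) ? filter_data[i] : min_value;
    max_value = (filter_data[i] > max_value) ? filter_data[i] : max_value;
  }
  uint8_t asymmetric = 0b0;
  // Per-tensor quantization passes zp_num == 1; per-channel passes one zp per output channel.
  for (int i = 0; i < zp_num; ++i) {
    if (filter_zp[i] != 0 && min_value >= -128 && max_value <= 127) {
      asymmetric |= kFilterAsymmetric;
    }
  }
  return asymmetric;
}
```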
@@ -48,7 +48,6 @@ class ConvolutionBaseCPUKernel : public LiteKernel {
   int Init() override;
   int ReSize() override { return 0; }
   int Run() override { return 0; }
-  int SetIfAsymmetric();
   int SetIfPerChannel();
   int MallocQuantParam();
   int SetQuantParam();
@@ -156,14 +156,7 @@ int Convolution3x3Int8CPUKernel::InitTmpBuffer() {
   return RET_OK;
 }
-void Convolution3x3Int8CPUKernel::ConfigInputOutput() {
-  auto output_tensor = out_tensors_.at(kOutputIndex);
-  output_tensor->SetFormat(schema::Format::Format_NHWC);
-}
 int Convolution3x3Int8CPUKernel::Init() {
-  // config input output
-  ConfigInputOutput();
   auto ret = SetQuantParam();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Set quant param failed.";
@@ -38,7 +38,6 @@ class Convolution3x3Int8CPUKernel : public ConvolutionBaseCPUKernel {
   int RunImpl(int task_id);
   int InitWeightBias();
   int InitTmpBuffer();
-  void ConfigInputOutput();
  private:
   void FreeTmpBuffer();
@@ -59,7 +59,7 @@ void ConvolutionInt8CPUKernel::CheckSupportOptimize() {
   conv_param_->tile_num_ = tile_num_;
 }
-int ConvolutionInt8CPUKernel::InitWeightBiasOpt() {
+int ConvolutionInt8CPUKernel::InitWeightBias() {
   auto filter_tensor = in_tensors_.at(kWeightIndex);
   auto input_channel = filter_tensor->Channel();
   auto output_channel = filter_tensor->Batch();
@@ -83,7 +83,7 @@ int ConvolutionInt8CPUKernel::InitWeightBiasOpt() {
 }
 #endif
   int pack_weight_size = up_round_oc * up_round_deep;
-  int bias_size = up_round_oc * sizeof(int32_t);
+  size_t bias_size = up_round_oc * sizeof(int32_t);
   int32_t input_zp = conv_param_->conv_quant_arg_.input_quant_args_[0].zp_;
   // init weight
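The `int bias_size` to `size_t bias_size` change removes a narrowing conversion: `up_round_oc * sizeof(int32_t)` is already a `size_t` expression, and the C memory routines it eventually feeds take `size_t` byte counts. A minimal sketch of the pattern follows; the plain malloc/memset pairing is an assumption standing in for the kernel's allocator.

```cpp
// Minimal sketch: keep the byte count as size_t all the way to the allocator.
#include <cstdint>
#include <cstdlib>
#include <cstring>

int32_t *AllocZeroedBias(int up_round_oc) {
  size_t bias_size = static_cast<size_t>(up_round_oc) * sizeof(int32_t);  // no narrowing back to int
  auto *bias = static_cast<int32_t *>(std::malloc(bias_size));
  if (bias != nullptr) {
    std::memset(bias, 0, bias_size);
  }
  return bias;
}
```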
@@ -150,7 +150,7 @@ int ConvolutionInt8CPUKernel::InitWeightBiasOpt() {
   return RET_OK;
 }
-int ConvolutionInt8CPUKernel::InitTmpBufferOpt() {
+int ConvolutionInt8CPUKernel::InitTmpBuffer() {
   MS_ASSERT(ctx_->allocator != nullptr);
   int kernel_plane = conv_param_->kernel_h_ * conv_param_->kernel_w_;
   int tmp_size;
@@ -181,7 +181,7 @@ int ConvolutionInt8CPUKernel::Init() {
     return ret;
   }
-  ret = InitWeightBiasOpt();
+  ret = InitWeightBias();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Initialization for optimized int8 conv failed.";
     return RET_ERROR;
@@ -212,9 +212,8 @@ int ConvolutionInt8CPUKernel::RunImpl(int task_id) {
   auto input_tensor = in_tensors_.at(kInputIndex);
   auto ori_input_data = reinterpret_cast<int8_t *>(input_tensor->MutableData());
   auto output_addr = reinterpret_cast<int8_t *>(out_tensors_.at(kOutputIndex)->MutableData());
-  ConvInt8Opt(ori_input_data, packed_input_, matmul_packed_input_, packed_weight_,
-              reinterpret_cast<int32_t *>(bias_data_), output_addr, filter_zp_ptr_, input_sum_, task_id, conv_param_,
-              matmul_func_, support_optimize_);
+  ConvInt8(ori_input_data, packed_input_, matmul_packed_input_, packed_weight_, reinterpret_cast<int32_t *>(bias_data_),
+           output_addr, filter_zp_ptr_, input_sum_, task_id, conv_param_, matmul_func_, support_optimize_);
   return RET_OK;
 }
@@ -229,7 +228,7 @@ int ConvolutionInt8Impl(void *cdata, int task_id) {
 }
 int ConvolutionInt8CPUKernel::Run() {
-  auto ret = InitTmpBufferOpt();
+  auto ret = InitTmpBuffer();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init tmp buffer failed.";
     return RET_ERROR;
@@ -51,8 +51,8 @@ class ConvolutionInt8CPUKernel : public ConvolutionBaseCPUKernel {
   int Run() override;
   int RunImpl(int task_id);
   void CheckSupportOptimize();
-  int InitWeightBiasOpt();
-  int InitTmpBufferOpt();
+  int InitWeightBias();
+  int InitTmpBuffer();
  private:
   void FreeTmpBuffer() {
@@ -149,12 +149,6 @@ int DeconvolutionDepthwiseInt8CPUKernel::Init() {
 int DeconvolutionDepthwiseInt8CPUKernel::ReSize() {
   InitSlideParam();
   ConvolutionBaseCPUKernel::Init();
-  auto ret = InitBuffer();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Deconv Depthwise int8 InitBuffer error!";
-    return ret;
-  }
   return RET_OK;
 }
@@ -179,6 +173,11 @@ int DeconvolutionDepthwiseInt8CPUKernel::Run() {
     MS_LOG(ERROR) << "Only support input channel equals output channel.";
     return RET_ERROR;
   }
+  auto ret = InitBuffer();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Deconv Depthwise int8 InitBuffer error!";
+    return ret;
+  }
   // pack input, assume input format: NHWC -> NHWC4
   auto input_tensor = in_tensors_.at(kInputIndex);
@@ -191,7 +190,7 @@ int DeconvolutionDepthwiseInt8CPUKernel::Run() {
     packed_output_ = output_addr;
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwInt8Run, this, conv_param_->thread_num_);
+  ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwInt8Run, this, conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "DeconvDwInt8Run error: error_code[" << ret << "]";
     return RET_ERROR;
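The last three hunks move the scratch-buffer setup for the depthwise int8 deconvolution out of ReSize() and into Run(), right after the channel check and before the NHWC to NHWC4 packing, with `ret` then reused for the ParallelLaunch call. A standalone model of the resulting ordering is sketched below; the Scratch struct, the malloc-based InitBuffer, and the byte-size parameter are illustrative stand-ins rather than the lite runtime types, and the sketch shows only the new ordering, not the motivation for the move.

```cpp
// Standalone model of the new Run() ordering: validate, allocate scratch,
// pack, launch, release. Types and allocation are illustrative stand-ins.
#include <cstdint>
#include <cstdio>
#include <cstdlib>

struct Scratch {
  int8_t *packed_input = nullptr;
};

static int InitBuffer(Scratch *s, size_t bytes) {  // analogue of InitBuffer()
  s->packed_input = static_cast<int8_t *>(std::malloc(bytes));
  return s->packed_input != nullptr ? 0 : -1;      // RET_OK / RET_ERROR analogues
}

int Run(int in_channel, int out_channel, size_t packed_bytes) {
  if (in_channel != out_channel) {
    std::fprintf(stderr, "Only support input channel equals output channel.\n");
    return -1;
  }
  Scratch scratch;
  int ret = InitBuffer(&scratch, packed_bytes);    // formerly done in ReSize()
  if (ret != 0) {
    std::fprintf(stderr, "Deconv Depthwise int8 InitBuffer error!\n");
    return ret;
  }
  // ... pack input NHWC -> NHWC4 and run the DeconvDwInt8Run worker here ...
  ret = 0;                                         // ParallelLaunch() stand-in result
  std::free(scratch.packed_input);
  return ret;
}
```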