Browse Source

fix code review

tags/v1.1.0
sunsuodong 5 years ago
parent
commit
96bd94caed
27 changed files with 120 additions and 80 deletions
  1. +2
    -2
      mindspore/lite/nnacl/crop_parameter.h
  2. +3
    -0
      mindspore/lite/src/ops/populate/arithmetic_populate.h
  3. +1
    -0
      mindspore/lite/src/ops/populate/populate_register.h
  4. +0
    -1
      mindspore/lite/src/ops/schema_register.h
  5. +3
    -0
      mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc
  6. +10
    -1
      mindspore/lite/src/runtime/kernel/arm/base/crop_base.h
  7. +4
    -0
      mindspore/lite/src/runtime/kernel/arm/base/reduce_base.cc
  8. +0
    -4
      mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc
  9. +7
    -4
      mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc
  10. +2
    -0
      mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc
  11. +1
    -2
      mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc
  12. +7
    -4
      mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc
  13. +8
    -8
      mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc
  14. +6
    -6
      mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc
  15. +2
    -11
      mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.cc
  16. +7
    -3
      mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc
  17. +3
    -3
      mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc
  18. +1
    -0
      mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc
  19. +1
    -0
      mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.cc
  20. +5
    -3
      mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc
  21. +10
    -3
      mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc
  22. +11
    -6
      mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc
  23. +2
    -3
      mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.cc
  24. +4
    -0
      mindspore/lite/src/runtime/kernel/arm/int8/div_int8.cc
  25. +16
    -9
      mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.cc
  26. +1
    -7
      mindspore/lite/src/runtime/kernel/arm/int8/gather_int8.cc
  27. +3
    -0
      mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.h

+ 2
- 2
mindspore/lite/nnacl/crop_parameter.h View File

@@ -30,8 +30,8 @@ typedef struct CropParameter {
 int64_t offset_[CROP_OFFSET_MAX_SIZE];
 int64_t in_offset_[CROP_OFFSET_MAX_SIZE];
 int64_t axis_;
-const int *in_shape_;
-const int *out_shape_;
+int *in_shape_;
+int *out_shape_;
 int input_dim_;
} CropParameter;



+ 3
- 0
mindspore/lite/src/ops/populate/arithmetic_populate.h View File

@@ -13,6 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_OPS_POPULATE_ARITHMETIC_POPULATE_H_
#define MINDSPORE_LITE_SRC_OPS_POPULATE_ARITHMETIC_POPULATE_H_

#include "src/ops/arithmetic.h"

@@ -21,3 +23,4 @@ namespace lite {
ArithmeticParameter *PopulateArithmeticCommonPara(const mindspore::lite::PrimitiveC *primitive);
} // namespace lite
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_OPS_POPULATE_ARITHMETIC_POPULATE_H_

+ 1
- 0
mindspore/lite/src/ops/populate/populate_register.h View File

@@ -48,6 +48,7 @@ class Registry {
Registry(schema::PrimitiveType primitive_type, ParameterCreator creator) {
PopulateRegistry::GetInstance()->insertParameterMap(primitive_type, creator);
}
~Registry() = default;
};
OpParameter *PopulateArithmetic(const mindspore::lite::PrimitiveC *primitive);
OpParameter *PopulateStridedSliceParameter(const mindspore::lite::PrimitiveC *primitive);


+ 0
- 1
mindspore/lite/src/ops/schema_register.h View File

@@ -24,7 +24,6 @@ using GetSchemaDef = std::function<std::string()>;

class SchemaRegisterImpl {
public:
SchemaRegisterImpl() = default;
static SchemaRegisterImpl *Instance() {
static SchemaRegisterImpl instance;
return &instance;


+ 3
- 0
mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc View File

@@ -67,12 +67,15 @@ void ConvolutionBaseCPUKernel::FreeQuantParam() {
}
if (conv_quant_arg_->input_quant_args_ != nullptr) {
free(conv_quant_arg_->input_quant_args_);
conv_quant_arg_->input_quant_args_ = nullptr;
}
if (conv_quant_arg_->filter_quant_args_ != nullptr) {
free(conv_quant_arg_->filter_quant_args_);
conv_quant_arg_->filter_quant_args_ = nullptr;
}
if (conv_quant_arg_->output_quant_args_ != nullptr) {
free(conv_quant_arg_->output_quant_args_);
conv_quant_arg_->output_quant_args_ = nullptr;
}
}



+ 10
- 1
mindspore/lite/src/runtime/kernel/arm/base/crop_base.h View File

@@ -33,7 +33,16 @@ class CropBaseCPUKernel : public LiteKernel {
 crop_para_ = reinterpret_cast<CropParameter *>(op_parameter_);
 crop_para_->thread_count_ = op_parameter_->thread_num_;
 }
-~CropBaseCPUKernel() = default;
+~CropBaseCPUKernel() {
+  if (crop_para_->in_shape_ != nullptr) {
+    free(crop_para_->in_shape_);
+    crop_para_->in_shape_ = nullptr;
+  }
+  if (crop_para_->out_shape_ != nullptr) {
+    free(crop_para_->out_shape_);
+    crop_para_->out_shape_ = nullptr;
+  }
+}

int Init() override;
int ReSize() override;


+ 4
- 0
mindspore/lite/src/runtime/kernel/arm/base/reduce_base.cc View File

@@ -101,6 +101,10 @@ int ReduceBaseCPUKernel::Init() {
if (in_tensors_.size() > 1) {
auto axes_ptr = in_tensors_.at(1);
num_axes_ = axes_ptr->ElementsNum();
if (axes_ptr->ElementsNum() > REDUCE_MAX_AXES_NUM) {
MS_LOG(ERROR) << "input axes invalid.";
return RET_ERROR;
}
memcpy(axes_, axes_ptr->MutableData(), axes_ptr->Size());
} else {
num_axes_ = reduce_param->num_axes_;


+ 0
- 4
mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc View File

@@ -105,10 +105,6 @@ int ConcatFp16CPUKernel::Run() {
const auto in_tensor = in_tensors_[i];
if (in_tensor->data_type() == kNumberTypeFloat || in_tensor->data_type() == kNumberTypeFloat32) {
auto in_tensor_data = reinterpret_cast<float *>(in_tensor->MutableData());
if (in_tensor_data == nullptr) {
MS_LOG(ERROR) << "got nullptr when cast in_tensor to float ptr";
return RET_ERROR;
}
Float32ToFloat16(in_tensor_data, fp16_inputs_[i], in_tensor->ElementsNum());
} else {
fp16_inputs_[i] = reinterpret_cast<float16_t *>(in_tensor->MutableData());


+ 7
- 4
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc View File

@@ -221,6 +221,7 @@ int Convolution1x1FP16CPUKernel::Run() {
auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Get executor tensor failed.";
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
return ret;
}

@@ -228,6 +229,7 @@ int Convolution1x1FP16CPUKernel::Run() {
ctx_->allocator->Malloc(matmul_param_->row_16_ * matmul_param_->deep_ * sizeof(float16_t)));
if (pack_input_ == nullptr) {
MS_LOG(ERROR) << "Conv1x1 Malloc pack_input_ error!";
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
return RET_MEMORY_FAILED;
}

@@ -249,6 +251,9 @@ int Convolution1x1FP16CPUKernel::Run() {
}
if (ret != RET_OK) {
MS_LOG(ERROR) << "ParallelLaunch failed.";
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
ctx_->allocator->Free(pack_input_);
pack_input_ = nullptr;
return ret;
}
}
@@ -256,10 +261,8 @@ int Convolution1x1FP16CPUKernel::Run() {
ConvolutionBaseFP16CPUKernel::IfCastOutput();
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();

if (pack_input_ != nullptr) {
ctx_->allocator->Free(pack_input_);
pack_input_ = nullptr;
}
ctx_->allocator->Free(pack_input_);
pack_input_ = nullptr;
return RET_OK;
}
} // namespace mindspore::kernel

+ 2
- 0
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc View File

@@ -91,9 +91,11 @@ void ConvolutionBaseFP16CPUKernel::IfCastOutput() {
void ConvolutionBaseFP16CPUKernel::FreeTmpBuffer() {
if (in_data_type_ == kNumberTypeFloat32) {
context_->allocator->Free(execute_input_);
execute_input_ = nullptr;
}
if (out_data_type_ == kNumberTypeFloat32) {
context_->allocator->Free(execute_output_);
execute_output_ = nullptr;
}
}



+ 1
- 2
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc View File

@@ -123,12 +123,11 @@ int ConvolutionDepthwiseFp16CPUKernel::Run() {
 ret = ParallelLaunch(this->context_->thread_pool_, ConvDwFp16Run, this, conv_param_->thread_num_);
 if (ret != RET_OK) {
   MS_LOG(ERROR) << "ConvDwFp16Run error: error_code[" << ret << "]";
-  return RET_ERROR;
 }

 ConvolutionBaseFP16CPUKernel::IfCastOutput();
 ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
-return RET_OK;
+return ret;
 }

kernel::LiteKernel *CpuConvDwFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,


+ 7
- 4
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc View File

@@ -35,7 +35,7 @@ ConvolutionDepthwiseSWFp16CPUKernel::~ConvolutionDepthwiseSWFp16CPUKernel() {
sliding_ = nullptr;
}
if (packed_weight_ != nullptr) {
delete packed_weight_;
free(packed_weight_);
packed_weight_ = nullptr;
}
}
@@ -143,12 +143,17 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
auto ret = InitBuffer();
if (ret != 0) {
MS_LOG(ERROR) << "Convolution depthwise fp16 InitBuffer failed.";
return RET_ERROR;
context_->allocator->Free(packed_input_);
context_->allocator->Free(packed_output_);
return ret;
}

ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Get Execute tensor failed.";
context_->allocator->Free(packed_input_);
context_->allocator->Free(packed_output_);
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
return ret;
}
if (need_align_) {
@@ -164,7 +169,6 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
ret = ParallelLaunch(this->context_->thread_pool_, ConvDwSWFp16Run, this, conv_param_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConvDwSWFp16Run error: error_code[" << ret << "]";
return RET_ERROR;
}
if (need_align_) {
PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_,
@@ -176,5 +180,4 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
return RET_OK;
}

} // namespace mindspore::kernel

+ 8
- 8
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc View File

@@ -154,25 +154,26 @@ int ConvolutionFP16CPUKernel::Run() {
auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Get Execute tensor failed.";
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
return ret;
}

ret = InitTmpBuffer();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init tmp buffer failed.";
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
FreeTmpBuffer();
return RET_ERROR;
}

int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionFp16Impl, this, thread_count_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "conv fp16 error error_code[" << error_code << "]";
FreeTmpBuffer();
return RET_ERROR;
ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionFp16Impl, this, thread_count_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "conv fp16 error ret[" << ret << "]";
}
FreeTmpBuffer();
ConvolutionBaseFP16CPUKernel::IfCastOutput();
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
return RET_OK;
FreeTmpBuffer();
return ret;
}

ConvParameter *CreateNewConvParameterFp16(ConvParameter *parameter) {
@@ -354,7 +355,6 @@ kernel::LiteKernel *CpuGroupConvFp16KernelCreator(const std::vector<lite::Tensor
MS_LOG(ERROR) << "Get new conv parameter failed.";
return nullptr;
}

// create new input for each group
auto in_tensor = CreateInputTensor(inputs.front()->data_type(), in_shape, infered_flag);
if (in_tensor == nullptr) {


+ 6
- 6
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc View File

@@ -218,26 +218,26 @@ int ConvolutionWinogradFP16CPUKernel::Run() {
auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Get Execute tensor failed.";
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
return ret;
}

ret = InitTmpBuffer();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init tmp buffer failed.";
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
FreeTmpBuffer();
return RET_ERROR;
}

int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionWinogradFp16Impl, this, thread_count_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "conv winograd error error_code[" << error_code << "]";
FreeTmpBuffer();
return RET_ERROR;
ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionWinogradFp16Impl, this, thread_count_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]";
}

ConvolutionBaseFP16CPUKernel::IfCastOutput();
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
FreeTmpBuffer();
return RET_OK;
return ret;
}
} // namespace mindspore::kernel

+ 2
- 11
mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.cc View File

@@ -62,15 +62,10 @@ static int CropFp16Run(void *cdata, int task_id) {

int CropFp16CPUKernel::Run() {
input_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(kInputIndex), context_);
if (input_ptr_ == nullptr) {
MS_LOG(ERROR) << "input or output is nullptr";
return RET_ERROR;
}

output_ptr_ = MallocOutputFp16(out_tensors_.at(kOutputIndex), context_);
if (output_ptr_ == nullptr) {
FreeInputAndOutput();
if (input_ptr_ == nullptr || output_ptr_ == nullptr) {
MS_LOG(ERROR) << "input or output is nullptr";
FreeInputAndOutput();
return RET_ERROR;
}

@@ -78,16 +73,12 @@ int CropFp16CPUKernel::Run() {
if (ret != RET_OK) {
MS_LOG(ERROR) << "ParallelLaunch failed: " << ret;
FreeInputAndOutput();
return ret;
}
if (out_tensors_.at(kOutputIndex)->data_type() == kNumberTypeFloat32) {
Float16ToFloat32(output_ptr_, reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData()),
out_tensors_.at(kOutputIndex)->ElementsNum());
}
FreeInputAndOutput();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Crop error error_code[" << ret << "]";
}
return ret;
}



+ 7
- 3
mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc View File

@@ -35,7 +35,7 @@ DeconvolutionDepthwiseFp16CPUKernel::~DeconvolutionDepthwiseFp16CPUKernel() {
sliding_ = nullptr;
}
if (packed_weight_ != nullptr) {
delete packed_weight_;
free(packed_weight_);
packed_weight_ = nullptr;
}
}
@@ -159,12 +159,17 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
auto ret = InitBuffer();
if (ret != 0) {
MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitBuffer failed.";
context_->allocator->Free(packed_input_);
context_->allocator->Free(packed_output_);
return RET_ERROR;
}

ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Get Execute tensor failed.";
context_->allocator->Free(packed_input_);
context_->allocator->Free(packed_output_);
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
return ret;
}
if (need_align_) {
@@ -181,7 +186,6 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwFp16Run, this, conv_param_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "DeconvDwFp16Run error: error_code[" << ret << "]";
return RET_ERROR;
}

if (need_align_) {
@@ -192,7 +196,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
}
ConvolutionBaseFP16CPUKernel::IfCastOutput();
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
return RET_OK;
return ret;
}

kernel::LiteKernel *CpuDeconvDwFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,


+ 3
- 3
mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc View File

@@ -184,6 +184,8 @@ int DeConvolutionFp16CPUKernel::Run() {
int error_code = InitRunBuf();
if (error_code != RET_OK) {
MS_LOG(ERROR) << "deconv fp32 InitRunBuf error! error_code[" << error_code << "]";
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
FreeRunBuf();
return RET_ERROR;
}

@@ -196,15 +198,13 @@ int DeConvolutionFp16CPUKernel::Run() {
error_code = ParallelLaunch(this->context_->thread_pool_, DeConvFp16Run, this, thread_count_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]";
return RET_ERROR;
}
}

ConvolutionBaseFP16CPUKernel::IfCastOutput();
ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
FreeRunBuf();

return RET_OK;
return error_code;
}

kernel::LiteKernel *CpuDeConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,


+ 1
- 0
mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc View File

@@ -218,6 +218,7 @@ int Convolution3x3Int8CPUKernel::Run() {
auto ret = InitTmpBuffer();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init tmp buffer failed.";
FreeTmpBuffer();
return RET_ERROR;
}
auto input_addr = reinterpret_cast<int8_t *>(in_tensors_.at(kInputIndex)->MutableData());


+ 1
- 0
mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.cc View File

@@ -61,6 +61,7 @@ int ConvolutionDepthwise3x3Int8CPUKernel::InitWeightBias() {
packed_weight_ = reinterpret_cast<int16_t *>(malloc(pack_weight_size * sizeof(int16_t)));
if (packed_weight_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
free(tmp_weight);
return RET_ERROR;
}
bool filter_per_channel = conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL;


+ 5
- 3
mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc View File

@@ -55,6 +55,7 @@ int ConvolutionDepthwiseInt8CPUKernel::InitWeightBias() {
packed_weight_ = reinterpret_cast<int16_t *>(malloc(pack_weight_size * sizeof(int16_t)));
if (packed_weight_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
free(tmp_weight);
return RET_ERROR;
}

@@ -143,6 +144,8 @@ int ConvolutionDepthwiseInt8CPUKernel::Run() {
auto ret = InitBuffer();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Depthwise int8 ReSize error!";
context_->allocator->Free(row_buffer_);
row_buffer_ = nullptr;
return ret;
}

@@ -155,11 +158,10 @@ int ConvolutionDepthwiseInt8CPUKernel::Run() {
ret = ParallelLaunch(this->context_->thread_pool_, ConvDwInt8Run, this, conv_param_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConvDwInt8Run error: error_code[" << ret << "]";
return RET_ERROR;
}

context_->allocator->Free(row_buffer_);
return RET_OK;
row_buffer_ = nullptr;
return ret;
}

kernel::LiteKernel *CpuConvDwInt8KernelCreator(const std::vector<lite::Tensor *> &inputs,


+ 10
- 3
mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc View File

@@ -87,7 +87,7 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitBuffer() {
int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C8NUM *
UP_DIV(conv_param_->output_channel_, C8NUM);
packed_output_ = reinterpret_cast<int8_t *>(context_->allocator->Malloc(pack_output_size * sizeof(int8_t)));
if (packed_input_ == nullptr) {
if (packed_output_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
}
@@ -322,6 +322,12 @@ int ConvolutionDepthwiseSWInt8CPUKernel::Run() {
auto ret = InitBuffer();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Depthwise int8 ReSize error!";
if (need_align_) {
context_->allocator->Free(packed_input_);
context_->allocator->Free(packed_output_);
packed_input_ = nullptr;
packed_output_ = nullptr;
}
return ret;
}

@@ -342,7 +348,6 @@ int ConvolutionDepthwiseSWInt8CPUKernel::Run() {
ret = ParallelLaunch(this->context_->thread_pool_, ConvDwSWInt8Run, this, conv_param_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConvDwSWInt8Run error: error_code[" << ret << "]";
return RET_ERROR;
}

if (need_align_) {
@@ -350,8 +355,10 @@ int ConvolutionDepthwiseSWInt8CPUKernel::Run() {
conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
context_->allocator->Free(packed_input_);
context_->allocator->Free(packed_output_);
packed_input_ = nullptr;
packed_output_ = nullptr;
}
return RET_OK;
return ret;
}

} // namespace mindspore::kernel

+ 11
- 6
mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc View File

@@ -117,10 +117,6 @@ int DeconvolutionDepthwiseInt8CPUKernel::InitBuffer() {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
}
if (packed_input_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
}
return RET_OK;
}

@@ -177,6 +173,13 @@ int DeconvolutionDepthwiseInt8CPUKernel::Run() {
auto ret = InitBuffer();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Deconv Depthwise int8 InitBuffer error!";
context_->allocator->Free(packed_input_);
packed_input_ = nullptr;
context_->allocator->Free(output_buffer_);
output_buffer_ = nullptr;
if (need_align_) {
context_->allocator->Free(packed_output_);
}
return ret;
}

@@ -194,17 +197,19 @@ int DeconvolutionDepthwiseInt8CPUKernel::Run() {
ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwInt8Run, this, conv_param_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "DeconvDwInt8Run error: error_code[" << ret << "]";
return RET_ERROR;
}

if (need_align_) {
PackNHWC4ToNHWCInt8(packed_output_, output_addr, conv_param_->output_batch_,
conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
context_->allocator->Free(packed_output_);
packed_output_ = nullptr;
}
context_->allocator->Free(packed_input_);
packed_input_ = nullptr;
context_->allocator->Free(output_buffer_);
return RET_OK;
output_buffer_ = nullptr;
return ret;
}

kernel::LiteKernel *CpuDeconvDwInt8KernelCreator(const std::vector<lite::Tensor *> &inputs,


+ 2
- 3
mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.cc View File

@@ -256,6 +256,7 @@ int DeConvInt8CPUKernel::Run() {
int error_code = InitRunBuf();
if (error_code != RET_OK) {
MS_LOG(ERROR) << "deconv int8 InitRunBuf error! error_code[" << error_code << "]";
FreeRunBuf();
return RET_ERROR;
}

@@ -270,12 +271,10 @@ int DeConvInt8CPUKernel::Run() {
error_code = ParallelLaunch(this->context_->thread_pool_, DeConvInt8Run, this, thread_count_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "deconv int8 run error! error_code[" << error_code << "]";
return RET_ERROR;
}
}

FreeRunBuf();
return RET_OK;
return error_code;
}

kernel::LiteKernel *CpuDeConvInt8KernelCreator(const std::vector<lite::Tensor *> &inputs,


+ 4
- 0
mindspore/lite/src/runtime/kernel/arm/int8/div_int8.cc View File

@@ -110,6 +110,8 @@ int DivInt8CPUKernel::Run() {
MS_LOG(ERROR) << "Memory allocation failed";
context_->allocator->Free(tile0_data_);
context_->allocator->Free(tile1_data_);
tile0_data_ = nullptr;
tile1_data_ = nullptr;
return RET_ERROR;
}
TileDimensionsUint8(static_cast<uint8_t *>(in_tensors_.at(0)->MutableData()),
@@ -120,6 +122,8 @@ int DivInt8CPUKernel::Run() {
if (broadcast_) {
context_->allocator->Free(tile0_data_);
context_->allocator->Free(tile1_data_);
tile0_data_ = nullptr;
tile1_data_ = nullptr;
}
if (ret != RET_OK) {
MS_LOG(ERROR) << "DivInt8Run function error error_code[" << ret << "]";


+ 16
- 9
mindspore/lite/src/runtime/kernel/arm/int8/fullconnection_int8.cc View File

@@ -52,25 +52,32 @@ int FullconnectionInt8CPUKernel::ReSize() {
d16_ = UP_ROUND(fc_param_->deep_, 16);
thread_count_ = MSMIN(thread_count_, UP_DIV(c4_, 4));
thread_stride_ = UP_DIV(UP_DIV(c4_, 4), thread_count_);

a_r4x16_ptr_ = reinterpret_cast<int8_t *>(ctx_->allocator->Malloc(r4_ * d16_ * sizeof(int8_t)));
if (!a_r4x16_ptr_) return RET_MEMORY_FAILED;
memset(a_r4x16_ptr_, 0, r4_ * d16_ * sizeof(int8_t));
b_c16x4_ptr_ = reinterpret_cast<int8_t *>(ctx_->allocator->Malloc(c4_ * d16_ * sizeof(int8_t)));
if (!b_c16x4_ptr_) return RET_MEMORY_FAILED;
memset(b_c16x4_ptr_, 0, c4_ * d16_ * sizeof(int8_t));
input_sums_ = reinterpret_cast<int *>(ctx_->allocator->Malloc(r4_ * sizeof(int)));
if (!input_sums_) return RET_MEMORY_FAILED;
memset(input_sums_, 0, r4_ * sizeof(int));
weight_bias_sums_ = reinterpret_cast<int *>(ctx_->allocator->Malloc(c4_ * sizeof(int)));
if (!weight_bias_sums_) return RET_MEMORY_FAILED;
if (a_r4x16_ptr_ == nullptr || b_c16x4_ptr_ == nullptr || input_sums_ == nullptr || weight_bias_sums_ == nullptr) {
MS_LOG(ERROR) << "Memory allocation failed";
FreeTmpBuffer();
return RET_MEMORY_FAILED;
}
memset(a_r4x16_ptr_, 0, r4_ * d16_ * sizeof(int8_t));
memset(b_c16x4_ptr_, 0, c4_ * d16_ * sizeof(int8_t));
memset(input_sums_, 0, r4_ * sizeof(int));
memset(weight_bias_sums_, 0, c4_ * sizeof(int));

if (in_tensors_.size() == 3) {
auto bias_len = fc_param_->col_8_ * sizeof(int);
bias_ptr_ = reinterpret_cast<int *>(ctx_->allocator->Malloc(bias_len));
if (!bias_ptr_) return RET_MEMORY_FAILED;
if (bias_ptr_ == nullptr) {
MS_LOG(ERROR) << "Memory allocation failed";
FreeTmpBuffer();
return RET_MEMORY_FAILED;
}
memcpy(bias_ptr_, in_tensors_[2]->data_c(), bias_len);
} else {
bias_ptr_ = NULL;
bias_ptr_ = nullptr;
}

auto input_tensor = in_tensors_[0];


+ 1
- 7
mindspore/lite/src/runtime/kernel/arm/int8/gather_int8.cc View File

@@ -83,15 +83,9 @@ int GatherInt8CPUKernel::DoGather(int task_id) {
int count = MSMIN(stride, outer_size - stride * task_id);
auto thread_stride = stride * task_id;

int error_code;
input_ptr += thread_stride * limit;
output_ptr += thread_stride * indices_element_size;
error_code = GatherInt8(input_ptr, output_ptr, count, inner_size, limit, indices_ptr, indices_element_size, param_);

if (error_code != RET_OK) {
return RET_ERROR;
}
return RET_OK;
return GatherInt8(input_ptr, output_ptr, count, inner_size, limit, indices_ptr, indices_element_size, param_);
}

int GatherInt8Run(void *cdata, int task_id) {


+ 3
- 0
mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.h View File

@@ -13,6 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_OPT_OP_HANDLER_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_OPT_OP_HANDLER_H_

#include <stdlib.h>
#include <stdbool.h>
@@ -42,3 +44,4 @@ void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst
#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_OPT_OP_HANDLER_H_

Loading…
Cancel
Save