
!8810 fix code review issue

From: @zhaozhenlong
Reviewed-by: @zhanghaibo5, @ddwsky
Signed-off-by: @zhanghaibo5
Tag: tags/v1.1.0
Committed by mindspore-ci-bot, 5 years ago
Commit: 63d01669e4
17 changed files with 56 additions and 13 deletions:

 1. mindspore/lite/nnacl/split_parameter.h (+2, -2)
 2. mindspore/lite/src/runtime/kernel/arm/base/pooling_base.cc (+5, -0)
 3. mindspore/lite/src/runtime/kernel/arm/base/prior_box.cc (+3, -0)
 4. mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc (+8, -1)
 5. mindspore/lite/src/runtime/kernel/arm/base/split_base.cc (+6, -0)
 6. mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.cc (+4, -2)
 7. mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc (+4, -2)
 8. mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc (+1, -1)
 9. mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc (+2, -1)
10. mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc (+2, -2)
11. mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc (+1, -0)
12. mindspore/lite/src/runtime/kernel/arm/fp16/fullconnection_fp16.cc (+4, -0)
13. mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.cc (+4, -1)
14. mindspore/lite/src/runtime/kernel/arm/fp16/group_convolution_fp16.cc (+6, -0)
15. mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.cc (+1, -0)
16. mindspore/lite/src/runtime/kernel/arm/fp16/quant_dtype_cast_fp16.cc (+2, -0)
17. mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc (+1, -1)

mindspore/lite/nnacl/split_parameter.h (+2, -2)

@@ -19,13 +19,13 @@
 #include "nnacl/op_base.h"
 #include "nnacl/quantization/quantize.h"
+#define SPLIT_STRIDES_SIZE 32
 typedef struct SplitParameter {
   OpParameter op_parameter_;
   SplitQuantArg quant_arg_;
   int num_split_;
   int *split_sizes_;
-  int strides_[32];
+  int strides_[SPLIT_STRIDES_SIZE];
   int split_dim_;
   int n_dims_;
   int split_count_;
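Note: replacing the literal 32 with SPLIT_STRIDES_SIZE gives the strides_ bound a single named definition; the new range asserts added in split_base.cc below check against the same macro, so the declaration and its guards cannot silently drift apart.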


mindspore/lite/src/runtime/kernel/arm/base/pooling_base.cc (+5, -0)

@@ -38,11 +38,16 @@ int PoolingBaseCPUKernel::SetQuantParam() {
   pooling_quant_arg_[0] = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg)));
   if (pooling_quant_arg_[0] == nullptr) {
     MS_LOG(ERROR) << "malloc pooling_quant_arg[0] failed.";
+    free(pooling_quant_arg_);
+    pooling_quant_arg_ = nullptr;
     return RET_MEMORY_FAILED;
   }
   pooling_quant_arg_[1] = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg)));
   if (pooling_quant_arg_[1] == nullptr) {
     MS_LOG(ERROR) << "malloc pooling_quant_arg[1] failed.";
+    free(*pooling_quant_arg_);
+    free(pooling_quant_arg_);
+    pooling_quant_arg_ = nullptr;
     return RET_MEMORY_FAILED;
   }
   auto *input_tensor = in_tensors_.at(kInputIndex);
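Note: the new free calls plug a leak on the error path: when the second malloc fails, the first element and the outer pointer array previously went unreleased. A minimal standalone sketch of the same unwind pattern (type and function names here are illustrative, not the kernel's):

#include <cstdlib>

struct QuantArg {  // stand-in for the real QuantArg
  double scale;
  int zero_point;
};

// Allocates a two-entry QuantArg* array plus both entries, releasing
// everything acquired so far whenever a later step fails.
int AllocQuantArgs(QuantArg ***out) {
  QuantArg **args = static_cast<QuantArg **>(malloc(2 * sizeof(QuantArg *)));
  if (args == nullptr) {
    return -1;
  }
  args[0] = static_cast<QuantArg *>(malloc(sizeof(QuantArg)));
  if (args[0] == nullptr) {
    free(args);  // outer array must not leak
    return -1;
  }
  args[1] = static_cast<QuantArg *>(malloc(sizeof(QuantArg)));
  if (args[1] == nullptr) {
    free(args[0]);  // first element, i.e. free(*args)
    free(args);     // then the outer array
    return -1;
  }
  *out = args;
  return 0;
}

int main() {
  QuantArg **args = nullptr;
  if (AllocQuantArgs(&args) == 0) {
    free(args[0]);
    free(args[1]);
    free(args);
  }
  return 0;
}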


mindspore/lite/src/runtime/kernel/arm/base/prior_box.cc (+3, -0)

@@ -79,11 +79,14 @@ int PriorBoxCPUKernel::GeneratePriorBox() {
     if (!exist) {
       different_aspect_ratios.emplace_back(ratio);
       if (prior_box_param_->flip) {
+        MS_ASSERT(fabs(ratio) > 1e-5);
         different_aspect_ratios.emplace_back(1.0f / ratio);
       }
     }
   }

+  MS_ASSERT(fmap_w);
+  MS_ASSERT(fmap_h);
   for (int i = 0; i < fmap_h; i++) {
     float cy = i + prior_box_param_->offset;
     for (int j = 0; j < fmap_w; j++) {
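Note: MS_ASSERT(fabs(ratio) > 1e-5) documents that 1.0f / ratio is only computed for a ratio safely away from zero, and the fmap_w / fmap_h asserts rule out an empty feature map before the generation loops run. MS_ASSERT is a debug-build check, so release behavior is unchanged.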


mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc (+8, -1)

@@ -39,8 +39,11 @@ int QuantDTypeCastCPUKernel::Init() {
     return RET_PARAM_INVALID;
   }
   auto in_tensor = in_tensors_.front();
+  MS_ASSERT(in_tensor);
   auto out_tensor = out_tensors_.front();
+  MS_ASSERT(out_tensor);
   auto param = reinterpret_cast<QuantDTypeCastParameter *>(op_parameter_);
+  MS_ASSERT(param);
   if (param->srcT == kNumberTypeFloat32 && param->dstT == kNumberTypeInt8) {
     if (in_tensor->data_type() != kNumberTypeFloat32 || out_tensor->data_type() != kNumberTypeInt8) {
       MS_LOG(ERROR) << "param data type and tensor data type do not match.";

@@ -177,7 +180,11 @@ int QuantDTypeCastCPUKernel::Run() {
       out_tensors_[0]->data_type() == TypeId::kNumberTypeInt8) {
     int8_ptr_ = reinterpret_cast<int8_t *>(in_tensors_[0]->data_c());
     int8_out_ptr_ = reinterpret_cast<int8_t *>(out_tensors_[0]->data_c());
-    float32_ptr_ = new float[in_tensors_[0]->ElementsNum()];
+    float32_ptr_ = new (std::nothrow) float[in_tensors_[0]->ElementsNum()];
+    if (float32_ptr_ == nullptr) {
+      MS_LOG(ERROR) << "new float[] failed";
+      return RET_ERROR;
+    }
   } else if (in_tensors_[0]->data_type() == TypeId::kNumberTypeUInt8 &&
              out_tensors_[0]->data_type() == TypeId::kNumberTypeFloat32) {
     uint8_ptr_ = reinterpret_cast<uint8_t *>(in_tensors_[0]->data_c());
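Note: plain `new` reports failure by throwing std::bad_alloc, never by returning nullptr, so a null check after it is dead code; `new (std::nothrow)` is the form that makes the added check meaningful. A short sketch of the pattern (the return-code constants are placeholders for the real ones):

#include <cstddef>
#include <iostream>
#include <new>

constexpr int RET_OK = 0;      // illustrative return codes, not the
constexpr int RET_ERROR = -1;  // real mindspore::lite definitions

int AllocateFloats(float *&out, std::size_t n) {
  out = new (std::nothrow) float[n];  // yields nullptr on failure instead of throwing
  if (out == nullptr) {
    std::cerr << "new float[] failed\n";
    return RET_ERROR;
  }
  return RET_OK;
}

int main() {
  float *buf = nullptr;
  if (AllocateFloats(buf, 1024) == RET_OK) {
    delete[] buf;  // array new pairs with array delete
  }
  return 0;
}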


mindspore/lite/src/runtime/kernel/arm/base/split_base.cc (+6, -0)

@@ -37,16 +37,20 @@ int SplitBaseCPUKernel::ReSize() {
   auto in_tensor = in_tensors_.front();
   auto input_shape = in_tensor->shape();

+  MS_ASSERT(param);
+  MS_ASSERT(input_shape.size() >= 2 && input_shape.size() <= SPLIT_STRIDES_SIZE);
   param->strides_[input_shape.size() - 1] = 1;
   for (int i = input_shape.size() - 2; i >= 0; i--) {
     param->strides_[i] = param->strides_[i + 1] * input_shape[i + 1];
   }

+  MS_ASSERT(static_cast<size_t>(param->split_dim_) < input_shape.size());
   param->split_count_ =
     param->strides_[0] * input_shape[0] / (input_shape[param->split_dim_] * param->strides_[param->split_dim_]);
   param->n_dims_ = input_shape.size();

   if (param->split_sizes_[0] == 0) {
+    MS_ASSERT(param->num_split_ > 0 && static_cast<int>(param->num_split_) < input_shape.size());
     if (input_shape[param->split_dim_] % param->num_split_ != 0) {
       MS_LOG(ERROR) << "Default split size is not usable.";
       return RET_ERROR;

@@ -57,6 +61,7 @@ int SplitBaseCPUKernel::ReSize() {
     }
   }

+  MS_ASSERT(param->num_split_ >= 1 && param->num_split_ <= SPLIT_STRIDES_SIZE);
   if (param->split_sizes_[param->num_split_ - 1] == -1) {
     int split_shape_end = input_shape[param->split_dim_];
     for (int i = 0; i < param->num_split_ - 1; i++) {

@@ -67,6 +72,7 @@ int SplitBaseCPUKernel::ReSize() {
   num_unit_ = param->split_count_ * param->num_split_;
   thread_n_num_ = MSMIN(thread_count_, num_unit_);
+  MS_ASSERT(thread_n_num_);
   thread_n_stride_ = UP_DIV(num_unit_, thread_n_num_);
   return RET_OK;
 }
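Note: these asserts encode the preconditions the arithmetic relies on: input_shape.size() and num_split_ must not exceed SPLIT_STRIDES_SIZE (the capacity of strides_ from split_parameter.h), split_dim_ must index into the shape, and thread_n_num_ must be non-zero before UP_DIV divides by it.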


mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.cc (+4, -2)

@@ -45,7 +45,8 @@ ARITHMETIC_COMPARE_FUNC_INFO_FP16 arithmetic_cp_fun_table_fp16[] = {
   ElementOptGreaterEqualFp16}};

 ArithmeticCompareFuncFp16 GetArithmeticCompareFun(int primitive_type, int activation_type) {
-  for (size_t i = 0; i < sizeof(arithmetic_cp_fun_table_fp16); i++) {
+  size_t length = sizeof(arithmetic_cp_fun_table_fp16) / sizeof(ARITHMETIC_COMPARE_FUNC_INFO_FP16);
+  for (size_t i = 0; i < length; i++) {
     if (arithmetic_cp_fun_table_fp16[i].primitive_type_ == primitive_type &&
         arithmetic_cp_fun_table_fp16[i].activation_type_ == activation_type) {
       return arithmetic_cp_fun_table_fp16[i].func_;

@@ -55,7 +56,8 @@ ArithmeticCompareFuncFp16 GetArithmeticCompareFun(int primitive_type, int activation_type) {
 }

 ArithmeticCompareOptFuncFp16 GetOptimizedArithmeticCompareFun(int primitive_type, int activation_type) {
-  for (size_t i = 0; i < sizeof(arithmetic_cp_fun_table_fp16); i++) {
+  size_t length = sizeof(arithmetic_cp_fun_table_fp16) / sizeof(ARITHMETIC_COMPARE_FUNC_INFO_FP16);
+  for (size_t i = 0; i < length; i++) {
     if (arithmetic_cp_fun_table_fp16[i].primitive_type_ == primitive_type &&
         arithmetic_cp_fun_table_fp16[i].activation_type_ == activation_type) {
       return arithmetic_cp_fun_table_fp16[i].opt_func_;
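Note: this fixes an out-of-bounds read: sizeof(arithmetic_cp_fun_table_fp16) is the table's size in bytes, not its element count, so the old loop walked far past the last entry. arithmetic_fp16.cc below gets the identical fix. A self-contained illustration (struct and values are made up):

#include <cstddef>
#include <iostream>
#include <iterator>

struct FuncInfo {
  int primitive_type;
  int activation_type;
};

FuncInfo table[] = {{1, 0}, {2, 0}, {3, 1}};

int main() {
  // sizeof(table) is the byte size: 3 * sizeof(FuncInfo) -- not 3.
  std::cout << "sizeof(table): " << sizeof(table) << "\n";
  // Dividing by the element size recovers the element count, as in the patch.
  std::cout << "count via division: " << sizeof(table) / sizeof(FuncInfo) << "\n";
  // C++17 std::size does the same without the error-prone arithmetic.
  std::cout << "count via std::size: " << std::size(table) << "\n";
  return 0;
}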


mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc (+4, -2)

@@ -72,7 +72,8 @@ ARITHMETIC_FUNC_INFO_FP16 arithmetic_fun_table_fp16[] = {
   {PrimitiveType_Minimum, schema::ActivationType_NO_ACTIVATION, ElementMinimumFp16, ElementOptMinimumFp16}};

 ArithmeticFuncFp16 GetArithmeticFun(int primitive_type, int activation_type) {
-  for (size_t i = 0; i < sizeof(arithmetic_fun_table_fp16); i++) {
+  size_t length = sizeof(arithmetic_fun_table_fp16) / sizeof(ARITHMETIC_FUNC_INFO_FP16);
+  for (size_t i = 0; i < length; i++) {
     if (arithmetic_fun_table_fp16[i].primitive_type_ == primitive_type &&
         arithmetic_fun_table_fp16[i].activation_type_ == activation_type) {
       return arithmetic_fun_table_fp16[i].func_;

@@ -82,7 +83,8 @@ ArithmeticFuncFp16 GetArithmeticFun(int primitive_type, int activation_type) {
 }

 ArithmeticOptFuncFp16 GetOptimizedArithmeticFun(int primitive_type, int activation_type) {
-  for (size_t i = 0; i < sizeof(arithmetic_fun_table_fp16); i++) {
+  size_t length = sizeof(arithmetic_fun_table_fp16) / sizeof(ARITHMETIC_FUNC_INFO_FP16);
+  for (size_t i = 0; i < length; i++) {
     if (arithmetic_fun_table_fp16[i].primitive_type_ == primitive_type &&
         arithmetic_fun_table_fp16[i].activation_type_ == activation_type) {
       return arithmetic_fun_table_fp16[i].opt_func_;


mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc (+1, -1)

@@ -71,7 +71,7 @@ int ConcatFp16CPUKernel::MallocTmpBuffer() {
 void ConcatFp16CPUKernel::FreeTmpBuffer() {
   for (size_t i = 0; i < fp16_inputs_.size(); i++) {
     auto &in_tensor = in_tensors_.at(i);
-    auto in_ptr = fp16_inputs_.at(i);
+    auto &in_ptr = fp16_inputs_.at(i);
     if (in_tensor->data_type() == kNumberTypeFloat32 || in_tensor->data_type() == kNumberTypeFloat) {
       if (in_ptr != nullptr) {
         context_->allocator->Free(in_ptr);
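Note: with `auto in_ptr` the loop freed a copy of the stored pointer, and any subsequent `in_ptr = nullptr` would only clear that copy, leaving a dangling pointer in fp16_inputs_. Binding a reference makes the reset hit the vector element itself. A standalone sketch:

#include <cstdlib>
#include <vector>

int main() {
  std::vector<float *> buffers;
  for (int i = 0; i < 3; ++i) {
    buffers.push_back(static_cast<float *>(malloc(16 * sizeof(float))));
  }
  // `auto &ptr` binds to the element stored in the vector, so the
  // nullptr assignment clears the container's copy of the pointer.
  // With plain `auto ptr`, free() would still run, but the vector
  // would keep a dangling pointer.
  for (auto &ptr : buffers) {
    if (ptr != nullptr) {
      free(ptr);
      ptr = nullptr;
    }
  }
  return 0;
}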


mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc (+2, -1)

@@ -33,7 +33,7 @@ using mindspore::schema::PrimitiveType_DepthwiseConv2D;
 namespace mindspore::kernel {
 ConvolutionDepthwiseFp16CPUKernel::~ConvolutionDepthwiseFp16CPUKernel() {
   if (packed_weight_ != nullptr) {
-    delete packed_weight_;
+    free(packed_weight_);
     packed_weight_ = nullptr;
   }
 }

@@ -68,6 +68,7 @@ int ConvolutionDepthwiseFp16CPUKernel::InitWeightBias() {
   if (in_tensors_.size() == kInputSize2) {
     auto bias_tensor = in_tensors_.at(kBiasIndex);
     auto ori_bias = reinterpret_cast<float *>(bias_tensor->MutableData());
+    MS_ASSERT(ori_bias);
     for (int i = 0; i < bias_tensor->ElementsNum(); i++) {
       bias_fp16[i] = (float16_t)ori_bias[i];
     }
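Note: packed_weight_ is obtained from malloc, so releasing it with `delete` was undefined behavior; allocation and release must come from the same family. A minimal illustration:

#include <cstdlib>

int main() {
  // Memory that came from malloc must go back through free;
  // calling delete on it is undefined behavior.
  float *packed_weight = static_cast<float *>(malloc(64 * sizeof(float)));
  if (packed_weight != nullptr) {
    free(packed_weight);      // correct: matches malloc
    packed_weight = nullptr;  // guard against accidental reuse
  }

  // Conversely, memory from new[] must go back through delete[].
  float *scratch = new float[64];
  delete[] scratch;
  return 0;
}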


mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc (+2, -2)

@@ -55,6 +55,7 @@ int ConvolutionDepthwiseSWFp16CPUKernel::InitBuffer() {
     packed_output_ = reinterpret_cast<float16_t *>(context_->allocator->Malloc(pack_output_size * sizeof(float16_t)));
     if (packed_output_ == nullptr) {
       MS_LOG(ERROR) << "Malloc buffer failed.";
+      context_->allocator->Free(packed_input_);
       return RET_ERROR;
     }
   }

@@ -86,6 +87,7 @@ int ConvolutionDepthwiseSWFp16CPUKernel::InitWeightBias() {
   if (in_tensors_.size() == kInputSize2) {
     auto bias_tensor = in_tensors_.at(kBiasIndex);
     auto ori_bias = reinterpret_cast<float *>(bias_tensor->MutableData());
+    MS_ASSERT(ori_bias);
     for (int i = 0; i < bias_tensor->ElementsNum(); i++) {
       bias_fp16[i] = (float16_t)ori_bias[i];
     }

@@ -161,8 +163,6 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
                        conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_);
   } else {
     packed_input_ = execute_input_;
-  }
-  if (!need_align_) {
     packed_output_ = execute_output_;
   }
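Note: two fixes in this file: InitBuffer now releases packed_input_ when the packed_output_ allocation fails, mirroring the unwind pattern in pooling_base.cc above, and in Run the packed_output_ assignment moves into the existing else branch, which already executes exactly when !need_align_ holds.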



mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc (+1, -0)

@@ -342,6 +342,7 @@ int DeConvWinogradFp16CPUKernel::InitDataParam() {
   auto fp16_bias_data = reinterpret_cast<float16_t *>(bias_data_);
   if (in_tensors_.size() == kInputSize2) {
     auto src_bias = reinterpret_cast<float *>(in_tensors_.at(kBiasIndex)->MutableData());
+    MS_ASSERT(src_bias);
     for (int i = 0; i < conv_param_->output_channel_; ++i) {
       fp16_bias_data[i] = (float16_t)src_bias[i];
     }


mindspore/lite/src/runtime/kernel/arm/fp16/fullconnection_fp16.cc (+4, -0)

@@ -122,6 +122,10 @@ int FullconnectionFP16CPUKernel::ReSize() {
   if (out_tensors_[0]->data_type() == kNumberTypeFloat32) {
     output_fp16_ =
       reinterpret_cast<float16_t *>(ctx_->allocator->Malloc(fc_param_->row_ * fc_param_->col_ * sizeof(float16_t)));
+    if (output_fp16_ == nullptr) {
+      FreeTmpBuffer();
+      return RET_MEMORY_FAILED;
+    }
   }
   return RET_OK;
 }  // namespace mindspore::kernel
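Note: ctx_->allocator->Malloc can return nullptr; checking the result and calling FreeTmpBuffer before returning RET_MEMORY_FAILED releases the buffers ReSize allocated earlier instead of leaking them on the error path.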


mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.cc (+4, -1)

@@ -27,8 +27,10 @@ using mindspore::schema::PrimitiveType_FusedBatchNorm;
 namespace mindspore::kernel {
 int FusedBatchnormFp16CPUKernel::DoExecute(int task_id) {
   auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_);
+  MS_ASSERT(param);
   if (in_tensors_.at(0)->data_type() == kNumberTypeFloat32) {
+    MS_ASSERT(in_tensors_.size() == 5);
+    MS_ASSERT(out_tensors_.size() == 1);
     auto input = in_tensors_.at(0);
     auto scale = in_tensors_.at(1);
     auto offset = in_tensors_.at(2);

@@ -50,6 +52,7 @@ int FusedBatchnormFp16CPUKernel::DoExecute(int task_id) {
       context_->allocator->Free(mean_fp16);
       context_->allocator->Free(variance_fp16);
       context_->allocator->Free(output_fp16);
+      return RET_ERROR;
     }
     Float32ToFloat16(reinterpret_cast<float *>(input->MutableData()), reinterpret_cast<float16_t *>(input_fp16),
                      input->ElementsNum());
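Note: the added `return RET_ERROR;` terminates the error path: the code above it frees the temporary fp16 buffers after a failed allocation, and previously execution continued into the conversions that use those buffers. The new size asserts state the kernel's contract of five inputs (input, scale, offset, mean, variance) and one output.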


mindspore/lite/src/runtime/kernel/arm/fp16/group_convolution_fp16.cc (+6, -0)

@@ -155,6 +155,8 @@ int GroupConvolutionFP16CPUKernel::SeparateInput(int group_id) {
   if (in_tensors_.front()->data_type() == kNumberTypeFloat16) {
     float16_t *src_ptr = reinterpret_cast<float16_t *>(ori_in_data_) + group_id * sub_in_channel;
     float16_t *dst_ptr = reinterpret_cast<float16_t *>(sub_in_data);
+    MS_ASSERT(src_ptr);
+    MS_ASSERT(dst_ptr);
     for (int i = 0; i < in_plane; ++i) {
       memcpy(dst_ptr, src_ptr, sub_in_channel * sizeof(float16_t));
       src_ptr += ori_in_channel;

@@ -163,6 +165,8 @@ int GroupConvolutionFP16CPUKernel::SeparateInput(int group_id) {
   } else {
     float *src_ptr = reinterpret_cast<float *>(ori_in_data_) + group_id * sub_in_channel;
     float *dst_ptr = reinterpret_cast<float *>(sub_in_data);
+    MS_ASSERT(src_ptr);
+    MS_ASSERT(dst_ptr);
     for (int i = 0; i < in_plane; ++i) {
       memcpy(dst_ptr, src_ptr, sub_in_channel * sizeof(float));
       src_ptr += ori_in_channel;

@@ -180,6 +184,7 @@ void GroupConvolutionFP16CPUKernel::PostConcat(int group_id) {
   int sub_out_channel = conv_param_->output_channel_;
   int ori_out_channel = sub_out_channel * group_num_;
   auto sub_out_data = reinterpret_cast<float16_t *>(group_convs_[group_id]->out_tensors().front()->data_c());
+  MS_ASSERT(sub_out_data);
   float16_t *src_ptr = sub_out_data;
   float16_t *dst_ptr = ori_out_data_ + group_id * sub_out_channel;
   for (int i = 0; i < out_plane; ++i) {

@@ -192,6 +197,7 @@ void GroupConvolutionFP16CPUKernel::PostConcat(int group_id) {
 int GroupConvolutionFP16CPUKernel::Run() {
   ori_in_data_ = in_tensors().front()->data_c();
   ori_out_data_ = reinterpret_cast<float16_t *>(out_tensors().front()->data_c());
+  MS_ASSERT(ori_out_data_);
   for (int i = 0; i < group_num_; ++i) {
     // first, separate group conv input into several parts. This step must be in runtime stage.
     auto ret = SeparateInput(i);


mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.cc (+1, -0)

@@ -339,6 +339,7 @@ kernel::LiteKernel *CpuMatmulFp16KernelCreator(const std::vector<lite::Tensor *>
   auto *dequant_weight = kernel::DequantUtil::DequantWeight(weight_tensor);
   if (dequant_weight == nullptr) {
     MS_LOG(ERROR) << "dequant data is nullptr.";
+    free(opParameter);
     return nullptr;
   }
   weight_tensor->set_data_type(kNumberTypeFloat32);
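Note: the early `return nullptr` previously leaked opParameter when weight dequantization failed; freeing it before bailing out keeps the failed-creation path from leaking the parameter struct.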


mindspore/lite/src/runtime/kernel/arm/fp16/quant_dtype_cast_fp16.cc (+2, -0)

@@ -87,6 +87,8 @@ int QuantDTypeCastFp16CPUKernel::QuantDTypeCast(int task_id) {
   auto quant_arg = !out_tensors_.front()->GetQuantParams().empty() ? out_tensors_.front()->GetQuantParams().front()
                                                                    : in_tensors_.front()->GetQuantParams().front();
   int ret;
+  MS_ASSERT(int8_ptr_);
+  MS_ASSERT(float16_ptr_);
   if (inverse_) {
     ret = DoDequantizeInt8ToFp16(int8_ptr_ + thread_offset, float16_ptr_ + thread_offset, quant_arg.scale,
                                  quant_arg.zeroPoint, num_unit_thread);


mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc (+1, -1)

@@ -118,7 +118,7 @@ int ReduceFp16CPUKernel::Run() {
 }

 void ReduceFp16CPUKernel::FreeTmpBuffer() {
-  for (auto buffer : data_buffers_) {
+  for (auto &buffer : data_buffers_) {
     if (buffer != nullptr) {
       context_->allocator->Free(buffer);
       buffer = nullptr;
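Note: same fix as concat_fp16.cc above: iterating by value frees a copy and leaves a dangling pointer stored in data_buffers_, while `auto &buffer` makes the `buffer = nullptr;` clear the vector element itself.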

