Browse Source

!4780 [MS][LITE][Develop]move malloc to run

Merge pull request !4780 from sunsuodong/move_malloc_to_run
tags/v0.7.0-beta
mindspore-ci-bot Gitee 5 years ago
parent
commit
8a33f61d51
15 changed files with 142 additions and 276 deletions
  1. +13
    -30
      mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.cc
  2. +2
    -7
      mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.h
  3. +0
    -1
      mindspore/lite/src/runtime/kernel/arm/fp32/argminmax.cc
  4. +18
    -25
      mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc
  5. +1
    -2
      mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.h
  6. +11
    -4
      mindspore/lite/src/runtime/kernel/arm/fp32/bias.cc
  7. +19
    -36
      mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup.cc
  8. +9
    -11
      mindspore/lite/src/runtime/kernel/arm/fp32/topk.cc
  9. +16
    -46
      mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc
  10. +1
    -2
      mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h
  11. +12
    -44
      mindspore/lite/src/runtime/kernel/arm/int8/div_int8.cc
  12. +18
    -28
      mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.cc
  13. +1
    -2
      mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.h
  14. +13
    -32
      mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.cc
  15. +8
    -6
      mindspore/lite/src/runtime/kernel/arm/int8/topk_int8.cc

+ 13
- 30
mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.cc View File

@@ -61,19 +61,6 @@ int ArgMinMaxBaseCPUKernel::ReSize() {
return RET_PARAM_INVALID;
}
param->topk_ = MSMIN(param->topk_, in_shape[axis]);
if (param->topk_ > 1 || param->keep_dims_) {
if (context_ != nullptr && context_->allocator != nullptr) {
param->arg_elements_ =
reinterpret_cast<ArgElement *>(context_->allocator->Malloc(sizeof(ArgElement) * in_shape[axis]));
data_from_allocator_ = true;
} else {
param->arg_elements_ = reinterpret_cast<ArgElement *>(malloc(sizeof(ArgElement) * in_shape[axis]));
}
if (param->arg_elements_ == nullptr) {
MS_LOG(ERROR) << "malloc memroy fail!";
return RET_ERROR;
}
}
ComputeStrides(in_shape.data(), param->in_strides_, in_shape.size());
auto out_shape = out_tensors_.at(0)->shape();
ComputeStrides(out_shape.data(), param->out_strides_, out_shape.size());
@@ -81,28 +68,24 @@ int ArgMinMaxBaseCPUKernel::ReSize() {
}

int ArgMinMaxBaseCPUKernel::Run() {
auto input = in_tensors_.at(0);

auto input_data = reinterpret_cast<const void *>(in_tensors_.at(0)->Data());
auto input_data = in_tensors_.at(0)->Data();
auto output_data = out_tensors_.at(0)->Data();

auto shape = input->shape().data();
auto shape = in_tensors_.at(0)->shape().data();
auto param = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_);
ArgMinMax(input_data, output_data, reinterpret_cast<const int *>(shape), param);
return RET_OK;
}

void ArgMinMaxBaseCPUKernel::FreeTmpMemory() {
auto param = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_);
if (param->arg_elements_ == nullptr) {
return;
}
if (data_from_allocator_) {
context_->allocator->Free(param->arg_elements_);
} else {
free(param->arg_elements_);
MS_ASSERT(context_->allocator != nullptr);
if (param->topk_ > 1 || param->keep_dims_) {
param->arg_elements_ =
reinterpret_cast<ArgElement *>(context_->allocator->Malloc(sizeof(ArgElement) * shape[param->axis_]));
if (param->arg_elements_ == nullptr) {
MS_LOG(ERROR) << "malloc memroy fail!";
return RET_ERROR;
}
}
ArgMinMax(input_data, output_data, reinterpret_cast<const int *>(shape), param);
context_->allocator->Free(param->arg_elements_);
param->arg_elements_ = nullptr;
return RET_OK;
}

kernel::LiteKernel *CpuArgMinMaxInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,


+ 2
- 7
mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.h View File

@@ -26,20 +26,15 @@ class ArgMinMaxBaseCPUKernel : public LiteKernel {
ArgMinMaxBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const mindspore::lite::PrimitiveC *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), data_from_allocator_(false) {}
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}

virtual ~ArgMinMaxBaseCPUKernel() { FreeTmpMemory(); }
virtual ~ArgMinMaxBaseCPUKernel() = default;

int Init() override;

int ReSize() override;

int Run() override;

void FreeTmpMemory();

private:
bool data_from_allocator_;
};
} // namespace mindspore::kernel



+ 0
- 1
mindspore/lite/src/runtime/kernel/arm/fp32/argminmax.cc View File

@@ -43,7 +43,6 @@ int ArgMinMaxCPUKernel::Init() {
}

// Re-initialises the kernel for a new input shape.
// Calls the base-class FreeTmpMemory() first and then hands the actual
// resize work to ArgMinMaxBaseCPUKernel::ReSize(), returning its status.
int ArgMinMaxCPUKernel::ReSize() {
  // The temporary buffer is released before the base class recomputes its state.
  ArgMinMaxBaseCPUKernel::FreeTmpMemory();
  const int resize_status = ArgMinMaxBaseCPUKernel::ReSize();
  return resize_status;
}



+ 18
- 25
mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc View File

@@ -29,19 +29,6 @@ using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Eltwise;

namespace mindspore::kernel {
// Releases both tile buffers and resets the member pointers to nullptr,
// so calling this function repeatedly is harmless.
void ArithmeticCPUKernel::FreeTileData() {
  // delete[] on a null pointer is a no-op, so no explicit null checks are required.
  delete[] tile_data0_;
  tile_data0_ = nullptr;

  delete[] tile_data1_;
  tile_data1_ = nullptr;
}

// Destructor: releases the tile buffers through FreeTileData().
ArithmeticCPUKernel::~ArithmeticCPUKernel() { FreeTileData(); }

int ArithmeticCPUKernel::Init() {
if (!InferShapeDone()) {
return RET_OK;
@@ -50,7 +37,6 @@ int ArithmeticCPUKernel::Init() {
}

int ArithmeticCPUKernel::ReSize() {
FreeTileData();
arithmeticParameter_->in_elements_num0_ = in_tensors_[0]->ElementsNum();
arithmeticParameter_->in_elements_num1_ = in_tensors_[1]->ElementsNum();
arithmeticParameter_->out_elements_num_ = out_tensors_[0]->ElementsNum();
@@ -75,12 +61,6 @@ int ArithmeticCPUKernel::ReSize() {
}
}
}

if (arithmeticParameter_->broadcasting_) {
tile_data0_ = new float[arithmeticParameter_->out_elements_num_];
tile_data1_ = new float[arithmeticParameter_->out_elements_num_];
}

return RET_OK;
}

@@ -144,14 +124,27 @@ int ArithmeticCPUKernel::Run() {
if (arithmeticParameter_->broadcasting_) {
auto input_data0 = reinterpret_cast<float *>(in_tensors_[0]->Data());
auto input_data1 = reinterpret_cast<float *>(in_tensors_[1]->Data());
auto length = arithmeticParameter_->out_elements_num_ * sizeof(float);
MS_ASSERT(context_->allocator != nullptr);
tile_data0_ = reinterpret_cast<float *>(context_->allocator->Malloc(length));
tile_data1_ = reinterpret_cast<float *>(context_->allocator->Malloc(length));
if (tile_data0_ == nullptr || tile_data1_ == nullptr) {
MS_LOG(ERROR) << "Memory allocation failed";
context_->allocator->Free(tile_data0_);
context_->allocator->Free(tile_data1_);
return RET_ERROR;
}
TileDimensions(input_data0, input_data1, tile_data0_, tile_data1_, arithmeticParameter_);
}
int error_code = LiteBackendParallelLaunch(ArithmeticsRun, this, thread_count_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Arithmetic function error error_code[" << error_code << "]";
return RET_ERROR;
ret = LiteBackendParallelLaunch(ArithmeticsRun, this, thread_count_);
if (arithmeticParameter_->broadcasting_) {
context_->allocator->Free(tile_data0_);
context_->allocator->Free(tile_data1_);
}
return RET_OK;
if (ret != RET_OK) {
MS_LOG(ERROR) << "Arithmetic function error error_code[" << ret << "]";
}
return ret;
}

kernel::LiteKernel *CpuArithmeticFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,


+ 1
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.h View File

@@ -166,7 +166,7 @@ class ArithmeticCPUKernel : public LiteKernel {
break;
}
}
~ArithmeticCPUKernel() override;
~ArithmeticCPUKernel() = default;

int Init() override;
int ReSize() override;
@@ -174,7 +174,6 @@ class ArithmeticCPUKernel : public LiteKernel {
int DoArithmetic(int task_id);

private:
void FreeTileData();
int thread_count_;
float *tile_data0_ = nullptr;
float *tile_data1_ = nullptr;


+ 11
- 4
mindspore/lite/src/runtime/kernel/arm/fp32/bias.cc View File

@@ -51,11 +51,18 @@ int BiasCPUKernel::Run() {
auto bias = reinterpret_cast<float *>(in_tensors_.at(1)->Data());
auto out = reinterpret_cast<float *>(out_tensors_.at(0)->Data());
size_t data_size = in_tensors_.at(0)->ElementsNum();
auto tile_in = new float[data_size];
auto tile_bias = new float[data_size];
MS_ASSERT(context_->allocator != nullptr);
float *tile_in = reinterpret_cast<float *>(context_->allocator->Malloc(data_size * sizeof(float)));
float *tile_bias = reinterpret_cast<float *>(context_->allocator->Malloc(data_size * sizeof(float)));
if (tile_in == nullptr || tile_bias == nullptr) {
MS_LOG(ERROR) << "Memory allocation failed";
context_->allocator->Free(tile_in);
context_->allocator->Free(tile_bias);
return RET_ERROR;
}
BroadcastAdd(in, bias, tile_in, tile_bias, out, data_size, bias_param_);
delete[] tile_in;
delete[] tile_bias;
context_->allocator->Free(tile_in);
context_->allocator->Free(tile_bias);
return RET_OK;
}



+ 19
- 36
mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup.cc View File

@@ -49,40 +49,6 @@ int EmbeddingLookupCPUKernel::ReSize() {
embedding_lookup_parameter_->layer_num_ += in_tensors_[i]->shape()[0];
}

if (input_addr_ != nullptr) {
free(input_addr_);
}
if (context_ != nullptr && context_->allocator != nullptr) {
input_addr_ = reinterpret_cast<float *>(context_->allocator->Malloc(
sizeof(float) * embedding_lookup_parameter_->layer_size_ * embedding_lookup_parameter_->layer_num_));
} else {
input_addr_ = reinterpret_cast<float *>(
malloc(sizeof(float) * embedding_lookup_parameter_->layer_size_ * embedding_lookup_parameter_->layer_num_));
}
if (input_addr_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed";
return RET_ERROR;
}

if (embedding_lookup_parameter_->is_regulated_ != nullptr) {
free(embedding_lookup_parameter_->is_regulated_);
}
if (context_ != nullptr && context_->allocator != nullptr) {
embedding_lookup_parameter_->is_regulated_ =
reinterpret_cast<bool *>(context_->allocator->Malloc(sizeof(bool) * embedding_lookup_parameter_->layer_num_));
} else {
embedding_lookup_parameter_->is_regulated_ =
reinterpret_cast<bool *>(malloc(sizeof(bool) * embedding_lookup_parameter_->layer_num_));
}
if (embedding_lookup_parameter_->is_regulated_ == nullptr) {
MS_LOG(ERROR) << "Malloc buffer failed";
return RET_ERROR;
}

for (int i = 0; i < embedding_lookup_parameter_->layer_num_; ++i) {
embedding_lookup_parameter_->is_regulated_[i] = embedding_lookup_parameter_->max_norm_ == 0;
}

return RET_OK;
}

@@ -111,6 +77,22 @@ int EmbeddingLookupCPUKernel::Run() {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}

MS_ASSERT(context_->allocator != nullptr);
input_addr_ = reinterpret_cast<float *>(context_->allocator->Malloc(
sizeof(float) * embedding_lookup_parameter_->layer_size_ * embedding_lookup_parameter_->layer_num_));
embedding_lookup_parameter_->is_regulated_ =
reinterpret_cast<bool *>(context_->allocator->Malloc(sizeof(bool) * embedding_lookup_parameter_->layer_num_));
if (input_addr_ == nullptr || embedding_lookup_parameter_->is_regulated_ == nullptr) {
MS_LOG(ERROR) << "Memory allocation failed";
context_->allocator->Free(input_addr_);
context_->allocator->Free(embedding_lookup_parameter_->is_regulated_);
return RET_ERROR;
}
for (int i = 0; i < embedding_lookup_parameter_->layer_num_; ++i) {
embedding_lookup_parameter_->is_regulated_[i] = embedding_lookup_parameter_->max_norm_ == 0;
}

int dest_loc = 0;
for (int i = 0; i < in_tensors_.size() - 1; i++) {
auto input_t = reinterpret_cast<float *>(in_tensors_.at(i)->Data());
@@ -121,11 +103,12 @@ int EmbeddingLookupCPUKernel::Run() {
ids_addr_ = reinterpret_cast<int *>(in_tensors_.back()->Data());

auto ret = LiteBackendParallelLaunch(EmbeddingLookupRun, this, embedding_lookup_parameter_->thread_num);
context_->allocator->Free(input_addr_);
context_->allocator->Free(embedding_lookup_parameter_->is_regulated_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "EmbeddingLookup error: error_code[" << ret << "]";
return RET_ERROR;
}
return RET_OK;
return ret;
}

kernel::LiteKernel *CpuEmbeddingLookupFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,


+ 9
- 11
mindspore/lite/src/runtime/kernel/arm/fp32/topk.cc View File

@@ -34,23 +34,13 @@ int TopKCPUKernel::Init() {
}

// Recomputes the TopK geometry from the first input tensor and (re)allocates
// the node list stored in the TopkParameter.
// Returns RET_OK on success, RET_ERROR when the malloc of the node list fails.
// NOTE(review): `parameter` is declared twice in this body (before and after
// `input`). This listing appears to show both the removed and the added line of
// a change; only one of the two declarations can remain in compilable code.
int TopKCPUKernel::ReSize() {
TopkParameter *parameter = reinterpret_cast<TopkParameter *>(op_parameter_);
lite::tensor::Tensor *input = in_tensors_.at(0);
TopkParameter *parameter = reinterpret_cast<TopkParameter *>(op_parameter_);
// last_dim_size_ is the extent of the last axis of the input shape.
parameter->last_dim_size_ = input->shape()[input->shape().size() - 1];
// loop_num_ is the product of every axis except the last one.
parameter->loop_num_ = 1;
for (int i = 0; i < input->shape().size() - 1; ++i) {
parameter->loop_num_ *= input->shape()[i];
}

// Release the list left over from a previous call before allocating a new one.
if (parameter->topk_node_list_ != nullptr) {
free(parameter->topk_node_list_);
parameter->topk_node_list_ = nullptr;
}
// One TopkNode per element of the last axis.
parameter->topk_node_list_ = malloc(sizeof(TopkNode) * parameter->last_dim_size_);
if (parameter->topk_node_list_ == nullptr) {
MS_LOG(ERROR) << "malloc fail.";
return RET_ERROR;
}
return RET_OK;
}

@@ -64,7 +54,15 @@ int TopKCPUKernel::Run() {
auto output_data = reinterpret_cast<float *>(out_tensors_.at(0)->Data());
auto output_index = reinterpret_cast<int32_t *>(out_tensors_.at(1)->Data());

MS_ASSERT(context_->allocator != nullptr);
TopkParameter *parameter = reinterpret_cast<TopkParameter *>(op_parameter_);
parameter->topk_node_list_ = context_->allocator->Malloc(sizeof(TopkNode) * parameter->last_dim_size_);
if (parameter->topk_node_list_ == nullptr) {
MS_LOG(ERROR) << "Memory allocation failed";
return RET_ERROR;
}
Topk(input_data, output_data, output_index, reinterpret_cast<TopkParameter *>(op_parameter_));
context_->allocator->Free(parameter->topk_node_list_);
return RET_OK;
}



+ 16
- 46
mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc View File

@@ -47,32 +47,6 @@ int ArithmeticsInt8Launch(int thread_id, LiteParallelGroupEnv *penv, void *cdata
}
} // namespace

void ArithmeticInt8CPUKernel::FreeTileData() {
auto param = reinterpret_cast<ArithmeticParameter *>(op_parameter_);
if (!param->broadcasting_) {
return;
}
if (context_->allocator != nullptr) {
if (tile_data0_ != nullptr) {
context_->allocator->Free(tile_data0_);
}
if (tile_data1_ != nullptr) {
context_->allocator->Free(tile_data1_);
}
} else {
if (tile_data0_ != nullptr) {
free(tile_data0_);
}
if (tile_data1_ != nullptr) {
free(tile_data1_);
}
}
tile_data0_ = nullptr;
tile_data1_ = nullptr;
}

// Destructor: releases the tile buffers through FreeTileData().
ArithmeticInt8CPUKernel::~ArithmeticInt8CPUKernel() { FreeTileData(); }

int ArithmeticInt8CPUKernel::Init() {
switch (op_parameter_->type_) {
case PrimitiveType_Equal:
@@ -121,21 +95,6 @@ int ArithmeticInt8CPUKernel::Init() {
}

int ArithmeticInt8CPUKernel::ReSize() {
FreeTileData();
auto data_size = out_tensors_[0]->Size();
auto param = reinterpret_cast<ArithmeticParameter *>(op_parameter_);
if (param->broadcasting_) {
if (context_->allocator != nullptr) {
tile_data0_ = reinterpret_cast<int8_t *>(context_->allocator->Malloc(data_size));
tile_data1_ = reinterpret_cast<int8_t *>(context_->allocator->Malloc(data_size));
} else {
tile_data0_ = reinterpret_cast<int8_t *>(malloc(data_size));
tile_data1_ = reinterpret_cast<int8_t *>(malloc(data_size));
}
} else {
tile_data0_ = nullptr;
tile_data1_ = nullptr;
}
return RET_OK;
}

@@ -182,14 +141,25 @@ int ArithmeticInt8CPUKernel::Run() {
if (param->broadcasting_) {
auto input_data0 = reinterpret_cast<int8_t *>(in_tensors_[0]->Data());
auto input_data1 = reinterpret_cast<int8_t *>(in_tensors_[1]->Data());
tile_data0_ = reinterpret_cast<int8_t *>(context_->allocator->Malloc(out_tensors_[0]->Size()));
tile_data1_ = reinterpret_cast<int8_t *>(context_->allocator->Malloc(out_tensors_[0]->Size()));
if (tile_data0_ == nullptr || tile_data1_ == nullptr) {
MS_LOG(ERROR) << "Memory allocation failed";
context_->allocator->Free(tile_data0_);
context_->allocator->Free(tile_data1_);
return RET_ERROR;
}
TileDimensionsInt8(input_data0, input_data1, tile_data0_, tile_data1_, param);
}
int error_code = LiteBackendParallelLaunch(ArithmeticsInt8Launch, this, op_parameter_->thread_num_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Arithmetic launch function fail! ret: " << error_code;
return RET_ERROR;
ret = LiteBackendParallelLaunch(ArithmeticsInt8Launch, this, op_parameter_->thread_num_);
if (param->broadcasting_) {
context_->allocator->Free(tile_data0_);
context_->allocator->Free(tile_data1_);
}
return RET_OK;
if (ret != RET_OK) {
MS_LOG(ERROR) << "Arithmetic launch function fail! ret: " << ret;
}
return ret;
}

kernel::LiteKernel *CpuArithmeticInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,


+ 1
- 2
mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h View File

@@ -32,7 +32,7 @@ class ArithmeticInt8CPUKernel : public LiteKernel {
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const mindspore::lite::PrimitiveC *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
~ArithmeticInt8CPUKernel();
~ArithmeticInt8CPUKernel() = default;

int Init() override;
int ReSize() override;
@@ -40,7 +40,6 @@ class ArithmeticInt8CPUKernel : public LiteKernel {
int DoArithmetic(int thread_id);

private:
void FreeTileData();
int8_t *tile_data0_;
int8_t *tile_data1_;
ArithmeticRunInt8 arithmetic_run_;


+ 12
- 44
mindspore/lite/src/runtime/kernel/arm/int8/div_int8.cc View File

@@ -59,49 +59,6 @@ int DivInt8CPUKernel::Init() {
}

int DivInt8CPUKernel::ReSize() {
if (broadcast_) {
if (tile0_data_ != nullptr) {
if (context_ != nullptr && context_->allocator != nullptr) {
context_->allocator->Free(tile0_data_);
} else {
free(tile0_data_);
}
}
if (tile1_data_ != nullptr) {
if (context_ != nullptr && context_->allocator != nullptr) {
context_->allocator->Free(tile1_data_);
} else {
free(tile1_data_);
}
}

if (context_ != nullptr && context_->allocator != nullptr) {
tile0_data_ = static_cast<int8_t *>(context_->allocator->Malloc(out_tensors_.at(0)->Size()));
tile1_data_ = static_cast<int8_t *>(context_->allocator->Malloc(out_tensors_.at(0)->Size()));
} else {
tile0_data_ = static_cast<int8_t *>(malloc(sizeof(int8_t) * out_tensors_.at(0)->Size()));
tile1_data_ = static_cast<int8_t *>(malloc(sizeof(int8_t) * out_tensors_.at(0)->Size()));
}

if (tile0_data_ == nullptr || tile1_data_ == nullptr) {
if (tile0_data_ != nullptr) {
if (context_ != nullptr && context_->allocator != nullptr) {
context_->allocator->Free(tile0_data_);
} else {
free(tile0_data_);
}
}
if (tile1_data_ != nullptr) {
if (context_ != nullptr && context_->allocator != nullptr) {
context_->allocator->Free(tile1_data_);
} else {
free(tile1_data_);
}
}
MS_LOG(ERROR) << "malloc memroy fail!";
return RET_ERROR;
}
}
return RET_OK;
}

@@ -154,12 +111,23 @@ int DivInt8CPUKernel::Run() {
tile_para.in_shape1_[i] = in_tensors_.at(1)->DimensionSize(i);
tile_para.out_shape_[i] = out_tensors_.at(0)->DimensionSize(i);
}
tile0_data_ = static_cast<int8_t *>(context_->allocator->Malloc(out_tensors_.at(0)->Size()));
tile1_data_ = static_cast<int8_t *>(context_->allocator->Malloc(out_tensors_.at(0)->Size()));
if (tile0_data_ == nullptr || tile1_data_ == nullptr) {
MS_LOG(ERROR) << "Memory allocation failed";
context_->allocator->Free(tile0_data_);
context_->allocator->Free(tile1_data_);
return RET_ERROR;
}
TileDimensionsUint8(static_cast<uint8_t *>(in_tensors_.at(0)->Data()),
static_cast<uint8_t *>(in_tensors_.at(1)->Data()), reinterpret_cast<uint8_t *>(tile0_data_),
reinterpret_cast<uint8_t *>(tile1_data_), &tile_para);
}
ret = LiteBackendParallelLaunch(DivInt8Run, this, op_parameter_->thread_num_);

if (broadcast_) {
context_->allocator->Free(tile0_data_);
context_->allocator->Free(tile1_data_);
}
if (ret != RET_OK) {
MS_LOG(ERROR) << "DivInt8Run function error error_code[" << ret << "]";
}


+ 18
- 28
mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.cc View File

@@ -62,30 +62,8 @@ int SoftmaxInt8CPUKernel::Init() {
return ReSize();
}

// Releases the exp_data_ and sum_data_ scratch buffers and resets both
// pointers to nullptr.
void SoftmaxInt8CPUKernel::FreeTmpBuffer() {
  // free() accepts a null pointer and does nothing, so the buffers can be
  // released unconditionally.
  free(exp_data_);
  exp_data_ = nullptr;

  free(sum_data_);
  sum_data_ = nullptr;
}

// Resizes through the base class and then rebuilds the two int scratch buffers.
// Returns the base-class status when that call fails, RET_OK otherwise.
// NOTE(review): the body ends with two consecutive return statements, so the
// final `return SoftmaxBaseCPUKernel::ReSize();` is unreachable as written. This
// listing appears to show the removed and the added version of the function
// together; confirm which one is intended.
int SoftmaxInt8CPUKernel::ReSize() {
auto ret = SoftmaxBaseCPUKernel::ReSize();
if (ret != RET_OK) {
return ret;
}
// Drop the buffers from the previous shape before allocating new ones.
FreeTmpBuffer();
// exp_data_ holds element_size_ ints.
// NOTE(review): neither malloc result below is checked for nullptr.
exp_data_ = reinterpret_cast<int *>(malloc(softmax_param_->element_size_ * sizeof(int)));
// inner_size is the product of all input dimensions after axis_.
int inner_size = 1;
for (int i = softmax_param_->axis_ + 1; i < softmax_param_->n_dim_; i++) {
inner_size *= softmax_param_->input_shape_[i];
}
sum_data_ = reinterpret_cast<int *>(malloc(inner_size * sizeof(int)));
return RET_OK;
return SoftmaxBaseCPUKernel::ReSize();
}

int SoftmaxInt8CPUKernel::DoSoftmax(int task_id) {
@@ -132,12 +110,24 @@ int SoftmaxInt8CPUKernel::Run() {
MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
return RET_ERROR;
}

int error_code = LiteBackendParallelLaunch(SoftmaxRun, this, thread_count_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Softmax function error error_code[" << error_code << "]";
exp_data_ = reinterpret_cast<int *>(context_->allocator->Malloc(softmax_param_->element_size_ * sizeof(int)));
int inner_size = 1;
for (int i = softmax_param_->axis_ + 1; i < softmax_param_->n_dim_; i++) {
inner_size *= softmax_param_->input_shape_[i];
}
sum_data_ = reinterpret_cast<int *>(context_->allocator->Malloc(inner_size * sizeof(int)));
if (exp_data_ == nullptr || sum_data_ == nullptr) {
MS_LOG(ERROR) << "Memory allocation failed";
context_->allocator->Free(exp_data_);
context_->allocator->Free(sum_data_);
return RET_ERROR;
}
return RET_OK;
ret = LiteBackendParallelLaunch(SoftmaxRun, this, thread_count_);
context_->allocator->Free(exp_data_);
context_->allocator->Free(sum_data_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Softmax function error error_code[" << ret << "]";
}
return ret;
}
} // namespace mindspore::kernel

+ 1
- 2
mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.h View File

@@ -28,7 +28,7 @@ class SoftmaxInt8CPUKernel : public SoftmaxBaseCPUKernel {
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const mindspore::lite::PrimitiveC *primitive)
: SoftmaxBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
~SoftmaxInt8CPUKernel() { FreeTmpBuffer(); }
~SoftmaxInt8CPUKernel() {}

int Init() override;
int ReSize() override;
@@ -36,7 +36,6 @@ class SoftmaxInt8CPUKernel : public SoftmaxBaseCPUKernel {
int DoSoftmax(int task_id);

private:
void FreeTmpBuffer();
int *sum_data_ = nullptr;
int *exp_data_ = nullptr;
SoftmaxQuantArg quant_params_;


+ 13
- 32
mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.cc View File

@@ -81,35 +81,6 @@ int SubInt8CPUKernel::Init() {
}

int SubInt8CPUKernel::ReSize() {
if (broadcast_) {
if (tile0_data_ != nullptr) {
if (context_ != nullptr && context_->allocator != nullptr) {
context_->allocator->Free(tile0_data_);
} else {
free(tile0_data_);
}
}
if (tile1_data_ != nullptr) {
if (context_ != nullptr && context_->allocator != nullptr) {
context_->allocator->Free(tile1_data_);
} else {
free(tile1_data_);
}
}

if (context_ != nullptr && context_->allocator != nullptr) {
tile0_data_ = static_cast<int8_t *>(context_->allocator->Malloc(out_tensors_.at(0)->Size()));
tile1_data_ = static_cast<int8_t *>(context_->allocator->Malloc(out_tensors_.at(0)->Size()));
} else {
tile0_data_ = static_cast<int8_t *>(malloc(sizeof(int8_t) * out_tensors_.at(0)->Size()));
tile1_data_ = static_cast<int8_t *>(malloc(sizeof(int8_t) * out_tensors_.at(0)->Size()));
}

if (tile0_data_ == nullptr || tile1_data_ == nullptr) {
MS_LOG(ERROR) << "malloc memroy fail!";
return RET_ERROR;
}
}
return RET_OK;
}

@@ -164,17 +135,27 @@ int SubInt8CPUKernel::Run() {
tile_para.in_shape1_[i] = in_tensors_.at(1)->DimensionSize(i);
tile_para.out_shape_[i] = out_tensors_.at(0)->DimensionSize(i);
}
tile0_data_ = static_cast<int8_t *>(context_->allocator->Malloc(out_tensors_.at(0)->Size()));
tile1_data_ = static_cast<int8_t *>(context_->allocator->Malloc(out_tensors_.at(0)->Size()));
if (tile0_data_ == nullptr || tile1_data_ == nullptr) {
MS_LOG(ERROR) << "malloc memroy fail!";
context_->allocator->Free(tile0_data_);
context_->allocator->Free(tile1_data_);
return RET_ERROR;
}
TileDimensionsUint8(static_cast<uint8_t *>(in_tensors_.at(0)->Data()),
static_cast<uint8_t *>(in_tensors_.at(1)->Data()), reinterpret_cast<uint8_t *>(tile0_data_),
reinterpret_cast<uint8_t *>(tile1_data_), &tile_para);
}
ret = LiteBackendParallelLaunch(SubInt8Run, this, op_parameter_->thread_num_);

if (broadcast_) {
context_->allocator->Free(tile0_data_);
context_->allocator->Free(tile1_data_);
}
if (ret != RET_OK) {
MS_LOG(ERROR) << "SubInt8Run function error error_code[" << ret << "]";
return RET_ERROR;
}
return RET_OK;
return ret;
}

kernel::LiteKernel *CpuSubInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,


+ 8
- 6
mindspore/lite/src/runtime/kernel/arm/int8/topk_int8.cc View File

@@ -43,12 +43,6 @@ int TopKInt8CPUKernel::ReSize() {
for (int i = 0; i < input->shape().size() - 1; ++i) {
parameter->loop_num_ *= input->shape()[i];
}

parameter->topk_node_list_ = malloc(sizeof(TopkNodeInt8) * parameter->last_dim_size_);
if (parameter->topk_node_list_ == nullptr) {
MS_LOG(ERROR) << "malloc fail.";
return RET_ERROR;
}
return RET_OK;
}

@@ -62,7 +56,15 @@ int TopKInt8CPUKernel::Run() {
int8_t *output_data = reinterpret_cast<int8_t *>(out_tensors_.at(0)->Data());
int32_t *output_index = reinterpret_cast<int32_t *>(out_tensors_.at(1)->Data());

MS_ASSERT(context_->allocator != nullptr);
TopkParameter *parameter = reinterpret_cast<TopkParameter *>(op_parameter_);
parameter->topk_node_list_ = context_->allocator->Malloc(sizeof(TopkNodeInt8) * parameter->last_dim_size_);
if (parameter->topk_node_list_ == nullptr) {
MS_LOG(ERROR) << "Memory allocation failed";
return RET_ERROR;
}
TopkInt8(input_data, output_data, output_index, reinterpret_cast<TopkParameter *>(op_parameter_));
context_->allocator->Free(parameter->topk_node_list_);
return RET_OK;
}



Loading…
Cancel
Save