!7066 [MS][LITE][DEV]fix fp16 op bug

Merge pull request !7066 from 张学同/to_merge
5 years ago · f0e9f6137b
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc
@@ -92,7 +92,7 @@ int ActivationFp16CPUKernel::DoActivation(int task_id) {
  return error_code;
 }
 int ActivationRun(void *cdata, int task_id) {
 int ActivationFp16Run(void *cdata, int task_id) {
  auto activation_kernel = reinterpret_cast<ActivationFp16CPUKernel *>(cdata);
  auto error_code = activation_kernel->DoActivation(task_id);
  if (error_code != RET_OK) {
@@ -115,7 +115,7 @@ int ActivationFp16CPUKernel::Run() {
    return ret;
  }
  int error_code = ParallelLaunch(this->context_->thread_pool_, ActivationRun, this, thread_count_);
  int error_code = ParallelLaunch(this->context_->thread_pool_, ActivationFp16Run, this, thread_count_);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]";
    FreeTmpBuffer();
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc
@@ -30,7 +30,7 @@ using mindspore::schema::PrimitiveType_Cast;
 namespace mindspore::kernel {
 namespace {
 int CastRun(void *cdata, int task_id) {
 int CastFp16Run(void *cdata, int task_id) {
  if (cdata == nullptr) {
    MS_LOG(ERROR) << "input cdata is nullptr!";
    return RET_ERROR;
@@ -91,7 +91,7 @@ int CastFp16CPUKernel::Run() {
  if (data_num_ == 0) {
    return RET_OK;
  }
  return ParallelLaunch(this->context_->thread_pool_, CastRun, this, op_parameter_->thread_num_);
  return ParallelLaunch(this->context_->thread_pool_, CastFp16Run, this, op_parameter_->thread_num_);
 }
 kernel::LiteKernel *CpuCastFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.cc
@@ -50,7 +50,7 @@ int CropFp16CPUKernel::DoExecute(int task_id) {
  return RET_OK;
 }
 static int CropRun(void *cdata, int task_id) {
 static int CropFp16Run(void *cdata, int task_id) {
  auto g_kernel = reinterpret_cast<CropFp16CPUKernel *>(cdata);
  auto ret = g_kernel->DoExecute(task_id);
  if (ret != RET_OK) {
@@ -79,7 +79,7 @@ int CropFp16CPUKernel::Run() {
    return RET_ERROR;
  }
  ret = ParallelLaunch(this->context_->thread_pool_, CropRun, this, thread_count_);
  ret = ParallelLaunch(this->context_->thread_pool_, CropFp16Run, this, thread_count_);
  if (out_tensors_.at(kOutputIndex)->data_type() == kNumberTypeFloat32) {
    Float16ToFloat32(output_ptr_, reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData()),
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc
@@ -65,7 +65,7 @@ int ReduceFp16CPUKernel::CallReduceUnit(int task_id) {
  return ret;
 }
 static int ReduceImpl(void *cdata, int task_id) {
 static int ReduceFp16Impl(void *cdata, int task_id) {
  auto reduce = reinterpret_cast<ReduceFp16CPUKernel *>(cdata);
  auto error_code = reduce->CallReduceUnit(task_id);
  if (error_code != RET_OK) {
@@ -102,7 +102,7 @@ int ReduceFp16CPUKernel::Run() {
    outer_size_ = outer_sizes_[i];
    inner_size_ = inner_sizes_[i];
    axis_size_ = axis_sizes_[i];
    auto error_code = ParallelLaunch(this->context_->thread_pool_, ReduceImpl, this, context_->thread_num_);
    auto error_code = ParallelLaunch(this->context_->thread_pool_, ReduceFp16Impl, this, context_->thread_num_);
    if (error_code != RET_OK) {
      FreeTmpBuffer();
      MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]";
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc
@@ -92,7 +92,7 @@ int ScaleFp16CPUKernel::Scale(int task_id) {
  return RET_OK;
 }
 int ScaleRun(void *cdata, int task_id) {
 int ScaleFp16Run(void *cdata, int task_id) {
  auto scale = reinterpret_cast<ScaleFp16CPUKernel *>(cdata);
  auto ret = scale->Scale(task_id);
  if (ret != RET_OK) {
@@ -121,7 +121,7 @@ int ScaleFp16CPUKernel::Run() {
    return ret;
  }
  ret = ParallelLaunch(this->context_->thread_pool_, ScaleRun, this, op_parameter_->thread_num_);
  ret = ParallelLaunch(this->context_->thread_pool_, ScaleFp16Run, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
    return RET_ERROR;
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/split_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/split_fp16.cc
@@ -65,7 +65,7 @@ int SplitFp16CPUKernel::Split(int task_id) {
  return RET_OK;
 }
 static int SplitRun(void *cdata, int task_id) {
 static int SplitFp16Run(void *cdata, int task_id) {
  auto g_kernel = reinterpret_cast<SplitFp16CPUKernel *>(cdata);
  auto ret = g_kernel->Split(task_id);
  if (ret != RET_OK) {
@@ -94,7 +94,7 @@ int SplitFp16CPUKernel::Run() {
      return RET_ERROR;
    }
  }
  ret = ParallelLaunch(this->context_->thread_pool_, SplitRun, this, thread_n_num_);
  ret = ParallelLaunch(this->context_->thread_pool_, SplitFp16Run, this, thread_n_num_);
  for (int i = 0; i < param->num_split_; i++) {
    if (out_tensors_.at(i)->data_type() == kNumberTypeFloat32) {
      Float16ToFloat32(output_ptr_[i], reinterpret_cast<float *>(out_tensors_.at(i)->MutableData()),
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/transpose_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/transpose_fp16.cc
@@ -117,7 +117,7 @@ int TransposeFp16CPUKernel::TransposeParallel(int task_id) {
  return RET_OK;
 }
 static int TransposeRun(void *cdata, int task_id) {
 static int TransposeFp16Run(void *cdata, int task_id) {
  auto g_kernel = reinterpret_cast<TransposeFp16CPUKernel *>(cdata);
  auto ret = g_kernel->TransposeParallel(task_id);
  if (ret != RET_OK) {
@@ -162,7 +162,7 @@ int TransposeFp16CPUKernel::Run() {
  in_shape_ = const_cast<int *>(in_tensor->shape().data());
  out_shape_ = const_cast<int *>(out_tensor->shape().data());
  ret = ParallelLaunch(this->context_->thread_pool_, TransposeRun, this, thread_h_num_);
  ret = ParallelLaunch(this->context_->thread_pool_, TransposeFp16Run, this, thread_h_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Tranpose error error_code[" << ret << "]";
    FreeFp16Buffer();