compare,depth_to_space,batch_to_space support int8,append argmin,argmax,batch_to_space test

5 years ago · 32960a58d1
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.h
@@ -90,6 +90,7 @@ class ArithmeticCPUKernel : public LiteKernel {
      case PrimitiveType_FloorMod:
        arithmetic_run_ = ElementFloorMod;
        arithmetic_broadcast_run_ = BroadcastFloorMod;
        break;
      case PrimitiveType_Equal:
        arithmetic_run_ = ElementEqual;
        arithmetic_broadcast_run_ = BroadcastEqual;
--- a/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.cc
@@ -17,7 +17,7 @@
 #include <vector>
 #include "schema/model_generated.h"
 #include "src/kernel_registry.h"
 #include "src/runtime/kernel/arm/opclib/arg_min_max.h"
 #include "src/runtime/kernel/arm/opclib/int8/arg_min_max_int8.h"
 #include "include/errorcode.h"

 using mindspore::lite::RET_OK;
@@ -31,12 +31,45 @@ int ArgMinMaxInt8CPUKernel::Init() {
  }
  auto param = reinterpret_cast<ArgMinMaxParameter *>(opParameter);
  param->data_type_ = kNumberTypeInt8;
  auto *input_tensor = inputs_.at(kInputIndex);
  auto in_quant_args = input_tensor->GetQuantParams();
  in_quant_arg_.scale_ = in_quant_args.front().scale;
  in_quant_arg_.zp_ = in_quant_args.front().zeroPoint;

  auto *out_tensor = outputs_.at(kOutputIndex);
  auto out_quant_args = out_tensor->GetQuantParams();
  out_quant_arg_.scale_ = out_quant_args.front().scale;
  out_quant_arg_.zp_ = out_quant_args.front().zeroPoint;
  return RET_OK;
 }

 // Runs the int8 ArgMin/ArgMax kernel.
 // NOTE(review): this listing looks like a rendered diff whose +/- markers were stripped, so the
 // first three statements are presumably the pre-change body and the remainder the post-change
 // body — confirm against the real file. Read literally, everything after `return ret;` is
 // unreachable.
 int ArgMinMaxInt8CPUKernel::Run() {
  auto ret = ArgMinMaxBaseCPUKernel::Run();
  FreeTmpMemory();
  return ret;
  auto input = inputs_.at(0);

  const int8_t *input_data = reinterpret_cast<const int8_t *>(inputs_.at(0)->Data());
  int8_t *output_data = reinterpret_cast<int8_t *>(outputs_.at(0)->Data());

  // NOTE(review): if shape() returns its vector by value, this pointer refers to a temporary that
  // is gone by the next statement — confirm the return type, or keep the vector in a local.
  auto in_shape = input->shape().data();
  auto param = reinterpret_cast<ArgMinMaxParameter *>(opParameter);
  // topk_ == 1 is handled by the single ArgMinMaxQuant entry point.
  if (param->topk_ == 1) {
    ArgMinMaxQuant(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_);
    return RET_OK;
  }

  // Any other topk_: one routine per axis. There is no default label, so an axis_ outside 0..3
  // calls nothing and the function still returns RET_OK.
  switch (param->axis_) {
  case 0:
    ArgMinMaxDim0(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_);
    break;
  case 1:
    ArgMinMaxDim1(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_);
    break;
  case 2:
    ArgMinMaxDim2(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_);
    break;
  case 3:
    ArgMinMaxDim3(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_);
    break;
  }
  return RET_OK;
 }
 }  // namespace mindspore::kernel
--- a/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.h
@@ -18,6 +18,7 @@

 #include <vector>
 #include "src/runtime/kernel/arm/base/arg_min_max_base.h"
 #include "src/runtime/kernel/arm/opclib/quantization/quantize.h"

 namespace mindspore::kernel {
 class ArgMinMaxInt8CPUKernel : public ArgMinMaxBaseCPUKernel {
@@ -31,6 +32,9 @@ class ArgMinMaxInt8CPUKernel : public ArgMinMaxBaseCPUKernel {
  int Init() override;
  int ReSize() override { return 0; }
  int Run() override;
 private:
  QuantArg in_quant_arg_;
  QuantArg out_quant_arg_;
 };
 }  // namespace mindspore::kernel

--- a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc
@@ -0,0 +1,194 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #include "src/runtime/kernel/arm/int8/arithmetic_int8.h"
 #include "src/runtime/kernel/arm/opclib/int8/arithmetic_int8.h"
 #include "src/runtime/kernel/arm/opclib/arithmetic_common.h"
 #include "schema/model_generated.h"
 #include "src/kernel_registry.h"
 #include "src/runtime/runtime_api.h"
 #include "include/errorcode.h"

 using mindspore::kernel::KERNEL_ARCH::kCPU;
 using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_PARAM_INVALID;
 using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;

 using mindspore::schema::PrimitiveType_Equal;
 using mindspore::schema::PrimitiveType_NotEqual;
 using mindspore::schema::PrimitiveType_LessEqual;
 using mindspore::schema::PrimitiveType_Greater;
 using mindspore::schema::PrimitiveType_GreaterEqual;
 using mindspore::schema::PrimitiveType_Less;

 namespace mindspore::kernel {
 namespace {
 int ArithmeticsInt8Launch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) {
  auto arithmetic_kernel = reinterpret_cast<ArithmeticInt8CPUKernel *>(cdata);
  auto error_code = arithmetic_kernel->DoArithmetic(thread_id);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "ArithmeticsRun error thread_id[" << thread_id << "] error_code[" << error_code << "]";
    return RET_ERROR;
  }
  return RET_OK;
 }
 }  // namespace

 // Releases the two tile buffers. They are only allocated for broadcasting kernels, and are
 // returned to whichever source Init() took them from: the context allocator when one is set,
 // the C heap otherwise.
 ArithmeticInt8CPUKernel::~ArithmeticInt8CPUKernel() {
  if (!reinterpret_cast<ArithmeticParameter *>(opParameter)->broadcasting_) {
    return;
  }
  const bool from_allocator = context_->allocator != nullptr;
  auto release = [this, from_allocator](int8_t *buffer) {
    if (buffer == nullptr) {
      return;
    }
    if (from_allocator) {
      context_->allocator->Free(buffer);
    } else {
      free(buffer);
    }
  };
  release(tile_data0_);
  release(tile_data1_);
  tile_data1_ = nullptr;
  tile_data0_ = nullptr;
 }

 int ArithmeticInt8CPUKernel::Init() {
  switch (opParameter->type_) {
    case PrimitiveType_Equal:
      arithmetic_run_ = ElementEqual;
      break;
    case PrimitiveType_NotEqual:
      arithmetic_run_ = ElementNotEqual;
      break;
    case PrimitiveType_Less:
      arithmetic_run_ = ElementEqual;
      break;
    case PrimitiveType_LessEqual:
      arithmetic_run_ = ElementNotEqual;
      break;
    case PrimitiveType_Greater:
      arithmetic_run_ = ElementGreater;
      break;
    case PrimitiveType_GreaterEqual:
      arithmetic_run_ = ElementGreaterEqual;
      break;
    default:
      MS_LOG(ERROR) << "Error Operator type " << opParameter->type_;
      arithmetic_run_ = nullptr;
      return RET_PARAM_INVALID;
  }
  auto data_size = outputs_[0]->Size();
  auto param = reinterpret_cast<ArithmeticParameter *>(opParameter);
  if (param->broadcasting_) {
    if (context_->allocator != nullptr) {
      tile_data0_ = reinterpret_cast<int8_t *>(context_->allocator->Malloc(data_size));
      tile_data1_ = reinterpret_cast<int8_t *>(context_->allocator->Malloc(data_size));
    } else {
      tile_data0_ = reinterpret_cast<int8_t *>(malloc(data_size));
      tile_data1_ = reinterpret_cast<int8_t *>(malloc(data_size));
    }
  } else {
    tile_data0_ = nullptr;
    tile_data1_ = nullptr;
  }
  return RET_OK;
 }

 // No-op: always returns RET_OK.
 int ArithmeticInt8CPUKernel::ReSize() {
  return RET_OK;
 }

 int ArithmeticInt8CPUKernel::DoArithmetic(int thread_id) {
  auto input0_data = reinterpret_cast<int8_t *>(inputs_[0]->Data());
  auto input1_data1 = reinterpret_cast<int8_t *>(inputs_[1]->Data());
  auto output_data = reinterpret_cast<int8_t *>(outputs_[0]->Data());
  auto element_num = outputs_[0]->ElementsNum();
  auto param = reinterpret_cast<ArithmeticParameter *>(opParameter);
  if (param->broadcasting_ && arithmetic_run_ != nullptr) {
    MS_ASSERT(thread_count_ != 0);
    int stride = UP_DIV(element_num, thread_count_);
    int count = MSMIN(stride, element_num - stride * thread_id);

    int error_code = arithmetic_run_(tile_data0_ + stride * thread_id, tile_data1_ + stride * thread_id,
                                     output_data + stride * thread_id, count);
    if (error_code != RET_OK) {
      MS_LOG(ERROR) << "Arithmetic run fail! ret: " << error_code;
      return RET_ERROR;
    }
  } else if (arithmetic_run_ != nullptr) {
    int error_code = arithmetic_run_(input0_data, input1_data1, output_data, element_num);
    if (error_code != RET_OK) {
      MS_LOG(ERROR) << "Arithmetic run fail!ret: " << error_code;
      return RET_ERROR;
    }
  } else {
    MS_LOG(ERROR) << "arithmetic_run function is nullptr!";
    return RET_ERROR;
  }
  return RET_OK;
 }

 int ArithmeticInt8CPUKernel::Run() {
  auto param = reinterpret_cast<ArithmeticParameter *>(opParameter);
  if (param->broadcasting_) {
    auto input_data0 = reinterpret_cast<int8_t *>(inputs_[0]->Data());
    auto input_data1 = reinterpret_cast<int8_t *>(inputs_[1]->Data());
    TileDimensionsInt8(input_data0, input_data1, tile_data0_, tile_data1_, param);
  }
  int error_code = LiteBackendParallelLaunch(ArithmeticsInt8Launch, this, thread_count_);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "Arithmetic launch function fail! ret: " << error_code;
    return RET_ERROR;
  }
  return RET_OK;
 }

 // Factory registered for the int8 comparison primitives.
 //   inputs/outputs: tensors bound to the new kernel.
 //   parameter:      operator parameter; must not be null.
 //   ctx:            runtime context; must not be null (the kernel constructor reads
 //                   ctx->thread_num_).
 //   desc:           kernel key (unused here).
 // Returns the initialised kernel, or nullptr when an argument is null, allocation fails, or
 // Init() reports an error (the half-built kernel is deleted in that case).
 kernel::LiteKernel *CpuArithmeticInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                    const std::vector<lite::tensor::Tensor *> &outputs,
                                                    OpParameter *parameter, const lite::Context *ctx,
                                                    const kernel::KernelKey &desc) {
  if (parameter == nullptr) {
    MS_LOG(ERROR) << "Input parameter is null!";
    return nullptr;
  }
  if (ctx == nullptr) {
    MS_LOG(ERROR) << "Input context is null!";
    return nullptr;
  }
  auto kernel = new (std::nothrow) ArithmeticInt8CPUKernel(parameter, inputs, outputs, ctx);
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "Create ArithmeticInt8CPUKernel failed, name: " << parameter->name_;
    return nullptr;
  }
  auto ret = kernel->Init();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Init kernel failed, name: " << parameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(parameter->type_));
    delete kernel;
    return nullptr;
  }
  return kernel;
 }

 // Register CpuArithmeticInt8KernelCreator for every comparison primitive this file handles,
 // keyed on the CPU architecture and the int8 data type.
 REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Equal, CpuArithmeticInt8KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_NotEqual, CpuArithmeticInt8KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Less, CpuArithmeticInt8KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_LessEqual, CpuArithmeticInt8KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Greater, CpuArithmeticInt8KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_GreaterEqual, CpuArithmeticInt8KernelCreator)

 }  // namespace mindspore::kernel
--- a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h
@@ -0,0 +1,47 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_ARITHMETIC_INT8_H_
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_ARITHMETIC_INT8_H_

 #include <vector>
 #include "src/lite_kernel.h"
 #include "schema/model_generated.h"

 namespace mindspore::kernel {
 // CPU kernel for element-wise int8 comparison primitives.
 // Init() picks the element routine and, for broadcasting kernels, allocates two tile buffers;
 // Run() dispatches DoArithmetic over thread_count_ tasks; the destructor releases the buffers.
 class ArithmeticInt8CPUKernel : public LiteKernel {
  // Signature shared by all element-wise routines: two inputs, one output, element count.
  typedef int (*ArithmeticRunInt8)(int8_t *input0, int8_t *input1, int8_t *output, int element_size);

 public:
  // ctx must not be null: its thread_num_ is read here and the pointer is kept for allocation.
  ArithmeticInt8CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                          const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
      : LiteKernel(parameter, inputs, outputs), thread_count_(ctx->thread_num_), context_(ctx) {}
  ~ArithmeticInt8CPUKernel();

  int Init() override;
  int ReSize() override;
  int Run() override;
  // Runs the slice of work that belongs to task thread_id.
  int DoArithmetic(int thread_id);

 private:
  int thread_count_;
  // Tile buffers start out null so the destructor is safe even if Init() never ran or failed
  // before allocating them.
  int8_t *tile_data0_ = nullptr;
  int8_t *tile_data1_ = nullptr;
  const lite::Context *context_;
  // Selected by Init(); null until then, which DoArithmetic reports as an error.
  ArithmeticRunInt8 arithmetic_run_ = nullptr;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_ARITHMETIC_INT8_H_
--- a/mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.cc
@@ -18,13 +18,27 @@
 #include "schema/model_generated.h"
 #include "src/kernel_registry.h"
 #include "src/runtime/kernel/arm/opclib/batch_to_space.h"
 #include "src/runtime/kernel/arm/opclib/int8/batch_to_space_int8.h"
 #include "include/errorcode.h"

 using mindspore::lite::RET_OK;

 namespace mindspore::kernel {
 // Initialises the int8 BatchToSpace kernel.
 // NOTE(review): this listing looks like a rendered diff whose +/- markers were stripped, so the
 // first `return` is presumably the pre-change body and the rest the post-change body — confirm
 // against the real file. Read literally, everything after the first statement is unreachable.
 int BatchToSpaceInt8CPUKernel::Init() {
  return BatchToSpaceBaseCPUKernel::Init();
  auto ret = BatchToSpaceBaseCPUKernel::Init();
  if (ret != RET_OK) {
    return ret;
  }
  // Cache the first quantisation entry (scale, zero point) of the input tensor.
  // NOTE(review): front() assumes GetQuantParams() is non-empty — confirm.
  auto *input_tensor = inputs_.at(kInputIndex);
  auto in_quant_args = input_tensor->GetQuantParams();
  in_quant_arg_.scale_ = in_quant_args.front().scale;
  in_quant_arg_.zp_ = in_quant_args.front().zeroPoint;

  // Same for the output tensor.
  auto *out_tensor = outputs_.at(kOutputIndex);
  auto out_quant_args = out_tensor->GetQuantParams();
  out_quant_arg_.scale_ = out_quant_args.front().scale;
  out_quant_arg_.zp_ = out_quant_args.front().zeroPoint;
  return RET_OK;
 }

 int BatchToSpaceInt8CPUKernel::Run() {
@@ -36,12 +50,22 @@ int BatchToSpaceInt8CPUKernel::Run() {
  auto out_shape = output->shape();
  BatchToSpaceParameter *param = reinterpret_cast<BatchToSpaceParameter *>(this->opParameter);

  if (IsNoCrop()) {
    BatchToSpaceNoCropForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_,
                              sizeof(int8_t));
  if (in_quant_arg_.scale_ == out_quant_arg_.scale_ && in_quant_arg_.zp_ == out_quant_arg_.zp_) {
    if (IsNoCrop()) {
      BatchToSpaceNoCropForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_,
                                sizeof(int8_t));
    } else {
      BatchToSpaceForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, param->crops_,
                          sizeof(int8_t));
    }
  } else {
    BatchToSpaceForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, param->crops_,
                        sizeof(int8_t));
    if (IsNoCrop()) {
      BatchToSpaceNoCropForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_,
                                &in_quant_arg_, &out_quant_arg_);
    } else {
      BatchToSpaceForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, param->crops_,
                          &in_quant_arg_, &out_quant_arg_);
    }
  }

  return RET_OK;
--- a/mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.h
@@ -31,6 +31,9 @@ class BatchToSpaceInt8CPUKernel : public BatchToSpaceBaseCPUKernel {
  int Init() override;
  int ReSize() override { return 0; }
  int Run() override;
 private:
  QuantArg in_quant_arg_;
  QuantArg out_quant_arg_;
 };
 }  // namespace mindspore::kernel

--- a/mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.cc
@@ -18,6 +18,7 @@
 #include "schema/model_generated.h"
 #include "src/kernel_registry.h"
 #include "src/runtime/kernel/arm/opclib/depth_to_space.h"
 #include "src/runtime/kernel/arm/opclib/int8/depth_to_space_int8.h"
 #include "include/errorcode.h"

 using mindspore::lite::RET_OK;
@@ -31,6 +32,16 @@ int DepthToSpaceInt8CPUKernel::Init() {
  }
  DepthToSpaceParameter *param = reinterpret_cast<DepthToSpaceParameter *>(opParameter);
  param->data_type_size_ = sizeof(int8_t);

  auto *input_tensor = inputs_.at(kInputIndex);
  auto in_quant_args = input_tensor->GetQuantParams();
  in_quant_arg_.scale_ = in_quant_args.front().scale;
  in_quant_arg_.zp_ = in_quant_args.front().zeroPoint;

  auto *out_tensor = outputs_.at(kOutputIndex);
  auto out_quant_args = out_tensor->GetQuantParams();
  out_quant_arg_.scale_ = out_quant_args.front().scale;
  out_quant_arg_.zp_ = out_quant_args.front().zeroPoint;
  return RET_OK;
 }

@@ -41,14 +52,11 @@ int DepthToSpaceInt8CPUKernel::Run() {
  int8_t *output_data = reinterpret_cast<int8_t *>(output->Data());
  auto in_shape = input->shape();
  DepthToSpaceParameter *param = reinterpret_cast<DepthToSpaceParameter *>(opParameter);
  if (input->GetFormat() == schema::Format_NHWC) {
  if (in_quant_arg_.scale_ == out_quant_arg_.scale_ && in_quant_arg_.zp_ == out_quant_arg_.zp_) {
    DepthToSpaceForNHWC(input_data, output_data, in_shape.data(), param);
    return RET_OK;
  } else {
    MS_LOG(ERROR) << "Depth_to_space only support NHWC now!";
    return RET_ERROR;
    DepthToSpaceForNHWC(input_data, output_data, in_shape.data(), param, &in_quant_arg_, &out_quant_arg_);
  }

  return RET_OK;
 }
 }  // namespace mindspore::kernel
--- a/mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.h
@@ -31,6 +31,9 @@ class DepthToSpaceInt8CPUKernel : public DepthToSpaceBaseCPUKernel {
  int Init() override;
  int ReSize() override { return 0; }
  int Run() override;
 private:
  QuantArg in_quant_arg_;
  QuantArg out_quant_arg_;
 };
 }  // namespace mindspore::kernel

--- a/mindspore/lite/src/runtime/kernel/arm/opclib/arg_min_max.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/arg_min_max.cc
@@ -15,10 +15,8 @@
 */
 #include "src/runtime/kernel/arm/opclib/arg_min_max.h"
 #include "src/runtime/kernel/arm/opclib/fp32/arg_min_max.h"
 #include "src/runtime/kernel/arm/opclib/int8/arg_min_max.h"

 #define FLOAT_DATA_TYPE 43
 #define INT8_DATA_TYPE  32

 void GetCalcParameter(const int *shape, int dims_number, int axis, int *pre_axis_count, int *axis_count,
                      int *after_axis_count) {
@@ -51,16 +49,6 @@ void ArgMinMaxTopk1(const void *input, void *output, const int *shape, ArgMinMax
      }
      break;
    }
    case INT8_DATA_TYPE: {
      if (param->get_max_) {
        ArgMax(reinterpret_cast<const int8_t *>(input), reinterpret_cast<int8_t *>(output), param, pre_axis_count,
               axis_count, after_axis_count);
      } else {
        ArgMin(reinterpret_cast<const int8_t *>(input), reinterpret_cast<int8_t *>(output), param, pre_axis_count,
               axis_count, after_axis_count);
      }
      break;
    }
    default:
      break;
  }
@@ -100,40 +88,6 @@ void ArgMinMaxTopknFp32(const float *input, float *output, const int *in_shape,
  }
 }

 // Dispatches an int8 top-k arg-min/arg-max request to the routine for param->axis_.
 // param->get_max_ selects the ArgMax* family, otherwise the ArgMin* family is used.
 // An axis_ outside 0..3 calls nothing.
 void ArgMinMaxTopknInt8(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  const bool want_max = param->get_max_;
  switch (param->axis_) {
    case 0:
      if (want_max) {
        ArgMaxDim0(input, output, in_shape, param);
      } else {
        ArgMinDim0(input, output, in_shape, param);
      }
      break;
    case 1:
      if (want_max) {
        ArgMaxDim1(input, output, in_shape, param);
      } else {
        ArgMinDim1(input, output, in_shape, param);
      }
      break;
    case 2:
      if (want_max) {
        ArgMaxDim2(input, output, in_shape, param);
      } else {
        ArgMinDim2(input, output, in_shape, param);
      }
      break;
    case 3:
      if (want_max) {
        ArgMaxDim3(input, output, in_shape, param);
      } else {
        ArgMinDim3(input, output, in_shape, param);
      }
      break;
  }
 }

 void ArgMinMax(const void *input, void *output, const int *in_shape, ArgMinMaxParameter *param) {
  if (param->topk_ == 1) {
    ArgMinMaxTopk1(input, output, in_shape, param);
@@ -145,10 +99,6 @@ void ArgMinMax(const void *input, void *output, const int *in_shape, ArgMinMaxPa
      ArgMinMaxTopknFp32(reinterpret_cast<const float *>(input), reinterpret_cast<float *>(output), in_shape, param);
      return;
    }
    case INT8_DATA_TYPE: {
      ArgMinMaxTopknInt8(reinterpret_cast<const int8_t *>(input), reinterpret_cast<int8_t *>(output), in_shape, param);
      return;
    }
    default:
      break;
  }
--- a/mindspore/lite/src/runtime/kernel/arm/opclib/arg_min_max.h
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/arg_min_max.h
@@ -18,6 +18,7 @@

 #include "src/runtime/kernel/arm/opclib/op_base.h"
 #include "src/runtime/kernel/arm/opclib/arg_min_max_parameter.h"
 #include "src/runtime/kernel/arm/opclib/quantization/quantize.h"

 void ArgMinMax(const void *input, void *output, const int *in_shape, ArgMinMaxParameter *param);
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_ARG_MIN_MAX_H_
--- a/mindspore/lite/src/runtime/kernel/arm/opclib/depth_to_space.h
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/depth_to_space.h
@@ -15,19 +15,7 @@
 */
 #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_H_
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_H_
 #include "src/runtime/kernel/arm/opclib/op_base.h"

 // Parameter block for the DepthToSpace operator.
 // NOTE(review): in this listing the struct is immediately followed by an include of
 // depth_to_space_parameter.h, which declares the same struct — presumably this copy is the
 // removed side of a diff; confirm before relying on it.
 struct DepthToSpaceParameter {
    // Common operator header; kept first because kernels cast an OpParameter* to this struct.
    OpParameter op_parameter_;
    // presumably the block edge length of the rearrangement — TODO confirm
    int32_t block_size_;
    // presumably precomputed strides of input dims 0..2 — TODO confirm where they are filled in
    int32_t in_stride_dim0_;
    int32_t in_stride_dim1_;
    int32_t in_stride_dim2_;
    // presumably precomputed strides of output dims 0..2 — TODO confirm
    int32_t out_stride_dim0_;
    int32_t out_stride_dim1_;
    int32_t out_stride_dim2_;
    // Size of one element in bytes; the int8 kernel sets it to sizeof(int8_t).
    uint8_t data_type_size_;
 };
 #include "src/runtime/kernel/arm/opclib/depth_to_space_parameter.h"

 void DepthToSpaceForNHWC(const void *input, void *output, int *in_shape, DepthToSpaceParameter *param);
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_H_
--- a/mindspore/lite/src/runtime/kernel/arm/opclib/depth_to_space_parameter.h
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/depth_to_space_parameter.h
@@ -0,0 +1,32 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_PARAMETER_H_
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_PARAMETER_H_
 #include "src/runtime/kernel/arm/opclib/op_base.h"

 // Parameter block for the DepthToSpace operator.
 struct DepthToSpaceParameter {
    // Common operator header; kept first because kernels cast an OpParameter* to this struct.
    OpParameter op_parameter_;
    // presumably the block edge length of the rearrangement — TODO confirm
    int32_t block_size_;
    // presumably precomputed strides of input dims 0..2 — TODO confirm where they are filled in
    int32_t in_stride_dim0_;
    int32_t in_stride_dim1_;
    int32_t in_stride_dim2_;
    // presumably precomputed strides of output dims 0..2 — TODO confirm
    int32_t out_stride_dim0_;
    int32_t out_stride_dim1_;
    int32_t out_stride_dim2_;
    // Size of one element in bytes; the int8 kernel sets it to sizeof(int8_t).
    uint8_t data_type_size_;
 };

 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_DEPTH_TO_SPACE_PARAMETER_H_
--- a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arg_min_max.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arg_min_max.cc
@@ -1,488 +0,0 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "src/runtime/kernel/arm/opclib/int8/arg_min_max.h"

 #define INT8_MAX_VALUE 127

 // qsort comparator ordering ArgElement entries by ascending value.
 // The int8 routines in this file store each candidate in data_.f_data_, so the key has to be
 // read from that same member; reading data_.i8_data_ would compare an unrelated byte of the
 // float. Returns <0, 0 or >0 as qsort requires.
 int ArgCompareAscInt8(const void *a, const void *b) {
  const float lhs = reinterpret_cast<const ArgElement *>(a)->data_.f_data_;
  const float rhs = reinterpret_cast<const ArgElement *>(b)->data_.f_data_;
  return (lhs > rhs) - (lhs < rhs);
 }

 // qsort comparator ordering ArgElement entries by descending value.
 // The int8 routines in this file store each candidate in data_.f_data_, so the key has to be
 // read from that same member; reading data_.i8_data_ would compare an unrelated byte of the
 // float. Returns <0, 0 or >0 as qsort requires.
 int ArgCompareDescInt8(const void *a, const void *b) {
  const float lhs = reinterpret_cast<const ArgElement *>(a)->data_.f_data_;
  const float rhs = reinterpret_cast<const ArgElement *>(b)->data_.f_data_;
  return (rhs > lhs) - (rhs < lhs);
 }

 void ArgMaxDim0OutValue(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
    for (int j = 0; j < in_shape[0]; ++j) {
      size_t offset = param->in_strides_[0] * j + i;
      param->arg_elements_[j].index_ = j;
      param->arg_elements_[j].data_.f_data_ = input[offset];
    }
    qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), ArgCompareDescInt8);
    for (int j = 0; j < param->topk_; ++j) {
      size_t out_offset = j * param->out_strides_[0] + i;
      output[out_offset] = param->arg_elements_[j].data_.f_data_;
    }
  }
 }

 void ArgMaxDim0OutIndex(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
    for (int j = 0; j < in_shape[0]; ++j) {
      size_t offset = param->in_strides_[0] * j + i;
      param->arg_elements_[j].index_ = j;
      param->arg_elements_[j].data_.f_data_ = input[offset];
    }
    qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), ArgCompareDescInt8);
    for (int j = 0; j < param->topk_; ++j) {
      size_t out_offset = j * param->out_strides_[0] + i;
      output[out_offset] = param->arg_elements_[j].index_;
    }
  }
 }

 void ArgMinDim0OutValue(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
    for (int j = 0; j < in_shape[0]; ++j) {
      size_t offset = param->in_strides_[0] * j + i;
      param->arg_elements_[j].index_ = j;
      param->arg_elements_[j].data_.f_data_ = input[offset];
    }
    qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), ArgCompareAscInt8);
    for (int j = 0; j < param->topk_; ++j) {
      size_t out_offset = j * param->out_strides_[0] + i;
      output[out_offset] = param->arg_elements_[j].data_.f_data_;
    }
  }
 }

 void ArgMinDim0OutIndex(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
    for (int j = 0; j < in_shape[0]; ++j) {
      size_t offset = param->in_strides_[0] * j + i;
      param->arg_elements_[j].index_ = j;
      param->arg_elements_[j].data_.f_data_ = input[offset];
    }
    qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), ArgCompareAscInt8);
    for (int j = 0; j < param->topk_; ++j) {
      size_t out_offset = j * param->out_strides_[0] + i;
      output[out_offset] = param->arg_elements_[j].index_;
    }
  }
 }

 void ArgMaxDim1OutValue(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  int in_shape1 = in_shape[1];
  for (int i = 0; i < in_shape[0]; ++i) {
    size_t in_dim0_offset = i * param->in_strides_[0];
    size_t out_dim0_offset = i * param->out_strides_[0];
    for (int j = 0; j < param->in_strides_[1]; ++j) {
      for (int k = 0; k < in_shape1; ++k) {
        size_t offset = param->in_strides_[1] * k + in_dim0_offset + j;
        param->arg_elements_[k].index_ = k;
        param->arg_elements_[k].data_.f_data_ = input[offset];
      }
      qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), ArgCompareDescInt8);
      for (int k = 0; k < param->topk_; ++k) {
        size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1];
        output[out_offset] = param->arg_elements_[k].data_.f_data_;
      }
    }
  }
 }

 void ArgMaxDim1OutIndex(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  int in_shape1 = in_shape[1];
  for (int i = 0; i < in_shape[0]; ++i) {
    size_t in_dim0_offset = i * param->in_strides_[0];
    size_t out_dim0_offset = i * param->out_strides_[0];
    for (int j = 0; j < param->in_strides_[1]; ++j) {
      for (int k = 0; k < in_shape1; ++k) {
        size_t offset = param->in_strides_[1] * k + in_dim0_offset + j;
        param->arg_elements_[k].index_ = k;
        param->arg_elements_[k].data_.f_data_ = input[offset];
      }
      qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), ArgCompareDescInt8);
      for (int k = 0; k < param->topk_; ++k) {
        size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1];
        output[out_offset] = param->arg_elements_[k].index_;
      }
    }
  }
 }

 void ArgMinDim1OutValue(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  int in_shape1 = in_shape[1];
  for (int i = 0; i < in_shape[0]; ++i) {
    size_t in_dim0_offset = i * param->in_strides_[0];
    size_t out_dim0_offset = i * param->out_strides_[0];
    for (int j = 0; j < param->in_strides_[1]; ++j) {
      for (int k = 0; k < in_shape1; ++k) {
        size_t offset = param->in_strides_[1] * k + in_dim0_offset + j;
        param->arg_elements_[k].index_ = k;
        param->arg_elements_[k].data_.f_data_ = input[offset];
      }
      qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), ArgCompareAscInt8);
      for (int k = 0; k < param->topk_; ++k) {
        size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1];
        output[out_offset] = param->arg_elements_[k].data_.f_data_;
      }
    }
  }
 }

 void ArgMinDim1OutIndex(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  int in_shape1 = in_shape[1];
  for (int i = 0; i < in_shape[0]; ++i) {
    size_t in_dim0_offset = i * param->in_strides_[0];
    size_t out_dim0_offset = i * param->out_strides_[0];
    for (int j = 0; j < param->in_strides_[1]; ++j) {
      for (int k = 0; k < in_shape1; ++k) {
        size_t offset = param->in_strides_[1] * k + in_dim0_offset + j;
        param->arg_elements_[k].index_ = k;
        param->arg_elements_[k].data_.f_data_ = input[offset];
      }
      qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), ArgCompareAscInt8);
      for (int k = 0; k < param->topk_; ++k) {
        size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1];
        output[out_offset] = param->arg_elements_[k].index_;
      }
    }
  }
 }

 void ArgMaxDim2OutValue(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  int in_shape1 = in_shape[1];
  int in_shape2 = in_shape[2];
  for (int i = 0; i < in_shape[0]; ++i) {
    size_t in_dim0_offset = i * param->in_strides_[0];
    size_t out_dim0_offset = i * param->out_strides_[0];
    for (int j = 0; j < in_shape1; ++j) {
      size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
      size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
      for (int k = 0; k < param->in_strides_[2]; ++k) {
        for (int l = 0; l < in_shape2; ++l) {
          size_t offset = param->in_strides_[2] * l + k + in_dim1_offset;
          param->arg_elements_[l].index_ = l;
          param->arg_elements_[l].data_.f_data_ = input[offset];
        }
        qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareDescInt8);
        for (int l = 0; l < param->topk_; ++l) {
          size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2];
          output[out_offset] = param->arg_elements_[l].data_.f_data_;
        }
      }
    }
  }
 }

 // Walks every (dim0, dim1, inner) position, copies the in_shape[2] entries found along axis 2
 // into param->arg_elements_, sorts them with ArgCompareDescInt8 and stores the original
 // positions (index_) of the first param->topk_ sorted entries in `output`.
 void ArgMaxDim2OutIndex(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  const int rows = in_shape[1];
  const int axis_len = in_shape[2];
  auto *candidates = param->arg_elements_;
  for (int n = 0; n < in_shape[0]; ++n) {
    const size_t in_base0 = n * param->in_strides_[0];
    const size_t out_base0 = n * param->out_strides_[0];
    for (int r = 0; r < rows; ++r) {
      const size_t in_base1 = r * param->in_strides_[1] + in_base0;
      const size_t out_base1 = r * param->out_strides_[1] + out_base0;
      for (int inner = 0; inner < param->in_strides_[2]; ++inner) {
        // Gather one slice along axis 2, remembering where each element came from.
        for (int pos = 0; pos < axis_len; ++pos) {
          auto &slot = candidates[pos];
          slot.index_ = pos;
          slot.data_.f_data_ = input[param->in_strides_[2] * pos + inner + in_base1];
        }
        qsort(candidates, axis_len, sizeof(ArgElement), ArgCompareDescInt8);
        // Leading topk_ positions go out, spaced by the output stride of axis 2.
        for (int rank = 0; rank < param->topk_; ++rank) {
          output[out_base1 + inner + rank * param->out_strides_[2]] = candidates[rank].index_;
        }
      }
    }
  }
 }

 // Walks every (dim0, dim1, inner) position, copies the in_shape[2] entries found along axis 2
 // into param->arg_elements_, sorts them with ArgCompareAscInt8 and stores the data of the
 // first param->topk_ sorted entries in `output`.
 void ArgMinDim2OutValue(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  const int rows = in_shape[1];
  const int axis_len = in_shape[2];
  auto *candidates = param->arg_elements_;
  for (int n = 0; n < in_shape[0]; ++n) {
    const size_t in_base0 = n * param->in_strides_[0];
    const size_t out_base0 = n * param->out_strides_[0];
    for (int r = 0; r < rows; ++r) {
      const size_t in_base1 = r * param->in_strides_[1] + in_base0;
      const size_t out_base1 = r * param->out_strides_[1] + out_base0;
      for (int inner = 0; inner < param->in_strides_[2]; ++inner) {
        // Gather one slice along axis 2, remembering where each element came from.
        for (int pos = 0; pos < axis_len; ++pos) {
          auto &slot = candidates[pos];
          slot.index_ = pos;
          slot.data_.f_data_ = input[param->in_strides_[2] * pos + inner + in_base1];
        }
        qsort(candidates, axis_len, sizeof(ArgElement), ArgCompareAscInt8);
        // Leading topk_ entries go out, spaced by the output stride of axis 2.
        for (int rank = 0; rank < param->topk_; ++rank) {
          output[out_base1 + inner + rank * param->out_strides_[2]] = candidates[rank].data_.f_data_;
        }
      }
    }
  }
 }

 // Walks every (dim0, dim1, inner) position, copies the in_shape[2] entries found along axis 2
 // into param->arg_elements_, sorts them with ArgCompareAscInt8 and stores the original
 // positions (index_) of the first param->topk_ sorted entries in `output`.
 void ArgMinDim2OutIndex(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  const int rows = in_shape[1];
  const int axis_len = in_shape[2];
  auto *candidates = param->arg_elements_;
  for (int n = 0; n < in_shape[0]; ++n) {
    const size_t in_base0 = n * param->in_strides_[0];
    const size_t out_base0 = n * param->out_strides_[0];
    for (int r = 0; r < rows; ++r) {
      const size_t in_base1 = r * param->in_strides_[1] + in_base0;
      const size_t out_base1 = r * param->out_strides_[1] + out_base0;
      for (int inner = 0; inner < param->in_strides_[2]; ++inner) {
        // Gather one slice along axis 2, remembering where each element came from.
        for (int pos = 0; pos < axis_len; ++pos) {
          auto &slot = candidates[pos];
          slot.index_ = pos;
          slot.data_.f_data_ = input[param->in_strides_[2] * pos + inner + in_base1];
        }
        qsort(candidates, axis_len, sizeof(ArgElement), ArgCompareAscInt8);
        // Leading topk_ positions go out, spaced by the output stride of axis 2.
        for (int rank = 0; rank < param->topk_; ++rank) {
          output[out_base1 + inner + rank * param->out_strides_[2]] = candidates[rank].index_;
        }
      }
    }
  }
 }

 // For every (dim0, dim1, dim2) position, copies the in_shape[3] consecutive entries of axis 3
 // into param->arg_elements_, sorts them with ArgCompareDescInt8 and stores the data of the
 // first param->topk_ sorted entries contiguously in `output`.
 void ArgMaxDim3OutValue(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  const int rows = in_shape[1];
  const int cols = in_shape[2];
  const int axis_len = in_shape[3];
  auto *candidates = param->arg_elements_;
  for (int n = 0; n < in_shape[0]; ++n) {
    const size_t in_base0 = n * param->in_strides_[0];
    const size_t out_base0 = n * param->out_strides_[0];
    for (int r = 0; r < rows; ++r) {
      const size_t in_base1 = r * param->in_strides_[1] + in_base0;
      const size_t out_base1 = r * param->out_strides_[1] + out_base0;
      for (int c = 0; c < cols; ++c) {
        const size_t in_base2 = c * param->in_strides_[2] + in_base1;
        const size_t out_base2 = c * param->out_strides_[2] + out_base1;
        // Gather the innermost run, remembering where each element came from.
        for (int pos = 0; pos < axis_len; ++pos) {
          auto &slot = candidates[pos];
          slot.index_ = pos;
          slot.data_.f_data_ = input[pos + in_base2];
        }
        qsort(candidates, axis_len, sizeof(ArgElement), ArgCompareDescInt8);
        for (int rank = 0; rank < param->topk_; ++rank) {
          output[out_base2 + rank] = candidates[rank].data_.f_data_;
        }
      }
    }
  }
 }

 // For every (dim0, dim1, dim2) position, copies the in_shape[3] consecutive entries of axis 3
 // into param->arg_elements_, sorts them with ArgCompareDescInt8 and stores the original
 // positions (index_) of the first param->topk_ sorted entries contiguously in `output`.
 void ArgMaxDim3OutIndex(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  const int rows = in_shape[1];
  const int cols = in_shape[2];
  const int axis_len = in_shape[3];
  auto *candidates = param->arg_elements_;
  for (int n = 0; n < in_shape[0]; ++n) {
    const size_t in_base0 = n * param->in_strides_[0];
    const size_t out_base0 = n * param->out_strides_[0];
    for (int r = 0; r < rows; ++r) {
      const size_t in_base1 = r * param->in_strides_[1] + in_base0;
      const size_t out_base1 = r * param->out_strides_[1] + out_base0;
      for (int c = 0; c < cols; ++c) {
        const size_t in_base2 = c * param->in_strides_[2] + in_base1;
        const size_t out_base2 = c * param->out_strides_[2] + out_base1;
        // Gather the innermost run, remembering where each element came from.
        for (int pos = 0; pos < axis_len; ++pos) {
          auto &slot = candidates[pos];
          slot.index_ = pos;
          slot.data_.f_data_ = input[pos + in_base2];
        }
        qsort(candidates, axis_len, sizeof(ArgElement), ArgCompareDescInt8);
        for (int rank = 0; rank < param->topk_; ++rank) {
          output[out_base2 + rank] = candidates[rank].index_;
        }
      }
    }
  }
 }

 // For every (dim0, dim1, dim2) position, copies the in_shape[3] consecutive entries of axis 3
 // into param->arg_elements_, sorts them with ArgCompareAscInt8 and stores the data of the
 // first param->topk_ sorted entries contiguously in `output`.
 void ArgMinDim3OutValue(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  const int rows = in_shape[1];
  const int cols = in_shape[2];
  const int axis_len = in_shape[3];
  auto *candidates = param->arg_elements_;
  for (int n = 0; n < in_shape[0]; ++n) {
    const size_t in_base0 = n * param->in_strides_[0];
    const size_t out_base0 = n * param->out_strides_[0];
    for (int r = 0; r < rows; ++r) {
      const size_t in_base1 = r * param->in_strides_[1] + in_base0;
      const size_t out_base1 = r * param->out_strides_[1] + out_base0;
      for (int c = 0; c < cols; ++c) {
        const size_t in_base2 = c * param->in_strides_[2] + in_base1;
        const size_t out_base2 = c * param->out_strides_[2] + out_base1;
        // Gather the innermost run, remembering where each element came from.
        for (int pos = 0; pos < axis_len; ++pos) {
          auto &slot = candidates[pos];
          slot.index_ = pos;
          slot.data_.f_data_ = input[pos + in_base2];
        }
        qsort(candidates, axis_len, sizeof(ArgElement), ArgCompareAscInt8);
        for (int rank = 0; rank < param->topk_; ++rank) {
          output[out_base2 + rank] = candidates[rank].data_.f_data_;
        }
      }
    }
  }
 }

 // For every (dim0, dim1, dim2) position, copies the in_shape[3] consecutive entries of axis 3
 // into param->arg_elements_, sorts them with ArgCompareAscInt8 and stores the original
 // positions (index_) of the first param->topk_ sorted entries contiguously in `output`.
 void ArgMinDim3OutIndex(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  const int rows = in_shape[1];
  const int cols = in_shape[2];
  const int axis_len = in_shape[3];
  auto *candidates = param->arg_elements_;
  for (int n = 0; n < in_shape[0]; ++n) {
    const size_t in_base0 = n * param->in_strides_[0];
    const size_t out_base0 = n * param->out_strides_[0];
    for (int r = 0; r < rows; ++r) {
      const size_t in_base1 = r * param->in_strides_[1] + in_base0;
      const size_t out_base1 = r * param->out_strides_[1] + out_base0;
      for (int c = 0; c < cols; ++c) {
        const size_t in_base2 = c * param->in_strides_[2] + in_base1;
        const size_t out_base2 = c * param->out_strides_[2] + out_base1;
        // Gather the innermost run, remembering where each element came from.
        for (int pos = 0; pos < axis_len; ++pos) {
          auto &slot = candidates[pos];
          slot.index_ = pos;
          slot.data_.f_data_ = input[pos + in_base2];
        }
        qsort(candidates, axis_len, sizeof(ArgElement), ArgCompareAscInt8);
        for (int rank = 0; rank < param->topk_; ++rank) {
          output[out_base2 + rank] = candidates[rank].index_;
        }
      }
    }
  }
 }

 // Axis-0 arg-max entry point: forwards to the index-writing variant unless
 // param->out_value_ is set, in which case the value-writing variant runs.
 void ArgMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  if (!param->out_value_) {
    ArgMaxDim0OutIndex(input, output, in_shape, param);
    return;
  }
  ArgMaxDim0OutValue(input, output, in_shape, param);
 }

 // Axis-0 arg-min entry point: forwards to the index-writing variant unless
 // param->out_value_ is set, in which case the value-writing variant runs.
 void ArgMinDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  if (!param->out_value_) {
    ArgMinDim0OutIndex(input, output, in_shape, param);
    return;
  }
  ArgMinDim0OutValue(input, output, in_shape, param);
 }

 // Axis-1 arg-max entry point: forwards to the index-writing variant unless
 // param->out_value_ is set, in which case the value-writing variant runs.
 void ArgMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  if (!param->out_value_) {
    ArgMaxDim1OutIndex(input, output, in_shape, param);
    return;
  }
  ArgMaxDim1OutValue(input, output, in_shape, param);
 }

 // Axis-1 arg-min entry point: forwards to the index-writing variant unless
 // param->out_value_ is set, in which case the value-writing variant runs.
 void ArgMinDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  if (!param->out_value_) {
    ArgMinDim1OutIndex(input, output, in_shape, param);
    return;
  }
  ArgMinDim1OutValue(input, output, in_shape, param);
 }

 // Axis-2 arg-max entry point: forwards to the index-writing variant unless
 // param->out_value_ is set, in which case the value-writing variant runs.
 void ArgMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  if (!param->out_value_) {
    ArgMaxDim2OutIndex(input, output, in_shape, param);
    return;
  }
  ArgMaxDim2OutValue(input, output, in_shape, param);
 }

 // Axis-2 arg-min entry point: forwards to the index-writing variant unless
 // param->out_value_ is set, in which case the value-writing variant runs.
 void ArgMinDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  if (!param->out_value_) {
    ArgMinDim2OutIndex(input, output, in_shape, param);
    return;
  }
  ArgMinDim2OutValue(input, output, in_shape, param);
 }

 // Axis-3 arg-max entry point: forwards to the index-writing variant unless
 // param->out_value_ is set, in which case the value-writing variant runs.
 void ArgMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  if (!param->out_value_) {
    ArgMaxDim3OutIndex(input, output, in_shape, param);
    return;
  }
  ArgMaxDim3OutValue(input, output, in_shape, param);
 }

 // Axis-3 arg-min entry point: forwards to the index-writing variant unless
 // param->out_value_ is set, in which case the value-writing variant runs.
 void ArgMinDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param) {
  if (!param->out_value_) {
    ArgMinDim3OutIndex(input, output, in_shape, param);
    return;
  }
  ArgMinDim3OutValue(input, output, in_shape, param);
 }

 // Reduces `input`, addressed as [pre_axis_count][axis_count][after_axis_count], along the
 // middle axis. For every (pre, after) pair one int8 is written to
 // output[pre * after_axis_count + after]: the largest element when param->out_value_ is set,
 // otherwise the position of its first occurrence along the axis.
 // NOTE(review): the position is stored in an int8_t, so positions above 127 cannot be
 // represented in the output - confirm callers never reduce over a longer axis.
 void ArgMax(const int8_t *input, int8_t *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
             int after_axis_count) {
  bool out_value = param->out_value_;
  for (int i = 0; i < pre_axis_count; ++i) {
    size_t output_offset = i * after_axis_count;
    size_t input_offset = output_offset * axis_count;
    for (int j = 0; j < after_axis_count; ++j) {
      // Only observable when axis_count <= 0; kept so that case behaves as before.
      int8_t value = -INT8_MAX_VALUE;
      int8_t index = 0;
      for (int k = 0; k < axis_count; ++k) {
        int8_t value_tmp = input[input_offset + k * after_axis_count + j];
        // Always accept the first element: seeding the search with a constant made inputs that
        // are not strictly greater than the seed (e.g. the most negative int8) unreachable,
        // which yielded a wrong value and a wrong index.
        if (k == 0 || value_tmp > value) {
          value = value_tmp;
          index = k;
        }
      }
      output[output_offset + j] = out_value ? value : index;
    }
  }
 }

 // Reduces `input`, addressed as [pre_axis_count][axis_count][after_axis_count], along the
 // middle axis. For every (pre, after) pair one int8 is written to
 // output[pre * after_axis_count + after]: the smallest element found (starting from
 // INT8_MAX_VALUE) when param->out_value_ is set, otherwise the position where it was found.
 void ArgMin(const int8_t *input, int8_t *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
             int after_axis_count) {
  const bool emit_value = param->out_value_;
  for (int outer = 0; outer < pre_axis_count; ++outer) {
    const size_t out_base = outer * after_axis_count;
    const size_t in_base = out_base * axis_count;
    for (int inner = 0; inner < after_axis_count; ++inner) {
      int8_t best = INT8_MAX_VALUE;
      int8_t best_pos = 0;
      for (int pos = 0; pos < axis_count; ++pos) {
        const int8_t candidate = input[in_base + pos * after_axis_count + inner];
        if (!(candidate < best)) {
          continue;
        }
        best = candidate;
        best_pos = pos;
      }
      if (emit_value) {
        output[out_base + inner] = best;
      } else {
        output[out_base + inner] = best_pos;
      }
    }
  }
 }

 #undef INT8_MAX_VALUE
--- a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arg_min_max.h
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arg_min_max.h
@@ -1,33 +0,0 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_H_
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_H_

 #include "src/runtime/kernel/arm/opclib/arg_min_max_parameter.h"

 // Flattened arg-max over the middle axis of a [pre_axis_count, axis_count, after_axis_count]
 // view of `input`; writes either the winning value or its position, chosen by param->out_value_.
 void ArgMax(const int8_t *input, int8_t *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
             int after_axis_count);
 // Flattened arg-min counterpart of ArgMax with the same addressing and output selection.
 void ArgMin(const int8_t *input, int8_t *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
             int after_axis_count);
 // Per-axis entry points (axis 0..3). Each one dispatches on param->out_value_ to the matching
 // ...OutValue / ...OutIndex implementation, passing `in_shape` and `param` through unchanged.
 void ArgMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param);
 void ArgMinDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param);
 void ArgMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param);
 void ArgMinDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param);
 void ArgMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param);
 void ArgMinDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param);
 void ArgMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param);
 void ArgMinDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param);
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_H_
--- a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arg_min_max_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arg_min_max_int8.cc
@@ -0,0 +1,221 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "src/runtime/kernel/arm/opclib/int8/arg_min_max_int8.h"
 #include <float.h>

 // Splits `shape` around `axis`: *pre_axis_count receives the product of the dimensions before
 // the axis, *axis_count the axis length itself and *after_axis_count the product of the
 // dimensions after it (up to dims_number). Empty products are 1.
 void CalcParameter(const int *shape, int dims_number, int axis, int *pre_axis_count, int *axis_count,
                       int *after_axis_count) {
  int leading = 1;
  int dim = 0;
  while (dim < axis) {
    leading *= shape[dim];
    ++dim;
  }
  *pre_axis_count = leading;

  *axis_count = shape[axis];

  int trailing = 1;
  for (dim = axis + 1; dim < dims_number; ++dim) {
    trailing *= shape[dim];
  }
  *after_axis_count = trailing;
 }

 // Quantized arg-min/arg-max over the middle axis of a
 // [pre_axis_count, axis_count, after_axis_count] view of `input`.
 // Each element is dequantized as input * in_scale - in_zp * in_scale, the extreme (maximum when
 // param->get_max_ is set, minimum otherwise) is located, and either that value or its position
 // (selected by param->out_value_) is requantized with the output scale / zero point and written
 // to output[pre * after_axis_count + after].
 void ArgMinMaxQuant(const int8_t *input, int8_t *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
            int after_axis_count, QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
  bool out_value = param->out_value_;
  float output_inverse_scale = 1.f / out_quant_arg->scale_;
  float bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
  int32_t output_zp = out_quant_arg->zp_;
  for (int i = 0; i < pre_axis_count; ++i) {
    size_t output_offset = i * after_axis_count;
    size_t input_offset = output_offset * axis_count;
    for (int j = 0; j < after_axis_count; ++j) {
      // Start from the worst possible candidate for the requested direction.
      float value = param->get_max_ ? -FLT_MAX : FLT_MAX;
      float index = 0.0f;
      for (int k = 0; k < axis_count; ++k) {
        float value_tmp = input[input_offset + k * after_axis_count + j] * in_quant_arg->scale_ + bias;
        if (param->get_max_) {
          if (value_tmp > value) {
            value = value_tmp;
            index = k;
          }
        } else {
          if (value_tmp < value) {
            value = value_tmp;
            index = k;
          }
        }
      }
      float real_out = out_value ? value : index;
      // Requantize with round-half-away-from-zero and saturate to the int8 range. Storing the
      // float directly truncated toward zero and was undefined for out-of-range results.
      float quantized = real_out * output_inverse_scale + output_zp;
      quantized += quantized < 0.f ? -0.5f : 0.5f;
      int8_t stored;
      if (!(quantized > -128.f)) {
        stored = -128;  // also taken for NaN, which fails every comparison
      } else if (quantized >= 127.f) {
        stored = 127;
      } else {
        stored = static_cast<int8_t>(static_cast<int32_t>(quantized));
      }
      output[output_offset + j] = stored;
    }
  }
 }

 // Shape-based overload: derives the (pre, axis, after) element counts for param->axis_ from
 // `in_shape` via CalcParameter and hands the work to the count-based ArgMinMaxQuant overload.
 void ArgMinMaxQuant(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
                    QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
  int outer_count = 1;
  int reduce_count = 1;
  int inner_count = 1;
  CalcParameter(in_shape, param->dims_size_, param->axis_, &outer_count, &reduce_count, &inner_count);
  ArgMinMaxQuant(input, output, param, outer_count, reduce_count, inner_count, in_quant_arg, out_quant_arg);
 }

 // qsort comparator ordering ArgElement entries by ascending data_.f_data_.
 // Returns -1 / 0 / +1 from explicit comparisons. Returning the raw difference converted it to
 // int, so values less than 1.0 apart compared as equal and very large differences could not be
 // represented at all.
 int ArgCompareAscInt8(const void *a, const void *b) {
  const auto lhs = reinterpret_cast<const ArgElement *>(a)->data_.f_data_;
  const auto rhs = reinterpret_cast<const ArgElement *>(b)->data_.f_data_;
  return (lhs > rhs) - (lhs < rhs);
 }

 // qsort comparator ordering ArgElement entries by descending data_.f_data_.
 // Returns -1 / 0 / +1 from explicit comparisons. Returning the raw difference converted it to
 // int, so values less than 1.0 apart compared as equal and very large differences could not be
 // represented at all.
 int ArgCompareDescInt8(const void *a, const void *b) {
  const auto lhs = reinterpret_cast<const ArgElement *>(a)->data_.f_data_;
  const auto rhs = reinterpret_cast<const ArgElement *>(b)->data_.f_data_;
  return (rhs > lhs) - (rhs < lhs);
 }

 // Requantizes a real value: computes real_out * output_inverse_scale + output_zp, rounds half
 // away from zero and saturates to [-128, 127]. Returning the float directly truncated toward
 // zero and was undefined whenever the result fell outside the int8 range.
 int8_t GetInt8Output(float real_out, float output_inverse_scale, int32_t output_zp) {
  const float scaled = real_out * output_inverse_scale + output_zp;
  const float rounded = scaled < 0.f ? scaled - 0.5f : scaled + 0.5f;
  if (!(rounded > -128.f)) {
    return -128;  // also taken for NaN, which fails every comparison
  }
  if (rounded >= 127.f) {
    return 127;
  }
  return static_cast<int8_t>(static_cast<int32_t>(rounded));
 }

 // Quantized top-k along axis 0. For every inner position the in_shape[0] entries are
 // dequantized into param->arg_elements_, sorted (descending when param->get_max_ is set,
 // ascending otherwise) and the first param->topk_ entries are requantized through
 // GetInt8Output - as values or as positions depending on param->out_value_.
 void ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
                   QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
  const bool emit_value = param->out_value_;
  const float inv_out_scale = 1.f / out_quant_arg->scale_;
  const float dequant_bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
  const int32_t out_zero_point = out_quant_arg->zp_;
  auto *candidates = param->arg_elements_;
  for (int32_t inner = 0; inner < param->in_strides_[0]; ++inner) {
    for (int pos = 0; pos < in_shape[0]; ++pos) {
      auto &slot = candidates[pos];
      slot.index_ = pos;
      slot.data_.f_data_ = input[param->in_strides_[0] * pos + inner] * in_quant_arg->scale_ + dequant_bias;
    }
    qsort(candidates, in_shape[0], sizeof(ArgElement), param->get_max_ ? ArgCompareDescInt8 : ArgCompareAscInt8);

    for (int rank = 0; rank < param->topk_; ++rank) {
      const auto &picked = candidates[rank];
      const float real_out = emit_value ? picked.data_.f_data_ : picked.index_;
      output[rank * param->out_strides_[0] + inner] = GetInt8Output(real_out, inv_out_scale, out_zero_point);
    }
  }
 }

 // Quantized top-k along axis 1. For every (dim0, inner) position the in_shape[1] entries are
 // dequantized into param->arg_elements_, sorted (descending when param->get_max_ is set,
 // ascending otherwise) and the first param->topk_ entries are requantized through
 // GetInt8Output - as values or as positions depending on param->out_value_.
 void ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
                   QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
  const bool emit_value = param->out_value_;
  const float inv_out_scale = 1.f / out_quant_arg->scale_;
  const float dequant_bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
  const int32_t out_zero_point = out_quant_arg->zp_;
  const int axis_len = in_shape[1];
  auto *candidates = param->arg_elements_;
  for (int n = 0; n < in_shape[0]; ++n) {
    const size_t in_base0 = n * param->in_strides_[0];
    const size_t out_base0 = n * param->out_strides_[0];
    for (int inner = 0; inner < param->in_strides_[1]; ++inner) {
      for (int pos = 0; pos < axis_len; ++pos) {
        auto &slot = candidates[pos];
        slot.index_ = pos;
        slot.data_.f_data_ =
          input[param->in_strides_[1] * pos + in_base0 + inner] * in_quant_arg->scale_ + dequant_bias;
      }
      qsort(candidates, axis_len, sizeof(ArgElement), param->get_max_ ? ArgCompareDescInt8 : ArgCompareAscInt8);

      for (int rank = 0; rank < param->topk_; ++rank) {
        const auto &picked = candidates[rank];
        const float real_out = emit_value ? picked.data_.f_data_ : picked.index_;
        output[out_base0 + inner + rank * param->out_strides_[1]] =
          GetInt8Output(real_out, inv_out_scale, out_zero_point);
      }
    }
  }
 }

 // Quantized top-k along axis 2. For every (dim0, dim1, inner) position the in_shape[2] entries
 // are dequantized into param->arg_elements_, sorted (descending when param->get_max_ is set,
 // ascending otherwise) and the first param->topk_ entries are requantized through
 // GetInt8Output - as values or as positions depending on param->out_value_.
 void ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
                   QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
  const bool emit_value = param->out_value_;
  const float inv_out_scale = 1.f / out_quant_arg->scale_;
  const float dequant_bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
  const int32_t out_zero_point = out_quant_arg->zp_;
  const int rows = in_shape[1];
  const int axis_len = in_shape[2];
  auto *candidates = param->arg_elements_;
  for (int n = 0; n < in_shape[0]; ++n) {
    const size_t in_base0 = n * param->in_strides_[0];
    const size_t out_base0 = n * param->out_strides_[0];
    for (int r = 0; r < rows; ++r) {
      const size_t in_base1 = r * param->in_strides_[1] + in_base0;
      const size_t out_base1 = r * param->out_strides_[1] + out_base0;
      for (int inner = 0; inner < param->in_strides_[2]; ++inner) {
        for (int pos = 0; pos < axis_len; ++pos) {
          auto &slot = candidates[pos];
          slot.index_ = pos;
          slot.data_.f_data_ =
            input[param->in_strides_[2] * pos + inner + in_base1] * in_quant_arg->scale_ + dequant_bias;
        }
        qsort(candidates, axis_len, sizeof(ArgElement), param->get_max_ ? ArgCompareDescInt8 : ArgCompareAscInt8);
        for (int rank = 0; rank < param->topk_; ++rank) {
          const auto &picked = candidates[rank];
          const float real_out = emit_value ? picked.data_.f_data_ : picked.index_;
          output[out_base1 + inner + rank * param->out_strides_[2]] =
            GetInt8Output(real_out, inv_out_scale, out_zero_point);
        }
      }
    }
  }
 }

 // Quantized top-k along axis 3. For every (dim0, dim1, dim2) position the in_shape[3]
 // consecutive entries are dequantized into param->arg_elements_, sorted (descending when
 // param->get_max_ is set, ascending otherwise) and the first param->topk_ entries are
 // requantized through GetInt8Output - as values or as positions depending on param->out_value_.
 void ArgMinMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
                   QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
  const bool emit_value = param->out_value_;
  const float inv_out_scale = 1.f / out_quant_arg->scale_;
  const float dequant_bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
  const int32_t out_zero_point = out_quant_arg->zp_;
  const int rows = in_shape[1];
  const int cols = in_shape[2];
  const int axis_len = in_shape[3];
  auto *candidates = param->arg_elements_;
  for (int n = 0; n < in_shape[0]; ++n) {
    const size_t in_base0 = n * param->in_strides_[0];
    const size_t out_base0 = n * param->out_strides_[0];
    for (int r = 0; r < rows; ++r) {
      const size_t in_base1 = r * param->in_strides_[1] + in_base0;
      const size_t out_base1 = r * param->out_strides_[1] + out_base0;
      for (int c = 0; c < cols; ++c) {
        const size_t in_base2 = c * param->in_strides_[2] + in_base1;
        const size_t out_base2 = c * param->out_strides_[2] + out_base1;
        for (int pos = 0; pos < axis_len; ++pos) {
          auto &slot = candidates[pos];
          slot.index_ = pos;
          slot.data_.f_data_ = input[pos + in_base2] * in_quant_arg->scale_ + dequant_bias;
        }
        qsort(candidates, axis_len, sizeof(ArgElement), param->get_max_ ? ArgCompareDescInt8 : ArgCompareAscInt8);
        for (int rank = 0; rank < param->topk_; ++rank) {
          const auto &picked = candidates[rank];
          const float real_out = emit_value ? picked.data_.f_data_ : picked.index_;
          output[out_base2 + rank] = GetInt8Output(real_out, inv_out_scale, out_zero_point);
        }
      }
    }
  }
 }
--- a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arg_min_max_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arg_min_max_int8.h
@@ -0,0 +1,32 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_INT8_H_
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_INT8_H_

 #include "src/runtime/kernel/arm/opclib/arg_min_max_parameter.h"
 #include "src/runtime/kernel/arm/opclib/quantization/quantize.h"

 // Quantized arg-min/arg-max along param->axis_; the element counts before, on and after the
 // axis are derived from `in_shape` and param->dims_size_.
 void ArgMinMaxQuant(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
                    QuantArg *in_quant, QuantArg *out_quant);
 // Quantized top-k variants for a fixed axis (0..3). Each dequantizes the entries along its
 // axis with `in_quant`, sorts them according to param->get_max_, and writes the first
 // param->topk_ results (values or positions, per param->out_value_) requantized with `out_quant`.
 void ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
                   QuantArg *in_quant, QuantArg *out_quant);
 void ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
                   QuantArg *in_quant, QuantArg *out_quant);
 void ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
                   QuantArg *in_quant, QuantArg *out_quant);
 void ArgMinMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, ArgMinMaxParameter *param,
                   QuantArg *in_quant, QuantArg *out_quant);
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARG_MIN_MAX_INT8_H_
--- a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arithmetic_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arithmetic_int8.cc
@@ -0,0 +1,63 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #include "src/runtime/kernel/arm/opclib/int8/arithmetic_int8.h"
 #ifdef ENABLE_NEON
 #include <arm_neon.h>
 #endif
 #include "src/runtime/kernel/arm/opclib/errorcode.h"

 // Element-wise `!=` on the raw int8 values: output[i] becomes 1 where input0[i] differs from
 // input1[i] and 0 otherwise, for i in [0, element_size). Always returns OPCLIB_OK.
 int ElementNotEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size) {
  int pos = 0;
  while (pos < element_size) {
    const bool outcome = input0[pos] != input1[pos];
    output[pos] = static_cast<int8_t>(outcome);
    ++pos;
  }
  return OPCLIB_OK;
 }

 // Element-wise `==` on the raw int8 values: output[i] becomes 1 where input0[i] equals
 // input1[i] and 0 otherwise, for i in [0, element_size). Always returns OPCLIB_OK.
 int ElementEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size) {
  int pos = 0;
  while (pos < element_size) {
    const bool outcome = input0[pos] == input1[pos];
    output[pos] = static_cast<int8_t>(outcome);
    ++pos;
  }
  return OPCLIB_OK;
 }

 // Element-wise `<` on the raw int8 values: output[i] becomes 1 where input0[i] is less than
 // input1[i] and 0 otherwise, for i in [0, element_size). Always returns OPCLIB_OK.
 int ElementLess(int8_t *input0, int8_t *input1, int8_t *output, int element_size) {
  int pos = 0;
  while (pos < element_size) {
    const bool outcome = input0[pos] < input1[pos];
    output[pos] = static_cast<int8_t>(outcome);
    ++pos;
  }
  return OPCLIB_OK;
 }

 // Element-wise `<=` on the raw int8 values: output[i] becomes 1 where input0[i] is less than
 // or equal to input1[i] and 0 otherwise, for i in [0, element_size). Always returns OPCLIB_OK.
 int ElementLessEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size) {
  int pos = 0;
  while (pos < element_size) {
    const bool outcome = input0[pos] <= input1[pos];
    output[pos] = static_cast<int8_t>(outcome);
    ++pos;
  }
  return OPCLIB_OK;
 }

 // Element-wise `>` on the raw int8 values: output[i] becomes 1 where input0[i] is greater than
 // input1[i] and 0 otherwise, for i in [0, element_size). Always returns OPCLIB_OK.
 int ElementGreater(int8_t *input0, int8_t *input1, int8_t *output, int element_size) {
  int pos = 0;
  while (pos < element_size) {
    const bool outcome = input0[pos] > input1[pos];
    output[pos] = static_cast<int8_t>(outcome);
    ++pos;
  }
  return OPCLIB_OK;
 }

 // Element-wise `>=` on the raw int8 values: output[i] becomes 1 where input0[i] is greater than
 // or equal to input1[i] and 0 otherwise, for i in [0, element_size). Always returns OPCLIB_OK.
 int ElementGreaterEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size) {
  int pos = 0;
  while (pos < element_size) {
    const bool outcome = input0[pos] >= input1[pos];
    output[pos] = static_cast<int8_t>(outcome);
    ++pos;
  }
  return OPCLIB_OK;
 }
--- a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arithmetic_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/arithmetic_int8.h
@@ -0,0 +1,32 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARITHMETIC_INT8_H_
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARITHMETIC_INT8_H_

 #include "src/runtime/kernel/arm/opclib/op_base.h"

 // Element-wise int8 comparisons. Each function compares input0[i] with input1[i] for
 // i in [0, element_size), stores 1 (true) or 0 (false) in output[i] and returns OPCLIB_OK.
 // The comparison is made on the stored int8 values; no quantization parameters are involved.

 // output[i] = input0[i] != input1[i]
 int ElementNotEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size);

 // output[i] = input0[i] == input1[i]
 int ElementEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size);

 // output[i] = input0[i] < input1[i]
 int ElementLess(int8_t *input0, int8_t *input1, int8_t *output, int element_size);

 // output[i] = input0[i] <= input1[i]
 int ElementLessEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size);

 // output[i] = input0[i] > input1[i]
 int ElementGreater(int8_t *input0, int8_t *input1, int8_t *output, int element_size);

 // output[i] = input0[i] >= input1[i]
 int ElementGreaterEqual(int8_t *input0, int8_t *input1, int8_t *output, int element_size);
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_ARITHMETIC_INT8_H_
--- a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/batch_to_space_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/batch_to_space_int8.cc
@@ -0,0 +1,111 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #include "src/runtime/kernel/arm/opclib/int8/batch_to_space_int8.h"
 #include "src/runtime/kernel/arm/opclib/arithmetic_common.h"

 // Int8 batch-to-space for NHWC data without cropping.
 //
 // in_shape is read as {N, H, W, C} (only H, W and C are used), block as {block_h, block_w}. For every output
 // batch n the input batch (bh * block_w + bw) * out_n + n supplies the pixel at block position (bh, bw).
 // Each value is requantized from the input scale/zero-point to the output scale/zero-point and saturated
 // to [-128, 127]. The output buffer is filled strictly sequentially.
 void BatchToSpaceNoCropForNHWC(const int8_t *input, int8_t *output, const int *in_shape, int out_n, const int *block,
                                QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
  const int rows_per_block = block[0];
  const int cols_per_block = block[1];
  const int src_h = in_shape[1];
  const int src_w = in_shape[2];
  const int channels = in_shape[3];

  // Element strides inside the input tensor.
  const size_t src_row_stride = src_w * channels;
  const size_t src_batch_stride = src_row_stride * src_h;
  // Number of input batches that belong to one block row.
  const size_t batches_per_block_row = cols_per_block * out_n;

  // Requantization: q_out = round(q_in * scale + bias) + out_zp.
  const float inv_out_scale = 1.f / out_quant_arg->scale_;
  const float scale = in_quant_arg->scale_ * inv_out_scale;
  const float bias = -in_quant_arg->zp_ * scale;
  const int32_t out_zp = out_quant_arg->zp_;

  int8_t *dst = output;
  for (int n = 0; n < out_n; ++n) {
    for (int h = 0; h < src_h; ++h) {
      const size_t row_base = h * src_row_stride;
      for (int bh = 0; bh < rows_per_block; ++bh) {
        for (int w = 0; w < src_w; ++w) {
          const size_t col_base = w * channels;
          for (int bw = 0; bw < cols_per_block; ++bw) {
            const size_t src_offset =
              src_batch_stride * (bh * batches_per_block_row + bw * out_n + n) + col_base + row_base;
            for (int c = 0; c < channels; ++c) {
              int32_t q = round(input[src_offset + c] * scale + bias) + out_zp;
              if (q > 127) {
                q = 127;
              }
              if (q < -128) {
                q = -128;
              }
              *dst++ = q;
            }
          }
        }
      }
    }
  }
 }

 // Int8 batch-to-space for NHWC data with cropping.
 //
 // in_shape is read as {N, H, W, C} (only H, W and C are used), block as {block_h, block_w} and crops as
 // {top, bottom, left, right}, expressed in output (post-expansion) pixels. Only output rows in
 // [crops[0], H * block_h - crops[1] - 1] and columns in [crops[2], W * block_w - crops[3] - 1] are emitted.
 // Each emitted value is requantized from the input scale/zero-point to the output scale/zero-point and
 // saturated to [-128, 127]. The output buffer is filled strictly sequentially.
 //
 // Row/column indices are kept signed so that a crop covering the whole extent (negative "valid end")
 // correctly yields no output instead of being hidden by an unsigned wrap-around in the range check.
 void BatchToSpaceForNHWC(const int8_t *input, int8_t *output, const int *in_shape, int out_n, const int *block,
                          const int *crops, QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
  int block_h = block[0];
  int block_w = block[1];
  int in_h = in_shape[1];
  int in_w = in_shape[2];
  int in_c = in_shape[3];

  // Negative begin-crops are invalid input; bail out rather than index before the start of the input.
  if (crops[0] < 0 || crops[2] < 0) {
    return;
  }

  // [h_start, h_end) / [w_start, w_end) are the input rows/columns that can contribute to the cropped
  // output; the *_valid_* bounds are the inclusive output-space limits used for the per-pixel check.
  int h_start = crops[0] / block_h;
  int h_valid_begin = crops[0];
  int h_end = MSMIN((in_h * block_h - crops[1]) / block_h + 1, in_h);
  int h_valid_end = in_h * block_h - crops[1] - 1;
  int w_start = crops[2] / block_w;
  int w_valid_begin = crops[2];
  int w_end = MSMIN((in_w * block_w - crops[3]) / block_w + 1, in_w);
  int w_valid_end = in_w * block_w - crops[3] - 1;

  size_t stride_h = block_w * out_n;
  size_t output_offset = 0;
  size_t in_stride_h = in_w * in_c;
  size_t in_stride_n = in_stride_h * in_h;

  // Requantization: q_out = round(q_in * scale + bias) + output_zp.
  float output_inverse_scale = 1.f / out_quant_arg->scale_;
  float scale = in_quant_arg->scale_ * output_inverse_scale;
  float bias = -in_quant_arg->zp_ * scale;
  int32_t output_zp = out_quant_arg->zp_;

  for (int n = 0; n < out_n; ++n) {
    for (int h = h_start; h < h_end; ++h) {
      size_t h_offset = h * in_stride_h;
      for (int bh = 0; bh < block_h; ++bh) {
        // Output-space row; signed so the comparison with the int bounds is exact.
        int h_index = h * block_h + bh;
        if (h_index < h_valid_begin || h_index > h_valid_end) {
          continue;
        }
        for (int w = w_start; w < w_end; ++w) {
          size_t w_offset = w * in_c;
          for (int bw = 0; bw < block_w; ++bw) {
            // Output-space column; signed for the same reason as h_index.
            int w_index = w * block_w + bw;
            if (w_index < w_valid_begin || w_index > w_valid_end) {
              continue;
            }
            size_t in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset;
            for (int c = 0; c < in_c; ++c) {
              int32_t output_tmp = round(input[in_offset + c] * scale + bias) + output_zp;
              output_tmp = output_tmp > 127 ? 127 : output_tmp;
              output_tmp = output_tmp < -128 ? -128 : output_tmp;
              output[output_offset++] = output_tmp;
            }
          }
        }
      }
    }
  }
 }
--- a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/batch_to_space_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/batch_to_space_int8.h
@@ -0,0 +1,25 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_BATCH_TO_SPACE_INT8_H_
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_BATCH_TO_SPACE_INT8_H_
 #include "src/runtime/kernel/arm/opclib/op_base.h"
 #include "src/runtime/kernel/arm/opclib/quantization/quantize.h"

 // Int8 batch-to-space over NHWC data without cropping. in_shape is indexed as {N, H, W, C}, block as
 // {block_h, block_w}; out_n is the output batch count. Values are requantized from in_quant_arg to
 // out_quant_arg and saturated to the int8 range.
 void BatchToSpaceNoCropForNHWC(const int8_t *input, int8_t *output, const int *in_shape, int out_n, const int *block,
                                QuantArg *in_quant_arg, QuantArg *out_quant_arg);
 // Same as above, but only the region left after removing crops = {top, bottom, left, right} output pixels
 // is written to output.
 void BatchToSpaceForNHWC(const int8_t *input, int8_t *output, const int *in_shape, int out_n, const int *block,
                          const int *crops, QuantArg *in_quant_arg, QuantArg *out_quant_arg);
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_BATCH_TO_SPACE_INT8_H_
--- a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/depth_to_space_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/depth_to_space_int8.cc
@@ -0,0 +1,51 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "src/runtime/kernel/arm/opclib/int8/depth_to_space_int8.h"
 #include <string.h>

 // Int8 depth-to-space for NHWC data.
 //
 // For every input pixel (n, h, w) the channel vector is split into block_size runs of
 // block_size * out_stride_dim2_ values; run l is written to output row h * block_size + l starting at
 // column w * block_size. All offsets come from the precomputed strides in param. Each value is requantized
 // from the input scale/zero-point to the output scale/zero-point and saturated to [-128, 127].
 void DepthToSpaceForNHWC(const int8_t *input, int8_t *output, int *in_shape, DepthToSpaceParameter *param,
                          QuantArg *in_quant_arg, QuantArg *out_quant_arg) {
  const int32_t block = param->block_size_;
  const int32_t rows = in_shape[1];
  const int32_t cols = in_shape[2];
  // Length of one contiguous run moved per (input pixel, block row).
  const size_t run_length = block * param->out_stride_dim2_;

  // Requantization: q_out = round(q_in * scale + bias) + out_zp.
  const float inv_out_scale = 1.f / out_quant_arg->scale_;
  const float scale = in_quant_arg->scale_ * inv_out_scale;
  const float bias = -in_quant_arg->zp_ * scale;
  const int32_t out_zp = out_quant_arg->zp_;

  for (int n = 0; n < in_shape[0]; ++n) {
    const size_t src_batch = n * param->in_stride_dim0_;
    const size_t dst_batch = n * param->out_stride_dim0_;
    for (int r = 0; r < rows; ++r) {
      const size_t src_row = src_batch + r * param->in_stride_dim1_;
      const size_t dst_row = dst_batch + r * block * param->out_stride_dim1_;
      for (int col = 0; col < cols; ++col) {
        const size_t src_pixel = src_row + col * param->in_stride_dim2_;
        const size_t dst_pixel = dst_row + col * block * param->out_stride_dim2_;
        for (int br = 0; br < block; ++br) {
          const size_t dst_run = dst_pixel + br * param->out_stride_dim1_;
          const size_t src_run = src_pixel + br * block * param->out_stride_dim2_;
          for (int m = 0; m < run_length; ++m) {
            int32_t q = round(input[src_run + m] * scale + bias) + out_zp;
            if (q > 127) {
              q = 127;
            }
            if (q < -128) {
              q = -128;
            }
            output[dst_run + m] = q;
          }
        }
      }
    }
  }
 }
--- a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/depth_to_space_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/depth_to_space_int8.h
@@ -0,0 +1,24 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_DEPTH_TO_SPACE_INT8_H_
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_DEPTH_TO_SPACE_INT8_H_

 #include "src/runtime/kernel/arm/opclib/depth_to_space_parameter.h"
 #include "src/runtime/kernel/arm/opclib/quantization/quantize.h"

 // Int8 depth-to-space over NHWC data. in_shape is indexed as {N, H, W, ...}; block size and all element
 // strides are taken from param. Values are requantized from in_quant_arg to out_quant_arg and saturated
 // to the int8 range.
 void DepthToSpaceForNHWC(const int8_t *input, int8_t *output, int *in_shape, DepthToSpaceParameter *param,
                          QuantArg *in_quant_arg, QuantArg *out_quant_arg);
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_DEPTH_TO_SPACE_INT8_H_
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/argminmax_fp32_test.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/argminmax_fp32_test.cc
@@ -0,0 +1,328 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "mindspore/core/utils/log_adapter.h"
 #include "common/common_test.h"
 #include "mindspore/lite/src/runtime/kernel/arm/opclib/fp32/arg_min_max.h"
 #include "mindspore/lite/src/runtime/kernel/arm/opclib/arg_min_max.h"
 #include "mindspore/lite/src/runtime/kernel/arm/opclib/arithmetic_common.h"

 namespace mindspore {

 // Test fixture for the fp32 ArgMin/ArgMax kernels; it adds no state of its own on top of mindspore::Common.
 class TestArgMinMaxTestFp32 : public mindspore::Common {
 public:
  TestArgMinMaxTestFp32() = default;
 };

 // ArgMax along axis 0 of a 3x5 matrix with top-1, returning indices (out_value_ = false).
 TEST_F(TestArgMinMaxTestFp32, ArgMaxTest1) {
  std::vector<float> input = {10, 20, 30, 40, 90,
                              20, 11, 15, 1,  50,
                              30, 45, 25, 50, 30};
  std::vector<float> expected = {2, 2, 0, 2, 0};
  std::vector<int> dims = {3, 5};
  float result[5];

  ArgMinMaxParameter param;
  param.get_max_ = true;
  param.out_value_ = false;
  param.topk_ = 1;
  param.axis_ = 0;
  param.dims_size_ = 2;
  param.data_type_ = 43;

  ArgMinMax(input.data(), result, dims.data(), &param);

  // Dump the raw kernel output to ease debugging of a failing comparison.
  for (float value : result) {
    std::cout << value << " ";
  }
  std::cout << "\n";
  CompareOutputData(result, expected.data(), expected.size(), 0.000001);
 }

 // ArgMax along axis 0 of a 3x5 matrix with top-1, returning the maximum values (out_value_ = true).
 TEST_F(TestArgMinMaxTestFp32, ArgMaxTest2) {
  std::vector<float> input = {10, 20, 30, 40, 90,
                              20, 11, 15, 1,  50,
                              30, 45, 25, 50, 30};
  std::vector<float> expected = {30, 45, 30, 50, 90};
  std::vector<int> dims = {3, 5};
  float result[5];

  ArgMinMaxParameter param;
  param.get_max_ = true;
  param.out_value_ = true;
  param.topk_ = 1;
  param.axis_ = 0;
  param.dims_size_ = 2;
  param.data_type_ = 43;

  ArgMinMax(input.data(), result, dims.data(), &param);
  CompareOutputData(result, expected.data(), expected.size(), 0.000001);
 }

 // ArgMin along axis 0 of a 3x5 matrix with top-1, returning the minimum values (out_value_ = true).
 TEST_F(TestArgMinMaxTestFp32, ArgMinTest2) {
  std::vector<float> input = {10, 20, 30, 40, 90,
                              20, 11, 15, 1,  50,
                              30, 45, 25, 50, 30};
  std::vector<float> expected = {10, 11, 15, 1, 30};
  std::vector<int> dims = {3, 5};
  float result[5];

  ArgMinMaxParameter param;
  param.get_max_ = false;
  param.out_value_ = true;
  param.topk_ = 1;
  param.axis_ = 0;
  param.dims_size_ = 2;
  param.data_type_ = 43;

  ArgMinMax(input.data(), result, dims.data(), &param);
  CompareOutputData(result, expected.data(), expected.size(), 0.000001);
 }

 // Top-2 ArgMax along axis 2 of a {1, 1, 3, 5} tensor, returning values (out_value_ = true).
 TEST_F(TestArgMinMaxTestFp32, ArgMaxTest3_axis2_out_data) {
  std::vector<float> in = {10, 20, 30, 40, 90,
                           20, 11, 15, 1,  50,
                           30, 45, 25, 50, 30};
  std::vector<float> except_out = {30, 45, 30, 50, 90, 20, 20, 25, 40, 50};
  ArgMinMaxParameter param;
  param.axis_ = 2;
  std::vector<int> in_shape = {1, 1, 3, 5};
  // Scratch buffer with one entry per element along the reduced axis.
  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
  ASSERT_TRUE(param.arg_elements_ != nullptr);
  param.out_value_ = true;
  param.topk_ = 2;
  std::vector<int> out_shape = {1, 1, 2, 5};
  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
  float out[10];
  ArgMaxDim2(in.data(), out, in_shape.data(), &param);
  CompareOutputData(out, except_out.data(), except_out.size(), 0.00001);
  // Release the scratch buffer so the test does not leak.
  free(param.arg_elements_);
 }

 // Top-2 ArgMax along axis 2 of a {1, 1, 3, 5} tensor, returning indices (out_value_ = false).
 TEST_F(TestArgMinMaxTestFp32, ArgMaxTest3_axis2_out_index) {
  std::vector<float> in = {10, 20, 30, 40, 90,
                           20, 11, 15, 1,  50,
                           30, 45, 25, 50, 30};
  std::vector<float> except_out = {2, 2, 0, 2, 0, 1, 0, 2, 0, 1};
  ArgMinMaxParameter param;
  param.axis_ = 2;
  std::vector<int> in_shape = {1, 1, 3, 5};
  // Scratch buffer with one entry per element along the reduced axis.
  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
  ASSERT_TRUE(param.arg_elements_ != nullptr);
  param.out_value_ = false;
  param.topk_ = 2;
  std::vector<int> out_shape = {1, 1, 2, 5};
  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
  float out[10];
  ArgMaxDim2(in.data(), out, in_shape.data(), &param);
  CompareOutputData(out, except_out.data(), except_out.size(), 0.00001);
  // Release the scratch buffer so the test does not leak.
  free(param.arg_elements_);
 }

 // Top-2 ArgMax along axis 3 of a {1, 1, 3, 5} tensor, returning values (out_value_ = true).
 TEST_F(TestArgMinMaxTestFp32, ArgMaxTest4_axis3_out_data) {
  std::vector<float> in = {10, 20, 30, 40, 90,
                           20, 11, 15, 1,  50,
                           30, 45, 25, 50, 30};
  std::vector<float> except_out = {90, 40,
                                   50, 20,
                                   50, 45};
  ArgMinMaxParameter param;
  param.axis_ = 3;
  std::vector<int> in_shape = {1, 1, 3, 5};
  // Scratch buffer with one entry per element along the reduced axis.
  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
  ASSERT_TRUE(param.arg_elements_ != nullptr);
  param.out_value_ = true;
  param.topk_ = 2;
  std::vector<int> out_shape = {1, 1, 3, 2};
  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
  float out[6];
  ArgMaxDim3(in.data(), out, in_shape.data(), &param);
  CompareOutputData(out, except_out.data(), except_out.size(), 0.00001);
  // Release the scratch buffer so the test does not leak.
  free(param.arg_elements_);
 }

 // Top-2 ArgMax along axis 3 of a {1, 1, 3, 5} tensor, returning indices (out_value_ = false).
 TEST_F(TestArgMinMaxTestFp32, ArgMaxTest4_axis3_out_index) {
  std::vector<float> in = {10, 20, 30, 40, 90,
                           20, 11, 15, 1,  50,
                           30, 45, 25, 50, 30};
  std::vector<float> except_out = {4, 3,
                                   4, 0,
                                   3, 1};
  ArgMinMaxParameter param;
  param.axis_ = 3;
  std::vector<int> in_shape = {1, 1, 3, 5};
  // Scratch buffer with one entry per element along the reduced axis.
  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
  ASSERT_TRUE(param.arg_elements_ != nullptr);
  param.out_value_ = false;
  param.topk_ = 2;
  std::vector<int> out_shape = {1, 1, 3, 2};
  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
  float out[6];
  ArgMaxDim3(in.data(), out, in_shape.data(), &param);
  CompareOutputData(out, except_out.data(), except_out.size(), 0.00001);
  // Release the scratch buffer so the test does not leak.
  free(param.arg_elements_);
 }

 // Top-2 ArgMax along axis 1 of a {2, 3, 2, 3} tensor, returning indices (out_value_ = false).
 TEST_F(TestArgMinMaxTestFp32, ArgMaxTest5_axis1_out_index) {
  std::vector<float> in = {100, 2, 300,
                            4,  50, 6,
                            11, 12, 13,
                            34, 35, 36,
                            9,  6,  17,
                            10, 20, 30,
                            10, 20, 30,
                            40, 5,  60,
                            7,  80, 90,
                            10, 11, 120,
                            18, 5,  16,
                            9,  22, 23};
  std::vector<float> except_out = {0, 1, 0,
                                   1, 0, 1,
                                   1, 2, 2,
                                   2, 1, 2,
                                   2, 1, 1,
                                   0, 2, 1,
                                   0, 0, 0,
                                   1, 1, 0};
  ArgMinMaxParameter param;
  param.axis_ = 1;
  std::vector<int> in_shape = {2, 3, 2, 3};
  // Scratch buffer with one entry per element along the reduced axis.
  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
  ASSERT_TRUE(param.arg_elements_ != nullptr);
  param.out_value_ = false;
  param.topk_ = 2;
  std::vector<int> out_shape = {2, 2, 2, 3};
  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
  float out[24];
  ArgMaxDim1(in.data(), out, in_shape.data(), &param);
  CompareOutputData(out, except_out.data(), except_out.size(), 0.00001);
  // Release the scratch buffer so the test does not leak.
  free(param.arg_elements_);
 }

 // Top-2 ArgMax along axis 1 of a {2, 3, 2, 3} tensor, returning values (out_value_ = true).
 TEST_F(TestArgMinMaxTestFp32, ArgMaxTest5_axis1_out_data) {
  std::vector<float> in = {100, 2, 300,
                            4,  50, 6,
                            11, 12, 13,
                            34, 35, 36,
                            9,  6,  17,
                            10, 20, 30,
                            10, 20, 30,
                            40, 5,  60,
                            7,  80, 90,
                            10, 11, 120,
                            18, 5,  16,
                            9,  22, 23};
  std::vector<float> except_out = {100, 12, 300,
                                   34, 50, 36,
                                   11, 6, 17,
                                   10, 35, 30,
                                   18, 80, 90,
                                   40, 22, 120,
                                   10, 20, 30,
                                   10, 11, 60};
  ArgMinMaxParameter param;
  param.axis_ = 1;
  std::vector<int> in_shape = {2, 3, 2, 3};
  // Scratch buffer with one entry per element along the reduced axis.
  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
  ASSERT_TRUE(param.arg_elements_ != nullptr);
  param.out_value_ = true;
  param.topk_ = 2;
  std::vector<int> out_shape = {2, 2, 2, 3};
  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
  float out[24];
  ArgMaxDim1(in.data(), out, in_shape.data(), &param);
  CompareOutputData(out, except_out.data(), except_out.size(), 0.00001);
  // Release the scratch buffer so the test does not leak.
  free(param.arg_elements_);
 }

 // Top-2 ArgMax along axis 0 of a {3, 2, 2, 2} tensor, returning indices (out_value_ = false).
 TEST_F(TestArgMinMaxTestFp32, ArgMaxTest6_axis0_out_index) {
  std::vector<float> in = {100, 2,
                            4,  50,
                            11, 12,
                            34, 35,
                            10, 20,
                            40, 5,
                            7,  80,
                            10, 11,
                            55, 25,
                            5,  15,
                            18, 8,
                            15, 16};
  std::vector<float> except_out = {0, 2,
                                   1, 0,
                                   2, 1,
                                   0, 0,
                                   2, 1,
                                   2, 2,
                                   0, 0,
                                   2, 2};
  ArgMinMaxParameter param;
  // The reduction runs over axis 0 (ArgMaxDim0); the axis also sizes the scratch buffer below, so it must
  // be 0 here — sizing it from axis 1 would allocate 2 entries for 3 candidates.
  param.axis_ = 0;
  std::vector<int> in_shape = {3, 2, 2, 2};
  // Scratch buffer with one entry per element along the reduced axis.
  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
  ASSERT_TRUE(param.arg_elements_ != nullptr);
  param.out_value_ = false;
  param.topk_ = 2;
  std::vector<int> out_shape = {2, 2, 2, 2};
  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
  float out[16];
  ArgMaxDim0(in.data(), out, in_shape.data(), &param);
  CompareOutputData(out, except_out.data(), except_out.size(), 0.00001);
  // Release the scratch buffer so the test does not leak.
  free(param.arg_elements_);
 }

 // Top-2 ArgMax along axis 0 of a {3, 2, 2, 2} tensor, returning values (out_value_ = true).
 TEST_F(TestArgMinMaxTestFp32, ArgMaxTest6_axis0_out_data) {
  std::vector<float> in = {100, 2,
                            4,  50,
                            11, 12,
                            34, 35,
                            10, 20,
                            40, 5,
                            7,  80,
                            10, 11,
                            55, 25,
                            5,  15,
                            18, 8,
                            15, 16};
  std::vector<float> except_out = {100, 25,
                                   40, 50,
                                   18, 80,
                                   34, 35,
                                   55, 20,
                                   5, 15,
                                   11, 12,
                                   15, 16};
  ArgMinMaxParameter param;
  // The reduction runs over axis 0 (ArgMaxDim0); the axis also sizes the scratch buffer below, so it must
  // be 0 here — sizing it from axis 1 would allocate 2 entries for 3 candidates.
  param.axis_ = 0;
  std::vector<int> in_shape = {3, 2, 2, 2};
  // Scratch buffer with one entry per element along the reduced axis.
  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
  ASSERT_TRUE(param.arg_elements_ != nullptr);
  param.out_value_ = true;
  param.topk_ = 2;
  std::vector<int> out_shape = {2, 2, 2, 2};
  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
  float out[16];
  ArgMaxDim0(in.data(), out, in_shape.data(), &param);
  CompareOutputData(out, except_out.data(), except_out.size(), 0.00001);
  // Release the scratch buffer so the test does not leak.
  free(param.arg_elements_);
 }

 // Top-2 ArgMin along axis 3 of a {1, 1, 3, 5} tensor, returning values (out_value_ = true).
 TEST_F(TestArgMinMaxTestFp32, ArgMinTest1_axis3_out_data) {
  std::vector<float> in = {10, 20, 30, 40, 90,
                           20, 11, 15, 1,  50,
                           30, 45, 25, 50, 30};
  std::vector<float> except_out = {10, 20,
                                   1,  11,
                                   25, 30};
  ArgMinMaxParameter param;
  param.axis_ = 3;
  std::vector<int> in_shape = {1, 1, 3, 5};
  // Scratch buffer with one entry per element along the reduced axis.
  param.arg_elements_ = reinterpret_cast<ArgElement *>(malloc(in_shape[param.axis_] * sizeof(ArgElement)));
  ASSERT_TRUE(param.arg_elements_ != nullptr);
  param.out_value_ = true;
  param.topk_ = 2;
  std::vector<int> out_shape = {1, 1, 3, 2};
  ComputeStrides(in_shape.data(), param.in_strides_, in_shape.size());
  ComputeStrides(out_shape.data(), param.out_strides_, out_shape.size());
  float out[6];
  ArgMinDim3(in.data(), out, in_shape.data(), &param);
  CompareOutputData(out, except_out.data(), except_out.size(), 0.00001);
  // Release the scratch buffer so the test does not leak.
  free(param.arg_elements_);
 }

 }  // namespace mindspore
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batch_to_space_fp32_test.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batch_to_space_fp32_test.cc
@@ -0,0 +1,197 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "mindspore/core/utils/log_adapter.h"
 #include "common/common_test.h"
 #include "mindspore/lite/src/runtime/kernel/arm/opclib/batch_to_space.h"
 #include "mindspore/lite/src/runtime/kernel/arm/opclib/arithmetic_common.h"

 namespace mindspore {

 // Test fixture for the fp32 batch-to-space kernels; it adds no state of its own on top of mindspore::Common.
 class BatchToSpaceTestFp32 : public mindspore::Common {
 public:
  BatchToSpaceTestFp32() = default;
 };


 // No-crop case: input {4, 1, 1, 3} with a 2x2 block and one output batch.
 TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest1) {
  constexpr int kOutSize = 12;
  float src[12] = {10, 30, 90, 2, 20, 120, 5, 50, 150, 6, 16, 160};
  float expected[kOutSize] = {10, 30, 90, 2, 20, 120, 5, 50, 150, 6, 16, 160};
  float result[kOutSize];

  int block_dims[2] = {2, 2};
  int src_shape[4] = {4, 1, 1, 3};
  int out_batches = 1;
  BatchToSpaceNoCropForNHWC(src, result, src_shape, out_batches, block_dims, sizeof(float));

  // Dump the raw kernel output to ease debugging of a failing comparison.
  for (float value : result) {
    std::cout << value << " ";
  }
  std::cout << "\n";
  CompareOutputData(result, expected, kOutSize, 0.000001);
 }

 // Cropped case: input {4, 1, 1, 3}, 2x2 block, crops {1, 0, 0, 1} leave a single output pixel.
 TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest_crop_1) {
  constexpr int kOutSize = 3;
  float src[12] = {10, 30, 90, 2, 20, 120, 5, 50, 150, 6, 16, 160};
  float expected[kOutSize] = {5, 50, 150};
  float result[kOutSize];

  int block_dims[2] = {2, 2};
  int crop_dims[4] = {1, 0, 0, 1};
  int src_shape[4] = {4, 1, 1, 3};
  int out_batches = 1;
  BatchToSpaceForNHWC(src, result, src_shape, out_batches, block_dims, crop_dims, sizeof(float));

  // Dump the raw kernel output to ease debugging of a failing comparison.
  for (float value : result) {
    std::cout << value << " ";
  }
  std::cout << "\n";
  CompareOutputData(result, expected, kOutSize, 0.000001);
 }

 // No-crop case: input {4, 2, 2, 2} with a 2x2 block and one output batch.
 TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest2) {
  constexpr int kOutSize = 32;
  float src[32] = {1, 10, 3, 30, 9,  90,  11, 110, 2, 20, 4, 40, 10, 100, 12, 120,
                   5, 50, 7, 70, 13, 130, 15, 150, 6, 60, 8, 80, 14, 140, 16, 160};
  float expected[kOutSize] = {1, 10, 2,  20,  3,  30,  4,  40,  5,  50,  6,  60,  7,  70,  8,  80,
                              9, 90, 10, 100, 11, 110, 12, 120, 13, 130, 14, 140, 15, 150, 16, 160};
  float result[kOutSize];

  int block_dims[2] = {2, 2};
  int src_shape[4] = {4, 2, 2, 2};
  int out_batches = 1;
  BatchToSpaceNoCropForNHWC(src, result, src_shape, out_batches, block_dims, sizeof(float));

  // Dump the raw kernel output to ease debugging of a failing comparison.
  for (float value : result) {
    std::cout << value << " ";
  }
  std::cout << "\n";
  CompareOutputData(result, expected, kOutSize, 0.000001);
 }

 // Cropped case: input {4, 2, 2, 2}, 2x2 block, crops {1, 1, 1, 0}.
 TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest_crop_2) {
  constexpr int kOutSize = 12;
  float src[32] = {1, 10, 3, 30, 9,  90,  11, 110, 2, 20, 4, 40, 10, 100, 12, 120,
                   5, 50, 7, 70, 13, 130, 15, 150, 6, 60, 8, 80, 14, 140, 16, 160};
  float expected[kOutSize] = {6,  60,  7,  70,  8,  80,
                              10, 100, 11, 110, 12, 120};
  float result[kOutSize];

  int block_dims[2] = {2, 2};
  int crop_dims[4] = {1, 1, 1, 0};
  int src_shape[4] = {4, 2, 2, 2};
  int out_batches = 1;
  BatchToSpaceForNHWC(src, result, src_shape, out_batches, block_dims, crop_dims, sizeof(float));

  // Dump the raw kernel output to ease debugging of a failing comparison.
  for (float value : result) {
    std::cout << value << " ";
  }
  std::cout << "\n";
  CompareOutputData(result, expected, kOutSize, 0.000001);
 }

 // No-crop case: input {8, 2, 2, 2} with a 2x2 block and two output batches.
 TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest3) {
  constexpr int kOutSize = 64;
  float src[64] = {1,  10, 3,  30, 9,   90,  11,  110, 2,  20, 4,  40, 10,  100, 12,  120,
                   5,  50, 7,  70, 13,  130, 15,  150, 6,  60, 8,  80, 14,  140, 16,  160,
                   21, 10, 23, 30, 29,  90,  211, 110, 22, 20, 24, 40, 210, 100, 212, 120,
                   25, 50, 27, 70, 213, 130, 215, 150, 26, 60, 28, 80, 214, 140, 216, 160};
  float expected[kOutSize] = {1,  10,  5,  50,  3,  30,  7,  70,  21,  10,  25,  50,  23,  30,  27,  70,
                              9,  90,  13, 130, 11, 110, 15, 150, 29,  90,  213, 130, 211, 110, 215, 150,
                              2,  20,  6,  60,  4,  40,  8,  80,  22,  20,  26,  60,  24,  40,  28,  80,
                              10, 100, 14, 140, 12, 120, 16, 160, 210, 100, 214, 140, 212, 120, 216, 160};
  float result[kOutSize];

  int block_dims[2] = {2, 2};
  int src_shape[4] = {8, 2, 2, 2};
  int out_batches = 2;
  BatchToSpaceNoCropForNHWC(src, result, src_shape, out_batches, block_dims, sizeof(float));

  // Only the first 32 values are dumped to keep the log short.
  const int print_count = kOutSize < 32 ? kOutSize : 32;
  for (int idx = 0; idx != print_count; ++idx) {
    std::cout << result[idx] << " ";
  }
  std::cout << "\n";
  CompareOutputData(result, expected, kOutSize, 0.000001);
 }

 // Cropped case: input {8, 2, 2, 2}, 2x2 block, two output batches, crops {2, 0, 0, 2}.
 TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest_crop_3) {
  constexpr int kOutSize = 16;
  float src[64] = {1,  10, 3,  30, 9,   90,  11,  110, 2,  20, 4,  40, 10,  100, 12,  120,
                   5,  50, 7,  70, 13,  130, 15,  150, 6,  60, 8,  80, 14,  140, 16,  160,
                   21, 10, 23, 30, 29,  90,  211, 110, 22, 20, 24, 40, 210, 100, 212, 120,
                   25, 50, 27, 70, 213, 130, 215, 150, 26, 60, 28, 80, 214, 140, 216, 160};
  float expected[kOutSize] = {9,  90,  13, 130, 29,  90,  213, 130,
                              10, 100, 14, 140, 210, 100, 214, 140};
  float result[kOutSize];

  int block_dims[2] = {2, 2};
  int crop_dims[4] = {2, 0, 0, 2};
  int src_shape[4] = {8, 2, 2, 2};
  int out_batches = 2;
  BatchToSpaceForNHWC(src, result, src_shape, out_batches, block_dims, crop_dims, sizeof(float));

  // At most the first 32 values are dumped to keep the log short.
  const int print_count = kOutSize < 32 ? kOutSize : 32;
  for (int idx = 0; idx != print_count; ++idx) {
    std::cout << result[idx] << " ";
  }
  std::cout << "\n";
  CompareOutputData(result, expected, kOutSize, 0.000001);
 }

 // No-crop case: input {12, 2, 2, 2} with a non-square 3x2 block and two output batches.
 TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest4) {
  constexpr int kOutSize = 96;
  float src[96] = {1,   10,  3,   30,  9,   90,  11,  110, 2,  20,  4,   40,  10,  100, 12,  120, 5,   50,  7,   70,
                   13,  130, 15,  150, 6,   60,  8,   80,  14, 140, 16,  160, 21,  10,  23,  30,  29,  90,  211, 110,
                   22,  20,  24,  40,  210, 100, 212, 120, 25, 50,  27,  70,  213, 130, 215, 150, 26,  60,  28,  80,
                   214, 140, 216, 160, 31,  10,  33,  30,  39, 90,  311, 110, 32,  20,  34,  40,  310, 100, 312, 120,
                   35,  50,  37,  70,  313, 130, 315, 150, 36, 60,  38,  80,  314, 140, 316, 160};
  float expected[kOutSize] = {
    1,  10,  5,  50,  3,  30,  7,  70,  21,  10,  25,  50,  23,  30,  27,  70,  31,  10,  35,  50,  33,  30,  37,  70,
    9,  90,  13, 130, 11, 110, 15, 150, 29,  90,  213, 130, 211, 110, 215, 150, 39,  90,  313, 130, 311, 110, 315, 150,
    2,  20,  6,  60,  4,  40,  8,  80,  22,  20,  26,  60,  24,  40,  28,  80,  32,  20,  36,  60,  34,  40,  38,  80,
    10, 100, 14, 140, 12, 120, 16, 160, 210, 100, 214, 140, 212, 120, 216, 160, 310, 100, 314, 140, 312, 120, 316, 160};
  float result[kOutSize];

  int block_dims[2] = {3, 2};
  int src_shape[4] = {12, 2, 2, 2};
  int out_batches = 2;
  BatchToSpaceNoCropForNHWC(src, result, src_shape, out_batches, block_dims, sizeof(float));

  // Only the first 32 values are dumped to keep the log short.
  const int print_count = kOutSize < 32 ? kOutSize : 32;
  for (int idx = 0; idx != print_count; ++idx) {
    std::cout << result[idx] << " ";
  }
  std::cout << "\n";
  CompareOutputData(result, expected, kOutSize, 0.000001);
 }

 // Cropped case: input {12, 2, 2, 2}, non-square 3x2 block, two output batches, crops {1, 2, 1, 1}.
 TEST_F(BatchToSpaceTestFp32, BatchToSpaceTest_crop_4) {
  constexpr int kOutSize = 24;
  float src[96] = {1,   10,  3,   30,  9,   90,  11,  110, 2,  20,  4,   40,  10,  100, 12,  120, 5,   50,  7,   70,
                   13,  130, 15,  150, 6,   60,  8,   80,  14, 140, 16,  160, 21,  10,  23,  30,  29,  90,  211, 110,
                   22,  20,  24,  40,  210, 100, 212, 120, 25, 50,  27,  70,  213, 130, 215, 150, 26,  60,  28,  80,
                   214, 140, 216, 160, 31,  10,  33,  30,  39, 90,  311, 110, 32,  20,  34,  40,  310, 100, 312, 120,
                   35,  50,  37,  70,  313, 130, 315, 150, 36, 60,  38,  80,  314, 140, 316, 160};
  float expected[kOutSize] = {
    25,  50,  23,  30,  35,  50,  33,  30,
    13, 130, 11, 110,  26,  60,  24,  40, 36,  60,  34,  40, 14, 140, 12, 120};
  float result[kOutSize];

  int block_dims[2] = {3, 2};
  int crop_dims[4] = {1, 2, 1, 1};
  int src_shape[4] = {12, 2, 2, 2};
  int out_batches = 2;
  BatchToSpaceForNHWC(src, result, src_shape, out_batches, block_dims, crop_dims, sizeof(float));

  // At most the first 32 values are dumped to keep the log short.
  const int print_count = kOutSize < 32 ? kOutSize : 32;
  for (int idx = 0; idx != print_count; ++idx) {
    std::cout << result[idx] << " ";
  }
  std::cout << "\n";
  CompareOutputData(result, expected, kOutSize, 0.000001);
 }

 }  // namespace mindspore
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/crop_fp32_test.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/crop_fp32_test.cc
@@ -0,0 +1,234 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "common/common_test.h"
 #include "mindspore/lite/src/runtime/kernel/arm/opclib/fp32/crop.h"

 namespace mindspore {
 // Test fixture for the fp32 crop kernel tests; it adds nothing on top of the shared
 // mindspore::Common base and relies on the implicitly generated default constructor.
 class CropTestFp32 : public mindspore::Common {};

 // Crop4D on a single thread: axis_ = 1 with offsets {1, 1, 1} crops a {2, 2, 2, 2} input
 // down to {2, 1, 1, 1}; the expected result is the values 8 and 16.
 TEST_F(CropTestFp32, CropTest1) {
   float input[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
   constexpr int kOutSize = 2;
   float expect_out[kOutSize] = {8, 16};

   // Zero the output so a kernel that writes nothing fails deterministically.
   float output[kOutSize] = {};
   int in_shape[4] = {2, 2, 2, 2};
   int out_shape[4] = {2, 1, 1, 1};
   // Value-initialize: fields not assigned below (e.g. offset_[3]) are zero, not indeterminate.
   CropParameter crop_param{};
   crop_param.axis_ = 1;
   crop_param.offset_[0] = 1;
   crop_param.offset_[1] = 1;
   crop_param.offset_[2] = 1;
   crop_param.op_parameter_.thread_num_ = 1;
   crop_param.thread_id_ = 0;
   Crop4D(input, output, in_shape, out_shape, &crop_param);
   for (float value : output) {
     std::cout << value << " ";
   }
   std::cout << "\n";
   CompareOutputData(output, expect_out, kOutSize, 0.000001);
 }

 // Crop4D on a single thread: axis_ = 0 with offsets {1, 1, 0, 0} crops a {2, 2, 2, 2} input
 // down to {1, 1, 2, 2}; the expected result is the last four input values.
 TEST_F(CropTestFp32, CropTest2) {
   float input[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
   constexpr int kOutSize = 4;
   float expect_out[kOutSize] = {13, 14, 15, 16};

   // Zero the output so a kernel that writes nothing fails deterministically.
   float output[kOutSize] = {};
   int in_shape[4] = {2, 2, 2, 2};
   int out_shape[4] = {1, 1, 2, 2};
   // Value-initialize so that any field not assigned below is zero, not indeterminate.
   CropParameter crop_param{};
   crop_param.axis_ = 0;
   crop_param.offset_[0] = 1;
   crop_param.offset_[1] = 1;
   crop_param.offset_[2] = 0;
   crop_param.offset_[3] = 0;
   crop_param.op_parameter_.thread_num_ = 1;
   crop_param.thread_id_ = 0;
   Crop4D(input, output, in_shape, out_shape, &crop_param);
   for (float value : output) {
     std::cout << value << " ";
   }
   std::cout << "\n";
   CompareOutputData(output, expect_out, kOutSize, 0.000001);
 }

 // Crop4D on a single thread: axis_ = 3 with a single offset of 1 crops a {2, 2, 2, 2} input
 // down to {2, 2, 2, 1}; the expected result is every second input value (2, 4, ..., 16).
 TEST_F(CropTestFp32, CropTest3) {
   float input[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
   constexpr int kOutSize = 8;
   float expect_out[kOutSize] = {2, 4, 6, 8, 10, 12, 14, 16};

   // Zero the output so a kernel that writes nothing fails deterministically.
   float output[kOutSize] = {};
   int in_shape[4] = {2, 2, 2, 2};
   int out_shape[4] = {2, 2, 2, 1};
   // Value-initialize: only offset_[0] is assigned, the remaining offsets must not be garbage.
   CropParameter crop_param{};
   crop_param.axis_ = 3;
   crop_param.offset_[0] = 1;
   crop_param.op_parameter_.thread_num_ = 1;
   crop_param.thread_id_ = 0;
   Crop4D(input, output, in_shape, out_shape, &crop_param);
   for (float value : output) {
     std::cout << value << " ";
   }
   std::cout << "\n";
   CompareOutputData(output, expect_out, kOutSize, 0.000001);
 }

 // Same crop as CropTest3 (axis_ = 3, offset 1, {2, 2, 2, 2} -> {2, 2, 2, 1}) but with
 // thread_num_ = 2: Crop4D is invoked once per thread_id_ (0, then 1) on the same buffers.
 TEST_F(CropTestFp32, CropTest4) {
   float input[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
   constexpr int kOutSize = 8;
   float expect_out[kOutSize] = {2, 4, 6, 8, 10, 12, 14, 16};

   // Zero the output so a slice that no thread writes fails deterministically.
   float output[kOutSize] = {};
   int in_shape[4] = {2, 2, 2, 2};
   int out_shape[4] = {2, 2, 2, 1};
   // Value-initialize: only offset_[0] is assigned, the remaining offsets must not be garbage.
   CropParameter crop_param{};
   crop_param.axis_ = 3;
   crop_param.offset_[0] = 1;
   crop_param.op_parameter_.thread_num_ = 2;
   // Emulate the two worker threads sequentially.
   crop_param.thread_id_ = 0;
   Crop4D(input, output, in_shape, out_shape, &crop_param);
   crop_param.thread_id_ = 1;
   Crop4D(input, output, in_shape, out_shape, &crop_param);
   for (float value : output) {
     std::cout << value << " ";
   }
   std::cout << "\n";
   CompareOutputData(output, expect_out, kOutSize, 0.000001);
 }

 // Crop4DNoParallel counterpart of CropTest1: axis_ = 1 with offsets {1, 1, 1} crops a
 // {2, 2, 2, 2} input down to {2, 1, 1, 1}; the expected result is the values 8 and 16.
 TEST_F(CropTestFp32, CropTest5) {
   float input[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
   constexpr int kOutSize = 2;
   float expect_out[kOutSize] = {8, 16};

   // Zero the output so a kernel that writes nothing fails deterministically.
   float output[kOutSize] = {};
   int in_shape[4] = {2, 2, 2, 2};
   int out_shape[4] = {2, 1, 1, 1};
   // Value-initialize: the thread fields and offset_[3] are never assigned in this test, so
   // they must be zero rather than indeterminate when the struct is handed to the kernel.
   CropParameter crop_param{};
   crop_param.axis_ = 1;
   crop_param.offset_[0] = 1;
   crop_param.offset_[1] = 1;
   crop_param.offset_[2] = 1;
   Crop4DNoParallel(input, output, in_shape, out_shape, &crop_param);
   for (float value : output) {
     std::cout << value << " ";
   }
   std::cout << "\n";
   CompareOutputData(output, expect_out, kOutSize, 0.000001);
 }

 // Crop4DNoParallel counterpart of CropTest2: axis_ = 0 with offsets {1, 1, 0, 0} crops a
 // {2, 2, 2, 2} input down to {1, 1, 2, 2}; the expected result is the last four input values.
 TEST_F(CropTestFp32, CropTest6) {
   float input[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
   constexpr int kOutSize = 4;
   float expect_out[kOutSize] = {13, 14, 15, 16};

   // Zero the output so a kernel that writes nothing fails deterministically.
   float output[kOutSize] = {};
   int in_shape[4] = {2, 2, 2, 2};
   int out_shape[4] = {1, 1, 2, 2};
   // Value-initialize: the thread fields are never assigned in this test, so they must be
   // zero rather than indeterminate when the struct is handed to the kernel.
   CropParameter crop_param{};
   crop_param.axis_ = 0;
   crop_param.offset_[0] = 1;
   crop_param.offset_[1] = 1;
   crop_param.offset_[2] = 0;
   crop_param.offset_[3] = 0;
   Crop4DNoParallel(input, output, in_shape, out_shape, &crop_param);
   for (float value : output) {
     std::cout << value << " ";
   }
   std::cout << "\n";
   CompareOutputData(output, expect_out, kOutSize, 0.000001);
 }

 // Crop4DNoParallel counterpart of CropTest3: axis_ = 3 with a single offset of 1 crops a
 // {2, 2, 2, 2} input down to {2, 2, 2, 1}; the expected result is 2, 4, ..., 16.
 TEST_F(CropTestFp32, CropTest7) {
   float input[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
   constexpr int kOutSize = 8;
   float expect_out[kOutSize] = {2, 4, 6, 8, 10, 12, 14, 16};

   // Zero the output so a kernel that writes nothing fails deterministically.
   float output[kOutSize] = {};
   int in_shape[4] = {2, 2, 2, 2};
   int out_shape[4] = {2, 2, 2, 1};
   // Value-initialize: only axis_ and offset_[0] are assigned here; every other field
   // (remaining offsets, thread fields) must be zero rather than indeterminate.
   CropParameter crop_param{};
   crop_param.axis_ = 3;
   crop_param.offset_[0] = 1;
   Crop4DNoParallel(input, output, in_shape, out_shape, &crop_param);
   for (float value : output) {
     std::cout << value << " ";
   }
   std::cout << "\n";
   CompareOutputData(output, expect_out, kOutSize, 0.000001);
 }

 // Crop4D with thread_num_ = 2 on a {1, 3, 3, 3} input: axis_ = 1 with offsets {1, 1, 1}
 // crops to {1, 1, 2, 2}; Crop4D is invoked once per thread_id_ (0, then 1).
 TEST_F(CropTestFp32, CropTest8) {
   float input[27] = {1,  2,  3,  4,  5,  6,  7,  8,  9,
                      11, 12, 13, 14, 15, 16, 17, 18, 19,
                      21, 22, 23, 24, 25, 26, 27, 28, 29};
   constexpr int kOutSize = 4;
   float expect_out[kOutSize] = {15, 16, 18, 19};

   // Zero the output so a slice that no thread writes fails deterministically.
   float output[kOutSize] = {};
   int in_shape[4] = {1, 3, 3, 3};
   int out_shape[4] = {1, 1, 2, 2};
   // Value-initialize: fields not assigned below (e.g. offset_[3]) are zero, not indeterminate.
   CropParameter crop_param{};
   crop_param.axis_ = 1;
   crop_param.offset_[0] = 1;
   crop_param.offset_[1] = 1;
   crop_param.offset_[2] = 1;
   crop_param.op_parameter_.thread_num_ = 2;
   // Emulate the two worker threads sequentially.
   crop_param.thread_id_ = 0;
   Crop4D(input, output, in_shape, out_shape, &crop_param);
   crop_param.thread_id_ = 1;
   Crop4D(input, output, in_shape, out_shape, &crop_param);
   for (float value : output) {
     std::cout << value << " ";
   }
   std::cout << "\n";
   CompareOutputData(output, expect_out, kOutSize, 0.000001);
 }

 // Crop4D with thread_num_ = 2 on a {1, 4, 4, 4} input: axis_ = 1 with offsets {1, 1, 1}
 // crops to {1, 2, 2, 2}; Crop4D is invoked once per thread_id_ (0, then 1).
 TEST_F(CropTestFp32, CropTest9) {
   float input[64] = {1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                      11, 12, 13, 14, 15, 16, 17, 18, 19, 110, 111, 112, 113, 114, 115, 116,
                      21, 22, 23, 24, 25, 26, 27, 28, 29, 210, 211, 212, 213, 214, 215, 216,
                      31, 32, 33, 34, 35, 36, 37, 38, 39, 310, 311, 312, 313, 314, 315, 316};
   constexpr int kOutSize = 8;
   float expect_out[kOutSize] = {16, 17, 110, 111, 26, 27, 210, 211};

   // Zero the output so a slice that no thread writes fails deterministically.
   float output[kOutSize] = {};
   int in_shape[4] = {1, 4, 4, 4};
   int out_shape[4] = {1, 2, 2, 2};
   // Value-initialize: fields not assigned below (e.g. offset_[3]) are zero, not indeterminate.
   CropParameter crop_param{};
   crop_param.axis_ = 1;
   crop_param.offset_[0] = 1;
   crop_param.offset_[1] = 1;
   crop_param.offset_[2] = 1;
   crop_param.op_parameter_.thread_num_ = 2;
   // Emulate the two worker threads sequentially.
   crop_param.thread_id_ = 0;
   Crop4D(input, output, in_shape, out_shape, &crop_param);
   crop_param.thread_id_ = 1;
   Crop4D(input, output, in_shape, out_shape, &crop_param);
   for (float value : output) {
     std::cout << value << " ";
   }
   std::cout << "\n";
   CompareOutputData(output, expect_out, kOutSize, 0.000001);
 }

 }  // namespace mindspore

--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/depth_to_space_fp32_test.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/depth_to_space_fp32_test.cc
@@ -0,0 +1,85 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "mindspore/core/utils/log_adapter.h"
 #include "common/common_test.h"
 #include "mindspore/lite/src/runtime/kernel/arm/opclib/depth_to_space.h"
 #include "mindspore/lite/src/runtime/kernel/arm/opclib/arithmetic_common.h"

 namespace mindspore {

 // Test fixture for the fp32 depth-to-space kernel tests; it adds nothing on top of the shared
 // mindspore::Common base and relies on the implicitly generated default constructor.
 class DepthToSpaceTestFp32 : public mindspore::Common {};

 // DepthToSpaceForNHWC with block_size_ = 2 on a {1, 2, 2, 4} input; strides for the input
 // and for the {1, 4, 4, 1} output shape are computed with ComputeStrides.
 TEST_F(DepthToSpaceTestFp32, DepthToSpaceTest2) {
   float input[16] = {1, 2, 10, 20, 5, 6, 3, 8, 18, 10, 11, 55, 3, 4, 15, 25};
   constexpr int kOutSize = 16;
   float expect_out[kOutSize] = {1, 2, 5, 6, 10, 20, 3, 8, 18, 10, 3, 4, 11, 55, 15, 25};

   // Zero the output so a kernel that writes nothing fails deterministically.
   float output[kOutSize] = {};
   int in_shape[4] = {1, 2, 2, 4};
   int out_shape[4] = {1, 4, 4, 1};
   // Value-initialize so that any field not assigned below is zero, not indeterminate.
   DepthToSpaceParameter param{};
   param.block_size_ = 2;
   int in_strides[4];
   ComputeStrides(in_shape, in_strides, 4);
   int out_strides[4];
   ComputeStrides(out_shape, out_strides, 4);
   param.in_stride_dim0_ = in_strides[0];
   param.in_stride_dim1_ = in_strides[1];
   param.in_stride_dim2_ = in_strides[2];
   param.out_stride_dim0_ = out_strides[0];
   param.out_stride_dim1_ = out_strides[1];
   param.out_stride_dim2_ = out_strides[2];
   param.data_type_size_ = sizeof(float);
   DepthToSpaceForNHWC(static_cast<const void *>(input), output, in_shape, &param);
   for (float value : output) {
     std::cout << value << " ";
   }
   std::cout << "\n";
   CompareOutputData(output, expect_out, kOutSize, 0.000001);
 }

 // DepthToSpaceForNHWC with block_size_ = 2 on a {1, 1, 1, 8} input and a {1, 2, 2, 2} output
 // shape; the expected output equals the input sequence 1..8.
 TEST_F(DepthToSpaceTestFp32, DepthToSpaceTest3) {
   float input[8] = {1, 2, 3, 4, 5, 6, 7, 8};
   constexpr int kOutSize = 8;
   float expect_out[kOutSize] = {1, 2, 3, 4, 5, 6, 7, 8};

   // Zero the output so a kernel that writes nothing fails deterministically.
   float output[kOutSize] = {};
   int in_shape[4] = {1, 1, 1, 8};
   int out_shape[4] = {1, 2, 2, 2};
   // Value-initialize so that any field not assigned below is zero, not indeterminate.
   DepthToSpaceParameter param{};
   param.block_size_ = 2;
   int in_strides[4];
   ComputeStrides(in_shape, in_strides, 4);
   int out_strides[4];
   ComputeStrides(out_shape, out_strides, 4);
   param.in_stride_dim0_ = in_strides[0];
   param.in_stride_dim1_ = in_strides[1];
   param.in_stride_dim2_ = in_strides[2];
   param.out_stride_dim0_ = out_strides[0];
   param.out_stride_dim1_ = out_strides[1];
   param.out_stride_dim2_ = out_strides[2];
   param.data_type_size_ = sizeof(float);
   DepthToSpaceForNHWC(static_cast<const void *>(input), output, in_shape, &param);
   for (float value : output) {
     std::cout << value << " ";
   }
   std::cout << "\n";
   CompareOutputData(output, expect_out, kOutSize, 0.000001);
 }
 }  // namespace mindspore