Add arm op sub for int8 and testcases

5 years ago · 62e93f158d
--- a/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.cc
@@ -0,0 +1,205 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #include "src/runtime/kernel/arm/int8/sub_int8.h"
 #include <limits>
 #include <algorithm>
 #include "src/runtime/kernel/arm/nnacl/arithmetic_common.h"
 #include "src/runtime/kernel/arm/nnacl/quantization/quantize.h"
 #include "src/runtime/runtime_api.h"
 #include "src/kernel_registry.h"
 #include "include/errorcode.h"

 using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Sub;

 namespace mindspore::kernel {

 int SubInt8CPUKernel::Init() {
  lite::tensor::Tensor *input0 = in_tensors_.at(0);
  lite::tensor::Tensor *input1 = in_tensors_.at(1);
  lite::tensor::Tensor *output = out_tensors_.at(0);
  MS_ASSERT(input0);
  MS_ASSERT(input1);
  MS_ASSERT(output);

  broadcast_ = input0->ElementsNum() != input1->ElementsNum();

  param_.in0_args_.scale_ = input0->GetQuantParams().front().scale;
  param_.in0_args_.zp_ = -input0->GetQuantParams().front().zeroPoint;
  param_.in1_args_.scale_ = input1->GetQuantParams().front().scale;
  param_.in1_args_.zp_ = -input1->GetQuantParams().front().zeroPoint;
  param_.out_args_.scale_ = output->GetQuantParams().front().scale;
  param_.out_args_.zp_ = output->GetQuantParams().front().zeroPoint;

  const int left_shift = 20;
  const double twice_max_input_scale = 2 * std::max(param_.in0_args_.scale_, param_.in1_args_.scale_);
  const double real_input0_multiplier = param_.in0_args_.scale_ / twice_max_input_scale;
  const double real_input1_multiplier = param_.in1_args_.scale_ / twice_max_input_scale;
  const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * param_.out_args_.scale_);

  QuantizeMultiplierSmallerThanOne(real_input0_multiplier, &param_.input0_multiplier_, &param_.input0_shift_);
  QuantizeMultiplierSmallerThanOne(real_input1_multiplier, &param_.input1_multiplier_, &param_.input1_shift_);
  QuantizeMultiplierSmallerThanOne(real_output_multiplier, &param_.output_multiplier_, &param_.output_shift_);

  param_.output_activation_min_ = std::numeric_limits<int8_t>::min();
  param_.output_activation_max_ = std::numeric_limits<int8_t>::max();

  int left_shift0 = -param_.input0_shift_ > 0 ? -param_.input0_shift_ : 0;
  param_.right_shift0_ = -param_.input0_shift_ > 0 ? 0 : param_.input0_shift_;

  int left_shift1 = -param_.input1_shift_ > 0 ? -param_.input1_shift_ : 0;
  param_.right_shift1_ = -param_.input1_shift_ > 0 ? 0 : param_.input1_shift_;

  param_.left_shift_out_ = -param_.output_shift_ > 0 ? -param_.output_shift_ : 0;
  param_.right_shift_out_ = -param_.output_shift_ > 0 ? 0 : param_.output_shift_;

  param_.left_shift_result0_ = (1 << left_shift) * ((1 << left_shift0));
  param_.left_shift_result1_ = (1 << left_shift) * ((1 << left_shift1));

  MS_ASSERT(left_shift + left_shift0 == left_shift);
  MS_ASSERT(left_shift + left_shift1 == left_shift);

  if (!InferShapeDone()) {
    return RET_OK;
  }
  return ReSize();
 }

 int SubInt8CPUKernel::ReSize() {
  if (broadcast_) {
    if (tile0_data_ != nullptr) {
      if (context_ != nullptr && context_->allocator != nullptr) {
        context_->allocator->Free(tile0_data_);
      } else {
        free(tile0_data_);
      }
    }
    if (tile1_data_ != nullptr) {
      if (context_ != nullptr && context_->allocator != nullptr) {
        context_->allocator->Free(tile1_data_);
      } else {
        free(tile1_data_);
      }
    }

    if (context_ != nullptr && context_->allocator != nullptr) {
      tile0_data_ = static_cast<int8_t *>(context_->allocator->Malloc(out_tensors_.at(0)->Size()));
      tile1_data_ = static_cast<int8_t *>(context_->allocator->Malloc(out_tensors_.at(0)->Size()));
    } else {
      tile0_data_ = static_cast<int8_t *>(malloc(sizeof(int8_t) * out_tensors_.at(0)->Size()));
      tile1_data_ = static_cast<int8_t *>(malloc(sizeof(int8_t) * out_tensors_.at(0)->Size()));
    }

    if (tile0_data_ == nullptr || tile1_data_ == nullptr) {
      MS_LOG(ERROR) << "malloc memroy fail!";
      return RET_ERROR;
    }
  }
  return RET_OK;
 }

 int SubInt8CPUKernel::DoExecute(int task_id) {
  auto input0_data_ = static_cast<int8_t *>(in_tensors_.at(0)->Data());
  auto input1_data_ = static_cast<int8_t *>(in_tensors_.at(1)->Data());
  auto output_data_ = static_cast<int8_t *>(out_tensors_.at(0)->Data());
  auto element_num = out_tensors_[0]->ElementsNum();

  MS_ASSERT(op_parameter_->thread_num_ != 0);
  int stride = UP_DIV(element_num, op_parameter_->thread_num_);
  int count = MSMIN(stride, element_num - stride * task_id);

  auto ret = RET_OK;
  if (broadcast_) {
    ret = SubInt8(tile0_data_ + task_id * count, tile1_data_ + task_id * count, output_data_ + task_id * count, count,
                  &param_);
  } else {
    ret = SubInt8(input0_data_ + task_id * count, input1_data_ + task_id * count, output_data_ + task_id * count, count,
                  &param_);
  }

  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Subint8 function error error_code[" << ret << "]";
    return RET_ERROR;
  }
  return RET_OK;
 }

 int SubInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
  auto sub_kernel = reinterpret_cast<SubInt8CPUKernel *>(cdata);
  auto ret = sub_kernel->DoExecute(task_id);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "SubInt8 DoExecute error task_id[" << task_id << "] error_code[" << ret << "]";
    return RET_ERROR;
  }
  return RET_OK;
 }

 int SubInt8CPUKernel::Run() {
  auto ret = Prepare();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Prepare failed.";
    return RET_ERROR;
  }

  if (broadcast_) {
    ArithmeticParameter tile_para = {0};
    tile_para.ndim_ = out_tensors_.at(0)->shape().size();
    for (size_t i = 0; i < tile_para.ndim_; i++) {
      tile_para.in_shape0_[i] = in_tensors_.at(0)->DimensionSize(i);
      tile_para.in_shape1_[i] = in_tensors_.at(1)->DimensionSize(i);
      tile_para.out_shape_[i] = out_tensors_.at(0)->DimensionSize(i);
    }
    TileDimensionsUint8(static_cast<uint8_t *>(in_tensors_.at(0)->Data()),
                        static_cast<uint8_t *>(in_tensors_.at(1)->Data()), reinterpret_cast<uint8_t *>(tile0_data_),
                        reinterpret_cast<uint8_t *>(tile1_data_), &tile_para);
  }
  ret = LiteBackendParallelLaunch(SubInt8Run, this, op_parameter_->thread_num_);

  if (ret != RET_OK) {
    MS_LOG(ERROR) << "SubInt8Run function error error_code[" << ret << "]";
    return RET_ERROR;
  }
  return RET_OK;
 }

 kernel::LiteKernel *CpuSubInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                            const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *parameter,
                                            const lite::Context *ctx, const KernelKey &desc,
                                            const lite::Primitive *primitive) {
  if (parameter == nullptr || ctx == nullptr) {
    MS_LOG(ERROR) << "parameter or ctx is nullptr";
    return nullptr;
  }
  MS_ASSERT(desc.type == PrimitiveType_Sub);
  auto *kernel = new (std::nothrow) SubInt8CPUKernel(parameter, inputs, outputs, ctx, primitive);
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "kernel is nullptr.";
    return nullptr;
  }
  auto ret = kernel->Init();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Init kernel failed, name: " << parameter->name_
                  << ", type: " << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(parameter->type_));
    delete kernel;
    return nullptr;
  }
  return kernel;
 }

 REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Sub, CpuSubInt8KernelCreator)
 }  // namespace mindspore::kernel
--- a/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.h
@@ -0,0 +1,46 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_SUB_INT8_H_
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_SUB_INT8_H_

 #include <vector>
 #include "src/lite_kernel.h"
 #include "src/runtime/kernel/arm/nnacl/int8/sub_int8.h"
 #include "src/runtime/runtime_api.h"

 namespace mindspore::kernel {
 class SubInt8CPUKernel : public LiteKernel {
 public:
  explicit SubInt8CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                            const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                            const lite::Primitive *primitive)
      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
  ~SubInt8CPUKernel() override {}

  int Init() override;
  int ReSize() override;
  int Run() override;
  int DoExecute(int task_id);

 private:
  SubQuantArg param_;
  int8_t *tile0_data_ = nullptr;
  int8_t *tile1_data_ = nullptr;
  bool broadcast_ = false;
 };
 }  // namespace mindspore::kernel

 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_SUB_INT8_H_
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/add_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/add_int8.h
@@ -48,6 +48,8 @@ void AddInt8(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int6
 #ifdef ENABLE_NEON
 #include <arm_neon.h>
 int16x8_t LoadAndAddOffset(int8_t *data, int index, int offset);
 int32x4_t ClacScaledInput(int32x4_t input, int32x4_t left_shift_result_vec, int32x4_t input_multiplier_vec,
                          int32x4_t right_shift_vec);
 #endif

 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_ADD_INT8_H_
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/sub_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/sub_int8.cc
@@ -0,0 +1,104 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #include "nnacl/int8/sub_int8.h"
 #ifdef ENABLE_NEON
 #include <arm_neon.h>
 #include "nnacl/add_int8.h"
 #endif
 #include "nnacl/quantization/fixed_point.h"

 #ifdef ENABLE_NEON

 int16x4_t ClacSumHalfWord(int32x4_t scaled_input0, int32x4_t scaled_input1, int32x4_t left_shift_out_vec,
                          int32x4_t output_multiplier_vec, SubQuantArg *para) {
  int32x4_t raw_data = vsubq_s32(scaled_input0, scaled_input1);

  raw_data = RoundingDivideByPOTInt32x4(vqrdmulhq_s32(vmulq_s32(raw_data, left_shift_out_vec), output_multiplier_vec),
                                        para->right_shift_out_);
  raw_data = vaddq_s32(raw_data, vdupq_n_s32(para->out_args_.zp_));
  raw_data = vmaxq_s32(raw_data, vdupq_n_s32(para->output_activation_min_));
  raw_data = vminq_s32(raw_data, vdupq_n_s32(para->output_activation_max_));
  return vqmovn_s32(raw_data);
 }

 void SubInt8NEON(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count,
                 SubQuantArg *para, int *index) {
  int32x4_t left_shift_result0_vec = vdupq_n_s32(para->left_shift_result0_);
  int32x4_t left_shift_result1_vec = vdupq_n_s32(para->left_shift_result1_);
  int32x4_t input0_multiplier_vec = vdupq_n_s32(para->input0_multiplier_);
  int32x4_t input1_multiplier_vec = vdupq_n_s32(para->input1_multiplier_);
  int32x4_t output_multiplier_vec = vdupq_n_s32(para->output_multiplier_);
  int32x4_t left_shift_out_vec = vdupq_n_s32((1 << para->left_shift_out_));
  int32x4_t right_shift0_vec = vdupq_n_s32(-para->right_shift0_);
  int32x4_t right_shift1_vec = vdupq_n_s32(-para->right_shift1_);

  for (; (*index) <= real_dst_count - 8; (*index) += 8) {
    int16x8_t input0_val = LoadAndAddOffset(input0_data, *index, para->in0_args_.zp_);
    int16x8_t input1_val = LoadAndAddOffset(input1_data, *index, para->in1_args_.zp_);

    int32x4_t input0_low = vmovl_s16(vget_low_s16(input0_val));
    int32x4_t input0_high = vmovl_s16(vget_high_s16(input0_val));
    int32x4_t input1_low = vmovl_s16(vget_low_s16(input1_val));
    int32x4_t input1_high = vmovl_s16(vget_high_s16(input1_val));

    int32x4_t scaled_input0_low =
      ClacScaledInput(input0_low, left_shift_result0_vec, input0_multiplier_vec, right_shift0_vec);
    int32x4_t scaled_input0_high =
      ClacScaledInput(input0_high, left_shift_result0_vec, input0_multiplier_vec, right_shift0_vec);
    int32x4_t scaled_input1_low =
      ClacScaledInput(input1_low, left_shift_result1_vec, input1_multiplier_vec, right_shift1_vec);
    int32x4_t scaled_input1_high =
      ClacScaledInput(input1_high, left_shift_result1_vec, input1_multiplier_vec, right_shift1_vec);

    int16x4_t sum_low =
      ClacSumHalfWord(scaled_input0_low, scaled_input1_low, left_shift_out_vec, output_multiplier_vec, para);
    int16x4_t sum_high =
      ClacSumHalfWord(scaled_input0_high, scaled_input1_high, left_shift_out_vec, output_multiplier_vec, para);

    int16x8_t res_s16 = vcombine_s16(sum_low, sum_high);
    int8x8_t res_u8_n0 = vqmovn_s16(res_s16);
    vst1_s8(output_data + *index, res_u8_n0);
  }
 }
 #endif

 int SubInt8(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, SubQuantArg *para) {
  int index = 0;
 #ifdef ENABLE_NEON
  SubInt8NEON(input0_data, input1_data, output_data, real_dst_count, para, &index);
 #endif
  for (; index < real_dst_count; ++index) {
    const int32_t input0_val = para->in0_args_.zp_ + input0_data[index];
    const int32_t input1_val = para->in1_args_.zp_ + input1_data[index];
    const int32_t shifted_input0_val = input0_val * para->left_shift_result0_;
    const int32_t shifted_input1_val = input1_val * para->left_shift_result1_;
    const int32_t scaled_input0_val = RoundingDivideByPOT(
      SaturatingRoundingDoublingHighMul(shifted_input0_val, para->input0_multiplier_), para->right_shift0_);
    const int32_t scaled_input1_val = RoundingDivideByPOT(
      SaturatingRoundingDoublingHighMul(shifted_input1_val, para->input1_multiplier_), para->right_shift1_);

    const int32_t raw_data = scaled_input0_val - scaled_input1_val;
    const int32_t raw_output =
      RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(raw_data * (1 << (unsigned int)para->left_shift_out_),
                                                            para->output_multiplier_),
                          para->right_shift_out_) +
      para->out_args_.zp_;

    output_data[index] = (int8_t)MSMAX(para->output_activation_min_, MSMIN(raw_output, para->output_activation_max_));
  }
  return 0;
 }
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/sub_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/sub_int8.h
@@ -0,0 +1,25 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_SUB_INT8_H_
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_SUB_INT8_H_

 #include "nnacl/op_base.h"
 #include "nnacl/quantization/quantize.h"

 int SubInt8(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, SubQuantArg *para);

 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_SUB_INT8_H_
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/quantization/quantize.h
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/quantization/quantize.h
@@ -173,6 +173,26 @@ typedef struct QuantMulArg {
  int right_shift_;
 } QuantMulArg;

 typedef struct SubQuantArg {
  QuantArg in0_args_;
  QuantArg in1_args_;
  QuantArg out_args_;
  int output_activation_min_;
  int output_activation_max_;
  int input0_multiplier_;
  int input1_multiplier_;
  int output_multiplier_;
  int input0_shift_;
  int input1_shift_;
  int output_shift_;
  int left_shift_result0_;
  int left_shift_result1_;
  int right_shift0_;
  int right_shift1_;
  int left_shift_out_;
  int right_shift_out_;
 } SubQuantArg;

 void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift);

 inline void QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier,
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/arithmetic_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/arithmetic_fp32_tests.cc
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/sub_int_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/sub_int_tests.cc
@@ -0,0 +1,74 @@
 /**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 #include <iostream>
 #include <memory>
 #include "common/common_test.h"
 #include "mindspore/lite/src/runtime/kernel/arm/int8/sub_int8.h"
 #include "mindspore/lite/src/kernel_registry.h"
 #include "mindspore/lite/include/context.h"

 namespace mindspore {
 class TestSubInt8 : public mindspore::CommonTest {
 public:
  TestSubInt8() {}
 };

 TEST_F(TestSubInt8, SubInt8) {
  lite::tensor::Tensor in_tensor0(kNumberTypeInt8, {1, 1, 2, 5});
  lite::tensor::Tensor in_tensor1(kNumberTypeInt8, {1, 1, 1, 5});
  lite::tensor::Tensor out_tensor(kNumberTypeInt8, {1, 1, 2, 5});

  int8_t input_data0[] = {105, 35, -27, 0, -63, 99, 16, 122, 67, -49};
  int8_t input_data1[] = {24, -38, -115, 106, -98};
  int8_t output_data[10] = {0};
  in_tensor0.SetData(input_data0);
  in_tensor1.SetData(input_data1);
  out_tensor.SetData(output_data);

  const lite::tensor::QuantArg quant_in0 = {0.00784314f, 0};  // -1.0--1.0 -> 0--255
  const lite::tensor::QuantArg quant_in1 = {0.00784314f, 0};
  const lite::tensor::QuantArg quant_out = {0.00784314f, 0};
  in_tensor0.AddQuantParam(quant_in0);
  in_tensor1.AddQuantParam(quant_in1);
  out_tensor.AddQuantParam(quant_out);

  std::vector<lite::tensor::Tensor *> inputs = {&in_tensor0, &in_tensor1};
  std::vector<lite::tensor::Tensor *> outputs = {&out_tensor};

  OpParameter parameter = {};
  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Sub};

  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  ASSERT_NE(creator, nullptr);

  auto ctx = std::make_shared<lite::Context>();
  auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(&parameter), ctx.get(), desc, nullptr);
  ASSERT_NE(kernel, nullptr);

  auto ret = kernel->Run();
  EXPECT_EQ(0, ret);

  int8_t expect0[10] = {81, 73, 88, -106, 35, 75, 54, 127, -39, 49};
  for (int i = 0; i < 10; ++i) {
    EXPECT_EQ(output_data[i], expect0[i]);
  }

  in_tensor0.SetData(nullptr);
  in_tensor1.SetData(nullptr);
  out_tensor.SetData(nullptr);
 }
 }  // namespace mindspore