Merge pull request !8067 from sunsuodong/add_l2_norm_int8tags/v1.1.0
| @@ -0,0 +1,81 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <limits.h> | |||
| #include "nnacl/int8/l2_norm_int8.h" | |||
| #include "nnacl/quantization/fixed_point.h" | |||
| #include "nnacl/errorcode.h" | |||
// Computes a fixed-point multiplier and shift approximating 1 / sqrt(input)
// using five Newton-Raphson iterations in Q3.28 arithmetic.
// `reverse_shift` flips the sign of the resulting shift (callers pass -1 to
// express the result as a right shift / division).
void GetSqrtQuantMultiplierExp(int32_t input, int reverse_shift, int32_t *multiplier, int32_t *shift) {
  if (input <= 1) {
    // Degenerate input: saturate the multiplier and return immediately.
    // BUGFIX: the early branch previously fell through, clobbering the stored
    // results and left-shifting a non-positive value below (undefined behavior).
    *multiplier = INT_MAX;
    *shift = 0;
    return;
  }
  *shift = 11;
  // Scale the input down into [0, 2^29) so the normalization below cannot overflow.
  while (input >= (1 << 29)) {
    input /= 4;
    ++*shift;
  }
  // Normalize input into [2^27, 2^29) with an even number of left shifts;
  // shifts are folded into *shift in pairs because sqrt halves the exponent.
  int max_left_shift_bits = CountLeadingSignBits(input);
  int left_shift_bit_pairs = max_left_shift_bits / 2 - 1;
  *shift -= left_shift_bit_pairs;
  input <<= 2 * left_shift_bit_pairs;
  int32_t fixedpoint_f3_input = input >> 1;  // sign: 1 bit, integer: 3 bit, fractional: 28 bit
  int32_t fp_f3_half_input = SaturatingRoundingMultiplyByPOT(fixedpoint_f3_input, -1);
  int32_t fp_f3_half_three = (1 << 28) + (1 << 27);  // 1.5 in Q3.28
  int32_t tmp = (1 << 28);  // one
  // Newton-Raphson for y = 1/sqrt(x): y <- y * (1.5 - 0.5 * x * y^2).
  for (int i = 0; i < 5; i++) {
    int32_t tmp3 = Rescale(SaturatingRoundingDoublingHighMul(tmp, SaturatingRoundingDoublingHighMul(tmp, tmp)), 9, 3);
    tmp = Rescale(SaturatingRoundingDoublingHighMul(fp_f3_half_three, tmp) -
                    SaturatingRoundingDoublingHighMul(fp_f3_half_input, tmp3),
                  6, 3);
  }
  int32_t fp_f0_half_sqrt_2 = 1518500250;  // sqrt(2) / 2 in Q0.31
  tmp = SaturatingRoundingDoublingHighMul(tmp, fp_f0_half_sqrt_2);
  *multiplier = tmp;
  if (*shift < 0) {
    // Fold any residual negative shift into the multiplier itself.
    *multiplier <<= -*shift;
    *shift = 0;
  }
  *shift *= reverse_shift;
}
// Applies a quantized multiplier with a signed shift: a positive shift scales
// the input up before the doubling high-mul, a negative one rounds-divides after.
int32_t MultiplyByQuantizedMultiplier2(int32_t input, int32_t multiplier, int shift) {
  const int left_shift = shift > 0 ? shift : 0;
  const int right_shift = shift < 0 ? -shift : 0;
  const int32_t scaled = input * (1 << left_shift);
  const int32_t high_mul = SaturatingRoundingDoublingHighMul(scaled, multiplier);
  return RoundingDivideByPOT(high_mul, right_shift);
}
| int L2NormalizationInt8(const int8_t *input_data, int8_t *output_data, const L2NormParameter *param, | |||
| const L2NormQuantArg *quant_param, const int begin, const int end) { | |||
| const int inner_size = param->shape_[param->shape_num_ - 1]; | |||
| for (int i = begin; i < end; ++i) { | |||
| int32_t square_sum = 0.0f; | |||
| for (int j = 0; j < inner_size; ++j) { | |||
| int32_t in = input_data[i * inner_size + j] - quant_param->in_.zp_; | |||
| square_sum += in * in; | |||
| } | |||
| int32_t multiplier; | |||
| int32_t shift; | |||
| GetSqrtQuantMultiplierExp(square_sum, -1, &multiplier, &shift); | |||
| for (int k = 0; k < inner_size; ++k) { | |||
| int32_t in = input_data[i * inner_size + k] - quant_param->in_.zp_; | |||
| int32_t out = MultiplyByQuantizedMultiplier2(in, multiplier, shift + 7); | |||
| output_data[i * inner_size + k] = MSMIN(127, MSMAX(-128, out)); | |||
| } | |||
| } | |||
| return NNACL_OK; | |||
| } | |||
| @@ -0,0 +1,32 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
#ifndef MINDSPORE_LITE_NNACL_INT8_L2_NORM_INT8_H_
#define MINDSPORE_LITE_NNACL_INT8_L2_NORM_INT8_H_
#include "nnacl/l2_norm_parameter.h"
#ifdef __cplusplus
extern "C" {
#endif
// Int8 L2 normalization over the innermost axis for rows in [begin, end).
// input_data/output_data are quantized int8 buffers; quant_param carries the
// input/output zero points and scales. Returns an NNACL error code.
int L2NormalizationInt8(const int8_t *input_data, int8_t *output_data, const L2NormParameter *param,
                        const L2NormQuantArg *quant_param, const int begin, const int end);
#ifdef __cplusplus
}
#endif
#endif  // MINDSPORE_LITE_NNACL_INT8_L2_NORM_INT8_H_
| @@ -13,11 +13,11 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_NNACL_L2NORM_PARAMETER_H_ | |||
| #define MINDSPORE_LITE_NNACL_L2NORM_PARAMETER_H_ | |||
| #include "nnacl/op_base.h" | |||
| #include "nnacl/quantization/quantize.h" | |||
| typedef struct L2NormParameter { | |||
| OpParameter op_parameter_; | |||
| @@ -30,4 +30,9 @@ typedef struct L2NormParameter { | |||
| ActType act_type_; | |||
| } L2NormParameter; | |||
// Quantization arguments (zero point + scale) for the int8 L2Norm kernel.
typedef struct {
  QuantArg in_;   // input tensor quantization
  QuantArg out_;  // output tensor quantization
} L2NormQuantArg;
| #endif // MINDSPORE_LITE_NNACL_L2NORM_PARAMETER_H_ | |||
| @@ -42,6 +42,8 @@ int RoundingDivideByPOT(int x, int exponent); | |||
| int MultiplyByQuantizedMultiplier(int32_t value, int32_t multiplier, int32_t left_shift, int32_t right_shift); | |||
| int SaturatingRoundingMultiplyByPOT(int32_t x, int exponent); | |||
| int32_t Rescale(int x, int kIntegerBitsSrc, int kIntegerBitsDst); | |||
| int CountLeadingSignBits(int32_t x); | |||
| @@ -50,10 +50,12 @@ class L2NormCPUKernel : public LiteKernel { | |||
| int ReSize() override; | |||
| int Run() override; | |||
| protected: | |||
| L2NormParameter *l2_norm_param_; | |||
| private: | |||
| int MallocTmpBuffer(); | |||
| void FreeTmpBuffer(); | |||
| L2NormParameter *l2_norm_param_; | |||
| float sqrt_sum_; | |||
| float *input_ptr_; | |||
| float *output_ptr_; | |||
| @@ -0,0 +1,91 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/kernel/arm/int8/l2_norm_int8.h" | |||
| #include "src/runtime/runtime_api.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "include/errorcode.h" | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_L2Norm; | |||
| namespace mindspore::kernel { | |||
| int L2NormInt8CPUKernel::Init() { | |||
| lite::Tensor *input = in_tensors_.at(0); | |||
| lite::Tensor *output = out_tensors_.at(0); | |||
| MS_ASSERT(input); | |||
| MS_ASSERT(output); | |||
| quant_param_.in_.scale_ = input->GetQuantParams().front().scale; | |||
| quant_param_.in_.zp_ = input->GetQuantParams().front().zeroPoint; | |||
| quant_param_.out_.scale_ = output->GetQuantParams().front().scale; | |||
| quant_param_.out_.zp_ = output->GetQuantParams().front().zeroPoint; | |||
| return ReSize(); | |||
| } | |||
| int L2NormInt8Run(void *cdata, int task_id) { | |||
| auto kernel = reinterpret_cast<L2NormInt8CPUKernel *>(cdata); | |||
| kernel->DoExecute(task_id); | |||
| return lite::RET_OK; | |||
| } | |||
| int L2NormInt8CPUKernel::Run() { | |||
| if (l2_norm_param_->axis_num_ != 1 || l2_norm_param_->axis_[0] != static_cast<int>(l2_norm_param_->shape_num_) - 1) { | |||
| MS_LOG(ERROR) << "L2Norm only support reduce on all axis and trailing axis with trailing axis"; | |||
| return RET_ERROR; | |||
| } | |||
| auto ret = ParallelLaunch(context_->thread_pool_, L2NormInt8Run, this, context_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]"; | |||
| } | |||
| return ret; | |||
| } | |||
| int L2NormInt8CPUKernel::DoExecute(int task_id) { | |||
| lite::Tensor *input_tensor = in_tensors().front(); | |||
| int outer_size = input_tensor->ElementsNum() / input_tensor->shape().back(); | |||
| int stride = UP_DIV(outer_size, context_->thread_num_); | |||
| int begin = task_id * stride; | |||
| int end = MSMIN(begin + stride, outer_size); | |||
| int8_t *input_data = static_cast<int8_t *>(in_tensors().front()->MutableData()); | |||
| int8_t *output_data = static_cast<int8_t *>(out_tensors().front()->MutableData()); | |||
| return L2NormalizationInt8(input_data, output_data, l2_norm_param_, &quant_param_, begin, end); | |||
| } | |||
| kernel::LiteKernel *CpuL2NormInt8KernelCreator(const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, OpParameter *parameter, | |||
| const lite::InnerContext *ctx, const KernelKey &desc, | |||
| const mindspore::lite::PrimitiveC *primitive) { | |||
| auto *kernel = new (std::nothrow) L2NormInt8CPUKernel(parameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "kernel is nullptr."; | |||
| free(parameter); | |||
| return nullptr; | |||
| } | |||
| auto ret = kernel->Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init kernel failed, name: " << parameter->name_ | |||
| << ", type: " << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(parameter->type_)); | |||
| delete kernel; | |||
| return nullptr; | |||
| } | |||
| return kernel; | |||
| } | |||
| REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_L2Norm, CpuL2NormInt8KernelCreator) | |||
| } // namespace mindspore::kernel | |||
| @@ -0,0 +1,41 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_L2_NORM_INT8_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_L2_NORM_INT8_H_ | |||
| #include <vector> | |||
| #include "src/runtime/kernel/arm/fp32/l2_norm.h" | |||
| #include "nnacl/int8/l2_norm_int8.h" | |||
| namespace mindspore::kernel { | |||
| class L2NormInt8CPUKernel : public L2NormCPUKernel { | |||
| public: | |||
| explicit L2NormInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| : L2NormCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~L2NormInt8CPUKernel() {} | |||
| int Init() override; | |||
| int Run() override; | |||
| int DoExecute(int tId); | |||
| private: | |||
| L2NormQuantArg quant_param_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_L2_NORM_INT8_H_ | |||
| @@ -0,0 +1,73 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <iostream> | |||
| #include <memory> | |||
| #include "schema/inner/model_generated.h" | |||
| #include "common/common_test.h" | |||
| #include "mindspore/lite/src/kernel_registry.h" | |||
| #include "mindspore/lite/include/context.h" | |||
| #include "nnacl/l2_norm_parameter.h" | |||
| namespace mindspore { | |||
// Test fixture for the int8 L2Norm CPU kernel.
class TestL2NormInt8 : public mindspore::CommonTest {
 public:
  TestL2NormInt8() {}
  // Op parameter handed (stack-allocated) to the kernel creator in each test.
  L2NormParameter param_;
};
// End-to-end check: build the registered int8 L2Norm kernel, run it on two
// rows of five values, and compare against precomputed int8 outputs.
TEST_F(TestL2NormInt8, norm) {
  lite::Tensor in_tensor(kNumberTypeInt8, {2, 1, 1, 5});
  lite::Tensor out_tensor(kNumberTypeInt8, {2, 1, 1, 5});
  // Real values at scale 0.047058824: -6.0 -4.5 -3.0 -1.5 0 1.0 2.5 3.5 4.0 6.0
  int8_t input_data[] = {-128, -96, -64, -32, 0, 21, 53, 74, 85, 127};
  int8_t output_data[10] = {0};
  // Tensors borrow the stack buffers; ownership is revoked at the end of the test.
  in_tensor.set_data(input_data);
  out_tensor.set_data(output_data);
  const lite::QuantArg quant_in = {0.0470588244497776f, 0};  // ~6.0 / 127.5
  const lite::QuantArg quant_out = {0.0078125f, 0};          // 1/128 output scale
  in_tensor.AddQuantParam(quant_in);
  out_tensor.AddQuantParam(quant_out);
  std::vector<lite::Tensor *> inputs = {&in_tensor};
  std::vector<lite::Tensor *> outputs = {&out_tensor};
  // Normalize along the trailing axis only (axis -1).
  param_.axis_num_ = 1;
  // NOTE(review): this malloc is never freed here — presumably the kernel or
  // fixture is expected to own it; confirm who releases param_.axis_.
  param_.axis_ = reinterpret_cast<int *>(malloc(sizeof(int)));
  param_.axis_[0] = -1;
  param_.epsilon_ = 1e-6;
  param_.act_type_ = ActType_No;
  param_.shape_ = nullptr;
  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_L2Norm};
  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  ASSERT_NE(creator, nullptr);
  auto ctx = std::make_shared<lite::InnerContext>();
  ASSERT_EQ(lite::RET_OK, ctx->Init());
  // NOTE(review): kernel is never deleted — safe only if deleting it would
  // free the stack-allocated param_; verify LiteKernel's ownership of
  // op_parameter_ before adding cleanup.
  auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(&param_), ctx.get(), desc, nullptr);
  ASSERT_NE(kernel, nullptr);
  auto ret = kernel->Run();
  EXPECT_EQ(0, ret);
  // Expected int8 outputs at scale 1/128 (each row normalized to unit L2 norm).
  int8_t expect[10] = {-93, -70, -47, -23, 0, 15, 38, 53, 61, 91};
  for (int i = 0; i < 10; ++i) {
    EXPECT_EQ(output_data[i], expect[i]);
  }
  // Detach the stack buffers so the tensor destructors do not free them.
  in_tensor.set_data(nullptr);
  out_tensor.set_data(nullptr);
}
| } // namespace mindspore | |||