Browse Source

!10759 [MS][LITE][CPU]clean code && fix bug of int8 add, depthwise conv

From: @fuzhiye
Reviewed-by: @zhanghaibo5,@hangangqiang
Signed-off-by: @hangangqiang
tags/v1.2.0-rc1
mindspore-ci-bot Gitee 5 years ago
parent
commit
0525e98953
11 changed files with 20 additions and 12 deletions
  1. +1
    -1
      mindspore/lite/nnacl/assembly/arm64/ConvDwInt8Row.S
  2. +3
    -1
      mindspore/lite/nnacl/fp16/arithmetic_self_fp16.c
  3. +3
    -1
      mindspore/lite/nnacl/fp32/arithmetic_self_fp32.c
  4. +1
    -1
      mindspore/lite/nnacl/int8/add_int8.c
  5. +3
    -1
      mindspore/lite/nnacl/int8/arithmetic_self_int8.c
  6. +1
    -1
      mindspore/lite/nnacl/x86_64_sse/ConvDwFp32IndirectRow.c
  7. +1
    -1
      mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.cc
  8. +2
    -2
      mindspore/lite/src/runtime/kernel/arm/fp32_grad/sigmoid_cross_entropy_with_logits_grad.cc
  9. +3
    -1
      mindspore/lite/src/runtime/kernel/arm/int8/add_int8.cc
  10. +1
    -1
      mindspore/lite/src/runtime/kernel/npu/arithmetic_npu.cc
  11. +1
    -1
      mindspore/lite/src/runtime/kernel/npu/arithmetic_self_npu.cc

+ 1
- 1
mindspore/lite/nnacl/assembly/arm64/ConvDwInt8Row.S View File

@@ -101,7 +101,7 @@ mov x9, x4
ldrsb w14, [x7], #1
ldrsh w15, [x8], #2
ldr w16, [x0], #4
add w14, w14, w6
sub w14, w14, w6

sxth w14, w14
madd w14, w14, w15, w16


+ 3
- 1
mindspore/lite/nnacl/fp16/arithmetic_self_fp16.c View File

@@ -112,7 +112,9 @@ int ElementNegativeFp16(float16_t *input, float16_t *output, int element_size) {

int ElementReciprocalFp16(float16_t *input, float16_t *output, int element_size) {
for (int i = 0; i < element_size; ++i) {
assert(input[i] != 0.0f);
if (input[i] == 0.0f) {
return NNACL_ERR;
}
output[i] = 1.f / input[i];
}
return NNACL_OK;


+ 3
- 1
mindspore/lite/nnacl/fp32/arithmetic_self_fp32.c View File

@@ -132,7 +132,9 @@ int ElementNegative(const float *input, float *output, const int element_size) {

int ElementReciprocal(const float *input, float *output, const int element_size) {
for (int i = 0; i < element_size; ++i) {
assert(input[i] != 0.0f);
if (input[i] == 0.0f) {
return NNACL_ERR;
}
output[i] = 1.f / input[i];
}
return NNACL_OK;


+ 1
- 1
mindspore/lite/nnacl/int8/add_int8.c View File

@@ -183,7 +183,7 @@ void AddOptInt8(const int8_t *ptr_in, const int8_t element_in, int8_t *output, i
const int32x4_t ele_left_vec = vdupq_n_s32(ele_left_shift);

const int32x4_t ptr_right_vec = vdupq_n_s32(-ptr_args->right_shift_);
const int32x4_t ele_right_vec = vdupq_n_s32(-ptr_args->right_shift_);
const int32x4_t ele_right_vec = vdupq_n_s32(-ele_args->right_shift_);

const int32x4_t out_left_vec = vdupq_n_s32(params->out_left_shift_);
const int32x4_t out_right_vec = vdupq_n_s32(-params->out_right_shift_);


+ 3
- 1
mindspore/lite/nnacl/int8/arithmetic_self_int8.c View File

@@ -288,7 +288,9 @@ int Int8ElementReciprocal(int8_t *input, int8_t *output, int element_size, Arith
float bias = in_zp * in_scale;
for (int i = 0; i < element_size; i++) {
float input_f32 = input[i] * in_scale + bias;
assert(input_f32 != 0.0f);
if (input_f32 == 0.0f) {
return NNACL_ERR;
}
int32_t output_tmp = round(1.f / (input_f32 * out_scale)) + out_zp;
if (output_tmp > para.output_activation_max_) {
output[i] = para.output_activation_max_;


+ 1
- 1
mindspore/lite/nnacl/x86_64_sse/ConvDwFp32IndirectRow.c View File

@@ -24,7 +24,7 @@ void ConvDwFp32Avx5x5(float *output, float **input, const float *weights, const
input_stride /= sizeof(float *);
size_t c8 = UP_DIV(channels, C8NUM) * C8NUM;
size_t c8_mod = channels % C8NUM;
int kernel = 25;
const int kernel = 25;
for (int i = 0; i < output_width; ++i) {
float *in[kernel];
for (int k = 0; k < kernel; k++) {


+ 1
- 1
mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.cc View File

@@ -146,7 +146,7 @@ void PadCPUKernel::InitMirrorPadBlock() {
std::vector<int> pad_cord(pad_region.size());

for (int pos = 0; pos < remain_size; ++pos) {
int dst_basic_offset = 0;
const int dst_basic_offset = 0;

for (int index = 1; index < region_size; ++index) {
int dst_offset = dst_basic_offset;


+ 2
- 2
mindspore/lite/src/runtime/kernel/arm/fp32_grad/sigmoid_cross_entropy_with_logits_grad.cc View File

@@ -34,8 +34,8 @@ int SigmoidCrossEntropyWithLogitsGradCPUKernel::Execute(int task_id) {
auto labels = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
auto dloss = reinterpret_cast<float *>(in_tensors_.at(2)->MutableData());
auto *out = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
float zero = 0.0f;
float one = 1.0f;
const float zero = 0.0f;
const float one = 1.0f;

size_t tensor_len = in_tensors_.at(0)->ElementsNum();
for (uint64_t i = 0; i < tensor_len; ++i) {


+ 3
- 1
mindspore/lite/src/runtime/kernel/arm/int8/add_int8.cc View File

@@ -143,7 +143,9 @@ void QuantizedAddCPUKernel::BroadcastRun(int task_id) {
if (real_out_count <= 0) {
return;
}
int8_t *cur_in0, *cur_in1, *cur_out;
int8_t *cur_in0 = nullptr;
int8_t *cur_in1 = nullptr;
int8_t *cur_out = nullptr;
for (int i = 0; i < real_out_count; i++) {
if (arith_para_->in_elements_num0_ == arith_para_->out_elements_num_) {
cur_in0 = input0_data_ + task_id * stride * in_size_ + i * in_size_;


+ 1
- 1
mindspore/lite/src/runtime/kernel/npu/arithmetic_npu.cc View File

@@ -68,7 +68,7 @@ ge::Operator *CreateOperator(const std::vector<ge::Operator *> &npu_inputs, cons
int ArithmeticNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs,
const std::vector<ge::Operator *> &npu_inputs) {
ge::Operator *op;
ge::Operator *op = nullptr;
switch (primitive_->Type()) {
case PrimitiveType_Mul:
op = CreateOperator<hiai::op::Mul>(npu_inputs, name_);


+ 1
- 1
mindspore/lite/src/runtime/kernel/npu/arithmetic_self_npu.cc View File

@@ -54,7 +54,7 @@ ge::Operator *CreateOperator(ge::Operator *input, const std::string &name) {
int ArithmeticSelfNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs,
const std::vector<ge::Operator *> &npu_inputs) {
ge::Operator *op;
ge::Operator *op = nullptr;
switch (primitive_->Type()) {
case PrimitiveType_Cos:
op = CreateOperator<hiai::op::Cos>(npu_inputs[0], name_);


Loading…
Cancel
Save