Browse Source

!11044 [MSLITE] nnacl optimize : delete clip

From: @ling_qiao_min
Reviewed-by: 
Signed-off-by:
tags/v1.2.0-rc1
mindspore-ci-bot Gitee 4 years ago
parent
commit
175a17216a
31 changed files with 83 additions and 243 deletions
  1. +1
    -1
      mindspore/lite/nnacl/base/batch_to_space_base.c
  2. +8
    -12
      mindspore/lite/nnacl/base/batch_to_space_base.h
  3. +16
    -7
      mindspore/lite/nnacl/base/squeeze_base.h
  4. +5
    -5
      mindspore/lite/nnacl/base/unstack_base.c
  5. +7
    -4
      mindspore/lite/nnacl/base/unstack_base.h
  6. +10
    -12
      mindspore/lite/nnacl/base/zeroslike_base.h
  7. +0
    -11
      mindspore/lite/nnacl/batch_to_space.h
  8. +2
    -2
      mindspore/lite/nnacl/fp32/topk_fp32.c
  9. +4
    -5
      mindspore/lite/nnacl/int8/squeeze_int8.c
  10. +3
    -3
      mindspore/lite/nnacl/int8/squeeze_int8.h
  11. +0
    -35
      mindspore/lite/nnacl/squeeze.c
  12. +0
    -21
      mindspore/lite/nnacl/squeeze_parameter.h
  13. +3
    -11
      mindspore/lite/nnacl/unstack_parameter.h
  14. +0
    -20
      mindspore/lite/nnacl/zeroslike.c
  15. +0
    -1
      mindspore/lite/src/ops/clip.cc
  16. +0
    -42
      mindspore/lite/src/ops/populate/clip_populate.cc
  17. +0
    -3
      mindspore/lite/src/ops/populate/squeeze_populate.cc
  18. +1
    -1
      mindspore/lite/src/ops/populate/unstack_populate.cc
  19. +1
    -0
      mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space_fp32.h
  20. +1
    -1
      mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.cc
  21. +1
    -17
      mindspore/lite/src/runtime/kernel/arm/fp32/squeeze_fp32.cc
  22. +1
    -3
      mindspore/lite/src/runtime/kernel/arm/fp32/squeeze_fp32.h
  23. +3
    -3
      mindspore/lite/src/runtime/kernel/arm/fp32/unstack_fp32.cc
  24. +2
    -2
      mindspore/lite/src/runtime/kernel/arm/fp32/unstack_fp32.h
  25. +3
    -3
      mindspore/lite/src/runtime/kernel/arm/fp32/zeroslike_fp32.cc
  26. +1
    -0
      mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.h
  27. +1
    -5
      mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc
  28. +5
    -5
      mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.h
  29. +1
    -0
      mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batch_to_space_fp32_test.cc
  30. +1
    -1
      mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/unstack_fp32_tests.cc
  31. +2
    -7
      mindspore/lite/test/ut/src/runtime/kernel/arm/int8/squeeze_int8_tests.cc

mindspore/lite/nnacl/batch_to_space.c → mindspore/lite/nnacl/base/batch_to_space_base.c View File

@@ -14,7 +14,7 @@
* limitations under the License. * limitations under the License.
*/ */


#include "nnacl/batch_to_space.h"
#include "nnacl/base/batch_to_space_base.h"


void BatchToSpaceNoCropForNHWC(const void *input, void *output, const int *in_shape, int out_n, const int *block, void BatchToSpaceNoCropForNHWC(const void *input, void *output, const int *in_shape, int out_n, const int *block,
int data_size) { int data_size) {

mindspore/lite/nnacl/squeeze.h → mindspore/lite/nnacl/base/batch_to_space_base.h View File

@@ -13,25 +13,21 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
#ifndef MINDSPORE_LITE_NNACL_BATCH_TO_SPACE_BASE_H_
#define MINDSPORE_LITE_NNACL_BATCH_TO_SPACE_BASE_H_


#ifndef MINDSPORE_LITE_NNACL_SQUEEZE_H_
#define MINDSPORE_LITE_NNACL_SQUEEZE_H_

#include <string.h>
#include "nnacl/op_base.h" #include "nnacl/op_base.h"


typedef struct SqueezeParameter {
// primitive parameter
OpParameter op_parameter_;
int axes_[8];
} SqueezeParameter;

#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
int DoSqueeze(const float *input_ptr, float *output_ptr, size_t data_size);
int DoSqueezeInt32(int32_t *in_data, int32_t *out_data, size_t data_size);
void BatchToSpaceNoCropForNHWC(const void *input, void *output, const int *in_shape, int out_n, const int *block,
int data_size);
void BatchToSpaceForNHWC(const void *input, void *output, const int *in_shape, int out_n, const int *block,
const int *crops, int data_size);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif


#endif // MINDSPORE_LITE_NNACL_SQUEEZE_H_
#endif // MINDSPORE_LITE_NNACL_BATCH_TO_SPACE_BASE_H_

mindspore/lite/nnacl/clip.c → mindspore/lite/nnacl/base/squeeze_base.h View File

@@ -14,16 +14,25 @@
* limitations under the License. * limitations under the License.
*/ */


#include "nnacl/clip.h"
#ifndef MINDSPORE_LITE_NNACL_SQUEEZE_BASE_H_
#define MINDSPORE_LITE_NNACL_SQUEEZE_BASE_H_

#include "nnacl/errorcode.h" #include "nnacl/errorcode.h"


int ClipFp32(const float *src, int length, float *dst, float min_val, float max_val) {
if (max_val <= min_val) {
#ifdef __cplusplus
extern "C" {
#endif

inline int DoSqueeze(const void *input_ptr, void *output_ptr, size_t data_size) {
if (input_ptr == NULL || output_ptr == NULL) {
return NNACL_ERR; return NNACL_ERR;
} }
int i = 0;
for (; i < length; ++i) {
dst[i] = src[i] < min_val ? min_val : (src[i] > max_val ? max_val : src[i]);
}
(void)memcpy(output_ptr, input_ptr, data_size);
return NNACL_OK; return NNACL_OK;
} }

#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_NNACL_SQUEEZE_BASE_H_

mindspore/lite/nnacl/unstack.c → mindspore/lite/nnacl/base/unstack_base.c View File

@@ -14,16 +14,16 @@
* limitations under the License. * limitations under the License.
*/ */


#include "nnacl/unstack.h"
#include <string.h>
#include "nnacl/base/unstack_base.h"


void Unistack(const float *input, float **output, UnstackParameter *para) {
void Unstack(const void *input, void **output, UnstackParameter *para, int data_size) {
const int8_t *in_addr = (int8_t *)input;
for (int j = 0; j < para->num_; j++) { for (int j = 0; j < para->num_; j++) {
float *out_addr = output[j];
int8_t *out_addr = (int8_t *)output[j];
int out_offset = 0; int out_offset = 0;
for (int i = 0; i < para->pre_dims_; i++) { for (int i = 0; i < para->pre_dims_; i++) {
int in_offset = i * para->axis_dim_ * para->after_dims_ + j * para->after_dims_; int in_offset = i * para->axis_dim_ * para->after_dims_ + j * para->after_dims_;
(void)memcpy(out_addr + out_offset, input + in_offset, para->after_dims_ * sizeof(float));
(void)memcpy(out_addr + out_offset * data_size, in_addr + in_offset * data_size, para->after_dims_ * data_size);
out_offset += para->after_dims_; out_offset += para->after_dims_;
} }
} }

mindspore/lite/nnacl/zeroslike.h → mindspore/lite/nnacl/base/unstack_base.h View File

@@ -13,17 +13,20 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
#ifndef MINDSPORE_LITE_NNACL_ZEROSLIKE_H_
#define MINDSPORE_LITE_NNACL_ZEROSLIKE_H_


#ifndef MINDSPORE_LITE_NNACL_UNSTACK_H_
#define MINDSPORE_LITE_NNACL_UNSTACK_H_

#include <string.h>
#include "nnacl/op_base.h" #include "nnacl/op_base.h"
#include "nnacl/unstack_parameter.h"


#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
void ApproximateZerosLike(float *output, int number);
void Unstack(const void *input, void **output, UnstackParameter *para, int data_size);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif


#endif // MINDSPORE_LITE_NNACL_ZEROSLIKE_H_
#endif // MINDSPORE_LITE_NNACL_UNSTACK_H_

mindspore/lite/nnacl/clip.h → mindspore/lite/nnacl/base/zeroslike_base.h View File

@@ -13,24 +13,22 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
#ifndef MINDSPORE_LITE_NNACL_CLIP_H_
#define MINDSPORE_LITE_NNACL_CLIP_H_
#ifndef MINDSPORE_LITE_NNACL_ZEROSLIKE_BASE_H_
#define MINDSPORE_LITE_NNACL_ZEROSLIKE_BASE_H_


#include <math.h>
#include "nnacl/op_base.h" #include "nnacl/op_base.h"
#include "mindspore/lite/nnacl/int8/fixed_point.h"

typedef struct ClipParameter {
OpParameter op_parameter_;
float min_val_;
float max_val_;
} ClipParameter;


#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
int ClipFp32(const float *src, int length, float *dst, float min_val, float max_val);

// Zero-fills the buffer at `output`.
//
// output:    destination buffer; must hold at least number * data_size bytes.
// number:    count of elements to clear.
// data_size: size in bytes of a single element.
//
// A NULL buffer or a non-positive count/element size is treated as a no-op.
static inline void ApproximateZerosLike(void *output, int number, int data_size) {
  if (output == NULL || number <= 0 || data_size <= 0) {
    return;
  }
  // memset takes an int fill value, so pass the integer 0 rather than 0.0.
  // The length is multiplied in size_t because an int * int product overflows
  // once the buffer reaches 2 GiB.
  memset(output, 0, (size_t)number * (size_t)data_size);
}

#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif // MINDSPORE_LITE_NNACL_CLIP_H_

#endif // MINDSPORE_LITE_NNACL_ZEROSLIKE_BASE_H_

+ 0
- 11
mindspore/lite/nnacl/batch_to_space.h View File

@@ -29,15 +29,4 @@ typedef struct BatchToSpaceParameter {
bool no_crop_; bool no_crop_;
} BatchToSpaceParameter; } BatchToSpaceParameter;


#ifdef __cplusplus
extern "C" {
#endif
void BatchToSpaceNoCropForNHWC(const void *input, void *output, const int *in_shape, int out_n, const int *block,
int data_size);
void BatchToSpaceForNHWC(const void *input, void *output, const int *in_shape, int out_n, const int *block,
const int *crops, int data_size);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_NNACL_FP32_BATCH_TO_SPACE_H_ #endif // MINDSPORE_LITE_NNACL_FP32_BATCH_TO_SPACE_H_

+ 2
- 2
mindspore/lite/nnacl/fp32/topk_fp32.c View File

@@ -30,7 +30,7 @@ int DescendCmp(const void *a, const void *b) {
} }
} }


int AscendCmp(const void *a, const void *b) {
int IndexSortCmp(const void *a, const void *b) {
if (((const TopkNode *)a)->index > ((const TopkNode *)b)->index) { if (((const TopkNode *)a)->index > ((const TopkNode *)b)->index) {
return 1; return 1;
} else { } else {
@@ -54,7 +54,7 @@ void Topk(float *input_data, float *output_data, int32_t *output_index, TopkPara
} }
qsort(top_map, last_dim_size, sizeof(top_map[0]), DescendCmp); qsort(top_map, last_dim_size, sizeof(top_map[0]), DescendCmp);
if (!parameter->sorted_) { if (!parameter->sorted_) {
qsort(top_map, k, sizeof(top_map[0]), AscendCmp);
qsort(top_map, k, sizeof(top_map[0]), IndexSortCmp);
} }
for (int m = 0; m < k; m++) { for (int m = 0; m < k; m++) {
cur_output_data[m] = top_map[m].element; cur_output_data[m] = top_map[m].element;


+ 4
- 5
mindspore/lite/nnacl/int8/squeeze_int8.c View File

@@ -13,19 +13,18 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
#include "nnacl/squeeze_parameter.h"
#include "nnacl/int8/squeeze_int8.h" #include "nnacl/int8/squeeze_int8.h"
#include <string.h>


void SqueezeInt8(const int8_t *input_ptr, int8_t *output_ptr, int task_id, SqueezeQuantArg *quant_Squeeze_parm,
SqueezeParameter *para_, const int num) {
void SqueezeInt8(const int8_t *input_ptr, int8_t *output_ptr, const SqueezeQuantArg *quant_Squeeze_parm, int num,
int task_id, int thread_count) {
float output_scale = quant_Squeeze_parm->out_quant_args_->scale_; float output_scale = quant_Squeeze_parm->out_quant_args_->scale_;
const float output_inverse_scale = 1.f / output_scale; const float output_inverse_scale = 1.f / output_scale;
QuantArg *input_quant = quant_Squeeze_parm->in_quant_args_; QuantArg *input_quant = quant_Squeeze_parm->in_quant_args_;
int output_zp = quant_Squeeze_parm->out_quant_args_->zp_; int output_zp = quant_Squeeze_parm->out_quant_args_->zp_;


const int i = 0; const int i = 0;
for (int j = task_id; j < num; j += para_->op_parameter_.thread_num_) {
for (int j = task_id; j < num; j += thread_count) {
float scale = input_quant[i].scale_ * output_inverse_scale; float scale = input_quant[i].scale_ * output_inverse_scale;
float bias = -input_quant[i].zp_ * scale; float bias = -input_quant[i].zp_ * scale;
int32_t output_tmp = round(input_ptr[j] * scale + bias) + output_zp; int32_t output_tmp = round(input_ptr[j] * scale + bias) + output_zp;


+ 3
- 3
mindspore/lite/nnacl/int8/squeeze_int8.h View File

@@ -17,14 +17,14 @@
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_SQUEEZE_INT8_H_ #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_SQUEEZE_INT8_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_SQUEEZE_INT8_H_ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_SQUEEZE_INT8_H_


#include "nnacl/int8/quantize.h"
#include "nnacl/squeeze_parameter.h" #include "nnacl/squeeze_parameter.h"
#include "nnacl/int8/quantize.h"


#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
void SqueezeInt8(const int8_t *input_ptr, int8_t *output_ptr, int task_id, SqueezeQuantArg *quant_Squeeze_parm,
SqueezeParameter *para_, const int num);
void SqueezeInt8(const int8_t *input_ptr, int8_t *output_ptr, const SqueezeQuantArg *quant_Squeeze_parm, int num,
int task_id, int thread_count);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif


+ 0
- 35
mindspore/lite/nnacl/squeeze.c View File

@@ -1,35 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "nnacl/squeeze.h"
#include <string.h>
#include "nnacl/errorcode.h"

// Copies data_size bytes from in_data to out_data.
// Returns NNACL_ERR when either pointer is NULL, NNACL_OK after the copy.
int DoSqueeze(const float *in_data, float *out_data, size_t data_size) {
  const int buffers_valid = (in_data != NULL) && (out_data != NULL);
  if (buffers_valid) {
    (void)memcpy(out_data, in_data, data_size);
    return NNACL_OK;
  }
  return NNACL_ERR;
}

// Copies data_size bytes from in_data to out_data.
// Returns NNACL_ERR when either pointer is NULL, NNACL_OK after the copy.
int DoSqueezeInt32(int32_t *in_data, int32_t *out_data, size_t data_size) {
  if (!in_data) {
    return NNACL_ERR;
  }
  if (!out_data) {
    return NNACL_ERR;
  }
  (void)memcpy(out_data, in_data, data_size);
  return NNACL_OK;
}

+ 0
- 21
mindspore/lite/nnacl/squeeze_parameter.h View File

@@ -19,30 +19,9 @@


#include "nnacl/op_base.h" #include "nnacl/op_base.h"


#define SQUEEZE_OFFSET_MAX_SIZE 4

typedef struct SqueezeQuantArg { typedef struct SqueezeQuantArg {
QuantArg *in_quant_args_; QuantArg *in_quant_args_;
QuantArg *out_quant_args_; QuantArg *out_quant_args_;
} SqueezeQuantArg; } SqueezeQuantArg;


typedef struct SqueezeParameter {
// primitive parameter
OpParameter op_parameter_;
int64_t axis_;

// shape correlative
const int *in_shape_;
const int *out_shape_;
int offset_size_;
int64_t offset_[SQUEEZE_OFFSET_MAX_SIZE];
int64_t in_offset_[SQUEEZE_OFFSET_MAX_SIZE];
int input_dim_;

// other parameter
SqueezeQuantArg quant_arg;
int thread_count_;
int thread_id_;
} SqueezeParameter;

#endif // MINDSPORE_LITE_NNACL_SQUEEZE_PARAMETER_H_ #endif // MINDSPORE_LITE_NNACL_SQUEEZE_PARAMETER_H_

mindspore/lite/nnacl/unstack.h → mindspore/lite/nnacl/unstack_parameter.h View File

@@ -14,8 +14,8 @@
* limitations under the License. * limitations under the License.
*/ */


#ifndef MINDSPORE_LITE_NNACL_UNSTACK_H_
#define MINDSPORE_LITE_NNACL_UNSTACK_H_
#ifndef MINDSPORE_LITE_NNACL_UNSTACK_PARAMETER_H_
#define MINDSPORE_LITE_NNACL_UNSTACK_PARAMETER_H_


#include "nnacl/op_base.h" #include "nnacl/op_base.h"


@@ -31,12 +31,4 @@ typedef struct UnstackParameter {
int after_dims_; int after_dims_;
} UnstackParameter; } UnstackParameter;


#ifdef __cplusplus
extern "C" {
#endif
void Unistack(const float *input, float **output, UnstackParameter *para);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_NNACL_UNSTACK_H_
#endif // MINDSPORE_LITE_NNACL_UNSTACK_PARAMETER_H_

+ 0
- 20
mindspore/lite/nnacl/zeroslike.c View File

@@ -1,20 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "nnacl/zeroslike.h"
#include <string.h>

// Zero-fills `number` float elements starting at `output`.
// A NULL buffer or a non-positive count is treated as a no-op; without the
// guard a negative count would convert to a huge size_t length.
void ApproximateZerosLike(float *output, int number) {
  if (output == NULL || number <= 0) {
    return;
  }
  // memset takes an int fill value, so pass the integer 0 rather than 0.0.
  memset(output, 0, (size_t)number * sizeof(float));
}

+ 0
- 1
mindspore/lite/src/ops/clip.cc View File

@@ -19,7 +19,6 @@
#ifndef PRIMITIVE_WRITEABLE #ifndef PRIMITIVE_WRITEABLE
#include "src/ops/ops_register.h" #include "src/ops/ops_register.h"
#endif #endif
#include "nnacl/clip.h"


namespace mindspore { namespace mindspore {
namespace lite { namespace lite {


+ 0
- 42
mindspore/lite/src/ops/populate/clip_populate.cc View File

@@ -1,42 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "src/ops/clip.h"
#include "src/ops/primitive_c.h"
#include "src/ops/populate/populate_register.h"
#include "nnacl/clip.h"

namespace mindspore {
namespace lite {

// Allocates a zero-initialised ClipParameter with malloc and fills it from the
// given primitive: the op type comes from primitive->Type(), and the min/max
// bounds come from the primitive viewed as a Clip (GetMin / GetMax).
// Returns the parameter as an OpParameter pointer, or nullptr (after logging
// an error) when the allocation fails.
OpParameter *PopulateClipParameter(const mindspore::lite::PrimitiveC *primitive) {
  void *raw_block = malloc(sizeof(ClipParameter));
  if (raw_block == nullptr) {
    MS_LOG(ERROR) << "malloc ClipParameter failed.";
    return nullptr;
  }
  memset(raw_block, 0, sizeof(ClipParameter));

  auto *clip_param = reinterpret_cast<ClipParameter *>(raw_block);
  auto *clip_primitive =
    reinterpret_cast<mindspore::lite::Clip *>(const_cast<mindspore::lite::PrimitiveC *>(primitive));

  clip_param->op_parameter_.type_ = primitive->Type();
  clip_param->min_val_ = clip_primitive->GetMin();
  clip_param->max_val_ = clip_primitive->GetMax();
  return reinterpret_cast<OpParameter *>(clip_param);
}

Registry ClipParameterRegistry(schema::PrimitiveType_Clip, PopulateClipParameter);

} // namespace lite
} // namespace mindspore

+ 0
- 3
mindspore/lite/src/ops/populate/squeeze_populate.cc View File

@@ -14,14 +14,11 @@
* limitations under the License. * limitations under the License.
*/ */


#include "src/ops/squeeze.h"
#include "src/ops/primitive_c.h" #include "src/ops/primitive_c.h"
#include "src/ops/populate/populate_register.h" #include "src/ops/populate/populate_register.h"
#include "nnacl/squeeze.h"


namespace mindspore { namespace mindspore {
namespace lite { namespace lite {

OpParameter *PopulateSqueezeParameter(const mindspore::lite::PrimitiveC *primitive) { OpParameter *PopulateSqueezeParameter(const mindspore::lite::PrimitiveC *primitive) {
OpParameter *squeeze_param = reinterpret_cast<OpParameter *>(malloc(sizeof(OpParameter))); OpParameter *squeeze_param = reinterpret_cast<OpParameter *>(malloc(sizeof(OpParameter)));
if (squeeze_param == nullptr) { if (squeeze_param == nullptr) {


+ 1
- 1
mindspore/lite/src/ops/populate/unstack_populate.cc View File

@@ -17,7 +17,7 @@
#include "src/ops/unstack.h" #include "src/ops/unstack.h"
#include "src/ops/primitive_c.h" #include "src/ops/primitive_c.h"
#include "src/ops/populate/populate_register.h" #include "src/ops/populate/populate_register.h"
#include "nnacl/unstack.h"
#include "nnacl/unstack_parameter.h"


namespace mindspore { namespace mindspore {
namespace lite { namespace lite {


+ 1
- 0
mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space_fp32.h View File

@@ -19,6 +19,7 @@
#include <vector> #include <vector>
#include "include/errorcode.h" #include "include/errorcode.h"
#include "nnacl/batch_to_space.h" #include "nnacl/batch_to_space.h"
#include "nnacl/base/batch_to_space_base.h"
#include "src/lite_kernel.h" #include "src/lite_kernel.h"


namespace mindspore::kernel { namespace mindspore::kernel {


+ 1
- 1
mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.cc View File

@@ -149,7 +149,7 @@ void PadCPUKernel::InitMirrorPadBlock() {
} }


MirrorPadBlock block; MirrorPadBlock block;
int size_offset = DEFAULT_PAD_NDIMS - static_cast<int>(pad_region.size());
const int size_offset = DEFAULT_PAD_NDIMS - static_cast<int>(pad_region.size());
for (size_t i = 0; i < pad_region.size(); ++i) { for (size_t i = 0; i < pad_region.size(); ++i) {
int di = size_offset + i; int di = size_offset + i;
int si = remain_dim_offset + i; int si = remain_dim_offset + i;


+ 1
- 17
mindspore/lite/src/runtime/kernel/arm/fp32/squeeze_fp32.cc View File

@@ -15,8 +15,6 @@
*/ */


#include "src/runtime/kernel/arm/fp32/squeeze_fp32.h" #include "src/runtime/kernel/arm/fp32/squeeze_fp32.h"
#include <vector>
#include "nnacl/squeeze.h"
#include "schema/model_generated.h" #include "schema/model_generated.h"
#include "src/kernel_registry.h" #include "src/kernel_registry.h"
#include "include/errorcode.h" #include "include/errorcode.h"
@@ -32,22 +30,8 @@ int SqueezeCPUKernel::Init() { return RET_OK; }
int SqueezeCPUKernel::ReSize() { return RET_OK; } int SqueezeCPUKernel::ReSize() { return RET_OK; }


int SqueezeCPUKernel::Run() { int SqueezeCPUKernel::Run() {
mindspore::lite::STATUS ret = RET_ERROR;
size_t data_size = in_tensors_.front()->Size(); size_t data_size = in_tensors_.front()->Size();

if (in_tensors_.front()->data_type() == kNumberTypeInt32) {
auto input_ptr = reinterpret_cast<int32_t *>(in_tensors_.front()->MutableData());
auto output_ptr = reinterpret_cast<int32_t *>(out_tensors_.front()->MutableData());
MS_ASSERT(input_ptr);
MS_ASSERT(output_ptr);
ret = DoSqueezeInt32(input_ptr, output_ptr, data_size);
} else {
auto input_ptr = reinterpret_cast<float *>(in_tensors_.front()->MutableData());
auto output_ptr = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
MS_ASSERT(input_ptr);
MS_ASSERT(output_ptr);
ret = DoSqueeze(input_ptr, output_ptr, data_size);
}
int ret = DoSqueeze(in_tensors_.front()->data_c(), out_tensors_.front()->data_c(), data_size);


if (ret != RET_OK) { if (ret != RET_OK) {
MS_LOG(ERROR) << "Do squeeze fail!ret: " << ret; MS_LOG(ERROR) << "Do squeeze fail!ret: " << ret;


+ 1
- 3
mindspore/lite/src/runtime/kernel/arm/fp32/squeeze_fp32.h View File

@@ -19,6 +19,7 @@


#include <vector> #include <vector>
#include "src/lite_kernel.h" #include "src/lite_kernel.h"
#include "nnacl/base/squeeze_base.h"


namespace mindspore::kernel { namespace mindspore::kernel {


@@ -33,9 +34,6 @@ class SqueezeCPUKernel : public LiteKernel {
int Init() override; int Init() override;
int ReSize() override; int ReSize() override;
int Run() override; int Run() override;

private:
std::vector<int> axes_;
}; };
} // namespace mindspore::kernel } // namespace mindspore::kernel




+ 3
- 3
mindspore/lite/src/runtime/kernel/arm/fp32/unstack_fp32.cc View File

@@ -56,7 +56,7 @@ int UnstackCPUKernel::ReSize() {
free(output_addr_array_); free(output_addr_array_);
output_addr_array_ = nullptr; output_addr_array_ = nullptr;
} }
output_addr_array_ = reinterpret_cast<float **>(malloc(sizeof(float *) * out_tensors_.size()));
output_addr_array_ = reinterpret_cast<void **>(malloc(sizeof(void *) * out_tensors_.size()));
if (output_addr_array_ == nullptr) { if (output_addr_array_ == nullptr) {
MS_LOG(ERROR) << "Failed to malloc memory"; MS_LOG(ERROR) << "Failed to malloc memory";
return lite::RET_ERROR; return lite::RET_ERROR;
@@ -69,12 +69,12 @@ int UnstackCPUKernel::Run() {
MS_ASSERT(input); MS_ASSERT(input);
size_t out_num = out_tensors_.size(); size_t out_num = out_tensors_.size();
for (size_t i = 0; i < out_num; i++) { for (size_t i = 0; i < out_num; i++) {
output_addr_array_[i] = reinterpret_cast<float *>(out_tensors_.at(i)->MutableData());
output_addr_array_[i] = out_tensors_.at(i)->data_c();
} }
MS_ASSERT(output_addr_array_); MS_ASSERT(output_addr_array_);
auto para = reinterpret_cast<UnstackParameter *>(op_parameter_); auto para = reinterpret_cast<UnstackParameter *>(op_parameter_);
para->num_ = out_num; para->num_ = out_num;
Unistack(input, output_addr_array_, para);
Unstack(input, output_addr_array_, para, sizeof(float));
return RET_OK; return RET_OK;
} }




+ 2
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/unstack_fp32.h View File

@@ -18,7 +18,7 @@


#include <vector> #include <vector>
#include "src/lite_kernel.h" #include "src/lite_kernel.h"
#include "nnacl/unstack.h"
#include "nnacl/base/unstack_base.h"


namespace mindspore::kernel { namespace mindspore::kernel {
class UnstackCPUKernel : public LiteKernel { class UnstackCPUKernel : public LiteKernel {
@@ -34,7 +34,7 @@ class UnstackCPUKernel : public LiteKernel {
int Run() override; int Run() override;


private: private:
float **output_addr_array_ = nullptr;
void **output_addr_array_ = nullptr;
}; };
} // namespace mindspore::kernel } // namespace mindspore::kernel




+ 3
- 3
mindspore/lite/src/runtime/kernel/arm/fp32/zeroslike_fp32.cc View File

@@ -13,10 +13,10 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */

#include "src/runtime/kernel/arm/fp32/zeroslike_fp32.h" #include "src/runtime/kernel/arm/fp32/zeroslike_fp32.h"
#include <vector>
#include "schema/model_generated.h" #include "schema/model_generated.h"
#include "nnacl/zeroslike.h"
#include "mindspore/lite/nnacl/base/zeroslike_base.h"
#include "src/kernel_registry.h" #include "src/kernel_registry.h"
#include "include/errorcode.h" #include "include/errorcode.h"


@@ -31,7 +31,7 @@ int ZerosLikeCPUKernel::Init() { return RET_OK; }


int ZerosLikeCPUKernel::Run() { int ZerosLikeCPUKernel::Run() {
auto output_data = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData()); auto output_data = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
ApproximateZerosLike(output_data, in_tensors_.at(0)->ElementsNum());
ApproximateZerosLike(output_data, in_tensors_.at(0)->ElementsNum(), sizeof(float));
return RET_OK; return RET_OK;
} }




+ 1
- 0
mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.h View File

@@ -19,6 +19,7 @@
#include <vector> #include <vector>
#include "include/errorcode.h" #include "include/errorcode.h"
#include "nnacl/batch_to_space.h" #include "nnacl/batch_to_space.h"
#include "nnacl/base/batch_to_space_base.h"
#include "nnacl/int8/batch_to_space_int8.h" #include "nnacl/int8/batch_to_space_int8.h"
#include "src/lite_kernel.h" #include "src/lite_kernel.h"




+ 1
- 5
mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc View File

@@ -14,12 +14,8 @@
* limitations under the License. * limitations under the License.
*/ */


#include "nnacl/int8/squeeze_int8.h"
#include "src/runtime/kernel/arm/int8/squeeze_int8.h" #include "src/runtime/kernel/arm/int8/squeeze_int8.h"
#include "nnacl/squeeze_parameter.h"
#include "schema/model_generated.h" #include "schema/model_generated.h"
#include "src/runtime/runtime_api.h"
#include "include/errorcode.h"
#include "src/kernel_registry.h" #include "src/kernel_registry.h"


using mindspore::kernel::KERNEL_ARCH::kCPU; using mindspore::kernel::KERNEL_ARCH::kCPU;
@@ -116,7 +112,7 @@ int SqueezeInt8CPUKernel::DoExecute(int task_id) {
MS_ASSERT(output_data); MS_ASSERT(output_data);


int num = input_tensor->ElementsNum(); int num = input_tensor->ElementsNum();
SqueezeInt8(input_data, output_data, task_id, quant_squeeze_param_, para_, num);
SqueezeInt8(input_data, output_data, quant_squeeze_param_, num, task_id, op_parameter_->thread_num_);
return RET_OK; return RET_OK;
} }




+ 5
- 5
mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.h View File

@@ -18,9 +18,12 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_SQUEEZE_INT8_H_ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_SQUEEZE_INT8_H_


#include <vector> #include <vector>
#include "src/lite_kernel.h"
#include "include/context.h" #include "include/context.h"
#include "include/errorcode.h"
#include "src/lite_kernel.h"
#include "src/runtime/runtime_api.h" #include "src/runtime/runtime_api.h"
#include "nnacl/int8/squeeze_int8.h"
#include "nnacl/squeeze_parameter.h"


using mindspore::lite::InnerContext; using mindspore::lite::InnerContext;
namespace mindspore::kernel { namespace mindspore::kernel {
@@ -29,9 +32,7 @@ class SqueezeInt8CPUKernel : public LiteKernel {
SqueezeInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, SqueezeInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx, const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive) const mindspore::lite::PrimitiveC *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {
para_ = reinterpret_cast<SqueezeParameter *>(parameter);
}
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
~SqueezeInt8CPUKernel() override; ~SqueezeInt8CPUKernel() override;


int Init() override; int Init() override;
@@ -40,7 +41,6 @@ class SqueezeInt8CPUKernel : public LiteKernel {
int DoExecute(int tId); int DoExecute(int tId);


private: private:
SqueezeParameter *para_;
SqueezeQuantArg *quant_squeeze_param_; SqueezeQuantArg *quant_squeeze_param_;
}; };




+ 1
- 0
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batch_to_space_fp32_test.cc View File

@@ -15,6 +15,7 @@
*/ */
#include "src/common/log_adapter.h" #include "src/common/log_adapter.h"
#include "common/common_test.h" #include "common/common_test.h"
#include "mindspore/lite/nnacl/base/batch_to_space_base.h"
#include "mindspore/lite/nnacl/batch_to_space.h" #include "mindspore/lite/nnacl/batch_to_space.h"
#include "mindspore/lite/nnacl/common_func.h" #include "mindspore/lite/nnacl/common_func.h"




+ 1
- 1
mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/unstack_fp32_tests.cc View File

@@ -17,7 +17,7 @@
#include <iostream> #include <iostream>
#include <memory> #include <memory>
#include "common/common_test.h" #include "common/common_test.h"
#include "mindspore/lite/nnacl/unstack.h"
#include "mindspore/lite/nnacl/base/unstack_base.h"
#include "mindspore/lite/src/kernel_registry.h" #include "mindspore/lite/src/kernel_registry.h"


namespace mindspore { namespace mindspore {


+ 2
- 7
mindspore/lite/test/ut/src/runtime/kernel/arm/int8/squeeze_int8_tests.cc View File

@@ -64,14 +64,10 @@ TEST_F(TestSqueezeInt8, Squeeze_1d_axis0_offset0_quant0_thread2) {
output0_tensor->set_data_type(tid_int8); output0_tensor->set_data_type(tid_int8);
outputs_tensor[0] = output0_tensor; outputs_tensor[0] = output0_tensor;


SqueezeParameter op_param;
op_param.op_parameter_.type_ = schema::PrimitiveType_Squeeze;
OpParameter op_param;
op_param.type_ = schema::PrimitiveType_Squeeze;
lite::InnerContext *ctx = new lite::InnerContext; lite::InnerContext *ctx = new lite::InnerContext;
ctx->thread_num_ = 2; ctx->thread_num_ = 2;
ASSERT_EQ(lite::RET_OK, ctx->Init());
op_param.axis_ = 0;
op_param.offset_[0] = 1;
op_param.offset_size_ = 1;
kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Squeeze}; kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Squeeze};
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
ASSERT_NE(creator, nullptr); ASSERT_NE(creator, nullptr);
@@ -93,5 +89,4 @@ TEST_F(TestSqueezeInt8, Squeeze_1d_axis0_offset0_quant0_thread2) {
delete output0_tensor; delete output0_tensor;
delete ctx; delete ctx;
} }

} // namespace mindspore } // namespace mindspore

Loading…
Cancel
Save