Browse Source

Move C++ (.cc) files to C (.c) files

tags/v0.7.0-beta
songhonglei413 5 years ago
parent
commit
03f817b35f
100 changed files with 406 additions and 210 deletions
  1. +9
    -5
      mindspore/lite/src/runtime/kernel/arm/CMakeLists.txt
  2. +6
    -5
      mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc
  3. +3
    -3
      mindspore/lite/src/runtime/kernel/arm/base/resize_base.h
  4. +11
    -0
      mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h
  5. +11
    -0
      mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h
  6. +1
    -3
      mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc
  7. +1
    -1
      mindspore/lite/src/runtime/kernel/arm/fp32/caffeprelu.cc
  8. +11
    -11
      mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc
  9. +3
    -3
      mindspore/lite/src/runtime/kernel/arm/fp32/resize.cc
  10. +2
    -2
      mindspore/lite/src/runtime/kernel/arm/fp32/scale.h
  11. +1
    -1
      mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd.cc
  12. +3
    -3
      mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch.cc
  13. +1
    -1
      mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth.cc
  14. +5
    -5
      mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.cc
  15. +1
    -3
      mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc
  16. +11
    -11
      mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.h
  17. +4
    -4
      mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.cc
  18. +1
    -2
      mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc
  19. +1
    -1
      mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.cc
  20. +1
    -1
      mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc
  21. +2
    -2
      mindspore/lite/src/runtime/kernel/arm/int8/sigmoid_int8.h
  22. +1
    -1
      mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.cc
  23. +1
    -3
      mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.h
  24. +2
    -2
      mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc
  25. +1
    -1
      mindspore/lite/src/runtime/kernel/arm/int8/unsqueeze_int8.cc
  26. +1
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/CMakeLists.txt
  27. +7
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/activation_grad.h
  28. +3
    -4
      mindspore/lite/src/runtime/kernel/arm/nnacl/add_int8.c
  29. +7
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/add_int8.h
  30. +3
    -5
      mindspore/lite/src/runtime/kernel/arm/nnacl/arg_min_max.c
  31. +7
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/arg_min_max.h
  32. +4
    -5
      mindspore/lite/src/runtime/kernel/arm/nnacl/arithmetic_common.c
  33. +6
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/arithmetic_common.h
  34. +0
    -4
      mindspore/lite/src/runtime/kernel/arm/nnacl/arithmetic_parameter.h
  35. +2
    -4
      mindspore/lite/src/runtime/kernel/arm/nnacl/batch_to_space.c
  36. +7
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/batch_to_space.h
  37. +2
    -2
      mindspore/lite/src/runtime/kernel/arm/nnacl/caffeprelu.c
  38. +9
    -3
      mindspore/lite/src/runtime/kernel/arm/nnacl/caffeprelu.h
  39. +1
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/common_func.c
  40. +0
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/conv_parameter.h
  41. +1
    -2
      mindspore/lite/src/runtime/kernel/arm/nnacl/depth_to_space.c
  42. +7
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/depth_to_space.h
  43. +0
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/flatten.c
  44. +7
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/flatten.h
  45. +0
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/cast_fp16.c
  46. +8
    -2
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/cast_fp16.h
  47. +2
    -2
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/common_func.h
  48. +10
    -11
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/conv_depthwise_fp16.c
  49. +6
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/conv_depthwise_fp16.h
  50. +13
    -5
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/conv_fp16.c
  51. +6
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/conv_fp16.h
  52. +2
    -2
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/pack_fp16.c
  53. +6
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/pack_fp16.h
  54. +0
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/pooling_fp16.c
  55. +8
    -2
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/pooling_fp16.h
  56. +0
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/winograd_transform_fp16.c
  57. +7
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/winograd_transform_fp16.h
  58. +1
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/activation.h
  59. +3
    -4
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/arg_min_max.c
  60. +7
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/arg_min_max.h
  61. +6
    -6
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/arithmetic.c
  62. +7
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/arithmetic.h
  63. +0
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/arithmetic_self.c
  64. +6
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/arithmetic_self.h
  65. +2
    -2
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/batchnorm.c
  66. +8
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/batchnorm.h
  67. +9
    -15
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/broadcast_to.c
  68. +7
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/broadcast_to.h
  69. +0
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/cast.c
  70. +7
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/cast.h
  71. +2
    -2
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/common_func.c
  72. +6
    -6
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/common_func.h
  73. +2
    -3
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/concat.c
  74. +6
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/concat.h
  75. +2
    -2
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/conv.c
  76. +7
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/conv.h
  77. +0
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/conv_depthwise.c
  78. +8
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/conv_depthwise.h
  79. +0
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/crop.c
  80. +7
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/crop.h
  81. +0
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/deconv.c
  82. +6
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/deconv.h
  83. +0
    -2
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/elu.c
  84. +6
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/elu.h
  85. +0
    -3
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/embedding_lookup.c
  86. +13
    -7
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/embedding_lookup.h
  87. +0
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/expandDims.c
  88. +6
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/expandDims.h
  89. +0
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/fill.c
  90. +6
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/fill.h
  91. +2
    -3
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/gather.c
  92. +6
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/gather.h
  93. +0
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/gatherNd.c
  94. +6
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/gatherNd.h
  95. +0
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/local_response_norm.c
  96. +6
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/local_response_norm.h
  97. +0
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/lstm.c
  98. +6
    -0
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/lstm.h
  99. +1
    -1
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/matmul.c
  100. +3
    -5
      mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/matmul.h

+ 9
- 5
mindspore/lite/src/runtime/kernel/arm/CMakeLists.txt View File

@@ -2,10 +2,10 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/)

file(GLOB KERNEL_SRC
${CMAKE_CURRENT_SOURCE_DIR}/base/*.cc
nnacl/*.cc
nnacl/fp32/*.cc
nnacl/int8/*.cc
nnacl/quantization/*.cc
nnacl/*.c
nnacl/fp32/*.c
nnacl/int8/*.c
nnacl/quantization/*.c
${CMAKE_CURRENT_SOURCE_DIR}/fp32/*.cc
${CMAKE_CURRENT_SOURCE_DIR}/int8/*.cc
)
@@ -13,13 +13,15 @@ file(GLOB KERNEL_SRC
if (SUPPORT_TRAIN)
file (GLOB TRAIN_KERNEL_SRC
${CMAKE_CURRENT_SOURCE_DIR}/fp32_grad/*.cc
${CMAKE_CURRENT_SOURCE_DIR}/nnacl/fp32_grad/*.cc
${CMAKE_CURRENT_SOURCE_DIR}/nnacl/fp32_grad/*.c
)
endif()

if (PLATFORM_ARM64)
# assembly
file(GLOB ASSEMBLY_SRC nnacl/assembly/arm64/*.s
nnacl/assembly/opt/*.s
nnacl/assembly/opt/*.S
nnacl/assembly/arm64/*.S)
set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
set(KERNEL_SRC ${KERNEL_SRC} ${ASSEMBLY_SRC})
@@ -29,6 +31,8 @@ if (PLATFORM_ARM32)
# assembly
file(GLOB ASSEMBLY_SRC nnacl/assembly/arm32/*.s
nnacl/assembly/arm32/*.S
nnacl/assembly/opt/*.s
nnacl/assembly/opt/*.S
)
set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
set(KERNEL_SRC ${KERNEL_SRC} ${ASSEMBLY_SRC})


+ 6
- 5
mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc View File

@@ -54,7 +54,8 @@ int QuantDTypeCastCPUKernel::Init() {
}
inverse_ = true;
} else {
MS_LOG(ERROR) << "param data type not supported:" << " src: " << param->srcT << " dst: " << param->dstT;
MS_LOG(ERROR) << "param data type not supported:"
<< " src: " << param->srcT << " dst: " << param->dstT;
return RET_PARAM_INVALID;
}

@@ -81,11 +82,11 @@ int QuantDTypeCastCPUKernel::QuantDTypeCast(int task_id) {
auto quant_arg = in_tensors_.front()->GetQuantParams().front();
int ret;
if (inverse_) {
ret = DequantizeInt8(int8_ptr_ + thread_offset, float32_ptr_ + thread_offset, quant_arg.scale, quant_arg.zeroPoint,
num_unit_thread);
ret = DoDequantizeInt8(int8_ptr_ + thread_offset, float32_ptr_ + thread_offset, quant_arg.scale,
quant_arg.zeroPoint, num_unit_thread);
} else {
ret = QuantizeToInt8(float32_ptr_ + thread_offset, int8_ptr_ + thread_offset, quant_arg.scale, quant_arg.zeroPoint,
num_unit_thread);
ret = DoQuantizeToInt8(float32_ptr_ + thread_offset, int8_ptr_ + thread_offset, quant_arg.scale,
quant_arg.zeroPoint, num_unit_thread);
}
if (ret != RET_OK) {
MS_LOG(ERROR) << "QuantDTypeCast error task_id[" << task_id << "] error_code[" << ret << "]";


+ 3
- 3
mindspore/lite/src/runtime/kernel/arm/base/resize_base.h View File

@@ -27,9 +27,9 @@ namespace mindspore::kernel {
class ResizeBaseCPUKernel : public LiteKernel {
public:
ResizeBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), context_(ctx) {}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), context_(ctx) {}

~ResizeBaseCPUKernel() = default;



+ 11
- 0
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h View File

@@ -22,6 +22,17 @@
#include "src/runtime/kernel/arm/base/convolution_base.h"
#include "src/runtime/kernel/arm/nnacl/fp16/conv_depthwise_fp16.h"

#ifdef __cplusplus
extern "C" {
#endif
void ConvDwC8Fp16(float16_t *output_data, const float16_t *input_data, const float16_t *weight_data,
const float16_t *bias_data, const ConvParameter *conv_param, const SlidingWindowParam *sliding,
int task_id);
#ifdef __cplusplus
}
#endif


namespace mindspore::kernel {
class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel {
public:


+ 11
- 0
mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h View File

@@ -22,6 +22,17 @@
#include "src/runtime/kernel/arm/base/convolution_base.h"
#include "src/runtime/kernel/arm/nnacl/fp16/conv_depthwise_fp16.h"

#ifdef __cplusplus
extern "C" {
#endif
void DeconvDwC8Fp16(float16_t *output_data, const float16_t *input_data, const float16_t *weight_data,
const float16_t *bias_data, const ConvParameter *conv_param, const SlidingWindowParam *sliding,
int task_id);
void ComputeStrides(int *shape, int *strides, int ndim);
#ifdef __cplusplus
}
#endif

namespace mindspore::kernel {
class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel {
public:


+ 1
- 3
mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc View File

@@ -40,9 +40,7 @@ void ArithmeticCPUKernel::FreeTileData() {
}
}

ArithmeticCPUKernel::~ArithmeticCPUKernel() {
FreeTileData();
}
ArithmeticCPUKernel::~ArithmeticCPUKernel() { FreeTileData(); }

int ArithmeticCPUKernel::Init() {
if (!InferShapeDone()) {


+ 1
- 1
mindspore/lite/src/runtime/kernel/arm/fp32/caffeprelu.cc View File

@@ -31,7 +31,7 @@ namespace mindspore::kernel {
int CaffePReluCPUKernel::Init() { return RET_OK; }

int CaffePReluCPUKernel::DoExcute(int task_id) {
PRelu(input_data, output_data, prelu_param_, task_id);
CaffePRelu(input_data, output_data, prelu_param_, task_id);
return RET_OK;
}



+ 11
- 11
mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc View File

@@ -79,17 +79,17 @@ int CastCPUKernel::DoCast(int thread_id) {
}
} else {
switch (input_data_type) {
case kNumberTypeUInt8:
Uint8ToFloat32(reinterpret_cast<uint8_t *>(input->Data()) + offset,
reinterpret_cast<float *>(output_data) + offset, data_num);
break;
case kNumberTypeInt32:
Int32ToFloat32(reinterpret_cast<int32_t *>(input->Data()) + offset,
reinterpret_cast<float *>(output_data) + offset, data_num);
break;
default:
MS_LOG(ERROR) << "Unsupport input data type " << input_data_type;
return RET_ERROR;
case kNumberTypeUInt8:
Uint8ToFloat32(reinterpret_cast<uint8_t *>(input->Data()) + offset,
reinterpret_cast<float *>(output_data) + offset, data_num);
break;
case kNumberTypeInt32:
Int32ToFloat32(reinterpret_cast<int32_t *>(input->Data()) + offset,
reinterpret_cast<float *>(output_data) + offset, data_num);
break;
default:
MS_LOG(ERROR) << "Unsupport input data type " << input_data_type;
return RET_ERROR;
}
}
return RET_OK;


+ 3
- 3
mindspore/lite/src/runtime/kernel/arm/fp32/resize.cc View File

@@ -66,8 +66,8 @@ int ResizeCPUKernel::RunImpl(int task_id) {
int ret = 0;
switch (method_) {
case static_cast<int>(schema::ResizeMethod_BILINEAR): {
ret = ResizeBilinear(input_data, output_data, input_shape.data(), out_tensors_[0]->shape().data(),
align_corners_, task_id, context_->thread_num_);
ret = ResizeBilinear(input_data, output_data, input_shape.data(), out_tensors_[0]->shape().data(), align_corners_,
task_id, context_->thread_num_);
break;
}
case static_cast<int>(schema::ResizeMethod_NEAREST_NEIGHBOR): {
@@ -76,7 +76,7 @@ int ResizeCPUKernel::RunImpl(int task_id) {
return RET_ERROR;
}
ret = ResizeNearestNeighbor(input_data, output_data, input_shape.data(), out_tensors_[0]->shape().data(), task_id,
context_->thread_num_);
context_->thread_num_);
break;
}
case schema::ResizeMethod_UNKNOW:


+ 2
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/scale.h View File

@@ -26,8 +26,8 @@ namespace mindspore::kernel {
class ScaleCPUKernel : public LiteKernel {
public:
ScaleCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {
scale_param_ = reinterpret_cast<ScaleParameter *>(op_parameter_);
}


+ 1
- 1
mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd.cc View File

@@ -44,7 +44,7 @@ int ScatterNDCPUKernel::Init() {
}

int ScatterNDCPUKernel::ReSize() {
auto shape = in_tensors_.at(kScatterShapeIndex);
auto shape = in_tensors_.at(kScatterShapeIndex);
auto indices = in_tensors_.at(kScatterIndicesIndex);
auto update = in_tensors_.at(kScatterUpdateIndex);



+ 3
- 3
mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch.cc View File

@@ -23,9 +23,9 @@
#include "include/errorcode.h"

using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_FORMAT_ERR;
using mindspore::lite::RET_OK;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OP_EXECUTE_FAILURE;
using mindspore::schema::PrimitiveType_SpaceToBatch;

@@ -73,8 +73,8 @@ int SpaceToBatchCPUKernel::Run() {
float *tmp_space[3] = {nullptr, nullptr, nullptr};
if (param->need_paddings_) {
for (int i = 0; i < 3; ++i) {
tmp_space[i]
= reinterpret_cast<float *>(context_->allocator->Malloc(param->num_elements_padded_ * sizeof(float)));
tmp_space[i] =
reinterpret_cast<float *>(context_->allocator->Malloc(param->num_elements_padded_ * sizeof(float)));
(void)memset(tmp_space[i], 0, param->num_elements_padded_ * sizeof(float));
if (tmp_space[i] == nullptr) {
MS_LOG(ERROR) << "malloc tmp buffer fail!";


+ 1
- 1
mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth.cc View File

@@ -45,7 +45,7 @@ int SpaceToDepthCPUKernel::Init() {
}

int SpaceToDepthCPUKernel::ReSize() {
if (in_tensors_[0]->GetFormat() != schema::Format_NHWC) {
if (in_tensors_[0]->GetFormat() != schema::Format_NHWC) {
MS_LOG(ERROR) << "space_to_depth only support NHWC now!";
return RET_FORMAT_ERR;
}


+ 5
- 5
mindspore/lite/src/runtime/kernel/arm/int8/argminmax_int8.cc View File

@@ -62,22 +62,22 @@ int ArgMinMaxInt8CPUKernel::Run() {
auto in_shape = input->shape().data();
auto param = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_);
if (param->topk_ == 1) {
ArgMinMaxQuant(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_);
Int8ArgMinMaxQuant(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_);
return RET_OK;
}

switch (param->axis_) {
case 0:
ArgMinMaxDim0(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_);
Int8ArgMinMaxDim0(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_);
break;
case 1:
ArgMinMaxDim1(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_);
Int8ArgMinMaxDim1(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_);
break;
case 2:
ArgMinMaxDim2(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_);
Int8ArgMinMaxDim2(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_);
break;
case 3:
ArgMinMaxDim3(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_);
Int8ArgMinMaxDim3(input_data, output_data, in_shape, param, &in_quant_arg_, &out_quant_arg_);
break;
}
FreeTmpMemory();


+ 1
- 3
mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.cc View File

@@ -71,9 +71,7 @@ void ArithmeticInt8CPUKernel::FreeTileData() {
tile_data1_ = nullptr;
}

ArithmeticInt8CPUKernel::~ArithmeticInt8CPUKernel() {
FreeTileData();
}
ArithmeticInt8CPUKernel::~ArithmeticInt8CPUKernel() { FreeTileData(); }

int ArithmeticInt8CPUKernel::Init() {
switch (op_parameter_->type_) {


+ 11
- 11
mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_self_int8.h View File

@@ -48,37 +48,37 @@ class ArithmeticSelfInt8CPUKernel : public LiteKernel {
: LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
switch (parameter->type_) {
case PrimitiveType_Round:
arithmeticSelf_run_ = ElementRound;
arithmeticSelf_run_ = Int8ElementRound;
break;
case PrimitiveType_Floor:
arithmeticSelf_run_ = ElementFloor;
arithmeticSelf_run_ = Int8ElementFloor;
break;
case PrimitiveType_Ceil:
arithmeticSelf_run_ = ElementCeil;
arithmeticSelf_run_ = Int8ElementCeil;
break;
case PrimitiveType_Abs:
arithmeticSelf_run_ = ElementAbs;
arithmeticSelf_run_ = Int8ElementAbs;
break;
case PrimitiveType_Sin:
arithmeticSelf_run_ = ElementSin;
arithmeticSelf_run_ = Int8ElementSin;
break;
case PrimitiveType_Cos:
arithmeticSelf_run_ = ElementCos;
arithmeticSelf_run_ = Int8ElementCos;
break;
case PrimitiveType_Log:
arithmeticSelf_run_ = ElementLog;
arithmeticSelf_run_ = Int8ElementLog;
break;
case PrimitiveType_Sqrt:
arithmeticSelf_run_ = ElementSqrt;
arithmeticSelf_run_ = Int8ElementSqrt;
break;
case PrimitiveType_Rsqrt:
arithmeticSelf_run_ = ElementRsqrt;
arithmeticSelf_run_ = Int8ElementRsqrt;
break;
case PrimitiveType_Square:
arithmeticSelf_run_ = ElementSquare;
arithmeticSelf_run_ = Int8ElementSquare;
break;
case PrimitiveType_LogicalNot:
arithmeticSelf_run_ = ElementLogicalNot;
arithmeticSelf_run_ = Int8ElementLogicalNot;
break;
default:
break;


+ 4
- 4
mindspore/lite/src/runtime/kernel/arm/int8/batch_to_space_int8.cc View File

@@ -70,11 +70,11 @@ int BatchToSpaceInt8CPUKernel::Run() {
}
} else {
if (IsNoCrop()) {
BatchToSpaceNoCropForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_,
&in_quant_arg_, &out_quant_arg_);
BatchToSpaceNoCropForNHWCInt8(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_,
&in_quant_arg_, &out_quant_arg_);
} else {
BatchToSpaceForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, param->crops_,
&in_quant_arg_, &out_quant_arg_);
BatchToSpaceForNHWCInt8(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_,
param->crops_, &in_quant_arg_, &out_quant_arg_);
}
}



+ 1
- 2
mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc View File

@@ -55,7 +55,6 @@ int ConcatInt8CPUKernel::Init() {
return ReSize();
}


int ConcatInt8CPUKernel::ReSize() {
auto ret = ConcatBaseCPUKernel::ReSize();
if (ret != RET_OK) {
@@ -127,7 +126,7 @@ int ConcatInt8CPUKernel::DoExecute(int task_id) {
if (real_dst_count <= 0) {
return lite::RET_OK;
}
Concat(input_data_, output_data_, concat_param_, axis_, real_dst_count, task_id);
Int8Concat(input_data_, output_data_, concat_param_, axis_, real_dst_count, task_id);
return lite::RET_OK;
}
} // namespace mindspore::kernel

+ 1
- 1
mindspore/lite/src/runtime/kernel/arm/int8/depth_to_space_int8.cc View File

@@ -65,7 +65,7 @@ int DepthToSpaceInt8CPUKernel::Run() {
if (in_quant_arg_.scale_ == out_quant_arg_.scale_ && in_quant_arg_.zp_ == out_quant_arg_.zp_) {
DepthToSpaceForNHWC(input_data, output_data, in_shape.data(), param);
} else {
DepthToSpaceForNHWC(input_data, output_data, in_shape.data(), param, &in_quant_arg_, &out_quant_arg_);
DepthToSpaceForNHWCInt8(input_data, output_data, in_shape.data(), param, &in_quant_arg_, &out_quant_arg_);
}
return RET_OK;
}


+ 1
- 1
mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc View File

@@ -78,7 +78,7 @@ int ReshapeInt8CPUKernel::DoExecute(int task_id) {
int8_t *cur_input0_data = input_data_ + task_id * count_unit_;
int8_t *cur_output_data = output_data_ + task_id * count_unit_;

Reshape(cur_input0_data, cur_output_data, real_dst_count, reshape_param_->quant_para_);
Int8Reshape(cur_input0_data, cur_output_data, real_dst_count, reshape_param_->quant_para_);
return lite::RET_OK;
}
} // namespace mindspore::kernel

+ 2
- 2
mindspore/lite/src/runtime/kernel/arm/int8/sigmoid_int8.h View File

@@ -25,8 +25,8 @@ namespace mindspore::kernel {
class SigmoidInt8CPUKernel : public LiteKernel {
public:
SigmoidInt8CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
~SigmoidInt8CPUKernel() override = default;



+ 1
- 1
mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.cc View File

@@ -100,7 +100,7 @@ int SoftmaxInt8CPUKernel::DoSoftmax(int task_id) {
output_ptr += stride * task_id * inner_size;
exp_data_ += stride * task_id * inner_size;

auto error_code = Softmax(input_ptr, output_ptr, count, exp_data_, sum_data_, quant_params_, softmax_param_);
auto error_code = Int8Softmax(input_ptr, output_ptr, count, exp_data_, sum_data_, quant_params_, softmax_param_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "DoSoftmax error task_id[" << task_id << "] error_code[" << error_code << "]";
return RET_ERROR;


+ 1
- 3
mindspore/lite/src/runtime/kernel/arm/int8/softmax_int8.h View File

@@ -28,9 +28,7 @@ class SoftmaxInt8CPUKernel : public SoftmaxBaseCPUKernel {
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: SoftmaxBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
~SoftmaxInt8CPUKernel() {
FreeTmpBuffer();
}
~SoftmaxInt8CPUKernel() { FreeTmpBuffer(); }

int Init() override;
int ReSize() override;


+ 2
- 2
mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc View File

@@ -62,8 +62,8 @@ int SplitInt8CPUKernel::Split(int task_id) {
return RET_OK;
}
int thread_offset = task_id * thread_n_stride_;
auto ret =
DoSplit(input_ptr_, output_ptr_.data(), in_tensors_.front()->shape().data(), thread_offset, num_unit_thread, param);
auto ret = Int8DoSplit(input_ptr_, output_ptr_.data(), in_tensors_.front()->shape().data(), thread_offset,
num_unit_thread, param);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Split error task_id[" << task_id << "] error_code[" << ret << "]";
return RET_ERROR;


+ 1
- 1
mindspore/lite/src/runtime/kernel/arm/int8/unsqueeze_int8.cc View File

@@ -65,7 +65,7 @@ int Unsqueezeint8CPUKernel::DoUnsqueeze(int task_id) {
auto output_ptr = reinterpret_cast<int8_t *>(out_tensors_.front()->Data());
size_t data_size = out_tensors_.front()->Size();

int ret = Unsqueeze(input_ptr, output_ptr, Unsq_para_, data_size, task_id);
int ret = Int8Unsqueeze(input_ptr, output_ptr, Unsq_para_, data_size, task_id);
if (ret != RET_OK) {
MS_LOG(ERROR) << "UnsqueezeRun error task_id[" << task_id << "] error_code[" << ret << "]";
return ret;


+ 1
- 1
mindspore/lite/src/runtime/kernel/arm/nnacl/CMakeLists.txt View File

@@ -11,7 +11,7 @@ file(GLOB OPTIMIZED_ASSEMBLY
)

file(GLOB FP16_SRC
${NNACL_DIR}/fp16/*.cc
${NNACL_DIR}/fp16/*.c
${NNACL_DIR}/../fp16/*.cc
)



+ 7
- 1
mindspore/lite/src/runtime/kernel/arm/nnacl/activation_grad.h View File

@@ -24,8 +24,11 @@
typedef struct ActivationGradParameter {
OpParameter op_parameter{};
int type_;
float alpha_{0.01};
float alpha_;
} ActivationGradParameter;
#ifdef __cplusplus
extern "C" {
#endif

inline int ReluGrad(float *src0, float *src1, int length, float *dst) {
for (int i = 0; i < length; ++i) {
@@ -84,5 +87,8 @@ inline int HSigmoidGrad(float *src0, float *src1, int length, float *dst) {
}
return NNACL_OK;
}
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_ACTIVATION_GRAD_H_

mindspore/lite/src/runtime/kernel/arm/nnacl/add_int8.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/add_int8.c View File

@@ -35,7 +35,7 @@ int32x4_t ClacScaledInput(int32x4_t input, int32x4_t left_shift_result_vec, int3
return vrshlq_s32(vqaddq_s32(shifted_input, fixup), right_shift_vec);
}

int16x4_t ClacSumHalfWord(int32x4_t scaled_input0, int32x4_t scaled_input1, int32x4_t left_shift_out_vec,
int16x4_t AddClacSumHalfWord(int32x4_t scaled_input0, int32x4_t scaled_input1, int32x4_t left_shift_out_vec,
int32x4_t output_multiplier_vec, AddQuantParameter *para) {
int32x4_t raw_sum = vaddq_s32(scaled_input0, scaled_input1);

@@ -77,9 +77,9 @@ void AddInt8NEON(int8_t *input0_data, int8_t *input1_data, int8_t *output_data,
ClacScaledInput(input1_high, left_shift_result1_vec, input1_multiplier_vec, right_shift1_vec);

int16x4_t sum_low =
ClacSumHalfWord(scaled_input0_low, scaled_input1_low, left_shift_out_vec, output_multiplier_vec, para);
AddClacSumHalfWord(scaled_input0_low, scaled_input1_low, left_shift_out_vec, output_multiplier_vec, para);
int16x4_t sum_high =
ClacSumHalfWord(scaled_input0_high, scaled_input1_high, left_shift_out_vec, output_multiplier_vec, para);
AddClacSumHalfWord(scaled_input0_high, scaled_input1_high, left_shift_out_vec, output_multiplier_vec, para);

int16x8_t res_s16 = vcombine_s16(sum_low, sum_high);
int8x8_t res_u8_n0 = vqmovn_s16(res_s16);
@@ -115,4 +115,3 @@ void AddInt8(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int6
}
return;
}


+ 7
- 0
mindspore/lite/src/runtime/kernel/arm/nnacl/add_int8.h View File

@@ -42,8 +42,15 @@ typedef struct AddQuantParameter {
int right_shift_out_;
} AddQuantParameter;

#ifdef __cplusplus
extern "C" {
#endif

void AddInt8(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count,
AddQuantParameter *para);
#ifdef __cplusplus
}
#endif

#ifdef ENABLE_NEON
#include <arm_neon.h>


mindspore/lite/src/runtime/kernel/arm/nnacl/arg_min_max.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/arg_min_max.c View File

@@ -41,11 +41,9 @@ void ArgMinMaxTopk1(const void *input, void *output, const int *shape, ArgMinMax
switch (param->data_type_) {
case FLOAT_DATA_TYPE: {
if (param->get_max_) {
ArgMax(reinterpret_cast<const float *>(input), reinterpret_cast<float *>(output), param, pre_axis_count,
axis_count, after_axis_count);
ArgMax(input, output, param, pre_axis_count, axis_count, after_axis_count);
} else {
ArgMin(reinterpret_cast<const float *>(input), reinterpret_cast<float *>(output), param, pre_axis_count,
axis_count, after_axis_count);
ArgMin(input, output, param, pre_axis_count, axis_count, after_axis_count);
}
break;
}
@@ -96,7 +94,7 @@ void ArgMinMax(const void *input, void *output, const int *in_shape, ArgMinMaxPa

switch (param->data_type_) {
case FLOAT_DATA_TYPE: {
ArgMinMaxTopknFp32(reinterpret_cast<const float *>(input), reinterpret_cast<float *>(output), in_shape, param);
ArgMinMaxTopknFp32(input, output, in_shape, param);
return;
}
default:

+ 7
- 0
mindspore/lite/src/runtime/kernel/arm/nnacl/arg_min_max.h View File

@@ -18,5 +18,12 @@

#include "nnacl/arg_min_max_parameter.h"

#ifdef __cplusplus
extern "C" {
#endif
void ArgMinMax(const void *input, void *output, const int *in_shape, ArgMinMaxParameter *param);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_ARG_MIN_MAX_H_

mindspore/lite/src/runtime/kernel/arm/nnacl/arithmetic_common.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/arithmetic_common.c View File

@@ -91,9 +91,8 @@ void TileDimensionsUint8(uint8_t *data0, uint8_t *data1, uint8_t *tile_data0, ui
void TileDimensionsInt8(int8_t *data0, int8_t *data1, int8_t *tile_data0, int8_t *tile_data1,
ArithmeticParameter *param) {
CalcMultiplesAndStrides(param);
TileOneDimensionUint8((uint8_t *)(data0), (uint8_t *)(tile_data0), 0, param->ndim_,
param->in_shape0_, param->in_strides0_, param->out_strides_, param->multiples0_);
TileOneDimensionUint8((uint8_t *)(data1), (uint8_t *)(tile_data1), 0, param->ndim_,
param->in_shape1_, param->in_strides1_, param->out_strides_, param->multiples1_);
TileOneDimensionUint8((uint8_t *)(data0), (uint8_t *)(tile_data0), 0, param->ndim_, param->in_shape0_,
param->in_strides0_, param->out_strides_, param->multiples0_);
TileOneDimensionUint8((uint8_t *)(data1), (uint8_t *)(tile_data1), 0, param->ndim_, param->in_shape1_,
param->in_strides1_, param->out_strides_, param->multiples1_);
}


+ 6
- 0
mindspore/lite/src/runtime/kernel/arm/nnacl/arithmetic_common.h View File

@@ -40,6 +40,9 @@ typedef struct ArithmeticParameter {
int multiples1_[5];
} ArithmeticParameter;

#ifdef __cplusplus
extern "C" {
#endif
void TileOneDimension(float *inData, float *outData, int dim, size_t ndim, int *inShape, int *inStrides,
int *outStrides, int *multiple);
void ComputeStrides(int *shape, int *strides, int ndim);
@@ -49,5 +52,8 @@ void TileDimensionsUint8(uint8_t *data0, uint8_t *data1, uint8_t *tile_data0, ui
ArithmeticParameter *param);
void TileDimensionsInt8(int8_t *data0, int8_t *data1, int8_t *tile_data0, int8_t *tile_data1,
ArithmeticParameter *param);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_ARITHMETIC_COMMON_H_

+ 0
- 4
mindspore/lite/src/runtime/kernel/arm/nnacl/arithmetic_parameter.h View File

@@ -19,8 +19,4 @@

#include "nnacl/op_attribute.h"




#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_ARTITHMETIC_PARAMETER_H_


mindspore/lite/src/runtime/kernel/arm/nnacl/batch_to_space.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/batch_to_space.c View File

@@ -37,8 +37,7 @@ void BatchToSpaceNoCropForNHWC(const void *input, void *output, const int *in_sh
size_t w_offset = w * in_c;
for (int bw = 0; bw < block_w; ++bw) {
size_t in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset;
memcpy(reinterpret_cast<int8_t *>(output) + output_offset,
reinterpret_cast<const int8_t *>(input) + in_offset * data_size, copy_size);
memcpy((int8_t *)output + output_offset, (int8_t *)input + in_offset * data_size, copy_size);
output_offset += copy_size;
}
}
@@ -85,8 +84,7 @@ void BatchToSpaceForNHWC(const void *input, void *output, const int *in_shape, i
continue;
}
size_t in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset;
memcpy(reinterpret_cast<int8_t *>(output) + output_offset,
reinterpret_cast<const int8_t *>(input) + in_offset * data_size, copy_size);
memcpy((int8_t *)output + output_offset, (int8_t *)input + in_offset * data_size, copy_size);
output_offset += copy_size;
}
}

+ 7
- 0
mindspore/lite/src/runtime/kernel/arm/nnacl/batch_to_space.h View File

@@ -26,8 +26,15 @@ typedef struct BatchToSpaceParameter {
int32_t crops_[BATCH_TO_SPACE_CROPS_SIZE];
} BatchToSpaceParameter;

#ifdef __cplusplus
extern "C" {
#endif
void BatchToSpaceNoCropForNHWC(const void *input, void *output, const int *in_shape, int out_n, const int *block,
int data_size);
void BatchToSpaceForNHWC(const void *input, void *output, const int *in_shape, int out_n, const int *block,
const int *crops, int data_size);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP32_BATCH_TO_SPACE_H_

mindspore/lite/src/runtime/kernel/arm/nnacl/caffeprelu.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/caffeprelu.c View File

@@ -15,8 +15,8 @@
*/
#include "src/runtime/kernel/arm/nnacl/caffeprelu.h"

void PRelu(float *input, float *output, CaffePReluParameter *prelu_param_, int task_id) {
int block = int(prelu_param_->input_num_ / prelu_param_->op_parameter_.thread_num_);
void CaffePRelu(float *input, float *output, CaffePReluParameter *prelu_param_, int task_id) {
int block = (int)(prelu_param_->input_num_ / prelu_param_->op_parameter_.thread_num_);
int start = task_id * block;
int end = start + block;
if (task_id == prelu_param_->op_parameter_.thread_num_ - 1) {

+ 9
- 3
mindspore/lite/src/runtime/kernel/arm/nnacl/caffeprelu.h View File

@@ -18,15 +18,21 @@

#include "src/runtime/kernel/arm/nnacl/op_base.h"

struct CaffePReluParameter {
typedef struct CaffePReluParameter {
OpParameter op_parameter_;
float *negtive_slope_;
bool channeShared;
int channel_num_;
int input_num_;
int thread_num_;
};
} CaffePReluParameter;

void PRelu(float *input, float *output, CaffePReluParameter *prelu_param_, int task_id);
#ifdef __cplusplus
extern "C" {
#endif
void CaffePRelu(float *input, float *output, CaffePReluParameter *prelu_param_, int task_id);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_CAFFEPRELU_H_

mindspore/lite/src/runtime/kernel/arm/nnacl/common_func.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/common_func.c View File

@@ -227,7 +227,7 @@ void PostFuncInt8(const int *in, const int *bias, int8_t *out, int oc, int plane
int src_index = c8div * plane8 * 8 + r * 8 + c8mod;
int dst_index = r * oc + c;
int32_t value = in[src_index];
if (bias != nullptr) {
if (bias != NULL) {
value = in[src_index] + bias[c];
}
value = MultiplyByQuantizedMultiplier(value, multiplier, left_shift, right_shift) + zp;

+ 0
- 1
mindspore/lite/src/runtime/kernel/arm/nnacl/conv_parameter.h View File

@@ -55,7 +55,6 @@ typedef struct ConvParameter {
bool is_relu6_;
} ConvParameter;


typedef struct SlidingWindowParam {
int left_;
int right_;


mindspore/lite/src/runtime/kernel/arm/nnacl/depth_to_space.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/depth_to_space.c View File

@@ -33,8 +33,7 @@ void DepthToSpaceForNHWC(const void *input, void *output, int *in_shape, DepthTo
for (int l = 0; l < block_size; ++l) {
size_t out_offset = (out_offset_w + l * param->out_stride_dim1_) * param->data_type_size_;
size_t in_offset = (in_offset_w + l * block_size * param->out_stride_dim2_) * param->data_type_size_;
memcpy(reinterpret_cast<int8_t *>(output) + out_offset, reinterpret_cast<const int8_t *>(input) + in_offset,
copy_size);
memcpy((int8_t *)output + out_offset, (int8_t *)input + in_offset, copy_size);
}
}
}

+ 7
- 0
mindspore/lite/src/runtime/kernel/arm/nnacl/depth_to_space.h View File

@@ -17,5 +17,12 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_DEPTH_TO_SPACE_H_
#include "nnacl/depth_to_space_parameter.h"

#ifdef __cplusplus
extern "C" {
#endif
void DepthToSpaceForNHWC(const void *input, void *output, int *in_shape, DepthToSpaceParameter *param);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_DEPTH_TO_SPACE_H_

mindspore/lite/src/runtime/kernel/arm/nnacl/flatten.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/flatten.c View File

@@ -19,4 +19,3 @@
void Flatten(const void *input, void *output, FlattenParameter *flatten_param) {
memcpy(output, input, flatten_param->size);
}


+ 7
- 0
mindspore/lite/src/runtime/kernel/arm/nnacl/flatten.h View File

@@ -22,5 +22,12 @@ typedef struct FlattenParameter {
int size;
} FlattenParameter;

#ifdef __cplusplus
extern "C" {
#endif
void Flatten(const void *input, void *output, FlattenParameter *flatten_param);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FLATTEN_H_

mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/cast_fp16.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/cast_fp16.c View File


+ 8
- 2
mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/cast_fp16.h View File

@@ -16,11 +16,17 @@
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_CAST_FP16_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_CAST_FP16_H_

#ifdef ENABLE_NEON
#include <arm_neon.h>
#endif
#include "nnacl/op_base.h"
#include "nnacl/fp32/cast.h"

#ifdef __cplusplus
extern "C" {
#endif
void Float32ToFloat16(const float *input, float16_t *output, int number);
void Float16ToFloat32(const float16_t *input, float *output, int number);

#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_CAST_FP16_H_

+ 2
- 2
mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/common_func.h View File

@@ -30,8 +30,8 @@ extern "C" {
#ifdef ENABLE_ARM64
void ConvDwFp16Center(float16_t *dst, const float16_t *src, const float16_t *weight, const float16_t *bias,
size_t height, size_t width, size_t kernel_h, size_t kernel_w, size_t out_h_step,
size_t block_channel, size_t in_sh_step, size_t in_sw_step, size_t in_kh_step,
size_t in_kw_step, size_t relu, size_t relu6);
size_t block_channel, size_t in_sh_step, size_t in_sw_step, size_t in_kh_step, size_t in_kw_step,
size_t relu, size_t relu6);
void DeconvDwFp16Center(float16_t *dst, const float16_t *src, const float16_t *weight, size_t height, size_t width,
size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t in_sh_step,
size_t in_sw_step, size_t in_kh_step, size_t in_kw_step);


mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/conv_depthwise_fp16.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/conv_depthwise_fp16.c View File

@@ -158,12 +158,11 @@ void ConvDwC8Fp16(float16_t *output_data, const float16_t *input_data, const flo
const float16_t *in_t = src_data + in_h_start * sliding->in_h_step_ + in_w_start * sliding->block_channel_;
float16_t *out_t = dst_data + sliding->top_ * sliding->out_h_step_ + sliding->left_ * sliding->block_channel_;
#ifdef ENABLE_ARM64
ConvDwFp16Center(out_t, in_t, weight, bias, sliding->bottom_ - sliding->top_,
sliding->right_ - sliding->left_, conv_param->kernel_h_, conv_param->kernel_w_,
sliding->out_h_step_ * sizeof(float16_t), sliding->block_channel_ * sizeof(float16_t),
sliding->in_sh_step_ * sizeof(float16_t), sliding->in_sw_step_ * sizeof(float16_t),
sliding->in_kh_step_ * sizeof(float16_t), sliding->in_kw_step_ * sizeof(float16_t),
conv_param->is_relu_, conv_param->is_relu6_);
ConvDwFp16Center(out_t, in_t, weight, bias, sliding->bottom_ - sliding->top_, sliding->right_ - sliding->left_,
conv_param->kernel_h_, conv_param->kernel_w_, sliding->out_h_step_ * sizeof(float16_t),
sliding->block_channel_ * sizeof(float16_t), sliding->in_sh_step_ * sizeof(float16_t),
sliding->in_sw_step_ * sizeof(float16_t), sliding->in_kh_step_ * sizeof(float16_t),
sliding->in_kw_step_ * sizeof(float16_t), conv_param->is_relu_, conv_param->is_relu6_);
#else
DepthwiseCenterFp16(out_t, in_t, weight, bias, sliding->bottom_ - sliding->top_,
sliding->right_ - sliding->left_, conv_param->kernel_h_, conv_param->kernel_w_,
@@ -313,11 +312,11 @@ void DeconvDwC8Fp16(float16_t *output_data, const float16_t *input_data, const f
const float16_t *in_t =
src_data + sliding->top_ * sliding->out_h_step_ + sliding->left_ * sliding->block_channel_;
#ifdef ENABLE_ARM64
DeconvDwFp16Center(out_t, in_t, weight, sliding->bottom_ - sliding->top_,
sliding->right_ - sliding->left_, conv_param->kernel_h_, conv_param->kernel_w_,
sliding->out_h_step_ * sizeof(float16_t), sliding->block_channel_ * sizeof(float16_t),
sliding->in_sh_step_ * sizeof(float16_t), sliding->in_sw_step_ * sizeof(float16_t),
sliding->in_kh_step_ * sizeof(float16_t), sliding->in_kw_step_ * sizeof(float16_t));
DeconvDwFp16Center(out_t, in_t, weight, sliding->bottom_ - sliding->top_, sliding->right_ - sliding->left_,
conv_param->kernel_h_, conv_param->kernel_w_, sliding->out_h_step_ * sizeof(float16_t),
sliding->block_channel_ * sizeof(float16_t), sliding->in_sh_step_ * sizeof(float16_t),
sliding->in_sw_step_ * sizeof(float16_t), sliding->in_kh_step_ * sizeof(float16_t),
sliding->in_kw_step_ * sizeof(float16_t));
#else
DeconvDepthwiseCenterFp16(out_t, in_t, weight, sliding->bottom_ - sliding->top_,
sliding->right_ - sliding->left_, conv_param->kernel_h_, conv_param->kernel_w_,

+ 6
- 0
mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/conv_depthwise_fp16.h View File

@@ -20,6 +20,9 @@
#include "nnacl/conv_parameter.h"
#include "nnacl/fp32/conv_depthwise.h"

#ifdef __cplusplus
extern "C" {
#endif
void ConvDwC8Fp16(float16_t *output_data, const float16_t *input_data, const float16_t *weight_data,
const float16_t *bias_data, const ConvParameter *conv_param, const SlidingWindowParam *sliding,
int task_id);
@@ -27,5 +30,8 @@ void ConvDwC8Fp16(float16_t *output_data, const float16_t *input_data, const flo
void DeconvDwC8Fp16(float16_t *output_data, const float16_t *input_data, const float16_t *weight_data,
const float16_t *bias_data, const ConvParameter *conv_param, const SlidingWindowParam *sliding,
int task_id);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP16_CONV_DEPTHWISE_FP16_H_

mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/conv_fp16.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/conv_fp16.c View File

@@ -18,14 +18,19 @@
#include "nnacl/fp16/pack_fp16.h"
#include "nnacl/fp16/winograd_transform_fp16.h"


#ifdef __cplusplus
extern "C" {
#endif
#ifdef ENABLE_ARM64
void IndirectGemmFp16_16x8(float16_t *output, float16_t *input, float16_t *weight, float16_t *bias, size_t step,
size_t ic4, size_t oc8, size_t offset, size_t mode, size_t writeC4, size_t relu,
size_t relu6);
#endif
}

#ifdef __cplusplus
}
#endif
#ifndef ENABLE_NEON
void IndirectGemmFp16_16x8(float16_t *output, float16_t *input, float16_t *weight, float16_t *bias, size_t step,
size_t ic4, size_t out_channel, size_t offset, size_t mode, size_t writeC4, size_t relu,
@@ -54,12 +59,15 @@ void IndirectGemmFp16_16x8(float16_t *output, float16_t *input, float16_t *weigh
}
}

(output + out_tile_offset)[0] = tmp_out + bias[i];
float16_t *tmp = output + out_tile_offset;
if (bias != NULL) {
tmp[0] = tmp_out + bias[i];
}
if (relu) {
(output + out_tile_offset)[0] = (output + out_tile_offset)[0] < 0 ? 0 : (output + out_tile_offset)[0];
tmp[0] = tmp[0] < 0 ? 0 : tmp[0];
} else if (relu6) {
(output + out_tile_offset)[0] = (output + out_tile_offset)[0] < 0 ? 0 : (output + out_tile_offset)[0];
(output + out_tile_offset)[0] = (output + out_tile_offset)[0] > 6 ? 6 : (output + out_tile_offset)[0];
mp[0] = tmp[0] < 0 ? 0 : tmp[0];
tmp[0] = tmp[0] > 6 ? 6 : tmp[0];
}
}
}

+ 6
- 1
mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/conv_fp16.h View File

@@ -25,6 +25,9 @@ void IndirectGemmFp16_16x8(float16_t *output, float16_t *input, float16_t *weigh
size_t relu6);
#endif

#ifdef __cplusplus
extern "C" {
#endif
// fp16 convolution common (im2col+gemm)
void ConvFp16(float16_t *input_data, float16_t *packed_input, float16_t *packed_weight, float16_t *bias_data,
float16_t *tmp_out_block, float16_t *output_data, int task_id, ConvParameter *conv_param);
@@ -33,6 +36,8 @@ void ConvFp16(float16_t *input_data, float16_t *packed_input, float16_t *packed_
void Conv3x3Fp16(float16_t *input_data, float16_t *transed_weight, const float16_t *bias_data, float16_t *output_data,
float16_t *tile_buffer, float16_t *block_unit_buffer, float16_t *tmp_dst_buffer, float16_t *tmp_out,
int task_id, ConvParameter *conv_param);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP16_CONV_FP16_H_


mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/pack_fp16.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/pack_fp16.c View File

@@ -15,8 +15,8 @@
*/

#include "nnacl/fp16/pack_fp16.h"
#include <cstring>
#include <cstdlib>
#include <string.h>
#include <stdlib.h>

void Im2ColPackUnitFp16(float16_t *input_data, ConvParameter *conv_param, float16_t *packed_input, int real_cal_num,
int block_index) {

+ 6
- 0
mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/pack_fp16.h View File

@@ -23,6 +23,9 @@
#include "nnacl/conv_parameter.h"
#include "nnacl/op_base.h"

#ifdef __cplusplus
extern "C" {
#endif
void Im2ColPackUnitFp16(float16_t *input_data, ConvParameter *conv_param, float16_t *packed_input, int real_cal_num,
int block_index);

@@ -53,5 +56,8 @@ void PackNCHWFp32ToNC8HW8Fp16(float *src, float16_t *dst, int batch, int plane,
void PackNHWCFp32ToNHWC8Fp16(float *src, float16_t *dst, int batch, int plane, int channel);

void PackNHWC8Fp16ToNHWCFp32(float16_t *src, float *dst, int batch, int plane, int channel);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP16_PACK_FP16_H_

mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/pooling_fp16.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/pooling_fp16.c View File


+ 8
- 2
mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/pooling_fp16.h View File

@@ -17,11 +17,17 @@
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP16_POOLING_FP16_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP16_POOLING_FP16_H_

#ifdef ENABLE_NEON
#include <arm_neon.h>
#endif
#include "nnacl/pooling_parameter.h"

#ifdef __cplusplus
extern "C" {
#endif
void AvgPoolingFp16(const float16_t *input_ptr, float16_t *output_ptr, PoolingParameter *pooling_param, int task_id);

void MaxPoolingFp16(const float16_t *input_ptr, float16_t *output_ptr, PoolingParameter *pooling_param, int task_id);

#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP16_POOLING_FP16_H_

mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/winograd_transform_fp16.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/winograd_transform_fp16.c View File


+ 7
- 0
mindspore/lite/src/runtime/kernel/arm/nnacl/fp16/winograd_transform_fp16.h View File

@@ -22,6 +22,10 @@
#include "nnacl/fp16/pack_fp16.h"
#include "nnacl/fp16/conv_fp16.h"

#ifdef __cplusplus
extern "C" {
#endif

// for fp16 convolution 3x3 filter/input/output transform
void Conv3x3Fp16InputUnit(float16_t *tmp_data, float16_t *trans_input_data, size_t step);

@@ -35,5 +39,8 @@ void Conv3x3Fp16OutputUnit(const float16_t *gemm_out, const float16_t *bias_data

void Conv3x3Fp16OutputTransform(const float16_t *gemm_out, float16_t *out_data, const float16_t *bias_data,
int start_index, int real_cal_num, int out_w_block, ConvParameter *conv_param);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP16_WINOGRAD_TRANSFORM_FP16_H_

+ 1
- 1
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/activation.h View File

@@ -24,7 +24,7 @@
typedef struct ActivationParameter {
OpParameter op_parameter_;
int type_;
float alpha_{0.2};
float alpha_;
} ActivationParameter;

inline int Relu(const float *src, int length, float *dst) {


mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/arg_min_max.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/arg_min_max.c View File

@@ -18,14 +18,13 @@
#include <float.h>

int ArgCompareAscFp32(const void *a, const void *b) {
return reinterpret_cast<const ArgElement *>(a)->data_.f_data_
- reinterpret_cast<const ArgElement *>(b)->data_.f_data_;
return ((ArgElement *)a)->data_.f_data_ - ((ArgElement *)b)->data_.f_data_;
}

int ArgCompareDescFp32(const void *a, const void *b) {
// cmp funtion of qsort must return int type
auto b_value = reinterpret_cast<const ArgElement *>(b)->data_.f_data_;
auto a_value = reinterpret_cast<const ArgElement *>(a)->data_.f_data_;
auto b_value = ((ArgElement *)b)->data_.f_data_;
auto a_value = ((ArgElement *)a)->data_.f_data_;
int res = b_value > a_value ? 1 : -1;
return res;
}

+ 7
- 0
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/arg_min_max.h View File

@@ -18,6 +18,9 @@

#include "nnacl/arg_min_max_parameter.h"

#ifdef __cplusplus
extern "C" {
#endif
void ArgMax(const float *input, float *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
int after_axis_count);
void ArgMin(const float *input, float *output, ArgMinMaxParameter *param, int pre_axis_count, int axis_count,
@@ -30,4 +33,8 @@ void ArgMaxDim2(const float *input, float *output, const int *in_shape, ArgMinMa
void ArgMinDim2(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param);
void ArgMaxDim3(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param);
void ArgMinDim3(const float *input, float *output, const int *in_shape, ArgMinMaxParameter *param);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP32_ARG_MIN_MAX_H_

mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/arithmetic.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/arithmetic.c View File

@@ -405,7 +405,7 @@ int ElementLogicalAnd(float *input0, float *input1, float *output, int element_s
#ifdef ENABLE_NEON
float32x4_t vtrue = {1, 1, 1, 1};
float32x4_t vfalse = {0, 0, 0, 0};
uint32x4_t mask = vmovq_n_u32((uint32_t(1u << 31) - 1));
uint32x4_t mask = vmovq_n_u32(((uint32_t)(1u << 31) - 1));
uint32x4_t zeros = {0, 0, 0, 0};
#endif

@@ -455,7 +455,7 @@ int ElementLogicalOr(float *input0, float *input1, float *output, int element_si
#ifdef ENABLE_NEON
float32x4_t vtrue = {1, 1, 1, 1};
float32x4_t vfalse = {0, 0, 0, 0};
uint32x4_t mask = vmovq_n_u32((uint32_t(1u << 31) - 1));
uint32x4_t mask = vmovq_n_u32(((uint32_t)(1u << 31) - 1));
uint32x4_t zeros = {0, 0, 0, 0};
#endif

@@ -552,8 +552,8 @@ int BroadcastMinimum(float *input0, float *input1, float *tile_input0, float *ti
}

float FloatNotEqualCheck(float in0, float in1) {
float minus = in0 - in1;
if (minus <= ACCURACY_DATA && minus >= -ACCURACY_DATA) {
float tmp = in0 - in1;
if (tmp <= ACCURACY_DATA && tmp >= -ACCURACY_DATA) {
return (float)false;
}
return (float)true;
@@ -595,8 +595,8 @@ int BroadcastNotEqual(float *input0, float *input1, float *tile_input0, float *t
}

float FloatEqualCheck(float in0, float in1) {
float minus = in0 - in1;
if (minus <= ACCURACY_DATA && minus >= -ACCURACY_DATA) {
float tmp = in0 - in1;
if (tmp <= ACCURACY_DATA && tmp >= -ACCURACY_DATA) {
return (float)true;
}
return (float)false;

+ 7
- 1
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/arithmetic.h View File

@@ -23,6 +23,9 @@
#include "nnacl/arithmetic_common.h"
#include "nnacl/errorcode.h"

#ifdef __cplusplus
extern "C" {
#endif
int ElementMul(float *input0, float *input1, float *output, int element_size);
int ElementMulRelu(float *input0, float *input1, float *output, int element_size);
int ElementMulRelu6(float *input0, float *input1, float *output, int element_size);
@@ -102,5 +105,8 @@ int BroadcastGreater(float *input0, float *input1, float *tile_input0, float *ti
int ElementGreaterEqual(float *input0, float *input1, float *output, int element_size);
int BroadcastGreaterEqual(float *input0, float *input1, float *tile_input0, float *tile_input1, float *output,
int element_size, ArithmeticParameter *param);
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_ARITHMETIC_H_
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_ARITHMETIC_H_

mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/arithmetic_self.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/arithmetic_self.c View File


+ 6
- 1
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/arithmetic_self.h View File

@@ -23,6 +23,9 @@
#include "nnacl/op_base.h"
#include "nnacl/errorcode.h"

#ifdef __cplusplus
extern "C" {
#endif
int ElementAbs(float *input, float *output, int element_size);

int ElementCos(float *input, float *output, int element_size);
@@ -46,6 +49,8 @@ int ElementRound(float *input, float *output, int element_size);
int ElementFloor(float *input, float *output, int element_size);

int ElementCeil(float *input, float *output, int number);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_ARITHMETIC_SELF_H_


mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/batchnorm.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/batchnorm.c View File

@@ -20,7 +20,7 @@
void BatchNorm(float *output_ptr, const float *input_ptr, const float *mean_ptr, const float *variance_ptr, int task_id,
BatchNormParameter *param) {
for (int c = task_id; c < param->channel_; c += param->op_parameter_.thread_num_) {
auto variance_sqrt = sqrt(variance_ptr[c] + param->epsilon_);
float variance_sqrt = sqrt(variance_ptr[c] + param->epsilon_);
for (int u = 0; u < param->unit_; u++) {
output_ptr[u * param->channel_ + c] = (input_ptr[u * param->channel_ + c] - mean_ptr[c]) / variance_sqrt;
}
@@ -30,7 +30,7 @@ void BatchNorm(float *output_ptr, const float *input_ptr, const float *mean_ptr,
void FusedBatchNorm(float *output_ptr, const float *input_ptr, const float *scale_ptr, const float *offest_ptr,
const float *mean_ptr, const float *variance_ptr, int task_id, BatchNormParameter *param) {
for (int c = task_id; c < param->channel_; c += param->op_parameter_.thread_num_) {
auto variance_sqrt = sqrt(variance_ptr[c] + param->epsilon_);
float variance_sqrt = sqrt(variance_ptr[c] + param->epsilon_);
for (int u = 0; u < param->unit_; u++) {
output_ptr[u * param->channel_ + c] =
(input_ptr[u * param->channel_ + c] - mean_ptr[c]) / variance_sqrt * scale_ptr[c] + offest_ptr[c];

+ 8
- 0
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/batchnorm.h View File

@@ -26,10 +26,18 @@ typedef struct BatchNormParameter {
int channel_;
} BatchNormParameter;

#ifdef __cplusplus
extern "C" {
#endif

void BatchNorm(float *output_ptr, const float *input_ptr, const float *mean_ptr, const float *variance_ptr, int task_id,
BatchNormParameter *param);

void FusedBatchNorm(float *output_ptr, const float *input_ptr, const float *scale_ptr, const float *offest_ptr,
const float *mean_ptr, const float *variance_ptr, int task_id, BatchNormParameter *param);

#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FUSED_BATCHNORM_H_

mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/broadcast_to.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/broadcast_to.c View File

@@ -68,32 +68,28 @@ int BroadcastTo(const float *input, BroadcastShapeInfo *shape_info, float *outpu
for (int32_t dim1 = 0; dim1 < shape_info->input_shape_[1]; ++dim1) {
for (int32_t dim2 = 0; dim2 < shape_info->input_shape_[2]; ++dim2) {
if (shape_info->input_shape_[3] == shape_info->output_shape_[3]) {
memcpy(out_base + output_dim_offset[0] * dim0 + output_dim_offset[1] * dim1
+ output_dim_offset[2] * dim2,
in_base + input_dim_offset[0] * dim0 + input_dim_offset[1] * dim1
+ input_dim_offset[2] * dim2, input_dim_offset[2]);
memcpy(out_base + output_dim_offset[0] * dim0 + output_dim_offset[1] * dim1 + output_dim_offset[2] * dim2,
in_base + input_dim_offset[0] * dim0 + input_dim_offset[1] * dim1 + input_dim_offset[2] * dim2,
input_dim_offset[2]);
} else {
for (int32_t dim3 = 0; dim3 < shape_info->output_shape_[3]; ++dim3) {
memcpy(out_base + output_dim_offset[0] * dim0 + output_dim_offset[1] * dim1
+ output_dim_offset[2] * dim2 + dim3 * 4,
in_base + input_dim_offset[0] * dim0 + input_dim_offset[1] * dim1
+ input_dim_offset[2] * dim2, 4);
memcpy(out_base + output_dim_offset[0] * dim0 + output_dim_offset[1] * dim1 + output_dim_offset[2] * dim2 +
dim3 * 4,
in_base + input_dim_offset[0] * dim0 + input_dim_offset[1] * dim1 + input_dim_offset[2] * dim2, 4);
}
}
}
if (shape_info->input_shape_[2] != shape_info->output_shape_[2]) {
for (int32_t dim2 = 0; dim2 < shape_info->output_shape_[2]; ++dim2) {
memcpy(out_base + output_dim_offset[0] * dim0 + output_dim_offset[1] * dim1
+ dim2 * output_dim_offset[2],
out_base + output_dim_offset[0] * dim0 + output_dim_offset[1] * dim1,
output_dim_offset[2]);
memcpy(out_base + output_dim_offset[0] * dim0 + output_dim_offset[1] * dim1 + dim2 * output_dim_offset[2],
out_base + output_dim_offset[0] * dim0 + output_dim_offset[1] * dim1, output_dim_offset[2]);
}
}
}
if (shape_info->input_shape_[1] != shape_info->output_shape_[1]) {
for (int32_t dim1 = 0; dim1 < shape_info->output_shape_[1]; ++dim1) {
memcpy(out_base + output_dim_offset[0] * dim0 + output_dim_offset[1] * dim1,
out_base + output_dim_offset[0] * dim0, output_dim_offset[1]);
out_base + output_dim_offset[0] * dim0, output_dim_offset[1]);
}
}
}
@@ -104,5 +100,3 @@ int BroadcastTo(const float *input, BroadcastShapeInfo *shape_info, float *outpu
}
return 0;
}



+ 7
- 1
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/broadcast_to.h View File

@@ -36,6 +36,12 @@ typedef struct BroadcastShapeInfo {
int output_shape_size_;
} BroadcastShapeInfo;

#ifdef __cplusplus
extern "C" {
#endif
int BroadcastTo(const float *input, BroadcastShapeInfo *shape_info, float *output);
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP32_BROADCAST_TO_H_
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP32_BROADCAST_TO_H_

mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/cast.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/cast.c View File


+ 7
- 1
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/cast.h View File

@@ -28,10 +28,16 @@ typedef struct CastParameter {
int dst_type_;
} CastParameter;

#ifdef __cplusplus
extern "C" {
#endif
void Uint8ToFloat32(const uint8_t *input, float *output, int number);
void Uint8ToInt8(const uint8_t *input, int8_t *output, int number);
void Int8ToUint8(const int8_t *input, uint8_t *output, int number);
void Int32ToFloat32(const int32_t *input, float *output, int number);
void Float32ToInt32(const float *input, int32_t *output, int number);
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_CAST_H_
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_CAST_H_

mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/common_func.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/common_func.c View File

@@ -71,7 +71,7 @@ void PostConvFuncComm(const float *src_ptr_, float *out_ptr, const float *bias_p
int src_index = oc_div * size * plane_size + hw * size + oc_mod;
int dst_index = hw * stride + oc;
float value = src_ptr_[src_index];
if (bias_ptr != nullptr) {
if (bias_ptr != NULL) {
value = value + bias_ptr[oc];
}
value = (is_relu || is_relu6) ? (MSMAX(0.f, value)) : (value);
@@ -87,7 +87,7 @@ void PostConvFuncFp32C4(const float *c4_out_ptr, float *out_ptr, const float *bi
#ifndef ENABLE_ARM64
PostConvFuncComm(c4_out_ptr, out_ptr, bias_ptr, output_channel, plane_size, stride, is_relu, is_relu6, C4NUM);
#else
if (bias_ptr != nullptr) {
if (bias_ptr != NULL) {
if (is_relu) {
C4BiasAddRelu(out_ptr, c4_out_ptr, bias_ptr, output_channel, plane_size, stride * sizeof(float));
} else if (is_relu6) {

+ 6
- 6
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/common_func.h View File

@@ -42,9 +42,9 @@ void MatrixMultiAdd(float *c11, float *c12, float *c21, float *c22, float *x_ptr
void ConvDwFp32Center(float *dst, const float *src, const float *weight, const float *bias, size_t height, size_t width,
size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t in_sh_step,
size_t in_sw_step, size_t in_kh_step, size_t in_kw_step, size_t relu, size_t relu6);
void DeconvDwFp32Center(float *dst, const float *src, const float *weight, size_t height, size_t width,
size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t in_sh_step,
size_t in_sw_step, size_t in_kh_step, size_t in_kw_step);
void DeconvDwFp32Center(float *dst, const float *src, const float *weight, size_t height, size_t width, size_t kernel_h,
size_t kernel_w, size_t out_h_step, size_t block_channel, size_t in_sh_step, size_t in_sw_step,
size_t in_kh_step, size_t in_kw_step);
#endif

#ifdef ENABLE_ARM64
@@ -53,9 +53,9 @@ void BiasAddRelu6(const float *bias, float *data, size_t oc4, size_t plan_size);
void BiasAddRelu(const float *bias, float *data, size_t oc4, size_t plan_size);
void Relu6(float *data, size_t element4);
void Relu(float *data, size_t element4);
void C4BiasAdd(float *dst, const float *input, const float* bias, size_t oc, size_t plane_size, size_t stride);
void C4BiasAddRelu(float *dst, const float *input, const float* bias, size_t oc, size_t plane_size, size_t stride);
void C4BiasAddRelu6(float *dst, const float *input, const float* bias, size_t oc, size_t plane_size, size_t stride);
void C4BiasAdd(float *dst, const float *input, const float *bias, size_t oc, size_t plane_size, size_t stride);
void C4BiasAddRelu(float *dst, const float *input, const float *bias, size_t oc, size_t plane_size, size_t stride);
void C4BiasAddRelu6(float *dst, const float *input, const float *bias, size_t oc, size_t plane_size, size_t stride);
void C4Relu(float *dst, const float *input, size_t oc, size_t plane_size, size_t stride);
void C4Relu6(float *dst, const float *input, size_t oc, size_t plane_size, size_t stride);
#endif


mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/concat.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/concat.c View File

@@ -28,10 +28,10 @@ void Concat(void **input, int input_num, int axis, int **inputs_output_shape, si
after_axis_size *= inputs_output_shape[0][i];
}
int axis_offset = 0;
uint8_t *dst_base = reinterpret_cast<uint8_t *>(output);
uint8_t *dst_base = (output);
size_t output_stride = after_axis_size * inputs_output_shape[input_num][axis];
for (int i = 0; i < input_num; ++i) {
uint8_t *src_base = reinterpret_cast<uint8_t *>(input[i]);
uint8_t *src_base = (input[i]);
size_t input_stride = after_axis_size * inputs_output_shape[i][axis];
for (int j = 0; j < before_axis_size; ++j) {
uint8_t *src = src_base + j * input_stride;
@@ -41,4 +41,3 @@ void Concat(void **input, int input_num, int axis, int **inputs_output_shape, si
axis_offset += inputs_output_shape[i][axis];
}
}


+ 6
- 1
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/concat.h View File

@@ -19,7 +19,12 @@

#include "nnacl/op_base.h"

#ifdef __cplusplus
extern "C" {
#endif
void Concat(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP32_CONCAT_H_


mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/conv.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/conv.c View File

@@ -288,7 +288,7 @@ void ConvWinogardFp32(float *input_data, float *trans_weight, const float *bias_
tmp_data + task_id * tmp_data_offset, cal_num, out_tile_index, out_w_block, conv_param,
input_trans_func);
// step 3 : gemm
gemm_func(gemm_out + task_id * gemm_out_offset, trans_input + task_id * trans_input_offset, trans_weight, nullptr,
gemm_func(gemm_out + task_id * gemm_out_offset, trans_input + task_id * trans_input_offset, trans_weight, NULL,
input_unit_square, ic4, oc4 * C4NUM, output_offset, 1, 1, 0, 0);

// step 4 : output transform
@@ -380,7 +380,7 @@ void Conv3x3Fp32(float *input_data, float *transed_weight, const float *bias_dat
out_w_block, conv_param);

gemm_func(tmp_dst_buffer + task_id * tmp_dst_buffer_offset, tile_buffer + task_id * tile_buffer_offset,
transed_weight, nullptr, input_unit_square, ic4, oc4 * C4NUM,
transed_weight, NULL, input_unit_square, ic4, oc4 * C4NUM,
oc4 * C4NUM * input_unit_square * sizeof(float), 1, 1, 0, 0);

Conv3x3Fp32OutputTransform(tmp_dst_buffer + task_id * tmp_dst_buffer_offset, nc4hw4_out, bias_data, start_index,

+ 7
- 1
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/conv.h View File

@@ -28,11 +28,14 @@
#include "nnacl/winograd_utils.h"
#include "nnacl/fp32/conv_depthwise.h"

using TmpBufferAddress = float *;
typedef float *TmpBufferAddress;
typedef void (*GEMM_FUNC_FP32)(float *output, const float *input, const float *weight, const float *bias, size_t step,
size_t ic4, size_t output_channel, size_t offset, size_t mode, size_t writeC4,
size_t relu, size_t relu6);

#ifdef __cplusplus
extern "C" {
#endif
void SWBorder(float *dst, const float *src, const float *weight, const float *bias, int top, int bottom, int left,
int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding);

@@ -64,5 +67,8 @@ void UnPackWinogradOutput(const float *src, float *dst, int batch, int height, i
// fp32 conv3x3
void Conv3x3Fp32(float *input_data, float *transed_weight, const float *bias_data, float *output_data,
TmpBufferAddress *buffer_list, int task_id, ConvParameter *conv_param, GEMM_FUNC_FP32 gemm_func);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP32_CONV_H_

mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/conv_depthwise.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/conv_depthwise.c View File


+ 8
- 0
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/conv_depthwise.h View File

@@ -25,6 +25,10 @@ void DepthwiseCenter(float *dst, const float *src, const float *weight, const fl
int in_kh_step, int in_kw_step, bool is_relu, bool is_relu6);
#endif

#ifdef __cplusplus
extern "C" {
#endif

void InitSlidingParam(SlidingWindowParam *sliding, const ConvParameter *conv_param, int block);

void InitSlidingParamConv(SlidingWindowParam *sliding, const ConvParameter *conv_param, int block);
@@ -49,4 +53,8 @@ void ConvDw3x3Fp32(float *output_data, const float *input_data, const float *wei
void DeconvDwC4Fp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data,
const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id);

#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP32_CONV_DEPTHWISE_H_

mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/crop.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/crop.c View File

@@ -91,4 +91,3 @@ void Crop4DNoParallel(const float *input, float *output, const int *in_shape, co
}
}
}


+ 7
- 1
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/crop.h View File

@@ -20,8 +20,14 @@

#define CROP_OFFSET_MAX_SIZE 4

#ifdef __cplusplus
extern "C" {
#endif
void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, CropParameter *crop_param);
void Crop4DNoParallel(const float *input, float *output, const int *in_shape, const int *out_shape,
CropParameter *crop_param);
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP32_CROP_H_
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP32_CROP_H_

mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/deconv.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/deconv.c View File


+ 6
- 0
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/deconv.h View File

@@ -21,6 +21,9 @@
#include "nnacl/conv_parameter.h"
#include "nnacl/fp32/strassen_matmul.h"

#ifdef __cplusplus
extern "C" {
#endif
void PackDeConvWeightFp32(const float *weight, float *dst, int input_channel, int output_channel, int plane);

int DeConvFp32(const float *input, const float *weight, float *output, float *tmp_buffer,
@@ -30,5 +33,8 @@ int DeConvPostFp32C4(const float *src, float *tmp_c4, float *dst, const float *b
int input_plane, int kernel_plane, int output_plane, ConvParameter *conv_param);
int DeConvPostFp32C8x8(const float *src, float *tmp_out, const float *bias, float *dst, int output_channel,
ConvParameter *conv_param);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP32_DECONV_H_

mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/elu.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/elu.c View File

@@ -16,9 +16,7 @@

#include "nnacl/fp32/elu.h"
#include <math.h>
#include "include/errorcode.h"
#include "nnacl/errorcode.h"
#include "mindspore/core/utils/log_adapter.h"

void Calculate_Data(float *input_data, float *output_data, int num, EluParameter *parameter) {
output_data[num] = input_data[num] < 0 ? parameter->alpha_ * expm1(input_data[num]) : input_data[num];

+ 6
- 0
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/elu.h View File

@@ -26,6 +26,12 @@ typedef struct EluParameter {
int in_size_;
} EluParameter;

#ifdef __cplusplus
extern "C" {
#endif
int Elu(float *input_data, float *output_data, EluParameter *parameter, int task_id);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP32_ELU_H_

mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/embedding_lookup.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/embedding_lookup.c View File

@@ -16,9 +16,7 @@

#include "nnacl/fp32/embedding_lookup.h"
#include <string.h>
#include "include/errorcode.h"
#include "nnacl/errorcode.h"
#include "mindspore/core/utils/log_adapter.h"

void l2_regulate(float *data, int size, float max_norm) {
float sum = 0;
@@ -35,7 +33,6 @@ void l2_regulate(float *data, int size, float max_norm) {

int CopyData(float *input_data, int *ids, float *output_data, int num, EmbeddingLookupParameter *parameter) {
if (ids[num] >= parameter->layer_num_ || ids[num] < 0) {
MS_LOG(ERROR) << "Embedding lookup index out of range";
return NNACL_ERRCODE_INDEX_OUT_OF_RANGE;
}
float *out_data = output_data + num * parameter->layer_size_;

+ 13
- 7
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/embedding_lookup.h View File

@@ -20,15 +20,21 @@
#include "nnacl/op_base.h"

typedef struct EmbeddingLookupParameter {
OpParameter op_parameter_;
bool *is_regulated_;
float max_norm_;
int ids_size_;
int layer_size_;
int layer_num_;
int thread_num;
OpParameter op_parameter_;
bool *is_regulated_;
float max_norm_;
int ids_size_;
int layer_size_;
int layer_num_;
int thread_num;
} EmbeddingLookupParameter;

#ifdef __cplusplus
extern "C" {
#endif
int EmbeddingLookup(float *input_data, int *ids, float *output_data, EmbeddingLookupParameter *parameter, int task_id);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP32_EMBEDDING_LOOKUP_H_

mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/expandDims.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/expandDims.c View File

@@ -22,4 +22,3 @@ int ExpandDims(float *input_ptr, float *output_ptr, size_t data_size) {
memcpy(output_ptr, input_ptr, data_size);
return NNACL_OK;
}


+ 6
- 1
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/expandDims.h View File

@@ -24,7 +24,12 @@ typedef struct ExpandDimsParameter {
int dim_;
} ExpandDimsParameter;

#ifdef __cplusplus
extern "C" {
#endif
int ExpandDims(float *input_ptr, float *output_ptr, size_t data_size);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_EXPANDDIMS_H_


mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/fill.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/fill.c View File

@@ -22,4 +22,3 @@ int Fill(float *output, int size, float data) {
}
return NNACL_OK;
}


+ 6
- 1
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/fill.h View File

@@ -30,7 +30,12 @@ typedef struct FillParameter {
int num_dims_;
} FillParameter;

#ifdef __cplusplus
extern "C" {
#endif
int Fill(float *output, int size, float data);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FILL_H_


mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/gather.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/gather.c View File

@@ -29,8 +29,8 @@ int Gather(float *input, int outer_size, int inner_size, int limit, int *indices
float *output) {
int i, m;
for (m = 0; m < outer_size; ++m) {
auto inputm = input + inner_size * m * limit;
auto outputm = output + inner_size * m * indices_element_size;
float *inputm = input + inner_size * m * limit;
float *outputm = output + inner_size * m * indices_element_size;
for (i = 0; i < indices_element_size; ++i) {
if (indices[i] < 0 || indices[i] > limit) {
return -1;
@@ -40,4 +40,3 @@ int Gather(float *input, int outer_size, int inner_size, int limit, int *indices
}
return 0;
}


+ 6
- 1
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/gather.h View File

@@ -25,8 +25,13 @@ typedef struct GatherParameter {
int batchDims_;
} GatherParameter;

#ifdef __cplusplus
extern "C" {
#endif
int Gather(float *input, int outer_size, int inner_size, int limit, int *indices, int indices_element_size,
float *output);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_GATHER_H_


mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/gatherNd.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/gatherNd.c View File

@@ -25,4 +25,3 @@ int GatherNd(float *input, float *output, int *in_offset, int area, int count) {
}
return NNACL_OK;
}


+ 6
- 1
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/gatherNd.h View File

@@ -24,7 +24,12 @@ typedef struct GatherNdParameter {
int batchDims_;
} GatherNdParameter;

#ifdef __cplusplus
extern "C" {
#endif
int GatherNd(float *input, float *output, int *in_offset, int area, int count);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_GATHERND_H_


mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/local_response_norm.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/local_response_norm.c View File


+ 6
- 0
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/local_response_norm.h View File

@@ -27,7 +27,13 @@ typedef struct LocalResponseNormParameter {
float beta_;
} LocalResponseNormParameter;

#ifdef __cplusplus
extern "C" {
#endif
int LocalResponseNorm(float *input_ptr, int out_size, int channel, float *output_ptr,
LocalResponseNormParameter *param);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_LOCAL_RESPONSE_NORM_H_

mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/lstm.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/lstm.c View File


+ 6
- 0
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/lstm.h View File

@@ -30,7 +30,13 @@ typedef struct LstmParameter {
bool bidirectional_;
} LstmParameter;

#ifdef __cplusplus
extern "C" {
#endif
void Lstm(float *output, const float *input, const float *weight_i, const float *weight_h, const float *bias,
float *hidden_state, float *cell_state, float *gate_buffer, LstmParameter *lstm_parm);
#ifdef __cplusplus
}
#endif

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP32_LSTM_H_

mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/matmul.cc → mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/matmul.c View File

@@ -235,7 +235,7 @@ void MatMul8x8(const float *a, const float *b, float *c, const float *bias, ActT
size_t bi = c8div * deep * 8 + d * 8 + c8mod;
value = value + a[ai] * b[bi];
}
if (bias != nullptr) value += bias[col];
if (bias != NULL) value += bias[col];
if (act_type == ActType_Relu6) value = MSMIN(6.0f, value);
if (act_type != ActType_No) value = MSMAX(0.0f, value);
c[ci] = value;

+ 3
- 5
mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/matmul.h View File

@@ -23,15 +23,13 @@
#include "nnacl/op_base.h"
#include "nnacl/matmul_parameter.h"

#ifdef __cplusplus
extern "C" {
#endif
void MatMul(const float *a, const float *b, float *c, const float *bias, ActType act_type, int depth, int row, int col);
void RowMajor2Row8Major(float *src_ptr, float *dst_ptr, int row, int col);
void RowMajor2Col8Major(float *src_ptr, float *dst_ptr, size_t row, size_t col);
void Row8x8Major2RowMajor(float *src_ptr, float *dst_ptr, size_t row, size_t col, size_t stride);
void MatMul8x8(const float *a, const float *b, float *c, const float *bias, float maxf, float minf, int deep,
int row_8_, int col_8_);
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __aarch64__
void MatmulFloatNeon64(const float *a, const float *b, float *c, const float *bias, int act_type, int depth, int row,
int col);


Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save