IR Unify

tags/v1.2.0-rc1
chenjianping 4 years ago
parent commit 7108df0d2c
100 changed files with 2118 additions and 675 deletions
  1. +1 -0  mindspore/lite/CMakeLists.txt
  2. +3 -3  mindspore/lite/include/model.h
  3. +1 -1  mindspore/lite/include/version.h
  4. +3 -9  mindspore/lite/micro/CMakeLists.txt
  5. +183 -17  mindspore/lite/micro/cmake/file_list.cmake
  6. +21 -0  mindspore/lite/micro/cmake/package_cmsis.cmake
  7. +0 -32  mindspore/lite/micro/cmake/package_micro_ops.cmake
  8. +20 -0  mindspore/lite/micro/cmake/package_nnacl.cmake
  9. +25 -0  mindspore/lite/micro/cmake/package_wrapper.cmake
  10. +0 -12  mindspore/lite/micro/cmake/wrapper.cmake
  11. +14 -9  mindspore/lite/micro/coder/CMakeLists.txt
  12. +3 -5  mindspore/lite/micro/coder/allocator/allocator.cc
  13. +3 -3  mindspore/lite/micro/coder/allocator/allocator.h
  14. +12 -7  mindspore/lite/micro/coder/coder.cc
  15. +6 -6  mindspore/lite/micro/coder/coder_config.h
  16. +3 -3  mindspore/lite/micro/coder/context.cc
  17. +1 -15  mindspore/lite/micro/coder/generator/component/benchmark_component.cc
  18. +0 -2  mindspore/lite/micro/coder/generator/component/benchmark_component.h
  19. +4 -5  mindspore/lite/micro/coder/generator/component/cmake_component.cc
  20. +4 -6  mindspore/lite/micro/coder/generator/component/common_component.cc
  21. +5 -5  mindspore/lite/micro/coder/generator/component/const_blocks/cmake_lists.h
  22. +6 -6  mindspore/lite/micro/coder/generator/component/const_blocks/debug_utils.h
  23. +4 -4  mindspore/lite/micro/coder/generator/component/const_blocks/license.h
  24. +6 -6  mindspore/lite/micro/coder/generator/component/const_blocks/load_input.h
  25. +6 -18  mindspore/lite/micro/coder/generator/component/const_blocks/micro_tensor.h
  26. +99 -0  mindspore/lite/micro/coder/generator/component/const_blocks/thread_pool.h
  27. +61 -0  mindspore/lite/micro/coder/generator/component/parallel_component.cc
  28. +35 -0  mindspore/lite/micro/coder/generator/component/parallel_component.h
  29. +8 -8  mindspore/lite/micro/coder/generator/component/weight_component.cc
  30. +12 -8  mindspore/lite/micro/coder/generator/generator.cc
  31. +14 -5  mindspore/lite/micro/coder/generator/inference/inference_generator.cc
  32. +1 -1  mindspore/lite/micro/coder/generator/inference/inference_generator.h
  33. +1 -1  mindspore/lite/micro/coder/generator/train/train_generator.cc
  34. +1 -1  mindspore/lite/micro/coder/generator/train/train_generator.h
  35. +8 -2  mindspore/lite/micro/coder/graph.cc
  36. +36 -9  mindspore/lite/micro/coder/opcoders/base/conv2d_base_coder.cc
  37. +13 -9  mindspore/lite/micro/coder/opcoders/base/conv2d_base_coder.h
  38. +153 -0  mindspore/lite/micro/coder/opcoders/base/detection_post_process_base_coder.cc
  39. +54 -0  mindspore/lite/micro/coder/opcoders/base/detection_post_process_base_coder.h
  40. +1 -1  mindspore/lite/micro/coder/opcoders/base/dtype_cast_coder.cc
  41. +1 -1  mindspore/lite/micro/coder/opcoders/base/dtype_cast_coder.h
  42. +6 -2  mindspore/lite/micro/coder/opcoders/base/full_connection_base_coder.cc
  43. +3 -2  mindspore/lite/micro/coder/opcoders/base/full_connection_base_coder.h
  44. +46 -35  mindspore/lite/micro/coder/opcoders/base/quant_dtype_cast_coder.cc
  45. +3 -5  mindspore/lite/micro/coder/opcoders/base/quant_dtype_cast_coder.h
  46. +3 -3  mindspore/lite/micro/coder/opcoders/base/reduce_base_coder.cc
  47. +4 -4  mindspore/lite/micro/coder/opcoders/base/reduce_base_coder.h
  48. +104 -0  mindspore/lite/micro/coder/opcoders/base/resize_base_coder.cc
  49. +49 -0  mindspore/lite/micro/coder/opcoders/base/resize_base_coder.h
  50. +1 -1  mindspore/lite/micro/coder/opcoders/base/softmax_base_coder.cc
  51. +1 -3  mindspore/lite/micro/coder/opcoders/base/softmax_base_coder.h
  52. +3 -3  mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/add_int8_coder.cc
  53. +24 -12  mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/conv2d_int8_coder.cc
  54. +0 -4  mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/dwconv_int8_coder.cc
  55. +2 -2  mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/fullconnection_int8_coder.h
  56. +2 -2  mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/mul_int8_coder.cc
  57. +5 -7  mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/pooling_int8_coder.cc
  58. +2 -2  mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/softmax_int8_coder.cc
  59. +143 -0  mindspore/lite/micro/coder/opcoders/nnacl/dequant/de_quant.cc
  60. +63 -0  mindspore/lite/micro/coder/opcoders/nnacl/dequant/de_quant.h
  61. +5 -5  mindspore/lite/micro/coder/opcoders/nnacl/fp32/activation_fp32_coder.cc
  62. +6 -9  mindspore/lite/micro/coder/opcoders/nnacl/fp32/addn_fp32_coder.cc
  63. +18 -17  mindspore/lite/micro/coder/opcoders/nnacl/fp32/arithmetic_fp32_coder.cc
  64. +4 -4  mindspore/lite/micro/coder/opcoders/nnacl/fp32/arithmetic_fp32_coder.h
  65. +4 -4  mindspore/lite/micro/coder/opcoders/nnacl/fp32/arithmetic_self_fp32_coder.h
  66. +2 -2  mindspore/lite/micro/coder/opcoders/nnacl/fp32/assign_add_fp32_coder.cc
  67. +5 -7  mindspore/lite/micro/coder/opcoders/nnacl/fp32/batchnorm_fp32_coder.cc
  68. +6 -0  mindspore/lite/micro/coder/opcoders/nnacl/fp32/batchnorm_fp32_coder.h
  69. +77 -0  mindspore/lite/micro/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.cc
  70. +43 -0  mindspore/lite/micro/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.h
  71. +4 -7  mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.cc
  72. +1 -1  mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.h
  73. +81 -42  mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_fp32_coder.cc
  74. +8 -6  mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_fp32_coder.h
  75. +9 -8  mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.cc
  76. +1 -1  mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.h
  77. +51 -25  mindspore/lite/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.cc
  78. +1 -0  mindspore/lite/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.h
  79. +5 -2  mindspore/lite/micro/coder/opcoders/nnacl/fp32/matmul_fp32_coder.cc
  80. +6 -6  mindspore/lite/micro/coder/opcoders/nnacl/fp32/pad_fp32_coder.cc
  81. +8 -16  mindspore/lite/micro/coder/opcoders/nnacl/fp32/pooling_fp32_coder.cc
  82. +1 -1  mindspore/lite/micro/coder/opcoders/nnacl/fp32/pooling_fp32_coder.h
  83. +2 -2  mindspore/lite/micro/coder/opcoders/nnacl/fp32/power_fp32_coder.cc
  84. +2 -2  mindspore/lite/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.cc
  85. +18 -25  mindspore/lite/micro/coder/opcoders/nnacl/fp32/scale_fp32_coder.cc
  86. +0 -74  mindspore/lite/micro/coder/opcoders/nnacl/fp32/slice_fp32_coder.cc
  87. +0 -37  mindspore/lite/micro/coder/opcoders/nnacl/fp32/slice_fp32_coder.h
  88. +3 -3  mindspore/lite/micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.cc
  89. +1 -1  mindspore/lite/micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.h
  90. +57 -0  mindspore/lite/micro/coder/opcoders/nnacl/fp32/splice_fp32_coder.cc
  91. +35 -0  mindspore/lite/micro/coder/opcoders/nnacl/fp32/splice_fp32_coder.h
  92. +2 -2  mindspore/lite/micro/coder/opcoders/nnacl/fp32/tile_fp32_coder.cc
  93. +5 -5  mindspore/lite/micro/coder/opcoders/nnacl/fp32/transpose_fp32_coder.cc
  94. +8 -8  mindspore/lite/micro/coder/opcoders/nnacl/fp32/transpose_fp32_coder.h
  95. +74 -0  mindspore/lite/micro/coder/opcoders/nnacl/int8/activation_int8_coder.cc
  96. +37 -27  mindspore/lite/micro/coder/opcoders/nnacl/int8/add_int8_coder.cc
  97. +5 -7  mindspore/lite/micro/coder/opcoders/nnacl/int8/add_int8_coder.h
  98. +162 -0  mindspore/lite/micro/coder/opcoders/nnacl/int8/batchnorm_int8_coder.cc
  99. +49 -0  mindspore/lite/micro/coder/opcoders/nnacl/int8/batchnorm_int8_coder.h
  100. +16 -12  mindspore/lite/micro/coder/opcoders/nnacl/int8/concat_int8_coder.cc

+1 -0  mindspore/lite/CMakeLists.txt

@@ -72,6 +72,7 @@ add_compile_definitions(NO_DLIB)
add_compile_options(-fPIC)

if(SUPPORT_TRAIN)
set(BUILD_MINDDATA "full")
if(PLATFORM_ARM64)
set(RUNTIME_COMPONENT_NAME train-android-aarch64)
elseif(PLATFORM_ARM32)


+3 -3  mindspore/lite/include/model.h

@@ -19,14 +19,14 @@
#include "include/lite_utils.h"

namespace mindspore::lite {
class PrimitiveC;
struct MS_API Model {
struct Node {
String name_;
NodeType node_type_;
PrimitiveC *primitive_;
const void *primitive_;
Uint32Vector input_indices_;
Uint32Vector output_indices_;
int quant_type_;
};
using NodePtrVector = std::vector<Node *>;
struct SubGraph {
@@ -55,7 +55,7 @@ struct MS_API Model {
/// \brief Free meta graph temporary buffer
virtual void Free() = 0;

/// \brief Free all temporay buffer.EG: nodes in the model.
/// \brief Free all temporary buffer.EG: nodes in the model.
virtual void Destroy() = 0;

/// \brief Model destruct, free all memory
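
Note: Node::primitive_ is type-erased here from PrimitiveC * to const void *, which removes the public header's dependency on the C++ primitive class (its forward declaration is deleted above). A minimal consumer-side sketch; that the pointer holds the flatbuffer-decoded schema::Primitive is an assumption, not something this diff states:

    #include "include/model.h"

    namespace mindspore::schema {
    class Primitive;  // assumed target type of the type-erased pointer
    }

    // Callers that previously received a PrimitiveC * must now cast explicitly.
    const mindspore::schema::Primitive *GetPrimitive(const mindspore::lite::Model::Node *node) {
      return static_cast<const mindspore::schema::Primitive *>(node->primitive_);
    }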


+1 -1  mindspore/lite/include/version.h

@@ -22,7 +22,7 @@
namespace mindspore {
namespace lite {
const int ms_version_major = 1;
const int ms_version_minor = 1;
const int ms_version_minor = 2;
const int ms_version_revision = 0;

/// \brief Global method to get a version string.
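
Note: the minor-version bump feeds the version-string helper declared below the constants. A quick usage sketch, assuming the usual mindspore::lite::Version() signature from this header:

    #include <iostream>
    #include "include/version.h"

    int main() {
      std::cout << mindspore::lite::Version() << std::endl;  // expected to report 1.2.0 after this change
      return 0;
    }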


+3 -9  mindspore/lite/micro/CMakeLists.txt

@@ -9,16 +9,10 @@ include_directories(${CMAKE_BINARY_DIR})
include(${TOP_DIR}/cmake/utils.cmake)
include(${TOP_DIR}/cmake/dependency_utils.cmake)
include(${TOP_DIR}/cmake/dependency_securec.cmake)
include(${TOP_DIR}/cmake/external_libs/glog.cmake)
include(${TOP_DIR}/cmake/external_libs/flatbuffers.cmake)
include(${TOP_DIR}/cmake/external_libs/cmsis.cmake)

set(FBS_FILES
${CMAKE_CURRENT_SOURCE_DIR}/../schema/model.fbs
${CMAKE_CURRENT_SOURCE_DIR}/../schema/ops.fbs
${CMAKE_CURRENT_SOURCE_DIR}/../schema/model_v0.fbs
${CMAKE_CURRENT_SOURCE_DIR}/../schema/ops_v0.fbs
)

file(GLOB FBS_FILES ${CMAKE_CURRENT_SOURCE_DIR}/../schema/*.fbs)
ms_build_flatbuffers_lite(FBS_FILES
${CMAKE_CURRENT_SOURCE_DIR}/../schema/
fbs_src
@@ -50,6 +44,6 @@ if(ENABLE_ASAN)
endif()

add_subdirectory(coder)
if(${BUILD_TESTCASES})
if(BUILD_TESTCASES)
add_subdirectory(test)
endif()

+183 -17  mindspore/lite/micro/cmake/file_list.cmake

@@ -5,6 +5,9 @@ set(CODER_SRC
${MICRO_DIR}/coder/graph.cc
${MICRO_DIR}/coder/session.cc
${MICRO_DIR}/coder/train.cc
${MICRO_DIR}/coder/utils/coder_utils.cc
${MICRO_DIR}/coder/utils/dir_utils.cc
${MICRO_DIR}/coder/utils/type_cast.cc
)

set(CODER_ALLOCATOR_SRC
@@ -21,6 +24,11 @@ set(CODER_GENERATOR_SRC
${MICRO_DIR}/coder/generator/component/weight_component.cc
${MICRO_DIR}/coder/generator/component/cmake_component.cc
${MICRO_DIR}/coder/generator/component/train_component.cc
${MICRO_DIR}/coder/generator/component/parallel_component.cc
)

set(MINDSPORE_CORE
${TOP_DIR}/mindspore/core/gvar/logging_level.cc
)

set(CODER_OPCODERS_SRC
@@ -28,16 +36,20 @@ set(CODER_OPCODERS_SRC
${MICRO_DIR}/coder/opcoders/op_coder.cc
${MICRO_DIR}/coder/opcoders/op_coder_builder.cc
${MICRO_DIR}/coder/opcoders/op_coder_register.cc
${MICRO_DIR}/coder/opcoders/parallel.cc
#### serializer
${MICRO_DIR}/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.cc
${MICRO_DIR}/coder/opcoders/serializers/nnacl_serializer/nnacl_int8_serializer.cc
${MICRO_DIR}/coder/opcoders/serializers/nnacl_serializer/nnacl_stream_utils.cc
#### base coder
${MICRO_DIR}/coder/opcoders/base/conv2d_base_coder.cc
${MICRO_DIR}/coder/opcoders/base/dtype_cast_coder.cc
${MICRO_DIR}/coder/opcoders/base/full_connection_base_coder.cc
${MICRO_DIR}/coder/opcoders/base/quant_dtype_cast_coder.cc
${MICRO_DIR}/coder/opcoders/base/reduce_base_coder.cc
${MICRO_DIR}/coder/opcoders/base/resize_base_coder.cc
${MICRO_DIR}/coder/opcoders/base/softmax_base_coder.cc
${MICRO_DIR}/coder/opcoders/base/detection_post_process_base_coder.cc
#### cmsis int8 coder
${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/add_int8_coder.cc
${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/conv2d_base_coder.cc
@@ -55,6 +67,7 @@ set(CODER_OPCODERS_SRC
${MICRO_DIR}/coder/opcoders/nnacl/fp32/arithmetic_self_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/assign_add_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/batchnorm_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/concat_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/convolution_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.cc
@@ -64,21 +77,20 @@ set(CODER_OPCODERS_SRC
${MICRO_DIR}/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/matmul_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/nchw2nhwc_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/nhwc2nchw_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/pad_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/pooling_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/power_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/reduce_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/reshape_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/scale_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/slice_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/softmax_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/squeeze_dims_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/tile_fp32_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/fp32/transpose_fp32_coder.cc
#### nnacl int8 coder
${MICRO_DIR}/coder/opcoders/nnacl/int8/activation_int8_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/int8/add_int8_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/int8/batchnorm_int8_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/int8/concat_int8_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/int8/fullconnection_int8_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/int8/matmul_int8_coder.cc
@@ -87,40 +99,69 @@ set(CODER_OPCODERS_SRC
${MICRO_DIR}/coder/opcoders/nnacl/int8/conv2d_int8_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/int8/deconvolution_int8_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/int8/pooling_int8_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/int8/resize_int8_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/int8/reduce_int8_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/int8/reshape_int8_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/int8/softmax_int8_coder.cc
)

set(CODER_UTILS_SRC
${MICRO_DIR}/coder/utils/coder_utils.cc
${MICRO_DIR}/coder/utils/dir_utils.cc
${MICRO_DIR}/coder/utils/type_cast.cc
${MICRO_DIR}/coder/opcoders/nnacl/int8/sub_int8_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/int8/detection_post_process_int8_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/int8/sigmoid_int8_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/int8/relux_int8_coder.cc
${MICRO_DIR}/coder/opcoders/nnacl/int8/div_int8_coder.cc
#### nnacl dequant coder
${MICRO_DIR}/coder/opcoders/nnacl/dequant/de_quant.cc
)

set(LITE_SRC
${LITE_DIR}/src/common/file_utils.cc
${LITE_DIR}/src/common/graph_util.cc
${LITE_DIR}/src/common/string_util.cc
${LITE_DIR}/src/common/prim_util.cc
${LITE_DIR}/src/common/tensor_util.cc
${LITE_DIR}/src/runtime/allocator.cc
${LITE_DIR}/src/runtime/infer_manager.cc
${LITE_DIR}/src/runtime/runtime_api.cc
${LITE_DIR}/src/lite_model.cc
${LITE_DIR}/src/tensorlist.cc
${LITE_DIR}/src/tensor.cc
${LITE_DIR}/src/scheduler.cc
${LITE_DIR}/src/inner_context.cc
${LITE_DIR}/src/dequant.cc
${LITE_DIR}/src/kernel_registry.cc
${LITE_DIR}/src/lite_kernel.cc
${LITE_DIR}/src/sub_graph_kernel.cc
${LITE_DIR}/src/huffman_decode.cc
${LITE_DIR}/src/executor.cc
${LITE_DIR}/src/common/log_adapter.cc
### src/ops for parameter and infer shape
${LITE_DIR}/src/ops/batch_norm.cc
${LITE_DIR}/src/ops/conv2d.cc
${LITE_DIR}/src/ops/primitive_c.cc
${LITE_DIR}/src/ops/slice.cc
${LITE_DIR}/src/ops/while.cc
${LITE_DIR}/src/common/utils.cc
### populate operator parameter
${LITE_DIR}/src/ops/populate/conv2d_populate.cc
${LITE_DIR}/src/ops/populate/arithmetic_populate.cc
${LITE_DIR}/src/ops/populate/add_populate.cc
${LITE_DIR}/src/ops/populate/concat_populate.cc
${LITE_DIR}/src/ops/populate/conv2d_populate.cc
${LITE_DIR}/src/ops/populate/detection_post_process_populate.cc
${LITE_DIR}/src/ops/populate/depthwise_conv2d_populate.cc
${LITE_DIR}/src/ops/populate/full_connection_populate.cc
${LITE_DIR}/src/ops/populate/pooling_populate.cc
${LITE_DIR}/src/ops/populate/quant_dtype_cast_populate.cc
${LITE_DIR}/src/ops/populate/resize_populate.cc
${LITE_DIR}/src/ops/populate/reshape_populate.cc
${LITE_DIR}/src/ops/populate/batch_norm_populate.cc
${LITE_DIR}/src/ops/populate/slice_populate.cc
${LITE_DIR}/src/ops/populate/while_populate.cc
${LITE_DIR}/src/ops/populate/matmul_populate.cc
${LITE_DIR}/src/ops/populate/bias_add_populate.cc
${LITE_DIR}/src/ops/populate/activation_populate.cc
### tools
${LITE_DIR}/tools/common/flag_parser.cc
)
set(LITE_KERNEL_SRC
### nnacl
${LITE_DIR}/nnacl/common_func.c
${LITE_DIR}/nnacl/base/minimal_filtering_generator.c
${LITE_DIR}/nnacl/base/arithmetic_base.c
${LITE_DIR}/nnacl/base/slice_base.c
${LITE_DIR}/nnacl/fp32/winograd_utils.c
${LITE_DIR}/nnacl/fp32/pack_fp32.c
${LITE_DIR}/nnacl/int8/quantize.c
@@ -128,13 +169,138 @@ set(LITE_KERNEL_SRC
${LITE_DIR}/nnacl/int8/matmul_int8.c
${LITE_DIR}/nnacl/int8/fixed_point.c
${LITE_DIR}/nnacl/fp32/matmul_fp32.c
${LITE_DIR}/nnacl/int8/arithmetic_int8.c
${LITE_DIR}/nnacl/int8/add_int8.c
${LITE_DIR}/nnacl/int8/concat_int8.c
${LITE_DIR}/nnacl/int8/conv_int8.c
${LITE_DIR}/nnacl/int8/conv3x3_int8.c
${LITE_DIR}/nnacl/int8/conv1x1_int8.c
${LITE_DIR}/nnacl/base/conv1x1_base.c
${LITE_DIR}/nnacl/int8/conv_depthwise_int8.c
${LITE_DIR}/nnacl/int8/deconv_int8.c
${LITE_DIR}/nnacl/int8/common_func_int8.c
${LITE_DIR}/nnacl/int8/slice_int8.c
${LITE_DIR}/nnacl/int8/batchnorm_int8.c
${LITE_DIR}/nnacl/int8/sub_int8.c
${LITE_DIR}/nnacl/int8/quant_dtype_cast_int8.c
${LITE_DIR}/nnacl/int8/sigmoid_int8.c
${LITE_DIR}/nnacl/int8/resize_int8.c
### infer
${LITE_DIR}/nnacl/infer/adam_infer.c
${LITE_DIR}/nnacl/infer/add_sub_grad_infer.c
${LITE_DIR}/nnacl/infer/addn_infer.c
${LITE_DIR}/nnacl/infer/apply_momentum_infer.c
${LITE_DIR}/nnacl/infer/argmin_max_infer.c
${LITE_DIR}/nnacl/infer/arithmetic_compare_infer.c
${LITE_DIR}/nnacl/infer/arithmetic_grad_infer.c
${LITE_DIR}/nnacl/infer/arithmetic_infer.c
${LITE_DIR}/nnacl/infer/assert_op_infer.c
${LITE_DIR}/nnacl/infer/assign_add_infer.c
${LITE_DIR}/nnacl/infer/assign_infer.c
${LITE_DIR}/nnacl/infer/audio_spectrogram_infer.c
${LITE_DIR}/nnacl/infer/batch_to_space_infer.c
${LITE_DIR}/nnacl/infer/bias_grad_infer.c
${LITE_DIR}/nnacl/infer/binary_cross_entropy_infer.c
${LITE_DIR}/nnacl/infer/bn_grad_infer.c
${LITE_DIR}/nnacl/infer/broadcast_to_infer.c
${LITE_DIR}/nnacl/infer/cast_infer.c
${LITE_DIR}/nnacl/infer/common_infer.c
${LITE_DIR}/nnacl/infer/concat_infer.c
${LITE_DIR}/nnacl/infer/constant_of_shape_infer.c
${LITE_DIR}/nnacl/infer/conv2d_grad_filter_infer.c
${LITE_DIR}/nnacl/infer/conv2d_grad_input_infer.c
${LITE_DIR}/nnacl/infer/conv2d_infer.c
${LITE_DIR}/nnacl/infer/crop_and_resize_infer.c
${LITE_DIR}/nnacl/infer/crop_infer.c
${LITE_DIR}/nnacl/infer/custom_extract_features_infer.c
${LITE_DIR}/nnacl/infer/custom_normalize_infer.c
${LITE_DIR}/nnacl/infer/custom_predict_infer.c
${LITE_DIR}/nnacl/infer/deconv2d_infer.c
${LITE_DIR}/nnacl/infer/dedepthwise_conv2d_infer.c
${LITE_DIR}/nnacl/infer/depth_to_space_infer.c
${LITE_DIR}/nnacl/infer/depthwise_conv2d_infer.c
${LITE_DIR}/nnacl/infer/detection_post_process_infer.c
${LITE_DIR}/nnacl/infer/dropout_grad_infer.c
${LITE_DIR}/nnacl/infer/dropout_infer.c
${LITE_DIR}/nnacl/infer/embedding_lookup_infer.c
${LITE_DIR}/nnacl/infer/expand_dims_infer.c
${LITE_DIR}/nnacl/infer/fft_imag_infer.c
${LITE_DIR}/nnacl/infer/fft_real_infer.c
${LITE_DIR}/nnacl/infer/fill_infer.c
${LITE_DIR}/nnacl/infer/flatten_grad_infer.c
${LITE_DIR}/nnacl/infer/flatten_infer.c
${LITE_DIR}/nnacl/infer/full_connection_infer.c
${LITE_DIR}/nnacl/infer/fused_batchnorm_infer.c
${LITE_DIR}/nnacl/infer/gather_infer.c
${LITE_DIR}/nnacl/infer/gather_nd_infer.c
${LITE_DIR}/nnacl/infer/group_conv2d_grad_input_infer.c
${LITE_DIR}/nnacl/infer/gru_infer.c
${LITE_DIR}/nnacl/infer/hashtable_lookup_infer.c
${LITE_DIR}/nnacl/infer/invert_permutation_infer.c
${LITE_DIR}/nnacl/infer/layer_norm_infer.c
${LITE_DIR}/nnacl/infer/lin_space_infer.c
${LITE_DIR}/nnacl/infer/lsh_projection_infer.c
${LITE_DIR}/nnacl/infer/lstm_infer.c
${LITE_DIR}/nnacl/infer/matmul_infer.c
${LITE_DIR}/nnacl/infer/maximum_grad_infer.c
${LITE_DIR}/nnacl/infer/mean_infer.c
${LITE_DIR}/nnacl/infer/merge_infer.c
${LITE_DIR}/nnacl/infer/mfcc_infer.c
${LITE_DIR}/nnacl/infer/non_max_suppression_infer.c
${LITE_DIR}/nnacl/infer/one_hot_infer.c
${LITE_DIR}/nnacl/infer/pad_infer.c
${LITE_DIR}/nnacl/infer/partial_infer.c
${LITE_DIR}/nnacl/infer/pooling_grad_infer.c
${LITE_DIR}/nnacl/infer/pooling_infer.c
${LITE_DIR}/nnacl/infer/power_infer.c
${LITE_DIR}/nnacl/infer/prior_box_infer.c
${LITE_DIR}/nnacl/infer/quant_dtype_cast_infer.c
${LITE_DIR}/nnacl/infer/random_standard_normal_infer.c
${LITE_DIR}/nnacl/infer/range_infer.c
${LITE_DIR}/nnacl/infer/rank_infer.c
${LITE_DIR}/nnacl/infer/reduce_infer.c
${LITE_DIR}/nnacl/infer/reshape_infer.c
${LITE_DIR}/nnacl/infer/resize_infer.c
${LITE_DIR}/nnacl/infer/rfft_infer.c
${LITE_DIR}/nnacl/infer/roi_pooling_infer.c
${LITE_DIR}/nnacl/infer/scatter_nd_infer.c
${LITE_DIR}/nnacl/infer/select_infer.c
${LITE_DIR}/nnacl/infer/sgd_infer.c
${LITE_DIR}/nnacl/infer/shape_infer.c
${LITE_DIR}/nnacl/infer/size_infer.c
${LITE_DIR}/nnacl/infer/skip_gram_infer.c
${LITE_DIR}/nnacl/infer/slice_infer.c
${LITE_DIR}/nnacl/infer/softmax_cross_entropy_infer.c
${LITE_DIR}/nnacl/infer/softmax_infer.c
${LITE_DIR}/nnacl/infer/space_to_batch_infer.c
${LITE_DIR}/nnacl/infer/space_to_batch_nd_infer.c
${LITE_DIR}/nnacl/infer/space_to_depth_infer.c
${LITE_DIR}/nnacl/infer/sparse_softmax_cross_entropy_infer.c
${LITE_DIR}/nnacl/infer/sparse_to_dense_infer.c
${LITE_DIR}/nnacl/infer/split_infer.c
${LITE_DIR}/nnacl/infer/squeeze_infer.c
${LITE_DIR}/nnacl/infer/stack_infer.c
${LITE_DIR}/nnacl/infer/strided_slice_grad_infer.c
${LITE_DIR}/nnacl/infer/strided_slice_infer.c
${LITE_DIR}/nnacl/infer/switch_infer.c
${LITE_DIR}/nnacl/infer/tensorlist_fromtensor_infer.c
${LITE_DIR}/nnacl/infer/tensorlist_getitem_infer.c
${LITE_DIR}/nnacl/infer/tensorlist_reserve_infer.c
${LITE_DIR}/nnacl/infer/tensorlist_setitem_infer.c
${LITE_DIR}/nnacl/infer/tensorlist_stack_infer.c
${LITE_DIR}/nnacl/infer/tile_infer.c
${LITE_DIR}/nnacl/infer/topk_infer.c
${LITE_DIR}/nnacl/infer/transpose_infer.c
${LITE_DIR}/nnacl/infer/uniform_real_infer.c
${LITE_DIR}/nnacl/infer/unique_infer.c
${LITE_DIR}/nnacl/infer/unsorted_segment_sum_infer.c
${LITE_DIR}/nnacl/infer/unsqueeze_infer.c
${LITE_DIR}/nnacl/infer/unstack_infer.c
${LITE_DIR}/nnacl/infer/where_infer.c
${LITE_DIR}/nnacl/infer/while_infer.c
${LITE_DIR}/nnacl/infer/splice_infer.c
)

list(APPEND FILE_SET ${CODER_SRC} ${CODER_UTILS_SRC} ${CODER_OPCODERS_SRC} ${CODER_GENERATOR_SRC}
${CODER_ALLOCATOR_SRC} ${LITE_SRC} ${LITE_KERNEL_SRC})
list(APPEND FILE_SET ${CODER_SRC} ${CODER_OPCODERS_SRC} ${CODER_GENERATOR_SRC}
${CODER_ALLOCATOR_SRC} ${LITE_SRC} ${LITE_KERNEL_SRC} ${MINDSPORE_CORE})


+21 -0  mindspore/lite/micro/cmake/package_cmsis.cmake

@@ -0,0 +1,21 @@
set(CMSIS_DIR ${LITE_DIR}/micro/build/cmsis)
if(MICRO_CMSIS_X86)
message("build cmsis kernels")
include_directories(${CMSIS_DIR}/CMSIS/Core/Include)
include_directories(${CMSIS_DIR}/CMSIS/DSP/Include)
include_directories(${CMSIS_DIR}/CMSIS/NN/Include)

file(REMOVE ${CMSIS_DIR}/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c)

file(GLOB CMSIS_OPS
${CMSIS_DIR}/CMSIS/NN/Source/BasicMathFunctions/*.c
${CMSIS_DIR}/CMSIS/NN/Source/ActivationFunctions/*.c
${CMSIS_DIR}/CMSIS/NN/Source/ConcatenationFunctions/*.c
${CMSIS_DIR}/CMSIS/NN/Source/ConvolutionFunctions/*.c
${CMSIS_DIR}/CMSIS/NN/Source/FullyConnectedFunctions/*.c
${CMSIS_DIR}/CMSIS/NN/Source/NNSupportFunctions/*.c
${CMSIS_DIR}/CMSIS/NN/Source/PoolingFunctions/*.c
${CMSIS_DIR}/CMSIS/NN/Source/ReshapeFunctions/*.c
${CMSIS_DIR}/CMSIS/NN/Source/SoftmaxFunctions/*.c
)
endif()

+0 -32  mindspore/lite/micro/cmake/package_micro_ops.cmake

@@ -1,32 +0,0 @@
include_directories(${NNACL_DIR}/..)

set(CMSIS_SRC ${NNACL_DIR}/../micro/build/cmsis)
if(MICRO_CMSIS_X86)
message("*****build cmsis x86 codes****")
include_directories(${CMSIS_SRC}/CMSIS/Core/Include)
include_directories(${CMSIS_SRC}/CMSIS/DSP/Include)
include_directories(${CMSIS_SRC}/CMSIS/NN/Include)
file(GLOB RUNTIME_KERNEL_CMSIS_SRC
${CMSIS_SRC}/CMSIS/NN/Source/BasicMathFunctions/*.c
${CMSIS_SRC}/CMSIS/NN/Source/ActivationFunctions/*.c
${CMSIS_SRC}/CMSIS/NN/Source/ConcatenationFunctions/*.c
${CMSIS_SRC}/CMSIS/NN/Source/ConvolutionFunctions/*.c
${CMSIS_SRC}/CMSIS/NN/Source/FullyConnectedFunctions/*.c
${CMSIS_SRC}/CMSIS/NN/Source/NNSupportFunctions/*.c
${CMSIS_SRC}/CMSIS/NN/Source/PoolingFunctions/*.c
${CMSIS_SRC}/CMSIS/NN/Source/ReshapeFunctions/*.c
${CMSIS_SRC}/CMSIS/NN/Source/SoftmaxFunctions/*.c
)
endif()

########################### files ###########################
file(GLOB RUNTIME_KERNEL_SRC
${NNACL_DIR}/kernel/fp32/*.c
${NNACL_DIR}/kernel/int8/*.c
)
if(MICRO_CMSIS_X86)
set(RUNTIME_OPS ${RUNTIME_KERNEL_SRC} ${RUNTIME_TRAIN_SRC} ${RUNTIME_KERNEL_CMSIS_SRC})
else()
set(RUNTIME_OPS ${RUNTIME_KERNEL_SRC} ${RUNTIME_TRAIN_SRC})
endif()


+20 -0  mindspore/lite/micro/cmake/package_nnacl.cmake

@@ -0,0 +1,20 @@
include_directories(${LITE_DIR})
set(NNACL_DIR ${LITE_DIR}/nnacl)
file(GLOB KERNEL_SRC
${NNACL_DIR}/*.c
${NNACL_DIR}/base/*.c
${NNACL_DIR}/fp32/*.c
${NNACL_DIR}/int8/*.c
)

if(MICRO_BUILD_ARM64)
file(GLOB ASSEMBLY_SRC ${NNACL_DIR}/assembly/arm64/*.S)
set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
endif()

if(MICRO_BUILD_ARM32A)
file(GLOB ASSEMBLY_SRC ${NNACL_DIR}/assembly/arm32/*.S)
set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
endif()

set(NNACL_OPS ${KERNEL_SRC} ${ASSEMBLY_SRC})

+25 -0  mindspore/lite/micro/cmake/package_wrapper.cmake

@@ -0,0 +1,25 @@
include_directories(${LITE_DIR}/micro/coder/operator_library)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
set(WRAPPER_DIR ${LITE_DIR}/micro/coder/operator_library/wrapper/)

set(RUNTIME_SRC
${LITE_DIR}/src/runtime/thread_pool.c
)

set(WRAPPER_SRC
${WRAPPER_DIR}/base/detection_post_process_base_wrapper.c
${WRAPPER_DIR}/fp32/matmul_fp32_wrapper.c
${WRAPPER_DIR}/int8/matmul_int8_wrapper.c
${WRAPPER_DIR}/int8/add_int8_wrapper.c
${WRAPPER_DIR}/int8/concat_int8_wrapper.c
${WRAPPER_DIR}/int8/convolution_int8_wrapper.c
${WRAPPER_DIR}/int8/conv_init_int8_wrapper.c
${WRAPPER_DIR}/int8/conv1x1_init_int8_wrapper.c
${WRAPPER_DIR}/int8/conv1x1_run_int8_wrapper.c
${WRAPPER_DIR}/int8/convolution_depthwise_int8_wrapper.c
${WRAPPER_DIR}/int8/resize_int8_wrapper.c
${WRAPPER_DIR}/int8/slice_int8_wrapper.c
${WRAPPER_DIR}/int8/batchnorm_int8_wrapper.c
)

list(APPEND FILE_SET ${WRAPPER_SRC} ${RUNTIME_SRC})

+0 -12  mindspore/lite/micro/cmake/wrapper.cmake

@@ -1,12 +0,0 @@
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")

set(MICRO_WRAPPER_SRC
${LITE_DIR}/src/runtime/thread_pool.c
${MICRO_DIR}/wrapper/fp32/matmul_fp32_wrapper.c
${MICRO_DIR}/wrapper/int8/matmul_int8_wrapper.c
${MICRO_DIR}/wrapper/int8/conv_init_int8_wrapper.c
${MICRO_DIR}/wrapper/int8/conv1x1_init_int8_wrapper.c
${MICRO_DIR}/wrapper/int8/conv1x1_run_int8_wrapper.c
)

list(APPEND FILE_SET ${MICRO_WRAPPER_SRC})

+14 -9  mindspore/lite/micro/coder/CMakeLists.txt

@@ -1,7 +1,10 @@
add_definitions(-DUSE_GLOG)

set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections ")
set(MICRO_DIR ${CMAKE_CURRENT_SOURCE_DIR}/..)
set(LITE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../..)
set(3RD_DIR ${TOP_DIR}/third_party)
set(LITE_DIR ${TOP_DIR}/mindspore/lite)
set(MICRO_DIR ${LITE_DIR}/micro)

if(ENABLE_CONVERTER)
set(CODEGEN_PATH ${CMAKE_BINARY_DIR}/micro/coder/codegen)
else()
@@ -13,17 +16,19 @@ include_directories(${3RD_DIR})
include_directories(${3RD_DIR}/flatbuffers/include)
#include ms
include_directories(${TOP_DIR}/)
include_directories(${LITE_DIR})
include_directories(${TOP_DIR}/mindspore/core/)

include_directories(${LITE_DIR})
include_directories(${MICRO_DIR})
#include coder
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../)
include(${TOP_DIR}/cmake/external_libs/cmsis.cmake)
include(${MICRO_DIR}/cmake/file_list.cmake)
include(${MICRO_DIR}/cmake/wrapper.cmake)
include(${MICRO_DIR}/cmake/package_wrapper.cmake)
add_subdirectory(operator_library)

add_executable(codegen main.cc ${FILE_SET})
add_dependencies(codegen fbs_src)
add_dependencies(codegen fbs_inner_src)
target_link_libraries(codegen PRIVATE ${SECUREC_LIBRARY})
if(NOT WIN32)
add_custom_command(TARGET codegen POST_BUILD COMMAND strip ${CODEGEN_PATH})
target_link_libraries(codegen PRIVATE ${SECUREC_LIBRARY} mindspore::glog)
if(NOT WIN32 AND "${CMAKE_BUILD_TYPE}" STREQUAL "Release")
add_custom_command(TARGET codegen POST_BUILD COMMAND strip ${CODEGEN_PATH})
endif()

+3 -5  mindspore/lite/micro/coder/allocator/allocator.cc

@@ -22,11 +22,9 @@

namespace mindspore::lite::micro {
void *MemoryAllocator::MallocWeightTensor(TypeId type_id, size_t size, MallocType type) {
static const std::map<TypeId, size_t> size_map = {{kNumberTypeFloat32, sizeof(float)},
{kNumberTypeInt32, sizeof(int)},
{kNumberTypeInt32, sizeof(int32_t)},
{kNumberTypeInt16, sizeof(int16_t)},
{kNumberTypeInt8, sizeof(int8_t)}};
static const std::map<TypeId, size_t> size_map = {
{kNumberTypeFloat, sizeof(float)}, {kNumberTypeFloat32, sizeof(float)}, {kNumberTypeInt32, sizeof(int32_t)},
{kNumberTypeInt16, sizeof(int16_t)}, {kNumberTypeInt8, sizeof(int8_t)}, {kNumberTypeUInt8, sizeof(uint8_t)}};
auto item = size_map.find(type_id);
MS_CHECK_TRUE_RET_NULL(item != size_map.end(), "unsupported type idnex");
size_t type_size = item->second;
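
Note: the lookup table gains kNumberTypeFloat and kNumberTypeUInt8 entries and standardizes on sizeof(int32_t). A self-contained sketch of the same pattern (TypeId reduced to a local enum for illustration; the real code logs an error and returns nullptr on a miss):

    #include <cstddef>
    #include <cstdint>
    #include <map>

    enum TypeId { kNumberTypeFloat, kNumberTypeFloat32, kNumberTypeInt32, kNumberTypeInt16, kNumberTypeInt8, kNumberTypeUInt8 };

    size_t TypeSize(TypeId type_id) {
      static const std::map<TypeId, size_t> size_map = {
          {kNumberTypeFloat, sizeof(float)},   {kNumberTypeFloat32, sizeof(float)},
          {kNumberTypeInt32, sizeof(int32_t)}, {kNumberTypeInt16, sizeof(int16_t)},
          {kNumberTypeInt8, sizeof(int8_t)},   {kNumberTypeUInt8, sizeof(uint8_t)}};
      auto item = size_map.find(type_id);
      return item != size_map.end() ? item->second : 0;  // 0 marks an unsupported type id
    }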


+3 -3  mindspore/lite/micro/coder/allocator/allocator.h

@@ -73,7 +73,7 @@ class MemoryAllocator {
if (type != kWorkspace) {
return MallocWeightTensor(type_id, size, type);
}
if (size == 0 && size >= UINT_MAX) {
if (size == 0 || size >= UINT_MAX) {
return nullptr;
}

@@ -94,12 +94,12 @@ class MemoryAllocator {
template <typename T>
std::string GetRuntimeAddr(T t, bool is_const = false) {
if (!t) {
return "NULL";
return "";
}
std::string type_info = is_const ? "const " : "";
std::string type_name;
if (std::type_index(typeid(T)) == std::type_index(typeid(Tensor *))) {
type_name = GetTensorDataType(reinterpret_cast<Tensor *>(t)->data_type()) + " *";
type_name = GetTensorDataType(reinterpret_cast<Tensor *>(t)->data_type()) + "*";
} else {
type_name = GetVariableTypeName<T>();
}
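
Note: two fixes land in this header. The old workspace guard (size == 0 && size >= UINT_MAX) is unsatisfiable, so empty and oversized requests were never rejected; switching to || rejects both. GetRuntimeAddr likewise now returns an empty string instead of the literal "NULL" for null inputs. A one-function demonstration of the guard:

    #include <climits>
    #include <cstddef>

    bool RejectWorkspaceSize(size_t size) {
      // old predicate: size == 0 && size >= UINT_MAX  -- always false
      return size == 0 || size >= UINT_MAX;  // fixed: rejects both degenerate cases
    }
    // RejectWorkspaceSize(0) == true, RejectWorkspaceSize(1024) == false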


+12 -7  mindspore/lite/micro/coder/coder.cc

@@ -34,19 +34,20 @@ namespace mindspore::lite::micro {
class CoderFlags : public virtual FlagParser {
public:
CoderFlags() {
AddFlag(&CoderFlags::is_weight_file_, "isWeightFile", "whether generating weight .net file, true| false", false);
AddFlag(&CoderFlags::is_weight_file_, "isWeightFile", "whether generating weight binary file, true| false", false);
AddFlag(&CoderFlags::model_path_, "modelPath", "Input model path", "");
AddFlag(&CoderFlags::code_path_, "codePath", "Input code path", ".");
AddFlag(&CoderFlags::code_module_name_, "moduleName", "Input code module name", "");
AddFlag(&CoderFlags::target_, "target", "generateed code target, x86| ARM32M| ARM32A| ARM64", "x86");
AddFlag(&CoderFlags::code_mode_, "codeMode", "generated code mode, Normal | Inference | Train", "Normal");
AddFlag(&CoderFlags::debug_mode_, "debugMode", "dump perlayer's time cost and tensor, true | false", false);
AddFlag(&CoderFlags::target_, "target", "generated code target, x86| ARM32M| ARM32A| ARM64", "x86");
AddFlag(&CoderFlags::code_mode_, "codeMode", "generated code mode, Inference | Train", "Inference");
AddFlag(&CoderFlags::support_parallel_, "supportParallel", "whether support parallel launch, true | false", false);
AddFlag(&CoderFlags::debug_mode_, "debugMode", "dump the tensors data for debugging, true | false", false);
}

~CoderFlags() override = default;

public:
std::string model_path_;
bool support_parallel_{false};
bool is_weight_file_{false};
std::string code_module_name_;
std::string code_path_;
@@ -87,8 +88,7 @@ int Coder::Run(const std::string &model_path) {
int Coder::Init(const CoderFlags &flags) const {
static const std::map<std::string, Target> kTargetMap = {
{"x86", kX86}, {"ARM32M", kARM32M}, {"ARM32A", kARM32A}, {"ARM64", kARM64}, {"All", kAllTargets}};
static const std::map<std::string, CodeMode> kCodeModeMap = {
{"Normal", Code_Normal}, {"Inference", Code_Inference}, {"Train", Code_Train}};
static const std::map<std::string, CodeMode> kCodeModeMap = {{"Inference", Inference}, {"Train", Train}};

Configurator *config = Configurator::GetInstance();

@@ -112,6 +112,11 @@ int Coder::Init(const CoderFlags &flags) const {
return true;
});

parsers.emplace_back([&flags, config]() -> bool {
config->set_support_parallel(flags.support_parallel_);
return true;
});

parsers.emplace_back([&flags, config]() -> bool {
config->set_debug_mode(flags.debug_mode_);
return true;
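
Note: Code_Normal disappears from the CLI, Inference becomes the default codeMode, and the new supportParallel flag is threaded into the Configurator. A self-contained sketch of the new mode lookup as Coder::Init performs it:

    #include <map>
    #include <string>

    enum CodeMode { Inference = 0, Train = 1, Code_Unknown = 99 };

    // Resolve the --codeMode flag; removed strings such as "Normal" now map
    // to Code_Unknown.
    CodeMode ResolveCodeMode(const std::string &flag) {
      static const std::map<std::string, CodeMode> kCodeModeMap = {{"Inference", Inference}, {"Train", Train}};
      auto it = kCodeModeMap.find(flag);
      return it != kCodeModeMap.end() ? it->second : Code_Unknown;
    }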


+6 -6  mindspore/lite/micro/coder/coder_config.h

@@ -21,7 +21,7 @@

namespace mindspore::lite::micro {
enum Target { kX86 = 0, kARM32M = 1, kARM32A = 2, kARM64 = 3, kAllTargets = 4, kTargetUnknown = 99 };
enum CodeMode { Code_Normal = 0, Code_Inference = 1, Code_Train = 2, Code_Unknown = 99 };
enum CodeMode { Inference = 0, Train = 1, Code_Unknown = 99 };

class Configurator {
public:
@@ -36,9 +36,6 @@ class Configurator {
void set_code_path(const std::string &code_path) { code_path_ = code_path; }
std::string code_path() const { return code_path_; }

void set_subgraph_(const std::string &subgraph) { sub_graph_ = subgraph; }
std::string sub_graph() { return sub_graph_; }

void set_target(Target target) { target_ = target; }
Target target() const { return target_; }

@@ -51,16 +48,19 @@ class Configurator {
void set_is_weight_file(bool flag) { is_weight_file_ = flag; }
bool is_weight_file() const { return is_weight_file_; }

void set_support_parallel(bool parallel) { support_parallel_ = parallel; }
bool support_parallel() const { return support_parallel_; }

private:
Configurator() = default;
~Configurator() = default;

bool is_weight_file_{false};
std::string module_name_;
std::string code_path_;
std::string sub_graph_;
Target target_{kTargetUnknown};
CodeMode code_mode_{Code_Unknown};
bool is_weight_file_{false};
bool support_parallel_{false};
bool debug_mode_{false};
};
} // namespace mindspore::lite::micro
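
Note: Configurator is a process-wide singleton (GetInstance() is used in coder.cc above), so the new support_parallel_ switch is visible to every generator component. A short sketch using only setters visible in this commit:

    #include "coder/coder_config.h"

    void ConfigureCodegen() {
      auto *config = mindspore::lite::micro::Configurator::GetInstance();
      config->set_target(mindspore::lite::micro::kARM64);
      config->set_support_parallel(true);  // new in this commit
      config->set_debug_mode(false);
    }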


+3 -3  mindspore/lite/micro/coder/context.cc

@@ -14,9 +14,9 @@
* limitations under the License.
*/

#include "micro/coder/context.h"
#include "micro/coder/coder_config.h"
#include "micro/coder/allocator/allocator.h"
#include "coder/context.h"
#include "coder/coder_config.h"
#include "coder/allocator/allocator.h"

namespace mindspore::lite::micro {
CoderContext::CoderContext() {


+1 -15  mindspore/lite/micro/coder/generator/component/benchmark_component.cc

@@ -108,7 +108,7 @@ void CodeBenchmarkSetBuffer(std::ofstream &ofs, const std::string &module_name)
<< "_SetBuffer(buffer);\n"
" if (ret != RET_OK) {\n"
" MICRO_ERROR(\"set inputs failed\");\n"
" return RET_ERROR;"
" return RET_ERROR;\n"
" }\n";
}

@@ -128,19 +128,6 @@ void CodeBenchmarkInitWeight(std::ofstream &ofs, const std::string &module_name)
" weight_buffer = NULL;\n";
}

void CodeBenchmarkConfigThread(std::ofstream &ofs) {
ofs << " int thread_num = 4;\n"
" BindMode bind_mode = NO_BIND_MODE;\n"
" if (argc >= 6) {\n"
" thread_num = atoi(argv[4]);\n"
" bind_mode = atoi(argv[5]);\n"
" }\n"
" ret = ConfigThreadPool(THREAD_POOL_DEFAULT, thread_num, bind_mode);\n"
" if (ret != 0) {\n"
" MICRO_ERROR(\"create thread pool failed\");\n"
" }\n";
}

void CodeBenchmarkInference(std::ofstream &ofs, const std::string &module_name) {
ofs << " if (argc >= 4) {\n"
<< " " << module_name << "_WarmUp();\n"
@@ -170,7 +157,6 @@ void CodeBenchmarkPrintOutputs(std::ofstream &ofs, const std::string &module_nam
" PrintTensorData(tensor);\n"
" }\n";
ofs << " printf(\"" << module_name << " inference success.\\n\");\n";
ofs << " free(buffer);\n";
}

/**


+0 -2  mindspore/lite/micro/coder/generator/component/benchmark_component.h

@@ -39,8 +39,6 @@ void CodeBenchmarkSetBuffer(std::ofstream &ofs, const std::string &module_name);

void CodeBenchmarkInitWeight(std::ofstream &ofs, const std::string &module_name);

void CodeBenchmarkConfigThread(std::ofstream &ofs);

void CodeBenchmarkInference(std::ofstream &ofs, const std::string &module_name);

void CodeBenchmarkPrintOutputs(std::ofstream &ofs, const std::string &module_name);


+4 -5  mindspore/lite/micro/coder/generator/component/cmake_component.cc

@@ -24,10 +24,9 @@ void CodeCMakeNetLibrary(std::ofstream &ofs, const std::string &module_name, con
Target target) {
ofs << "include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../include/)\n";
if (target == kARM32M) {
ofs << "include_directories(${OP_HEADER_PATH}/cmsis)\n"
<< "include_directories(${OP_HEADER_PATH}/cmsis/CMSIS/NN/Include)\n"
<< "include_directories(${OP_HEADER_PATH}/cmsis/CMSIS/DSP/Include)\n"
<< "include_directories(${OP_HEADER_PATH}/cmsis/CMSIS/Core/Include)\n";
ofs << "include_directories(${OP_HEADER_PATH}/CMSIS/NN/Include)\n"
<< "include_directories(${OP_HEADER_PATH}/CMSIS/DSP/Include)\n"
<< "include_directories(${OP_HEADER_PATH}/CMSIS/Core/Include)\n";
}
ofs << "set(OP_SRC\n";
for (const std::string &c_file : ctx->c_files()) {
@@ -38,7 +37,7 @@ void CodeCMakeNetLibrary(std::ofstream &ofs, const std::string &module_name, con
<< ")\n";

std::set<std::string> kernel_cmake_asm_set_files = ctx->asm_files();
if (!kernel_cmake_asm_set_files.empty()) {
if (!kernel_cmake_asm_set_files.empty() && (target == kARM32A || target == kARM64)) {
ofs << "set(ASSEMBLY_SRC\n";
for (const std::string &asm_file : kernel_cmake_asm_set_files) {
ofs << " " << asm_file << ".o\n";


+4 -6  mindspore/lite/micro/coder/generator/component/common_component.cc

@@ -26,7 +26,7 @@ namespace mindspore::lite::micro {
void CodeSourceFileInclude(std::ofstream &ofs, const std::string &weight_file, const std::string &header) {
ofs << g_hwLicense << "#include \"microtensor.h\"\n"
<< "#include \"" << weight_file << "\"\n"
<< "#include \"" << header << "\"\n";
<< "#include \"" << header << "\"\n\n";
}

void CodeInputAndOutputState(std::ofstream &ofs, const std::string &module_name) {
@@ -53,13 +53,13 @@ void PrintMicroTensors(std::ofstream &ofs, std::vector<Tensor *> tensors, const
MS_LOG(ERROR) << "nonexistent tensor";
break;
}
ofs << " static int dim[] = {";
ofs << " static int dim" << i << "[] = {";
for (size_t j = 0; j < tensor->shape().size(); ++j) {
ofs << tensor->shape()[j] << ", ";
}
ofs << "};\n"
<< " " << name << "[" << i << "].ndim = " << tensor->shape().size() << ";\n"
<< " " << name << "[" << i << "].dim = dim;\n"
<< " " << name << "[" << i << "].dim = dim" << i << ";\n"
<< " " << name << "[" << i << "].type = " << EnumMicroTensorDataType(tensor->data_type()) << ";\n"
<< " " << name << "[" << i << "].format = " << std::to_string(tensor->format()) << ";\n"
<< " " << name << "[" << i << "].data =" << item->second << ";\n";
@@ -69,7 +69,6 @@ void PrintMicroTensors(std::ofstream &ofs, std::vector<Tensor *> tensors, const
void CodeInputAndOutputImplement(std::ofstream &ofs, const std::string &module_name,
const std::unique_ptr<CoderContext> &ctx) {
// input tensors
ofs << "\n// input tensors\n";
std::vector<Tensor *> inputs = ctx->graph_inputs();
for (size_t i = 0; i < inputs.size(); ++i) {
ofs << "static const unsigned char *" << ctx->input_name() + std::to_string(i) << " = 0;\n";
@@ -88,7 +87,6 @@ void CodeInputAndOutputImplement(std::ofstream &ofs, const std::string &module_n
ofs << " return RET_OK;\n}\n";

// output tensors
ofs << "\n// output tensors\n";
std::vector<Tensor *> outputs = ctx->graph_outputs();
size_t output_num = outputs.size();
std::string output_name = ctx->output_name();
@@ -158,7 +156,7 @@ void CodeManageResourceState(std::ofstream &ofs, const std::string &module_name)

void CodeInitResourceImplement(std::ofstream &ofs, const std::string &module_name,
const std::unique_ptr<CoderContext> &ctx) {
ofs << "int " << module_name << "deconv_GetBufferSize() {\n"
ofs << "int " << module_name << "_GetBufferSize() {\n"
<< " return " << ctx->total_buffer_size() << ";\n"
<< "}\n";
ofs << "int " << module_name << "_SetBuffer( void *buffer) {\n";


+5 -5  mindspore/lite/micro/coder/generator/component/const_blocks/cmake_lists.h

@@ -14,10 +14,10 @@
* limitations under the License.
*/

#ifndef MICRO_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_CMAKE_LISTS_CODE_H_
#ifndef MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_CMAKE_LISTS_CODE_H_
#define MICRO_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_CMAKE_LISTS_CODE_H_

static const char bench_cmake_lists_txt[] =
const char *bench_cmake_lists_txt =
"cmake_minimum_required(VERSION 3.14)\n"
"project(${PROJ_NAME})\n"
"\n"
@@ -55,9 +55,9 @@ static const char bench_cmake_lists_txt[] =
"link_directories(${MODEL_LIB_PATH})\n"
"include(benchmark.cmake)\n"
"add_executable(${PROJ_NAME}_bench ${SRC_FILES})\n"
"target_link_libraries(${PROJ_NAME}_bench ${MODEL_LIB_NAME} -lm)\n";
"target_link_libraries(${PROJ_NAME}_bench ${MODEL_LIB_NAME} -lm -pthread)\n";

static const char src_cmake_lists_txt[] =
const char *src_cmake_lists_txt =
"cmake_minimum_required(VERSION 3.14)\n"
"project(${PROJ_NAME})\n"
"\n"
@@ -112,4 +112,4 @@ static const char src_cmake_lists_txt[] =
"string(CONCAT library_name \"lib\" ${PROJ_NAME} \".a\")\n"
"create_library()\n";

#endif // MICRO_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_CMAKE_LISTS_CODE_H_
#endif // MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_CMAKE_LISTS_CODE_H_

+6 -6  mindspore/lite/micro/coder/generator/component/const_blocks/debug_utils.h

@@ -13,10 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MICRO_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_BEN_DEBUG_UTILS_H_
#define MICRO_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_BEN_DEBUG_UTILS_H_
#ifndef MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_DEBUG_UTILS_H_
#define MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_DEBUG_UTILS_H_

static const char debug_utils_h[] =
const char *debug_utils_h =
"/**\n"
" * Copyright 2021 Huawei Technologies Co., Ltd\n"
" *\n"
@@ -50,7 +50,7 @@ static const char debug_utils_h[] =
"\n"
"#endif // MINDSPORE_LITE_MICRO_MICRODEBUGUTIL_H_\n";

static const char debug_utils_c[] =
const char *debug_utils_c =
"/**\n"
" * Copyright 2021 Huawei Technologies Co., Ltd\n"
" *\n"
@@ -239,7 +239,7 @@ static const char debug_utils_c[] =
"}\n"
"\n"
"void PrintTensor(MicroTensor *tensor, FILE *output_file, const char *is_input) {\n"
" if (output_file != NULL) {\n"
" if (output_file == NULL) {\n"
" MICRO_ERROR(\"output file is NULL\");\n"
" return;\n"
" }\n"
@@ -269,4 +269,4 @@ static const char debug_utils_c[] =
" return retval;\n"
"}\n";

#endif // MICRO_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_BEN_DEBUG_UTILS_H_
#endif // MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_DEBUG_UTILS_H_

+4 -4  mindspore/lite/micro/coder/generator/component/const_blocks/license.h

@@ -14,12 +14,12 @@
* limitations under the License.
*/

#ifndef MICRO_GENERATOR_CONST_BLOCK_LICENSE_INFOS_H
#define MICRO_GENERATOR_CONST_BLOCK_LICENSE_INFOS_H
#ifndef MINDSPORE_LITE_MICRO_GENERATOR_CONST_BLOCK_LICENSE_INFOS_H_
#define MINDSPORE_LITE_MICRO_GENERATOR_CONST_BLOCK_LICENSE_INFOS_H_

namespace mindspore::lite::micro {

const char g_hwLicense[] =
static const char *g_hwLicense =
"/**\n"
" * Copyright 2021 Huawei Technologies Co., Ltd\n"
" *\n"
@@ -37,4 +37,4 @@ const char g_hwLicense[] =
" */\n\n";
} // namespace mindspore::lite::micro

#endif // MICRO_GENERATOR_CONST_BLOCK_LICENSE_INFOS_H
#endif // MINDSPORE_LITE_MICRO_GENERATOR_CONST_BLOCK_LICENSE_INFOS_H_

+6 -6  mindspore/lite/micro/coder/generator/component/const_blocks/load_input.h

@@ -14,9 +14,9 @@
* limitations under the License.
*/

#ifndef MICRO_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_BENCH_LOAD_INPUT_H_
#define MICRO_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_BENCH_LOAD_INPUT_H_
static const char load_input_h[] =
#ifndef MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_BENCH_LOAD_INPUT_H_
#define MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_BENCH_LOAD_INPUT_H_
const char *load_input_h =
"/**\n"
" * Copyright 2021 Huawei Technologies Co., Ltd\n"
" *\n"
@@ -43,7 +43,7 @@ static const char load_input_h[] =
"\n"
"#endif // MICRO_EXAMPLE_LOAD_INPUT_LOAD_INPUT_H_\n";

static const char load_input_c[] =
const char *load_input_c =
"/**\n"
" * Copyright 2021 Huawei Technologies Co., Ltd\n"
" *\n"
@@ -131,11 +131,11 @@ static const char load_input_c[] =
" int size = 0;\n"
" buffers[i] = ReadInputData(inputs_path[i], &size);\n"
" if (size != inputs_size[i] || buffers[i] == NULL) {\n"
" printf(\"size mismatch, %s, %d, %d\\n\", inputs_path[i], size, inputs_size[i]);\n"
" printf(\"size mismatch, %s, input: %d, needed: %d\\n\", inputs_path[i], size, inputs_size[i]);\n"
" return -1;\n"
" }\n"
" }\n"
" return 0;\n"
"}\n";

#endif // MICRO_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_BENCH_LOAD_INPUT_H_
#endif // MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_BENCH_LOAD_INPUT_H_

+6 -18  mindspore/lite/micro/coder/generator/component/const_blocks/micro_tensor.h

@@ -13,10 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MICRO_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_MICRO_TENSOR_H_
#define MICRO_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_MICRO_TENSOR_H_
#ifndef MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_MICRO_TENSOR_H_
#define MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_MICRO_TENSOR_H_

static const char micro_tensor_h[] =
const char *micro_tensor_h =
"/**\n"
" * Copyright 2021 Huawei Technologies Co., Ltd\n"
" *\n"
@@ -42,20 +42,8 @@ static const char micro_tensor_h[] =
"#include <stdbool.h>\n"
"#include <stdint.h>\n"
"\n"
"inline bool IsPrint() {\n"
" char *env = getenv(\"GLOG_v\");\n"
" if (env == NULL) {\n"
" return false;\n"
" }\n"
" return strcmp(env, \"1\") == 0;\n"
"}\n"
"\n"
"#define MICRO_INFO(content, args...) \\\n"
" { \\\n"
" if (IsPrint()) { \\\n"
" printf(\"[INFO] %s|%d: \" #content \"\\r\\n\", __func__, __LINE__, ##args); \\\n"
" } \\\n"
" }\n"
"#define MICRO_INFO(content, args...) \\\n"
" { printf(\"[INFO] %s|%d: \" #content \"\\r\\n\", __func__, __LINE__, ##args); }\n"
"#define MICRO_ERROR(content, args...) \\\n"
" { printf(\"[ERROR] %s|%d: \" #content \"\\r\\n\", __func__, __LINE__, ##args); }\n"
"\n"
@@ -115,4 +103,4 @@ static const char micro_tensor_h[] =
"} GraphQuantArgs;\n"
"\n"
"#endif // MSMICRO_TENSOR_H\n";
#endif // MICRO_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_MICRO_TENSOR_H_
#endif // MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_MICRO_TENSOR_H_
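
Note: the getenv-based IsPrint gate is removed, so MICRO_INFO now prints unconditionally; call sites keep the printf-style form used elsewhere in this commit, e.g.:

    MICRO_INFO("config: ThreadNum: %d, BindMode: %d", thread_num, bind_mode);
    MICRO_ERROR("set inputs failed");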

+99 -0  mindspore/lite/micro/coder/generator/component/const_blocks/thread_pool.h

@@ -0,0 +1,99 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_THREAD_POOL_H_
#define MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_THREAD_POOL_H_

namespace mindspore::lite::micro {

const char *thread_pool_h =
"/**\n"
" * Copyright 2021 Huawei Technologies Co., Ltd\n"
" *\n"
" * Licensed under the Apache License, Version 2.0 (the \"License\");\n"
" * you may not use this file except in compliance with the License.\n"
" * You may obtain a copy of the License at\n"
" *\n"
" * http://www.apache.org/licenses/LICENSE-2.0\n"
" *\n"
" * Unless required by applicable law or agreed to in writing, software\n"
" * distributed under the License is distributed on an \"AS IS\" BASIS,\n"
" * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n"
" * See the License for the specific language governing permissions and\n"
" * limitations under the License.\n"
" */\n"
"\n"
"#ifndef MINDSPORE_LITE_SRC_RUNTIME_THREAD_POOL_H_\n"
"#define MINDSPORE_LITE_SRC_RUNTIME_THREAD_POOL_H_\n"
"\n"
"#include <stdbool.h>\n"
"\n"
"#define MAX_TASK_NUM (2)\n"
"\n"
"/// \\brief BindMode defined for holding bind cpu strategy argument.\n"
"typedef enum {\n"
" NO_BIND_MODE = 0, /**< no bind */\n"
" HIGHER_MODE = 1, /**< bind higher cpu first */\n"
" MID_MODE = 2 /**< bind middle cpu first */\n"
"} BindMode;\n"
"\n"
"struct ThreadPool;\n"
"\n"
"struct ThreadPool *CreateThreadPool(int thread_num, int mode);\n"
"\n"
"/**\n"
" *\n"
" * @param session_index, support multi session\n"
" * @param job\n"
" * @param content\n"
" * @param task_num\n"
" */\n"
"int ParallelLaunch(struct ThreadPool *thread_pool, int (*job)(void *, int), void *content, int task_num);\n"
"\n"
"/**\n"
" * bind each thread to specified cpu core\n"
" * @param is_bind\n"
" * @param mode\n"
" */\n"
"int BindThreads(struct ThreadPool *thread_pool, bool is_bind, int mode);\n"
"\n"
"/**\n"
" * activate the thread pool\n"
" * @param thread_pool_id\n"
" */\n"
"void ActivateThreadPool(struct ThreadPool *thread_pool);\n"
"\n"
"/**\n"
" * deactivate the thread pool\n"
" * @param thread_pool_id\n"
" */\n"
"void DeactivateThreadPool(struct ThreadPool *thread_pool);\n"
"\n"
"/**\n"
" *\n"
" * @return current thread num\n"
" */\n"
"int GetCurrentThreadNum(struct ThreadPool *thread_pool);\n"
"\n"
"/**\n"
" * destroy thread pool, and release resource\n"
" */\n"
"void DestroyThreadPool(struct ThreadPool *thread_pool);\n"
"\n"
"#endif // MINDSPORE_LITE_SRC_RUNTIME_THREAD_POOL_H_\n";
} // namespace mindspore::lite::micro

#endif // MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_THREAD_POOL_H_
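
Note: this const block embeds a plain-C thread-pool API into every generated project. A hedged end-to-end usage sketch against exactly the declarations above; the payload struct and job function are hypothetical:

    #include "thread_pool.h"  /* the generated header shown above */

    typedef struct { const float *in; float *out; int chunk; } AddCtx;

    /* job callback, matching int (*job)(void *, int) */
    static int DoChunk(void *content, int task_id) {
      AddCtx *ctx = (AddCtx *)content;
      int begin = task_id * ctx->chunk;
      for (int i = begin; i < begin + ctx->chunk; ++i) ctx->out[i] = ctx->in[i] + 1.0f;
      return 0;
    }

    int main(void) {
      struct ThreadPool *pool = CreateThreadPool(2, NO_BIND_MODE);
      if (pool == NULL) return -1;
      float in[8] = {0}, out[8] = {0};
      AddCtx ctx = {in, out, 4};
      int ret = ParallelLaunch(pool, DoChunk, &ctx, 2);  /* the header caps MAX_TASK_NUM at 2 */
      DestroyThreadPool(pool);
      return ret;
    }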

+61 -0  mindspore/lite/micro/coder/generator/component/parallel_component.cc

@@ -0,0 +1,61 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "coder/generator/component/parallel_component.h"
#include <string>

namespace mindspore::lite::micro {

void CodeCreateThreadPool(std::ofstream &ofs, const std::string &module_name) {
ofs << " int thread_num = 4;\n"
" BindMode bind_mode = NO_BIND_MODE;\n"
" if (argc >= 6) {\n"
" thread_num = atoi(argv[4]);\n"
" bind_mode = atoi(argv[5]);\n"
" }\n"
" struct ThreadPool *thread_pool = CreateThreadPool(thread_num, bind_mode);\n"
" if (thread_pool == NULL) {\n"
" MICRO_ERROR(\"create thread pool failed\");\n"
" return RET_ERROR;\n"
" }\n"
<< " ret = " << module_name << "_SetThreadPool(thread_pool);\n"
<< " if (ret != RET_OK) {\n"
" MICRO_ERROR(\"set global thread pool failed\");\n"
" return RET_ERROR;\n"
" }\n"
" MICRO_INFO(\"config: ThreadNum: %d, BindMode: %d\", thread_num, bind_mode);\n";
}

void CodeDestroyThreadPool(std::ofstream &ofs) { ofs << " DestroyThreadPool(thread_pool);\n"; }

void CodeSetGlobalThreadPoolState(std::ofstream &ofs, const std::string &module_name) {
ofs << "/*\n"
" * set global thread pool, which is created by user\n"
" */\n"
<< "int " << module_name << "_SetThreadPool(struct ThreadPool *thread_pool);\n\n";
}

void CodeSetGlobalThreadPoolImplement(std::ofstream &ofs, const std::string &module_name) {
ofs << "struct ThreadPool *g_thread_pool = NULL;\n"
<< "int " << module_name << "_SetThreadPool(struct ThreadPool *thread_pool) {\n"
<< " if (thread_pool == NULL) {\n"
" return RET_ERROR;\n"
" }\n"
" g_thread_pool = thread_pool;\n"
" return RET_OK;\n"
"}\n";
}
} // namespace mindspore::lite::micro

+35 -0  mindspore/lite/micro/coder/generator/component/parallel_component.h

@@ -0,0 +1,35 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_LITE_MICRO_CODER_GENERATOR_PARALLEL_COMPONENT_H_
#define MINDSPORE_LITE_MICRO_CODER_GENERATOR_PARALLEL_COMPONENT_H_

#include <string>
#include <fstream>

namespace mindspore::lite::micro {

void CodeCreateThreadPool(std::ofstream &ofs, const std::string &module_name);

void CodeDestroyThreadPool(std::ofstream &ofs);

void CodeSetGlobalThreadPoolState(std::ofstream &ofs, const std::string &module_name);

void CodeSetGlobalThreadPoolImplement(std::ofstream &ofs, const std::string &module_name);

} // namespace mindspore::lite::micro

#endif // MINDSPORE_LITE_MICRO_CODER_GENERATOR_PARALLEL_COMPONENT_H_

+8 -8  mindspore/lite/micro/coder/generator/component/weight_component.cc

@@ -74,10 +74,10 @@ void CodeModelParamsForNet(std::ofstream &hofs, std::ofstream &cofs, const std::
continue;
}
if (tensor->category() == Tensor::Category::CONST_TENSOR) {
hofs << "extern " << GetTensorDataType(tensor->data_type()) << name << " = [];\n";
cofs << GetTensorDataType(tensor->data_type()) << name << " = [" << tensor->ElementsNum() << "];\n";
hofs << "extern " << GetTensorDataType(tensor->data_type()) << name << "[];\n";
cofs << GetTensorDataType(tensor->data_type()) << name << "[" << tensor->ElementsNum() << "];\n";
} else if (tensor->category() == Tensor::Category::VAR) {
hofs << "extern " << GetTensorDataType(tensor->data_type()) << " *" << name << ";\n";
hofs << "extern " << GetTensorDataType(tensor->data_type()) << "*" << name << ";\n";
cofs << GetTensorDataType(tensor->data_type()) << "*" << name << " = NULL;\n";
}
}
@@ -87,7 +87,6 @@ void CodeModelParamsForNet(std::ofstream &hofs, std::ofstream &cofs, const std::
void CodeWeightInitFunc(std::ofstream &ofs, const std::string &module_name, const std::unique_ptr<CoderContext> &ctx) {
ofs << "int " << module_name << "_Init(void *weight_buffer, int weight_size) {\n"
<< " if (weight_buffer == NULL) {\n"
" MICRO_ERROR(\"weight buffer is NULL\");\n"
<< " return RET_ERROR;\n"
<< " }\n";

@@ -106,8 +105,9 @@ void CodeWeightInitFunc(std::ofstream &ofs, const std::string &module_name, cons
if (tensor->category() != Tensor::Category::CONST_TENSOR) {
continue;
}
auto iter = ctx->tensors_map().find(tensor);
if (iter != ctx->tensors_map().end()) {
std::map<Tensor *, std::string> ctx_tensor_map = ctx->tensors_map();
auto iter = ctx_tensor_map.find(tensor);
if (iter != ctx_tensor_map.end()) {
origins += " {" + name + ", " + std::to_string(tensor->Size()) + ", " + std::to_string(offset) + "},\n";
params_num++;
} else {
@@ -115,14 +115,14 @@ void CodeWeightInitFunc(std::ofstream &ofs, const std::string &module_name, cons
params +=
" " + GetTensorDataType(data_type) + "*" + name + " = (weight_buffer + " + std::to_string(offset) + ");\n";
}
offset += tensor->Size();
}
ofs << " struct ModelParameter model_params[] = {\n" << origins << " };\n";
ofs << params << "\n";
ofs << " struct ModelParameter model_params[] = {\n" << origins << " };\n";

ofs << "\n";
ofs << " for(int i = 0; i < " << params_num << "; ++i) {\n"
<< " if (model_params[i].offset + model_params[i].size > weight_size) {\n"
" MICRO_ERROR(\"buffer is invalid, size: %d, offset: %lu\", weight_size, model_params[i].offset);\n"
" return RET_ERROR;\n"
" }\n"
<< " memcpy(model_params[i].addr, (weight_buffer + model_params[i].offset), model_params[i].size);\n"


+ 12
- 8
mindspore/lite/micro/coder/generator/generator.cc View File

@@ -24,8 +24,9 @@
#include "coder/generator/component/const_blocks/cmake_lists.h"
#include "coder/generator/component/const_blocks/debug_utils.h"
#include "coder/generator/component/const_blocks/load_input.h"
#include "coder/generator/component/const_blocks/thread_pool.h"
#include "coder/generator/component/const_blocks/license.h"
#include "micro/coder/log.h"
#include "coder/log.h"

namespace mindspore::lite::micro {
int WriteContentToFile(const std::string &file, const std::string &content) {
@@ -61,11 +62,13 @@ Generator::~Generator() { (void)umask(origin_umask_); }
void Generator::CodeNetRunFunc(std::ofstream &ofs) {
// generate net inference code
ofs << "void " << config_->module_name() << "_Inference() {\n";
if (config_->code_mode() == CodeMode::Code_Inference) {
ofs << "int thread_num = GetCurrentThreadNum(THREAD_POOL_DEFAULT);\n";
if (config_->support_parallel()) {
ofs << " const int g_thread_num = GetCurrentThreadNum(g_thread_pool);\n";
} else {
ofs << " const int g_thread_num = 1;\n";
}
for (const auto &block : ctx_->code_blocks()) {
ofs << "\t{\n" << block << "\t}\n";
ofs << " {\n" << block << " }\n";
}
ofs << "}\n";
}
@@ -98,7 +101,7 @@ int Generator::CodeSourceCMakeFile() {
}

int Generator::CodeStaticContent() {
const std::vector<std::pair<std::string, std::string>> static_blocks = {
std::vector<std::pair<std::string, std::string>> static_blocks = {
{net_inc_file_path_ + "microtensor.h", micro_tensor_h},
{net_src_file_path_ + "CMakeLists.txt", src_cmake_lists_txt},
{net_main_file_path_ + "debug_utils.h", debug_utils_h},
@@ -106,12 +109,13 @@ int Generator::CodeStaticContent() {
{net_main_file_path_ + "load_input.h", load_input_h},
{net_main_file_path_ + "load_input.c", load_input_c},
{net_main_file_path_ + "CMakeLists.txt", bench_cmake_lists_txt}};
if (config_->support_parallel()) {
static_blocks.emplace_back(net_inc_file_path_ + "thread_pool.h", thread_pool_h);
}
for (const auto &static_block : static_blocks) {
std::string file_name = static_block.first;
std::string content = static_block.second;
if (WriteContentToFile(file_name, content) != RET_OK) {
return RET_ERROR;
}
MS_CHECK_RET_CODE(WriteContentToFile(file_name, content), "write file failed");
}
return RET_OK;
}
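
With support_parallel() enabled, CodeNetRunFunc now emits an inference entry of roughly this shape; the operator call inside the braces is an illustrative placeholder for whatever each operator coder appended to ctx_->code_blocks().

    void net_Inference() {
      const int g_thread_num = GetCurrentThreadNum(g_thread_pool);
      {
        /* one brace-scoped block per operator coder, e.g. (illustrative): */
        ElementAdd(g_Input0, g_Weight1, g_Buffer0, 64);
      }
    }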


+ 14
- 5
mindspore/lite/micro/coder/generator/inference/inference_generator.cc View File

@@ -18,6 +18,7 @@
#include <vector>
#include <string>
#include "coder/generator/component/common_component.h"
#include "coder/generator/component/parallel_component.h"
#include "coder/generator/component/benchmark_component.h"
#include "coder/generator/component/const_blocks/license.h"

@@ -28,14 +29,17 @@ int InferenceGenerator::CodeNetHFile() {
MS_CHECK_TRUE(!ofs.bad(), "filed to open file");
MS_LOG(INFO) << "write " << net_include_file;
ofs << g_hwLicense;
if (config_->code_mode() == CodeMode::Code_Inference) {
ofs << "#include \"src/runtime/thread_pool.h\"\n";
if (config_->support_parallel()) {
ofs << "#include \"thread_pool.h\"\n";
}
ofs << "#include \"microtensor.h\"\n\n";
CodeInputAndOutputState(ofs, config_->module_name());
if (is_get_quant_args_) {
CodeGraphQuantArgsState(ofs, config_->module_name());
}
if (config_->support_parallel()) {
CodeSetGlobalThreadPoolState(ofs, config_->module_name());
}
if (config_->is_weight_file()) {
CodeInitWeightState(ofs, config_->module_name());
}
@@ -50,6 +54,9 @@ int InferenceGenerator::CodeNetCFile() {
MS_CHECK_TRUE(!ofs.bad(), "filed to open file");
MS_LOG(INFO) << "write " << net_impl_file;
CodeSourceFileInclude(ofs, net_weight_hfile_, net_inc_hfile_);
if (config_->support_parallel()) {
CodeSetGlobalThreadPoolImplement(ofs, config_->module_name());
}
CodeInputAndOutputImplement(ofs, config_->module_name(), ctx_);
CodeInitResourceImplement(ofs, config_->module_name(), ctx_);
CodeFreeResourceImplement(ofs, config_->module_name(), ctx_);
@@ -78,12 +85,14 @@ int InferenceGenerator::CodeBenchmarkFile() {
if (config_->is_weight_file()) {
CodeBenchmarkInitWeight(ofs, config_->module_name());
}
if (config_->code_mode() == CodeMode::Code_Inference) {
CodeBenchmarkConfigThread(ofs);
if (config_->support_parallel()) {
CodeCreateThreadPool(ofs, config_->module_name());
}
CodeBenchmarkInference(ofs, config_->module_name());
CodeBenchmarkPrintOutputs(ofs, config_->module_name());

if (config_->support_parallel()) {
CodeDestroyThreadPool(ofs);
}
CodeBenchmarkFreeResourse(ofs, config_->module_name(), inputs_num);
ofs.close();
return RET_OK;


+ 1
- 1
mindspore/lite/micro/coder/generator/inference/inference_generator.h View File

@@ -19,7 +19,7 @@

#include <utility>
#include <memory>
#include "micro/coder/generator/generator.h"
#include "coder/generator/generator.h"

namespace mindspore::lite::micro {
class InferenceGenerator : public Generator {


+ 1
- 1
mindspore/lite/micro/coder/generator/train/train_generator.cc View File

@@ -39,7 +39,7 @@ int TrainGenerator::CodeNetHFile() {
MS_CHECK_TRUE(!ofs.bad(), "filed to open file");
MS_LOG(INFO) << "write " << net_include_file;
ofs << g_hwLicense;
if (config_->code_mode() == CodeMode::Code_Inference) {
if (config_->code_mode() == CodeMode::Inference) {
ofs << "#include \"src/runtime/thread_pool.h\"\n";
}
ofs << "#include \"microtensor.h\"\n\n";


+ 1
- 1
mindspore/lite/micro/coder/generator/train/train_generator.h View File

@@ -19,7 +19,7 @@

#include <utility>
#include <memory>
#include "micro/coder/generator/generator.h"
#include "coder/generator/generator.h"

namespace mindspore::lite::micro {
class TrainGenerator : public Generator {


+ 8
- 2
mindspore/lite/micro/coder/graph.cc View File

@@ -14,7 +14,7 @@
* limitations under the License.
*/

#include "micro/coder/graph.h"
#include "coder/graph.h"
#include <queue>
#include <deque>
#include <string>
@@ -23,7 +23,6 @@
#include <set>
#include "coder/log.h"
#include "schema/inner/model_generated.h"
#include "src/ops/primitive_c.h"
#include "securec/include/securec.h"

namespace mindspore::lite::micro {
@@ -92,8 +91,15 @@ int CoderGraph::ConvertTensors() {
if (quant_params != nullptr) {
for (int j = 0; j < static_cast<int>(quant_params->size()); j++) {
QuantArg quant_arg{};
quant_arg.bitNum = quant_params->Get(j)->numBits();
quant_arg.scale = quant_params->Get(j)->scale();
quant_arg.zeroPoint = quant_params->Get(j)->zeroPoint();
quant_arg.var_corr = quant_params->Get(j)->varCorr();
quant_arg.mean_corr = quant_params->Get(j)->meanCorr();
quant_arg.inited = quant_params->Get(j)->inited();
quant_arg.roundType = quant_params->Get(j)->roundType();
quant_arg.multiplier = quant_params->Get(j)->multiplier();
quant_arg.dstDtype = quant_params->Get(j)->dstDtype();
dstTensor->AddQuantParam(quant_arg);
}
}


+ 36
- 9
mindspore/lite/micro/coder/opcoders/base/conv2d_base_coder.cc View File

@@ -14,12 +14,12 @@
* limitations under the License.
*/

#include "micro/coder/opcoders/base/conv2d_base_coder.h"
#include "coder/opcoders/base/conv2d_base_coder.h"
#include <string>
#include <vector>
#include "nnacl/fp32/winograd_utils.h"
#include "nnacl/int8/quantize.h"
#include "micro/coder/log.h"
#include "coder/log.h"

namespace {
int MallocConvQuantParams(ConvQuantArg *quant_arg, size_t input_arg_num, size_t filter_arg_num, size_t output_arg_num) {
@@ -37,8 +37,8 @@ int MallocConvQuantParams(ConvQuantArg *quant_arg, size_t input_arg_num, size_t
} // namespace

namespace mindspore::lite::micro {
string Conv2DBaseCoder::LayoutTransformFp32(schema::Format src_format, schema::Format dst_format) {
string ret;
std::string Conv2DBaseCoder::LayoutTransformFp32(schema::Format src_format, schema::Format dst_format) {
std::string ret;
if (src_format == schema::Format_NHWC && dst_format == schema::Format_NC4HW4) {
ret = "PackNHWCToNC4HW4Fp32";
} else if (src_format == schema::Format_NHWC && dst_format == schema::Format_NHWC4) {
@@ -56,8 +56,8 @@ string Conv2DBaseCoder::LayoutTransformFp32(schema::Format src_format, schema::F
return ret;
}

string Conv2DBaseCoder::LayoutTransformInt8(schema::Format src_format, schema::Format dst_format) {
string ret;
std::string Conv2DBaseCoder::LayoutTransformInt8(schema::Format src_format, schema::Format dst_format) {
std::string ret;
if (src_format == schema::Format_NHWC && dst_format == schema::Format_NHWC4) {
ret = "PackNHWCToNHWC4Int8";
} else {
@@ -67,8 +67,8 @@ string Conv2DBaseCoder::LayoutTransformInt8(schema::Format src_format, schema::F
return ret;
}

string Conv2DBaseCoder::LayoutTransform(TypeId data_type, schema::Format src_format, schema::Format dst_format) {
string ret;
std::string Conv2DBaseCoder::LayoutTransform(TypeId data_type, schema::Format src_format, schema::Format dst_format) {
std::string ret;
switch (data_type) {
case kNumberTypeInt8:
ret = LayoutTransformInt8(src_format, dst_format);
@@ -197,7 +197,7 @@ int Conv2DBaseCoder::SetQuantMultiplier() {
return RET_OK;
}

int Conv2DBaseCoder::CheckResizeValid() {
int Conv2DBaseCoder::CheckResizeValid() const {
// ===============check in channel================= //
int32_t filter_in_channel = filter_tensor_->Channel();
int32_t resize_in_channel = input_tensor_->Channel();
@@ -206,12 +206,39 @@ int Conv2DBaseCoder::CheckResizeValid() {
return RET_OK;
}

void Conv2DBaseCoder::SetRoundingAndMultipilerMode() {
auto input_quant_arg = input_tensor_->quant_params().front();
int round_type = input_quant_arg.roundType;
switch (round_type) {
case 1:
conv_quant_arg_->round_mode_ = Rounding_Away_from_zero;
break;
case 2:
conv_quant_arg_->round_mode_ = Rounding_Up;
break;
default:
conv_quant_arg_->round_mode_ = Rounding_No;
}
int cal_multiplier_type = input_quant_arg.multiplier;
switch (cal_multiplier_type) {
case 0:
conv_quant_arg_->quant_multiplier_mode_ = Method_SinglePrecision;
break;
case 1:
conv_quant_arg_->quant_multiplier_mode_ = Method_DoublePrecision;
break;
default:
conv_quant_arg_->quant_multiplier_mode_ = Method_No;
}
}

int Conv2DBaseCoder::SetQuantParam() {
MS_CHECK_RET_CODE(MallocQuantParam(), "Malloc quant param failed.");
MS_CHECK_RET_CODE(SetInputTensorQuantParam(), "Set Input Tensor Quant Param Failed.");
MS_CHECK_RET_CODE(SetFilterTensorQuantParam(), "Set Filter Tensor Quant Param Failed.");
MS_CHECK_RET_CODE(SetOutputTensorQuantParam(), "Set Output Tensor Quant Param Failed.");
MS_CHECK_RET_CODE(SetIfPerChannel(), "Set if per tensor channel failed.");
SetRoundingAndMultipilerMode();
MS_CHECK_RET_CODE(SetQuantMultiplier(), "Set Quant Multiplier Failed.");
// now only consider per tensor for output
MS_CHECK_PTR(conv_param_->conv_quant_arg_.out_act_min_);


+ 13
- 9
mindspore/lite/micro/coder/opcoders/base/conv2d_base_coder.h View File

@@ -21,13 +21,11 @@
#include <vector>
#include <utility>
#include <memory>
#include "micro/coder/opcoders/op_coder.h"
#include "coder/opcoders/op_coder.h"
#include "src/runtime/kernel/arm/base/layout_transform.h"
#include "nnacl/conv_parameter.h"
namespace mindspore::lite::micro {

using std::string;

class Conv2DBaseCoder : public OperatorCoder {
public:
Conv2DBaseCoder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
@@ -47,10 +45,14 @@ class Conv2DBaseCoder : public OperatorCoder {
free(conv_quant_arg_->input_quant_args_);
free(conv_quant_arg_->filter_quant_args_);
free(conv_quant_arg_->output_quant_args_);
conv_param_ = nullptr;
conv_quant_arg_ = nullptr;
filter_tensor_ = nullptr;
bias_tensor_ = nullptr;
}

protected:
int Init();
virtual int Init();

int SetQuantParam();

@@ -62,19 +64,21 @@ class Conv2DBaseCoder : public OperatorCoder {

int SetOutputTensorQuantParam();

void SetRoundingAndMultipilerMode();

int SetQuantMultiplier();

int CheckResizeValid();
int CheckResizeValid() const;

int SetIfPerChannel();

int CheckLayout(lite::Tensor *input_tensor);

string LayoutTransformFp32(schema::Format src_format, schema::Format dst_format);
std::string LayoutTransformFp32(schema::Format src_format, schema::Format dst_format);

string LayoutTransformInt8(schema::Format src_format, schema::Format dst_format);
std::string LayoutTransformInt8(schema::Format src_format, schema::Format dst_format);

string LayoutTransform(TypeId data_type, schema::Format src_format, schema::Format dst_format);
std::string LayoutTransform(TypeId data_type, schema::Format src_format, schema::Format dst_format);

ConvParameter *conv_param_{nullptr};

@@ -84,7 +88,7 @@ class Conv2DBaseCoder : public OperatorCoder {

Tensor *bias_tensor_{nullptr};

string convert_func_;
std::string convert_func_;
};
} // namespace mindspore::lite::micro
#endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_BASE_CONV2D_BASE_CODER_H_

+ 153
- 0
mindspore/lite/micro/coder/opcoders/base/detection_post_process_base_coder.cc View File

@@ -0,0 +1,153 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "coder/opcoders/base/detection_post_process_base_coder.h"

#include "nnacl/int8/quant_dtype_cast_int8.h"

#include "coder/opcoders/file_collector.h"
#include "coder/log.h"
#include "include/errorcode.h"

namespace mindspore::lite::micro {

int DetectionPostProcessBaseCoder::Prepare(CoderContext *const context) {
MS_CHECK_PTR(parameter_);
params_ = reinterpret_cast<DetectionPostProcessParameter *>(parameter_);
params_->anchors_ = nullptr;
params_->decoded_boxes_ = nullptr;
params_->nms_candidate_ = nullptr;
params_->indexes_ = nullptr;
params_->scores_ = nullptr;
params_->all_class_indexes_ = nullptr;
params_->all_class_scores_ = nullptr;
params_->single_class_indexes_ = nullptr;
params_->selected_ = nullptr;

Tensor *anchor_tensor = input_tensors_.at(2);
MS_CHECK_PTR(anchor_tensor);
if (anchor_tensor->data_type() == kNumberTypeInt8) {
QuantArg quant_param = anchor_tensor->quant_params().at(0);
auto anchor_int8 = reinterpret_cast<int8_t *>(anchor_tensor->data_c());
MS_CHECK_PTR(anchor_int8);
auto anchor_fp32 = static_cast<float *>(
allocator_->Malloc(kNumberTypeFloat, anchor_tensor->ElementsNum() * sizeof(float), kOfflinePackWeight));
MS_CHECK_PTR(anchor_fp32);
DoDequantizeInt8ToFp32(anchor_int8, anchor_fp32, quant_param.scale, quant_param.zeroPoint,
anchor_tensor->ElementsNum());
params_->anchors_ = anchor_fp32;
} else if (anchor_tensor->data_type() == kNumberTypeUInt8) {
QuantArg quant_param = anchor_tensor->quant_params().front();
auto anchor_uint8 = reinterpret_cast<uint8_t *>(anchor_tensor->data_c());
MS_CHECK_PTR(anchor_uint8);
auto anchor_fp32 = static_cast<float *>(
allocator_->Malloc(kNumberTypeFloat, anchor_tensor->ElementsNum() * sizeof(float), kOfflinePackWeight));
MS_CHECK_PTR(anchor_fp32);
DoDequantizeUInt8ToFp32(anchor_uint8, anchor_fp32, quant_param.scale, quant_param.zeroPoint,
anchor_tensor->ElementsNum());
params_->anchors_ = anchor_fp32;
} else if (anchor_tensor->data_type() == kNumberTypeFloat32 || anchor_tensor->data_type() == kNumberTypeFloat) {
params_->anchors_ = static_cast<float *>(
allocator_->Malloc(kNumberTypeFloat, anchor_tensor->ElementsNum() * sizeof(float), kOfflinePackWeight));
MS_CHECK_PTR(params_->anchors_);
memcpy(params_->anchors_, anchor_tensor->data_c(), anchor_tensor->Size());
} else {
MS_LOG(ERROR) << "unsupported anchor data type " << anchor_tensor->data_type();
return RET_ERROR;
}
MS_CHECK_RET_CODE(AllocateBuffer(), "AllocateBuffer failed");
MS_CHECK_RET_CODE(MallocInputsBuffer(), "malloc inputs buffer failed");
return RET_OK;
}

int DetectionPostProcessBaseCoder::AllocateBuffer() {
MS_CHECK_PTR(input_tensors_.at(0));
MS_CHECK_PTR(input_tensors_.at(1));
num_boxes_ = input_tensors_.at(0)->shape().at(1);
num_classes_with_bg_ = input_tensors_.at(1)->shape().at(2);
params_->decoded_boxes_ = allocator_->Malloc(kNumberTypeFloat, num_boxes_ * 4 * sizeof(float), kWorkspace);
MS_CHECK_PTR(params_->decoded_boxes_);
params_->nms_candidate_ = allocator_->Malloc(kNumberTypeUInt8, num_boxes_ * sizeof(uint8_t), kWorkspace);
MS_CHECK_PTR(params_->nms_candidate_);
params_->selected_ = allocator_->Malloc(kNumberTypeInt, num_boxes_ * sizeof(int), kWorkspace);
MS_CHECK_PTR(params_->selected_);
params_->single_class_indexes_ = allocator_->Malloc(kNumberTypeInt, num_boxes_ * sizeof(int), kWorkspace);
MS_CHECK_PTR(params_->single_class_indexes_);

if (params_->use_regular_nms_) {
params_->scores_ =
allocator_->Malloc(kNumberTypeFloat, (num_boxes_ + params_->max_detections_) * sizeof(float), kWorkspace);
MS_CHECK_PTR(params_->scores_);
params_->indexes_ =
allocator_->Malloc(kNumberTypeInt, (num_boxes_ + params_->max_detections_) * sizeof(int), kWorkspace);
MS_CHECK_PTR(params_->indexes_);
params_->all_class_scores_ =
allocator_->Malloc(kNumberTypeFloat, (num_boxes_ + params_->max_detections_) * sizeof(float), kWorkspace);
MS_CHECK_PTR(params_->all_class_scores_);
params_->all_class_indexes_ =
allocator_->Malloc(kNumberTypeInt, (num_boxes_ + params_->max_detections_) * sizeof(int), kWorkspace);
MS_CHECK_PTR(params_->all_class_indexes_);
} else {
params_->scores_ = allocator_->Malloc(kNumberTypeFloat, num_boxes_ * sizeof(float), kWorkspace);
MS_CHECK_PTR(params_->scores_);
params_->indexes_ =
allocator_->Malloc(kNumberTypeFloat, num_boxes_ * params_->num_classes_ * sizeof(int), kWorkspace);
MS_CHECK_PTR(params_->indexes_);
}
return RET_OK;
}

int DetectionPostProcessBaseCoder::DoCode(CoderContext *const context) {
Collect(context, {"nnacl/detection_post_process_parameter.h", "wrapper/base/detection_post_process_base_wrapper.h"},
{"detection_post_process_fp32.c", "detection_post_process_base_wrapper.c"});

Serializer code;
MS_CHECK_RET_CODE(GetInputData(context, &code), "GetInputData failed");
Tensor *output_boxes = output_tensors_.at(0);
Tensor *output_classes = output_tensors_.at(1);
Tensor *output_scores = output_tensors_.at(2);
Tensor *output_num = output_tensors_.at(3);

code.CodeBaseStruct("DetectionPostProcessParameter", "params", params_->op_parameter_, params_->h_scale_,
params_->w_scale_, params_->x_scale_, params_->y_scale_, params_->nms_iou_threshold_,
params_->nms_score_threshold_, params_->max_detections_, params_->detections_per_class_,
params_->max_classes_per_detection_, params_->num_classes_, params_->use_regular_nms_,
params_->out_quantized_, params_->anchors_, params_->decoded_boxes_, params_->nms_candidate_,
params_->indexes_, params_->scores_, params_->all_class_indexes_, params_->all_class_scores_,
params_->single_class_indexes_, params_->selected_);

code.CodeFunction("DecodeBoxes", num_boxes_, input_boxes_, params_->anchors_, "&params");

if (params_->use_regular_nms_) {
code.CodeFunction("DetectionPostProcessRegular", num_boxes_, num_classes_with_bg_, input_scores_, output_boxes,
output_classes, output_scores, output_num, "PartialArgSort", "&params");
} else {
int task_id = 0;
int thread_num = 1;
code.CodeFunction("NmsMultiClassesFastCore", num_boxes_, num_classes_with_bg_, input_scores_, "PartialArgSort",
"&params", task_id, thread_num);

code.CodeFunction("DetectionPostProcessFast", num_boxes_, num_classes_with_bg_, input_scores_,
"(float *)(params.decoded_boxes_)", output_boxes, output_classes, output_scores, output_num,
"PartialArgSort", "&params");
}

context->AppendCode(code.str());

return RET_OK;
}

} // namespace mindspore::lite::micro
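
For the default fast path (use_regular_nms_ == false), the emitted C boils down to the calls below; box counts, class counts, and tensor symbols are baked in as literals at generation time, and the concrete values shown here are illustrative.

    DetectionPostProcessParameter params = { /* fields serialized by CodeBaseStruct */ };
    DecodeBoxes(1917, g_input_boxes, params.anchors_, &params);
    /* single thread for now: task_id = 0, thread_num = 1 */
    NmsMultiClassesFastCore(1917, 91, g_input_scores, PartialArgSort, &params, 0, 1);
    DetectionPostProcessFast(1917, 91, g_input_scores, (float *)(params.decoded_boxes_),
                             g_output_boxes, g_output_classes, g_output_scores, g_output_num,
                             PartialArgSort, &params);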

+ 54
- 0
mindspore/lite/micro/coder/opcoders/base/detection_post_process_base_coder.h View File

@@ -0,0 +1,54 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_BASE_DETECTION_POST_PROCESS_BASE_CODER_H_
#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_BASE_DETECTION_POST_PROCESS_BASE_CODER_H_

#include <string>
#include <vector>
#include <utility>
#include <memory>
#include "coder/opcoders/op_coder.h"
#include "nnacl/detection_post_process_parameter.h"
#include "coder/opcoders/serializers/serializer.h"

namespace mindspore::lite::micro {

class DetectionPostProcessBaseCoder : public OperatorCoder {
public:
DetectionPostProcessBaseCoder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
const Model::Node *node, size_t node_index, Target target)
: OperatorCoder(in_tensors, out_tensors, node, node_index, target) {}

~DetectionPostProcessBaseCoder() override = default;

int Prepare(CoderContext *const context) override;

int DoCode(CoderContext *const context) override;

protected:
int AllocateBuffer();
virtual int GetInputData(CoderContext *const context, Serializer *const coder) = 0;
virtual int MallocInputsBuffer() = 0;

int num_boxes_{0};
int num_classes_with_bg_{0};
float *input_boxes_{nullptr};
float *input_scores_{nullptr};
DetectionPostProcessParameter *params_{nullptr};
};
} // namespace mindspore::lite::micro
#endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_BASE_DETECTION_POST_PROCESS_BASE_CODER_H_

+ 1
- 1
mindspore/lite/micro/coder/opcoders/base/dtype_cast_coder.cc View File

@@ -15,7 +15,7 @@
*/

#include <string>
#include "micro/coder/opcoders/op_coder.h"
#include "coder/opcoders/op_coder.h"
#include "micro/coder/opcoders/file_collector.h"
#include "micro/coder/opcoders/base/dtype_cast_coder.h"
#include "micro/coder/opcoders/serializers/serializer.h"


+ 1
- 1
mindspore/lite/micro/coder/opcoders/base/dtype_cast_coder.h View File

@@ -19,7 +19,7 @@

#include <vector>
#include <memory>
#include "micro/coder/opcoders/op_coder.h"
#include "coder/opcoders/op_coder.h"
#include "nnacl/int8/quant_dtype_cast_int8.h"

namespace mindspore::lite::micro {


+ 6
- 2
mindspore/lite/micro/coder/opcoders/base/full_connection_base_coder.cc View File

@@ -14,10 +14,14 @@
* limitations under the License.
*/

#include "micro/coder/opcoders/base/full_connection_base_coder.h"
#include "coder/opcoders/base/full_connection_base_coder.h"

namespace mindspore::lite::micro {
FullConnectionBaseCoder::~FullConnectionBaseCoder() { fc_param_ = nullptr; }
FullConnectionBaseCoder::~FullConnectionBaseCoder() {
fc_param_ = nullptr;
filter_tensor_ = nullptr;
bias_tensor_ = nullptr;
}

int FullConnectionBaseCoder::Init() {
this->fc_param_ = reinterpret_cast<MatMulParameter *>(parameter_);


+ 3
- 2
mindspore/lite/micro/coder/opcoders/base/full_connection_base_coder.h View File

@@ -18,7 +18,7 @@
#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_BASE_FULLY_CONNECTED_BASE_CODER_H_

#include <vector>
#include "micro/coder/opcoders/op_coder.h"
#include "coder/opcoders/op_coder.h"
#include "nnacl/matmul_parameter.h"

namespace mindspore::lite::micro {
@@ -29,7 +29,8 @@ class FullConnectionBaseCoder : public OperatorCoder {
: OperatorCoder(in_tensors, out_tensors, node, node_index, target) {}

~FullConnectionBaseCoder() override;
int Init();

virtual int Init();

protected:
MatMulParameter *fc_param_{nullptr};


+ 46
- 35
mindspore/lite/micro/coder/opcoders/base/quant_dtype_cast_coder.cc View File

@@ -14,61 +14,72 @@
* limitations under the License.
*/

#include <string>
#include "micro/coder/opcoders/op_coder.h"
#include "micro/coder/opcoders/file_collector.h"
#include "micro/coder/opcoders/base/quant_dtype_cast_coder.h"
#include "micro/coder/opcoders/serializers/serializer.h"
#include "coder/opcoders/op_coder.h"
#include "coder/opcoders/file_collector.h"
#include "coder/opcoders/base/quant_dtype_cast_coder.h"
#include "coder/opcoders/serializers/serializer.h"
#include "coder/utils/type_cast.h"

using mindspore::schema::PrimitiveType_QuantDTypeCast;

namespace mindspore::lite::micro {

int QuantDTypeCastCoder::Prepare(CoderContext *const context) {
this->cast_param_ = reinterpret_cast<QuantDTypeCastParameter *>(parameter_);

if (cast_param_->srcT == kNumberTypeFloat32 && cast_param_->dstT == kNumberTypeInt8) {
if (input_tensor_->data_type() != kNumberTypeFloat32 || output_tensor_->data_type() != kNumberTypeInt8) {
MS_LOG(ERROR) << "cast_param_ data type and tensor data type do not match.";
return RET_ERROR;
}
inverse_ = false;
} else if (cast_param_->srcT == kNumberTypeInt8 && cast_param_->dstT == kNumberTypeFloat32) {
if (input_tensor_->data_type() != kNumberTypeInt8 || output_tensor_->data_type() != kNumberTypeFloat32) {
MS_LOG(ERROR) << "cast_param_ data type and tensor data type do not match.";
return RET_ERROR;
}
inverse_ = true;
} else {
MS_LOG(ERROR) << "cast_param_ data type not supported:"
<< " src: " << cast_param_->srcT << " dst: " << cast_param_->dstT;
return RET_PARAM_INVALID;
auto *param = reinterpret_cast<QuantDTypeCastParameter *>(parameter_);
if (input_tensor_->data_type() != static_cast<TypeId>(param->srcT) ||
output_tensor_->data_type() != static_cast<TypeId>(param->dstT)) {
MS_LOG(ERROR) << "param data type not supported:"
<< " src: " << param->srcT << " dst: " << param->dstT;
return RET_ERROR;
}
src_dtype = static_cast<TypeId>(param->srcT);
dst_dtype = static_cast<TypeId>(param->dstT);
return RET_OK;
}

int QuantDTypeCastCoder::DoCode(CoderContext *const context) {
// get quant params
QuantArg in_quant_arg = input_tensor_->quant_params().at(0);

// single thread for now
if (input_tensor_->quant_params().empty() && output_tensor_->quant_params().empty()) {
MS_LOG(ERROR) << "QuantDTypeCast need quantization parameters which is not found.";
return RET_ERROR;
}
auto quant_arg = (!output_tensor_->quant_params().empty() && output_tensor_->quant_params().at(0).inited)
? output_tensor_->quant_params().at(0)
: input_tensor_->quant_params().at(0);
int num_unit_thread = input_tensor_->ElementsNum();

// generate code .h .c
Collect(context, {"nnacl/int8/quant_dtype_cast_int8.h"}, {"quant_dtype_cast_int8.c"});

Serializer code;
code.precision(kPrecision);
std::string function = inverse_ ? "DoDequantizeInt8ToFp32" : "DoQuantizeFp32ToInt8";
code.CodeFunction(function, input_tensor_, output_tensor_, in_quant_arg.scale, in_quant_arg.zeroPoint,
num_unit_thread);

if (src_dtype == TypeId::kNumberTypeInt8 && dst_dtype == TypeId::kNumberTypeFloat32) {
code.CodeFunction("DoDequantizeInt8ToFp32", input_tensor_, output_tensor_, quant_arg.scale, quant_arg.zeroPoint,
num_unit_thread);
} else if (src_dtype == TypeId::kNumberTypeFloat32 && dst_dtype == TypeId::kNumberTypeInt8) {
bool from_uint8_src = false;
if (quant_arg.dstDtype == TypeId::kNumberTypeUInt8) {
from_uint8_src = true;
}
code.CodeFunction("DoQuantizeFp32ToInt8", input_tensor_, output_tensor_, quant_arg.scale, quant_arg.zeroPoint,
num_unit_thread, from_uint8_src);
} else if (src_dtype == TypeId::kNumberTypeInt8 && dst_dtype == TypeId::kNumberTypeUInt8) {
code.CodeFunction("Int8ToUInt8", input_tensor_, output_tensor_, num_unit_thread);
} else if (src_dtype == TypeId::kNumberTypeUInt8 && dst_dtype == TypeId::kNumberTypeFloat32) {
code.CodeFunction("DoDequantizeUInt8ToFp32", input_tensor_, output_tensor_, quant_arg.scale, quant_arg.zeroPoint,
num_unit_thread);
} else if (src_dtype == TypeId::kNumberTypeFloat32 && dst_dtype == TypeId::kNumberTypeUInt8) {
code.CodeFunction("DoQuantizeFp32ToUInt8", input_tensor_, output_tensor_, quant_arg.scale, quant_arg.zeroPoint,
num_unit_thread);
} else if (src_dtype == TypeId::kNumberTypeUInt8 && dst_dtype == TypeId::kNumberTypeInt8) {
code.CodeFunction("UInt8ToInt8", input_tensor_, output_tensor_, num_unit_thread);
} else {
MS_LOG(INFO) << "unsupported type cast, src: " << EnumNameDataType(src_dtype)
<< ", dst: " << EnumNameDataType(dst_dtype);
return RET_ERROR;
}
context->AppendCode(code.str());

return RET_OK;
}

REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_QuantDTypeCast,
CPUOpCoderCreator<QuantDTypeCastCoder>)
REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt8, PrimitiveType_QuantDTypeCast, CPUOpCoderCreator<QuantDTypeCastCoder>)
REG_OPERATOR_CODER(kAllTargets, kNumberTypeUInt8, PrimitiveType_QuantDTypeCast, CPUOpCoderCreator<QuantDTypeCastCoder>)
} // namespace mindspore::lite::micro
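
For reference, the dispatch the rewritten DoCode implements maps each (src, dst) pair onto one nnacl kernel:

    /* src  -> dst   : emitted nnacl call
     * int8  -> fp32  : DoDequantizeInt8ToFp32(in, out, scale, zp, n)
     * fp32  -> int8  : DoQuantizeFp32ToInt8(in, out, scale, zp, n, from_uint8_src)
     * int8  -> uint8 : Int8ToUInt8(in, out, n)
     * uint8 -> fp32  : DoDequantizeUInt8ToFp32(in, out, scale, zp, n)
     * fp32  -> uint8 : DoQuantizeFp32ToUInt8(in, out, scale, zp, n)
     * uint8 -> int8  : UInt8ToInt8(in, out, n)
     * anything else logs the pair and returns RET_ERROR */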

+ 3
- 5
mindspore/lite/micro/coder/opcoders/base/quant_dtype_cast_coder.h View File

@@ -19,7 +19,7 @@

#include <vector>
#include <memory>
#include "micro/coder/opcoders/op_coder.h"
#include "coder/opcoders/op_coder.h"
#include "nnacl/int8/quant_dtype_cast_int8.h"

namespace mindspore::lite::micro {
@@ -36,10 +36,8 @@ class QuantDTypeCastCoder final : public OperatorCoder {
int DoCode(CoderContext *const context) override;

private:
QuantDTypeCastParameter *cast_param_{nullptr};
std::vector<Tensor *> inputs_;
std::vector<Tensor *> outputs_;
bool inverse_{false};
TypeId src_dtype{kTypeUnknown};
TypeId dst_dtype{kTypeUnknown};
int thread_num_{0};
int thread_n_num_{0};
int thread_n_stride_{0};


+ 3
- 3
mindspore/lite/micro/coder/opcoders/base/reduce_base_coder.cc View File

@@ -14,16 +14,16 @@
* limitations under the License.
*/

#include "micro/coder/opcoders/base/reduce_base_coder.h"
#include "coder/opcoders/base/reduce_base_coder.h"
#include <vector>
#include "micro/coder/opcoders/op_coder.h"
#include "coder/opcoders/op_coder.h"

namespace mindspore::lite::micro {
namespace {
constexpr size_t kInputNum = 1;
constexpr size_t kOutputNum = 1;
} // namespace
int ReduceBaseCoder::CheckInputsOutputs() {
int ReduceBaseCoder::CheckInputsOutputs() const {
if (input_tensors_.size() < kInputNum) {
MS_LOG(ERROR) << "Reduce inputs size should be at least " << kInputNum << " but got " << input_tensors_.size();
return RET_ERROR;


+ 4
- 4
mindspore/lite/micro/coder/opcoders/base/reduce_base_coder.h View File

@@ -19,7 +19,7 @@

#include <vector>
#include <memory>
#include "micro/coder/opcoders/op_coder.h"
#include "coder/opcoders/op_coder.h"
#include "nnacl/reduce_parameter.h"

namespace mindspore::lite::micro {
@@ -31,11 +31,10 @@ class ReduceBaseCoder : public OperatorCoder {

~ReduceBaseCoder() override = default;

int Init();
virtual int ReSize();
virtual int Init();

private:
int CheckInputsOutputs();
int CheckInputsOutputs() const;
int CheckParameters();

protected:
@@ -54,6 +53,7 @@ class ReduceBaseCoder : public OperatorCoder {
int outer_size_{0};
int inner_size_{0};
int axis_size_{0};
virtual int ReSize();
};
} // namespace mindspore::lite::micro
#endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_REDUCE_BASE_CODER_H

+ 104
- 0
mindspore/lite/micro/coder/opcoders/base/resize_base_coder.cc View File

@@ -0,0 +1,104 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "coder/opcoders/base/resize_base_coder.h"
#include "coder/opcoders/op_coder.h"

namespace mindspore::lite::micro {
constexpr int kMaxInputNum = 2;
constexpr int kOutputNum = 1;
constexpr int kSingleNum = 1;
constexpr int kDoubleNum = 2;
constexpr int kQuadrupleNum = 4;

int ResizeBaseCoder::CheckParameters() {
auto parameter = reinterpret_cast<ResizeParameter *>(parameter_);
if (parameter == nullptr) {
MS_LOG(ERROR) << "cast ResizeParameter failed.";
return RET_NULL_PTR;
}
method_ = parameter->method_;
if (method_ != static_cast<int>(schema::ResizeMethod_LINEAR) &&
method_ != static_cast<int>(schema::ResizeMethod_NEAREST)) {
MS_LOG(ERROR) << "Resize method should be bilinear or nearest_neighbor, but got " << method_;
return RET_INVALID_OP_ATTR;
}
if (this->input_tensors_.size() == kSingleNum) {
new_height_ = parameter->new_height_;
if (new_height_ < 1) {
MS_LOG(ERROR) << "Resize new_height should >= 1, but got " << new_height_;
return RET_INVALID_OP_ATTR;
}
new_width_ = parameter->new_width_;
if (new_width_ < 1) {
MS_LOG(ERROR) << "Resize new_width should >= 1, but got " << new_width_;
return RET_INVALID_OP_ATTR;
}
} else if (this->input_tensors_.size() == kDoubleNum) {
auto out_shape = this->input_tensors_.at(1)->data_c();
if (out_shape == nullptr) {
MS_LOG(INFO) << "Out shape is not assigned";
const_shape_ = false;
} else {
const_shape_ = true;
}
}
coordinate_transform_mode_ = parameter->coordinate_transform_mode_;
preserve_aspect_ratio_ = parameter->preserve_aspect_ratio_;
if (preserve_aspect_ratio_) {
MS_LOG(ERROR) << "Resize currently not support preserve_aspect_ratio true";
return RET_ERROR;
}
return RET_OK;
}

int ResizeBaseCoder::CheckInputsOuputs() {
if (input_tensors_.size() <= kQuadrupleNum) {
if (std::any_of(input_tensors_.begin(), input_tensors_.end(), [](const Tensor *t) { return t == nullptr; })) {
return RET_NULL_PTR;
}
} else {
MS_LOG(ERROR) << "Resize input num should be no more than" << kMaxInputNum << ", but got " << input_tensors_.size();
return RET_ERROR;
}
if (output_tensors_.size() != kOutputNum) {
MS_LOG(ERROR) << "Resize output num should be " << kOutputNum << ", but got " << output_tensors_.size();
return RET_ERROR;
}
auto output = output_tensors_.at(0);
if (output == nullptr) {
return RET_NULL_PTR;
}
return RET_OK;
}

int ResizeBaseCoder::Init() {
auto ret = CheckParameters();
if (ret != RET_OK) {
return ret;
}
ret = CheckInputsOuputs();
if (ret != RET_OK) {
return ret;
}
auto input_shape = input_tensor_->shape();
if (!input_shape.empty() && input_shape.size() != COMM_SHAPE_SIZE) {
MS_LOG(ERROR) << "Resize op support input rank 4, got " << input_shape.size();
return RET_ERROR;
}
return RET_OK;
}
} // namespace mindspore::lite::micro

+ 49
- 0
mindspore/lite/micro/coder/opcoders/base/resize_base_coder.h View File

@@ -0,0 +1,49 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_RESIZE_BASE_CODER_H_
#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_RESIZE_BASE_CODER_H_

#include <vector>
#include <memory>
#include "coder/opcoders/op_coder.h"
#include "nnacl/resize_parameter.h"

namespace mindspore::lite::micro {
class ResizeBaseCoder : public OperatorCoder {
public:
ResizeBaseCoder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
const Model::Node *node, size_t node_index, Target target)
: OperatorCoder(in_tensors, out_tensors, node, node_index, target) {}

~ResizeBaseCoder() override = default;

int Init();

protected:
int method_{0};
int new_height_{0};
int new_width_{0};
int coordinate_transform_mode_{0};
bool preserve_aspect_ratio_{false};
bool const_shape_{false};

private:
int CheckParameters();
int CheckInputsOuputs();
};
} // namespace mindspore::lite::micro
#endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_RESIZE_BASE_CODER_H_

+ 1
- 1
mindspore/lite/micro/coder/opcoders/base/softmax_base_coder.cc View File

@@ -13,7 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "micro/coder/opcoders/base/softmax_base_coder.h"
#include "coder/opcoders/base/softmax_base_coder.h"
#include <vector>
#include <type_traits>



+ 1
- 3
mindspore/lite/micro/coder/opcoders/base/softmax_base_coder.h View File

@@ -19,14 +19,12 @@

#include <vector>
#include <string>
#include "micro/coder/opcoders/op_coder.h"
#include "coder/opcoders/op_coder.h"
#include "nnacl/softmax_parameter.h"
#include "nnacl/int8/quantize.h"

namespace mindspore::lite::micro {

using std::string;

class SoftmaxBaseCoder : public OperatorCoder {
public:
SoftmaxBaseCoder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,


+ 3
- 3
mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/add_int8_coder.cc View File

@@ -17,13 +17,13 @@
#include "coder/opcoders/cmsis-nn/int8/add_int8_coder.h"
#include <algorithm>
#include <limits>
#include "micro/coder/opcoders/serializers/serializer.h"
#include "coder/opcoders/serializers/serializer.h"
#include "nnacl/arithmetic.h"
#include "nnacl/int8/quantize.h"
#include "coder/opcoders/file_collector.h"
#include "coder/log.h"

using mindspore::schema::PrimitiveType_Add;
using mindspore::schema::PrimitiveType_AddFusion;

namespace mindspore::lite::micro::cmsis {

@@ -85,5 +85,5 @@ int AddInt8Coder::DoCode(CoderContext *const context) {
context->AppendCode(code.str());
return RET_OK;
}
REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_Add, CPUOpCoderCreator<AddInt8Coder>)
REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_AddFusion, CPUOpCoderCreator<AddInt8Coder>)
} // namespace mindspore::lite::micro::cmsis

+ 24
- 12
mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/conv2d_int8_coder.cc View File

@@ -15,14 +15,13 @@
*/

#include "coder/opcoders/cmsis-nn/int8/conv2d_int8_coder.h"
#include <memory>
#include <string>
#include <vector>
#include "coder/opcoders/cmsis-nn/int8/dwconv_int8_coder.h"
#include "coder/opcoders/serializers/serializer.h"
#include "coder/opcoders/file_collector.h"
#include "src/common/prim_util.h"

using mindspore::schema::PrimitiveType_Conv2D;
using mindspore::schema::PrimitiveType_Conv2DFusion;

namespace mindspore::lite::micro::cmsis {

@@ -40,13 +39,11 @@ int Conv2DInt8Coder::Prepare(CoderContext *const context) {
int Conv2DInt8Coder::DoCode(CoderContext *const context) {
Serializer code;
code.precision(kPrecision);
std::vector<string> h_files;
std::vector<string> c_files;
std::vector<std::string> h_files;
std::vector<std::string> c_files;
h_files.emplace_back("CMSIS/NN/Include/arm_nnfunctions.h");
string buffer_str = "NULL";
if (opt_ != Convolve_1x1_fast) {
buffer_str = allocator_->GetRuntimeAddr(buffer_);
code << " memset(" << buffer_str << ", 0, " << buffer_size_ << ");\n";
code.CodeFunction("memset", buffer_, 0, buffer_size_);
}
code.CodeArray("output_shift", output_shift_, output_ch_);
code.CodeArray("output_mult", output_mult_, output_ch_);
@@ -57,7 +54,7 @@ int Conv2DInt8Coder::DoCode(CoderContext *const context) {
code.CodeFunction("arm_convolve_s8", input_tensor_, input_x_, input_y_, input_ch_, input_batches_, filter_tensor_,
output_ch_, kernel_x_, kernel_y_, pad_x_, pad_y_, stride_x_, stride_y_, bias_tensor_,
output_tensor_, "output_shift", "output_mult", out_offset_, input_offset_, out_activation_min_,
out_activation_max_, output_x_, output_y_, buffer_str);
out_activation_max_, output_x_, output_y_, buffer_);
break;
case Convolve_1_x_n:
c_files = {"arm_convolve_1_x_n_s8.c", "arm_nn_mat_mul_core_1x_s8.c"};
@@ -65,7 +62,7 @@ int Conv2DInt8Coder::DoCode(CoderContext *const context) {
code.CodeFunction("arm_convolve_1_x_n_s8", input_tensor_, input_x_, input_ch_, input_batches_, filter_tensor_,
output_ch_, kernel_x_, pad_x_, stride_x_, bias_tensor_, output_tensor_, "output_shift",
"output_mult", out_offset_, input_offset_, out_activation_min_, out_activation_max_, output_x_,
buffer_str);
buffer_);
break;
case Convolve_1x1_fast:
c_files = {"arm_convolve_1x1_s8_fast.c", "arm_nn_mat_mult_nt_t_s8.c", "arm_nn_mat_mul_core_4x_s8.c",
@@ -74,7 +71,7 @@ int Conv2DInt8Coder::DoCode(CoderContext *const context) {
code.CodeFunction("arm_convolve_1x1_s8_fast", input_tensor_, input_x_, input_y_, input_ch_, input_batches_,
filter_tensor_, output_ch_, pad_x_, pad_y_, stride_x_, stride_y_, bias_tensor_, output_tensor_,
"output_shift", "output_mult", out_offset_, input_offset_, out_activation_min_,
out_activation_max_, output_x_, output_y_, buffer_str);
out_activation_max_, output_x_, output_y_, buffer_);
break;
default:
MS_LOG(ERROR) << "opt enum value is not defined";
@@ -159,5 +156,20 @@ int Conv2DInt8Coder::InitTmpBuffer() {
return RET_OK;
}

REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_Conv2D, CPUOpCoderCreator<Conv2DInt8Coder>)
std::unique_ptr<OperatorCoder> CmsisConv2DInt8OpCoderCreator(const std::vector<Tensor *> &in_tensors,
const std::vector<Tensor *> &out_tensors,
const Model::Node *node, size_t node_index,
Target target) {
MS_CHECK_PTR_RET_NULL(node);
int pt = GetPrimitiveType(node->primitive_);
if (pt != schema::PrimitiveType::PrimitiveType_Conv2DFusion) {
MS_LOG(ERROR) << "unmatched primitive type " << PrimitiveTypeName(pt);
return nullptr;
}
std::unique_ptr<Conv2DInt8Coder> coder =
std::make_unique<Conv2DInt8Coder>(in_tensors, out_tensors, node, node_index, target);
return coder;
}

REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_Conv2DFusion, CPUOpCoderCreator<Conv2DInt8Coder>)
} // namespace mindspore::lite::micro::cmsis

+ 0
- 4
mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/dwconv_int8_coder.cc View File

@@ -20,8 +20,6 @@
#include "coder/opcoders/file_collector.h"
#include "coder/log.h"

using mindspore::schema::PrimitiveType_DepthwiseConv2D;

namespace mindspore::lite::micro::cmsis {

int DWConvInt8Coder::Prepare(CoderContext *const context) {
@@ -153,6 +151,4 @@ int DWConvInt8Coder::InitTmpBuffer() {
return 0;
}

REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_DepthwiseConv2D, CPUOpCoderCreator<DWConvInt8Coder>)

} // namespace mindspore::lite::micro::cmsis

+ 2
- 2
mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/fullconnection_int8_coder.h View File

@@ -19,8 +19,8 @@

#include <string>
#include <vector>
#include "micro/coder/opcoders/op_coder.h"
#include "micro/coder/opcoders/base/full_connection_base_coder.h"
#include "coder/opcoders/op_coder.h"
#include "coder/opcoders/base/full_connection_base_coder.h"
#include "nnacl/int8/quantize.h"
namespace mindspore::lite::micro::cmsis {
class FullConnectionInt8Coder final : public FullConnectionBaseCoder {


+ 2
- 2
mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/mul_int8_coder.cc View File

@@ -20,7 +20,7 @@
#include "nnacl/int8/quantize.h"
#include "coder/opcoders/file_collector.h"

using mindspore::schema::PrimitiveType_Mul;
using mindspore::schema::PrimitiveType_MulFusion;

namespace mindspore::lite::micro::cmsis {

@@ -69,5 +69,5 @@ int MulInt8Coder::DoCode(CoderContext *const context) {
context->AppendCode(code.str());
return RET_OK;
}
REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_Mul, CPUOpCoderCreator<MulInt8Coder>)
REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_MulFusion, CPUOpCoderCreator<MulInt8Coder>)
} // namespace mindspore::lite::micro::cmsis

+ 5
- 7
mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/pooling_int8_coder.cc View File

@@ -20,7 +20,8 @@
#include "coder/opcoders/serializers/serializer.h"
#include "coder/opcoders/file_collector.h"

using mindspore::schema::PrimitiveType_Pooling;
using mindspore::schema::PrimitiveType_AvgPoolFusion;
using mindspore::schema::PrimitiveType_MaxPoolFusion;

namespace mindspore::lite::micro::cmsis {
int PoolingInt8Coder::Prepare(CoderContext *const context) {
@@ -39,14 +40,12 @@ int PoolingInt8Coder::Prepare(CoderContext *const context) {

int PoolingInt8Coder::DoCode(CoderContext *const context) {
// init struct PoolingParameters
std::string buffer_str = "NULL";
std::string pooling_func;

std::vector<std::string> cFiles;
if (pooling_parameter_->pool_mode_ == PoolMode_AvgPool) {
cFiles = {"arm_avgpool_s8.c"};
pooling_func = "arm_avgpool_s8";
buffer_str = allocator_->GetRuntimeAddr(buffer_);
} else if (pooling_parameter_->pool_mode_ == PoolMode_MaxPool) {
cFiles = {"arm_max_pool_s8.c"};
pooling_func = "arm_max_pool_s8";
@@ -59,11 +58,9 @@ int PoolingInt8Coder::DoCode(CoderContext *const context) {
Serializer code;
code.precision(kPrecision);

code.CodeFunction(pooling_func, "&nn_context", "&pool_params", "&input_dims", input_tensor_, "&filter_dims",
"&output_dims", output_tensor_);
code.CodeFunction(pooling_func, dim_src_height_, dim_src_width_, dim_dst_height_, dim_dst_width_, stride_height_,
stride_width_, dim_kernel_height_, dim_kernel_width_, padding_height_, padding_width_, act_min_,
act_max_, ch_src_, input_tensor_, buffer_str, output_tensor_);
act_max_, ch_src_, input_tensor_, buffer_, output_tensor_);
context->AppendCode(code.str());
return RET_OK;
}
@@ -97,6 +94,7 @@ int PoolingInt8Coder::SetParameters() {
return RET_OK;
}

REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_Pooling, CPUOpCoderCreator<PoolingInt8Coder>)
REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_AvgPoolFusion, CPUOpCoderCreator<PoolingInt8Coder>)
REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_MaxPoolFusion, CPUOpCoderCreator<PoolingInt8Coder>)

} // namespace mindspore::lite::micro::cmsis

+ 2
- 2
mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/softmax_int8_coder.cc View File

@@ -19,7 +19,7 @@
#include "coder/opcoders/serializers/serializer.h"
#include "coder/opcoders/file_collector.h"

using mindspore::schema::PrimitiveType_SoftMax;
using mindspore::schema::PrimitiveType_Softmax;
namespace mindspore::lite::micro::cmsis {

int SoftMaxInt8Coder::Prepare(CoderContext *const context) {
@@ -76,6 +76,6 @@ int SoftMaxInt8Coder::DoCode(CoderContext *const context) {
context->AppendCode(code.str());
return RET_OK;
}
REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_SoftMax, CPUOpCoderCreator<SoftMaxInt8Coder>)
REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_Softmax, CPUOpCoderCreator<SoftMaxInt8Coder>)

} // namespace mindspore::lite::micro::cmsis

+ 143
- 0
mindspore/lite/micro/coder/opcoders/nnacl/dequant/de_quant.cc View File

@@ -0,0 +1,143 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "coder/opcoders/nnacl/dequant/de_quant.h"
#include <string>
#include <vector>
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"

static constexpr int kPerTensor = 1;
static constexpr size_t kPerBatch = 3;
namespace mindspore::lite::micro::nnacl {

void Dequant::set_de_quant_buffer_str(const std::string &dequant_buffer_str) {
de_quant_buffer_str_ = "(float *)(" + dequant_buffer_str + ")";
}

void Dequant::DequantRecordWorkspcae(size_t curr_workspace) {
de_quant_max_workspace_ = de_quant_max_workspace_ > curr_workspace ? de_quant_max_workspace_ : curr_workspace;
}

bool Dequant::CheckDequantFlag(const Tensor *weight_tensor) {
if (weight_tensor == nullptr) {
return false;
}
return !weight_tensor->quant_params().empty() && weight_tensor->quant_params().front().inited &&
weight_tensor->data_c() != nullptr;
}

void Dequant::DeQuantFunctionPerChannel(const Tensor *quant_tensor, const std::vector<DeQuantArg> &de_quant_args,
const std::string &de_quant_arg_base_str,
NNaclFp32Serializer *const de_quant_code) {
int quant_arg_dims = static_cast<int>(quant_tensor->quant_params().size());
int de_quant_nums = quant_tensor->ElementsNum();
for (int i = 0; i < quant_arg_dims; ++i) {
auto de_quant_arg = de_quant_args.at(i);
std::string de_quant_arg_str = de_quant_arg_base_str + std::to_string(i);
de_quant_code->CodeStruct(de_quant_arg_str, de_quant_arg);
}
std::string de_quant_args_name = "de_quant_args";
*de_quant_code << "const DeQuantArg *" << de_quant_args_name << "[" << quant_arg_dims << "] = {\n";
for (int i = 0; i < quant_arg_dims - 1; ++i) {
*de_quant_code << "&" << de_quant_arg_base_str << std::to_string(i) << ", ";
}
*de_quant_code << "&" << de_quant_arg_base_str << std::to_string(quant_arg_dims - 1);
*de_quant_code << "};\n";
size_t per_batch_size = quant_tensor->shape().at(0);
std::string quant_tensor_addr_str = "(int8_t *)(" + quant_tensor_addr_ + ")";
de_quant_code->CodeFunction("DequantDataPerChannel", quant_tensor_addr_str, de_quant_args_name, de_quant_nums,
per_batch_size, de_quant_buffer_str_);
}

void Dequant::DeQuantFunction(const Tensor *quant_tensor, const std::vector<DeQuantArg> &de_quant_args,
const std::string &de_quant_arg_base_str, NNaclFp32Serializer *const de_quant_code) {
int quant_arg_dims = static_cast<int>(quant_tensor->quant_params().size());
int de_quant_nums = quant_tensor->ElementsNum();
for (int i = 0; i < quant_arg_dims; ++i) {
auto de_quant_arg = de_quant_args.at(i);
std::string de_quant_arg_str = de_quant_arg_base_str + std::to_string(i);
de_quant_code->CodeStruct(de_quant_arg_str, de_quant_arg);
}
std::string de_quant_args_name = "de_quant_args";
*de_quant_code << "const DeQuantArg *" << de_quant_args_name << "[" << quant_arg_dims << "] = {\n";
for (int i = 0; i < quant_arg_dims - 1; ++i) {
*de_quant_code << "&" << de_quant_arg_base_str << std::to_string(i) << ", ";
}
*de_quant_code << "&" << de_quant_arg_base_str << std::to_string(quant_arg_dims - 1);
*de_quant_code << "};\n";
auto channels = static_cast<size_t>(quant_tensor->Batch());
std::string quant_tensor_addr_str = "(int8_t *)(" + quant_tensor_addr_ + ")";
de_quant_code->CodeFunction("DequantData", quant_tensor_addr_str, de_quant_args_name, de_quant_nums, channels,
de_quant_buffer_str_);
}

void Dequant::DeQuantFunctionPerTensor(const Tensor *quant_tensor, const std::vector<DeQuantArg> &de_quant_args,
const std::string &de_quant_arg_base_str,
NNaclFp32Serializer *const de_quant_code) {
size_t de_quant_nums = quant_tensor->ElementsNum();
auto de_quant_arg = de_quant_args.at(0);
std::string de_quant_arg_str = de_quant_arg_base_str + std::to_string(0);
de_quant_code->CodeStruct(de_quant_arg_str, de_quant_arg);
std::string de_quant_args_name = "de_quant_args";
*de_quant_code << "const DeQuantArg *" << de_quant_args_name << "[" << 1 << "] = {\n";
*de_quant_code << "&" << de_quant_arg_base_str << std::to_string(0);
*de_quant_code << "};\n";
std::string quant_tensor_addr_str = "(int8_t *)(" + quant_tensor_addr_ + ")";
de_quant_code->CodeFunction("DequantDataPerTensor", quant_tensor_addr_str, de_quant_args_name, de_quant_nums,
de_quant_buffer_str_);
}

std::string Dequant::GetMicroDeQuantFunction(const Tensor *quant_tensor, const std::string &quant_tensor_addr) {
std::string de_quant_block;
if (quant_tensor == nullptr || de_quant_buffer_str_.empty()) {
return de_quant_block;
}
quant_tensor_addr_ = quant_tensor_addr;
size_t de_quant_nums = quant_tensor->ElementsNum();
size_t quant_arg_dims = quant_tensor->quant_params().size();
DequantRecordWorkspcae(static_cast<size_t>(de_quant_nums * sizeof(float)));
NNaclFp32Serializer de_quant_code;
de_quant_code << "{\n";
size_t quant_tensor_dims = quant_tensor->shape().size();
std::vector<DeQuantArg> de_quant_args;
std::string de_quant_arg_base_str = "de_quant_arg_";
for (size_t i = 0; i < quant_arg_dims; ++i) {
auto curr_quant_param = quant_tensor->quant_params().at(i);
DeQuantArg de_quant_arg = {
.scale = static_cast<float>(curr_quant_param.scale),
.zeroPoint = curr_quant_param.zeroPoint,
.var_corr = curr_quant_param.var_corr,
.mean_corr = curr_quant_param.mean_corr,
// these clusters are meaningless for now; support will be added in the future
.clusters = {},
.clusters_nums = static_cast<int>(curr_quant_param.clusters.size()),
.bitNum = quant_tensor->quant_params().at(i).bitNum,
};
de_quant_args.emplace_back(de_quant_arg);
}
de_quant_code.CodeFunction("memset", de_quant_buffer_str_, 0, de_quant_nums * sizeof(float));
if (quant_tensor_dims == kPerBatch && quant_arg_dims == static_cast<size_t>(quant_tensor->shape().at(0))) {
DeQuantFunctionPerChannel(quant_tensor, de_quant_args, de_quant_arg_base_str, &de_quant_code);
} else if (quant_arg_dims != kPerTensor) {
DeQuantFunction(quant_tensor, de_quant_args, de_quant_arg_base_str, &de_quant_code);
} else {
DeQuantFunctionPerTensor(quant_tensor, de_quant_args, de_quant_arg_base_str, &de_quant_code);
}
de_quant_code << "}\n";
de_quant_block = de_quant_code.str();
return de_quant_block;
}
} // namespace mindspore::lite::micro::nnacl
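
Concretely, for a per-tensor quantized weight GetMicroDeQuantFunction emits a block like the following; the buffer symbol, element count, and quant values are illustrative, and the exact struct-initializer syntax comes from NNaclFp32Serializer::CodeStruct, which is not shown in this diff.

    {
      memset((float *)(g_dequant_buffer), 0, 1024 * sizeof(float));
      const DeQuantArg de_quant_arg_0 = {0.00392157f, 0, 1.0f, 0.0f, NULL, 0, 8};
      const DeQuantArg *de_quant_args[1] = {&de_quant_arg_0};
      DequantDataPerTensor((int8_t *)(g_Weight1), de_quant_args, 1024, (float *)(g_dequant_buffer));
    }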

+ 63
- 0
mindspore/lite/micro/coder/opcoders/nnacl/dequant/de_quant.h View File

@@ -0,0 +1,63 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MICRO_LITE_MICRO_CODER_OPCODERS_NNACL_DEQUANT_DEQUANT_H_
#define MICRO_LITE_MICRO_CODER_OPCODERS_NNACL_DEQUANT_DEQUANT_H_

#include <string>
#include <vector>
#include "src/tensor.h"
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
namespace mindspore::lite::micro::nnacl {
class Dequant {
public:
Dequant(const Dequant &) = delete;
Dequant &operator=(const Dequant &) = delete;
static Dequant *GetInstance() {
static Dequant dequant;
return &dequant;
}

void set_de_quant_buffer_str(const std::string &de_quant_buffer_str);

const size_t de_quant_max_workspace() const { return de_quant_max_workspace_; }

const std::string de_quant_buffer_str() const { return de_quant_buffer_str_; }

bool CheckDequantFlag(const Tensor *quant_tensor);

std::string GetMicroDeQuantFunction(const Tensor *quant_tensor, const std::string &quant_tensor_addr);

private:
void DeQuantFunctionPerTensor(const Tensor *quant_tensor, const std::vector<DeQuantArg> &de_quant_args,
const std::string &de_quant_arg_base_str, NNaclFp32Serializer *de_quant_code);

void DeQuantFunction(const Tensor *quant_tensor, const std::vector<DeQuantArg> &de_quant_args,
const std::string &de_quant_arg_base_str, NNaclFp32Serializer *de_quant_code);

void DeQuantFunctionPerChannel(const Tensor *quant_tensor, const std::vector<DeQuantArg> &de_quant_args,
const std::string &de_quant_arg_base_str, NNaclFp32Serializer *de_quant_code);

Dequant() = default;
~Dequant() = default;
void DequantRecordWorkspcae(size_t curr_workspace);

std::string de_quant_buffer_str_;
std::string quant_tensor_addr_;
size_t de_quant_max_workspace_{0};
};
} // namespace mindspore::lite::micro::nnacl
#endif // MICRO_LITE_MICRO_CODER_OPCODERS_NNACL_DEQUANT_DEQUANT_H_

+ 5
- 5
mindspore/lite/micro/coder/opcoders/nnacl/fp32/activation_fp32_coder.cc View File

@@ -13,12 +13,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "micro/coder/opcoders/nnacl/fp32/activation_fp32_coder.h"
#include "coder/opcoders/nnacl/fp32/activation_fp32_coder.h"
#include <string>
#include "nnacl/fp32/activation_fp32.h"
#include "nnacl/op_base.h"
#include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
#include "micro/coder/opcoders/file_collector.h"
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
#include "coder/opcoders/file_collector.h"

using mindspore::schema::PrimitiveType_Activation;

@@ -34,9 +34,9 @@ int ActivationFP32Coder::DoCode(CoderContext *const context) {
int count = MSMIN(stride, length - stride * task_id);

if (activation_parameter->type_ == schema::ActivationType_SIGMOID) {
Collect(context, {"runtime/kernel/fp32/sigmoid.h"}, {"sigmoid.c"});
Collect(context, {"runtime/kernel/fp32/sigmoid_fp32.h"}, {"sigmoid_fp32.c"});
} else {
Collect(context, {"nnacl/fp32/activation.h"}, {"activation.c"});
Collect(context, {"nnacl/fp32/activation_fp32.h"}, {"activation_fp32.c"});
}
NNaclFp32Serializer code;
switch (activation_parameter->type_) {


+ 6
- 9
mindspore/lite/micro/coder/opcoders/nnacl/fp32/addn_fp32_coder.cc

@@ -14,10 +14,10 @@
* limitations under the License.
*/

#include "micro/coder/opcoders/nnacl/fp32/addn_fp32_coder.h"
#include "coder/opcoders/nnacl/fp32/addn_fp32_coder.h"
#include <string>
#include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
#include "micro/coder/opcoders/file_collector.h"
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
#include "coder/opcoders/file_collector.h"

using mindspore::schema::PrimitiveType_AddN;
namespace mindspore::lite::micro::nnacl {
@@ -28,15 +28,12 @@ int AddNFP32Coder::DoCode(CoderContext *const context) {
int elements_num = input0->ElementsNum();

// Get Tensor Pointer
std::string input0_str = allocator_->GetRuntimeAddr(input0);
std::string input1_str = allocator_->GetRuntimeAddr(input1);
Collect(context, {"nnacl/kernel/fp32/add_fp32_slim.h"}, {"add_fp32_slim.c"});
Collect(context, {"nnacl/kernel/fp32/add_fp32.h"}, {"add_fp32.c"});
NNaclFp32Serializer code;
code.CodeFunction("ElementAdd", input0_str, input1_str, output_tensor_, elements_num);
code.CodeFunction("ElementAdd", input0, input1, output_tensor_, elements_num);
if (input_tensors_.size() > 2) {
for (size_t i = 2; i < input_tensors_.size(); ++i) {
std::string input_str = allocator_->GetRuntimeAddr(input_tensors_.at(i));
code.CodeFunction("ElementAdd", input_str, output_tensor_, elements_num);
code.CodeFunction("ElementAdd", input_tensors_.at(i), output_tensor_, elements_num);
}
}
context->AppendCode(code.str());
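The AddN lowering above emits a chain of binary adds: one ElementAdd for the first two inputs, then one more per remaining input, accumulating into the output buffer. A standalone sketch of that accumulation order (ElementAddDemo is an illustrative stand-in for nnacl's ElementAdd; the sizes are made up):

#include <cstdio>

void ElementAddDemo(const float *a, const float *b, float *out, int n) {
  for (int i = 0; i < n; ++i) out[i] = a[i] + b[i];
}

int main() {
  const int n = 4;
  float in0[n] = {1, 1, 1, 1}, in1[n] = {2, 2, 2, 2}, in2[n] = {3, 3, 3, 3};
  float out[n] = {0};
  ElementAddDemo(in0, in1, out, n);  // out = in0 + in1
  ElementAddDemo(in2, out, out, n);  // fold each further input into out
  printf("%g\n", out[0]);            // prints: 6
  return 0;
}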


+ 18
- 17
mindspore/lite/micro/coder/opcoders/nnacl/fp32/arithmetic_fp32_coder.cc

@@ -61,7 +61,7 @@ int ArithmeticFP32Coder::Init(CoderContext *const context) {

if (arithmetic_parameter_->in_elements_num0_ == 1 || arithmetic_parameter_->in_elements_num1_ == 1) {
switch (arithmetic_parameter_->op_parameter_.type_) {
case PrimitiveType_Mul:
case PrimitiveType_MulFusion:
switch (arithmetic_parameter_->activation_type_) {
case schema::ActivationType_RELU:
arithmetic_parameter_->broadcasting_ = false;
@@ -80,7 +80,7 @@ int ArithmeticFP32Coder::Init(CoderContext *const context) {
break;
}
break;
case PrimitiveType_Add:
case PrimitiveType_AddFusion:
switch (arithmetic_parameter_->activation_type_) {
case schema::ActivationType_RELU:
arithmetic_parameter_->broadcasting_ = false;
@@ -99,7 +99,7 @@ int ArithmeticFP32Coder::Init(CoderContext *const context) {
break;
}
break;
case PrimitiveType_Sub:
case PrimitiveType_SubFusion:
switch (arithmetic_parameter_->activation_type_) {
case schema::ActivationType_RELU:
arithmetic_parameter_->broadcasting_ = false;
@@ -157,7 +157,7 @@ int ArithmeticFP32Coder::Prepare(CoderContext *const context) {
}
arithmetic_parameter_ = reinterpret_cast<ArithmeticParameter *>(parameter_);
std::map<int, std::function<void()>> type_setters = {
{PrimitiveType_Mul,
{PrimitiveType_MulFusion,
[this]() {
switch (arithmetic_parameter_->activation_type_) {
case schema::ActivationType_RELU:
@@ -174,7 +174,7 @@ int ArithmeticFP32Coder::Prepare(CoderContext *const context) {
break;
}
}},
{PrimitiveType_Add,
{PrimitiveType_AddFusion,
[this]() {
switch (arithmetic_parameter_->activation_type_) {
case schema::ActivationType_RELU:
@@ -191,7 +191,7 @@ int ArithmeticFP32Coder::Prepare(CoderContext *const context) {
break;
}
}},
{PrimitiveType_Sub,
{PrimitiveType_SubFusion,
[this]() {
switch (arithmetic_parameter_->activation_type_) {
case schema::ActivationType_RELU:
@@ -205,7 +205,7 @@ int ArithmeticFP32Coder::Prepare(CoderContext *const context) {
break;
}
}},
{PrimitiveType_Div,
{PrimitiveType_DivFusion,
[this]() {
switch (arithmetic_parameter_->activation_type_) {
case schema::ActivationType_RELU:
@@ -275,15 +275,16 @@ int ArithmeticFP32Coder::DoCode(CoderContext *const context) {
* this solution is not suitable for micro, because of the resulting package size.
* */
if (arithmetic_opt_run_ == "ElementOptSub" || arithmetic_run_ == "ElementSub") {
Collect(context, {"nnacl/kernel/fp32/sub.h"}, {"sub.c"});
Collect(context, {"nnacl/fp32/sub_fp32.h"}, {"sub_fp32.c"});
} else if (arithmetic_opt_run_ == "ElementOptAdd" || arithmetic_run_ == "ElementAdd") {
Collect(context, {"nnacl/kernel/fp32/add_fp32_slim.h"}, {"add_fp32_slim.c"});
Collect(context, {"nnacl/fp32/add_fp32.h"}, {"add_fp32.c"});
} else if (arithmetic_opt_run_ == "ElementOptMul" || arithmetic_run_ == "ElementMul") {
Collect(context, {"nnacl/kernel/fp32/mul.h"}, {"mul.c"});
Collect(context, {"nnacl/fp32/mul_fp32.h"}, {"mul_fp32.c"});
} else if (arithmetic_run_ == "ElementAddRelu") {
Collect(context, {"nnacl/kernel/fp32/add_relu.h"}, {"add_relu.c"});
Collect(context, {"nnacl/fp32/add_relu_fp32.h"}, {"add_relu_fp32.c"});
} else {
Collect(context, {"nnacl/arithmetic_common.h", "nnacl/fp32/arithmetic.h"}, {"arithmetic_common.c", "arithmetic.c"});
Collect(context, {"nnacl/arithmetic_common.h", "nnacl/fp32/arithmetic_fp32.h"},
{"arithmetic_common.c", "arithmetic_fp32.c"});
}

if (arithmetic_parameter_->broadcasting_) {
@@ -330,15 +331,15 @@ int ArithmeticFP32Coder::DoCode(CoderContext *const context) {
return RET_OK;
}

REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt32, PrimitiveType_Add, CPUOpCoderCreator<ArithmeticFP32Coder>)
REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt32, PrimitiveType_AddFusion, CPUOpCoderCreator<ArithmeticFP32Coder>)

REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Mul, CPUOpCoderCreator<ArithmeticFP32Coder>)
REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_MulFusion, CPUOpCoderCreator<ArithmeticFP32Coder>)

REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Add, CPUOpCoderCreator<ArithmeticFP32Coder>)
REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_AddFusion, CPUOpCoderCreator<ArithmeticFP32Coder>)

REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Sub, CPUOpCoderCreator<ArithmeticFP32Coder>)
REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_SubFusion, CPUOpCoderCreator<ArithmeticFP32Coder>)

REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Div, CPUOpCoderCreator<ArithmeticFP32Coder>)
REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_DivFusion, CPUOpCoderCreator<ArithmeticFP32Coder>)

REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_LogicalAnd, CPUOpCoderCreator<ArithmeticFP32Coder>)



+ 4
- 4
mindspore/lite/micro/coder/opcoders/nnacl/fp32/arithmetic_fp32_coder.h

@@ -25,9 +25,9 @@
#define DEFAULT_ARITHMETIC_NDIMS 10
namespace mindspore::lite::micro::nnacl {

using mindspore::schema::PrimitiveType_Add;
using mindspore::schema::PrimitiveType_AddFusion;

using mindspore::schema::PrimitiveType_Div;
using mindspore::schema::PrimitiveType_DivFusion;

using mindspore::schema::PrimitiveType_Equal;

@@ -51,7 +51,7 @@ using mindspore::schema::PrimitiveType_Maximum;

using mindspore::schema::PrimitiveType_Minimum;

using mindspore::schema::PrimitiveType_Mul;
using mindspore::schema::PrimitiveType_MulFusion;

using mindspore::schema::PrimitiveType_NotEqual;

@@ -59,7 +59,7 @@ using mindspore::schema::PrimitiveType_RealDiv;

using mindspore::schema::PrimitiveType_SquaredDifference;

using mindspore::schema::PrimitiveType_Sub;
using mindspore::schema::PrimitiveType_SubFusion;

using mindspore::schema::PrimitiveType_Eltwise;



+ 4
- 4
mindspore/lite/micro/coder/opcoders/nnacl/fp32/arithmetic_self_fp32_coder.h

@@ -27,7 +27,7 @@ namespace mindspore::lite::micro::nnacl {

using mindspore::schema::PrimitiveType_Abs;

using mindspore::schema::PrimitiveType_Add;
using mindspore::schema::PrimitiveType_AddFusion;

using mindspore::schema::PrimitiveType_AddN;

@@ -37,7 +37,7 @@ using mindspore::schema::PrimitiveType_Ceil;

using mindspore::schema::PrimitiveType_Cos;

using mindspore::schema::PrimitiveType_Div;
using mindspore::schema::PrimitiveType_DivFusion;

using mindspore::schema::PrimitiveType_Equal;

@@ -67,7 +67,7 @@ using mindspore::schema::PrimitiveType_Maximum;

using mindspore::schema::PrimitiveType_Minimum;

using mindspore::schema::PrimitiveType_Mul;
using mindspore::schema::PrimitiveType_MulFusion;

using mindspore::schema::PrimitiveType_NotEqual;

@@ -81,7 +81,7 @@ using mindspore::schema::PrimitiveType_Sqrt;

using mindspore::schema::PrimitiveType_SquaredDifference;

using mindspore::schema::PrimitiveType_Sub;
using mindspore::schema::PrimitiveType_SubFusion;

using mindspore::schema::PrimitiveType_Sin;



+ 2
- 2
mindspore/lite/micro/coder/opcoders/nnacl/fp32/assign_add_fp32_coder.cc

@@ -14,10 +14,10 @@
* limitations under the License.
*/

#include "micro/coder/opcoders/nnacl/fp32/assign_add_fp32_coder.h"
#include "coder/opcoders/nnacl/fp32/assign_add_fp32_coder.h"
#include <string>
#include "schema/inner/ops_generated.h"
#include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"

namespace mindspore::lite::micro::nnacl {



+ 5
- 7
mindspore/lite/micro/coder/opcoders/nnacl/fp32/batchnorm_fp32_coder.cc

@@ -17,7 +17,6 @@
#include <string>
#include <vector>
#include "nnacl/fp32/batchnorm_fp32.h"
#include "src/ops/batch_norm.h"
#include "nnacl/op_base.h"
#include "coder/opcoders/file_collector.h"
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
@@ -27,10 +26,7 @@ using mindspore::schema::PrimitiveType_BatchNorm;
namespace mindspore::lite::micro::nnacl {

int BatchnormFP32Coder::Init() {
auto bn_parameter = reinterpret_cast<BatchNormParameter *>(parameter_);
auto bn_prim = reinterpret_cast<const mindspore::lite::BatchNorm *>(OperatorCoder::primitive());
bn_parameter->epsilon_ = bn_prim->GetEpsilon();

auto bn_parameter = reinterpret_cast<BatchNormParameter *>(OperatorCoder::parameter_);
std::vector<int> input_shapes = input_tensor_->shape();
if (input_shapes.empty()) {
return RET_ERROR;
@@ -41,7 +37,9 @@ int BatchnormFP32Coder::Init() {
for (int i = 0; i < n_dim - 1; i++) {
bn_parameter->unit_ *= input_shapes.at(i);
}
bn_parameter->op_parameter_.thread_num_ = MSMIN(bn_parameter->op_parameter_.thread_num_, bn_parameter->unit_);
if (default_momentum_ < 0.0f) {
default_momentum_ = bn_parameter->momentum_;
}
return RET_OK;
}

@@ -59,7 +57,7 @@ int BatchnormFP32Coder::DoCode(CoderContext *const context) {
Collect(context, {"nnacl/fp32/batchnorm.h"}, {"nnacl/fp32/batchnorm.c"});
NNaclFp32Serializer code;
code.CodeStruct("bn_parameter", *bn_parameter);
code.CodeFunction("BatchNorm", output_tensor_, input_tensor_, mean_tensor, var_tensor, task_id, "&bn_parameter");
code.CodeFunction("BatchNormFp32", input_tensor_, mean_tensor, var_tensor, "&bn_parameter", task_id, output_tensor_);
MS_LOG(INFO) << "BatchnormFP32Code has been called";
context->AppendCode(code.str());
return lite::RET_OK;
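Worth noting for reviewers unfamiliar with the codegen flow: CodeFunction performs string assembly, not execution. It stringifies each argument and joins them into a C call, so the reordered arguments above translate one-for-one into the emitted source line. A toy standalone version (CodeCall, Str, and the g_* names are hypothetical, not the real NNaclFp32Serializer API):

#include <iostream>
#include <sstream>
#include <string>

template <typename T>
std::string Str(const T &v) {
  std::ostringstream os;
  os << v;
  return os.str();
}

template <typename... Args>
std::string CodeCall(const std::string &func, const Args &...args) {
  std::string joined;
  const char *sep = "";
  ((joined += sep + Str(args), sep = ", "), ...);  // comma-join the arguments
  return func + "(" + joined + ");";
}

int main() {
  std::cout << CodeCall("BatchNormFp32", "g_input0", "g_mean", "g_var", "&bn_parameter", 0, "g_output0") << std::endl;
  // prints: BatchNormFp32(g_input0, g_mean, g_var, &bn_parameter, 0, g_output0);
  return 0;
}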


+ 6
- 0
mindspore/lite/micro/coder/opcoders/nnacl/fp32/batchnorm_fp32_coder.h

@@ -36,6 +36,12 @@ class BatchnormFP32Coder final : public OperatorCoder {

private:
int Init();

float default_momentum_{-1.0f};

float *mean_{nullptr};

float *variance_{nullptr};
};

} // namespace mindspore::lite::micro::nnacl


+ 77
- 0
mindspore/lite/micro/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.cc

@@ -0,0 +1,77 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "coder/opcoders/nnacl/fp32/biasadd_fp32_coder.h"
#include <string>
#include "coder/opcoders/file_collector.h"
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"

using mindspore::schema::PrimitiveType_BiasAdd;

namespace mindspore::lite::micro::nnacl {

int BiasAddFP32Coder::Prepare(CoderContext *context) {
arithmetic_parameter_ = reinterpret_cast<ArithmeticParameter *>(parameter_);
size_t data_size = input_tensors_.at(0)->ElementsNum();
tile_in_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, data_size * sizeof(float), kWorkspace));
tile_bias_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, data_size * sizeof(float), kWorkspace));
return RET_OK;
}

int BiasAddFP32Coder::DoCode(CoderContext *ctx) {
if (input_tensors_.size() < kBiasIndex) {
return RET_ERROR;
}
size_t data_size = input_tensor_->ElementsNum();
std::string bias_str = allocator_->GetRuntimeAddr(input_tensors_.at(kWeightIndex));
Collect(ctx,
{"nnacl/arithmetic.h", "nnacl/nnacl_utils.h", "nnacl/nnacl_common.h", "nnacl/base/arithmetic_base.h",
"nnacl/fp32/add_fp32.h", "nnacl/fp32/arithmetic_fp32.h"},
{"arithmetic_base.c", "arithmetic_fp32.c", "add_fp32.c"});
nnacl::NNaclFp32Serializer code;
std::vector<int> dims = input_tensor_->shape();
arithmetic_parameter_->broadcasting_ = false;
arithmetic_parameter_->ndim_ = dims.size();
arithmetic_parameter_->activation_type_ = 0;
for (size_t i = 0; i < dims.size(); i++) {
arithmetic_parameter_->in_shape0_[i] = dims[i];
}
arithmetic_parameter_->in_elements_num0_ = 0;

for (size_t i = 0; i < dims.size(); i++) {
if (i == dims.size() - 1) {
arithmetic_parameter_->in_shape1_[i] = dims[dims.size() - 1];
continue;
}
arithmetic_parameter_->in_shape1_[i] = 1;
}
arithmetic_parameter_->in_elements_num1_ = 0;

for (size_t i = 0; i < dims.size(); i++) {
arithmetic_parameter_->out_shape_[i] = dims[i];
}
arithmetic_parameter_->out_elements_num_ = 0;
// the remaining parameter fields are left unset

code.CodeStruct("arith_param", *arithmetic_parameter_);
code.CodeFunction("BroadcastAdd", input_tensor_, bias_str, tile_in_, tile_bias_, output_tensor_, data_size,
"(ArithmeticParameter *)&arith_param");
ctx->AppendCode(code.str());
return RET_OK;
}

REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_BiasAdd, CPUOpCoderCreator<BiasAddFP32Coder>)
} // namespace mindspore::lite::micro::nnacl
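The shape setup above encodes the bias as a [1, ..., 1, C] operand against the full input shape, so BroadcastAdd applies it along the last (channel) axis. The same layout computed standalone (values illustrative):

#include <cstdio>
#include <vector>

int main() {
  std::vector<int> dims = {1, 4, 4, 16};        // in_shape0: the input tensor
  std::vector<int> bias_shape(dims.size(), 1);  // in_shape1: all ones ...
  bias_shape.back() = dims.back();              // ... except the channel axis
  for (int d : bias_shape) printf("%d ", d);    // prints: 1 1 1 16
  printf("\n");
  return 0;
}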

+ 43
- 0
mindspore/lite/micro/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.h

@@ -0,0 +1,43 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_BIASADD_FP32_CODER_H_
#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_BIASADD_FP32_CODER_H_

#include <vector>
#include "coder/opcoders/op_coder.h"
#include "nnacl/arithmetic.h"

namespace mindspore::lite::micro::nnacl {
class BiasAddFP32Coder final : public OperatorCoder {
public:
BiasAddFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
const Model::Node *node, size_t node_index, Target target)
: OperatorCoder(in_tensors, out_tensors, node, node_index, target) {}

~BiasAddFP32Coder() override = default;

int Prepare(CoderContext *context) override;

int DoCode(CoderContext *context) override;

private:
ArithmeticParameter *arithmetic_parameter_{nullptr};
float *tile_in_{nullptr};
float *tile_bias_{nullptr};
};
} // namespace mindspore::lite::micro::nnacl
#endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_BIASADD_FP32_CODER_H_

+ 4
- 7
mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.cc

@@ -14,13 +14,12 @@
* limitations under the License.
*/

#include "micro/coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.h"
#include "coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.h"
#include <string>
#include "micro/coder/log.h"
#include "micro/coder/opcoders/file_collector.h"
#include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
#include "coder/log.h"
#include "coder/opcoders/file_collector.h"
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"

using mindspore::schema::PrimitiveType_DepthwiseConv2D;
namespace mindspore::lite::micro::nnacl {
int ConvolutionDepthwiseFP32Coder::Prepare(CoderContext *const context) {
Conv2DBaseCoder::Init();
@@ -73,6 +72,4 @@ int ConvolutionDepthwiseFP32Coder::DoCode(CoderContext *const context) {
return RET_OK;
}

REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_DepthwiseConv2D,
CPUOpCoderCreator<ConvolutionDepthwiseFP32Coder>)
} // namespace mindspore::lite::micro::nnacl

+ 1
- 1
mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.h

@@ -18,7 +18,7 @@
#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_FP32_CONVOLUTION_DEPTHWISE_FP32_CODER_H_

#include <vector>
#include "micro/coder/opcoders/base/conv2d_base_coder.h"
#include "coder/opcoders/base/conv2d_base_coder.h"
#include "src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.h"

namespace mindspore::lite::micro::nnacl {


+ 81
- 42
mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_fp32_coder.cc

@@ -14,17 +14,21 @@
* limitations under the License.
*/

#include "micro/coder/opcoders/nnacl/fp32/convolution_fp32_coder.h"
#include "coder/opcoders/nnacl/fp32/convolution_fp32_coder.h"
#include <memory>
#include <string>
#include <vector>
#include "micro/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.h"
#include "coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.h"
#include "coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.h"
#include "nnacl/fp32/winograd_utils.h"
#include "src/ops/populate/populate_register.h"
#include "micro/coder/opcoders/file_collector.h"
#include "micro/coder/log.h"
#include "coder/opcoders/file_collector.h"
#include "coder/log.h"
#include "src/common/prim_util.h"
#include "src/common/version_manager.h"
#include "coder/opcoders/nnacl/dequant/de_quant.h"

using mindspore::schema::PrimitiveType_Conv2D;
using mindspore::schema::PrimitiveType_Conv2DFusion;
namespace mindspore::lite::micro::nnacl {
int ConvolutionFP32Coder::InitTmpBuffer() {
int in_channel = conv_param_->input_channel_;
@@ -43,20 +47,16 @@ int ConvolutionFP32Coder::InitTmpBuffer() {
}

int ConvolutionFP32Coder::Prepare(CoderContext *const context) {
int ret = Conv2DBaseCoder::Init();
MS_CHECK_RET_CODE(ret, "Conv2DBaseCoder::Init() failed.");
ret = InitWeightBias(context);
MS_CHECK_RET_CODE(ret, "Init weight bias failed.");
MS_CHECK_RET_CODE(Conv2DBaseCoder::Init(), "Conv2DBaseCoder::Init() failed.");
de_quant_flag_ = Dequant::GetInstance()->CheckDequantFlag(filter_tensor_);
MS_CHECK_RET_CODE(InitWeightBias(context), "Init weight bias failed.");
return Resize();
}

int ConvolutionFP32Coder::Resize() {
int ret = Conv2DBaseCoder::CheckResizeValid();
MS_CHECK_RET_CODE(ret, "Resize is invalid.");
ret = Conv2DBaseCoder::Init();
MS_CHECK_RET_CODE(ret, "init failed.");
ret = InitTmpBuffer();
MS_CHECK_RET_CODE(ret, "init tmp buffer failed.");
MS_CHECK_RET_CODE(Conv2DBaseCoder::CheckResizeValid(), "Resize is invalid.");
MS_CHECK_RET_CODE(Conv2DBaseCoder::Init(), "init failed.");
MS_CHECK_RET_CODE(InitTmpBuffer(), "init tmp buffer failed.");
return RET_OK;
}

@@ -71,36 +71,43 @@ int ConvolutionFP32Coder::InitWeightBias(CoderContext *const context) {
const int oc_block = C8NUM;
int oc_block_num = UP_DIV(out_channel, C8NUM);
int pack_weight_size = oc_block_num * oc_block * in_channel * kernel_plane;
pack_weight_size_ = pack_weight_size * sizeof(float);
auto origin_weight = reinterpret_cast<float *>(filter_tensor_->MutableData());
MS_CHECK_PTR(origin_weight);
packed_weight_ = reinterpret_cast<float *>(
allocator_->Malloc(kNumberTypeFloat32, pack_weight_size * sizeof(float), kOnlinePackWeight));
packed_weight_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight));
MS_CHECK_PTR(packed_weight_);
auto out_channel_size = static_cast<size_t>(out_channel);

NNaclFp32Serializer code;
code.CodeMallocExpression(packed_weight_, pack_weight_size * sizeof(float));
code.CodeFunction("memset", packed_weight_, 0, pack_weight_size * sizeof(float));
code.CodeFunction("RowMajor2Col8Major", filter_tensor_, packed_weight_, out_channel_size, in_channel * kernel_plane);
NNaclFp32Serializer init_code;
std::string ori_weight_addr = allocator_->GetRuntimeAddr(filter_tensor_);
std::string init_weight_str = ori_weight_addr;
if (de_quant_flag_) {
init_weight_str = Dequant::GetInstance()->de_quant_buffer_str();
std::string de_quant_function = Dequant::GetInstance()->GetMicroDeQuantFunction(filter_tensor_, ori_weight_addr);
init_code << de_quant_function;
}
init_code.CodeMallocExpression(packed_weight_, pack_weight_size_);
init_code.CodeFunction("memset", packed_weight_, 0, pack_weight_size_);
init_code.CodeFunction("RowMajor2Col8Major", init_weight_str, packed_weight_, out_channel_size,
in_channel * kernel_plane);

auto bias_data_size = static_cast<size_t>(oc_block_num * oc_block * sizeof(float));
bias_data_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, bias_data_size, kOnlinePackWeight));
bias_data_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight));
MS_CHECK_PTR(bias_data_);
if (input_tensors_.size() == kInputSize2) {
code.CodeMallocExpression(bias_data_, bias_data_size);
code.CodeFunction("memset", bias_data_, 0, bias_data_size);
code.CodeFunction("memcpy", bias_data_, bias_tensor_, out_channel_size * sizeof(float));
init_code.CodeMallocExpression(bias_data_, bias_data_size);
init_code.CodeFunction("memset", bias_data_, 0, bias_data_size);
init_code.CodeFunction("memcpy", bias_data_, bias_tensor_, out_channel_size * sizeof(float));
} else {
return RET_ERROR;
}
context->AppendInitCode(code.str());
context->AppendInitCode(init_code.str());
return RET_OK;
}

int ConvolutionFP32Coder::DoCode(CoderContext *const context) {
{
std::vector<string> asmFiles;
std::vector<std::string> asmFiles;
if (target_ == kARM32A) {
asmFiles = {"MatmulFp32.S",
"MatmulFp32Opt.S",
@@ -112,9 +119,14 @@ int ConvolutionFP32Coder::DoCode(CoderContext *const context) {
asmFiles = {"MatmulFp32.S", "MatmulFp32Opt.S", "PreSum4x16Int8Peroc.S", "MatVecMulFp32.S",
"PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "IndirectGemmInt16to32_8x4.S", "MatmulInt8.S"};
}
Collect(context,
{"nnacl/kernel/fp32/conv_fp32_slim.h", "nnacl/fp32/matmul.h", "nnacl/conv_parameter.h", "nnacl/op_base.h"},
{"common_func.c", "conv_fp32_slim.c", "matmul.c"}, asmFiles);
std::vector<std::string> h_files = {"nnacl/fp32/conv_common_fp32.h", "nnacl/fp32/matmul.h",
"nnacl/conv_parameter.h", "nnacl/op_base.h"};
std::vector<std::string> c_files = {"common_func.c", "conv_common_fp32.c", "matmul.c"};
if (de_quant_flag_) {
h_files.emplace_back("wrapper/fp32/dequant_int8_to_fp32_wrapper.h");
c_files.emplace_back("dequant_int8_to_fp32_wrapper.c");
}
Collect(context, h_files, c_files, asmFiles);
}
NNaclFp32Serializer code;
// call the op function
@@ -122,7 +134,7 @@ int ConvolutionFP32Coder::DoCode(CoderContext *const context) {
code.CodeFunction("memset", col_major_input_, "0", col_major_input_size_);
code.CodeStruct("conv_parameter", *conv_param_);
int task_id = 0;
code.CodeFunction("ConvFp32Slim", input_tensor_, packed_input_, packed_weight_, bias_data_, col_major_input_,
code.CodeFunction("ConvFp32", input_tensor_, packed_input_, packed_weight_, bias_data_, col_major_input_,
output_tensor_, task_id, "(ConvParameter *)&conv_parameter");

context->AppendCode(code.str());
@@ -135,18 +147,18 @@ std::unique_ptr<OperatorCoder> CPUConvolutionFP32CoderCreator(const std::vector<
Target target) {
std::vector<Tensor *> inputs = in_tensors;
std::vector<Tensor *> outputs = out_tensors;
auto primitive = node->primitive_;
if (!primitive) {
const void *primitive = node->primitive_;
if (primitive == nullptr) {
return nullptr;
}
OpParameter *parameter =
PopulateRegistry::GetInstance()->GetParameterCreator((schema::PrimitiveType(primitive->Type())))(primitive);
if (parameter == nullptr) {
MS_LOG(ERROR) << "PopulateParameter return nullptr, type: "
<< schema::EnumNamePrimitiveType((schema::PrimitiveType)(primitive->Type()));
int schema_version = VersionManager::GetInstance()->GetSchemaVersion();
ParameterGen paramGen =
PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node->primitive_), schema_version);
if (paramGen == nullptr) {
MS_LOG(ERROR) << "parameter generator is null";
return nullptr;
}
auto conv_param = reinterpret_cast<ConvParameter *>(parameter);
auto conv_param = reinterpret_cast<ConvParameter *>(paramGen(node->primitive_));
bool use_winograd = false;
int out_unit = 0;
int kernel_h = conv_param->kernel_h_;
@@ -159,7 +171,7 @@ std::unique_ptr<OperatorCoder> CPUConvolutionFP32CoderCreator(const std::vector<
conv_param->output_channel_ = outputs.at(kOutputIndex)->Channel();
conv_param->op_parameter_.thread_num_ = 1;
CheckIfUseWinograd(&use_winograd, &out_unit, conv_param);
free(parameter);
free(conv_param);
// weight de quant
std::unique_ptr<OperatorCoder> coder;
if (kernel_h == 1 && kernel_w == 1) {
@@ -175,5 +187,32 @@ std::unique_ptr<OperatorCoder> CPUConvolutionFP32CoderCreator(const std::vector<
return coder;
}

REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Conv2D, CPUConvolutionFP32CoderCreator)
std::unique_ptr<OperatorCoder> CPUConv2DFusionFP32CoderCreator(const std::vector<Tensor *> &in_tensors,
const std::vector<Tensor *> &out_tensors,
const Model::Node *node, size_t node_index,
Target target) {
const void *primitive = node->primitive_;
if (primitive == nullptr) {
return nullptr;
}
int schema_version = VersionManager::GetInstance()->GetSchemaVersion();
ParameterGen paramGen =
PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node->primitive_), schema_version);
if (paramGen == nullptr) {
MS_LOG(ERROR) << "parameter generator is null";
return nullptr;
}
auto conv_param = reinterpret_cast<ConvParameter *>(paramGen(node->primitive_));
std::unique_ptr<OperatorCoder> coder;
if (conv_param->group_ == 1) {
coder = CPUConvolutionFP32CoderCreator(in_tensors, out_tensors, node, node_index, target);
} else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) {
coder = CPUOpCoderCreator<ConvolutionDepthwiseFP32Coder>(in_tensors, out_tensors, node, node_index, target);
} else {
// GroupConv
}
return coder;
}

REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Conv2DFusion, CPUConv2DFusionFP32CoderCreator)
} // namespace mindspore::lite::micro::nnacl
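CPUConv2DFusionFP32CoderCreator dispatches on the group count now that common and depthwise convolution share one Conv2DFusion primitive: group == 1 selects the common convolution path, a fully grouped conv (group == Cin == Cout) selects the depthwise coder, and anything in between falls through with no coder. The rule in isolation (Dispatch and ConvKind are illustrative names):

#include <cstdio>

enum class ConvKind { kCommon, kDepthwise, kUnsupported };

ConvKind Dispatch(int group, int in_channel, int out_channel) {
  if (group == 1) return ConvKind::kCommon;
  if (group == in_channel && group == out_channel) return ConvKind::kDepthwise;
  return ConvKind::kUnsupported;  // GroupConv: no micro coder in this commit
}

int main() {
  printf("%d\n", static_cast<int>(Dispatch(1, 16, 32)));   // 0: common conv
  printf("%d\n", static_cast<int>(Dispatch(16, 16, 16)));  // 1: depthwise
  printf("%d\n", static_cast<int>(Dispatch(4, 16, 32)));   // 2: unsupported
  return 0;
}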

+ 8
- 6
mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_fp32_coder.h

@@ -14,14 +14,14 @@
* limitations under the License.
*/

#ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_FP32_CONVOLUTION_FP32_CODER_H_
#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_FP32_CONVOLUTION_FP32_CODER_H_
#ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_CONVOLUTION_FP32_CODER_H_
#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_CONVOLUTION_FP32_CODER_H_

#include <vector>
#include <string>
#include "nnacl/conv_parameter.h"
#include "micro/coder/opcoders/base/conv2d_base_coder.h"
#include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
#include "coder/opcoders/base/conv2d_base_coder.h"
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"

namespace mindspore::lite::micro::nnacl {
class ConvolutionFP32Coder final : public Conv2DBaseCoder {
@@ -51,12 +51,14 @@ class ConvolutionFP32Coder final : public Conv2DBaseCoder {

size_t packed_input_size_{0};

int thread_stride_{0};
bool de_quant_flag_{false};

int thread_count_{0};

float *col_major_input_{nullptr};
size_t col_major_input_size_{0};

size_t pack_weight_size_{0};
};
} // namespace mindspore::lite::micro::nnacl
#endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_FP32_CONVOLUTION_FP32_CODER_H_
#endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_CONVOLUTION_FP32_CODER_H_
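The new pack_weight_size_ member caches the byte size computed in InitWeightBias: output channels are rounded up to whole blocks of C8NUM before multiplying by the per-channel weight count, so the packed buffer stays block-aligned even when out_channel is not a multiple of 8. The arithmetic standalone (UP_DIV is redefined locally so the snippet compiles on its own; the channel and kernel values are made up):

#include <cstdio>

#define UP_DIV(x, y) (((x) + (y) - 1) / (y))

int main() {
  const int C8NUM = 8;
  int out_channel = 20, in_channel = 3, kernel_plane = 9;  // e.g. a 3x3 kernel
  int oc_block_num = UP_DIV(out_channel, C8NUM);           // 3 blocks of 8
  int pack_weight_size = oc_block_num * C8NUM * in_channel * kernel_plane;
  printf("%d floats\n", pack_weight_size);                 // prints: 648 floats
  return 0;
}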

+ 9
- 8
mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.cc

@@ -13,12 +13,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "micro/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.h"
#include "coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.h"
#include <array>
#include "nnacl/base/minimal_filtering_generator.h"
#include "micro/coder/log.h"
#include "micro/coder/opcoders/file_collector.h"
#include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
#include "coder/log.h"
#include "coder/opcoders/file_collector.h"
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"

namespace mindspore::lite::micro::nnacl {
const std::array<std::string, 9> InputTransFuncList = {
@@ -222,10 +222,11 @@ int ConvolutionWinogradFP32Coder::DoCode(CoderContext *const context) {
asmFiles = {"MatmulFp32.S", "MatmulFp32Opt.S", "PreSum4x16Int8Peroc.S", "MatVecMulFp32.S",
"PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "IndirectGemmInt16to32_8x4.S", "MatmulInt8.S"};
}
Collect(context, {"nnacl/fp32/conv.h", "nnacl/common_func.h"},
{"common_func.c", "conv_int8.c", "matmul_int8.c", "pack.c", "conv.c", "winograd_transform.c",
"common_func_fp32.c", "fixed_point.c", "winograd_utils.c", "minimal_filtering_generator.c"},
asmFiles);
Collect(
context, {"nnacl/fp32/conv_winograd_fp32.h", "nnacl/common_func.h"},
{"common_func.c", "conv_int8.c", "matmul_int8.c", "pack_fp32.c", "conv_winograd_fp32.c", "winograd_transform.c",
"common_func_fp32.c", "fixed_point.c", "winograd_utils.c", "minimal_filtering_generator.c"},
asmFiles);

NNaclFp32Serializer code;
// call the op function


+ 1
- 1
mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.h

@@ -20,7 +20,7 @@
#include <memory>
#include <string>
#include <vector>
#include "micro/coder/opcoders/base/conv2d_base_coder.h"
#include "coder/opcoders/base/conv2d_base_coder.h"
#include "nnacl/conv_parameter.h"

namespace mindspore::lite::micro::nnacl {


+ 51
- 25
mindspore/lite/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.cc

@@ -22,6 +22,7 @@
#include "coder/opcoders/file_collector.h"
#include "nnacl/fp32/matmul_fp32.h"
#include "wrapper/fp32/matmul_fp32_wrapper.h"
#include "coder/opcoders/nnacl/dequant/de_quant.h"

using mindspore::schema::PrimitiveType_MatMul;

@@ -31,6 +32,13 @@ int MatMulFP32BaseCoder::ReSize() {
ResizeParameter();
thread_count_ = MSMIN(thread_num_, UP_DIV(params_->col_align_, col_tile_));
thread_stride_ = UP_DIV(UP_DIV(params_->col_align_, col_tile_), thread_count_);
// Malloc cannot be called in DoCode, so this runtime init is moved to the final ReSize
if (!params_->a_const_) {
MS_CHECK_RET_CODE(InitBufferA(), "InitBufferA failed");
}
if (!params_->b_const_) {
MS_CHECK_RET_CODE(InitBufferB(), "InitBufferB failed");
}
return RET_OK;
}

@@ -45,17 +53,16 @@ int MatMulFP32BaseCoder::InitBiasData() {
}

void MatMulFP32BaseCoder::InitParameter() {
row_tile_ = C12NUM;
if (target_ == kARM32A) {
row_tile_ = C12NUM;
col_tile_ = C4NUM;
} else {
row_tile_ = C12NUM;
col_tile_ = C8NUM;
}
}

void MatMulFP32BaseCoder::ResizeParameter() {
if (params_->row_ == 1 && !params_->b_const_) {
if (params_->row_ == 1) {
vec_matmul_ = true;
}
params_->row_align_ = vec_matmul_ ? 1 : UP_ROUND(params_->row_, row_tile_);
@@ -66,12 +73,11 @@ int MatMulFP32BaseCoder::InitBufferA() {
if (a_pack_ptr_ != nullptr) {
return RET_OK;
}
a_pack_ptr_size_ = static_cast<size_t>(params_->batch * params_->row_align_ * params_->deep_ * sizeof(float));
if (params_->a_const_) {
a_pack_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight));
} else {
a_pack_ptr_size_ = static_cast<size_t>(params_->batch * params_->row_align_ * params_->deep_ * sizeof(float));
a_pack_ptr_ =
reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, a_pack_ptr_size_, kOfflinePackWeight));
a_pack_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, a_pack_ptr_size_, kWorkspace));
}
MS_CHECK_PTR(a_pack_ptr_);
return RET_OK;
@@ -81,12 +87,11 @@ int MatMulFP32BaseCoder::InitBufferB() {
if (b_pack_ptr_ != nullptr) {
return RET_OK;
}
b_pack_ptr_size_ = static_cast<size_t>(params_->batch * params_->col_align_ * params_->deep_ * sizeof(float));
if (params_->b_const_) {
b_pack_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight));
} else {
b_pack_ptr_size_ = static_cast<size_t>(params_->batch * params_->col_align_ * params_->deep_ * sizeof(float));
b_pack_ptr_ =
reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, b_pack_ptr_size_, kOfflinePackWeight));
b_pack_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, b_pack_ptr_size_, kWorkspace));
}
MS_CHECK_PTR(b_pack_ptr_);
return RET_OK;
@@ -108,12 +113,9 @@ int MatMulFP32BaseCoder::Init() {
MS_CHECK_RET_CODE(InitBiasData(), "InitBiasData failed");
if (params_->a_const_) {
MS_CHECK_RET_CODE(InitBufferA(), "InitBufferA failed");
MS_CHECK_RET_CODE(InitMatrixA(reinterpret_cast<float *>(input_tensor_->data_c())), "InitMatrixA failed");
}

if (params_->b_const_) {
MS_CHECK_RET_CODE(InitBufferB(), "InitBufferB failed");
MS_CHECK_RET_CODE(InitMatrixB(reinterpret_cast<float *>(filter_tensor_->data_c())), "InitMatrixB failed");
}
return RET_OK;
}
@@ -124,12 +126,17 @@ int MatMulFP32BaseCoder::DoCode(CoderContext *const context) {
// generate code .h .c
std::vector<std::string> asm_files;
if (target_ == kARM32A) {
asm_files = {"MatmulFp32.S", "MatmulFp32Opt.S"};
asm_files = {"MatmulFp32.S", "MatmulFp32Opt.S", "MatmulFp32Opt12x4.S"};
} else if (target_ == kARM64) {
asm_files = {"arm64/MatmulFp32.S", "MatmulFp32Opt.S", "arm64/MatVecMulFp32.S"};
asm_files = {"MatmulFp32.S", "MatmulFp32Opt.S", "MatVecMulFp32.S"};
}
std::vector<std::string> h_files = {"nnacl/fp32/matmul_fp32.h", "wrapper/fp32/matmul_fp32_wrapper.h"};
std::vector<std::string> c_files = {"matmul_fp32.c", "matmul_fp32_wrapper.c"};
if (de_quant_flag_) {
h_files.emplace_back("wrapper/fp32/dequant_int8_to_fp32_wrapper.h");
c_files.emplace_back("dequant_int8_to_fp32_wrapper.c");
}
Collect(context, {"nnacl/fp32/matmul.h", "adapter/fp32/matmul_fp32_adapter.h"}, {"matmul.c", "matmul_fp32_adapter.c"},
asm_files);
Collect(context, h_files, c_files, asm_files);
NNaclFp32Serializer code;
NNaclFp32Serializer init_code;
code.CodeStruct("mat_mul_parameter", *params_);
@@ -137,9 +144,12 @@ int MatMulFP32BaseCoder::DoCode(CoderContext *const context) {
// do bias packing to init
if (bias_ptr_) {
init_code.CodeMallocExpression(bias_ptr_, bias_pack_ptr_size_);
init_code.CodeFunction("memcpy", bias_ptr_, bias_tensor_->data_c(), bias_pack_ptr_size_);
init_code.CodeFunction("memcpy", bias_ptr_, bias_tensor_, bias_pack_ptr_size_);
}

// Get Tensor Pointer
std::string a_str = allocator_->GetRuntimeAddr(input_tensor_);
std::string b_str = allocator_->GetRuntimeAddr(filter_tensor_);
std::string c_str = allocator_->GetRuntimeAddr(output_tensor_);
std::string a_pack_str = allocator_->GetRuntimeAddr(a_pack_ptr_);
std::string b_pack_str = allocator_->GetRuntimeAddr(b_pack_ptr_);
@@ -147,12 +157,28 @@ int MatMulFP32BaseCoder::DoCode(CoderContext *const context) {
// do const value packing to init
if (!params_->a_const_) {
code.CodeFunction("InitMatrixA", input_tensor_, a_pack_ptr_, "&mat_mul_parameter", vec_matmul_);
init_code.CodeMallocExpression(b_pack_ptr_, b_pack_ptr_size_);
std::string b_src_str = b_str;
if (de_quant_flag_) {
// reuse the dequant buffer as the packing source for b_pack_str
b_src_str = Dequant::GetInstance()->de_quant_buffer_str();
std::string de_quant_function = Dequant::GetInstance()->GetMicroDeQuantFunction(filter_tensor_, b_str);
init_code << de_quant_function;
}
// b_pack_str has been memset, no need to memset
init_code.CodeFunction("InitMatrixB", filter_tensor_, b_pack_ptr_, "&mat_mul_parameter", vec_matmul_);
init_code.CodeFunction("InitMatrixB", b_src_str, b_pack_ptr_, "&mat_mul_parameter", vec_matmul_);
}
if (!params_->b_const_) {
init_code.CodeMallocExpression(a_pack_str, a_pack_ptr_size_);
std::string a_src_str = a_str;
if (de_quant_flag_) {
// reuse the dequant buffer as the packing source for a_pack_str
a_src_str = Dequant::GetInstance()->de_quant_buffer_str();
std::string de_quant_function = Dequant::GetInstance()->GetMicroDeQuantFunction(input_tensor_, a_str);
init_code << de_quant_function;
}
// a_pack_str has been memset, no need to memset
init_code.CodeFunction("InitMatrixA", input_tensor_, a_pack_ptr_, "&mat_mul_parameter", vec_matmul_);
init_code.CodeFunction("InitMatrixA", a_src_str, a_pack_ptr_, "&mat_mul_parameter", vec_matmul_);
code.CodeFunction("InitMatrixB", filter_tensor_, b_pack_ptr_, "&mat_mul_parameter", vec_matmul_);
}

@@ -165,13 +191,13 @@ int MatMulFP32BaseCoder::DoCode(CoderContext *const context) {
}
code << "for (int i = 0; i < " << params_->batch << "; ++i) {\n";
if (vec_matmul_) {
code << "\t\tbatch_a_ptr = " << a_pack_str << " + i * " << params_->deep_ << ";\n";
code << "\t\tbatch_b_ptr = " << b_pack_str << " + i * " << params_->deep_ * params_->col_ << ";\n";
code << "\t\tbatch_c_ptr = " << c_str << " + i * " << params_->row_ * params_->col_ << ";\n";
code << "\t\tfloat *batch_a_ptr = " << a_pack_str << " + i * " << params_->deep_ << ";\n";
code << "\t\tfloat *batch_b_ptr = " << b_pack_str << " + i * " << params_->deep_ * params_->col_ << ";\n";
code << "\t\tfloat *batch_c_ptr = " << c_str << " + i * " << params_->row_ * params_->col_ << ";\n";
} else {
code << "\t\tbatch_a_ptr = " << a_pack_str << " + i * " << params_->row_align_ * params_->deep_ << ";\n";
code << "\t\tbatch_b_ptr = " << b_pack_str << " + i * " << params_->deep_ * params_->col_align_ << ";\n";
code << "\tbatch_c_ptr = " << c_str << " + i * " << params_->row_ * params_->col_ << ";\n";
code << "\t\tfloat *batch_a_ptr = " << a_pack_str << " + i * " << params_->row_align_ * params_->deep_ << ";\n";
code << "\t\tfloat *batch_b_ptr = " << b_pack_str << " + i * " << params_->deep_ * params_->col_align_ << ";\n";
code << "\t\tfloat *batch_c_ptr = " << c_str << " + i * " << params_->row_ * params_->col_ << ";\n";
}

if (vec_matmul_) {
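The last hunk fixes the emitted batch loop by declaring the per-batch pointers it strides: A advances by row_align_ * deep_ per batch (deep_ alone in the vector case), B by deep_ * col_align_, and C by row_ * col_. The same striding in standalone form (buffer names and the tiny dimensions are illustrative; the real generated loop calls the matmul kernel where the placeholder comment sits):

#include <vector>

int main() {
  const int batch = 2, row = 4, col = 4, deep = 3;
  const int row_align = 12, col_align = 8;  // rows/cols rounded up to the tile sizes
  std::vector<float> a_pack(batch * row_align * deep);
  std::vector<float> b_pack(batch * deep * col_align);
  std::vector<float> c(batch * row * col);
  for (int i = 0; i < batch; ++i) {
    float *batch_a_ptr = a_pack.data() + i * row_align * deep;
    float *batch_b_ptr = b_pack.data() + i * deep * col_align;
    float *batch_c_ptr = c.data() + i * row * col;
    (void)batch_a_ptr; (void)batch_b_ptr; (void)batch_c_ptr;  // matmul kernel would run here
  }
  return 0;
}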


+ 1
- 0
mindspore/lite/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.h

@@ -56,6 +56,7 @@ class MatMulFP32BaseCoder : public OperatorCoder {
float *b_pack_ptr_ = nullptr;
float *bias_ptr_{nullptr};
bool vec_matmul_{false};
bool de_quant_flag_{false};

private:
int col_tile_{0};


+ 5
- 2
mindspore/lite/micro/coder/opcoders/nnacl/fp32/matmul_fp32_coder.cc

@@ -18,6 +18,7 @@
#include <vector>
#include "coder/log.h"
#include "coder/opcoders/file_collector.h"
#include "coder/opcoders/nnacl/dequant/de_quant.h"

using mindspore::schema::PrimitiveType_MatMul;

@@ -77,10 +78,12 @@ int MatMulFP32Coder::Prepare(CoderContext *const context) {
params_->b_const_ = (filter_tensor_->data_c() != nullptr);
MatMulFP32BaseCoder::InitParameter();
if (params_->a_const_) {
InitShapeA();
de_quant_flag_ = Dequant::GetInstance()->CheckDequantFlag(input_tensor_);
MS_CHECK_RET_CODE(InitShapeA(), "MatMulFP32Coder init_shape_a failed");
}
if (params_->b_const_) {
InitShapeB();
de_quant_flag_ = Dequant::GetInstance()->CheckDequantFlag(filter_tensor_);
MS_CHECK_RET_CODE(InitShapeB(), "MatMulFP32Coder init_shape_b failed");
}
MS_CHECK_RET_CODE(MatMulFP32BaseCoder::Init(), "MatMulFP32Coder init failed");
return ReSize();


+ 6
- 6
mindspore/lite/micro/coder/opcoders/nnacl/fp32/pad_fp32_coder.cc

@@ -14,14 +14,14 @@
* limitations under the License.
*/

#include "micro/coder/opcoders/nnacl/fp32/pad_fp32_coder.h"
#include "coder/opcoders/nnacl/fp32/pad_fp32_coder.h"
#include <string>
#include <vector>
#include "micro/coder/log.h"
#include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
#include "micro/coder/opcoders/file_collector.h"
#include "coder/log.h"
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
#include "coder/opcoders/file_collector.h"

using mindspore::schema::PrimitiveType_Pad;
using mindspore::schema::PrimitiveType_PadFusion;

namespace mindspore::lite::micro::nnacl {

@@ -99,5 +99,5 @@ int PadFP32Coder::DoCode(CoderContext *const context) {
return RET_OK;
}

REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Pad, CPUOpCoderCreator<PadFP32Coder>)
REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_PadFusion, CPUOpCoderCreator<PadFP32Coder>)
} // namespace mindspore::lite::micro::nnacl

+ 8
- 16
mindspore/lite/micro/coder/opcoders/nnacl/fp32/pooling_fp32_coder.cc

@@ -21,7 +21,8 @@
#include "coder/log.h"
#include "coder/opcoders/file_collector.h"

using mindspore::schema::PrimitiveType_Pooling;
using mindspore::schema::PrimitiveType_AvgPoolFusion;
using mindspore::schema::PrimitiveType_MaxPoolFusion;

namespace mindspore::lite::micro::nnacl {

@@ -46,7 +47,7 @@ int PoolingFP32Coder::DoCode(CoderContext *const context) {
float minf = -FLT_MAX;
float maxf = FLT_MAX;
if (pooling_parameter->pool_mode_ == PoolMode_MaxPool) {
Collect(context, {"nnacl/kernel/fp32/max_pooling_fp32_slim.h"}, {"max_pooling_fp32_slim.c"});
Collect(context, {"nnacl/fp32/pooling_fp32.h"}, {"pooling_fp32.c"});
switch (pooling_parameter->act_type_) {
case ActType_Relu: {
minf = 0.f;
@@ -63,14 +64,9 @@ int PoolingFP32Coder::DoCode(CoderContext *const context) {
}
}

if (thread_num_ > 1) {
code.CodeBaseStruct("PoolingFp32Args", "args", input_tensor_, output_tensor_, "&pooling_parameter", minf, maxf);
CODE_PARALLEL_FUNC("MaxPoolingFp32Run");
} else {
code.CodeFunction("MaxPooling", input_tensor_, output_tensor_, "&pooling_parameter", task_id, minf, maxf);
}
code.CodeFunction("MaxPooling", input_tensor_, output_tensor_, "&pooling_parameter", task_id, minf, maxf);
} else {
Collect(context, {"nnacl/fp32/pooling.h"}, {"pooling.c"});
Collect(context, {"nnacl/fp32/pooling_fp32.h"}, {"pooling_fp32.c"});
switch (pooling_parameter->act_type_) {
case ActType_Relu: {
minf = 0.f;
@@ -86,12 +82,7 @@ int PoolingFP32Coder::DoCode(CoderContext *const context) {
break;
}
}
if (thread_num_ > 1) {
code.CodeBaseStruct("PoolingFp32Args", "args", input_tensor_, output_tensor_, "&pooling_parameter", minf, maxf);
CODE_PARALLEL_FUNC("AvgPoolingFp32Run");
} else {
code.CodeFunction("AvgPooling", input_tensor_, output_tensor_, "&pooling_parameter", task_id, minf, maxf);
}
code.CodeFunction("AvgPooling", input_tensor_, output_tensor_, "&pooling_parameter", task_id, minf, maxf);
}

MS_LOG(INFO) << "PoolingFp32Code has been called";
@@ -99,5 +90,6 @@ int PoolingFP32Coder::DoCode(CoderContext *const context) {
return lite::RET_OK;
}

REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Pooling, CPUOpCoderCreator<PoolingFP32Coder>)
REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_AvgPoolFusion, CPUOpCoderCreator<PoolingFP32Coder>)
REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_MaxPoolFusion, CPUOpCoderCreator<PoolingFP32Coder>)
} // namespace mindspore::lite::micro::nnacl
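Both branches map the fused activation onto a [minf, maxf] clamp handed to the pooling kernel: Relu clamps at [0, +inf), Relu6 at [0, 6], and no activation keeps the full float range. The mapping standalone (the ActType enumerator values here are illustrative, not the schema's):

#include <cfloat>
#include <cstdio>

void ActRange(int act_type, float *minf, float *maxf) {
  *minf = -FLT_MAX;
  *maxf = FLT_MAX;
  if (act_type == 1) {         // ActType_Relu (enumerator value illustrative)
    *minf = 0.f;
  } else if (act_type == 2) {  // ActType_Relu6
    *minf = 0.f;
    *maxf = 6.f;
  }
}

int main() {
  float lo = 0.f, hi = 0.f;
  ActRange(2, &lo, &hi);
  printf("[%g, %g]\n", lo, hi);  // prints: [0, 6]
  return 0;
}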

+ 1
- 1
mindspore/lite/micro/coder/opcoders/nnacl/fp32/pooling_fp32_coder.h

@@ -18,7 +18,7 @@
#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_POOLFP32_CODER_H_

#include <vector>
#include "micro/coder/opcoders/op_coder.h"
#include "coder/opcoders/op_coder.h"

namespace mindspore::lite::micro::nnacl {



+ 2
- 2
mindspore/lite/micro/coder/opcoders/nnacl/fp32/power_fp32_coder.cc

@@ -20,7 +20,7 @@
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
#include "coder/opcoders/file_collector.h"

using mindspore::schema::PrimitiveType_Power;
using mindspore::schema::PrimitiveType_PowFusion;

namespace mindspore::lite::micro::nnacl {

@@ -55,6 +55,6 @@ int PowerFP32Coder::DoCode(CoderContext *const context) {
return RET_OK;
}

REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Power, CPUOpCoderCreator<PowerFP32Coder>)
REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_PowFusion, CPUOpCoderCreator<PowerFP32Coder>)

} // namespace mindspore::lite::micro::nnacl

+ 2
- 2
mindspore/lite/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.cc

@@ -20,7 +20,7 @@
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
#include "coder/opcoders/file_collector.h"

using mindspore::schema::PrimitiveType_Reduce;
using mindspore::schema::PrimitiveType_ReduceFusion;

namespace mindspore::lite::micro::nnacl {
int ReduceFP32Coder::Prepare(CoderContext *const context) {
@@ -116,6 +116,6 @@ int ReduceFP32Coder::DoCode(CoderContext *const context) {
return RET_OK;
}

REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Reduce, CPUOpCoderCreator<ReduceFP32Coder>)
REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_ReduceFusion, CPUOpCoderCreator<ReduceFP32Coder>)

} // namespace mindspore::lite::micro::nnacl

+ 18
- 25
mindspore/lite/micro/coder/opcoders/nnacl/fp32/scale_fp32_coder.cc

@@ -18,8 +18,9 @@
#include "coder/log.h"
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
#include "coder/opcoders/file_collector.h"
#include "coder/opcoders/parallel.h"

using mindspore::schema::PrimitiveType_Scale;
using mindspore::schema::PrimitiveType_ScaleFusion;

namespace mindspore::lite::micro::nnacl {
ScaleFP32Coder::~ScaleFP32Coder() {
@@ -131,34 +132,26 @@ int ScaleFP32Coder::DoCode(CoderContext *const context) {
NNaclFp32Serializer code;
code.CodeStruct("scale_parameter", *scale_param_);

if (thread_num_ > 1) {
code.CodeBaseStruct("ScaleFp32Args", "args", input_tensor_, output_tensor_, scale_tensor, offset_tensor,
switch (scale_param_->activation_type_) {
case schema::ActivationType_RELU6:
code.CodeFunction("DoScaleRelu6", input_tensor_, output_tensor_, scale_tensor, offset_tensor, kDefaultTaskId,
"&scale_parameter");
CODE_PARALLEL_FUNC("ScaleFp32Run");
} else {
int task_id = 0;
switch (scale_param_->activation_type_) {
case schema::ActivationType_RELU6:
code.CodeFunction("DoScaleRelu6", input_tensor_, output_tensor_, scale_tensor, offset_tensor, task_id,
"&scale_parameter");
break;
case schema::ActivationType_RELU:
code.CodeFunction("DoScaleRelu", input_tensor_, output_tensor_, scale_tensor, offset_tensor, task_id,
"&scale_parameter");
break;
case schema::ActivationType_NO_ACTIVATION:
code.CodeFunction("DoScale", input_tensor_, output_tensor_, scale_tensor, offset_tensor, task_id,
"&scale_parameter");
break;
default:
MS_LOG(ERROR) << "Scale does not support activation type " << scale_param_->activation_type_;
return RET_ERROR;
}
break;
case schema::ActivationType_RELU:
code.CodeFunction("DoScaleRelu", input_tensor_, output_tensor_, scale_tensor, offset_tensor, kDefaultTaskId,
"&scale_parameter");
break;
case schema::ActivationType_NO_ACTIVATION:
code.CodeFunction("DoScale", input_tensor_, output_tensor_, scale_tensor, offset_tensor, kDefaultTaskId,
"&scale_parameter");
break;
default:
MS_LOG(ERROR) << "Scale does not support activation type " << scale_param_->activation_type_;
return RET_ERROR;
}
MS_LOG(INFO) << "ScaleFP32Code has been called";
context->AppendCode(code.str());
return RET_OK;
}

REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Scale, CPUOpCoderCreator<ScaleFP32Coder>)
REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_ScaleFusion, CPUOpCoderCreator<ScaleFP32Coder>)
} // namespace mindspore::lite::micro::nnacl

+ 0
- 74
mindspore/lite/micro/coder/opcoders/nnacl/fp32/slice_fp32_coder.cc

@@ -1,74 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "coder/opcoders/nnacl/fp32/slice_fp32_coder.h"
#include <string>
#include "nnacl/slice_parameter.h"
#include "src/ops/slice.h"
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
#include "coder/opcoders/file_collector.h"

using mindspore::schema::PrimitiveType_Slice;
namespace mindspore::lite::micro::nnacl {
int SliceFP32Coder::Prepare(CoderContext *const context) { return RET_OK; }

int SliceFP32Coder::DoCode(CoderContext *const context) {
// generate code .h .c
Collect(context, {"nnacl/slice_parameter.h", "nnacl/fp32/slice.h"}, {"slice.c"});

auto param = reinterpret_cast<SliceParameter *>(parameter_);
auto primitive_slice = reinterpret_cast<const mindspore::lite::Slice *>(OperatorCoder::primitive());
std::vector<int> begin = primitive_slice->GetPostProcessBegin();
std::vector<int> size = primitive_slice->GetPostProcessSize();
std::vector<int> input_shape = input_tensor_->shape();
NNaclFp32Serializer code;
for (int i = 0; i < param->param_length_; i++) {
param->shape_[i] = input_shape.at(i);
}

for (int i = 0; i < param->param_length_; i++) {
param->begin_[i] = begin.at(i);
}

for (int i = 0; i < param->param_length_; i++) {
int tmp_size = size.at(i);
if (size.at(i) < 0) {
tmp_size = input_shape.at(i) - begin.at(i);
}
param->end_[i] = (begin.at(i) + tmp_size);
}

for (int i = 0; i < param->param_length_; i++) {
if (size.at(i) < 0) {
param->size_[i] = (input_shape.at(i) - begin.at(i));
continue;
}
param->size_[i] = size.at(i);
}

code.CodeStruct("slice_parameter", *param);

// call the op function
if (param->param_length_ < DIMENSION_4D) {
code.CodeFunction("PadSliceParameterTo4D", "&slice_parameter");
}
code.CodeFunction("DoSliceNoParallel", input_tensor_, output_tensor_, "&slice_parameter");
context->AppendCode(code.str());
return RET_OK;
}

REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Slice, CPUOpCoderCreator<SliceFP32Coder>)
} // namespace mindspore::lite::micro::nnacl

+ 0
- 37
mindspore/lite/micro/coder/opcoders/nnacl/fp32/slice_fp32_coder.h

@@ -1,37 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_SLICE_FP32_CODER_H_
#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_SLICE_FP32_CODER_H_

#include <vector>
#include "coder/opcoders/op_coder.h"

namespace mindspore::lite::micro::nnacl {
class SliceFP32Coder final : public OperatorCoder {
public:
SliceFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
const Model::Node *node, size_t node_index, Target target)
: OperatorCoder(in_tensors, out_tensors, node, node_index, target) {}

~SliceFP32Coder() override = default;

int Prepare(CoderContext *const context) override;

int DoCode(CoderContext *const context) override;
};
} // namespace mindspore::lite::micro::nnacl
#endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_SLICE_FP32_CODER_H_

+ 3
- 3
mindspore/lite/micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.cc

@@ -20,7 +20,7 @@
#include "schema/inner/ops_generated.h"
#include "coder/opcoders/file_collector.h"

using mindspore::schema::PrimitiveType_SoftMax;
using mindspore::schema::PrimitiveType_Softmax;

namespace mindspore::lite::micro::nnacl {

@@ -48,7 +48,7 @@ int SoftMaxFP32Coder::Prepare(CoderContext *const context) {
}

int SoftMaxFP32Coder::DoCode(CoderContext *const context) {
Collect(context, {"nnacl/fp32/softmax.h"}, {"softmax.c"});
Collect(context, {"nnacl/fp32/softmax_fp32.h"}, {"softmax_fp32.c", "exp_fp32.c"});
NNaclFp32Serializer code;
code.CodeStruct("softmax_parameter", *softmax_param_);
code.CodeFunction("memset", sum_data_, "0", sum_data_size_);
@@ -58,6 +58,6 @@ int SoftMaxFP32Coder::DoCode(CoderContext *const context) {
return RET_OK;
}

REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_SoftMax, CPUOpCoderCreator<SoftMaxFP32Coder>)
REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Softmax, CPUOpCoderCreator<SoftMaxFP32Coder>)

} // namespace mindspore::lite::micro::nnacl

+ 1
- 1
mindspore/lite/micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.h

@@ -17,7 +17,7 @@
#define MINDSPORE_LITE_MICRO_CODER_SOFTMAX_CODER_H_

#include <vector>
#include "micro/coder/opcoders/base/softmax_base_coder.h"
#include "coder/opcoders/base/softmax_base_coder.h"
namespace mindspore::lite::micro::nnacl {

class SoftMaxFP32Coder final : public SoftmaxBaseCoder {


+ 57
- 0
mindspore/lite/micro/coder/opcoders/nnacl/fp32/splice_fp32_coder.cc

@@ -0,0 +1,57 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "coder/opcoders/nnacl/fp32/splice_fp32_coder.h"
#include <string>
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
#include "coder/opcoders/file_collector.h"
#include "src/common/log_adapter.h"
#include "nnacl/splice_parameter.h"
using mindspore::schema::PrimitiveType_Splice;
namespace mindspore::lite::micro::nnacl {
int SpliceFP32Coder::DoCode(CoderContext *const context) {
auto splice_parameter = reinterpret_cast<SpliceParameter *>(parameter_);
// clear forward_indexes so it is not serialized into the generated code
splice_parameter->forward_indexes_ = nullptr;
std::vector<int> src_shape = input_tensor_->shape();
std::vector<int> dst_shape = output_tensor_->shape();
if (src_shape.size() != dst_shape.size() || src_shape.size() != kInputSize2 || dst_shape.size() != kInputSize2) {
MS_LOG(ERROR) << "SpliceFP32Coder src_shape size not equal to dst_shape";
return RET_ERROR;
}
int src_row = src_shape.at(kInputIndex);
int dst_row = dst_shape.at(kInputIndex);
int src_col = src_shape.at(kBiasIndex);
int dst_col = dst_shape.at(kBiasIndex);
if (src_row != dst_row) {
MS_LOG(ERROR) << "SpliceFP32Coder src_row not equal to dst_row";
return RET_ERROR;
}
if (src_col * splice_parameter->context_dim_ != dst_col) {
MS_LOG(ERROR) << "SpliceFP32Coder src_col not match to dst_col";
return RET_ERROR;
}
Collect(context, {"nnacl/splice_parameter.h", "nnacl/fp32/splice_fp32.h"}, {"splice_fp32.c"});
NNaclFp32Serializer code;
code.CodeStruct("splice_parameter", *splice_parameter);
code.CodeFunction("SpliceFp32", input_tensor_, src_row, src_col, "&splice_parameter", output_tensor_, dst_row,
dst_col);
context->AppendCode(code.str());
MS_LOG(DEBUG) << "SpliceFP32Coder do_code ok";
return RET_OK;
}
REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Splice, CPUOpCoderCreator<SpliceFP32Coder>)
} // namespace mindspore::lite::micro::nnacl
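The checks above pin down the splice shape contract: the row count is preserved, while each output row concatenates context_dim_ frames of src_col features, so dst_col must equal src_col * context_dim_. Standalone (values illustrative):

#include <cstdio>

int main() {
  int src_row = 10, src_col = 40, context_dim = 3;
  int dst_row = src_row;                // rows are unchanged by splice
  int dst_col = src_col * context_dim;  // each row holds context_dim spliced frames
  printf("dst shape: %d x %d\n", dst_row, dst_col);  // prints: dst shape: 10 x 120
  return 0;
}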

+ 35
- 0
mindspore/lite/micro/coder/opcoders/nnacl/fp32/splice_fp32_coder.h

@@ -0,0 +1,35 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_SPLICE_FP32_CODER_H_
#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_SPLICE_FP32_CODER_H_
#include <vector>
#include "coder/opcoders/op_coder.h"
namespace mindspore::lite::micro::nnacl {
class SpliceFP32Coder final : public OperatorCoder {
public:
SpliceFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
const Model::Node *node, size_t node_index, Target target)
: OperatorCoder(in_tensors, out_tensors, node, node_index, target) {}

~SpliceFP32Coder() override = default;

int Prepare(CoderContext *const context) override { return RET_OK; }

int DoCode(CoderContext *const context) override;
};
} // namespace mindspore::lite::micro::nnacl
#endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_SPLICE_FP32_CODER_H_

+ 2
- 2
mindspore/lite/micro/coder/opcoders/nnacl/fp32/tile_fp32_coder.cc View File

@@ -20,7 +20,7 @@
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
#include "coder/opcoders/file_collector.h"

using mindspore::schema::PrimitiveType_Tile;
using mindspore::schema::PrimitiveType_TileFusion;

namespace mindspore::lite::micro::nnacl {
void TileFP32Coder::ComputeStrides(const int *shape, int *strides, int ndim) const {
@@ -63,6 +63,6 @@ int TileFP32Coder::DoCode(CoderContext *const context) {
return RET_OK;
}

REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Tile, CPUOpCoderCreator<TileFP32Coder>)
REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_TileFusion, CPUOpCoderCreator<TileFP32Coder>)

} // namespace mindspore::lite::micro::nnacl
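
The body of ComputeStrides is outside this hunk; a minimal sketch of the usual row-major stride computation it presumably performs (an assumption, since only the signature is shown here):

// strides[i] = product of shape[i+1 .. ndim-1]; e.g. shape {2, 3, 4} -> strides {12, 4, 1}
void ComputeStridesSketch(const int *shape, int *strides, int ndim) {
  int stride = 1;
  for (int i = ndim - 1; i >= 0; --i) {
    strides[i] = stride;
    stride *= shape[i];
  }
}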

+ 5
- 5
mindspore/lite/micro/coder/opcoders/nnacl/fp32/transpose_fp32_coder.cc View File

@@ -14,11 +14,11 @@
* limitations under the License.
*/

#include "micro/coder/opcoders/nnacl/fp32/transpose_fp32_coder.h"
#include "coder/opcoders/nnacl/fp32/transpose_fp32_coder.h"
#include <vector>
#include <string>
#include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
#include "micro/coder/opcoders/file_collector.h"
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
#include "coder/opcoders/file_collector.h"

using mindspore::schema::PrimitiveType_Transpose;
namespace mindspore::lite::micro::nnacl {
@@ -83,8 +83,8 @@ int TransposeFp32Coder::DoCode(CoderContext *const context) {
NNaclFp32Serializer code;
code.CodeStruct("transpose_parameter", *transpose_parameter_);

code.CodeFunction("DoTransposeFp32", input_tensor_, output_tensor_, in_shape_, out_shape_, "&transpose_parameter",
task_id, num_unit_thread, dim_size_, position_);
code.CodeFunction("DoTransposeFp32", input_tensor_, output_tensor_, in_shape_, out_shape_,
"(TransposeParameter *)&transpose_parameter", task_id, num_unit_thread, dim_size_, position_);

context->AppendCode(code.str());
return RET_OK;
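
The added (TransposeParameter *) cast is copied verbatim into the generated source; assuming the serializer prints its string arguments as-is, the emitted call looks roughly like the sketch below (g_input0, g_output0, and the shape/position arrays are hypothetical generated names):

TransposeParameter transpose_parameter = { /* fields written by CodeStruct */ };
DoTransposeFp32(g_input0, g_output0, in_shape, out_shape,
                (TransposeParameter *)&transpose_parameter, task_id, num_unit_thread,
                dim_size, position);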


+ 8
- 8
mindspore/lite/micro/coder/opcoders/nnacl/fp32/transpose_fp32_coder.h View File

@@ -39,14 +39,14 @@ class TransposeFp32Coder final : public OperatorCoder {

private:
TransposeParameter *transpose_parameter_ = nullptr;
int thread_num_ = 1;
int thread_h_stride_ = 0;
int thread_h_num_ = 0;
int num_unit_ = 0;
int *in_shape_ = nullptr;
int *out_shape_ = nullptr;
int *dim_size_ = nullptr;
int *position_ = nullptr;
int thread_num_{1};
int thread_h_stride_{0};
int thread_h_num_{0};
int num_unit_{0};
int *in_shape_{nullptr};
int *out_shape_{nullptr};
int *dim_size_{nullptr};
int *position_{nullptr};
};

} // namespace mindspore::lite::micro::nnacl


+ 74
- 0
mindspore/lite/micro/coder/opcoders/nnacl/int8/activation_int8_coder.cc View File

@@ -0,0 +1,74 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "coder/opcoders/nnacl/int8/sigmoid_int8_coder.h"
#include "coder/opcoders/nnacl/int8/relux_int8_coder.h"
#include "src/ops/populate/populate_register.h"
#include "nnacl/fp32/activation_fp32.h"
#include "schema/model_generated.h"
#include "src/common/version_manager.h"

using mindspore::schema::PrimitiveType_Activation;

namespace mindspore::lite::micro::nnacl {

std::unique_ptr<OperatorCoder> CPUActivationINT8CoderCreator(const std::vector<Tensor *> &in_tensors,
const std::vector<Tensor *> &out_tensors,
const Model::Node *node, size_t node_index,
Target target) {
const void *primitive_c = node->primitive_;
if (primitive_c == nullptr) {
return nullptr;
}
int schema_version = VersionManager::GetInstance()->GetSchemaVersion();
ParameterGen parameter_gen =
PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node->primitive_), schema_version);
if (parameter_gen == nullptr) {
MS_LOG(ERROR) << "parameter generator is nullptr";
return nullptr;
}
OpParameter *parameter = parameter_gen(node->primitive_);
if (parameter == nullptr) {
MS_LOG(ERROR) << "PopulateParameter return nullptr, type: "
<< schema::EnumNamePrimitiveType((schema::PrimitiveType)GetPrimitiveType(node->primitive_));
return nullptr;
}
auto type = (reinterpret_cast<ActivationParameter *>(parameter))->type_;

std::unique_ptr<OperatorCoder> coder;
switch (static_cast<schema::ActivationType>(type)) {
case schema::ActivationType_SIGMOID:
coder = CPUOpCoderCreator<SigmodInt8Coder>(in_tensors, out_tensors, node, node_index, target);
break;
case schema::ActivationType_RELU:
coder = CPUOpCoderCreator<ReluInt8Coder>(in_tensors, out_tensors, node, node_index, target);
break;
case schema::ActivationType_RELU6:
coder = CPUOpCoderCreator<Relu6Int8Coder>(in_tensors, out_tensors, node, node_index, target);
break;
default:
break;
}

if (coder == nullptr) {
MS_LOG(ERROR) << "create conv2d int8 coder failed";
return nullptr;
}
return coder;
}

REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt8, PrimitiveType_Activation, CPUActivationINT8CoderCreator)
} // namespace mindspore::lite::micro::nnacl

+ 37
- 27
mindspore/lite/micro/coder/opcoders/nnacl/int8/add_int8_coder.cc View File

@@ -14,17 +14,18 @@
* limitations under the License.
*/

#include "micro/coder/opcoders/nnacl/int8/add_int8_coder.h"
#include "coder/opcoders/nnacl/int8/add_int8_coder.h"
#include <algorithm>
#include <type_traits>
#include "nnacl/int8/quantize.h"
#include "micro/coder/log.h"
#include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_int8_serializer.h"
#include "micro/coder/opcoders/file_collector.h"
#include "coder/log.h"
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_int8_serializer.h"
#include "coder/opcoders/file_collector.h"
#include "coder/opcoders/parallel.h"

using mindspore::schema::PrimitiveType_Add;
using mindspore::schema::PrimitiveType_AddFusion;

namespace mindspore::lite::micro {
namespace mindspore::lite::micro::nnacl {

int AddInt8Coder::Prepare(CoderContext *const context) {
input0 = input_tensors().at(0);
@@ -38,26 +39,8 @@ int AddInt8Coder::Prepare(CoderContext *const context) {
return RET_OK;
}

int AddInt8Coder::DoCode(CoderContext *const context) {
Collect(context, {"wrapper/int8/conv1x1_init_int8.h"}, {"add_int8_wrapper.c", "add_int8.c", "thread_pool.c"});

nnacl::NNaclInt8Serializer code;

code.CodeStruct("para", para_);
code.CodeStruct("arith_para", *arith_para_);
code.CodeBaseStruct("AddArgs", "args", "para", "arith_para", in_size_, out_size_, thread_num_s_, elements_num_,
support_opt_add_, input0, input1, output_tensor_);

if (arith_para_->broadcasting_) {
code.CodeFunction("ParallelLaunch", "THREAD_POOL_DEFAULT", "AddBroadcastRun", "&args", thread_num_s_);
} else {
code.CodeFunction("ParallelLaunch", "THREAD_POOL_DEFAULT", "AddRun", "&args", thread_num_s_);
}

return RET_OK;
}

int AddInt8Coder::Init() {
arith_para_ = reinterpret_cast<ArithmeticParameter *>(parameter_);
para_.in0_args_.zp_ = input0->quant_params().front().zeroPoint * -1;
para_.in1_args_.zp_ = input1->quant_params().front().zeroPoint * -1;
para_.out_zp_ = output_tensor_->quant_params().front().zeroPoint;
@@ -152,5 +135,32 @@ int AddInt8Coder::ReSize() {
return RET_OK;
}

REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt8, PrimitiveType_Add, CPUOpCoderCreator<AddInt8Coder>)
} // namespace mindspore::lite::micro
int AddInt8Coder::DoCode(CoderContext *const context) {
Collect(context, {"wrapper/int8/add_int8_wrapper.h"},
{"add_int8_wrapper.c", "add_int8.c", "arithmetic_base.c", "arithmetic_int8.c", "thread_pool.c"});

nnacl::NNaclInt8Serializer code;

code.CodeStruct("para", para_);
code.CodeStruct("arith_para", *arith_para_);
code.CodeBaseStruct("AddInt8Args", kRunArgs, "&para", "&arith_para", in_size_, out_size_, gThreadNum, elements_num_,
support_opt_add_, input0, input1, output_tensor_);
if (support_parallel_) {
if (arith_para_->broadcasting_) {
code.CodeFunction(kParallelLaunch, gThreadPool, "AddBroadcastInt8Run", kRunArgsAddr, gThreadNum);
} else {
code.CodeFunction(kParallelLaunch, gThreadPool, "AddInt8Run", kRunArgsAddr, gThreadNum);
}
} else {
if (arith_para_->broadcasting_) {
code.CodeFunction("AddBroadcastInt8Run", kRunArgsAddr, kDefaultTaskId);
} else {
code.CodeFunction("AddInt8Run", kRunArgsAddr, kDefaultTaskId);
}
}
context->AppendCode(code.str());
return RET_OK;
}

REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt8, PrimitiveType_AddFusion, CPUOpCoderCreator<AddInt8Coder>)
} // namespace mindspore::lite::micro::nnacl
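
A hedged sketch of the two paths the new DoCode generates; kRunArgs, kRunArgsAddr, gThreadPool, gThreadNum, and kDefaultTaskId come from coder/opcoders/parallel.h, and the concrete spellings below (args, g_thread_pool, g_thread_num, task id 0) are assumptions:

AddInt8Args args = { &para, &arith_para, /* sizes, thread count, element count, ... */ };
// when the target supports the thread pool (support_parallel_):
ParallelLaunch(g_thread_pool, AddInt8Run, &args, g_thread_num);  // AddBroadcastInt8Run when broadcasting
// otherwise, a direct single-threaded call with the default task id:
AddInt8Run(&args, 0);                                            // AddBroadcastInt8Run(&args, 0) when broadcasting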

+ 5
- 7
mindspore/lite/micro/coder/opcoders/nnacl/int8/add_int8_coder.h View File

@@ -18,17 +18,15 @@
#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_ADD_INT8_CODER_H_

#include <vector>
#include "micro/coder/opcoders/op_coder.h"
#include "coder/opcoders/op_coder.h"
#include "nnacl/int8/add_int8.h"

namespace mindspore::lite::micro {
class AddInt8Coder : public OperatorCoder {
namespace mindspore::lite::micro::nnacl {
class AddInt8Coder final : public OperatorCoder {
public:
AddInt8Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
const Model::Node *node, size_t node_index, Target target)
: OperatorCoder(in_tensors, out_tensors, node, node_index, target) {
arith_para_ = reinterpret_cast<ArithmeticParameter *>(parameter_);
}
: OperatorCoder(in_tensors, out_tensors, node, node_index, target) {}

~AddInt8Coder() override = default;

@@ -49,5 +47,5 @@ class AddInt8Coder : public OperatorCoder {
int elements_num_{0};
bool support_opt_add_{false};
};
} // namespace mindspore::lite::micro
} // namespace mindspore::lite::micro::nnacl
#endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_ADD_INT8_CODER_H_

+ 162
- 0
mindspore/lite/micro/coder/opcoders/nnacl/int8/batchnorm_int8_coder.cc View File

@@ -0,0 +1,162 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "coder/opcoders/nnacl/int8/batchnorm_int8_coder.h"
#include <string>
#include "coder/log.h"
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_int8_serializer.h"
#include "coder/opcoders/file_collector.h"
#include "coder/opcoders/parallel.h"

using mindspore::schema::PrimitiveType_BatchNorm;

namespace mindspore::lite::micro::nnacl {

int BatchNormInt8Coder::Prepare(CoderContext *const context) {
std::vector<int> input_shapes = input_tensor_->shape();
size_t n_dim = input_shapes.size();
batchnorm_param_->channel_ = input_shapes[n_dim - 1];
batchnorm_param_->units_ = 1;
for (size_t i = 0; i < n_dim - 1; i++) {
batchnorm_param_->units_ *= input_shapes[i];
}
batchnorm_param_->op_parameter_.thread_num_ =
MSMIN(batchnorm_param_->op_parameter_.thread_num_, batchnorm_param_->channel_);
if (target_ == kARM32M) {
batchnorm_param_->unit_ = batchnorm_param_->units_;
} else {
batchnorm_param_->unit_ = UP_DIV(batchnorm_param_->units_, kMaxThreadNumSupported);
}
if (batchnorm_param_->fused_) {
MS_CHECK_RET_CODE(InitFusedConstTensor(), "InitFusedConstTensor failed");
} else {
MS_CHECK_RET_CODE(InitConstTensor(), "InitConstTensor failed");
}

return RET_OK;
}
int BatchNormInt8Coder::DoCode(CoderContext *context) {
std::vector<std::string> headers = {"nnacl/slice_parameter.h"};
std::vector<std::string> cFiles = {"batchnorm_int8.c"};
NNaclInt8Serializer code;

code.CodeStruct("param", *batchnorm_param_);
code.CodeFunction("BatchNormInt8", output_tensor_, input_tensor_, alpha_addr_, beta_addr_, kDefaultTaskId, "&param");

Collect(context, headers, cFiles);
context->AppendCode(code.str());

return RET_OK;
}

int BatchNormInt8Coder::InitConstTensor() {
MS_CHECK_TRUE(input_tensors_.size() >= kInputSize2, "number of input tensors does not match");
Tensor *input = input_tensor_;
Tensor *mean = input_tensors_.at(1);
Tensor *variance = input_tensors_.at(2);
Tensor *output = output_tensor_;

auto mean_ptr = reinterpret_cast<int8_t *>(mean->MutableData());
auto var_ptr = reinterpret_cast<int8_t *>(variance->MutableData());

MS_CHECK_PTR(mean_ptr);
MS_CHECK_PTR(var_ptr);

alpha_addr_ = reinterpret_cast<float *>(
allocator_->Malloc(kNumberTypeFloat, mean->ElementsNum() * sizeof(float), kOfflinePackWeight));
MS_CHECK_PTR(alpha_addr_);
beta_addr_ = reinterpret_cast<float *>(
allocator_->Malloc(kNumberTypeFloat, variance->ElementsNum() * sizeof(float), kOfflinePackWeight));
MS_CHECK_PTR(beta_addr_);
// compute alpha, beta;
auto eps = batchnorm_param_->epsilon_;
int32_t zp_in = input->quant_params().at(0).zeroPoint;
int32_t zp_mean = mean->quant_params().at(0).zeroPoint;
int32_t zp_var = variance->quant_params().at(0).zeroPoint;
int32_t zp_out = output->quant_params().at(0).zeroPoint;
auto s_in = static_cast<float>(input->quant_params().at(0).scale);
auto s_mean = static_cast<float>(mean->quant_params().at(0).scale);
auto s_var = static_cast<float>(variance->quant_params().at(0).scale);
auto s_out = static_cast<float>(output->quant_params().at(0).scale);

for (int i = 0; i < batchnorm_param_->channel_; ++i) {
float tmp = s_out * sqrt(eps + s_var * (var_ptr[i] - zp_var));
float tmp_a = s_in / tmp;
float tmp_b = zp_out - tmp_a * zp_in - (s_mean * (mean_ptr[i] - zp_mean)) / tmp;
alpha_addr_[i] = tmp_a;
beta_addr_[i] = tmp_b;
}

return RET_OK;
}

int BatchNormInt8Coder::InitFusedConstTensor() {
MS_CHECK_TRUE(input_tensors_.size() >= 5, "number of input tensors does not match");
Tensor *input = input_tensors_.at(0);
Tensor *scale = input_tensors_.at(1);
Tensor *offset = input_tensors_.at(2);
Tensor *mean = input_tensors_.at(3);
Tensor *variance = input_tensors_.at(4);
Tensor *output = output_tensor_;

auto scale_ptr = reinterpret_cast<int8_t *>(scale->MutableData());
auto offset_ptr = reinterpret_cast<int8_t *>(offset->MutableData());
auto mean_ptr = reinterpret_cast<int8_t *>(mean->MutableData());
auto var_ptr = reinterpret_cast<int8_t *>(variance->MutableData());

MS_CHECK_PTR(scale_ptr);
MS_CHECK_PTR(offset_ptr);
MS_CHECK_PTR(mean_ptr);
MS_CHECK_PTR(var_ptr);

alpha_addr_ = reinterpret_cast<float *>(
allocator_->Malloc(kNumberTypeFloat, mean->ElementsNum() * sizeof(float), kOfflinePackWeight));
MS_CHECK_PTR(alpha_addr_);
beta_addr_ = reinterpret_cast<float *>(
allocator_->Malloc(kNumberTypeFloat, variance->ElementsNum() * sizeof(float), kOfflinePackWeight));
MS_CHECK_PTR(beta_addr_);
// compute alpha, beta;
float eps = batchnorm_param_->epsilon_;
int32_t zp_in = input->quant_params().at(0).zeroPoint;
int32_t zp_scale = scale->quant_params().at(0).zeroPoint;
int32_t zp_offset = offset->quant_params().at(0).zeroPoint;
int32_t zp_mean = mean->quant_params().at(0).zeroPoint;
int32_t zp_var = variance->quant_params().at(0).zeroPoint;
int32_t zp_out = output->quant_params().at(0).zeroPoint;
auto s_in = static_cast<float>(input->quant_params().at(0).scale);
auto s_scale = static_cast<float>(scale->quant_params().at(0).scale);
auto s_offset = static_cast<float>(offset->quant_params().at(0).scale);
auto s_mean = static_cast<float>(mean->quant_params().at(0).scale);
auto s_var = static_cast<float>(variance->quant_params().at(0).scale);
auto s_out = static_cast<float>(output->quant_params().at(0).scale);

float mul_12 = s_in * s_scale;
float mul_24 = s_scale * s_mean;
float div_36 = s_offset / s_out;
for (int i = 0; i < batchnorm_param_->channel_; ++i) {
float tmp = s_out * sqrt(eps + s_var * (var_ptr[i] - zp_var));
float tmp_a = (mul_12 * (scale_ptr[i] - zp_scale)) / tmp;
float tmp_b = zp_out + div_36 * (offset_ptr[i] - zp_offset) - tmp_a * zp_in -
(mul_24 * (scale_ptr[i] - zp_scale) * (mean_ptr[i] - zp_mean)) / tmp;
alpha_addr_[i] = tmp_a;
beta_addr_[i] = tmp_b;
}

return RET_OK;
}

REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt8, PrimitiveType_BatchNorm, CPUOpCoderCreator<BatchNormInt8Coder>)
} // namespace mindspore::lite::micro::nnacl
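
For reference, the per-channel folding in InitConstTensor follows from dequantizing each operand as real = scale * (q - zp) and requantizing the normalized result; a sketch of the algebra in the code's own names:

// real_x   = s_in   * (x    - zp_in)
// real_mu  = s_mean * (mean - zp_mean)
// real_var = s_var  * (var  - zp_var)
// q_out    = (real_x - real_mu) / sqrt(real_var + eps) / s_out + zp_out
// With tmp = s_out * sqrt(eps + real_var):
// q_out = (s_in / tmp) * x + (zp_out - (s_in / tmp) * zp_in - real_mu / tmp)
//       = tmp_a * x + tmp_b   // the alpha_addr_[i] / beta_addr_[i] stored above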

+ 49
- 0
mindspore/lite/micro/coder/opcoders/nnacl/int8/batchnorm_int8_coder.h View File

@@ -0,0 +1,49 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_BATCHNORM_INT8_CODER_H_
#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_BATCHNORM_INT8_CODER_H_

#include <cstring>
#include <vector>
#include "coder/opcoders/op_coder.h"
#include "nnacl/batchnorm_parameter.h"

namespace mindspore::lite::micro::nnacl {
class BatchNormInt8Coder final : public OperatorCoder {
public:
BatchNormInt8Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
const Model::Node *node, size_t node_index, Target target)
: OperatorCoder(in_tensors, out_tensors, node, node_index, target) {
batchnorm_param_ = reinterpret_cast<BatchNormParameter *>(parameter_);
}

~BatchNormInt8Coder() override = default;

int Prepare(CoderContext *const context) override;

int DoCode(CoderContext *context) override;

private:
int InitConstTensor();
int InitFusedConstTensor();

float *alpha_addr_{nullptr};
float *beta_addr_{nullptr};
BatchNormParameter *batchnorm_param_;
};
} // namespace mindspore::lite::micro::nnacl
#endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_BATCHNORM_INT8_CODER_H_

+ 16
- 12
mindspore/lite/micro/coder/opcoders/nnacl/int8/concat_int8_coder.cc View File

@@ -21,6 +21,7 @@
#include "nnacl/int8/quantize.h"
#include "coder/opcoders/file_collector.h"
#include "coder/log.h"
#include "coder/opcoders/parallel.h"
#include "coder/opcoders/serializers/nnacl_serializer/nnacl_int8_serializer.h"

int MallocQuantArgForConcat(ConcatQuantArg *quant_arg, size_t input_num) {
@@ -37,7 +38,6 @@ int ConcatInt8Coder::Prepare(CoderContext *const context) {

concat_param_->input_shapes_ = nullptr;
size_t input_num = input_tensors().size();
MS_CHECK_PTR(input_data_);
MS_CHECK_RET_CODE(MallocQuantArgForConcat(&concat_param_->quant_arg_, input_num),
"Null pointer reference: quant_concat_parm_->in_quant_args_.");
for (int i = 0; i < static_cast<int>(input_num); i++) {
@@ -60,7 +60,10 @@ int ConcatInt8Coder::Prepare(CoderContext *const context) {
concat_param_->input_shapes_ = reinterpret_cast<int **>(malloc(sizeof(int *) * input_num));
MS_CHECK_PTR(concat_param_->input_shapes_);
for (int i = 0; i < static_cast<int>(input_num); i++) {
concat_param_->input_shapes_[i] = reinterpret_cast<int *>(input_tensors().at(i)->shape().data());
auto in_shape = input_tensors_.at(i)->shape();
concat_param_->input_shapes_[i] = reinterpret_cast<int *>(malloc(in_shape.size() * sizeof(int)));
MS_CHECK_PTR(concat_param_->input_shapes_[i]);
memcpy(reinterpret_cast<void *>(concat_param_->input_shapes_[i]), in_shape.data(), sizeof(int) * in_shape.size());
}

before_axis_size = 1;
@@ -70,7 +73,10 @@ int ConcatInt8Coder::Prepare(CoderContext *const context) {

int64_t after_axis_size = 1;
int output_dim = static_cast<int>(output_tensor_->shape().size());
concat_param_->output_shapes_ = output_tensor_->shape().data();
concat_param_->output_shapes_ = reinterpret_cast<int *>(malloc(output_dim * sizeof(int)));
MS_CHECK_PTR(concat_param_->output_shapes_);
memcpy(reinterpret_cast<void *>(concat_param_->output_shapes_), output_tensor_->shape().data(),
sizeof(int) * output_dim);
for (int i = axis_ + 1; i < output_dim; i++) {
after_axis_size *= concat_param_->output_shapes_[i];
}
@@ -84,7 +90,8 @@ int ConcatInt8Coder::DoCode(CoderContext *const context) {
count_unit_ = thread_num_ > 1 ? UP_DIV(before_axis_size, thread_num_) : before_axis_size;
concat_param_->count_unit_ = count_unit_;

Collect(context, {"nnacl/int8/concat_int8.h"}, {"concat_int8.c"});
Collect(context, {"nnacl/int8/concat_int8.h", "wrapper/int8/concat_int8_wrapper.h"},
{"concat_int8.c", "concat_int8_wrapper.c"});
NNaclInt8Serializer code;

int in_tensor_count = input_tensors().size();
@@ -96,15 +103,12 @@ int ConcatInt8Coder::DoCode(CoderContext *const context) {
}
code.CodeStruct("concat_param", *concat_param_, in_tensor_count, input_tensor_->shape().size(),
output_tensor_->shape().size());

if (thread_num_ > 1) {
code.CodeBaseStruct("ConcatInt8Args", "args", "input_data", output_tensor_, "&concat_param", axis_,
before_axis_size, count_unit_);
code.CodeFunction("ParallelLaunch", "THREAD_POOL_DEFAULT", "ConcatInt8Run", "&args", "thread_num");
code.CodeBaseStruct("ConcatInt8Args", kRunArgs, "input_data", output_tensor_, "&concat_param", axis_,
before_axis_size, count_unit_);
if (support_parallel_) {
code.CodeFunction(kParallelLaunch, gThreadPool, "ConcatInt8Run", kRunArgsAddr, gThreadNum);
} else {
int task_id = 0;
int64_t real_dst_count = MSMIN(before_axis_size - task_id * count_unit_, count_unit_);
code.CodeFunction("Int8Concat", "input_data", output_tensor_, "&concat_param", axis_, real_dst_count, task_id);
code.CodeFunction("ConcatInt8Run", kRunArgsAddr, kDefaultTaskId);
}
context->AppendCode(code.str());
return RET_OK;
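
The deep copies introduced in Prepare above close a lifetime hazard: if Tensor::shape() returns its vector by value (an assumption about the Tensor API), keeping only .data() from the temporary leaves concat_param_ pointing at freed storage. A minimal self-contained sketch of the bug and of the fix the diff applies:

#include <cstdlib>
#include <cstring>
#include <vector>

struct TensorStub {  // stand-in for lite::Tensor
  std::vector<int> dims;
  std::vector<int> shape() const { return dims; }  // returned by value
};

int main() {
  TensorStub t{{1, 2, 3}};
  const int *dangling = t.shape().data();  // temporary vector destroyed here: pointer dangles
  (void)dangling;
  // the fix, mirroring Prepare: copy the data out before the temporary dies
  int *owned = static_cast<int *>(malloc(t.shape().size() * sizeof(int)));
  memcpy(owned, t.shape().data(), t.shape().size() * sizeof(int));
  free(owned);
  return 0;
}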


Some files were not shown because too many files changed in this diff
