| @@ -72,6 +72,7 @@ add_compile_definitions(NO_DLIB) | |||
| add_compile_options(-fPIC) | |||
| if(SUPPORT_TRAIN) | |||
| set(BUILD_MINDDATA "full") | |||
| if(PLATFORM_ARM64) | |||
| set(RUNTIME_COMPONENT_NAME train-android-aarch64) | |||
| elseif(PLATFORM_ARM32) | |||
| @@ -19,14 +19,14 @@ | |||
| #include "include/lite_utils.h" | |||
| namespace mindspore::lite { | |||
| class PrimitiveC; | |||
| struct MS_API Model { | |||
| struct Node { | |||
| String name_; | |||
| NodeType node_type_; | |||
| PrimitiveC *primitive_; | |||
| const void *primitive_; | |||
| Uint32Vector input_indices_; | |||
| Uint32Vector output_indices_; | |||
| int quant_type_; | |||
| }; | |||
| using NodePtrVector = std::vector<Node *>; | |||
| struct SubGraph { | |||
| @@ -55,7 +55,7 @@ struct MS_API Model { | |||
| /// \brief Free meta graph temporary buffer | |||
| virtual void Free() = 0; | |||
| /// \brief Free all temporay buffer.EG: nodes in the model. | |||
| /// \brief Free all temporary buffer.EG: nodes in the model. | |||
| virtual void Destroy() = 0; | |||
| /// \brief Model destruct, free all memory | |||
| @@ -22,7 +22,7 @@ | |||
| namespace mindspore { | |||
| namespace lite { | |||
| const int ms_version_major = 1; | |||
| const int ms_version_minor = 1; | |||
| const int ms_version_minor = 2; | |||
| const int ms_version_revision = 0; | |||
| /// \brief Global method to get a version string. | |||
| @@ -9,16 +9,10 @@ include_directories(${CMAKE_BINARY_DIR}) | |||
| include(${TOP_DIR}/cmake/utils.cmake) | |||
| include(${TOP_DIR}/cmake/dependency_utils.cmake) | |||
| include(${TOP_DIR}/cmake/dependency_securec.cmake) | |||
| include(${TOP_DIR}/cmake/external_libs/glog.cmake) | |||
| include(${TOP_DIR}/cmake/external_libs/flatbuffers.cmake) | |||
| include(${TOP_DIR}/cmake/external_libs/cmsis.cmake) | |||
| set(FBS_FILES | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/../schema/model.fbs | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/../schema/ops.fbs | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/../schema/model_v0.fbs | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/../schema/ops_v0.fbs | |||
| ) | |||
| file(GLOB FBS_FILES ${CMAKE_CURRENT_SOURCE_DIR}/../schema/*.fbs) | |||
| ms_build_flatbuffers_lite(FBS_FILES | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/../schema/ | |||
| fbs_src | |||
| @@ -50,6 +44,6 @@ if(ENABLE_ASAN) | |||
| endif() | |||
| add_subdirectory(coder) | |||
| if(${BUILD_TESTCASES}) | |||
| if(BUILD_TESTCASES) | |||
| add_subdirectory(test) | |||
| endif() | |||
| @@ -5,6 +5,9 @@ set(CODER_SRC | |||
| ${MICRO_DIR}/coder/graph.cc | |||
| ${MICRO_DIR}/coder/session.cc | |||
| ${MICRO_DIR}/coder/train.cc | |||
| ${MICRO_DIR}/coder/utils/coder_utils.cc | |||
| ${MICRO_DIR}/coder/utils/dir_utils.cc | |||
| ${MICRO_DIR}/coder/utils/type_cast.cc | |||
| ) | |||
| set(CODER_ALLOCATOR_SRC | |||
| @@ -21,6 +24,11 @@ set(CODER_GENERATOR_SRC | |||
| ${MICRO_DIR}/coder/generator/component/weight_component.cc | |||
| ${MICRO_DIR}/coder/generator/component/cmake_component.cc | |||
| ${MICRO_DIR}/coder/generator/component/train_component.cc | |||
| ${MICRO_DIR}/coder/generator/component/parallel_component.cc | |||
| ) | |||
| set(MINDSPORE_CORE | |||
| ${TOP_DIR}/mindspore/core/gvar/logging_level.cc | |||
| ) | |||
| set(CODER_OPCODERS_SRC | |||
| @@ -28,16 +36,20 @@ set(CODER_OPCODERS_SRC | |||
| ${MICRO_DIR}/coder/opcoders/op_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/op_coder_builder.cc | |||
| ${MICRO_DIR}/coder/opcoders/op_coder_register.cc | |||
| ${MICRO_DIR}/coder/opcoders/parallel.cc | |||
| #### serializer | |||
| ${MICRO_DIR}/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.cc | |||
| ${MICRO_DIR}/coder/opcoders/serializers/nnacl_serializer/nnacl_int8_serializer.cc | |||
| ${MICRO_DIR}/coder/opcoders/serializers/nnacl_serializer/nnacl_stream_utils.cc | |||
| #### base coder | |||
| ${MICRO_DIR}/coder/opcoders/base/conv2d_base_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/base/dtype_cast_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/base/full_connection_base_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/base/quant_dtype_cast_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/base/reduce_base_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/base/resize_base_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/base/softmax_base_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/base/detection_post_process_base_coder.cc | |||
| #### cmsis int8 coder | |||
| ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/add_int8_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/conv2d_base_coder.cc | |||
| @@ -55,6 +67,7 @@ set(CODER_OPCODERS_SRC | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/arithmetic_self_fp32_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/assign_add_fp32_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/batchnorm_fp32_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/concat_fp32_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/convolution_fp32_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.cc | |||
| @@ -64,21 +77,20 @@ set(CODER_OPCODERS_SRC | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/matmul_fp32_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/nchw2nhwc_fp32_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/nhwc2nchw_fp32_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/pad_fp32_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/pooling_fp32_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/power_fp32_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/reduce_fp32_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/reshape_fp32_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/scale_fp32_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/slice_fp32_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/softmax_fp32_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/squeeze_dims_fp32_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/tile_fp32_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/fp32/transpose_fp32_coder.cc | |||
| #### nnacl int8 coder | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/int8/activation_int8_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/int8/add_int8_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/int8/batchnorm_int8_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/int8/concat_int8_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/int8/fullconnection_int8_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/int8/matmul_int8_coder.cc | |||
| @@ -87,40 +99,69 @@ set(CODER_OPCODERS_SRC | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/int8/conv2d_int8_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/int8/deconvolution_int8_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/int8/pooling_int8_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/int8/resize_int8_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/int8/reduce_int8_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/int8/reshape_int8_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/int8/softmax_int8_coder.cc | |||
| ) | |||
| set(CODER_UTILS_SRC | |||
| ${MICRO_DIR}/coder/utils/coder_utils.cc | |||
| ${MICRO_DIR}/coder/utils/dir_utils.cc | |||
| ${MICRO_DIR}/coder/utils/type_cast.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/int8/sub_int8_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/int8/detection_post_process_int8_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/int8/sigmoid_int8_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/int8/relux_int8_coder.cc | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/int8/div_int8_coder.cc | |||
| #### nnacl dequant coder | |||
| ${MICRO_DIR}/coder/opcoders/nnacl/dequant/de_quant.cc | |||
| ) | |||
| set(LITE_SRC | |||
| ${LITE_DIR}/src/common/file_utils.cc | |||
| ${LITE_DIR}/src/common/graph_util.cc | |||
| ${LITE_DIR}/src/common/string_util.cc | |||
| ${LITE_DIR}/src/common/prim_util.cc | |||
| ${LITE_DIR}/src/common/tensor_util.cc | |||
| ${LITE_DIR}/src/runtime/allocator.cc | |||
| ${LITE_DIR}/src/runtime/infer_manager.cc | |||
| ${LITE_DIR}/src/runtime/runtime_api.cc | |||
| ${LITE_DIR}/src/lite_model.cc | |||
| ${LITE_DIR}/src/tensorlist.cc | |||
| ${LITE_DIR}/src/tensor.cc | |||
| ${LITE_DIR}/src/scheduler.cc | |||
| ${LITE_DIR}/src/inner_context.cc | |||
| ${LITE_DIR}/src/dequant.cc | |||
| ${LITE_DIR}/src/kernel_registry.cc | |||
| ${LITE_DIR}/src/lite_kernel.cc | |||
| ${LITE_DIR}/src/sub_graph_kernel.cc | |||
| ${LITE_DIR}/src/huffman_decode.cc | |||
| ${LITE_DIR}/src/executor.cc | |||
| ${LITE_DIR}/src/common/log_adapter.cc | |||
| ### src/ops for parameter and infer shape | |||
| ${LITE_DIR}/src/ops/batch_norm.cc | |||
| ${LITE_DIR}/src/ops/conv2d.cc | |||
| ${LITE_DIR}/src/ops/primitive_c.cc | |||
| ${LITE_DIR}/src/ops/slice.cc | |||
| ${LITE_DIR}/src/ops/while.cc | |||
| ${LITE_DIR}/src/common/utils.cc | |||
| ### populate operator parameter | |||
| ${LITE_DIR}/src/ops/populate/conv2d_populate.cc | |||
| ${LITE_DIR}/src/ops/populate/arithmetic_populate.cc | |||
| ${LITE_DIR}/src/ops/populate/add_populate.cc | |||
| ${LITE_DIR}/src/ops/populate/concat_populate.cc | |||
| ${LITE_DIR}/src/ops/populate/conv2d_populate.cc | |||
| ${LITE_DIR}/src/ops/populate/detection_post_process_populate.cc | |||
| ${LITE_DIR}/src/ops/populate/depthwise_conv2d_populate.cc | |||
| ${LITE_DIR}/src/ops/populate/full_connection_populate.cc | |||
| ${LITE_DIR}/src/ops/populate/pooling_populate.cc | |||
| ${LITE_DIR}/src/ops/populate/quant_dtype_cast_populate.cc | |||
| ${LITE_DIR}/src/ops/populate/resize_populate.cc | |||
| ${LITE_DIR}/src/ops/populate/reshape_populate.cc | |||
| ${LITE_DIR}/src/ops/populate/batch_norm_populate.cc | |||
| ${LITE_DIR}/src/ops/populate/slice_populate.cc | |||
| ${LITE_DIR}/src/ops/populate/while_populate.cc | |||
| ${LITE_DIR}/src/ops/populate/matmul_populate.cc | |||
| ${LITE_DIR}/src/ops/populate/bias_add_populate.cc | |||
| ${LITE_DIR}/src/ops/populate/activation_populate.cc | |||
| ### tools | |||
| ${LITE_DIR}/tools/common/flag_parser.cc | |||
| ) | |||
| set(LITE_KERNEL_SRC | |||
| ### nnacl | |||
| ${LITE_DIR}/nnacl/common_func.c | |||
| ${LITE_DIR}/nnacl/base/minimal_filtering_generator.c | |||
| ${LITE_DIR}/nnacl/base/arithmetic_base.c | |||
| ${LITE_DIR}/nnacl/base/slice_base.c | |||
| ${LITE_DIR}/nnacl/fp32/winograd_utils.c | |||
| ${LITE_DIR}/nnacl/fp32/pack_fp32.c | |||
| ${LITE_DIR}/nnacl/int8/quantize.c | |||
| @@ -128,13 +169,138 @@ set(LITE_KERNEL_SRC | |||
| ${LITE_DIR}/nnacl/int8/matmul_int8.c | |||
| ${LITE_DIR}/nnacl/int8/fixed_point.c | |||
| ${LITE_DIR}/nnacl/fp32/matmul_fp32.c | |||
| ${LITE_DIR}/nnacl/int8/arithmetic_int8.c | |||
| ${LITE_DIR}/nnacl/int8/add_int8.c | |||
| ${LITE_DIR}/nnacl/int8/concat_int8.c | |||
| ${LITE_DIR}/nnacl/int8/conv_int8.c | |||
| ${LITE_DIR}/nnacl/int8/conv3x3_int8.c | |||
| ${LITE_DIR}/nnacl/int8/conv1x1_int8.c | |||
| ${LITE_DIR}/nnacl/base/conv1x1_base.c | |||
| ${LITE_DIR}/nnacl/int8/conv_depthwise_int8.c | |||
| ${LITE_DIR}/nnacl/int8/deconv_int8.c | |||
| ${LITE_DIR}/nnacl/int8/common_func_int8.c | |||
| ${LITE_DIR}/nnacl/int8/slice_int8.c | |||
| ${LITE_DIR}/nnacl/int8/batchnorm_int8.c | |||
| ${LITE_DIR}/nnacl/int8/sub_int8.c | |||
| ${LITE_DIR}/nnacl/int8/quant_dtype_cast_int8.c | |||
| ${LITE_DIR}/nnacl/int8/sigmoid_int8.c | |||
| ${LITE_DIR}/nnacl/int8/resize_int8.c | |||
| ### infer | |||
| ${LITE_DIR}/nnacl/infer/adam_infer.c | |||
| ${LITE_DIR}/nnacl/infer/add_sub_grad_infer.c | |||
| ${LITE_DIR}/nnacl/infer/addn_infer.c | |||
| ${LITE_DIR}/nnacl/infer/apply_momentum_infer.c | |||
| ${LITE_DIR}/nnacl/infer/argmin_max_infer.c | |||
| ${LITE_DIR}/nnacl/infer/arithmetic_compare_infer.c | |||
| ${LITE_DIR}/nnacl/infer/arithmetic_grad_infer.c | |||
| ${LITE_DIR}/nnacl/infer/arithmetic_infer.c | |||
| ${LITE_DIR}/nnacl/infer/assert_op_infer.c | |||
| ${LITE_DIR}/nnacl/infer/assign_add_infer.c | |||
| ${LITE_DIR}/nnacl/infer/assign_infer.c | |||
| ${LITE_DIR}/nnacl/infer/audio_spectrogram_infer.c | |||
| ${LITE_DIR}/nnacl/infer/batch_to_space_infer.c | |||
| ${LITE_DIR}/nnacl/infer/bias_grad_infer.c | |||
| ${LITE_DIR}/nnacl/infer/binary_cross_entropy_infer.c | |||
| ${LITE_DIR}/nnacl/infer/bn_grad_infer.c | |||
| ${LITE_DIR}/nnacl/infer/broadcast_to_infer.c | |||
| ${LITE_DIR}/nnacl/infer/cast_infer.c | |||
| ${LITE_DIR}/nnacl/infer/common_infer.c | |||
| ${LITE_DIR}/nnacl/infer/concat_infer.c | |||
| ${LITE_DIR}/nnacl/infer/constant_of_shape_infer.c | |||
| ${LITE_DIR}/nnacl/infer/conv2d_grad_filter_infer.c | |||
| ${LITE_DIR}/nnacl/infer/conv2d_grad_input_infer.c | |||
| ${LITE_DIR}/nnacl/infer/conv2d_infer.c | |||
| ${LITE_DIR}/nnacl/infer/crop_and_resize_infer.c | |||
| ${LITE_DIR}/nnacl/infer/crop_infer.c | |||
| ${LITE_DIR}/nnacl/infer/custom_extract_features_infer.c | |||
| ${LITE_DIR}/nnacl/infer/custom_normalize_infer.c | |||
| ${LITE_DIR}/nnacl/infer/custom_predict_infer.c | |||
| ${LITE_DIR}/nnacl/infer/deconv2d_infer.c | |||
| ${LITE_DIR}/nnacl/infer/dedepthwise_conv2d_infer.c | |||
| ${LITE_DIR}/nnacl/infer/depth_to_space_infer.c | |||
| ${LITE_DIR}/nnacl/infer/depthwise_conv2d_infer.c | |||
| ${LITE_DIR}/nnacl/infer/detection_post_process_infer.c | |||
| ${LITE_DIR}/nnacl/infer/dropout_grad_infer.c | |||
| ${LITE_DIR}/nnacl/infer/dropout_infer.c | |||
| ${LITE_DIR}/nnacl/infer/embedding_lookup_infer.c | |||
| ${LITE_DIR}/nnacl/infer/expand_dims_infer.c | |||
| ${LITE_DIR}/nnacl/infer/fft_imag_infer.c | |||
| ${LITE_DIR}/nnacl/infer/fft_real_infer.c | |||
| ${LITE_DIR}/nnacl/infer/fill_infer.c | |||
| ${LITE_DIR}/nnacl/infer/flatten_grad_infer.c | |||
| ${LITE_DIR}/nnacl/infer/flatten_infer.c | |||
| ${LITE_DIR}/nnacl/infer/full_connection_infer.c | |||
| ${LITE_DIR}/nnacl/infer/fused_batchnorm_infer.c | |||
| ${LITE_DIR}/nnacl/infer/gather_infer.c | |||
| ${LITE_DIR}/nnacl/infer/gather_nd_infer.c | |||
| ${LITE_DIR}/nnacl/infer/group_conv2d_grad_input_infer.c | |||
| ${LITE_DIR}/nnacl/infer/gru_infer.c | |||
| ${LITE_DIR}/nnacl/infer/hashtable_lookup_infer.c | |||
| ${LITE_DIR}/nnacl/infer/invert_permutation_infer.c | |||
| ${LITE_DIR}/nnacl/infer/layer_norm_infer.c | |||
| ${LITE_DIR}/nnacl/infer/lin_space_infer.c | |||
| ${LITE_DIR}/nnacl/infer/lsh_projection_infer.c | |||
| ${LITE_DIR}/nnacl/infer/lstm_infer.c | |||
| ${LITE_DIR}/nnacl/infer/matmul_infer.c | |||
| ${LITE_DIR}/nnacl/infer/maximum_grad_infer.c | |||
| ${LITE_DIR}/nnacl/infer/mean_infer.c | |||
| ${LITE_DIR}/nnacl/infer/merge_infer.c | |||
| ${LITE_DIR}/nnacl/infer/mfcc_infer.c | |||
| ${LITE_DIR}/nnacl/infer/non_max_suppression_infer.c | |||
| ${LITE_DIR}/nnacl/infer/one_hot_infer.c | |||
| ${LITE_DIR}/nnacl/infer/pad_infer.c | |||
| ${LITE_DIR}/nnacl/infer/partial_infer.c | |||
| ${LITE_DIR}/nnacl/infer/pooling_grad_infer.c | |||
| ${LITE_DIR}/nnacl/infer/pooling_infer.c | |||
| ${LITE_DIR}/nnacl/infer/power_infer.c | |||
| ${LITE_DIR}/nnacl/infer/prior_box_infer.c | |||
| ${LITE_DIR}/nnacl/infer/quant_dtype_cast_infer.c | |||
| ${LITE_DIR}/nnacl/infer/random_standard_normal_infer.c | |||
| ${LITE_DIR}/nnacl/infer/range_infer.c | |||
| ${LITE_DIR}/nnacl/infer/rank_infer.c | |||
| ${LITE_DIR}/nnacl/infer/reduce_infer.c | |||
| ${LITE_DIR}/nnacl/infer/reshape_infer.c | |||
| ${LITE_DIR}/nnacl/infer/resize_infer.c | |||
| ${LITE_DIR}/nnacl/infer/rfft_infer.c | |||
| ${LITE_DIR}/nnacl/infer/roi_pooling_infer.c | |||
| ${LITE_DIR}/nnacl/infer/scatter_nd_infer.c | |||
| ${LITE_DIR}/nnacl/infer/select_infer.c | |||
| ${LITE_DIR}/nnacl/infer/sgd_infer.c | |||
| ${LITE_DIR}/nnacl/infer/shape_infer.c | |||
| ${LITE_DIR}/nnacl/infer/size_infer.c | |||
| ${LITE_DIR}/nnacl/infer/skip_gram_infer.c | |||
| ${LITE_DIR}/nnacl/infer/slice_infer.c | |||
| ${LITE_DIR}/nnacl/infer/softmax_cross_entropy_infer.c | |||
| ${LITE_DIR}/nnacl/infer/softmax_infer.c | |||
| ${LITE_DIR}/nnacl/infer/space_to_batch_infer.c | |||
| ${LITE_DIR}/nnacl/infer/space_to_batch_nd_infer.c | |||
| ${LITE_DIR}/nnacl/infer/space_to_depth_infer.c | |||
| ${LITE_DIR}/nnacl/infer/sparse_softmax_cross_entropy_infer.c | |||
| ${LITE_DIR}/nnacl/infer/sparse_to_dense_infer.c | |||
| ${LITE_DIR}/nnacl/infer/split_infer.c | |||
| ${LITE_DIR}/nnacl/infer/squeeze_infer.c | |||
| ${LITE_DIR}/nnacl/infer/stack_infer.c | |||
| ${LITE_DIR}/nnacl/infer/strided_slice_grad_infer.c | |||
| ${LITE_DIR}/nnacl/infer/strided_slice_infer.c | |||
| ${LITE_DIR}/nnacl/infer/switch_infer.c | |||
| ${LITE_DIR}/nnacl/infer/tensorlist_fromtensor_infer.c | |||
| ${LITE_DIR}/nnacl/infer/tensorlist_getitem_infer.c | |||
| ${LITE_DIR}/nnacl/infer/tensorlist_reserve_infer.c | |||
| ${LITE_DIR}/nnacl/infer/tensorlist_setitem_infer.c | |||
| ${LITE_DIR}/nnacl/infer/tensorlist_stack_infer.c | |||
| ${LITE_DIR}/nnacl/infer/tile_infer.c | |||
| ${LITE_DIR}/nnacl/infer/topk_infer.c | |||
| ${LITE_DIR}/nnacl/infer/transpose_infer.c | |||
| ${LITE_DIR}/nnacl/infer/uniform_real_infer.c | |||
| ${LITE_DIR}/nnacl/infer/unique_infer.c | |||
| ${LITE_DIR}/nnacl/infer/unsorted_segment_sum_infer.c | |||
| ${LITE_DIR}/nnacl/infer/unsqueeze_infer.c | |||
| ${LITE_DIR}/nnacl/infer/unstack_infer.c | |||
| ${LITE_DIR}/nnacl/infer/where_infer.c | |||
| ${LITE_DIR}/nnacl/infer/while_infer.c | |||
| ${LITE_DIR}/nnacl/infer/splice_infer.c | |||
| ) | |||
| list(APPEND FILE_SET ${CODER_SRC} ${CODER_UTILS_SRC} ${CODER_OPCODERS_SRC} ${CODER_GENERATOR_SRC} | |||
| ${CODER_ALLOCATOR_SRC} ${LITE_SRC} ${LITE_KERNEL_SRC}) | |||
| list(APPEND FILE_SET ${CODER_SRC} ${CODER_OPCODERS_SRC} ${CODER_GENERATOR_SRC} | |||
| ${CODER_ALLOCATOR_SRC} ${LITE_SRC} ${LITE_KERNEL_SRC} ${MINDSPORE_CORE}) | |||
| @@ -0,0 +1,21 @@ | |||
| set(CMSIS_DIR ${LITE_DIR}/micro/build/cmsis) | |||
| if(MICRO_CMSIS_X86) | |||
| message("build cmsis kernels") | |||
| include_directories(${CMSIS_DIR}/CMSIS/Core/Include) | |||
| include_directories(${CMSIS_DIR}/CMSIS/DSP/Include) | |||
| include_directories(${CMSIS_DIR}/CMSIS/NN/Include) | |||
| file(REMOVE ${CMSIS_DIR}/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c) | |||
| file(GLOB CMSIS_OPS | |||
| ${CMSIS_DIR}/CMSIS/NN/Source/BasicMathFunctions/*.c | |||
| ${CMSIS_DIR}/CMSIS/NN/Source/ActivationFunctions/*.c | |||
| ${CMSIS_DIR}/CMSIS/NN/Source/ConcatenationFunctions/*.c | |||
| ${CMSIS_DIR}/CMSIS/NN/Source/ConvolutionFunctions/*.c | |||
| ${CMSIS_DIR}/CMSIS/NN/Source/FullyConnectedFunctions/*.c | |||
| ${CMSIS_DIR}/CMSIS/NN/Source/NNSupportFunctions/*.c | |||
| ${CMSIS_DIR}/CMSIS/NN/Source/PoolingFunctions/*.c | |||
| ${CMSIS_DIR}/CMSIS/NN/Source/ReshapeFunctions/*.c | |||
| ${CMSIS_DIR}/CMSIS/NN/Source/SoftmaxFunctions/*.c | |||
| ) | |||
| endif() | |||
| @@ -1,32 +0,0 @@ | |||
| include_directories(${NNACL_DIR}/..) | |||
| set(CMSIS_SRC ${NNACL_DIR}/../micro/build/cmsis) | |||
| if(MICRO_CMSIS_X86) | |||
| message("*****build cmsis x86 codes****") | |||
| include_directories(${CMSIS_SRC}/CMSIS/Core/Include) | |||
| include_directories(${CMSIS_SRC}/CMSIS/DSP/Include) | |||
| include_directories(${CMSIS_SRC}/CMSIS/NN/Include) | |||
| file(GLOB RUNTIME_KERNEL_CMSIS_SRC | |||
| ${CMSIS_SRC}/CMSIS/NN/Source/BasicMathFunctions/*.c | |||
| ${CMSIS_SRC}/CMSIS/NN/Source/ActivationFunctions/*.c | |||
| ${CMSIS_SRC}/CMSIS/NN/Source/ConcatenationFunctions/*.c | |||
| ${CMSIS_SRC}/CMSIS/NN/Source/ConvolutionFunctions/*.c | |||
| ${CMSIS_SRC}/CMSIS/NN/Source/FullyConnectedFunctions/*.c | |||
| ${CMSIS_SRC}/CMSIS/NN/Source/NNSupportFunctions/*.c | |||
| ${CMSIS_SRC}/CMSIS/NN/Source/PoolingFunctions/*.c | |||
| ${CMSIS_SRC}/CMSIS/NN/Source/ReshapeFunctions/*.c | |||
| ${CMSIS_SRC}/CMSIS/NN/Source/SoftmaxFunctions/*.c | |||
| ) | |||
| endif() | |||
| ########################### files ########################### | |||
| file(GLOB RUNTIME_KERNEL_SRC | |||
| ${NNACL_DIR}/kernel/fp32/*.c | |||
| ${NNACL_DIR}/kernel/int8/*.c | |||
| ) | |||
| if(MICRO_CMSIS_X86) | |||
| set(RUNTIME_OPS ${RUNTIME_KERNEL_SRC} ${RUNTIME_TRAIN_SRC} ${RUNTIME_KERNEL_CMSIS_SRC}) | |||
| else() | |||
| set(RUNTIME_OPS ${RUNTIME_KERNEL_SRC} ${RUNTIME_TRAIN_SRC}) | |||
| endif() | |||
| @@ -0,0 +1,20 @@ | |||
| include_directories(${LITE_DIR}) | |||
| set(NNACL_DIR ${LITE_DIR}/nnacl) | |||
| file(GLOB KERNEL_SRC | |||
| ${NNACL_DIR}/*.c | |||
| ${NNACL_DIR}/base/*.c | |||
| ${NNACL_DIR}/fp32/*.c | |||
| ${NNACL_DIR}/int8/*.c | |||
| ) | |||
| if(MICRO_BUILD_ARM64) | |||
| file(GLOB ASSEMBLY_SRC ${NNACL_DIR}/assembly/arm64/*.S) | |||
| set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C) | |||
| endif() | |||
| if(MICRO_BUILD_ARM32A) | |||
| file(GLOB ASSEMBLY_SRC ${NNACL_DIR}/assembly/arm32/*.S) | |||
| set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C) | |||
| endif() | |||
| set(NNACL_OPS ${KERNEL_SRC} ${ASSEMBLY_SRC}) | |||
| @@ -0,0 +1,25 @@ | |||
| include_directories(${LITE_DIR}/micro/coder/operator_library) | |||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") | |||
| set(WRAPPER_DIR ${LITE_DIR}/micro/coder/operator_library/wrapper/) | |||
| set(RUNTIME_SRC | |||
| ${LITE_DIR}/src/runtime/thread_pool.c | |||
| ) | |||
| set(WRAPPER_SRC | |||
| ${WRAPPER_DIR}/base/detection_post_process_base_wrapper.c | |||
| ${WRAPPER_DIR}/fp32/matmul_fp32_wrapper.c | |||
| ${WRAPPER_DIR}/int8/matmul_int8_wrapper.c | |||
| ${WRAPPER_DIR}/int8/add_int8_wrapper.c | |||
| ${WRAPPER_DIR}/int8/concat_int8_wrapper.c | |||
| ${WRAPPER_DIR}/int8/convolution_int8_wrapper.c | |||
| ${WRAPPER_DIR}/int8/conv_init_int8_wrapper.c | |||
| ${WRAPPER_DIR}/int8/conv1x1_init_int8_wrapper.c | |||
| ${WRAPPER_DIR}/int8/conv1x1_run_int8_wrapper.c | |||
| ${WRAPPER_DIR}/int8/convolution_depthwise_int8_wrapper.c | |||
| ${WRAPPER_DIR}/int8/resize_int8_wrapper.c | |||
| ${WRAPPER_DIR}/int8/slice_int8_wrapper.c | |||
| ${WRAPPER_DIR}/int8/batchnorm_int8_wrapper.c | |||
| ) | |||
| list(APPEND FILE_SET ${WRAPPER_SRC} ${RUNTIME_SRC}) | |||
| @@ -1,12 +0,0 @@ | |||
| SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") | |||
| set(MICRO_WRAPPER_SRC | |||
| ${LITE_DIR}/src/runtime/thread_pool.c | |||
| ${MICRO_DIR}/wrapper/fp32/matmul_fp32_wrapper.c | |||
| ${MICRO_DIR}/wrapper/int8/matmul_int8_wrapper.c | |||
| ${MICRO_DIR}/wrapper/int8/conv_init_int8_wrapper.c | |||
| ${MICRO_DIR}/wrapper/int8/conv1x1_init_int8_wrapper.c | |||
| ${MICRO_DIR}/wrapper/int8/conv1x1_run_int8_wrapper.c | |||
| ) | |||
| list(APPEND FILE_SET ${MICRO_WRAPPER_SRC}) | |||
| @@ -1,7 +1,10 @@ | |||
| add_definitions(-DUSE_GLOG) | |||
| set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections ") | |||
| set(MICRO_DIR ${CMAKE_CURRENT_SOURCE_DIR}/..) | |||
| set(LITE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../..) | |||
| set(3RD_DIR ${TOP_DIR}/third_party) | |||
| set(LITE_DIR ${TOP_DIR}/mindspore/lite) | |||
| set(MICRO_DIR ${LITE_DIR}/micro) | |||
| if(ENABLE_CONVERTER) | |||
| set(CODEGEN_PATH ${CMAKE_BINARY_DIR}/micro/coder/codegen) | |||
| else() | |||
| @@ -13,17 +16,19 @@ include_directories(${3RD_DIR}) | |||
| include_directories(${3RD_DIR}/flatbuffers/include) | |||
| #include ms | |||
| include_directories(${TOP_DIR}/) | |||
| include_directories(${LITE_DIR}) | |||
| include_directories(${TOP_DIR}/mindspore/core/) | |||
| include_directories(${LITE_DIR}) | |||
| include_directories(${MICRO_DIR}) | |||
| #include coder | |||
| include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../) | |||
| include(${TOP_DIR}/cmake/external_libs/cmsis.cmake) | |||
| include(${MICRO_DIR}/cmake/file_list.cmake) | |||
| include(${MICRO_DIR}/cmake/wrapper.cmake) | |||
| include(${MICRO_DIR}/cmake/package_wrapper.cmake) | |||
| add_subdirectory(operator_library) | |||
| add_executable(codegen main.cc ${FILE_SET}) | |||
| add_dependencies(codegen fbs_src) | |||
| add_dependencies(codegen fbs_inner_src) | |||
| target_link_libraries(codegen PRIVATE ${SECUREC_LIBRARY}) | |||
| if(NOT WIN32) | |||
| add_custom_command(TARGET codegen POST_BUILD COMMAND strip ${CODEGEN_PATH}) | |||
| target_link_libraries(codegen PRIVATE ${SECUREC_LIBRARY} mindspore::glog) | |||
| if(NOT WIN32 AND "${CMAKE_BUILD_TYPE}" STREQUAL "Release") | |||
| add_custom_command(TARGET codegen POST_BUILD COMMAND strip ${CODEGEN_PATH}) | |||
| endif() | |||
| @@ -22,11 +22,9 @@ | |||
| namespace mindspore::lite::micro { | |||
| void *MemoryAllocator::MallocWeightTensor(TypeId type_id, size_t size, MallocType type) { | |||
| static const std::map<TypeId, size_t> size_map = {{kNumberTypeFloat32, sizeof(float)}, | |||
| {kNumberTypeInt32, sizeof(int)}, | |||
| {kNumberTypeInt32, sizeof(int32_t)}, | |||
| {kNumberTypeInt16, sizeof(int16_t)}, | |||
| {kNumberTypeInt8, sizeof(int8_t)}}; | |||
| static const std::map<TypeId, size_t> size_map = { | |||
| {kNumberTypeFloat, sizeof(float)}, {kNumberTypeFloat32, sizeof(float)}, {kNumberTypeInt32, sizeof(int32_t)}, | |||
| {kNumberTypeInt16, sizeof(int16_t)}, {kNumberTypeInt8, sizeof(int8_t)}, {kNumberTypeUInt8, sizeof(uint8_t)}}; | |||
| auto item = size_map.find(type_id); | |||
| MS_CHECK_TRUE_RET_NULL(item != size_map.end(), "unsupported type idnex"); | |||
| size_t type_size = item->second; | |||
| @@ -73,7 +73,7 @@ class MemoryAllocator { | |||
| if (type != kWorkspace) { | |||
| return MallocWeightTensor(type_id, size, type); | |||
| } | |||
| if (size == 0 && size >= UINT_MAX) { | |||
| if (size == 0 || size >= UINT_MAX) { | |||
| return nullptr; | |||
| } | |||
| @@ -94,12 +94,12 @@ class MemoryAllocator { | |||
| template <typename T> | |||
| std::string GetRuntimeAddr(T t, bool is_const = false) { | |||
| if (!t) { | |||
| return "NULL"; | |||
| return ""; | |||
| } | |||
| std::string type_info = is_const ? "const " : ""; | |||
| std::string type_name; | |||
| if (std::type_index(typeid(T)) == std::type_index(typeid(Tensor *))) { | |||
| type_name = GetTensorDataType(reinterpret_cast<Tensor *>(t)->data_type()) + " *"; | |||
| type_name = GetTensorDataType(reinterpret_cast<Tensor *>(t)->data_type()) + "*"; | |||
| } else { | |||
| type_name = GetVariableTypeName<T>(); | |||
| } | |||
| @@ -34,19 +34,20 @@ namespace mindspore::lite::micro { | |||
| class CoderFlags : public virtual FlagParser { | |||
| public: | |||
| CoderFlags() { | |||
| AddFlag(&CoderFlags::is_weight_file_, "isWeightFile", "whether generating weight .net file, true| false", false); | |||
| AddFlag(&CoderFlags::is_weight_file_, "isWeightFile", "whether generating weight binary file, true| false", false); | |||
| AddFlag(&CoderFlags::model_path_, "modelPath", "Input model path", ""); | |||
| AddFlag(&CoderFlags::code_path_, "codePath", "Input code path", "."); | |||
| AddFlag(&CoderFlags::code_module_name_, "moduleName", "Input code module name", ""); | |||
| AddFlag(&CoderFlags::target_, "target", "generateed code target, x86| ARM32M| ARM32A| ARM64", "x86"); | |||
| AddFlag(&CoderFlags::code_mode_, "codeMode", "generated code mode, Normal | Inference | Train", "Normal"); | |||
| AddFlag(&CoderFlags::debug_mode_, "debugMode", "dump perlayer's time cost and tensor, true | false", false); | |||
| AddFlag(&CoderFlags::target_, "target", "generated code target, x86| ARM32M| ARM32A| ARM64", "x86"); | |||
| AddFlag(&CoderFlags::code_mode_, "codeMode", "generated code mode, Inference | Train", "Inference"); | |||
| AddFlag(&CoderFlags::support_parallel_, "supportParallel", "whether support parallel launch, true | false", false); | |||
| AddFlag(&CoderFlags::debug_mode_, "debugMode", "dump the tensors data for debugging, true | false", false); | |||
| } | |||
| ~CoderFlags() override = default; | |||
| public: | |||
| std::string model_path_; | |||
| bool support_parallel_{false}; | |||
| bool is_weight_file_{false}; | |||
| std::string code_module_name_; | |||
| std::string code_path_; | |||
| @@ -87,8 +88,7 @@ int Coder::Run(const std::string &model_path) { | |||
| int Coder::Init(const CoderFlags &flags) const { | |||
| static const std::map<std::string, Target> kTargetMap = { | |||
| {"x86", kX86}, {"ARM32M", kARM32M}, {"ARM32A", kARM32A}, {"ARM64", kARM64}, {"All", kAllTargets}}; | |||
| static const std::map<std::string, CodeMode> kCodeModeMap = { | |||
| {"Normal", Code_Normal}, {"Inference", Code_Inference}, {"Train", Code_Train}}; | |||
| static const std::map<std::string, CodeMode> kCodeModeMap = {{"Inference", Inference}, {"Train", Train}}; | |||
| Configurator *config = Configurator::GetInstance(); | |||
| @@ -112,6 +112,11 @@ int Coder::Init(const CoderFlags &flags) const { | |||
| return true; | |||
| }); | |||
| parsers.emplace_back([&flags, config]() -> bool { | |||
| config->set_support_parallel(flags.support_parallel_); | |||
| return true; | |||
| }); | |||
| parsers.emplace_back([&flags, config]() -> bool { | |||
| config->set_debug_mode(flags.debug_mode_); | |||
| return true; | |||
| @@ -21,7 +21,7 @@ | |||
| namespace mindspore::lite::micro { | |||
| enum Target { kX86 = 0, kARM32M = 1, kARM32A = 2, kARM64 = 3, kAllTargets = 4, kTargetUnknown = 99 }; | |||
| enum CodeMode { Code_Normal = 0, Code_Inference = 1, Code_Train = 2, Code_Unknown = 99 }; | |||
| enum CodeMode { Inference = 0, Train = 1, Code_Unknown = 99 }; | |||
| class Configurator { | |||
| public: | |||
| @@ -36,9 +36,6 @@ class Configurator { | |||
| void set_code_path(const std::string &code_path) { code_path_ = code_path; } | |||
| std::string code_path() const { return code_path_; } | |||
| void set_subgraph_(const std::string &subgraph) { sub_graph_ = subgraph; } | |||
| std::string sub_graph() { return sub_graph_; } | |||
| void set_target(Target target) { target_ = target; } | |||
| Target target() const { return target_; } | |||
| @@ -51,16 +48,19 @@ class Configurator { | |||
| void set_is_weight_file(bool flag) { is_weight_file_ = flag; } | |||
| bool is_weight_file() const { return is_weight_file_; } | |||
| void set_support_parallel(bool parallel) { support_parallel_ = parallel; } | |||
| bool support_parallel() const { return support_parallel_; } | |||
| private: | |||
| Configurator() = default; | |||
| ~Configurator() = default; | |||
| bool is_weight_file_{false}; | |||
| std::string module_name_; | |||
| std::string code_path_; | |||
| std::string sub_graph_; | |||
| Target target_{kTargetUnknown}; | |||
| CodeMode code_mode_{Code_Unknown}; | |||
| bool is_weight_file_{false}; | |||
| bool support_parallel_{false}; | |||
| bool debug_mode_{false}; | |||
| }; | |||
| } // namespace mindspore::lite::micro | |||
| @@ -14,9 +14,9 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "micro/coder/context.h" | |||
| #include "micro/coder/coder_config.h" | |||
| #include "micro/coder/allocator/allocator.h" | |||
| #include "coder/context.h" | |||
| #include "coder/coder_config.h" | |||
| #include "coder/allocator/allocator.h" | |||
| namespace mindspore::lite::micro { | |||
| CoderContext::CoderContext() { | |||
| @@ -108,7 +108,7 @@ void CodeBenchmarkSetBuffer(std::ofstream &ofs, const std::string &module_name) | |||
| << "_SetBuffer(buffer);\n" | |||
| " if (ret != RET_OK) {\n" | |||
| " MICRO_ERROR(\"set inputs failed\");\n" | |||
| " return RET_ERROR;" | |||
| " return RET_ERROR;\n" | |||
| " }\n"; | |||
| } | |||
| @@ -128,19 +128,6 @@ void CodeBenchmarkInitWeight(std::ofstream &ofs, const std::string &module_name) | |||
| " weight_buffer = NULL;\n"; | |||
| } | |||
| void CodeBenchmarkConfigThread(std::ofstream &ofs) { | |||
| ofs << " int thread_num = 4;\n" | |||
| " BindMode bind_mode = NO_BIND_MODE;\n" | |||
| " if (argc >= 6) {\n" | |||
| " thread_num = atoi(argv[4]);\n" | |||
| " bind_mode = atoi(argv[5]);\n" | |||
| " }\n" | |||
| " ret = ConfigThreadPool(THREAD_POOL_DEFAULT, thread_num, bind_mode);\n" | |||
| " if (ret != 0) {\n" | |||
| " MICRO_ERROR(\"create thread pool failed\");\n" | |||
| " }\n"; | |||
| } | |||
| void CodeBenchmarkInference(std::ofstream &ofs, const std::string &module_name) { | |||
| ofs << " if (argc >= 4) {\n" | |||
| << " " << module_name << "_WarmUp();\n" | |||
| @@ -170,7 +157,6 @@ void CodeBenchmarkPrintOutputs(std::ofstream &ofs, const std::string &module_nam | |||
| " PrintTensorData(tensor);\n" | |||
| " }\n"; | |||
| ofs << " printf(\"" << module_name << " inference success.\\n\");\n"; | |||
| ofs << " free(buffer);\n"; | |||
| } | |||
| /** | |||
| @@ -39,8 +39,6 @@ void CodeBenchmarkSetBuffer(std::ofstream &ofs, const std::string &module_name); | |||
| void CodeBenchmarkInitWeight(std::ofstream &ofs, const std::string &module_name); | |||
| void CodeBenchmarkConfigThread(std::ofstream &ofs); | |||
| void CodeBenchmarkInference(std::ofstream &ofs, const std::string &module_name); | |||
| void CodeBenchmarkPrintOutputs(std::ofstream &ofs, const std::string &module_name); | |||
| @@ -24,10 +24,9 @@ void CodeCMakeNetLibrary(std::ofstream &ofs, const std::string &module_name, con | |||
| Target target) { | |||
| ofs << "include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../include/)\n"; | |||
| if (target == kARM32M) { | |||
| ofs << "include_directories(${OP_HEADER_PATH}/cmsis)\n" | |||
| << "include_directories(${OP_HEADER_PATH}/cmsis/CMSIS/NN/Include)\n" | |||
| << "include_directories(${OP_HEADER_PATH}/cmsis/CMSIS/DSP/Include)\n" | |||
| << "include_directories(${OP_HEADER_PATH}/cmsis/CMSIS/Core/Include)\n"; | |||
| ofs << "include_directories(${OP_HEADER_PATH}/CMSIS/NN/Include)\n" | |||
| << "include_directories(${OP_HEADER_PATH}/CMSIS/DSP/Include)\n" | |||
| << "include_directories(${OP_HEADER_PATH}/CMSIS/Core/Include)\n"; | |||
| } | |||
| ofs << "set(OP_SRC\n"; | |||
| for (const std::string &c_file : ctx->c_files()) { | |||
| @@ -38,7 +37,7 @@ void CodeCMakeNetLibrary(std::ofstream &ofs, const std::string &module_name, con | |||
| << ")\n"; | |||
| std::set<std::string> kernel_cmake_asm_set_files = ctx->asm_files(); | |||
| if (!kernel_cmake_asm_set_files.empty()) { | |||
| if (!kernel_cmake_asm_set_files.empty() && (target == kARM32A || target == kARM64)) { | |||
| ofs << "set(ASSEMBLY_SRC\n"; | |||
| for (const std::string &asm_file : kernel_cmake_asm_set_files) { | |||
| ofs << " " << asm_file << ".o\n"; | |||
| @@ -26,7 +26,7 @@ namespace mindspore::lite::micro { | |||
| void CodeSourceFileInclude(std::ofstream &ofs, const std::string &weight_file, const std::string &header) { | |||
| ofs << g_hwLicense << "#include \"microtensor.h\"\n" | |||
| << "#include \"" << weight_file << "\"\n" | |||
| << "#include \"" << header << "\"\n"; | |||
| << "#include \"" << header << "\"\n\n"; | |||
| } | |||
| void CodeInputAndOutputState(std::ofstream &ofs, const std::string &module_name) { | |||
| @@ -53,13 +53,13 @@ void PrintMicroTensors(std::ofstream &ofs, std::vector<Tensor *> tensors, const | |||
| MS_LOG(ERROR) << "nonexistent tensor"; | |||
| break; | |||
| } | |||
| ofs << " static int dim[] = {"; | |||
| ofs << " static int dim" << i << "[] = {"; | |||
| for (size_t j = 0; j < tensor->shape().size(); ++j) { | |||
| ofs << tensor->shape()[j] << ", "; | |||
| } | |||
| ofs << "};\n" | |||
| << " " << name << "[" << i << "].ndim = " << tensor->shape().size() << ";\n" | |||
| << " " << name << "[" << i << "].dim = dim;\n" | |||
| << " " << name << "[" << i << "].dim = dim" << i << ";\n" | |||
| << " " << name << "[" << i << "].type = " << EnumMicroTensorDataType(tensor->data_type()) << ";\n" | |||
| << " " << name << "[" << i << "].format = " << std::to_string(tensor->format()) << ";\n" | |||
| << " " << name << "[" << i << "].data =" << item->second << ";\n"; | |||
| @@ -69,7 +69,6 @@ void PrintMicroTensors(std::ofstream &ofs, std::vector<Tensor *> tensors, const | |||
| void CodeInputAndOutputImplement(std::ofstream &ofs, const std::string &module_name, | |||
| const std::unique_ptr<CoderContext> &ctx) { | |||
| // input tensors | |||
| ofs << "\n// input tensors\n"; | |||
| std::vector<Tensor *> inputs = ctx->graph_inputs(); | |||
| for (size_t i = 0; i < inputs.size(); ++i) { | |||
| ofs << "static const unsigned char *" << ctx->input_name() + std::to_string(i) << " = 0;\n"; | |||
| @@ -88,7 +87,6 @@ void CodeInputAndOutputImplement(std::ofstream &ofs, const std::string &module_n | |||
| ofs << " return RET_OK;\n}\n"; | |||
| // output tensors | |||
| ofs << "\n// output tensors\n"; | |||
| std::vector<Tensor *> outputs = ctx->graph_outputs(); | |||
| size_t output_num = outputs.size(); | |||
| std::string output_name = ctx->output_name(); | |||
| @@ -158,7 +156,7 @@ void CodeManageResourceState(std::ofstream &ofs, const std::string &module_name) | |||
| void CodeInitResourceImplement(std::ofstream &ofs, const std::string &module_name, | |||
| const std::unique_ptr<CoderContext> &ctx) { | |||
| ofs << "int " << module_name << "deconv_GetBufferSize() {\n" | |||
| ofs << "int " << module_name << "_GetBufferSize() {\n" | |||
| << " return " << ctx->total_buffer_size() << ";\n" | |||
| << "}\n"; | |||
| ofs << "int " << module_name << "_SetBuffer( void *buffer) {\n"; | |||
| @@ -14,10 +14,10 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MICRO_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_CMAKE_LISTS_CODE_H_ | |||
| #ifndef MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_CMAKE_LISTS_CODE_H_ | |||
// The guard macro defined here must be the one tested by the MINDSPORE_LITE_... #ifndef and
// named on the closing #endif; defining a differently named macro leaves the guard ineffective
// and lets the string constants below be defined twice on repeated inclusion.
#define MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_CMAKE_LISTS_CODE_H_
| static const char bench_cmake_lists_txt[] = | |||
| const char *bench_cmake_lists_txt = | |||
| "cmake_minimum_required(VERSION 3.14)\n" | |||
| "project(${PROJ_NAME})\n" | |||
| "\n" | |||
| @@ -55,9 +55,9 @@ static const char bench_cmake_lists_txt[] = | |||
| "link_directories(${MODEL_LIB_PATH})\n" | |||
| "include(benchmark.cmake)\n" | |||
| "add_executable(${PROJ_NAME}_bench ${SRC_FILES})\n" | |||
| "target_link_libraries(${PROJ_NAME}_bench ${MODEL_LIB_NAME} -lm)\n"; | |||
| "target_link_libraries(${PROJ_NAME}_bench ${MODEL_LIB_NAME} -lm -pthread)\n"; | |||
| static const char src_cmake_lists_txt[] = | |||
| const char *src_cmake_lists_txt = | |||
| "cmake_minimum_required(VERSION 3.14)\n" | |||
| "project(${PROJ_NAME})\n" | |||
| "\n" | |||
| @@ -112,4 +112,4 @@ static const char src_cmake_lists_txt[] = | |||
| "string(CONCAT library_name \"lib\" ${PROJ_NAME} \".a\")\n" | |||
| "create_library()\n"; | |||
| #endif // MICRO_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_CMAKE_LISTS_CODE_H_ | |||
| #endif // MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_CMAKE_LISTS_CODE_H_ | |||
| @@ -13,10 +13,10 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MICRO_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_BEN_DEBUG_UTILS_H_ | |||
| #define MICRO_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_BEN_DEBUG_UTILS_H_ | |||
| #ifndef MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_DEBUG_UTILS_H_ | |||
| #define MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_DEBUG_UTILS_H_ | |||
| static const char debug_utils_h[] = | |||
| const char *debug_utils_h = | |||
| "/**\n" | |||
| " * Copyright 2021 Huawei Technologies Co., Ltd\n" | |||
| " *\n" | |||
| @@ -50,7 +50,7 @@ static const char debug_utils_h[] = | |||
| "\n" | |||
| "#endif // MINDSPORE_LITE_MICRO_MICRODEBUGUTIL_H_\n"; | |||
| static const char debug_utils_c[] = | |||
| const char *debug_utils_c = | |||
| "/**\n" | |||
| " * Copyright 2021 Huawei Technologies Co., Ltd\n" | |||
| " *\n" | |||
| @@ -239,7 +239,7 @@ static const char debug_utils_c[] = | |||
| "}\n" | |||
| "\n" | |||
| "void PrintTensor(MicroTensor *tensor, FILE *output_file, const char *is_input) {\n" | |||
| " if (output_file != NULL) {\n" | |||
| " if (output_file == NULL) {\n" | |||
| " MICRO_ERROR(\"output file is NULL\");\n" | |||
| " return;\n" | |||
| " }\n" | |||
| @@ -269,4 +269,4 @@ static const char debug_utils_c[] = | |||
| " return retval;\n" | |||
| "}\n"; | |||
| #endif // MICRO_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_BEN_DEBUG_UTILS_H_ | |||
| #endif // MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_DEBUG_UTILS_H_ | |||
| @@ -14,12 +14,12 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MICRO_GENERATOR_CONST_BLOCK_LICENSE_INFOS_H | |||
| #define MICRO_GENERATOR_CONST_BLOCK_LICENSE_INFOS_H | |||
| #ifndef MINDSPORE_LITE_MICRO_GENERATOR_CONST_BLOCK_LICENSE_INFOS_H_ | |||
| #define MINDSPORE_LITE_MICRO_GENERATOR_CONST_BLOCK_LICENSE_INFOS_H_ | |||
| namespace mindspore::lite::micro { | |||
| const char g_hwLicense[] = | |||
| static const char *g_hwLicense = | |||
| "/**\n" | |||
| " * Copyright 2021 Huawei Technologies Co., Ltd\n" | |||
| " *\n" | |||
| @@ -37,4 +37,4 @@ const char g_hwLicense[] = | |||
| " */\n\n"; | |||
| } // namespace mindspore::lite::micro | |||
| #endif // MICRO_GENERATOR_CONST_BLOCK_LICENSE_INFOS_H | |||
| #endif // MINDSPORE_LITE_MICRO_GENERATOR_CONST_BLOCK_LICENSE_INFOS_H_ | |||
| @@ -14,9 +14,9 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MICRO_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_BENCH_LOAD_INPUT_H_ | |||
| #define MICRO_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_BENCH_LOAD_INPUT_H_ | |||
| static const char load_input_h[] = | |||
| #ifndef MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_BENCH_LOAD_INPUT_H_ | |||
| #define MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_BENCH_LOAD_INPUT_H_ | |||
| const char *load_input_h = | |||
| "/**\n" | |||
| " * Copyright 2021 Huawei Technologies Co., Ltd\n" | |||
| " *\n" | |||
| @@ -43,7 +43,7 @@ static const char load_input_h[] = | |||
| "\n" | |||
| "#endif // MICRO_EXAMPLE_LOAD_INPUT_LOAD_INPUT_H_\n"; | |||
| static const char load_input_c[] = | |||
| const char *load_input_c = | |||
| "/**\n" | |||
| " * Copyright 2021 Huawei Technologies Co., Ltd\n" | |||
| " *\n" | |||
| @@ -131,11 +131,11 @@ static const char load_input_c[] = | |||
| " int size = 0;\n" | |||
| " buffers[i] = ReadInputData(inputs_path[i], &size);\n" | |||
| " if (size != inputs_size[i] || buffers[i] == NULL) {\n" | |||
| " printf(\"size mismatch, %s, %d, %d\\n\", inputs_path[i], size, inputs_size[i]);\n" | |||
| " printf(\"size mismatch, %s, input: %d, needed: %d\\n\", inputs_path[i], size, inputs_size[i]);\n" | |||
| " return -1;\n" | |||
| " }\n" | |||
| " }\n" | |||
| " return 0;\n" | |||
| "}\n"; | |||
| #endif // MICRO_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_BENCH_LOAD_INPUT_H_ | |||
| #endif // MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_BENCH_LOAD_INPUT_H_ | |||
| @@ -13,10 +13,10 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MICRO_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_MICRO_TENSOR_H_ | |||
| #define MICRO_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_MICRO_TENSOR_H_ | |||
| #ifndef MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_MICRO_TENSOR_H_ | |||
| #define MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_MICRO_TENSOR_H_ | |||
| static const char micro_tensor_h[] = | |||
| const char *micro_tensor_h = | |||
| "/**\n" | |||
| " * Copyright 2021 Huawei Technologies Co., Ltd\n" | |||
| " *\n" | |||
| @@ -42,20 +42,8 @@ static const char micro_tensor_h[] = | |||
| "#include <stdbool.h>\n" | |||
| "#include <stdint.h>\n" | |||
| "\n" | |||
| "inline bool IsPrint() {\n" | |||
| " char *env = getenv(\"GLOG_v\");\n" | |||
| " if (env == NULL) {\n" | |||
| " return false;\n" | |||
| " }\n" | |||
| " return strcmp(env, \"1\") == 0;\n" | |||
| "}\n" | |||
| "\n" | |||
| "#define MICRO_INFO(content, args...) \\\n" | |||
| " { \\\n" | |||
| " if (IsPrint()) { \\\n" | |||
| " printf(\"[INFO] %s|%d: \" #content \"\\r\\n\", __func__, __LINE__, ##args); \\\n" | |||
| " } \\\n" | |||
| " }\n" | |||
| "#define MICRO_INFO(content, args...) \\\n" | |||
| " { printf(\"[INFO] %s|%d: \" #content \"\\r\\n\", __func__, __LINE__, ##args); }\n" | |||
| "#define MICRO_ERROR(content, args...) \\\n" | |||
| " { printf(\"[ERROR] %s|%d: \" #content \"\\r\\n\", __func__, __LINE__, ##args); }\n" | |||
| "\n" | |||
| @@ -115,4 +103,4 @@ static const char micro_tensor_h[] = | |||
| "} GraphQuantArgs;\n" | |||
| "\n" | |||
| "#endif // MSMICRO_TENSOR_H\n"; | |||
| #endif // MICRO_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_MICRO_TENSOR_H_ | |||
| #endif // MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_MICRO_TENSOR_H_ | |||
| @@ -0,0 +1,99 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_THREAD_POOL_H_ | |||
| #define MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_THREAD_POOL_H_ | |||
namespace mindspore::lite::micro {
// Verbatim text of the thread_pool.h header that the generator writes next to the generated net
// when parallel support is enabled.
//
// Declared `inline constexpr` because this definition lives in a header: a plain
// `const char *` variable has external linkage and is mutable, so including this header from
// more than one translation unit would produce multiple-definition link errors.
inline constexpr const char *thread_pool_h =
  "/**\n"
  " * Copyright 2021 Huawei Technologies Co., Ltd\n"
  " *\n"
  " * Licensed under the Apache License, Version 2.0 (the \"License\");\n"
  " * you may not use this file except in compliance with the License.\n"
  " * You may obtain a copy of the License at\n"
  " *\n"
  " * http://www.apache.org/licenses/LICENSE-2.0\n"
  " *\n"
  " * Unless required by applicable law or agreed to in writing, software\n"
  " * distributed under the License is distributed on an \"AS IS\" BASIS,\n"
  " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n"
  " * See the License for the specific language governing permissions and\n"
  " * limitations under the License.\n"
  " */\n"
  "\n"
  "#ifndef MINDSPORE_LITE_SRC_RUNTIME_THREAD_POOL_H_\n"
  "#define MINDSPORE_LITE_SRC_RUNTIME_THREAD_POOL_H_\n"
  "\n"
  "#include <stdbool.h>\n"
  "\n"
  "#define MAX_TASK_NUM (2)\n"
  "\n"
  "/// \\brief BindMode defined for holding bind cpu strategy argument.\n"
  "typedef enum {\n"
  " NO_BIND_MODE = 0, /**< no bind */\n"
  " HIGHER_MODE = 1, /**< bind higher cpu first */\n"
  " MID_MODE = 2 /**< bind middle cpu first */\n"
  "} BindMode;\n"
  "\n"
  "struct ThreadPool;\n"
  "\n"
  "struct ThreadPool *CreateThreadPool(int thread_num, int mode);\n"
  "\n"
  "/**\n"
  " *\n"
  " * @param session_index, support multi session\n"
  " * @param job\n"
  " * @param content\n"
  " * @param task_num\n"
  " */\n"
  "int ParallelLaunch(struct ThreadPool *thread_pool, int (*job)(void *, int), void *content, int task_num);\n"
  "\n"
  "/**\n"
  " * bind each thread to specified cpu core\n"
  " * @param is_bind\n"
  " * @param mode\n"
  " */\n"
  "int BindThreads(struct ThreadPool *thread_pool, bool is_bind, int mode);\n"
  "\n"
  "/**\n"
  " * activate the thread pool\n"
  " * @param thread_pool_id\n"
  " */\n"
  "void ActivateThreadPool(struct ThreadPool *thread_pool);\n"
  "\n"
  "/**\n"
  " * deactivate the thread pool\n"
  " * @param thread_pool_id\n"
  " */\n"
  "void DeactivateThreadPool(struct ThreadPool *thread_pool);\n"
  "\n"
  "/**\n"
  " *\n"
  " * @return current thread num\n"
  " */\n"
  "int GetCurrentThreadNum(struct ThreadPool *thread_pool);\n"
  "\n"
  "/**\n"
  " * destroy thread pool, and release resource\n"
  " */\n"
  "void DestroyThreadPool(struct ThreadPool *thread_pool);\n"
  "\n"
  "#endif // MINDSPORE_LITE_SRC_RUNTIME_THREAD_POOL_H_\n";
}  // namespace mindspore::lite::micro
| #endif // MINDSPORE_LITE_MICRO_CODER_GENERATOR_CONST_BLOCKS_THREAD_POOL_H_ | |||
| @@ -0,0 +1,61 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "coder/generator/component/parallel_component.h" | |||
| #include <string> | |||
| namespace mindspore::lite::micro { | |||
// Writes the benchmark snippet that creates a thread pool and registers it with the generated net.
//
// The emitted C code defaults to 4 threads and NO_BIND_MODE, takes both values from
// argv[4] / argv[5] when argc >= 6, calls CreateThreadPool and then <module_name>_SetThreadPool,
// and returns RET_ERROR from the enclosing function when either call fails.
//
// \param ofs          output stream the snippet is appended to
// \param module_name  prefix used to form the <module_name>_SetThreadPool call
void CodeCreateThreadPool(std::ofstream &ofs, const std::string &module_name) {
  // Part 1: argument parsing and pool creation (independent of the module name).
  const std::string create_pool_code =
    " int thread_num = 4;\n"
    " BindMode bind_mode = NO_BIND_MODE;\n"
    " if (argc >= 6) {\n"
    " thread_num = atoi(argv[4]);\n"
    " bind_mode = atoi(argv[5]);\n"
    " }\n"
    " struct ThreadPool *thread_pool = CreateThreadPool(thread_num, bind_mode);\n"
    " if (thread_pool == NULL) {\n"
    " MICRO_ERROR(\"create thread pool failed\");\n"
    " return RET_ERROR;\n"
    " }\n";
  // Part 2: hand the pool over to the generated module and report the configuration.
  const std::string register_pool_code = " ret = " + module_name +
                                         "_SetThreadPool(thread_pool);\n"
                                         " if (ret != RET_OK) {\n"
                                         " MICRO_ERROR(\"set global thread pool failed\");\n"
                                         " return RET_ERROR;\n"
                                         " }\n"
                                         " MICRO_INFO(\"config: ThreadNum: %d, BindMode: %d\", thread_num, bind_mode);\n";
  ofs << create_pool_code;
  ofs << register_pool_code;
}
// Writes the benchmark statement that destroys the `thread_pool` variable of the generated code.
//
// \param ofs output stream the statement is appended to
void CodeDestroyThreadPool(std::ofstream &ofs) {
  const std::string destroy_call = " DestroyThreadPool(thread_pool);\n";
  ofs << destroy_call;
}
// Writes the declaration of <module_name>_SetThreadPool, preceded by a short C comment,
// and followed by an empty line.
//
// \param ofs          output stream the declaration is appended to
// \param module_name  prefix of the declared function name
void CodeSetGlobalThreadPoolState(std::ofstream &ofs, const std::string &module_name) {
  const std::string banner =
    "/*\n"
    " * set global thread pool, which is created by user\n"
    " */\n";
  const std::string prototype = "int " + module_name + "_SetThreadPool(struct ThreadPool *thread_pool);\n\n";
  ofs << banner;
  ofs << prototype;
}
// Writes the definition of the `g_thread_pool` global and of <module_name>_SetThreadPool.
// The emitted function rejects a NULL pool with RET_ERROR, otherwise stores the pointer in
// g_thread_pool and returns RET_OK.
//
// \param ofs          output stream the definitions are appended to
// \param module_name  prefix of the defined function name
void CodeSetGlobalThreadPoolImplement(std::ofstream &ofs, const std::string &module_name) {
  // Global that holds the pool supplied by the user.
  ofs << "struct ThreadPool *g_thread_pool = NULL;\n";
  // Setter signature, the only part that depends on the module name.
  ofs << "int " << module_name << "_SetThreadPool(struct ThreadPool *thread_pool) {\n";
  // Setter body.
  ofs << " if (thread_pool == NULL) {\n";
  ofs << " return RET_ERROR;\n";
  ofs << " }\n";
  ofs << " g_thread_pool = thread_pool;\n";
  ofs << " return RET_OK;\n";
  ofs << "}\n";
}
| } // namespace mindspore::lite::micro | |||
| @@ -0,0 +1,35 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_MICRO_CODER_GENERATOR_PARALLEL_COMPONENT_H_ | |||
| #define MINDSPORE_LITE_MICRO_CODER_GENERATOR_PARALLEL_COMPONENT_H_ | |||
| #include <string> | |||
| #include <fstream> | |||
// Code-emitting helpers for thread-pool support in generated nets. Each function appends C source
// text to the given stream.
| namespace mindspore::lite::micro { | |||
// Emits benchmark code that creates a thread pool (thread count / bind mode optionally read from
// argv) and passes it to <module_name>_SetThreadPool.
| void CodeCreateThreadPool(std::ofstream &ofs, const std::string &module_name); | |||
// Emits the DestroyThreadPool(thread_pool) call for the benchmark.
| void CodeDestroyThreadPool(std::ofstream &ofs); | |||
// Emits the declaration of <module_name>_SetThreadPool.
| void CodeSetGlobalThreadPoolState(std::ofstream &ofs, const std::string &module_name); | |||
// Emits the g_thread_pool global and the definition of <module_name>_SetThreadPool.
| void CodeSetGlobalThreadPoolImplement(std::ofstream &ofs, const std::string &module_name); | |||
| } // namespace mindspore::lite::micro | |||
| #endif // MINDSPORE_LITE_MICRO_CODER_GENERATOR_PARALLEL_COMPONENT_H_ | |||
| @@ -74,10 +74,10 @@ void CodeModelParamsForNet(std::ofstream &hofs, std::ofstream &cofs, const std:: | |||
| continue; | |||
| } | |||
| if (tensor->category() == Tensor::Category::CONST_TENSOR) { | |||
| hofs << "extern " << GetTensorDataType(tensor->data_type()) << name << " = [];\n"; | |||
| cofs << GetTensorDataType(tensor->data_type()) << name << " = [" << tensor->ElementsNum() << "];\n"; | |||
| hofs << "extern " << GetTensorDataType(tensor->data_type()) << name << "[];\n"; | |||
| cofs << GetTensorDataType(tensor->data_type()) << name << "[" << tensor->ElementsNum() << "];\n"; | |||
| } else if (tensor->category() == Tensor::Category::VAR) { | |||
| hofs << "extern " << GetTensorDataType(tensor->data_type()) << " *" << name << ";\n"; | |||
| hofs << "extern " << GetTensorDataType(tensor->data_type()) << "*" << name << ";\n"; | |||
| cofs << GetTensorDataType(tensor->data_type()) << "*" << name << " = NULL;\n"; | |||
| } | |||
| } | |||
| @@ -87,7 +87,6 @@ void CodeModelParamsForNet(std::ofstream &hofs, std::ofstream &cofs, const std:: | |||
| void CodeWeightInitFunc(std::ofstream &ofs, const std::string &module_name, const std::unique_ptr<CoderContext> &ctx) { | |||
| ofs << "int " << module_name << "_Init(void *weight_buffer, int weight_size) {\n" | |||
| << " if (weight_buffer == NULL) {\n" | |||
| " MICRO_ERROR(\"weight buffer is NULL\");\n" | |||
| << " return RET_ERROR;\n" | |||
| << " }\n"; | |||
| @@ -106,8 +105,9 @@ void CodeWeightInitFunc(std::ofstream &ofs, const std::string &module_name, cons | |||
| if (tensor->category() != Tensor::Category::CONST_TENSOR) { | |||
| continue; | |||
| } | |||
| auto iter = ctx->tensors_map().find(tensor); | |||
| if (iter != ctx->tensors_map().end()) { | |||
| std::map<Tensor *, std::string> ctx_tensor_map = ctx->tensors_map(); | |||
| auto iter = ctx_tensor_map.find(tensor); | |||
| if (iter != ctx_tensor_map.end()) { | |||
| origins += " {" + name + ", " + std::to_string(tensor->Size()) + ", " + std::to_string(offset) + "},\n"; | |||
| params_num++; | |||
| } else { | |||
| @@ -115,14 +115,14 @@ void CodeWeightInitFunc(std::ofstream &ofs, const std::string &module_name, cons | |||
| params += | |||
| " " + GetTensorDataType(data_type) + "*" + name + " = (weight_buffer + " + std::to_string(offset) + ");\n"; | |||
| } | |||
| offset += tensor->Size(); | |||
| } | |||
| ofs << " struct ModelParameter model_params[] = {\n" << origins << " };\n"; | |||
| ofs << params << "\n"; | |||
| ofs << " struct ModelParameter model_params[] = {\n" << origins << " };\n"; | |||
| ofs << "\n"; | |||
| ofs << " for(int i = 0; i < " << params_num << "; ++i) {\n" | |||
| << " if (model_params[i].offset + model_params[i].size > weight_size) {\n" | |||
| " MICRO_ERROR(\"buffer is invalid, size: %d, offset: %lu\", weight_size, model_params[i].offset);\n" | |||
| " return RET_ERROR;\n" | |||
| " }\n" | |||
| << " memcpy(model_params[i].addr, (weight_buffer + model_params[i].offset), model_params[i].size);\n" | |||
| @@ -24,8 +24,9 @@ | |||
| #include "coder/generator/component/const_blocks/cmake_lists.h" | |||
| #include "coder/generator/component/const_blocks/debug_utils.h" | |||
| #include "coder/generator/component/const_blocks/load_input.h" | |||
| #include "coder/generator/component/const_blocks/thread_pool.h" | |||
| #include "coder/generator/component/const_blocks/license.h" | |||
| #include "micro/coder/log.h" | |||
| #include "coder/log.h" | |||
| namespace mindspore::lite::micro { | |||
| int WriteContentToFile(const std::string &file, const std::string &content) { | |||
| @@ -61,11 +62,13 @@ Generator::~Generator() { (void)umask(origin_umask_); } | |||
| void Generator::CodeNetRunFunc(std::ofstream &ofs) { | |||
| // generate net inference code | |||
| ofs << "void " << config_->module_name() << "_Inference() {\n"; | |||
| if (config_->code_mode() == CodeMode::Code_Inference) { | |||
| ofs << "int thread_num = GetCurrentThreadNum(THREAD_POOL_DEFAULT);\n"; | |||
| if (config_->support_parallel()) { | |||
| ofs << " const int g_thread_num = GetCurrentThreadNum(g_thread_pool);\n"; | |||
| } else { | |||
| ofs << " const int g_thread_num = 1;\n"; | |||
| } | |||
| for (const auto &block : ctx_->code_blocks()) { | |||
| ofs << "\t{\n" << block << "\t}\n"; | |||
| ofs << " {\n" << block << " }\n"; | |||
| } | |||
| ofs << "}\n"; | |||
| } | |||
| @@ -98,7 +101,7 @@ int Generator::CodeSourceCMakeFile() { | |||
| } | |||
| int Generator::CodeStaticContent() { | |||
| const std::vector<std::pair<std::string, std::string>> static_blocks = { | |||
| std::vector<std::pair<std::string, std::string>> static_blocks = { | |||
| {net_inc_file_path_ + "microtensor.h", micro_tensor_h}, | |||
| {net_src_file_path_ + "CMakeLists.txt", src_cmake_lists_txt}, | |||
| {net_main_file_path_ + "debug_utils.h", debug_utils_h}, | |||
| @@ -106,12 +109,13 @@ int Generator::CodeStaticContent() { | |||
| {net_main_file_path_ + "load_input.h", load_input_h}, | |||
| {net_main_file_path_ + "load_input.c", load_input_c}, | |||
| {net_main_file_path_ + "CMakeLists.txt", bench_cmake_lists_txt}}; | |||
| if (config_->support_parallel()) { | |||
| static_blocks.emplace_back(net_inc_file_path_ + "thread_pool.h", thread_pool_h); | |||
| } | |||
| for (const auto &static_block : static_blocks) { | |||
| std::string file_name = static_block.first; | |||
| std::string content = static_block.second; | |||
| if (WriteContentToFile(file_name, content) != RET_OK) { | |||
| return RET_ERROR; | |||
| } | |||
| MS_CHECK_RET_CODE(WriteContentToFile(file_name, content), "write file failed"); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -18,6 +18,7 @@ | |||
| #include <vector> | |||
| #include <string> | |||
| #include "coder/generator/component/common_component.h" | |||
| #include "coder/generator/component/parallel_component.h" | |||
| #include "coder/generator/component/benchmark_component.h" | |||
| #include "coder/generator/component/const_blocks/license.h" | |||
| @@ -28,14 +29,17 @@ int InferenceGenerator::CodeNetHFile() { | |||
| MS_CHECK_TRUE(!ofs.bad(), "filed to open file"); | |||
| MS_LOG(INFO) << "write " << net_include_file; | |||
| ofs << g_hwLicense; | |||
| if (config_->code_mode() == CodeMode::Code_Inference) { | |||
| ofs << "#include \"src/runtime/thread_pool.h\"\n"; | |||
| if (config_->support_parallel()) { | |||
| ofs << "#include \"thread_pool.h\"\n"; | |||
| } | |||
| ofs << "#include \"microtensor.h\"\n\n"; | |||
| CodeInputAndOutputState(ofs, config_->module_name()); | |||
| if (is_get_quant_args_) { | |||
| CodeGraphQuantArgsState(ofs, config_->module_name()); | |||
| } | |||
| if (config_->support_parallel()) { | |||
| CodeSetGlobalThreadPoolState(ofs, config_->module_name()); | |||
| } | |||
| if (config_->is_weight_file()) { | |||
| CodeInitWeightState(ofs, config_->module_name()); | |||
| } | |||
| @@ -50,6 +54,9 @@ int InferenceGenerator::CodeNetCFile() { | |||
| MS_CHECK_TRUE(!ofs.bad(), "filed to open file"); | |||
| MS_LOG(INFO) << "write " << net_impl_file; | |||
| CodeSourceFileInclude(ofs, net_weight_hfile_, net_inc_hfile_); | |||
| if (config_->support_parallel()) { | |||
| CodeSetGlobalThreadPoolImplement(ofs, config_->module_name()); | |||
| } | |||
| CodeInputAndOutputImplement(ofs, config_->module_name(), ctx_); | |||
| CodeInitResourceImplement(ofs, config_->module_name(), ctx_); | |||
| CodeFreeResourceImplement(ofs, config_->module_name(), ctx_); | |||
| @@ -78,12 +85,14 @@ int InferenceGenerator::CodeBenchmarkFile() { | |||
| if (config_->is_weight_file()) { | |||
| CodeBenchmarkInitWeight(ofs, config_->module_name()); | |||
| } | |||
| if (config_->code_mode() == CodeMode::Code_Inference) { | |||
| CodeBenchmarkConfigThread(ofs); | |||
| if (config_->support_parallel()) { | |||
| CodeCreateThreadPool(ofs, config_->module_name()); | |||
| } | |||
| CodeBenchmarkInference(ofs, config_->module_name()); | |||
| CodeBenchmarkPrintOutputs(ofs, config_->module_name()); | |||
| if (config_->support_parallel()) { | |||
| CodeDestroyThreadPool(ofs); | |||
| } | |||
| CodeBenchmarkFreeResourse(ofs, config_->module_name(), inputs_num); | |||
| ofs.close(); | |||
| return RET_OK; | |||
| @@ -19,7 +19,7 @@ | |||
| #include <utility> | |||
| #include <memory> | |||
| #include "micro/coder/generator/generator.h" | |||
| #include "coder/generator/generator.h" | |||
| namespace mindspore::lite::micro { | |||
| class InferenceGenerator : public Generator { | |||
| @@ -39,7 +39,7 @@ int TrainGenerator::CodeNetHFile() { | |||
| MS_CHECK_TRUE(!ofs.bad(), "filed to open file"); | |||
| MS_LOG(INFO) << "write " << net_include_file; | |||
| ofs << g_hwLicense; | |||
| if (config_->code_mode() == CodeMode::Code_Inference) { | |||
| if (config_->code_mode() == CodeMode::Inference) { | |||
| ofs << "#include \"src/runtime/thread_pool.h\"\n"; | |||
| } | |||
| ofs << "#include \"microtensor.h\"\n\n"; | |||
| @@ -19,7 +19,7 @@ | |||
| #include <utility> | |||
| #include <memory> | |||
| #include "micro/coder/generator/generator.h" | |||
| #include "coder/generator/generator.h" | |||
| namespace mindspore::lite::micro { | |||
| class TrainGenerator : public Generator { | |||
| @@ -14,7 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "micro/coder/graph.h" | |||
| #include "coder/graph.h" | |||
| #include <queue> | |||
| #include <deque> | |||
| #include <string> | |||
| @@ -23,7 +23,6 @@ | |||
| #include <set> | |||
| #include "coder/log.h" | |||
| #include "schema/inner/model_generated.h" | |||
| #include "src/ops/primitive_c.h" | |||
| #include "securec/include/securec.h" | |||
| namespace mindspore::lite::micro { | |||
| @@ -92,8 +91,15 @@ int CoderGraph::ConvertTensors() { | |||
| if (quant_params != nullptr) { | |||
| for (int j = 0; j < static_cast<int>(quant_params->size()); j++) { | |||
| QuantArg quant_arg{}; | |||
| quant_arg.bitNum = quant_params->Get(j)->numBits(); | |||
| quant_arg.scale = quant_params->Get(j)->scale(); | |||
| quant_arg.zeroPoint = quant_params->Get(j)->zeroPoint(); | |||
| quant_arg.var_corr = quant_params->Get(j)->varCorr(); | |||
| quant_arg.mean_corr = quant_params->Get(j)->meanCorr(); | |||
| quant_arg.inited = quant_params->Get(j)->inited(); | |||
| quant_arg.roundType = quant_params->Get(j)->roundType(); | |||
| quant_arg.multiplier = quant_params->Get(j)->multiplier(); | |||
| quant_arg.dstDtype = quant_params->Get(j)->dstDtype(); | |||
| dstTensor->AddQuantParam(quant_arg); | |||
| } | |||
| } | |||
| @@ -14,12 +14,12 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "micro/coder/opcoders/base/conv2d_base_coder.h" | |||
| #include "coder/opcoders/base/conv2d_base_coder.h" | |||
| #include <string> | |||
| #include <vector> | |||
| #include "nnacl/fp32/winograd_utils.h" | |||
| #include "nnacl/int8/quantize.h" | |||
| #include "micro/coder/log.h" | |||
| #include "coder/log.h" | |||
| namespace { | |||
| int MallocConvQuantParams(ConvQuantArg *quant_arg, size_t input_arg_num, size_t filter_arg_num, size_t output_arg_num) { | |||
| @@ -37,8 +37,8 @@ int MallocConvQuantParams(ConvQuantArg *quant_arg, size_t input_arg_num, size_t | |||
| } // namespace | |||
| namespace mindspore::lite::micro { | |||
| string Conv2DBaseCoder::LayoutTransformFp32(schema::Format src_format, schema::Format dst_format) { | |||
| string ret; | |||
| std::string Conv2DBaseCoder::LayoutTransformFp32(schema::Format src_format, schema::Format dst_format) { | |||
| std::string ret; | |||
| if (src_format == schema::Format_NHWC && dst_format == schema::Format_NC4HW4) { | |||
| ret = "PackNHWCToNC4HW4Fp32"; | |||
| } else if (src_format == schema::Format_NHWC && dst_format == schema::Format_NHWC4) { | |||
| @@ -56,8 +56,8 @@ string Conv2DBaseCoder::LayoutTransformFp32(schema::Format src_format, schema::F | |||
| return ret; | |||
| } | |||
| string Conv2DBaseCoder::LayoutTransformInt8(schema::Format src_format, schema::Format dst_format) { | |||
| string ret; | |||
| std::string Conv2DBaseCoder::LayoutTransformInt8(schema::Format src_format, schema::Format dst_format) { | |||
| std::string ret; | |||
| if (src_format == schema::Format_NHWC && dst_format == schema::Format_NHWC4) { | |||
| ret = "PackNHWCToNHWC4Int8"; | |||
| } else { | |||
| @@ -67,8 +67,8 @@ string Conv2DBaseCoder::LayoutTransformInt8(schema::Format src_format, schema::F | |||
| return ret; | |||
| } | |||
| string Conv2DBaseCoder::LayoutTransform(TypeId data_type, schema::Format src_format, schema::Format dst_format) { | |||
| string ret; | |||
| std::string Conv2DBaseCoder::LayoutTransform(TypeId data_type, schema::Format src_format, schema::Format dst_format) { | |||
| std::string ret; | |||
| switch (data_type) { | |||
| case kNumberTypeInt8: | |||
| ret = LayoutTransformInt8(src_format, dst_format); | |||
| @@ -197,7 +197,7 @@ int Conv2DBaseCoder::SetQuantMultiplier() { | |||
| return RET_OK; | |||
| } | |||
| int Conv2DBaseCoder::CheckResizeValid() { | |||
| int Conv2DBaseCoder::CheckResizeValid() const { | |||
| // ===============check in channel================= // | |||
| int32_t filter_in_channel = filter_tensor_->Channel(); | |||
| int32_t resize_in_channel = input_tensor_->Channel(); | |||
| @@ -206,12 +206,39 @@ int Conv2DBaseCoder::CheckResizeValid() { | |||
| return RET_OK; | |||
| } | |||
| void Conv2DBaseCoder::SetRoundingAndMultipilerMode() { | |||
| auto input_quant_arg = input_tensor_->quant_params().front(); | |||
| int round_type = input_quant_arg.roundType; | |||
| switch (round_type) { | |||
| case 1: | |||
| conv_quant_arg_->round_mode_ = Rounding_Away_from_zero; | |||
| break; | |||
| case 2: | |||
| conv_quant_arg_->round_mode_ = Rounding_Up; | |||
| break; | |||
| default: | |||
| conv_quant_arg_->round_mode_ = Rounding_No; | |||
| } | |||
| int cal_multiplier_type = input_quant_arg.multiplier; | |||
| switch (cal_multiplier_type) { | |||
| case 0: | |||
| conv_quant_arg_->quant_multiplier_mode_ = Method_SinglePrecision; | |||
| break; | |||
| case 1: | |||
| conv_quant_arg_->quant_multiplier_mode_ = Method_DoublePrecision; | |||
| break; | |||
| default: | |||
| conv_quant_arg_->quant_multiplier_mode_ = Method_No; | |||
| } | |||
| } | |||
| int Conv2DBaseCoder::SetQuantParam() { | |||
| MS_CHECK_RET_CODE(MallocQuantParam(), "Malloc quant param failed."); | |||
| MS_CHECK_RET_CODE(SetInputTensorQuantParam(), "Set Input Tensor Quant Param Failed."); | |||
| MS_CHECK_RET_CODE(SetFilterTensorQuantParam(), "Set Filter Tensor Quant Param Failed."); | |||
| MS_CHECK_RET_CODE(SetOutputTensorQuantParam(), "Set Output Tensor Quant Param Failed."); | |||
| MS_CHECK_RET_CODE(SetIfPerChannel(), "Set if per tensor channel failed."); | |||
| SetRoundingAndMultipilerMode(); | |||
| MS_CHECK_RET_CODE(SetQuantMultiplier(), "Set Quant Multiplier Failed."); | |||
| // now only consider per tensor for output | |||
| MS_CHECK_PTR(conv_param_->conv_quant_arg_.out_act_min_); | |||
| @@ -21,13 +21,11 @@ | |||
| #include <vector> | |||
| #include <utility> | |||
| #include <memory> | |||
| #include "micro/coder/opcoders/op_coder.h" | |||
| #include "coder/opcoders/op_coder.h" | |||
| #include "src/runtime/kernel/arm/base/layout_transform.h" | |||
| #include "nnacl/conv_parameter.h" | |||
| namespace mindspore::lite::micro { | |||
| using std::string; | |||
| class Conv2DBaseCoder : public OperatorCoder { | |||
| public: | |||
| Conv2DBaseCoder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | |||
| @@ -47,10 +45,14 @@ class Conv2DBaseCoder : public OperatorCoder { | |||
| free(conv_quant_arg_->input_quant_args_); | |||
| free(conv_quant_arg_->filter_quant_args_); | |||
| free(conv_quant_arg_->output_quant_args_); | |||
| conv_param_ = nullptr; | |||
| conv_quant_arg_ = nullptr; | |||
| filter_tensor_ = nullptr; | |||
| bias_tensor_ = nullptr; | |||
| } | |||
| protected: | |||
| int Init(); | |||
| virtual int Init(); | |||
| int SetQuantParam(); | |||
| @@ -62,19 +64,21 @@ class Conv2DBaseCoder : public OperatorCoder { | |||
| int SetOutputTensorQuantParam(); | |||
| void SetRoundingAndMultipilerMode(); | |||
| int SetQuantMultiplier(); | |||
| int CheckResizeValid(); | |||
| int CheckResizeValid() const; | |||
| int SetIfPerChannel(); | |||
| int CheckLayout(lite::Tensor *input_tensor); | |||
| string LayoutTransformFp32(schema::Format src_format, schema::Format dst_format); | |||
| std::string LayoutTransformFp32(schema::Format src_format, schema::Format dst_format); | |||
| string LayoutTransformInt8(schema::Format src_format, schema::Format dst_format); | |||
| std::string LayoutTransformInt8(schema::Format src_format, schema::Format dst_format); | |||
| string LayoutTransform(TypeId data_type, schema::Format src_format, schema::Format dst_format); | |||
| std::string LayoutTransform(TypeId data_type, schema::Format src_format, schema::Format dst_format); | |||
| ConvParameter *conv_param_{nullptr}; | |||
| @@ -84,7 +88,7 @@ class Conv2DBaseCoder : public OperatorCoder { | |||
| Tensor *bias_tensor_{nullptr}; | |||
| string convert_func_; | |||
| std::string convert_func_; | |||
| }; | |||
| } // namespace mindspore::lite::micro | |||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_BASE_CONV2D_BASE_CODER_H_ | |||
| @@ -0,0 +1,153 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "coder/opcoders/base/detection_post_process_base_coder.h" | |||
| #include "nnacl/int8/quant_dtype_cast_int8.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| #include "coder/log.h" | |||
| #include "include/errorcode.h" | |||
| namespace mindspore::lite::micro { | |||
| int DetectionPostProcessBaseCoder::Prepare(CoderContext *const context) { | |||
| MS_CHECK_PTR(parameter_); | |||
| params_ = reinterpret_cast<DetectionPostProcessParameter *>(parameter_); | |||
| params_->anchors_ = nullptr; | |||
| params_->decoded_boxes_ = nullptr; | |||
| params_->nms_candidate_ = nullptr; | |||
| params_->indexes_ = nullptr; | |||
| params_->scores_ = nullptr; | |||
| params_->all_class_indexes_ = nullptr; | |||
| params_->all_class_scores_ = nullptr; | |||
| params_->single_class_indexes_ = nullptr; | |||
| params_->selected_ = nullptr; | |||
| Tensor *anchor_tensor = input_tensors_.at(2); | |||
| MS_CHECK_PTR(anchor_tensor); | |||
| if (anchor_tensor->data_type() == kNumberTypeInt8) { | |||
| QuantArg quant_param = anchor_tensor->quant_params().at(0); | |||
| auto anchor_int8 = reinterpret_cast<int8_t *>(anchor_tensor->data_c()); | |||
| MS_CHECK_PTR(anchor_int8); | |||
| auto anchor_fp32 = static_cast<float *>( | |||
| allocator_->Malloc(kNumberTypeFloat, anchor_tensor->ElementsNum() * sizeof(float), kOfflinePackWeight)); | |||
| MS_CHECK_PTR(anchor_fp32); | |||
| DoDequantizeInt8ToFp32(anchor_int8, anchor_fp32, quant_param.scale, quant_param.zeroPoint, | |||
| anchor_tensor->ElementsNum()); | |||
| params_->anchors_ = anchor_fp32; | |||
| } else if (anchor_tensor->data_type() == kNumberTypeUInt8) { | |||
| QuantArg quant_param = anchor_tensor->quant_params().front(); | |||
| auto anchor_uint8 = reinterpret_cast<uint8_t *>(anchor_tensor->data_c()); | |||
| MS_CHECK_PTR(anchor_uint8); | |||
| auto anchor_fp32 = static_cast<float *>( | |||
| allocator_->Malloc(kNumberTypeFloat, anchor_tensor->ElementsNum() * sizeof(float), kOfflinePackWeight)); | |||
| MS_CHECK_PTR(anchor_fp32); | |||
| DoDequantizeUInt8ToFp32(anchor_uint8, anchor_fp32, quant_param.scale, quant_param.zeroPoint, | |||
| anchor_tensor->ElementsNum()); | |||
| params_->anchors_ = anchor_fp32; | |||
| } else if (anchor_tensor->data_type() == kNumberTypeFloat32 || anchor_tensor->data_type() == kNumberTypeFloat) { | |||
| params_->anchors_ = static_cast<float *>( | |||
| allocator_->Malloc(kNumberTypeFloat, anchor_tensor->ElementsNum() * sizeof(float), kOfflinePackWeight)); | |||
| MS_CHECK_PTR(params_->anchors_); | |||
| memcpy(params_->anchors_, anchor_tensor->data_c(), anchor_tensor->Size()); | |||
| } else { | |||
| MS_LOG(ERROR) << "unsupported anchor data type " << anchor_tensor->data_type(); | |||
| return RET_ERROR; | |||
| } | |||
| MS_CHECK_RET_CODE(AllocateBuffer(), "AllocateBuffer failed"); | |||
| MS_CHECK_RET_CODE(MallocInputsBuffer(), "malloc inputs buffer failed"); | |||
| return RET_OK; | |||
| } | |||
| int DetectionPostProcessBaseCoder::AllocateBuffer() { | |||
| MS_CHECK_PTR(input_tensors_.at(0)); | |||
| MS_CHECK_PTR(input_tensors_.at(1)); | |||
| num_boxes_ = input_tensors_.at(0)->shape().at(1); | |||
| num_classes_with_bg_ = input_tensors_.at(1)->shape().at(2); | |||
| params_->decoded_boxes_ = allocator_->Malloc(kNumberTypeFloat, num_boxes_ * 4 * sizeof(float), kWorkspace); | |||
| MS_CHECK_PTR(params_->decoded_boxes_); | |||
| params_->nms_candidate_ = allocator_->Malloc(kNumberTypeUInt8, num_boxes_ * sizeof(uint8_t), kWorkspace); | |||
| MS_CHECK_PTR(params_->nms_candidate_); | |||
| params_->selected_ = allocator_->Malloc(kNumberTypeInt, num_boxes_ * sizeof(int), kWorkspace); | |||
| MS_CHECK_PTR(params_->selected_); | |||
| params_->single_class_indexes_ = allocator_->Malloc(kNumberTypeInt, num_boxes_ * sizeof(int), kWorkspace); | |||
| MS_CHECK_PTR(params_->single_class_indexes_); | |||
| if (params_->use_regular_nms_) { | |||
| params_->scores_ = | |||
| allocator_->Malloc(kNumberTypeFloat, (num_boxes_ + params_->max_detections_) * sizeof(float), kWorkspace); | |||
| MS_CHECK_PTR(params_->scores_); | |||
| params_->indexes_ = | |||
| allocator_->Malloc(kNumberTypeInt, (num_boxes_ + params_->max_detections_) * sizeof(int), kWorkspace); | |||
| MS_CHECK_PTR(params_->indexes_); | |||
| params_->all_class_scores_ = | |||
| allocator_->Malloc(kNumberTypeFloat, (num_boxes_ + params_->max_detections_) * sizeof(float), kWorkspace); | |||
| MS_CHECK_PTR(params_->all_class_scores_); | |||
| params_->all_class_indexes_ = | |||
| allocator_->Malloc(kNumberTypeInt, (num_boxes_ + params_->max_detections_) * sizeof(int), kWorkspace); | |||
| MS_CHECK_PTR(params_->all_class_indexes_); | |||
| } else { | |||
| params_->scores_ = allocator_->Malloc(kNumberTypeFloat, num_boxes_ * sizeof(float), kWorkspace); | |||
| MS_CHECK_PTR(params_->scores_); | |||
| params_->indexes_ = | |||
| allocator_->Malloc(kNumberTypeFloat, num_boxes_ * params_->num_classes_ * sizeof(int), kWorkspace); | |||
| MS_CHECK_PTR(params_->indexes_); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int DetectionPostProcessBaseCoder::DoCode(CoderContext *const context) { | |||
| Collect(context, {"nnacl/detection_post_process_parameter.h", "wrapper/base/detection_post_process_base_wrapper.h"}, | |||
| {"detection_post_process_fp32.c", "detection_post_process_base_wrapper.c"}); | |||
| Serializer code; | |||
| MS_CHECK_RET_CODE(GetInputData(context, &code), "GetInputData failed"); | |||
| Tensor *output_boxes = output_tensors_.at(0); | |||
| Tensor *output_classes = output_tensors_.at(1); | |||
| Tensor *output_scores = output_tensors_.at(2); | |||
| Tensor *output_num = output_tensors_.at(3); | |||
| code.CodeBaseStruct("DetectionPostProcessParameter", "params", params_->op_parameter_, params_->h_scale_, | |||
| params_->w_scale_, params_->x_scale_, params_->y_scale_, params_->nms_iou_threshold_, | |||
| params_->nms_score_threshold_, params_->max_detections_, params_->detections_per_class_, | |||
| params_->max_classes_per_detection_, params_->num_classes_, params_->use_regular_nms_, | |||
| params_->out_quantized_, params_->anchors_, params_->decoded_boxes_, params_->nms_candidate_, | |||
| params_->indexes_, params_->scores_, params_->all_class_indexes_, params_->all_class_scores_, | |||
| params_->single_class_indexes_, params_->selected_); | |||
| code.CodeFunction("DecodeBoxes", num_boxes_, input_boxes_, params_->anchors_, "¶ms"); | |||
| if (params_->use_regular_nms_) { | |||
| code.CodeFunction("DetectionPostProcessRegular", num_boxes_, num_classes_with_bg_, input_scores_, output_boxes, | |||
| output_classes, output_scores, output_num, "PartialArgSort", "¶ms"); | |||
| } else { | |||
| int task_id = 0; | |||
| int thread_num = 1; | |||
| code.CodeFunction("NmsMultiClassesFastCore", num_boxes_, num_classes_with_bg_, input_scores_, "PartialArgSort", | |||
| "¶ms", task_id, thread_num); | |||
| code.CodeFunction("DetectionPostProcessFast", num_boxes_, num_classes_with_bg_, input_scores_, | |||
| "(float *)(params.decoded_boxes_)", output_boxes, output_classes, output_scores, output_num, | |||
| "PartialArgSort", "¶ms"); | |||
| } | |||
| context->AppendCode(code.str()); | |||
| return RET_OK; | |||
| } | |||
| } // namespace mindspore::lite::micro | |||
| @@ -0,0 +1,54 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_BASE_DETECTION_POST_PROCESS_BASE_CODER_H_ | |||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_BASE_DETECTION_POST_PROCESS_BASE_CODER_H_ | |||
| #include <string> | |||
| #include <vector> | |||
| #include <utility> | |||
| #include <memory> | |||
| #include "coder/opcoders/op_coder.h" | |||
| #include "nnacl/detection_post_process_parameter.h" | |||
| #include "coder/opcoders/serializers/serializer.h" | |||
| namespace mindspore::lite::micro { | |||
| class DetectionPostProcessBaseCoder : public OperatorCoder { | |||
| public: | |||
| DetectionPostProcessBaseCoder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | |||
| const Model::Node *node, size_t node_index, Target target) | |||
| : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {} | |||
| ~DetectionPostProcessBaseCoder() override = default; | |||
| int Prepare(CoderContext *const context) override; | |||
| int DoCode(CoderContext *const context) override; | |||
| protected: | |||
| int AllocateBuffer(); | |||
| virtual int GetInputData(CoderContext *const context, Serializer *const coder) = 0; | |||
| virtual int MallocInputsBuffer() = 0; | |||
| int num_boxes_{0}; | |||
| int num_classes_with_bg_{0}; | |||
| float *input_boxes_{nullptr}; | |||
| float *input_scores_{nullptr}; | |||
| DetectionPostProcessParameter *params_{nullptr}; | |||
| }; | |||
| } // namespace mindspore::lite::micro | |||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_BASE_DETECTION_POST_PROCESS_BASE_CODER_H_ | |||
| @@ -15,7 +15,7 @@ | |||
| */ | |||
| #include <string> | |||
| #include "micro/coder/opcoders/op_coder.h" | |||
| #include "coder/opcoders/op_coder.h" | |||
| #include "micro/coder/opcoders/file_collector.h" | |||
| #include "micro/coder/opcoders/base/dtype_cast_coder.h" | |||
| #include "micro/coder/opcoders/serializers/serializer.h" | |||
| @@ -19,7 +19,7 @@ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "micro/coder/opcoders/op_coder.h" | |||
| #include "coder/opcoders/op_coder.h" | |||
| #include "nnacl/int8/quant_dtype_cast_int8.h" | |||
| namespace mindspore::lite::micro { | |||
| @@ -14,10 +14,14 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "micro/coder/opcoders/base/full_connection_base_coder.h" | |||
| #include "coder/opcoders/base/full_connection_base_coder.h" | |||
| namespace mindspore::lite::micro { | |||
| FullConnectionBaseCoder::~FullConnectionBaseCoder() { fc_param_ = nullptr; } | |||
| FullConnectionBaseCoder::~FullConnectionBaseCoder() { | |||
| fc_param_ = nullptr; | |||
| filter_tensor_ = nullptr; | |||
| bias_tensor_ = nullptr; | |||
| } | |||
| int FullConnectionBaseCoder::Init() { | |||
| this->fc_param_ = reinterpret_cast<MatMulParameter *>(parameter_); | |||
| @@ -18,7 +18,7 @@ | |||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_BASE_FULLY_CONNECTED_BASE_CODER_H_ | |||
| #include <vector> | |||
| #include "micro/coder/opcoders/op_coder.h" | |||
| #include "coder/opcoders/op_coder.h" | |||
| #include "nnacl/matmul_parameter.h" | |||
| namespace mindspore::lite::micro { | |||
| @@ -29,7 +29,8 @@ class FullConnectionBaseCoder : public OperatorCoder { | |||
| : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {} | |||
| ~FullConnectionBaseCoder() override; | |||
| int Init(); | |||
| virtual int Init(); | |||
| protected: | |||
| MatMulParameter *fc_param_{nullptr}; | |||
| @@ -14,61 +14,72 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include <string> | |||
| #include "micro/coder/opcoders/op_coder.h" | |||
| #include "micro/coder/opcoders/file_collector.h" | |||
| #include "micro/coder/opcoders/base/quant_dtype_cast_coder.h" | |||
| #include "micro/coder/opcoders/serializers/serializer.h" | |||
| #include "coder/opcoders/op_coder.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| #include "coder/opcoders/base/quant_dtype_cast_coder.h" | |||
| #include "coder/opcoders/serializers/serializer.h" | |||
| #include "coder/utils/type_cast.h" | |||
| using mindspore::schema::PrimitiveType_QuantDTypeCast; | |||
| namespace mindspore::lite::micro { | |||
| int QuantDTypeCastCoder::Prepare(CoderContext *const context) { | |||
| this->cast_param_ = reinterpret_cast<QuantDTypeCastParameter *>(parameter_); | |||
| if (cast_param_->srcT == kNumberTypeFloat32 && cast_param_->dstT == kNumberTypeInt8) { | |||
| if (input_tensor_->data_type() != kNumberTypeFloat32 || output_tensor_->data_type() != kNumberTypeInt8) { | |||
| MS_LOG(ERROR) << "cast_param_ data type and tensor data type do not match."; | |||
| return RET_ERROR; | |||
| } | |||
| inverse_ = false; | |||
| } else if (cast_param_->srcT == kNumberTypeInt8 && cast_param_->dstT == kNumberTypeFloat32) { | |||
| if (input_tensor_->data_type() != kNumberTypeInt8 || output_tensor_->data_type() != kNumberTypeFloat32) { | |||
| MS_LOG(ERROR) << "cast_param_ data type and tensor data type do not match."; | |||
| return RET_ERROR; | |||
| } | |||
| inverse_ = true; | |||
| } else { | |||
| MS_LOG(ERROR) << "cast_param_ data type not supported:" | |||
| << " src: " << cast_param_->srcT << " dst: " << cast_param_->dstT; | |||
| return RET_PARAM_INVALID; | |||
| auto *param = reinterpret_cast<QuantDTypeCastParameter *>(parameter_); | |||
| if (input_tensor_->data_type() != static_cast<TypeId>(param->srcT) || | |||
| output_tensor_->data_type() != static_cast<TypeId>(param->dstT)) { | |||
| MS_LOG(ERROR) << "param data type not supported:" | |||
| << " src: " << param->srcT << " dst: " << param->dstT; | |||
| return RET_ERROR; | |||
| } | |||
| src_dtype = static_cast<TypeId>(param->srcT); | |||
| dst_dtype = static_cast<TypeId>(param->dstT); | |||
| return RET_OK; | |||
| } | |||
| int QuantDTypeCastCoder::DoCode(CoderContext *const context) { | |||
| // get quant params | |||
| QuantArg in_quant_arg = input_tensor_->quant_params().at(0); | |||
| // single thread for now | |||
| if (input_tensor_->quant_params().empty() && output_tensor_->quant_params().empty()) { | |||
| MS_LOG(ERROR) << "QuantDTypeCast need quantization parameters which is not found."; | |||
| return RET_ERROR; | |||
| } | |||
| auto quant_arg = (!output_tensor_->quant_params().empty() && output_tensor_->quant_params().at(0).inited) | |||
| ? output_tensor_->quant_params().at(0) | |||
| : input_tensor_->quant_params().at(0); | |||
| int num_unit_thread = input_tensor_->ElementsNum(); | |||
| // generate code .h .c | |||
| Collect(context, {"nnacl/int8/quant_dtype_cast_int8.h"}, {"quant_dtype_cast_int8.c"}); | |||
| Serializer code; | |||
| code.precision(kPrecision); | |||
| std::string function = inverse_ ? "DoDequantizeInt8ToFp32" : "DoQuantizeFp32ToInt8"; | |||
| code.CodeFunction(function, input_tensor_, output_tensor_, in_quant_arg.scale, in_quant_arg.zeroPoint, | |||
| num_unit_thread); | |||
| if (src_dtype == TypeId::kNumberTypeInt8 && dst_dtype == TypeId::kNumberTypeFloat32) { | |||
| code.CodeFunction("DoDequantizeInt8ToFp32", input_tensor_, output_tensor_, quant_arg.scale, quant_arg.zeroPoint, | |||
| num_unit_thread); | |||
| } else if (src_dtype == TypeId::kNumberTypeFloat32 && dst_dtype == TypeId::kNumberTypeInt8) { | |||
| bool from_uint8_src = false; | |||
| if (quant_arg.dstDtype == TypeId::kNumberTypeUInt8) { | |||
| from_uint8_src = true; | |||
| } | |||
| code.CodeFunction("DoQuantizeFp32ToInt8", input_tensor_, output_tensor_, quant_arg.scale, quant_arg.zeroPoint, | |||
| num_unit_thread, from_uint8_src); | |||
| } else if (src_dtype == TypeId::kNumberTypeInt8 && dst_dtype == TypeId::kNumberTypeUInt8) { | |||
| code.CodeFunction("Int8ToUInt8", input_tensor_, output_tensor_, num_unit_thread); | |||
| } else if (src_dtype == TypeId::kNumberTypeUInt8 && dst_dtype == TypeId::kNumberTypeFloat32) { | |||
| code.CodeFunction("DoDequantizeUInt8ToFp32", input_tensor_, output_tensor_, quant_arg.scale, quant_arg.zeroPoint, | |||
| num_unit_thread); | |||
| } else if (src_dtype == TypeId::kNumberTypeFloat32 && dst_dtype == TypeId::kNumberTypeUInt8) { | |||
| code.CodeFunction("DoQuantizeFp32ToUInt8", input_tensor_, output_tensor_, quant_arg.scale, quant_arg.zeroPoint, | |||
| num_unit_thread); | |||
| } else if (src_dtype == TypeId::kNumberTypeUInt8 && dst_dtype == TypeId::kNumberTypeInt8) { | |||
| code.CodeFunction("UInt8ToInt8", input_tensor_, output_tensor_, num_unit_thread); | |||
| } else { | |||
| MS_LOG(INFO) << "unsupported type cast, src: " << EnumNameDataType(src_dtype) | |||
| << ", dst: " << EnumNameDataType(dst_dtype); | |||
| return RET_ERROR; | |||
| } | |||
| context->AppendCode(code.str()); | |||
| return RET_OK; | |||
| } | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_QuantDTypeCast, | |||
| CPUOpCoderCreator<QuantDTypeCastCoder>) | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt8, PrimitiveType_QuantDTypeCast, CPUOpCoderCreator<QuantDTypeCastCoder>) | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeUInt8, PrimitiveType_QuantDTypeCast, CPUOpCoderCreator<QuantDTypeCastCoder>) | |||
| } // namespace mindspore::lite::micro | |||
| @@ -19,7 +19,7 @@ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "micro/coder/opcoders/op_coder.h" | |||
| #include "coder/opcoders/op_coder.h" | |||
| #include "nnacl/int8/quant_dtype_cast_int8.h" | |||
| namespace mindspore::lite::micro { | |||
| @@ -36,10 +36,8 @@ class QuantDTypeCastCoder final : public OperatorCoder { | |||
| int DoCode(CoderContext *const context) override; | |||
| private: | |||
| QuantDTypeCastParameter *cast_param_{nullptr}; | |||
| std::vector<Tensor *> inputs_; | |||
| std::vector<Tensor *> outputs_; | |||
| bool inverse_{false}; | |||
| TypeId src_dtype{kTypeUnknown}; | |||
| TypeId dst_dtype{kTypeUnknown}; | |||
| int thread_num_{0}; | |||
| int thread_n_num_{0}; | |||
| int thread_n_stride_{0}; | |||
| @@ -14,16 +14,16 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "micro/coder/opcoders/base/reduce_base_coder.h" | |||
| #include "coder/opcoders/base/reduce_base_coder.h" | |||
| #include <vector> | |||
| #include "micro/coder/opcoders/op_coder.h" | |||
| #include "coder/opcoders/op_coder.h" | |||
| namespace mindspore::lite::micro { | |||
| namespace { | |||
| constexpr size_t kInputNum = 1; | |||
| constexpr size_t kOutputNum = 1; | |||
| } // namespace | |||
| int ReduceBaseCoder::CheckInputsOutputs() { | |||
| int ReduceBaseCoder::CheckInputsOutputs() const { | |||
| if (input_tensors_.size() < kInputNum) { | |||
| MS_LOG(ERROR) << "Reduce inputs size should be at least " << kInputNum << " but got " << input_tensors_.size(); | |||
| return RET_ERROR; | |||
| @@ -19,7 +19,7 @@ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "micro/coder/opcoders/op_coder.h" | |||
| #include "coder/opcoders/op_coder.h" | |||
| #include "nnacl/reduce_parameter.h" | |||
| namespace mindspore::lite::micro { | |||
| @@ -31,11 +31,10 @@ class ReduceBaseCoder : public OperatorCoder { | |||
| ~ReduceBaseCoder() override = default; | |||
| int Init(); | |||
| virtual int ReSize(); | |||
| virtual int Init(); | |||
| private: | |||
| int CheckInputsOutputs(); | |||
| int CheckInputsOutputs() const; | |||
| int CheckParameters(); | |||
| protected: | |||
| @@ -54,6 +53,7 @@ class ReduceBaseCoder : public OperatorCoder { | |||
| int outer_size_{0}; | |||
| int inner_size_{0}; | |||
| int axis_size_{0}; | |||
| virtual int ReSize(); | |||
| }; | |||
| } // namespace mindspore::lite::micro | |||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_REDUCE_BASE_CODER_H | |||
| @@ -0,0 +1,104 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "coder/opcoders/base/resize_base_coder.h" | |||
| #include "coder/opcoders/op_coder.h" | |||
| namespace mindspore::lite::micro { | |||
| constexpr int kMaxInputNum = 2; | |||
| constexpr int kOutputNum = 1; | |||
| constexpr int kSingleNum = 1; | |||
| constexpr int kDoubleNum = 2; | |||
| constexpr int kQuadrupleNum = 4; | |||
| int ResizeBaseCoder::CheckParameters() { | |||
| auto parameter = reinterpret_cast<ResizeParameter *>(parameter_); | |||
| if (parameter == nullptr) { | |||
| MS_LOG(ERROR) << "cast ResizeParameter failed."; | |||
| return RET_NULL_PTR; | |||
| } | |||
| method_ = parameter->method_; | |||
| if (method_ != static_cast<int>(schema::ResizeMethod_LINEAR) && | |||
| method_ != static_cast<int>(schema::ResizeMethod_NEAREST)) { | |||
| MS_LOG(ERROR) << "Resize method should be bilinear or nearest_neighbor, but got " << method_; | |||
| return RET_INVALID_OP_ATTR; | |||
| } | |||
| if (this->input_tensors_.size() == kSingleNum) { | |||
| new_height_ = parameter->new_height_; | |||
| if (new_height_ < 1) { | |||
| MS_LOG(ERROR) << "Resize new_height should >= 1, but got " << new_height_; | |||
| return RET_INVALID_OP_ATTR; | |||
| } | |||
| new_width_ = parameter->new_width_; | |||
| if (new_width_ < 1) { | |||
| MS_LOG(ERROR) << "Resize new_width should >= 1, but got " << new_width_; | |||
| return RET_INVALID_OP_ATTR; | |||
| } | |||
| } else if (this->input_tensors_.size() == kDoubleNum) { | |||
| auto out_shape = this->input_tensors_.at(1)->data_c(); | |||
| if (out_shape == nullptr) { | |||
| MS_LOG(INFO) << "Out shape is not assigned"; | |||
| const_shape_ = false; | |||
| } else { | |||
| const_shape_ = true; | |||
| } | |||
| } | |||
| coordinate_transform_mode_ = parameter->coordinate_transform_mode_; | |||
| preserve_aspect_ratio_ = parameter->preserve_aspect_ratio_; | |||
| if (preserve_aspect_ratio_) { | |||
| MS_LOG(ERROR) << "Resize currently not support preserve_aspect_ratio true"; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int ResizeBaseCoder::CheckInputsOuputs() { | |||
| if (input_tensors_.size() <= kQuadrupleNum) { | |||
| if (std::any_of(input_tensors_.begin(), input_tensors_.end(), [](const Tensor *t) { return t == nullptr; })) { | |||
| return RET_NULL_PTR; | |||
| } | |||
| } else { | |||
| MS_LOG(ERROR) << "Resize input num should be no more than" << kMaxInputNum << ", but got " << input_tensors_.size(); | |||
| return RET_ERROR; | |||
| } | |||
| if (output_tensors_.size() != kOutputNum) { | |||
| MS_LOG(ERROR) << "Resize output num should be " << kOutputNum << ", but got " << output_tensors_.size(); | |||
| return RET_ERROR; | |||
| } | |||
| auto output = output_tensors_.at(0); | |||
| if (output == nullptr) { | |||
| return RET_NULL_PTR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int ResizeBaseCoder::Init() { | |||
| auto ret = CheckParameters(); | |||
| if (ret != RET_OK) { | |||
| return ret; | |||
| } | |||
| ret = CheckInputsOuputs(); | |||
| if (ret != RET_OK) { | |||
| return ret; | |||
| } | |||
| auto input_shape = input_tensor_->shape(); | |||
| if (!input_shape.empty() && input_shape.size() != COMM_SHAPE_SIZE) { | |||
| MS_LOG(ERROR) << "Resize op support input rank 4, got " << input_shape.size(); | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| } // namespace mindspore::lite::micro | |||
| @@ -0,0 +1,49 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_RESIZE_BASE_CODER_H_ | |||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_RESIZE_BASE_CODER_H_ | |||
| #include <vector> | |||
| #include <memory> | |||
| #include "coder/opcoders/op_coder.h" | |||
| #include "nnacl/resize_parameter.h" | |||
| namespace mindspore::lite::micro { | |||
| class ResizeBaseCoder : public OperatorCoder { | |||
| public: | |||
| ResizeBaseCoder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | |||
| const Model::Node *node, size_t node_index, Target target) | |||
| : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {} | |||
| ~ResizeBaseCoder() override = default; | |||
| int Init(); | |||
| protected: | |||
| int method_{0}; | |||
| int new_height_{0}; | |||
| int new_width_{0}; | |||
| int coordinate_transform_mode_{0}; | |||
| bool preserve_aspect_ratio_{false}; | |||
| bool const_shape_{false}; | |||
| private: | |||
| int CheckParameters(); | |||
| int CheckInputsOuputs(); | |||
| }; | |||
| } // namespace mindspore::lite::micro | |||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_RESIZE_BASE_CODER_H_ | |||
| @@ -13,7 +13,7 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "micro/coder/opcoders/base/softmax_base_coder.h" | |||
| #include "coder/opcoders/base/softmax_base_coder.h" | |||
| #include <vector> | |||
| #include <type_traits> | |||
| @@ -19,14 +19,12 @@ | |||
| #include <vector> | |||
| #include <string> | |||
| #include "micro/coder/opcoders/op_coder.h" | |||
| #include "coder/opcoders/op_coder.h" | |||
| #include "nnacl/softmax_parameter.h" | |||
| #include "nnacl/int8/quantize.h" | |||
| namespace mindspore::lite::micro { | |||
| using std::string; | |||
| class SoftmaxBaseCoder : public OperatorCoder { | |||
| public: | |||
| SoftmaxBaseCoder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | |||
| @@ -17,13 +17,13 @@ | |||
| #include "coder/opcoders/cmsis-nn/int8/add_int8_coder.h" | |||
| #include <algorithm> | |||
| #include <limits> | |||
| #include "micro/coder/opcoders/serializers/serializer.h" | |||
| #include "coder/opcoders/serializers/serializer.h" | |||
| #include "nnacl/arithmetic.h" | |||
| #include "nnacl/int8/quantize.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| #include "coder/log.h" | |||
| using mindspore::schema::PrimitiveType_Add; | |||
| using mindspore::schema::PrimitiveType_AddFusion; | |||
| namespace mindspore::lite::micro::cmsis { | |||
| @@ -85,5 +85,5 @@ int AddInt8Coder::DoCode(CoderContext *const context) { | |||
| context->AppendCode(code.str()); | |||
| return RET_OK; | |||
| } | |||
| REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_Add, CPUOpCoderCreator<AddInt8Coder>) | |||
| REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_AddFusion, CPUOpCoderCreator<AddInt8Coder>) | |||
| } // namespace mindspore::lite::micro::cmsis | |||
| @@ -15,14 +15,13 @@ | |||
| */ | |||
| #include "coder/opcoders/cmsis-nn/int8/conv2d_int8_coder.h" | |||
| #include <memory> | |||
| #include <string> | |||
| #include <vector> | |||
| #include "coder/opcoders/cmsis-nn/int8/dwconv_int8_coder.h" | |||
| #include "coder/opcoders/serializers/serializer.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| #include "src/common/prim_util.h" | |||
| using mindspore::schema::PrimitiveType_Conv2D; | |||
| using mindspore::schema::PrimitiveType_Conv2DFusion; | |||
| namespace mindspore::lite::micro::cmsis { | |||
| @@ -40,13 +39,11 @@ int Conv2DInt8Coder::Prepare(CoderContext *const context) { | |||
| int Conv2DInt8Coder::DoCode(CoderContext *const context) { | |||
| Serializer code; | |||
| code.precision(kPrecision); | |||
| std::vector<string> h_files; | |||
| std::vector<string> c_files; | |||
| std::vector<std::string> h_files; | |||
| std::vector<std::string> c_files; | |||
| h_files.emplace_back("CMSIS/NN/Include/arm_nnfunctions.h"); | |||
| string buffer_str = "NULL"; | |||
| if (opt_ != Convolve_1x1_fast) { | |||
| buffer_str = allocator_->GetRuntimeAddr(buffer_); | |||
| code << " memset(" << buffer_str << ", 0, " << buffer_size_ << ");\n"; | |||
| code.CodeFunction("memset", buffer_, 0, buffer_size_); | |||
| } | |||
| code.CodeArray("output_shift", output_shift_, output_ch_); | |||
| code.CodeArray("output_mult", output_mult_, output_ch_); | |||
| @@ -57,7 +54,7 @@ int Conv2DInt8Coder::DoCode(CoderContext *const context) { | |||
| code.CodeFunction("arm_convolve_s8", input_tensor_, input_x_, input_y_, input_ch_, input_batches_, filter_tensor_, | |||
| output_ch_, kernel_x_, kernel_y_, pad_x_, pad_y_, stride_x_, stride_y_, bias_tensor_, | |||
| output_tensor_, "output_shift", "output_mult", out_offset_, input_offset_, out_activation_min_, | |||
| out_activation_max_, output_x_, output_y_, buffer_str); | |||
| out_activation_max_, output_x_, output_y_, buffer_); | |||
| break; | |||
| case Convolve_1_x_n: | |||
| c_files = {"arm_convolve_1_x_n_s8.c", "arm_nn_mat_mul_core_1x_s8.c"}; | |||
| @@ -65,7 +62,7 @@ int Conv2DInt8Coder::DoCode(CoderContext *const context) { | |||
| code.CodeFunction("arm_convolve_1_x_n_s8", input_tensor_, input_x_, input_ch_, input_batches_, filter_tensor_, | |||
| output_ch_, kernel_x_, pad_x_, stride_x_, bias_tensor_, output_tensor_, "output_shift", | |||
| "output_mult", out_offset_, input_offset_, out_activation_min_, out_activation_max_, output_x_, | |||
| buffer_str); | |||
| buffer_); | |||
| break; | |||
| case Convolve_1x1_fast: | |||
| c_files = {"arm_convolve_1x1_s8_fast.c", "arm_nn_mat_mult_nt_t_s8.c", "arm_nn_mat_mul_core_4x_s8.c", | |||
| @@ -74,7 +71,7 @@ int Conv2DInt8Coder::DoCode(CoderContext *const context) { | |||
| code.CodeFunction("arm_convolve_1x1_s8_fast", input_tensor_, input_x_, input_y_, input_ch_, input_batches_, | |||
| filter_tensor_, output_ch_, pad_x_, pad_y_, stride_x_, stride_y_, bias_tensor_, output_tensor_, | |||
| "output_shift", "output_mult", out_offset_, input_offset_, out_activation_min_, | |||
| out_activation_max_, output_x_, output_y_, buffer_str); | |||
| out_activation_max_, output_x_, output_y_, buffer_); | |||
| break; | |||
| default: | |||
| MS_LOG(ERROR) << "opt enum value is not defined"; | |||
| @@ -159,5 +156,20 @@ int Conv2DInt8Coder::InitTmpBuffer() { | |||
| return RET_OK; | |||
| } | |||
| REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_Conv2D, CPUOpCoderCreator<Conv2DInt8Coder>) | |||
| std::unique_ptr<OperatorCoder> CmsisConv2DInt8OpCoderCreator(const std::vector<Tensor *> &in_tensors, | |||
| const std::vector<Tensor *> &out_tensors, | |||
| const Model::Node *node, size_t node_index, | |||
| Target target) { | |||
| MS_CHECK_PTR_RET_NULL(node); | |||
| int pt = GetPrimitiveType(node->primitive_); | |||
| if (pt != schema::PrimitiveType::PrimitiveType_Conv2DFusion) { | |||
| MS_LOG(ERROR) << "unmatched primitive type " << PrimitiveTypeName(pt); | |||
| return nullptr; | |||
| } | |||
| std::unique_ptr<Conv2DInt8Coder> coder = | |||
| std::make_unique<Conv2DInt8Coder>(in_tensors, out_tensors, node, node_index, target); | |||
| return coder; | |||
| } | |||
| REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_Conv2DFusion, CPUOpCoderCreator<Conv2DInt8Coder>) | |||
| } // namespace mindspore::lite::micro::cmsis | |||
| @@ -20,8 +20,6 @@ | |||
| #include "coder/opcoders/file_collector.h" | |||
| #include "coder/log.h" | |||
| using mindspore::schema::PrimitiveType_DepthwiseConv2D; | |||
| namespace mindspore::lite::micro::cmsis { | |||
| int DWConvInt8Coder::Prepare(CoderContext *const context) { | |||
| @@ -153,6 +151,4 @@ int DWConvInt8Coder::InitTmpBuffer() { | |||
| return 0; | |||
| } | |||
| REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_DepthwiseConv2D, CPUOpCoderCreator<DWConvInt8Coder>) | |||
| } // namespace mindspore::lite::micro::cmsis | |||
| @@ -19,8 +19,8 @@ | |||
| #include <string> | |||
| #include <vector> | |||
| #include "micro/coder/opcoders/op_coder.h" | |||
| #include "micro/coder/opcoders/base/full_connection_base_coder.h" | |||
| #include "coder/opcoders/op_coder.h" | |||
| #include "coder/opcoders/base/full_connection_base_coder.h" | |||
| #include "nnacl/int8/quantize.h" | |||
| namespace mindspore::lite::micro::cmsis { | |||
| class FullConnectionInt8Coder final : public FullConnectionBaseCoder { | |||
| @@ -20,7 +20,7 @@ | |||
| #include "nnacl/int8/quantize.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| using mindspore::schema::PrimitiveType_Mul; | |||
| using mindspore::schema::PrimitiveType_MulFusion; | |||
| namespace mindspore::lite::micro::cmsis { | |||
| @@ -69,5 +69,5 @@ int MulInt8Coder::DoCode(CoderContext *const context) { | |||
| context->AppendCode(code.str()); | |||
| return RET_OK; | |||
| } | |||
| REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_Mul, CPUOpCoderCreator<MulInt8Coder>) | |||
| REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_MulFusion, CPUOpCoderCreator<MulInt8Coder>) | |||
| } // namespace mindspore::lite::micro::cmsis | |||
| @@ -20,7 +20,8 @@ | |||
| #include "coder/opcoders/serializers/serializer.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| using mindspore::schema::PrimitiveType_Pooling; | |||
| using mindspore::schema::PrimitiveType_AvgPoolFusion; | |||
| using mindspore::schema::PrimitiveType_MaxPoolFusion; | |||
| namespace mindspore::lite::micro::cmsis { | |||
| int PoolingInt8Coder::Prepare(CoderContext *const context) { | |||
| @@ -39,14 +40,12 @@ int PoolingInt8Coder::Prepare(CoderContext *const context) { | |||
| int PoolingInt8Coder::DoCode(CoderContext *const context) { | |||
| // init struct PoolingParameters | |||
| std::string buffer_str = "NULL"; | |||
| std::string pooling_func; | |||
| std::vector<std::string> cFiles; | |||
| if (pooling_parameter_->pool_mode_ == PoolMode_AvgPool) { | |||
| cFiles = {"arm_avgpool_s8.c"}; | |||
| pooling_func = "arm_avgpool_s8"; | |||
| buffer_str = allocator_->GetRuntimeAddr(buffer_); | |||
| } else if (pooling_parameter_->pool_mode_ == PoolMode_MaxPool) { | |||
| cFiles = {"arm_max_pool_s8.c"}; | |||
| pooling_func = "arm_max_pool_s8"; | |||
| @@ -59,11 +58,9 @@ int PoolingInt8Coder::DoCode(CoderContext *const context) { | |||
| Serializer code; | |||
| code.precision(kPrecision); | |||
| code.CodeFunction(pooling_func, "&nn_context", "&pool_params", "&input_dims", input_tensor_, "&filter_dims", | |||
| "&output_dims", output_tensor_); | |||
| code.CodeFunction(pooling_func, dim_src_height_, dim_src_width_, dim_dst_height_, dim_dst_width_, stride_height_, | |||
| stride_width_, dim_kernel_height_, dim_kernel_width_, padding_height_, padding_width_, act_min_, | |||
| act_max_, ch_src_, input_tensor_, buffer_str, output_tensor_); | |||
| act_max_, ch_src_, input_tensor_, buffer_, output_tensor_); | |||
| context->AppendCode(code.str()); | |||
| return RET_OK; | |||
| } | |||
| @@ -97,6 +94,7 @@ int PoolingInt8Coder::SetParameters() { | |||
| return RET_OK; | |||
| } | |||
| REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_Pooling, CPUOpCoderCreator<PoolingInt8Coder>) | |||
| REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_AvgPoolFusion, CPUOpCoderCreator<PoolingInt8Coder>) | |||
| REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_MaxPoolFusion, CPUOpCoderCreator<PoolingInt8Coder>) | |||
| } // namespace mindspore::lite::micro::cmsis | |||
| @@ -19,7 +19,7 @@ | |||
| #include "coder/opcoders/serializers/serializer.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| using mindspore::schema::PrimitiveType_SoftMax; | |||
| using mindspore::schema::PrimitiveType_Softmax; | |||
| namespace mindspore::lite::micro::cmsis { | |||
| int SoftMaxInt8Coder::Prepare(CoderContext *const context) { | |||
| @@ -76,6 +76,6 @@ int SoftMaxInt8Coder::DoCode(CoderContext *const context) { | |||
| context->AppendCode(code.str()); | |||
| return RET_OK; | |||
| } | |||
| REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_SoftMax, CPUOpCoderCreator<SoftMaxInt8Coder>) | |||
| REG_OPERATOR_CODER(kARM32M, kNumberTypeInt8, PrimitiveType_Softmax, CPUOpCoderCreator<SoftMaxInt8Coder>) | |||
| } // namespace mindspore::lite::micro::cmsis | |||
| @@ -0,0 +1,143 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "coder/opcoders/nnacl/dequant/de_quant.h" | |||
| #include <string> | |||
| #include <vector> | |||
| #include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
// Number of quant params that selects the per-tensor dequantization path.
// Typed as size_t because it is compared against quant_params().size().
static constexpr size_t kPerTensor = 1;
// Tensor rank (shape().size()) required before the per-channel path is considered.
static constexpr size_t kPerBatch = 3;
| namespace mindspore::lite::micro::nnacl { | |||
| void Dequant::set_de_quant_buffer_str(const std::string &dequant_buffer_str) { | |||
| de_quant_buffer_str_ = "(float *)(" + dequant_buffer_str + ")"; | |||
| } | |||
| void Dequant::DequantRecordWorkspcae(size_t curr_workspace) { | |||
| de_quant_max_workspace_ = de_quant_max_workspace_ > curr_workspace ? de_quant_max_workspace_ : curr_workspace; | |||
| } | |||
| bool Dequant::CheckDequantFlag(const Tensor *weight_tensor) { | |||
| if (weight_tensor == nullptr) { | |||
| return false; | |||
| } | |||
| return !weight_tensor->quant_params().empty() && weight_tensor->quant_params().front().inited && | |||
| weight_tensor->data_c() != nullptr; | |||
| } | |||
| void Dequant::DeQuantFunctionPerChannel(const Tensor *quant_tensor, const std::vector<DeQuantArg> &de_quant_args, | |||
| const std::string &de_quant_arg_base_str, | |||
| NNaclFp32Serializer *const de_quant_code) { | |||
| int quant_arg_dims = static_cast<int>(quant_tensor->quant_params().size()); | |||
| int de_quant_nums = quant_tensor->ElementsNum(); | |||
| for (int i = 0; i < quant_arg_dims; ++i) { | |||
| auto de_quant_arg = de_quant_args.at(i); | |||
| std::string de_quant_arg_str = de_quant_arg_base_str + std::to_string(i); | |||
| de_quant_code->CodeStruct(de_quant_arg_str, de_quant_arg); | |||
| } | |||
| std::string de_quant_args_name = "de_quant_args"; | |||
| *de_quant_code << "const DeQuantArg *" << de_quant_args_name << "[" << quant_arg_dims << "] = {\n"; | |||
| for (int i = 0; i < quant_arg_dims - 1; ++i) { | |||
| *de_quant_code << "&" << de_quant_arg_base_str << std::to_string(i) << ", "; | |||
| } | |||
| *de_quant_code << "&" << de_quant_arg_base_str << std::to_string(quant_arg_dims - 1); | |||
| *de_quant_code << "};\n"; | |||
| size_t per_batch_size = quant_tensor->shape().at(0); | |||
| std::string quant_tensor_addr_str = "(int8_t *)(" + quant_tensor_addr_ + ")"; | |||
| de_quant_code->CodeFunction("DequantDataPerChannel", quant_tensor_addr_str, de_quant_args_name, de_quant_nums, | |||
| per_batch_size, de_quant_buffer_str_); | |||
| } | |||
| void Dequant::DeQuantFunction(const Tensor *quant_tensor, const std::vector<DeQuantArg> &de_quant_args, | |||
| const std::string &de_quant_arg_base_str, NNaclFp32Serializer *const de_quant_code) { | |||
| int quant_arg_dims = static_cast<int>(quant_tensor->quant_params().size()); | |||
| int de_quant_nums = quant_tensor->ElementsNum(); | |||
| for (int i = 0; i < quant_arg_dims; ++i) { | |||
| auto de_quant_arg = de_quant_args.at(i); | |||
| std::string de_quant_arg_str = de_quant_arg_base_str + std::to_string(i); | |||
| de_quant_code->CodeStruct(de_quant_arg_str, de_quant_arg); | |||
| } | |||
| std::string de_quant_args_name = "de_quant_args"; | |||
| *de_quant_code << "const DeQuantArg *" << de_quant_args_name << "[" << quant_arg_dims << "] = {\n"; | |||
| for (int i = 0; i < quant_arg_dims - 1; ++i) { | |||
| *de_quant_code << "&" << de_quant_arg_base_str << std::to_string(i) << ", "; | |||
| } | |||
| *de_quant_code << "&" << de_quant_arg_base_str << std::to_string(quant_arg_dims - 1); | |||
| *de_quant_code << "};\n"; | |||
| auto channels = static_cast<size_t>(quant_tensor->Batch()); | |||
| std::string quant_tensor_addr_str = "(int8_t *)(" + quant_tensor_addr_ + ")"; | |||
| de_quant_code->CodeFunction("DequantData", quant_tensor_addr_str, de_quant_args_name, de_quant_nums, channels, | |||
| de_quant_buffer_str_); | |||
| } | |||
| void Dequant::DeQuantFunctionPerTensor(const Tensor *quant_tensor, const std::vector<DeQuantArg> &de_quant_args, | |||
| const std::string &de_quant_arg_base_str, | |||
| NNaclFp32Serializer *const de_quant_code) { | |||
| size_t de_quant_nums = quant_tensor->ElementsNum(); | |||
| auto de_quant_arg = de_quant_args.at(0); | |||
| std::string de_quant_arg_str = de_quant_arg_base_str + std::to_string(0); | |||
| de_quant_code->CodeStruct(de_quant_arg_str, de_quant_arg); | |||
| std::string de_quant_args_name = "de_quant_args"; | |||
| *de_quant_code << "const DeQuantArg *" << de_quant_args_name << "[" << 1 << "] = {\n"; | |||
| *de_quant_code << "&" << de_quant_arg_base_str << std::to_string(0); | |||
| *de_quant_code << "};\n"; | |||
| std::string quant_tensor_addr_str = "(int8_t *)(" + quant_tensor_addr_ + ")"; | |||
| de_quant_code->CodeFunction("DequantDataPerTensor", quant_tensor_addr_str, de_quant_args_name, de_quant_nums, | |||
| de_quant_buffer_str_); | |||
| } | |||
| std::string Dequant::GetMicroDeQuantFunction(const Tensor *quant_tensor, const std::string &quant_tensor_addr) { | |||
| std::string de_quant_block; | |||
| if (quant_tensor == nullptr || de_quant_buffer_str_.empty()) { | |||
| return de_quant_block; | |||
| } | |||
| quant_tensor_addr_ = quant_tensor_addr; | |||
| size_t de_quant_nums = quant_tensor->ElementsNum(); | |||
| size_t quant_arg_dims = quant_tensor->quant_params().size(); | |||
| DequantRecordWorkspcae(static_cast<size_t>(de_quant_nums * sizeof(float))); | |||
| NNaclFp32Serializer de_quant_code; | |||
| de_quant_code << "{\n"; | |||
| size_t quant_tensor_dims = quant_tensor->shape().size(); | |||
| std::vector<DeQuantArg> de_quant_args; | |||
| std::string de_quant_arg_base_str = "de_quant_arg_"; | |||
| for (size_t i = 0; i < quant_arg_dims; ++i) { | |||
| auto curr_quant_param = quant_tensor->quant_params().at(i); | |||
| DeQuantArg de_quant_arg = { | |||
| .scale = static_cast<float>(curr_quant_param.scale), | |||
| .zeroPoint = curr_quant_param.zeroPoint, | |||
| .var_corr = curr_quant_param.var_corr, | |||
| .mean_corr = curr_quant_param.mean_corr, | |||
| // this clusters is meaningless which will be supported in future | |||
| .clusters = {}, | |||
| .clusters_nums = static_cast<int>(curr_quant_param.clusters.size()), | |||
| .bitNum = quant_tensor->quant_params().at(i).bitNum, | |||
| }; | |||
| de_quant_args.emplace_back(de_quant_arg); | |||
| } | |||
| de_quant_code.CodeFunction("memset", de_quant_buffer_str_, 0, de_quant_nums * sizeof(float)); | |||
| if (quant_tensor_dims == kPerBatch && quant_arg_dims == static_cast<size_t>(quant_tensor->shape().at(0))) { | |||
| DeQuantFunctionPerChannel(quant_tensor, de_quant_args, de_quant_arg_base_str, &de_quant_code); | |||
| } else if (quant_arg_dims != kPerTensor) { | |||
| DeQuantFunction(quant_tensor, de_quant_args, de_quant_arg_base_str, &de_quant_code); | |||
| } else { | |||
| DeQuantFunctionPerTensor(quant_tensor, de_quant_args, de_quant_arg_base_str, &de_quant_code); | |||
| } | |||
| de_quant_code << "}\n"; | |||
| de_quant_block = de_quant_code.str(); | |||
| return de_quant_block; | |||
| } | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| @@ -0,0 +1,63 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MICRO_LITE_MICRO_CODER_OPCODERS_NNACL_DEQUANT_DEQUANT_H_ | |||
| #define MICRO_LITE_MICRO_CODER_OPCODERS_NNACL_DEQUANT_DEQUANT_H_ | |||
| #include <string> | |||
| #include <vector> | |||
| #include "src/tensor.h" | |||
| #include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| namespace mindspore::lite::micro::nnacl { | |||
| class Dequant { | |||
| public: | |||
| Dequant(const Dequant &) = delete; | |||
| Dequant &operator=(const Dequant &) = delete; | |||
| static Dequant *GetInstance() { | |||
| static Dequant dequant; | |||
| return &dequant; | |||
| } | |||
| void set_de_quant_buffer_str(const std::string &de_quant_buffer_str); | |||
| const size_t de_quant_max_workspace() const { return de_quant_max_workspace_; } | |||
| const std::string de_quant_buffer_str() const { return de_quant_buffer_str_; } | |||
| bool CheckDequantFlag(const Tensor *quant_tensor); | |||
| std::string GetMicroDeQuantFunction(const Tensor *quant_tensor, const std::string &quant_tensor_addr); | |||
| private: | |||
| void DeQuantFunctionPerTensor(const Tensor *quant_tensor, const std::vector<DeQuantArg> &de_quant_args, | |||
| const std::string &de_quant_arg_base_str, NNaclFp32Serializer *de_quant_code); | |||
| void DeQuantFunction(const Tensor *quant_tensor, const std::vector<DeQuantArg> &de_quant_args, | |||
| const std::string &de_quant_arg_base_str, NNaclFp32Serializer *de_quant_code); | |||
| void DeQuantFunctionPerChannel(const Tensor *quant_tensor, const std::vector<DeQuantArg> &de_quant_args, | |||
| const std::string &de_quant_arg_base_str, NNaclFp32Serializer *de_quant_code); | |||
| Dequant() = default; | |||
| ~Dequant() = default; | |||
| void DequantRecordWorkspcae(size_t curr_workspace); | |||
| std::string de_quant_buffer_str_; | |||
| std::string quant_tensor_addr_; | |||
| size_t de_quant_max_workspace_{0}; | |||
| }; | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| #endif // MICRO_LITE_MICRO_CODER_OPCODERS_NNACL_DEQUANT_DEQUANT_H_ | |||
| @@ -13,12 +13,12 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "micro/coder/opcoders/nnacl/fp32/activation_fp32_coder.h" | |||
| #include "coder/opcoders/nnacl/fp32/activation_fp32_coder.h" | |||
| #include <string> | |||
| #include "nnacl/fp32/activation_fp32.h" | |||
| #include "nnacl/op_base.h" | |||
| #include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| #include "micro/coder/opcoders/file_collector.h" | |||
| #include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| using mindspore::schema::PrimitiveType_Activation; | |||
| @@ -34,9 +34,9 @@ int ActivationFP32Coder::DoCode(CoderContext *const context) { | |||
| int count = MSMIN(stride, length - stride * task_id); | |||
| if (activation_parameter->type_ == schema::ActivationType_SIGMOID) { | |||
| Collect(context, {"runtime/kernel/fp32/sigmoid.h"}, {"sigmoid.c"}); | |||
| Collect(context, {"runtime/kernel/fp32/sigmoid_fp32.h"}, {"sigmoid_fp32.c"}); | |||
| } else { | |||
| Collect(context, {"nnacl/fp32/activation.h"}, {"activation.c"}); | |||
| Collect(context, {"nnacl/fp32/activation_fp32.h"}, {"activation_fp32.c"}); | |||
| } | |||
| NNaclFp32Serializer code; | |||
| switch (activation_parameter->type_) { | |||
| @@ -14,10 +14,10 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "micro/coder/opcoders/nnacl/fp32/addn_fp32_coder.h" | |||
| #include "coder/opcoders/nnacl/fp32/addn_fp32_coder.h" | |||
| #include <string> | |||
| #include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| #include "micro/coder/opcoders/file_collector.h" | |||
| #include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| using mindspore::schema::PrimitiveType_AddN; | |||
| namespace mindspore::lite::micro::nnacl { | |||
| @@ -28,15 +28,12 @@ int AddNFP32Coder::DoCode(CoderContext *const context) { | |||
| int elements_num = input0->ElementsNum(); | |||
| // Get Tensor Pointer | |||
| std::string input0_str = allocator_->GetRuntimeAddr(input0); | |||
| std::string input1_str = allocator_->GetRuntimeAddr(input1); | |||
| Collect(context, {"nnacl/kernel/fp32/add_fp32_slim.h"}, {"add_fp32_slim.c"}); | |||
| Collect(context, {"nnacl/kernel/fp32/add_fp32.h"}, {"add_fp32.c"}); | |||
| NNaclFp32Serializer code; | |||
| code.CodeFunction("ElementAdd", input0_str, input1_str, output_tensor_, elements_num); | |||
| code.CodeFunction("ElementAdd", input0, input1, output_tensor_, elements_num); | |||
| if (input_tensors_.size() > 2) { | |||
| for (size_t i = 2; i < input_tensors_.size(); ++i) { | |||
| std::string input_str = allocator_->GetRuntimeAddr(input_tensors_.at(i)); | |||
| code.CodeFunction("ElementAdd", input_str, output_tensor_, elements_num); | |||
| code.CodeFunction("ElementAdd", input_tensors_.at(i), output_tensor_, elements_num); | |||
| } | |||
| } | |||
| context->AppendCode(code.str()); | |||
| @@ -61,7 +61,7 @@ int ArithmeticFP32Coder::Init(CoderContext *const context) { | |||
| if (arithmetic_parameter_->in_elements_num0_ == 1 || arithmetic_parameter_->in_elements_num1_ == 1) { | |||
| switch (arithmetic_parameter_->op_parameter_.type_) { | |||
| case PrimitiveType_Mul: | |||
| case PrimitiveType_MulFusion: | |||
| switch (arithmetic_parameter_->activation_type_) { | |||
| case schema::ActivationType_RELU: | |||
| arithmetic_parameter_->broadcasting_ = false; | |||
| @@ -80,7 +80,7 @@ int ArithmeticFP32Coder::Init(CoderContext *const context) { | |||
| break; | |||
| } | |||
| break; | |||
| case PrimitiveType_Add: | |||
| case PrimitiveType_AddFusion: | |||
| switch (arithmetic_parameter_->activation_type_) { | |||
| case schema::ActivationType_RELU: | |||
| arithmetic_parameter_->broadcasting_ = false; | |||
| @@ -99,7 +99,7 @@ int ArithmeticFP32Coder::Init(CoderContext *const context) { | |||
| break; | |||
| } | |||
| break; | |||
| case PrimitiveType_Sub: | |||
| case PrimitiveType_SubFusion: | |||
| switch (arithmetic_parameter_->activation_type_) { | |||
| case schema::ActivationType_RELU: | |||
| arithmetic_parameter_->broadcasting_ = false; | |||
| @@ -157,7 +157,7 @@ int ArithmeticFP32Coder::Prepare(CoderContext *const context) { | |||
| } | |||
| arithmetic_parameter_ = reinterpret_cast<ArithmeticParameter *>(parameter_); | |||
| std::map<int, std::function<void()>> type_setters = { | |||
| {PrimitiveType_Mul, | |||
| {PrimitiveType_MulFusion, | |||
| [this]() { | |||
| switch (arithmetic_parameter_->activation_type_) { | |||
| case schema::ActivationType_RELU: | |||
| @@ -174,7 +174,7 @@ int ArithmeticFP32Coder::Prepare(CoderContext *const context) { | |||
| break; | |||
| } | |||
| }}, | |||
| {PrimitiveType_Add, | |||
| {PrimitiveType_AddFusion, | |||
| [this]() { | |||
| switch (arithmetic_parameter_->activation_type_) { | |||
| case schema::ActivationType_RELU: | |||
| @@ -191,7 +191,7 @@ int ArithmeticFP32Coder::Prepare(CoderContext *const context) { | |||
| break; | |||
| } | |||
| }}, | |||
| {PrimitiveType_Sub, | |||
| {PrimitiveType_SubFusion, | |||
| [this]() { | |||
| switch (arithmetic_parameter_->activation_type_) { | |||
| case schema::ActivationType_RELU: | |||
| @@ -205,7 +205,7 @@ int ArithmeticFP32Coder::Prepare(CoderContext *const context) { | |||
| break; | |||
| } | |||
| }}, | |||
| {PrimitiveType_Div, | |||
| {PrimitiveType_DivFusion, | |||
| [this]() { | |||
| switch (arithmetic_parameter_->activation_type_) { | |||
| case schema::ActivationType_RELU: | |||
| @@ -275,15 +275,16 @@ int ArithmeticFP32Coder::DoCode(CoderContext *const context) { | |||
| * this solution is not suitable for micro, for the size of package. | |||
| * */ | |||
| if (arithmetic_opt_run_ == "ElementOptSub" || arithmetic_run_ == "ElementSub") { | |||
| Collect(context, {"nnacl/kernel/fp32/sub.h"}, {"sub.c"}); | |||
| Collect(context, {"nnacl/fp32/sub_fp32.h"}, {"sub_fp32.c"}); | |||
| } else if (arithmetic_opt_run_ == "ElementOptAdd" || arithmetic_run_ == "ElementAdd") { | |||
| Collect(context, {"nnacl/kernel/fp32/add_fp32_slim.h"}, {"add_fp32_slim.c"}); | |||
| Collect(context, {"nnacl/fp32/add_fp32.h"}, {"add_fp32.c"}); | |||
| } else if (arithmetic_opt_run_ == "ElementOptMul" || arithmetic_run_ == "ElementMul") { | |||
| Collect(context, {"nnacl/kernel/fp32/mul.h"}, {"mul.c"}); | |||
| Collect(context, {"nnacl/fp32/mul_fp32.h"}, {"mul_fp32.c"}); | |||
| } else if (arithmetic_run_ == "ElementAddRelu") { | |||
| Collect(context, {"nnacl/kernel/fp32/add_relu.h"}, {"add_relu.c"}); | |||
| Collect(context, {"nnacl/fp32/add_relu_fp32.h"}, {"add_relu_fp32.c"}); | |||
| } else { | |||
| Collect(context, {"nnacl/arithmetic_common.h", "nnacl/fp32/arithmetic.h"}, {"arithmetic_common.c", "arithmetic.c"}); | |||
| Collect(context, {"nnacl/arithmetic_common.h", "nnacl/fp32/arithmetic_fp32.h"}, | |||
| {"arithmetic_common.c", "arithmetic_fp32.c"}); | |||
| } | |||
| if (arithmetic_parameter_->broadcasting_) { | |||
| @@ -330,15 +331,15 @@ int ArithmeticFP32Coder::DoCode(CoderContext *const context) { | |||
| return RET_OK; | |||
| } | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt32, PrimitiveType_Add, CPUOpCoderCreator<ArithmeticFP32Coder>) | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt32, PrimitiveType_AddFusion, CPUOpCoderCreator<ArithmeticFP32Coder>) | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Mul, CPUOpCoderCreator<ArithmeticFP32Coder>) | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_MulFusion, CPUOpCoderCreator<ArithmeticFP32Coder>) | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Add, CPUOpCoderCreator<ArithmeticFP32Coder>) | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_AddFusion, CPUOpCoderCreator<ArithmeticFP32Coder>) | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Sub, CPUOpCoderCreator<ArithmeticFP32Coder>) | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_SubFusion, CPUOpCoderCreator<ArithmeticFP32Coder>) | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Div, CPUOpCoderCreator<ArithmeticFP32Coder>) | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_DivFusion, CPUOpCoderCreator<ArithmeticFP32Coder>) | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_LogicalAnd, CPUOpCoderCreator<ArithmeticFP32Coder>) | |||
| @@ -25,9 +25,9 @@ | |||
| #define DEFAULT_ARITHMETIC_NDIMS 10 | |||
| namespace mindspore::lite::micro::nnacl { | |||
| using mindspore::schema::PrimitiveType_Add; | |||
| using mindspore::schema::PrimitiveType_AddFusion; | |||
| using mindspore::schema::PrimitiveType_Div; | |||
| using mindspore::schema::PrimitiveType_DivFusion; | |||
| using mindspore::schema::PrimitiveType_Equal; | |||
| @@ -51,7 +51,7 @@ using mindspore::schema::PrimitiveType_Maximum; | |||
| using mindspore::schema::PrimitiveType_Minimum; | |||
| using mindspore::schema::PrimitiveType_Mul; | |||
| using mindspore::schema::PrimitiveType_MulFusion; | |||
| using mindspore::schema::PrimitiveType_NotEqual; | |||
| @@ -59,7 +59,7 @@ using mindspore::schema::PrimitiveType_RealDiv; | |||
| using mindspore::schema::PrimitiveType_SquaredDifference; | |||
| using mindspore::schema::PrimitiveType_Sub; | |||
| using mindspore::schema::PrimitiveType_SubFusion; | |||
| using mindspore::schema::PrimitiveType_Eltwise; | |||
| @@ -27,7 +27,7 @@ namespace mindspore::lite::micro::nnacl { | |||
| using mindspore::schema::PrimitiveType_Abs; | |||
| using mindspore::schema::PrimitiveType_Add; | |||
| using mindspore::schema::PrimitiveType_AddFusion; | |||
| using mindspore::schema::PrimitiveType_AddN; | |||
| @@ -37,7 +37,7 @@ using mindspore::schema::PrimitiveType_Ceil; | |||
| using mindspore::schema::PrimitiveType_Cos; | |||
| using mindspore::schema::PrimitiveType_Div; | |||
| using mindspore::schema::PrimitiveType_DivFusion; | |||
| using mindspore::schema::PrimitiveType_Equal; | |||
| @@ -67,7 +67,7 @@ using mindspore::schema::PrimitiveType_Maximum; | |||
| using mindspore::schema::PrimitiveType_Minimum; | |||
| using mindspore::schema::PrimitiveType_Mul; | |||
| using mindspore::schema::PrimitiveType_MulFusion; | |||
| using mindspore::schema::PrimitiveType_NotEqual; | |||
| @@ -81,7 +81,7 @@ using mindspore::schema::PrimitiveType_Sqrt; | |||
| using mindspore::schema::PrimitiveType_SquaredDifference; | |||
| using mindspore::schema::PrimitiveType_Sub; | |||
| using mindspore::schema::PrimitiveType_SubFusion; | |||
| using mindspore::schema::PrimitiveType_Sin; | |||
| @@ -14,10 +14,10 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "micro/coder/opcoders/nnacl/fp32/assign_add_fp32_coder.h" | |||
| #include "coder/opcoders/nnacl/fp32/assign_add_fp32_coder.h" | |||
| #include <string> | |||
| #include "schema/inner/ops_generated.h" | |||
| #include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| #include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| namespace mindspore::lite::micro::nnacl { | |||
| @@ -17,7 +17,6 @@ | |||
| #include <string> | |||
| #include <vector> | |||
| #include "nnacl/fp32/batchnorm_fp32.h" | |||
| #include "src/ops/batch_norm.h" | |||
| #include "nnacl/op_base.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| #include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| @@ -27,10 +26,7 @@ using mindspore::schema::PrimitiveType_BatchNorm; | |||
| namespace mindspore::lite::micro::nnacl { | |||
| int BatchnormFP32Coder::Init() { | |||
| auto bn_parameter = reinterpret_cast<BatchNormParameter *>(parameter_); | |||
| auto bn_prim = reinterpret_cast<const mindspore::lite::BatchNorm *>(OperatorCoder::primitive()); | |||
| bn_parameter->epsilon_ = bn_prim->GetEpsilon(); | |||
| auto bn_parameter = reinterpret_cast<BatchNormParameter *>(OperatorCoder::parameter_); | |||
| std::vector<int> input_shapes = input_tensor_->shape(); | |||
| if (input_shapes.empty()) { | |||
| return RET_ERROR; | |||
| @@ -41,7 +37,9 @@ int BatchnormFP32Coder::Init() { | |||
| for (int i = 0; i < n_dim - 1; i++) { | |||
| bn_parameter->unit_ *= input_shapes.at(i); | |||
| } | |||
| bn_parameter->op_parameter_.thread_num_ = MSMIN(bn_parameter->op_parameter_.thread_num_, bn_parameter->unit_); | |||
| if (default_momentum_ < 0.0f) { | |||
| default_momentum_ = bn_parameter->momentum_; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -59,7 +57,7 @@ int BatchnormFP32Coder::DoCode(CoderContext *const context) { | |||
| Collect(context, {"nnacl/fp32/batchnorm.h"}, {"nnacl/fp32/batchnorm.c"}); | |||
| NNaclFp32Serializer code; | |||
| code.CodeStruct("bn_parameter", *bn_parameter); | |||
| code.CodeFunction("BatchNorm", output_tensor_, input_tensor_, mean_tensor, var_tensor, task_id, "&bn_parameter"); | |||
| code.CodeFunction("BatchNormFp32", input_tensor_, mean_tensor, var_tensor, "&bn_parameter", task_id, output_tensor_); | |||
| MS_LOG(INFO) << "BatchnormFP32Code has been called"; | |||
| context->AppendCode(code.str()); | |||
| return lite::RET_OK; | |||
| @@ -36,6 +36,12 @@ class BatchnormFP32Coder final : public OperatorCoder { | |||
| private: | |||
| int Init(); | |||
| float default_momentum_{-1.0f}; | |||
| float *mean_{nullptr}; | |||
| float *variance_{nullptr}; | |||
| }; | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| @@ -0,0 +1,77 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "coder/opcoders/nnacl/fp32/biasadd_fp32_coder.h" | |||
| #include <string> | |||
| #include "coder/opcoders/file_collector.h" | |||
| #include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| using mindspore::schema::PrimitiveType_BiasAdd; | |||
| namespace mindspore::lite::micro::nnacl { | |||
| int BiasAddFP32Coder::Prepare(CoderContext *context) { | |||
| arithmetic_parameter_ = reinterpret_cast<ArithmeticParameter *>(parameter_); | |||
| size_t data_size = input_tensors_.at(0)->ElementsNum(); | |||
| tile_in_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, data_size * sizeof(float), kWorkspace)); | |||
| tile_bias_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, data_size * sizeof(float), kWorkspace)); | |||
| return RET_OK; | |||
| } | |||
| int BiasAddFP32Coder::DoCode(CoderContext *ctx) { | |||
| if (input_tensors_.size() < kBiasIndex) { | |||
| return RET_ERROR; | |||
| } | |||
| size_t data_size = input_tensor_->ElementsNum(); | |||
| std::string bias_str = allocator_->GetRuntimeAddr(input_tensors_.at(kWeightIndex)); | |||
| Collect(ctx, | |||
| {"nnacl/arithmetic.h", "nnacl/nnacl_utils.h", "nnacl/nnacl_common.h", "nnacl/base/arithmetic_base.h", | |||
| "nnacl/fp32/add_fp32.h", "nnacl/fp32/arithmetic_fp32.h"}, | |||
| {"arithmetic_base.c", "arithmetic_fp32.c", "add_fp32.c"}); | |||
| nnacl::NNaclFp32Serializer code; | |||
| std::vector<int> dims = input_tensor_->shape(); | |||
| arithmetic_parameter_->broadcasting_ = false; | |||
| arithmetic_parameter_->ndim_ = dims.size(); | |||
| arithmetic_parameter_->activation_type_ = 0; | |||
| for (size_t i = 0; i < dims.size(); i++) { | |||
| arithmetic_parameter_->in_shape0_[i] = dims[i]; | |||
| } | |||
| arithmetic_parameter_->in_elements_num0_ = 0; | |||
| for (size_t i = 0; i < dims.size(); i++) { | |||
| if (i == dims.size() - 1) { | |||
| arithmetic_parameter_->in_shape1_[i] = dims[dims.size() - 1]; | |||
| continue; | |||
| } | |||
| arithmetic_parameter_->in_shape1_[i] = 1; | |||
| } | |||
| arithmetic_parameter_->in_elements_num1_ = 0; | |||
| for (size_t i = 0; i < dims.size(); i++) { | |||
| arithmetic_parameter_->out_shape_[i] = dims[i]; | |||
| } | |||
| arithmetic_parameter_->out_elements_num_ = 0; | |||
| // other rest elements is not sure | |||
| code.CodeStruct("arith_param", *arithmetic_parameter_); | |||
| code.CodeFunction("BroadcastAdd", input_tensor_, bias_str, tile_in_, tile_bias_, output_tensor_, data_size, | |||
| "(ArithmeticParameter *)&arith_param"); | |||
| ctx->AppendCode(code.str()); | |||
| return RET_OK; | |||
| } | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_BiasAdd, CPUOpCoderCreator<BiasAddFP32Coder>) | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| @@ -0,0 +1,43 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_BIASADD_FP32_CODER_H_ | |||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_BIASADD_FP32_CODER_H_ | |||
| #include <vector> | |||
| #include "coder/opcoders/op_coder.h" | |||
| #include "nnacl/arithmetic.h" | |||
| namespace mindspore::lite::micro::nnacl { | |||
| class BiasAddFP32Coder final : public OperatorCoder { | |||
| public: | |||
| BiasAddFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | |||
| const Model::Node *node, size_t node_index, Target target) | |||
| : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {} | |||
| ~BiasAddFP32Coder() override = default; | |||
| int Prepare(CoderContext *context) override; | |||
| int DoCode(CoderContext *context) override; | |||
| private: | |||
| ArithmeticParameter *arithmetic_parameter_{nullptr}; | |||
| float *tile_in_{nullptr}; | |||
| float *tile_bias_{nullptr}; | |||
| }; | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_BIASADD_FP32_CODER_H_ | |||
| @@ -14,13 +14,12 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "micro/coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.h" | |||
| #include "coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.h" | |||
| #include <string> | |||
| #include "micro/coder/log.h" | |||
| #include "micro/coder/opcoders/file_collector.h" | |||
| #include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| #include "coder/log.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| #include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| using mindspore::schema::PrimitiveType_DepthwiseConv2D; | |||
| namespace mindspore::lite::micro::nnacl { | |||
| int ConvolutionDepthwiseFP32Coder::Prepare(CoderContext *const context) { | |||
| Conv2DBaseCoder::Init(); | |||
| @@ -73,6 +72,4 @@ int ConvolutionDepthwiseFP32Coder::DoCode(CoderContext *const context) { | |||
| return RET_OK; | |||
| } | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_DepthwiseConv2D, | |||
| CPUOpCoderCreator<ConvolutionDepthwiseFP32Coder>) | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| @@ -18,7 +18,7 @@ | |||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_FP32_CONVOLUTION_DEPTHWISE_FP32_CODER_H_ | |||
| #include <vector> | |||
| #include "micro/coder/opcoders/base/conv2d_base_coder.h" | |||
| #include "coder/opcoders/base/conv2d_base_coder.h" | |||
| #include "src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.h" | |||
| namespace mindspore::lite::micro::nnacl { | |||
| @@ -14,17 +14,21 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "micro/coder/opcoders/nnacl/fp32/convolution_fp32_coder.h" | |||
| #include "coder/opcoders/nnacl/fp32/convolution_fp32_coder.h" | |||
| #include <memory> | |||
| #include <string> | |||
| #include <vector> | |||
| #include "micro/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.h" | |||
| #include "coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.h" | |||
| #include "coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.h" | |||
| #include "nnacl/fp32/winograd_utils.h" | |||
| #include "src/ops/populate/populate_register.h" | |||
| #include "micro/coder/opcoders/file_collector.h" | |||
| #include "micro/coder/log.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| #include "coder/log.h" | |||
| #include "src/common/prim_util.h" | |||
| #include "src/common/version_manager.h" | |||
| #include "coder/opcoders/nnacl/dequant/de_quant.h" | |||
| using mindspore::schema::PrimitiveType_Conv2D; | |||
| using mindspore::schema::PrimitiveType_Conv2DFusion; | |||
| namespace mindspore::lite::micro::nnacl { | |||
| int ConvolutionFP32Coder::InitTmpBuffer() { | |||
| int in_channel = conv_param_->input_channel_; | |||
| @@ -43,20 +47,16 @@ int ConvolutionFP32Coder::InitTmpBuffer() { | |||
| } | |||
| int ConvolutionFP32Coder::Prepare(CoderContext *const context) { | |||
| int ret = Conv2DBaseCoder::Init(); | |||
| MS_CHECK_RET_CODE(ret, "Conv2DBaseCoder::Init() failed."); | |||
| ret = InitWeightBias(context); | |||
| MS_CHECK_RET_CODE(ret, "Init weight bias failed."); | |||
| MS_CHECK_RET_CODE(Conv2DBaseCoder::Init(), "Conv2DBaseCoder::Init() failed."); | |||
| de_quant_flag_ = Dequant::GetInstance()->CheckDequantFlag(filter_tensor_); | |||
| MS_CHECK_RET_CODE(InitWeightBias(context), "Init weight bias failed."); | |||
| return Resize(); | |||
| } | |||
| int ConvolutionFP32Coder::Resize() { | |||
| int ret = Conv2DBaseCoder::CheckResizeValid(); | |||
| MS_CHECK_RET_CODE(ret, "Resize is invalid."); | |||
| ret = Conv2DBaseCoder::Init(); | |||
| MS_CHECK_RET_CODE(ret, "init failed."); | |||
| ret = InitTmpBuffer(); | |||
| MS_CHECK_RET_CODE(ret, "init tmp buffer failed."); | |||
| MS_CHECK_RET_CODE(Conv2DBaseCoder::CheckResizeValid(), "Resize is invalid."); | |||
| MS_CHECK_RET_CODE(Conv2DBaseCoder::Init(), "init failed."); | |||
| MS_CHECK_RET_CODE(InitTmpBuffer(), "init tmp buffer failed."); | |||
| return RET_OK; | |||
| } | |||
| @@ -71,36 +71,43 @@ int ConvolutionFP32Coder::InitWeightBias(CoderContext *const context) { | |||
| const int oc_block = C8NUM; | |||
| int oc_block_num = UP_DIV(out_channel, C8NUM); | |||
| int pack_weight_size = oc_block_num * oc_block * in_channel * kernel_plane; | |||
| pack_weight_size_ = pack_weight_size * sizeof(float); | |||
| auto origin_weight = reinterpret_cast<float *>(filter_tensor_->MutableData()); | |||
| MS_CHECK_PTR(origin_weight); | |||
| packed_weight_ = reinterpret_cast<float *>( | |||
| allocator_->Malloc(kNumberTypeFloat32, pack_weight_size * sizeof(float), kOnlinePackWeight)); | |||
| packed_weight_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight)); | |||
| MS_CHECK_PTR(packed_weight_); | |||
| auto out_channel_size = static_cast<size_t>(out_channel); | |||
| NNaclFp32Serializer code; | |||
| code.CodeMallocExpression(packed_weight_, pack_weight_size * sizeof(float)); | |||
| code.CodeFunction("memset", packed_weight_, 0, pack_weight_size * sizeof(float)); | |||
| code.CodeFunction("RowMajor2Col8Major", filter_tensor_, packed_weight_, out_channel_size, in_channel * kernel_plane); | |||
| NNaclFp32Serializer init_code; | |||
| std::string ori_weight_addr = allocator_->GetRuntimeAddr(filter_tensor_); | |||
| std::string init_weight_str = ori_weight_addr; | |||
| if (de_quant_flag_) { | |||
| init_weight_str = Dequant::GetInstance()->de_quant_buffer_str(); | |||
| std::string de_quant_function = Dequant::GetInstance()->GetMicroDeQuantFunction(filter_tensor_, ori_weight_addr); | |||
| init_code << de_quant_function; | |||
| } | |||
| init_code.CodeMallocExpression(packed_weight_, pack_weight_size_); | |||
| init_code.CodeFunction("memset", packed_weight_, 0, pack_weight_size_); | |||
| init_code.CodeFunction("RowMajor2Col8Major", init_weight_str, packed_weight_, out_channel_size, | |||
| in_channel * kernel_plane); | |||
| auto bias_data_size = static_cast<size_t>(oc_block_num * oc_block * sizeof(float)); | |||
| bias_data_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, bias_data_size, kOnlinePackWeight)); | |||
| bias_data_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight)); | |||
| MS_CHECK_PTR(bias_data_); | |||
| if (input_tensors_.size() == kInputSize2) { | |||
| code.CodeMallocExpression(bias_data_, bias_data_size); | |||
| code.CodeFunction("memset", bias_data_, 0, bias_data_size); | |||
| code.CodeFunction("memcpy", bias_data_, bias_tensor_, out_channel_size * sizeof(float)); | |||
| init_code.CodeMallocExpression(bias_data_, bias_data_size); | |||
| init_code.CodeFunction("memset", bias_data_, 0, bias_data_size); | |||
| init_code.CodeFunction("memcpy", bias_data_, bias_tensor_, out_channel_size * sizeof(float)); | |||
| } else { | |||
| return RET_ERROR; | |||
| } | |||
| context->AppendInitCode(code.str()); | |||
| context->AppendInitCode(init_code.str()); | |||
| return RET_OK; | |||
| } | |||
| int ConvolutionFP32Coder::DoCode(CoderContext *const context) { | |||
| { | |||
| std::vector<string> asmFiles; | |||
| std::vector<std::string> asmFiles; | |||
| if (target_ == kARM32A) { | |||
| asmFiles = {"MatmulFp32.S", | |||
| "MatmulFp32Opt.S", | |||
| @@ -112,9 +119,14 @@ int ConvolutionFP32Coder::DoCode(CoderContext *const context) { | |||
| asmFiles = {"MatmulFp32.S", "MatmulFp32Opt.S", "PreSum4x16Int8Peroc.S", "MatVecMulFp32.S", | |||
| "PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "IndirectGemmInt16to32_8x4.S", "MatmulInt8.S"}; | |||
| } | |||
| Collect(context, | |||
| {"nnacl/kernel/fp32/conv_fp32_slim.h", "nnacl/fp32/matmul.h", "nnacl/conv_parameter.h", "nnacl/op_base.h"}, | |||
| {"common_func.c", "conv_fp32_slim.c", "matmul.c"}, asmFiles); | |||
| std::vector<std::string> h_files = {"nnacl/fp32/conv_common_fp32.h", "nnacl/fp32/matmul.h", | |||
| "nnacl/conv_parameter.h", "nnacl/op_base.h"}; | |||
| std::vector<std::string> c_files = {"common_func.c", "conv_common_fp32.c", "matmul.c"}; | |||
| if (de_quant_flag_) { | |||
| h_files.emplace_back("wrapper/fp32/dequant_int8_to_fp32_wrapper.h"); | |||
| c_files.emplace_back("dequant_int8_to_fp32_wrapper.c"); | |||
| } | |||
| Collect(context, h_files, c_files, asmFiles); | |||
| } | |||
| NNaclFp32Serializer code; | |||
| // call the op function | |||
| @@ -122,7 +134,7 @@ int ConvolutionFP32Coder::DoCode(CoderContext *const context) { | |||
| code.CodeFunction("memset", col_major_input_, "0", col_major_input_size_); | |||
| code.CodeStruct("conv_parameter", *conv_param_); | |||
| int task_id = 0; | |||
| code.CodeFunction("ConvFp32Slim", input_tensor_, packed_input_, packed_weight_, bias_data_, col_major_input_, | |||
| code.CodeFunction("ConvFp32", input_tensor_, packed_input_, packed_weight_, bias_data_, col_major_input_, | |||
| output_tensor_, task_id, "(ConvParameter *)&conv_parameter"); | |||
| context->AppendCode(code.str()); | |||
| @@ -135,18 +147,18 @@ std::unique_ptr<OperatorCoder> CPUConvolutionFP32CoderCreator(const std::vector< | |||
| Target target) { | |||
| std::vector<Tensor *> inputs = in_tensors; | |||
| std::vector<Tensor *> outputs = out_tensors; | |||
| auto primitive = node->primitive_; | |||
| if (!primitive) { | |||
| const void *primitive = node->primitive_; | |||
| if (primitive == nullptr) { | |||
| return nullptr; | |||
| } | |||
| OpParameter *parameter = | |||
| PopulateRegistry::GetInstance()->GetParameterCreator((schema::PrimitiveType(primitive->Type())))(primitive); | |||
| if (parameter == nullptr) { | |||
| MS_LOG(ERROR) << "PopulateParameter return nullptr, type: " | |||
| << schema::EnumNamePrimitiveType((schema::PrimitiveType)(primitive->Type())); | |||
| int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); | |||
| ParameterGen paramGen = | |||
| PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node->primitive_), schema_version); | |||
| if (paramGen == nullptr) { | |||
| MS_LOG(ERROR) << "parameter generator is null"; | |||
| return nullptr; | |||
| } | |||
| auto conv_param = reinterpret_cast<ConvParameter *>(parameter); | |||
| auto conv_param = reinterpret_cast<ConvParameter *>(paramGen(node->primitive_)); | |||
| bool use_winograd = false; | |||
| int out_unit = 0; | |||
| int kernel_h = conv_param->kernel_h_; | |||
| @@ -159,7 +171,7 @@ std::unique_ptr<OperatorCoder> CPUConvolutionFP32CoderCreator(const std::vector< | |||
| conv_param->output_channel_ = outputs.at(kOutputIndex)->Channel(); | |||
| conv_param->op_parameter_.thread_num_ = 1; | |||
| CheckIfUseWinograd(&use_winograd, &out_unit, conv_param); | |||
| free(parameter); | |||
| free(conv_param); | |||
| // weight de quant | |||
| std::unique_ptr<OperatorCoder> coder; | |||
| if (kernel_h == 1 && kernel_w == 1) { | |||
| @@ -175,5 +187,32 @@ std::unique_ptr<OperatorCoder> CPUConvolutionFP32CoderCreator(const std::vector< | |||
| return coder; | |||
| } | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Conv2D, CPUConvolutionFP32CoderCreator) | |||
| std::unique_ptr<OperatorCoder> CPUConv2DFusionFP32CoderCreator(const std::vector<Tensor *> &in_tensors, | |||
| const std::vector<Tensor *> &out_tensors, | |||
| const Model::Node *node, size_t node_index, | |||
| Target target) { | |||
| const void *primitive = node->primitive_; | |||
| if (primitive == nullptr) { | |||
| return nullptr; | |||
| } | |||
| int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); | |||
| ParameterGen paramGen = | |||
| PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(node->primitive_), schema_version); | |||
| if (paramGen == nullptr) { | |||
| MS_LOG(ERROR) << "parameter generator is null"; | |||
| return nullptr; | |||
| } | |||
| auto conv_param = reinterpret_cast<ConvParameter *>(paramGen(node->primitive_)); | |||
| std::unique_ptr<OperatorCoder> coder; | |||
| if (conv_param->group_ == 1) { | |||
| coder = CPUConvolutionFP32CoderCreator(in_tensors, out_tensors, node, node_index, target); | |||
| } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) { | |||
| coder = CPUOpCoderCreator<ConvolutionDepthwiseFP32Coder>(in_tensors, out_tensors, node, node_index, target); | |||
| } else { | |||
| // GroupConv | |||
| } | |||
| return coder; | |||
| } | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Conv2DFusion, CPUConv2DFusionFP32CoderCreator) | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| @@ -14,14 +14,14 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_FP32_CONVOLUTION_FP32_CODER_H_ | |||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_FP32_CONVOLUTION_FP32_CODER_H_ | |||
| #ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_CONVOLUTION_FP32_CODER_H_ | |||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_CONVOLUTION_FP32_CODER_H_ | |||
| #include <vector> | |||
| #include <string> | |||
| #include "nnacl/conv_parameter.h" | |||
| #include "micro/coder/opcoders/base/conv2d_base_coder.h" | |||
| #include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| #include "coder/opcoders/base/conv2d_base_coder.h" | |||
| #include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| namespace mindspore::lite::micro::nnacl { | |||
| class ConvolutionFP32Coder final : public Conv2DBaseCoder { | |||
| @@ -51,12 +51,14 @@ class ConvolutionFP32Coder final : public Conv2DBaseCoder { | |||
| size_t packed_input_size_{0}; | |||
| int thread_stride_{0}; | |||
| bool de_quant_flag_{false}; | |||
| int thread_count_{0}; | |||
| float *col_major_input_{nullptr}; | |||
| size_t col_major_input_size_{0}; | |||
| size_t pack_weight_size_{0}; | |||
| }; | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_FP32_CONVOLUTION_FP32_CODER_H_ | |||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_CONVOLUTION_FP32_CODER_H_ | |||
| @@ -13,12 +13,12 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "micro/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.h" | |||
| #include "coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.h" | |||
| #include <array> | |||
| #include "nnacl/base/minimal_filtering_generator.h" | |||
| #include "micro/coder/log.h" | |||
| #include "micro/coder/opcoders/file_collector.h" | |||
| #include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| #include "coder/log.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| #include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| namespace mindspore::lite::micro::nnacl { | |||
| const std::array<std::string, 9> InputTransFuncList = { | |||
| @@ -222,10 +222,11 @@ int ConvolutionWinogradFP32Coder::DoCode(CoderContext *const context) { | |||
| asmFiles = {"MatmulFp32.S", "MatmulFp32Opt.S", "PreSum4x16Int8Peroc.S", "MatVecMulFp32.S", | |||
| "PreSum4x16Int8Peroc.S", "PreSum4x16Int8Pert.S", "IndirectGemmInt16to32_8x4.S", "MatmulInt8.S"}; | |||
| } | |||
| Collect(context, {"nnacl/fp32/conv.h", "nnacl/common_func.h"}, | |||
| {"common_func.c", "conv_int8.c", "matmul_int8.c", "pack.c", "conv.c", "winograd_transform.c", | |||
| "common_func_fp32.c", "fixed_point.c", "winograd_utils.c", "minimal_filtering_generator.c"}, | |||
| asmFiles); | |||
| Collect( | |||
| context, {"nnacl/fp32/conv_winograd_fp32.h", "nnacl/common_func.h"}, | |||
| {"common_func.c", "conv_int8.c", "matmul_int8.c", "pack_fp32.c", "conv_winograd_fp32.c", "winograd_transform.c", | |||
| "common_func_fp32.c", "fixed_point.c", "winograd_utils.c", "minimal_filtering_generator.c"}, | |||
| asmFiles); | |||
| NNaclFp32Serializer code; | |||
| // call the op function | |||
| @@ -20,7 +20,7 @@ | |||
| #include <memory> | |||
| #include <string> | |||
| #include <vector> | |||
| #include "micro/coder/opcoders/base/conv2d_base_coder.h" | |||
| #include "coder/opcoders/base/conv2d_base_coder.h" | |||
| #include "nnacl/conv_parameter.h" | |||
| namespace mindspore::lite::micro::nnacl { | |||
| @@ -22,6 +22,7 @@ | |||
| #include "coder/opcoders/file_collector.h" | |||
| #include "nnacl/fp32/matmul_fp32.h" | |||
| #include "wrapper/fp32/matmul_fp32_wrapper.h" | |||
| #include "coder/opcoders/nnacl/dequant/de_quant.h" | |||
| using mindspore::schema::PrimitiveType_MatMul; | |||
| @@ -31,6 +32,13 @@ int MatMulFP32BaseCoder::ReSize() { | |||
| ResizeParameter(); | |||
| thread_count_ = MSMIN(thread_num_, UP_DIV(params_->col_align_, col_tile_)); | |||
| thread_stride_ = UP_DIV(UP_DIV(params_->col_align_, col_tile_), thread_count_); | |||
| // can not call Malloc in DoCode,so move this runtime init to final resize | |||
| if (!params_->a_const_) { | |||
| MS_CHECK_RET_CODE(InitBufferA(), "InitBufferA failed"); | |||
| } | |||
| if (!params_->b_const_) { | |||
| MS_CHECK_RET_CODE(InitBufferB(), "InitBufferB failed"); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -45,17 +53,16 @@ int MatMulFP32BaseCoder::InitBiasData() { | |||
| } | |||
| void MatMulFP32BaseCoder::InitParameter() { | |||
| row_tile_ = C12NUM; | |||
| if (target_ == kARM32A) { | |||
| row_tile_ = C12NUM; | |||
| col_tile_ = C4NUM; | |||
| } else { | |||
| row_tile_ = C12NUM; | |||
| col_tile_ = C8NUM; | |||
| } | |||
| } | |||
| void MatMulFP32BaseCoder::ResizeParameter() { | |||
| if (params_->row_ == 1 && !params_->b_const_) { | |||
| if (params_->row_ == 1) { | |||
| vec_matmul_ = true; | |||
| } | |||
| params_->row_align_ = vec_matmul_ ? 1 : UP_ROUND(params_->row_, row_tile_); | |||
| @@ -66,12 +73,11 @@ int MatMulFP32BaseCoder::InitBufferA() { | |||
| if (a_pack_ptr_ != nullptr) { | |||
| return RET_OK; | |||
| } | |||
| a_pack_ptr_size_ = static_cast<size_t>(params_->batch * params_->row_align_ * params_->deep_ * sizeof(float)); | |||
| if (params_->a_const_) { | |||
| a_pack_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight)); | |||
| } else { | |||
| a_pack_ptr_size_ = static_cast<size_t>(params_->batch * params_->row_align_ * params_->deep_ * sizeof(float)); | |||
| a_pack_ptr_ = | |||
| reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, a_pack_ptr_size_, kOfflinePackWeight)); | |||
| a_pack_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, a_pack_ptr_size_, kWorkspace)); | |||
| } | |||
| MS_CHECK_PTR(a_pack_ptr_); | |||
| return RET_OK; | |||
| @@ -81,12 +87,11 @@ int MatMulFP32BaseCoder::InitBufferB() { | |||
| if (b_pack_ptr_ != nullptr) { | |||
| return RET_OK; | |||
| } | |||
| b_pack_ptr_size_ = static_cast<size_t>(params_->batch * params_->col_align_ * params_->deep_ * sizeof(float)); | |||
| if (params_->b_const_) { | |||
| b_pack_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, kOnlineSize, kOnlinePackWeight)); | |||
| } else { | |||
| b_pack_ptr_size_ = static_cast<size_t>(params_->batch * params_->col_align_ * params_->deep_ * sizeof(float)); | |||
| b_pack_ptr_ = | |||
| reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, b_pack_ptr_size_, kOfflinePackWeight)); | |||
| b_pack_ptr_ = reinterpret_cast<float *>(allocator_->Malloc(kNumberTypeFloat32, b_pack_ptr_size_, kWorkspace)); | |||
| } | |||
| MS_CHECK_PTR(b_pack_ptr_); | |||
| return RET_OK; | |||
| @@ -108,12 +113,9 @@ int MatMulFP32BaseCoder::Init() { | |||
| MS_CHECK_RET_CODE(InitBiasData(), "InitBiasData failed"); | |||
| if (params_->a_const_) { | |||
| MS_CHECK_RET_CODE(InitBufferA(), "InitBufferA failed"); | |||
| MS_CHECK_RET_CODE(InitMatrixA(reinterpret_cast<float *>(input_tensor_->data_c())), "InitMatrixA failed"); | |||
| } | |||
| if (params_->b_const_) { | |||
| MS_CHECK_RET_CODE(InitBufferB(), "InitBufferB failed"); | |||
| MS_CHECK_RET_CODE(InitMatrixB(reinterpret_cast<float *>(filter_tensor_->data_c())), "InitMatrixB failed"); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -124,12 +126,17 @@ int MatMulFP32BaseCoder::DoCode(CoderContext *const context) { | |||
| // generate code .h .c | |||
| std::vector<std::string> asm_files; | |||
| if (target_ == kARM32A) { | |||
| asm_files = {"MatmulFp32.S", "MatmulFp32Opt.S"}; | |||
| asm_files = {"MatmulFp32.S", "MatmulFp32Opt.S", "MatmulFp32Opt12x4.S"}; | |||
| } else if (target_ == kARM64) { | |||
| asm_files = {"arm64/MatmulFp32.S", "MatmulFp32Opt.S", "arm64/MatVecMulFp32.S"}; | |||
| asm_files = {"MatmulFp32.S", "MatmulFp32Opt.S", "MatVecMulFp32.S"}; | |||
| } | |||
| std::vector<std::string> h_files = {"nnacl/fp32/matmul_fp32.h", "wrapper/fp32/matmul_fp32_wrapper.h"}; | |||
| std::vector<std::string> c_files = {"matmul_fp32.c", "matmul_fp32_wrapper.c"}; | |||
| if (de_quant_flag_) { | |||
| h_files.emplace_back("wrapper/fp32/dequant_int8_to_fp32_wrapper.h"); | |||
| c_files.emplace_back("dequant_int8_to_fp32_wrapper.c"); | |||
| } | |||
| Collect(context, {"nnacl/fp32/matmul.h", "adapter/fp32/matmul_fp32_adapter.h"}, {"matmul.c", "matmul_fp32_adapter.c"}, | |||
| asm_files); | |||
| Collect(context, h_files, c_files, asm_files); | |||
| NNaclFp32Serializer code; | |||
| NNaclFp32Serializer init_code; | |||
| code.CodeStruct("mat_mul_parameter", *params_); | |||
| @@ -137,9 +144,12 @@ int MatMulFP32BaseCoder::DoCode(CoderContext *const context) { | |||
| // do bias packing to init | |||
| if (bias_ptr_) { | |||
| init_code.CodeMallocExpression(bias_ptr_, bias_pack_ptr_size_); | |||
| init_code.CodeFunction("memcpy", bias_ptr_, bias_tensor_->data_c(), bias_pack_ptr_size_); | |||
| init_code.CodeFunction("memcpy", bias_ptr_, bias_tensor_, bias_pack_ptr_size_); | |||
| } | |||
| // Get Tensor Pointer | |||
| std::string a_str = allocator_->GetRuntimeAddr(input_tensor_); | |||
| std::string b_str = allocator_->GetRuntimeAddr(filter_tensor_); | |||
| std::string c_str = allocator_->GetRuntimeAddr(output_tensor_); | |||
| std::string a_pack_str = allocator_->GetRuntimeAddr(a_pack_ptr_); | |||
| std::string b_pack_str = allocator_->GetRuntimeAddr(b_pack_ptr_); | |||
| @@ -147,12 +157,28 @@ int MatMulFP32BaseCoder::DoCode(CoderContext *const context) { | |||
| // do const value packing to init | |||
| if (!params_->a_const_) { | |||
| code.CodeFunction("InitMatrixA", input_tensor_, a_pack_ptr_, "&mat_mul_parameter", vec_matmul_); | |||
| init_code.CodeMallocExpression(b_pack_ptr_, b_pack_ptr_size_); | |||
| std::string b_src_str = b_str; | |||
| if (de_quant_flag_) { | |||
| // reuse to b_pack_str | |||
| b_src_str = Dequant::GetInstance()->de_quant_buffer_str(); | |||
| std::string de_quant_function = Dequant::GetInstance()->GetMicroDeQuantFunction(filter_tensor_, b_str); | |||
| init_code << de_quant_function; | |||
| } | |||
| // b_pack_str has been memset, no need to memset | |||
| init_code.CodeFunction("InitMatrixB", filter_tensor_, b_pack_ptr_, "&mat_mul_parameter", vec_matmul_); | |||
| init_code.CodeFunction("InitMatrixB", b_src_str, b_pack_ptr_, "&mat_mul_parameter", vec_matmul_); | |||
| } | |||
| if (!params_->b_const_) { | |||
| init_code.CodeMallocExpression(a_pack_str, a_pack_ptr_size_); | |||
| std::string a_src_str = a_str; | |||
| if (de_quant_flag_) { | |||
| // reuse to a_pack_str | |||
| a_src_str = Dequant::GetInstance()->de_quant_buffer_str(); | |||
| std::string de_quant_function = Dequant::GetInstance()->GetMicroDeQuantFunction(input_tensor_, a_str); | |||
| init_code << de_quant_function; | |||
| } | |||
| // a_pack_str has been memset, no need to memset | |||
| init_code.CodeFunction("InitMatrixA", input_tensor_, a_pack_ptr_, "&mat_mul_parameter", vec_matmul_); | |||
| init_code.CodeFunction("InitMatrixA", a_src_str, a_pack_ptr_, "&mat_mul_parameter", vec_matmul_); | |||
| code.CodeFunction("InitMatrixB", filter_tensor_, b_pack_ptr_, "&mat_mul_parameter", vec_matmul_); | |||
| } | |||
| @@ -165,13 +191,13 @@ int MatMulFP32BaseCoder::DoCode(CoderContext *const context) { | |||
| } | |||
| code << "for (int i = 0; i < " << params_->batch << "; ++i) {\n"; | |||
| if (vec_matmul_) { | |||
| code << "\t\tbatch_a_ptr = " << a_pack_str << " + i * " << params_->deep_ << ";\n"; | |||
| code << "\t\tbatch_b_ptr = " << b_pack_str << " + i * " << params_->deep_ * params_->col_ << ";\n"; | |||
| code << "\t\tbatch_c_ptr = " << c_str << " + i * " << params_->row_ * params_->col_ << ";\n"; | |||
| code << "\t\tfloat *batch_a_ptr = " << a_pack_str << " + i * " << params_->deep_ << ";\n"; | |||
| code << "\t\tfloat *batch_b_ptr = " << b_pack_str << " + i * " << params_->deep_ * params_->col_ << ";\n"; | |||
| code << "\t\tfloat *batch_c_ptr = " << c_str << " + i * " << params_->row_ * params_->col_ << ";\n"; | |||
| } else { | |||
| code << "\t\tbatch_a_ptr = " << a_pack_str << " + i * " << params_->row_align_ * params_->deep_ << ";\n"; | |||
| code << "\t\tbatch_b_ptr = " << b_pack_str << " + i * " << params_->deep_ * params_->col_align_ << ";\n"; | |||
| code << "\tbatch_c_ptr = " << c_str << " + i * " << params_->row_ * params_->col_ << ";\n"; | |||
| code << "\t\tfloat *batch_a_ptr = " << a_pack_str << " + i * " << params_->row_align_ * params_->deep_ << ";\n"; | |||
| code << "\t\tfloat *batch_b_ptr = " << b_pack_str << " + i * " << params_->deep_ * params_->col_align_ << ";\n"; | |||
| code << "\t\tfloat *batch_c_ptr = " << c_str << " + i * " << params_->row_ * params_->col_ << ";\n"; | |||
| } | |||
| if (vec_matmul_) { | |||
| @@ -56,6 +56,7 @@ class MatMulFP32BaseCoder : public OperatorCoder { | |||
| float *b_pack_ptr_ = nullptr; | |||
| float *bias_ptr_{nullptr}; | |||
| bool vec_matmul_{false}; | |||
| bool de_quant_flag_{false}; | |||
| private: | |||
| int col_tile_{0}; | |||
| @@ -18,6 +18,7 @@ | |||
| #include <vector> | |||
| #include "coder/log.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| #include "coder/opcoders/nnacl/dequant/de_quant.h" | |||
| using mindspore::schema::PrimitiveType_MatMul; | |||
| @@ -77,10 +78,12 @@ int MatMulFP32Coder::Prepare(CoderContext *const context) { | |||
| params_->b_const_ = (filter_tensor_->data_c() != nullptr); | |||
| MatMulFP32BaseCoder::InitParameter(); | |||
| if (params_->a_const_) { | |||
| InitShapeA(); | |||
| de_quant_flag_ = Dequant::GetInstance()->CheckDequantFlag(input_tensor_); | |||
| MS_CHECK_RET_CODE(InitShapeA(), "MatMulFP32Coder init_shape_a failed"); | |||
| } | |||
| if (params_->b_const_) { | |||
| InitShapeB(); | |||
| de_quant_flag_ = Dequant::GetInstance()->CheckDequantFlag(filter_tensor_); | |||
| MS_CHECK_RET_CODE(InitShapeB(), "MatMulFP32Coder init_shape_b failed"); | |||
| } | |||
| MS_CHECK_RET_CODE(MatMulFP32BaseCoder::Init(), "MatMulFP32Coder init failed"); | |||
| return ReSize(); | |||
| @@ -14,14 +14,14 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "micro/coder/opcoders/nnacl/fp32/pad_fp32_coder.h" | |||
| #include "coder/opcoders/nnacl/fp32/pad_fp32_coder.h" | |||
| #include <string> | |||
| #include <vector> | |||
| #include "micro/coder/log.h" | |||
| #include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| #include "micro/coder/opcoders/file_collector.h" | |||
| #include "coder/log.h" | |||
| #include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| using mindspore::schema::PrimitiveType_Pad; | |||
| using mindspore::schema::PrimitiveType_PadFusion; | |||
| namespace mindspore::lite::micro::nnacl { | |||
| @@ -99,5 +99,5 @@ int PadFP32Coder::DoCode(CoderContext *const context) { | |||
| return RET_OK; | |||
| } | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Pad, CPUOpCoderCreator<PadFP32Coder>) | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_PadFusion, CPUOpCoderCreator<PadFP32Coder>) | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| @@ -21,7 +21,8 @@ | |||
| #include "coder/log.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| using mindspore::schema::PrimitiveType_Pooling; | |||
| using mindspore::schema::PrimitiveType_AvgPoolFusion; | |||
| using mindspore::schema::PrimitiveType_MaxPoolFusion; | |||
| namespace mindspore::lite::micro::nnacl { | |||
| @@ -46,7 +47,7 @@ int PoolingFP32Coder::DoCode(CoderContext *const context) { | |||
| float minf = -FLT_MAX; | |||
| float maxf = FLT_MAX; | |||
| if (pooling_parameter->pool_mode_ == PoolMode_MaxPool) { | |||
| Collect(context, {"nnacl/kernel/fp32/max_pooling_fp32_slim.h"}, {"max_pooling_fp32_slim.c"}); | |||
| Collect(context, {"nnacl/fp32/pooling_fp32.h"}, {"pooling_fp32.c"}); | |||
| switch (pooling_parameter->act_type_) { | |||
| case ActType_Relu: { | |||
| minf = 0.f; | |||
| @@ -63,14 +64,9 @@ int PoolingFP32Coder::DoCode(CoderContext *const context) { | |||
| } | |||
| } | |||
| if (thread_num_ > 1) { | |||
| code.CodeBaseStruct("PoolingFp32Args", "args", input_tensor_, output_tensor_, "&pooling_parameter", minf, maxf); | |||
| CODE_PARALLEL_FUNC("MaxPoolingFp32Run"); | |||
| } else { | |||
| code.CodeFunction("MaxPooling", input_tensor_, output_tensor_, "&pooling_parameter", task_id, minf, maxf); | |||
| } | |||
| code.CodeFunction("MaxPooling", input_tensor_, output_tensor_, "&pooling_parameter", task_id, minf, maxf); | |||
| } else { | |||
| Collect(context, {"nnacl/fp32/pooling.h"}, {"pooling.c"}); | |||
| Collect(context, {"nnacl/fp32/pooling_fp32.h"}, {"pooling_fp32.c"}); | |||
| switch (pooling_parameter->act_type_) { | |||
| case ActType_Relu: { | |||
| minf = 0.f; | |||
| @@ -86,12 +82,7 @@ int PoolingFP32Coder::DoCode(CoderContext *const context) { | |||
| break; | |||
| } | |||
| } | |||
| if (thread_num_ > 1) { | |||
| code.CodeBaseStruct("PoolingFp32Args", "args", input_tensor_, output_tensor_, "&pooling_parameter", minf, maxf); | |||
| CODE_PARALLEL_FUNC("AvgPoolingFp32Run"); | |||
| } else { | |||
| code.CodeFunction("AvgPooling", input_tensor_, output_tensor_, "&pooling_parameter", task_id, minf, maxf); | |||
| } | |||
| code.CodeFunction("AvgPooling", input_tensor_, output_tensor_, "&pooling_parameter", task_id, minf, maxf); | |||
| } | |||
| MS_LOG(INFO) << "PoolingFp32Code has been called"; | |||
| @@ -99,5 +90,6 @@ int PoolingFP32Coder::DoCode(CoderContext *const context) { | |||
| return lite::RET_OK; | |||
| } | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Pooling, CPUOpCoderCreator<PoolingFP32Coder>) | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_AvgPoolFusion, CPUOpCoderCreator<PoolingFP32Coder>) | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_MaxPoolFusion, CPUOpCoderCreator<PoolingFP32Coder>) | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| @@ -18,7 +18,7 @@ | |||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_POOLFP32_CODER_H_ | |||
| #include <vector> | |||
| #include "micro/coder/opcoders/op_coder.h" | |||
| #include "coder/opcoders/op_coder.h" | |||
| namespace mindspore::lite::micro::nnacl { | |||
| @@ -20,7 +20,7 @@ | |||
| #include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| using mindspore::schema::PrimitiveType_Power; | |||
| using mindspore::schema::PrimitiveType_PowFusion; | |||
| namespace mindspore::lite::micro::nnacl { | |||
| @@ -55,6 +55,6 @@ int PowerFP32Coder::DoCode(CoderContext *const context) { | |||
| return RET_OK; | |||
| } | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Power, CPUOpCoderCreator<PowerFP32Coder>) | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_PowFusion, CPUOpCoderCreator<PowerFP32Coder>) | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| @@ -20,7 +20,7 @@ | |||
| #include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| using mindspore::schema::PrimitiveType_Reduce; | |||
| using mindspore::schema::PrimitiveType_PowFusion; | |||
| using mindspore::schema::PrimitiveType_ReduceFusion; | |||
| namespace mindspore::lite::micro::nnacl { | |||
| int ReduceFP32Coder::Prepare(CoderContext *const context) { | |||
| @@ -116,6 +116,6 @@ int ReduceFP32Coder::DoCode(CoderContext *const context) { | |||
| return RET_OK; | |||
| } | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Reduce, CPUOpCoderCreator<ReduceFP32Coder>) | |||
| // Register the reduce coder under the reduce primitive. PrimitiveType_PowFusion is already used for | |||
| // PowerFP32Coder with the same target and data type, so reusing it here would register two coders | |||
| // under one key. | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_ReduceFusion, CPUOpCoderCreator<ReduceFP32Coder>) | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| @@ -18,8 +18,9 @@ | |||
| #include "coder/log.h" | |||
| #include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| #include "coder/opcoders/parallel.h" | |||
| using mindspore::schema::PrimitiveType_Scale; | |||
| using mindspore::schema::PrimitiveType_ScaleFusion; | |||
| namespace mindspore::lite::micro::nnacl { | |||
| ScaleFP32Coder::~ScaleFP32Coder() { | |||
| @@ -131,34 +132,26 @@ int ScaleFP32Coder::DoCode(CoderContext *const context) { | |||
| NNaclFp32Serializer code; | |||
| code.CodeStruct("scale_parameter", *scale_param_); | |||
| if (thread_num_ > 1) { | |||
| code.CodeBaseStruct("ScaleFp32Args", "args", input_tensor_, output_tensor_, scale_tensor, offset_tensor, | |||
| switch (scale_param_->activation_type_) { | |||
| case schema::ActivationType_RELU6: | |||
| code.CodeFunction("DoScaleRelu6", input_tensor_, output_tensor_, scale_tensor, offset_tensor, kDefaultTaskId, | |||
| "&scale_parameter"); | |||
| CODE_PARALLEL_FUNC("ScaleFp32Run"); | |||
| } else { | |||
| int task_id = 0; | |||
| switch (scale_param_->activation_type_) { | |||
| case schema::ActivationType_RELU6: | |||
| code.CodeFunction("DoScaleRelu6", input_tensor_, output_tensor_, scale_tensor, offset_tensor, task_id, | |||
| "&scale_parameter"); | |||
| break; | |||
| case schema::ActivationType_RELU: | |||
| code.CodeFunction("DoScaleRelu", input_tensor_, output_tensor_, scale_tensor, offset_tensor, task_id, | |||
| "&scale_parameter"); | |||
| break; | |||
| case schema::ActivationType_NO_ACTIVATION: | |||
| code.CodeFunction("DoScale", input_tensor_, output_tensor_, scale_tensor, offset_tensor, task_id, | |||
| "&scale_parameter"); | |||
| break; | |||
| default: | |||
| MS_LOG(ERROR) << "Scale does not support activation type " << scale_param_->activation_type_; | |||
| return RET_ERROR; | |||
| } | |||
| break; | |||
| case schema::ActivationType_RELU: | |||
| code.CodeFunction("DoScaleRelu", input_tensor_, output_tensor_, scale_tensor, offset_tensor, kDefaultTaskId, | |||
| "&scale_parameter"); | |||
| break; | |||
| case schema::ActivationType_NO_ACTIVATION: | |||
| code.CodeFunction("DoScale", input_tensor_, output_tensor_, scale_tensor, offset_tensor, kDefaultTaskId, | |||
| "&scale_parameter"); | |||
| break; | |||
| default: | |||
| MS_LOG(ERROR) << "Scale does not support activation type " << scale_param_->activation_type_; | |||
| return RET_ERROR; | |||
| } | |||
| MS_LOG(INFO) << "ScaleFP32Code has been called"; | |||
| context->AppendCode(code.str()); | |||
| return RET_OK; | |||
| } | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Scale, CPUOpCoderCreator<ScaleFP32Coder>) | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_ScaleFusion, CPUOpCoderCreator<ScaleFP32Coder>) | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| @@ -1,74 +0,0 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "coder/opcoders/nnacl/fp32/slice_fp32_coder.h" | |||
| #include <string> | |||
| #include "nnacl/slice_parameter.h" | |||
| #include "src/ops/slice.h" | |||
| #include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| using mindspore::schema::PrimitiveType_Slice; | |||
| namespace mindspore::lite::micro::nnacl { | |||
| // Slice has nothing to set up before code generation; always reports success. | |||
| int SliceFP32Coder::Prepare(CoderContext *const context) { | |||
|   return RET_OK; | |||
| } | |||
| // Generates the float32 Slice call: fills the SliceParameter from the input shape and the primitive's | |||
| // post-processed begin/size vectors, serializes it, and appends the resulting code to the context. | |||
| int SliceFP32Coder::DoCode(CoderContext *const context) { | |||
|   // generate code .h .c | |||
|   Collect(context, {"nnacl/slice_parameter.h", "nnacl/fp32/slice.h"}, {"slice.c"}); | |||
|   auto *slice_param = reinterpret_cast<SliceParameter *>(parameter_); | |||
|   auto *slice_primitive = reinterpret_cast<const mindspore::lite::Slice *>(OperatorCoder::primitive()); | |||
|   std::vector<int> begins = slice_primitive->GetPostProcessBegin(); | |||
|   std::vector<int> sizes = slice_primitive->GetPostProcessSize(); | |||
|   std::vector<int> in_shape = input_tensor_->shape(); | |||
|   NNaclFp32Serializer code; | |||
|   // A negative requested size selects everything from begin up to the end of that axis. | |||
|   auto extent_of = [&begins, &sizes, &in_shape](int axis) { | |||
|     const int requested = sizes.at(axis); | |||
|     return requested < 0 ? in_shape.at(axis) - begins.at(axis) : requested; | |||
|   }; | |||
|   const int dims = slice_param->param_length_; | |||
|   // The four passes are kept separate and in the original order (shape, begin, end, size). | |||
|   for (int axis = 0; axis < dims; ++axis) { | |||
|     slice_param->shape_[axis] = in_shape.at(axis); | |||
|   } | |||
|   for (int axis = 0; axis < dims; ++axis) { | |||
|     slice_param->begin_[axis] = begins.at(axis); | |||
|   } | |||
|   for (int axis = 0; axis < dims; ++axis) { | |||
|     slice_param->end_[axis] = begins.at(axis) + extent_of(axis); | |||
|   } | |||
|   for (int axis = 0; axis < dims; ++axis) { | |||
|     slice_param->size_[axis] = extent_of(axis); | |||
|   } | |||
|   code.CodeStruct("slice_parameter", *slice_param); | |||
|   // call the op function | |||
|   if (dims < DIMENSION_4D) { | |||
|     code.CodeFunction("PadSliceParameterTo4D", "&slice_parameter"); | |||
|   } | |||
|   code.CodeFunction("DoSliceNoParallel", input_tensor_, output_tensor_, "&slice_parameter"); | |||
|   context->AppendCode(code.str()); | |||
|   return RET_OK; | |||
| } | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Slice, CPUOpCoderCreator<SliceFP32Coder>) | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| @@ -1,37 +0,0 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_SLICE_FP32_CODER_H_ | |||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_SLICE_FP32_CODER_H_ | |||
| #include <vector> | |||
| #include "coder/opcoders/op_coder.h" | |||
| namespace mindspore::lite::micro::nnacl { | |||
| // Operator coder for the float32 Slice op; all state lives in the OperatorCoder base. | |||
| class SliceFP32Coder final : public OperatorCoder { | |||
|  public: | |||
|   // Forwards every argument unchanged to the OperatorCoder constructor. | |||
|   SliceFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | |||
|                  const Model::Node *node, size_t node_index, Target target) | |||
|       : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {} | |||
|   ~SliceFP32Coder() override = default; | |||
|   // Both hooks are defined out of line in the matching source file. | |||
|   int Prepare(CoderContext *const context) override; | |||
|   int DoCode(CoderContext *const context) override; | |||
| }; | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_SLICE_FP32_CODER_H_ | |||
| @@ -20,7 +20,7 @@ | |||
| #include "schema/inner/ops_generated.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| using mindspore::schema::PrimitiveType_SoftMax; | |||
| using mindspore::schema::PrimitiveType_Softmax; | |||
| namespace mindspore::lite::micro::nnacl { | |||
| @@ -48,7 +48,7 @@ int SoftMaxFP32Coder::Prepare(CoderContext *const context) { | |||
| } | |||
| int SoftMaxFP32Coder::DoCode(CoderContext *const context) { | |||
| Collect(context, {"nnacl/fp32/softmax.h"}, {"softmax.c"}); | |||
| Collect(context, {"nnacl/fp32/softmax_fp32.h"}, {"softmax_fp32.c", "exp_fp32.c"}); | |||
| NNaclFp32Serializer code; | |||
| code.CodeStruct("softmax_parameter", *softmax_param_); | |||
| code.CodeFunction("memset", sum_data_, "0", sum_data_size_); | |||
| @@ -58,6 +58,6 @@ int SoftMaxFP32Coder::DoCode(CoderContext *const context) { | |||
| return RET_OK; | |||
| } | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_SoftMax, CPUOpCoderCreator<SoftMaxFP32Coder>) | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Softmax, CPUOpCoderCreator<SoftMaxFP32Coder>) | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| @@ -17,7 +17,7 @@ | |||
| #define MINDSPORE_LITE_MICRO_CODER_SOFTMAX_CODER_H_ | |||
| #include <vector> | |||
| #include "micro/coder/opcoders/base/softmax_base_coder.h" | |||
| #include "coder/opcoders/base/softmax_base_coder.h" | |||
| namespace mindspore::lite::micro::nnacl { | |||
| class SoftMaxFP32Coder final : public SoftmaxBaseCoder { | |||
| @@ -0,0 +1,57 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "coder/opcoders/nnacl/fp32/splice_fp32_coder.h" | |||
| #include <string> | |||
| #include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| #include "src/common/log_adapter.h" | |||
| #include "nnacl/splice_parameter.h" | |||
| using mindspore::schema::PrimitiveType_Splice; | |||
| namespace mindspore::lite::micro::nnacl { | |||
| // Checks that the input and output shapes are compatible with the splice parameter, then emits the | |||
| // serialized parameter struct and a SpliceFp32 call. Returns RET_ERROR on any shape mismatch. | |||
| int SpliceFP32Coder::DoCode(CoderContext *const context) { | |||
|   auto *param = reinterpret_cast<SpliceParameter *>(parameter_); | |||
|   // to make forward_indexes nullptr | |||
|   param->forward_indexes_ = nullptr; | |||
|   std::vector<int> in_shape = input_tensor_->shape(); | |||
|   std::vector<int> out_shape = output_tensor_->shape(); | |||
|   // Equal ranks plus one rank equal to kInputSize2 implies the other is as well. | |||
|   const bool rank_ok = in_shape.size() == out_shape.size() && in_shape.size() == kInputSize2; | |||
|   if (!rank_ok) { | |||
|     MS_LOG(ERROR) << "SpliceFP32Coder src_shape size not equal to dst_shape"; | |||
|     return RET_ERROR; | |||
|   } | |||
|   int src_row = in_shape.at(kInputIndex); | |||
|   int src_col = in_shape.at(kBiasIndex); | |||
|   int dst_row = out_shape.at(kInputIndex); | |||
|   int dst_col = out_shape.at(kBiasIndex); | |||
|   if (dst_row != src_row) { | |||
|     MS_LOG(ERROR) << "SpliceFP32Coder src_row not equal to dst_row"; | |||
|     return RET_ERROR; | |||
|   } | |||
|   // Each output column count must equal the input column count times context_dim_. | |||
|   if (dst_col != src_col * param->context_dim_) { | |||
|     MS_LOG(ERROR) << "SpliceFP32Coder src_col not match to dst_col"; | |||
|     return RET_ERROR; | |||
|   } | |||
|   Collect(context, {"nnacl/splice_parameter.h", "nnacl/fp32/splice_fp32.h"}, {"splice_fp32.c"}); | |||
|   NNaclFp32Serializer code; | |||
|   code.CodeStruct("splice_parameter", *param); | |||
|   code.CodeFunction("SpliceFp32", input_tensor_, src_row, src_col, "&splice_parameter", output_tensor_, dst_row, | |||
|                     dst_col); | |||
|   context->AppendCode(code.str()); | |||
|   MS_LOG(DEBUG) << "SpliceFP32Coder do_code ok"; | |||
|   return RET_OK; | |||
| } | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Splice, CPUOpCoderCreator<SpliceFP32Coder>) | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| @@ -0,0 +1,35 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_SPLICE_FP32_CODER_H_ | |||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_SPLICE_FP32_CODER_H_ | |||
| #include <vector> | |||
| #include "coder/opcoders/op_coder.h" | |||
| namespace mindspore::lite::micro::nnacl { | |||
| // Operator coder for the float32 Splice op; all state lives in the OperatorCoder base. | |||
| class SpliceFP32Coder final : public OperatorCoder { | |||
|  public: | |||
|   // Forwards every argument unchanged to the OperatorCoder constructor. | |||
|   SpliceFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | |||
|                   const Model::Node *node, size_t node_index, Target target) | |||
|       : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {} | |||
|   ~SpliceFP32Coder() override = default; | |||
|   // No preparation step is needed; always reports success. | |||
|   int Prepare(CoderContext *const context) override { | |||
|     return RET_OK; | |||
|   } | |||
|   // Defined out of line in the matching source file. | |||
|   int DoCode(CoderContext *const context) override; | |||
| }; | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_FP32_SPLICE_FP32_CODER_H_ | |||
| @@ -20,7 +20,7 @@ | |||
| #include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| using mindspore::schema::PrimitiveType_Tile; | |||
| using mindspore::schema::PrimitiveType_TileFusion; | |||
| namespace mindspore::lite::micro::nnacl { | |||
| void TileFP32Coder::ComputeStrides(const int *shape, int *strides, int ndim) const { | |||
| @@ -63,6 +63,6 @@ int TileFP32Coder::DoCode(CoderContext *const context) { | |||
| return RET_OK; | |||
| } | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Tile, CPUOpCoderCreator<TileFP32Coder>) | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_TileFusion, CPUOpCoderCreator<TileFP32Coder>) | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| @@ -14,11 +14,11 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "micro/coder/opcoders/nnacl/fp32/transpose_fp32_coder.h" | |||
| #include "coder/opcoders/nnacl/fp32/transpose_fp32_coder.h" | |||
| #include <vector> | |||
| #include <string> | |||
| #include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| #include "micro/coder/opcoders/file_collector.h" | |||
| #include "coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| using mindspore::schema::PrimitiveType_Transpose; | |||
| namespace mindspore::lite::micro::nnacl { | |||
| @@ -83,8 +83,8 @@ int TransposeFp32Coder::DoCode(CoderContext *const context) { | |||
| NNaclFp32Serializer code; | |||
| code.CodeStruct("transpose_parameter", *transpose_parameter_); | |||
| code.CodeFunction("DoTransposeFp32", input_tensor_, output_tensor_, in_shape_, out_shape_, "&transpose_parameter", | |||
| task_id, num_unit_thread, dim_size_, position_); | |||
| code.CodeFunction("DoTransposeFp32", input_tensor_, output_tensor_, in_shape_, out_shape_, | |||
| "(TransposeParameter *)&transpose_parameter", task_id, num_unit_thread, dim_size_, position_); | |||
| context->AppendCode(code.str()); | |||
| return RET_OK; | |||
| @@ -39,14 +39,14 @@ class TransposeFp32Coder final : public OperatorCoder { | |||
| private: | |||
| TransposeParameter *transpose_parameter_ = nullptr; | |||
| int thread_num_ = 1; | |||
| int thread_h_stride_ = 0; | |||
| int thread_h_num_ = 0; | |||
| int num_unit_ = 0; | |||
| int *in_shape_ = nullptr; | |||
| int *out_shape_ = nullptr; | |||
| int *dim_size_ = nullptr; | |||
| int *position_ = nullptr; | |||
| int thread_num_{1}; | |||
| int thread_h_stride_{0}; | |||
| int thread_h_num_{0}; | |||
| int num_unit_{0}; | |||
| int *in_shape_{nullptr}; | |||
| int *out_shape_{nullptr}; | |||
| int *dim_size_{nullptr}; | |||
| int *position_{nullptr}; | |||
| }; | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| @@ -0,0 +1,74 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "coder/opcoders/nnacl/int8/sigmoid_int8_coder.h" | |||
| #include "coder/opcoders/nnacl/int8/relux_int8_coder.h" | |||
| #include "src/ops/populate/populate_register.h" | |||
| #include "nnacl/fp32/activation_fp32.h" | |||
| #include "schema/model_generated.h" | |||
| #include "src/common/version_manager.h" | |||
| using mindspore::schema::PrimitiveType_Activation; | |||
| namespace mindspore::lite::micro::nnacl { | |||
| // Creates the int8 coder that matches the activation type stored in the node's primitive. | |||
| // The primitive is turned into an OpParameter through the populate registry only to read its | |||
| // activation type; SIGMOID, RELU and RELU6 are dispatched to their dedicated coders. | |||
| // Returns nullptr when the primitive is missing, no parameter creator is registered, parameter | |||
| // population fails, or the activation type is not one of the supported three. | |||
| std::unique_ptr<OperatorCoder> CPUActivationINT8CoderCreator(const std::vector<Tensor *> &in_tensors, | |||
|                                                              const std::vector<Tensor *> &out_tensors, | |||
|                                                              const Model::Node *node, size_t node_index, | |||
|                                                              Target target) { | |||
|   const void *primitive_c = node->primitive_; | |||
|   if (primitive_c == nullptr) { | |||
|     return nullptr; | |||
|   } | |||
|   int schema_version = VersionManager::GetInstance()->GetSchemaVersion(); | |||
|   ParameterGen parameter_gen = | |||
|     PopulateRegistry::GetInstance()->GetParameterCreator(GetPrimitiveType(primitive_c), schema_version); | |||
|   if (parameter_gen == nullptr) { | |||
|     MS_LOG(ERROR) << "parameter generator is nullptr"; | |||
|     return nullptr; | |||
|   } | |||
|   OpParameter *parameter = parameter_gen(primitive_c); | |||
|   if (parameter == nullptr) { | |||
|     MS_LOG(ERROR) << "PopulateParameter return nullptr, type: " | |||
|                   << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(GetPrimitiveType(primitive_c))); | |||
|     return nullptr; | |||
|   } | |||
|   // NOTE(review): 'parameter' is only read here and never released in this function. If the | |||
|   // populate function heap-allocates it, this leaks -- confirm ownership and free it if needed. | |||
|   auto type = (reinterpret_cast<ActivationParameter *>(parameter))->type_; | |||
|   std::unique_ptr<OperatorCoder> coder; | |||
|   switch (static_cast<schema::ActivationType>(type)) { | |||
|     case schema::ActivationType_SIGMOID: | |||
|       coder = CPUOpCoderCreator<SigmodInt8Coder>(in_tensors, out_tensors, node, node_index, target); | |||
|       break; | |||
|     case schema::ActivationType_RELU: | |||
|       coder = CPUOpCoderCreator<ReluInt8Coder>(in_tensors, out_tensors, node, node_index, target); | |||
|       break; | |||
|     case schema::ActivationType_RELU6: | |||
|       coder = CPUOpCoderCreator<Relu6Int8Coder>(in_tensors, out_tensors, node, node_index, target); | |||
|       break; | |||
|     default: | |||
|       // Unsupported activation types leave 'coder' empty and are reported below. | |||
|       break; | |||
|   } | |||
|   if (coder == nullptr) { | |||
|     MS_LOG(ERROR) << "create activation int8 coder failed, activation type: " << type; | |||
|     return nullptr; | |||
|   } | |||
|   return coder; | |||
| } | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt8, PrimitiveType_Activation, CPUActivationINT8CoderCreator) | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| @@ -14,17 +14,18 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "micro/coder/opcoders/nnacl/int8/add_int8_coder.h" | |||
| #include "coder/opcoders/nnacl/int8/add_int8_coder.h" | |||
| #include <algorithm> | |||
| #include <type_traits> | |||
| #include "nnacl/int8/quantize.h" | |||
| #include "micro/coder/log.h" | |||
| #include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_int8_serializer.h" | |||
| #include "micro/coder/opcoders/file_collector.h" | |||
| #include "coder/log.h" | |||
| #include "coder/opcoders/serializers/nnacl_serializer/nnacl_int8_serializer.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| #include "coder/opcoders/parallel.h" | |||
| using mindspore::schema::PrimitiveType_Add; | |||
| using mindspore::schema::PrimitiveType_AddFusion; | |||
| namespace mindspore::lite::micro { | |||
| namespace mindspore::lite::micro::nnacl { | |||
| int AddInt8Coder::Prepare(CoderContext *const context) { | |||
| input0 = input_tensors().at(0); | |||
| @@ -38,26 +39,8 @@ int AddInt8Coder::Prepare(CoderContext *const context) { | |||
| return RET_OK; | |||
| } | |||
| // Serializes the add parameters and the argument struct, then emits a ParallelLaunch call that | |||
| // runs the broadcast or the plain add kernel depending on arith_para_->broadcasting_. | |||
| int AddInt8Coder::DoCode(CoderContext *const context) { | |||
|   Collect(context, {"wrapper/int8/conv1x1_init_int8.h"}, {"add_int8_wrapper.c", "add_int8.c", "thread_pool.c"}); | |||
|   nnacl::NNaclInt8Serializer code; | |||
|   code.CodeStruct("para", para_); | |||
|   code.CodeStruct("arith_para", *arith_para_); | |||
|   code.CodeBaseStruct("AddArgs", "args", "para", "arith_para", in_size_, out_size_, thread_num_s_, elements_num_, | |||
|                       support_opt_add_, input0, input1, output_tensor_); | |||
|   // Only the kernel entry point differs between the two launch variants. | |||
|   const char *run_func = arith_para_->broadcasting_ ? "AddBroadcastRun" : "AddRun"; | |||
|   code.CodeFunction("ParallelLaunch", "THREAD_POOL_DEFAULT", run_func, "&args", thread_num_s_); | |||
|   return RET_OK; | |||
| } | |||
| int AddInt8Coder::Init() { | |||
| arith_para_ = reinterpret_cast<ArithmeticParameter *>(parameter_); | |||
| para_.in0_args_.zp_ = input0->quant_params().front().zeroPoint * -1; | |||
| para_.in1_args_.zp_ = input1->quant_params().front().zeroPoint * -1; | |||
| para_.out_zp_ = output_tensor_->quant_params().front().zeroPoint; | |||
| @@ -152,5 +135,32 @@ int AddInt8Coder::ReSize() { | |||
| return RET_OK; | |||
| } | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt8, PrimitiveType_Add, CPUOpCoderCreator<AddInt8Coder>) | |||
| } // namespace mindspore::lite::micro | |||
| // Emits the int8 Add code: serializes both parameter structs and the AddInt8Args run-argument | |||
| // struct, then calls the broadcast or plain add kernel. With support_parallel_ set the kernel is | |||
| // dispatched through the thread pool, otherwise it is called directly with kDefaultTaskId. | |||
| // The generated code is appended to the context; always returns RET_OK. | |||
| int AddInt8Coder::DoCode(CoderContext *const context) { | |||
|   Collect(context, {"wrapper/int8/add_int8_wrapper.h"}, | |||
|           {"add_int8_wrapper.c", "add_int8.c", "arithmetic_base.c", "arithmetic_int8.c", "thread_pool.c"}); | |||
|   nnacl::NNaclInt8Serializer code; | |||
|   code.CodeStruct("para", para_); | |||
|   code.CodeStruct("arith_para", *arith_para_); | |||
|   // Both structs are passed by address. The first literal must be "&para"; it had been mangled into | |||
|   // a pilcrow character by HTML-entity decoding. | |||
|   code.CodeBaseStruct("AddInt8Args", kRunArgs, "&para", "&arith_para", in_size_, out_size_, gThreadNum, elements_num_, | |||
|                       support_opt_add_, input0, input1, output_tensor_); | |||
|   if (support_parallel_) { | |||
|     if (arith_para_->broadcasting_) { | |||
|       code.CodeFunction(kParallelLaunch, gThreadPool, "AddBroadcastInt8Run", kRunArgsAddr, gThreadNum); | |||
|     } else { | |||
|       code.CodeFunction(kParallelLaunch, gThreadPool, "AddInt8Run", kRunArgsAddr, gThreadNum); | |||
|     } | |||
|   } else { | |||
|     if (arith_para_->broadcasting_) { | |||
|       code.CodeFunction("AddBroadcastInt8Run", kRunArgsAddr, kDefaultTaskId); | |||
|     } else { | |||
|       code.CodeFunction("AddInt8Run", kRunArgsAddr, kDefaultTaskId); | |||
|     } | |||
|   } | |||
|   context->AppendCode(code.str()); | |||
|   return RET_OK; | |||
| } | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt8, PrimitiveType_AddFusion, CPUOpCoderCreator<AddInt8Coder>) | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| @@ -18,17 +18,15 @@ | |||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_ADD_INT8_CODER_H_ | |||
| #include <vector> | |||
| #include "micro/coder/opcoders/op_coder.h" | |||
| #include "coder/opcoders/op_coder.h" | |||
| #include "nnacl/int8/add_int8.h" | |||
| namespace mindspore::lite::micro { | |||
| class AddInt8Coder : public OperatorCoder { | |||
| namespace mindspore::lite::micro::nnacl { | |||
| class AddInt8Coder final : public OperatorCoder { | |||
| public: | |||
| AddInt8Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | |||
| const Model::Node *node, size_t node_index, Target target) | |||
| : OperatorCoder(in_tensors, out_tensors, node, node_index, target) { | |||
| arith_para_ = reinterpret_cast<ArithmeticParameter *>(parameter_); | |||
| } | |||
| : OperatorCoder(in_tensors, out_tensors, node, node_index, target) {} | |||
| ~AddInt8Coder() override = default; | |||
| @@ -49,5 +47,5 @@ class AddInt8Coder : public OperatorCoder { | |||
| int elements_num_{0}; | |||
| bool support_opt_add_{false}; | |||
| }; | |||
| } // namespace mindspore::lite::micro | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_ADD_INT8_CODER_H_ | |||
| @@ -0,0 +1,162 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "coder/opcoders/nnacl/int8/batchnorm_int8_coder.h" | |||
| #include <string> | |||
| #include "coder/log.h" | |||
| #include "coder/opcoders/serializers/nnacl_serializer/nnacl_int8_serializer.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| #include "coder/opcoders/parallel.h" | |||
| using mindspore::schema::PrimitiveType_BatchNorm; | |||
| namespace mindspore::lite::micro::nnacl { | |||
| // Derives the batch-norm geometry from the input tensor shape and precomputes the per-channel constants. | |||
| // channel_ is the last input dimension and units_ the product of all leading dimensions; thread_num_ is | |||
| // clamped to channel_. unit_ equals units_ on kARM32M and UP_DIV(units_, kMaxThreadNumSupported) otherwise. | |||
| // Returns RET_OK when every check passes; the MS_CHECK_* macros are expected to return early otherwise. | |||
| int BatchNormInt8Coder::Prepare(CoderContext *const context) { | |||
|   // NOTE(review): parameter_ may be attached after construction — confirm. Re-reading it here is harmless | |||
|   // if the constructor already captured the same pointer. | |||
|   batchnorm_param_ = reinterpret_cast<BatchNormParameter *>(parameter_); | |||
|   MS_CHECK_PTR(batchnorm_param_); | |||
|   MS_CHECK_PTR(input_tensor_); | |||
|   const std::vector<int> input_shapes = input_tensor_->shape(); | |||
|   const size_t n_dim = input_shapes.size(); | |||
|   // An empty shape would make `n_dim - 1` wrap around (size_t) and index out of bounds below. | |||
|   MS_CHECK_TRUE(n_dim > 0, "input shape is empty"); | |||
|   batchnorm_param_->channel_ = input_shapes[n_dim - 1]; | |||
|   batchnorm_param_->units_ = 1; | |||
|   for (size_t i = 0; i + 1 < n_dim; i++) { | |||
|     batchnorm_param_->units_ *= input_shapes[i]; | |||
|   } | |||
|   batchnorm_param_->op_parameter_.thread_num_ = | |||
|     MSMIN(batchnorm_param_->op_parameter_.thread_num_, batchnorm_param_->channel_); | |||
|   if (target_ == kARM32M) { | |||
|     batchnorm_param_->unit_ = batchnorm_param_->units_; | |||
|   } else { | |||
|     batchnorm_param_->unit_ = UP_DIV(batchnorm_param_->units_, kMaxThreadNumSupported); | |||
|   } | |||
|   if (batchnorm_param_->fused_) { | |||
|     MS_CHECK_RET_CODE(InitFusedConstTensor(), "InitFusedConstTensor failed"); | |||
|   } else { | |||
|     MS_CHECK_RET_CODE(InitConstTensor(), "InitConstTensor failed"); | |||
|   } | |||
|   return RET_OK; | |||
| } | |||
| // Emits the generated code for the int8 BatchNorm operator into `context`. | |||
| // Serializes `*batchnorm_param_` as a struct named "param", emits a call to BatchNormInt8 that receives the | |||
| // output/input tensors, the alpha/beta buffers, the default task id and "&param", then registers the header | |||
| // and C source with the collector. Always returns RET_OK. | |||
| int BatchNormInt8Coder::DoCode(CoderContext *context) { | |||
|   // NOTE(review): a slice header is collected for a batch-norm kernel — confirm this is the intended header. | |||
|   std::vector<std::string> headers = {"nnacl/slice_parameter.h"}; | |||
|   std::vector<std::string> cFiles = {"batchnorm_int8.c"}; | |||
|   NNaclInt8Serializer code; | |||
|   code.CodeStruct("param", *batchnorm_param_); | |||
|   // "&param" refers to the struct named by the CodeStruct call above. | |||
|   code.CodeFunction("BatchNormInt8", output_tensor_, input_tensor_, alpha_addr_, beta_addr_, kDefaultTaskId, "&param"); | |||
|   Collect(context, headers, cFiles); | |||
|   context->AppendCode(code.str()); | |||
|   return RET_OK; | |||
| } | |||
| // Precomputes the per-channel float coefficients for the non-fused int8 batch norm. | |||
| // Inputs are read as: index 0 = input, 1 = mean, 2 = variance. For every channel i: | |||
| //   tmp      = s_out * sqrt(eps + s_var * (var[i] - zp_var)) | |||
| //   alpha[i] = s_in / tmp | |||
| //   beta[i]  = zp_out - alpha[i] * zp_in - s_mean * (mean[i] - zp_mean) / tmp | |||
| // Results are stored in alpha_addr_ / beta_addr_, which are obtained from allocator_. | |||
| // Returns RET_OK on success; the MS_CHECK_* macros are expected to return early on a failed check. | |||
| int BatchNormInt8Coder::InitConstTensor() { | |||
|   MS_CHECK_TRUE(input_tensors_.size() >= kInputSize2, "input tensors number not match"); | |||
|   Tensor *input = input_tensor_; | |||
|   Tensor *mean = input_tensors_.at(1); | |||
|   Tensor *variance = input_tensors_.at(2); | |||
|   Tensor *output = output_tensor_; | |||
|   MS_CHECK_PTR(input); | |||
|   MS_CHECK_PTR(mean); | |||
|   MS_CHECK_PTR(variance); | |||
|   MS_CHECK_PTR(output); | |||
|   // The loop below touches channel_ elements of every buffer, so each tensor must hold at least that many. | |||
|   const int channel = batchnorm_param_->channel_; | |||
|   MS_CHECK_TRUE(mean->ElementsNum() >= channel, "mean elements number less than channel"); | |||
|   MS_CHECK_TRUE(variance->ElementsNum() >= channel, "variance elements number less than channel"); | |||
|   auto mean_ptr = reinterpret_cast<int8_t *>(mean->MutableData()); | |||
|   auto var_ptr = reinterpret_cast<int8_t *>(variance->MutableData()); | |||
|   MS_CHECK_PTR(mean_ptr); | |||
|   MS_CHECK_PTR(var_ptr); | |||
|   alpha_addr_ = reinterpret_cast<float *>( | |||
|     allocator_->Malloc(kNumberTypeFloat, mean->ElementsNum() * sizeof(float), kOfflinePackWeight)); | |||
|   MS_CHECK_PTR(alpha_addr_); | |||
|   beta_addr_ = reinterpret_cast<float *>( | |||
|     allocator_->Malloc(kNumberTypeFloat, variance->ElementsNum() * sizeof(float), kOfflinePackWeight)); | |||
|   MS_CHECK_PTR(beta_addr_); | |||
|   // compute alpha, beta; | |||
|   auto eps = batchnorm_param_->epsilon_; | |||
|   int32_t zp_in = input->quant_params().at(0).zeroPoint; | |||
|   int32_t zp_mean = mean->quant_params().at(0).zeroPoint; | |||
|   int32_t zp_var = variance->quant_params().at(0).zeroPoint; | |||
|   int32_t zp_out = output->quant_params().at(0).zeroPoint; | |||
|   auto s_in = static_cast<float>(input->quant_params().at(0).scale); | |||
|   auto s_mean = static_cast<float>(mean->quant_params().at(0).scale); | |||
|   auto s_var = static_cast<float>(variance->quant_params().at(0).scale); | |||
|   auto s_out = static_cast<float>(output->quant_params().at(0).scale); | |||
|   for (int i = 0; i < channel; ++i) { | |||
|     float tmp = s_out * sqrt(eps + s_var * (var_ptr[i] - zp_var)); | |||
|     float tmp_a = s_in / tmp; | |||
|     float tmp_b = zp_out - tmp_a * zp_in - (s_mean * (mean_ptr[i] - zp_mean)) / tmp; | |||
|     alpha_addr_[i] = tmp_a; | |||
|     beta_addr_[i] = tmp_b; | |||
|   } | |||
|   return RET_OK; | |||
| } | |||
| // Precomputes the per-channel float coefficients for the fused int8 batch norm. | |||
| // Inputs are read as: index 0 = input, 1 = scale, 2 = offset, 3 = mean, 4 = variance. For every channel i: | |||
| //   tmp      = s_out * sqrt(eps + s_var * (var[i] - zp_var)) | |||
| //   alpha[i] = s_in * s_scale * (scale[i] - zp_scale) / tmp | |||
| //   beta[i]  = zp_out + (s_offset / s_out) * (offset[i] - zp_offset) - alpha[i] * zp_in | |||
| //              - s_scale * s_mean * (scale[i] - zp_scale) * (mean[i] - zp_mean) / tmp | |||
| // Results are stored in alpha_addr_ / beta_addr_, which are obtained from allocator_. | |||
| // Returns RET_OK on success; the MS_CHECK_* macros are expected to return early on a failed check. | |||
| int BatchNormInt8Coder::InitFusedConstTensor() { | |||
|   MS_CHECK_TRUE(input_tensors_.size() >= 5, "input tensors number not match"); | |||
|   Tensor *input = input_tensors_.at(0); | |||
|   Tensor *scale = input_tensors_.at(1); | |||
|   Tensor *offset = input_tensors_.at(2); | |||
|   Tensor *mean = input_tensors_.at(3); | |||
|   Tensor *variance = input_tensors_.at(4); | |||
|   Tensor *output = output_tensor_; | |||
|   MS_CHECK_PTR(input); | |||
|   MS_CHECK_PTR(scale); | |||
|   MS_CHECK_PTR(offset); | |||
|   MS_CHECK_PTR(mean); | |||
|   MS_CHECK_PTR(variance); | |||
|   MS_CHECK_PTR(output); | |||
|   // The loop below touches channel_ elements of every buffer, so each tensor must hold at least that many. | |||
|   const int channel = batchnorm_param_->channel_; | |||
|   MS_CHECK_TRUE(scale->ElementsNum() >= channel, "scale elements number less than channel"); | |||
|   MS_CHECK_TRUE(offset->ElementsNum() >= channel, "offset elements number less than channel"); | |||
|   MS_CHECK_TRUE(mean->ElementsNum() >= channel, "mean elements number less than channel"); | |||
|   MS_CHECK_TRUE(variance->ElementsNum() >= channel, "variance elements number less than channel"); | |||
|   auto scale_ptr = reinterpret_cast<int8_t *>(scale->MutableData()); | |||
|   auto offset_ptr = reinterpret_cast<int8_t *>(offset->MutableData()); | |||
|   auto mean_ptr = reinterpret_cast<int8_t *>(mean->MutableData()); | |||
|   auto var_ptr = reinterpret_cast<int8_t *>(variance->MutableData()); | |||
|   MS_CHECK_PTR(scale_ptr); | |||
|   MS_CHECK_PTR(offset_ptr); | |||
|   MS_CHECK_PTR(mean_ptr); | |||
|   MS_CHECK_PTR(var_ptr); | |||
|   alpha_addr_ = reinterpret_cast<float *>( | |||
|     allocator_->Malloc(kNumberTypeFloat, mean->ElementsNum() * sizeof(float), kOfflinePackWeight)); | |||
|   MS_CHECK_PTR(alpha_addr_); | |||
|   beta_addr_ = reinterpret_cast<float *>( | |||
|     allocator_->Malloc(kNumberTypeFloat, variance->ElementsNum() * sizeof(float), kOfflinePackWeight)); | |||
|   MS_CHECK_PTR(beta_addr_); | |||
|   // compute alpha, beta; | |||
|   float eps = batchnorm_param_->epsilon_; | |||
|   int32_t zp_in = input->quant_params().at(0).zeroPoint; | |||
|   int32_t zp_scale = scale->quant_params().at(0).zeroPoint; | |||
|   int32_t zp_offset = offset->quant_params().at(0).zeroPoint; | |||
|   int32_t zp_mean = mean->quant_params().at(0).zeroPoint; | |||
|   int32_t zp_var = variance->quant_params().at(0).zeroPoint; | |||
|   int32_t zp_out = output->quant_params().at(0).zeroPoint; | |||
|   auto s_in = static_cast<float>(input->quant_params().at(0).scale); | |||
|   auto s_scale = static_cast<float>(scale->quant_params().at(0).scale); | |||
|   auto s_offset = static_cast<float>(offset->quant_params().at(0).scale); | |||
|   auto s_mean = static_cast<float>(mean->quant_params().at(0).scale); | |||
|   auto s_var = static_cast<float>(variance->quant_params().at(0).scale); | |||
|   auto s_out = static_cast<float>(output->quant_params().at(0).scale); | |||
|   // Loop-invariant scale products, hoisted out of the per-channel loop. | |||
|   float mul_12 = s_in * s_scale; | |||
|   float mul_24 = s_scale * s_mean; | |||
|   float div_36 = s_offset / s_out; | |||
|   for (int i = 0; i < channel; ++i) { | |||
|     float tmp = s_out * sqrt(eps + s_var * (var_ptr[i] - zp_var)); | |||
|     float tmp_a = (mul_12 * (scale_ptr[i] - zp_scale)) / tmp; | |||
|     float tmp_b = zp_out + div_36 * (offset_ptr[i] - zp_offset) - tmp_a * zp_in - | |||
|                   (mul_24 * (scale_ptr[i] - zp_scale) * (mean_ptr[i] - zp_mean)) / tmp; | |||
|     alpha_addr_[i] = tmp_a; | |||
|     beta_addr_[i] = tmp_b; | |||
|   } | |||
|   return RET_OK; | |||
| } | |||
| REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt8, PrimitiveType_BatchNorm, CPUOpCoderCreator<BatchNormInt8Coder>) | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| @@ -0,0 +1,49 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_BATCHNORM_INT8_CODER_H_ | |||
| #define MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_BATCHNORM_INT8_CODER_H_ | |||
| #include <cstring> | |||
| #include <vector> | |||
| #include "coder/opcoders/op_coder.h" | |||
| #include "nnacl/batchnorm_parameter.h" | |||
| namespace mindspore::lite::micro::nnacl { | |||
| // Operator coder for the int8 BatchNorm operator. | |||
| // Prepare() fills in the batch-norm parameter and the alpha/beta coefficient buffers; DoCode() emits the | |||
| // generated call that consumes them. | |||
| class BatchNormInt8Coder final : public OperatorCoder { | |||
|  public: | |||
|   // Forwards all arguments to OperatorCoder and views the base-class parameter_ as a BatchNormParameter. | |||
|   BatchNormInt8Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | |||
|                      const Model::Node *node, size_t node_index, Target target) | |||
|       : OperatorCoder(in_tensors, out_tensors, node, node_index, target), | |||
|         batchnorm_param_(reinterpret_cast<BatchNormParameter *>(parameter_)) {} | |||
|   ~BatchNormInt8Coder() override = default; | |||
|   // Computes shape-derived parameter fields and the per-channel constants. | |||
|   int Prepare(CoderContext *const context) override; | |||
|   // Appends the generated BatchNorm code to `context`. | |||
|   int DoCode(CoderContext *context) override; | |||
|  private: | |||
|   // Builds alpha/beta from mean and variance (non-fused case). | |||
|   int InitConstTensor(); | |||
|   // Builds alpha/beta from scale, offset, mean and variance (fused case). | |||
|   int InitFusedConstTensor(); | |||
|   // Per-channel coefficient buffers, filled by the Init*ConstTensor helpers. | |||
|   float *alpha_addr_{nullptr}; | |||
|   float *beta_addr_{nullptr}; | |||
|   // Typed view of the operator parameter. | |||
|   BatchNormParameter *batchnorm_param_; | |||
| }; | |||
| } // namespace mindspore::lite::micro::nnacl | |||
| #endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_NNACL_BATCHNORM_INT8_CODER_H_ | |||
| @@ -21,6 +21,7 @@ | |||
| #include "nnacl/int8/quantize.h" | |||
| #include "coder/opcoders/file_collector.h" | |||
| #include "coder/log.h" | |||
| #include "coder/opcoders/parallel.h" | |||
| #include "coder/opcoders/serializers/nnacl_serializer/nnacl_int8_serializer.h" | |||
| int MallocQuantArgForConcat(ConcatQuantArg *quant_arg, size_t input_num) { | |||
| @@ -37,7 +38,6 @@ int ConcatInt8Coder::Prepare(CoderContext *const context) { | |||
| concat_param_->input_shapes_ = nullptr; | |||
| size_t input_num = input_tensors().size(); | |||
| MS_CHECK_PTR(input_data_); | |||
| MS_CHECK_RET_CODE(MallocQuantArgForConcat(&concat_param_->quant_arg_, input_num), | |||
| "Null pointer reference: quant_concat_parm_->in_quant_args_."); | |||
| for (int i = 0; i < static_cast<int>(input_num); i++) { | |||
| @@ -60,7 +60,10 @@ int ConcatInt8Coder::Prepare(CoderContext *const context) { | |||
| concat_param_->input_shapes_ = reinterpret_cast<int **>(malloc(sizeof(int *) * input_num)); | |||
| MS_CHECK_PTR(concat_param_->input_shapes_); | |||
| for (int i = 0; i < static_cast<int>(input_num); i++) { | |||
| concat_param_->input_shapes_[i] = reinterpret_cast<int *>(input_tensors().at(i)->shape().data()); | |||
| auto in_shape = input_tensors_.at(i)->shape(); | |||
| concat_param_->input_shapes_[i] = reinterpret_cast<int *>(malloc(in_shape.size() * sizeof(int))); | |||
| MS_CHECK_PTR(concat_param_->input_shapes_[i]); | |||
| memcpy(reinterpret_cast<void *>(concat_param_->input_shapes_[i]), in_shape.data(), sizeof(int) * in_shape.size()); | |||
| } | |||
| before_axis_size = 1; | |||
| @@ -70,7 +73,10 @@ int ConcatInt8Coder::Prepare(CoderContext *const context) { | |||
| int64_t after_axis_size = 1; | |||
| int output_dim = static_cast<int>(output_tensor_->shape().size()); | |||
| concat_param_->output_shapes_ = output_tensor_->shape().data(); | |||
| concat_param_->output_shapes_ = reinterpret_cast<int *>(malloc(output_dim * sizeof(int))); | |||
| MS_CHECK_PTR(concat_param_->output_shapes_); | |||
| memcpy(reinterpret_cast<void *>(concat_param_->output_shapes_), output_tensor_->shape().data(), | |||
| sizeof(int) * output_dim); | |||
| for (int i = axis_ + 1; i < output_dim; i++) { | |||
| after_axis_size *= concat_param_->output_shapes_[i]; | |||
| } | |||
| @@ -84,7 +90,8 @@ int ConcatInt8Coder::DoCode(CoderContext *const context) { | |||
| count_unit_ = thread_num_ > 1 ? UP_DIV(before_axis_size, thread_num_) : before_axis_size; | |||
| concat_param_->count_unit_ = count_unit_; | |||
| Collect(context, {"nnacl/int8/concat_int8.h"}, {"concat_int8.c"}); | |||
| Collect(context, {"nnacl/int8/concat_int8.h", "wrapper/int8/concat_int8_wrapper.h"}, | |||
| {"concat_int8.c", "concat_int8_wrapper.c"}); | |||
| NNaclInt8Serializer code; | |||
| int in_tensor_count = input_tensors().size(); | |||
| @@ -96,15 +103,12 @@ int ConcatInt8Coder::DoCode(CoderContext *const context) { | |||
| } | |||
| code.CodeStruct("concat_param", *concat_param_, in_tensor_count, input_tensor_->shape().size(), | |||
| output_tensor_->shape().size()); | |||
| if (thread_num_ > 1) { | |||
| code.CodeBaseStruct("ConcatInt8Args", "args", "input_data", output_tensor_, "&concat_param", axis_, | |||
| before_axis_size, count_unit_); | |||
| code.CodeFunction("ParallelLaunch", "THREAD_POOL_DEFAULT", "ConcatInt8Run", "&args", "thread_num"); | |||
| code.CodeBaseStruct("ConcatInt8Args", kRunArgs, "input_data", output_tensor_, "&concat_param", axis_, | |||
| before_axis_size, count_unit_); | |||
| if (support_parallel_) { | |||
| code.CodeFunction(kParallelLaunch, gThreadPool, "ConcatInt8Run", kRunArgsAddr, gThreadNum); | |||
| } else { | |||
| int task_id = 0; | |||
| int64_t real_dst_count = MSMIN(before_axis_size - task_id * count_unit_, count_unit_); | |||
| code.CodeFunction("Int8Concat", "input_data", output_tensor_, "&concat_param", axis_, real_dst_count, task_id); | |||
| code.CodeFunction("ConcatInt8Run", kRunArgsAddr, kDefaultTaskId); | |||
| } | |||
| context->AppendCode(code.str()); | |||
| return RET_OK; | |||