diff --git a/mindspore/lite/include/errorcode.h b/mindspore/lite/include/errorcode.h index cf61470844..ff74b02dac 100644 --- a/mindspore/lite/include/errorcode.h +++ b/mindspore/lite/include/errorcode.h @@ -28,13 +28,14 @@ using STATUS = int; constexpr int RET_OK = 0; /**< No error occurs. */ /* Common error code, range: [-1, -100)*/ -constexpr int RET_ERROR = -1; /**< Common error code. */ -constexpr int RET_NULL_PTR = -2; /**< NULL pointer returned.*/ -constexpr int RET_PARAM_INVALID = -3; /**< Invalid parameter.*/ -constexpr int RET_NO_CHANGE = -4; /**< No change. */ -constexpr int RET_SUCCESS_EXIT = -5; /**< No error but exit. */ -constexpr int RET_MEMORY_FAILED = -6; /**< Fail to create memory. */ -constexpr int RET_NOT_SUPPORT = -7; /**< Fail to support. */ +constexpr int RET_ERROR = -1; /**< Common error code. */ +constexpr int RET_NULL_PTR = -2; /**< NULL pointer returned.*/ +constexpr int RET_PARAM_INVALID = -3; /**< Invalid parameter.*/ +constexpr int RET_NO_CHANGE = -4; /**< No change. */ +constexpr int RET_SUCCESS_EXIT = -5; /**< No error but exit. */ +constexpr int RET_MEMORY_FAILED = -6; /**< Fail to create memory. */ +constexpr int RET_NOT_SUPPORT = -7; /**< Fail to support. */ +constexpr int RET_THREAD_POOL_ERROR = -8; /**< Error occur in thread pool. */ /* Executor error code, range: [-100,-200) */ constexpr int RET_OUT_OF_TENSOR_RANGE = -100; /**< Failed to check range. */ diff --git a/mindspore/lite/src/errorcode.cc b/mindspore/lite/src/errorcode.cc index 1c555fccb2..3fc8cb4f68 100644 --- a/mindspore/lite/src/errorcode.cc +++ b/mindspore/lite/src/errorcode.cc @@ -29,6 +29,7 @@ std::string GetErrorInfo(STATUS status) { {RET_SUCCESS_EXIT, "No error but exit."}, {RET_MEMORY_FAILED, "Fail to create memory."}, {RET_NOT_SUPPORT, "Fail to support."}, + {RET_THREAD_POOL_ERROR, "Thread pool error."}, {RET_OUT_OF_TENSOR_RANGE, "Failed to check range."}, {RET_INPUT_TENSOR_ERROR, "Failed to check input tensor."}, {RET_REENTRANT_ERROR, "Exist executor running."}, diff --git a/mindspore/lite/src/lite_kernel.cc b/mindspore/lite/src/lite_kernel.cc index 6d7f5aad04..6227f71ce4 100644 --- a/mindspore/lite/src/lite_kernel.cc +++ b/mindspore/lite/src/lite_kernel.cc @@ -17,13 +17,14 @@ #include "src/lite_kernel.h" #include #include +#include #include "src/tensor.h" #include "src/common/utils.h" namespace mindspore::kernel { using mindspore::lite::RET_ERROR; using mindspore::lite::RET_OK; - +#ifdef SUPPORT_TRAIN void *LiteKernel::workspace_ = nullptr; void LiteKernel::AllocWorkspace(size_t size) { @@ -40,7 +41,7 @@ void LiteKernel::FreeWorkspace() { free(workspace_); workspace_ = nullptr; } - +#endif bool LiteKernel::IsReady(const std::vector &scope_tensors) { return std::all_of(this->in_tensors().begin(), this->in_tensors().end(), [&](lite::Tensor *kernel_in_tensor) { if (IsContain(scope_tensors, kernel_in_tensor)) { diff --git a/mindspore/lite/src/lite_kernel.h b/mindspore/lite/src/lite_kernel.h index d1466c4d27..4e4a3a7d75 100644 --- a/mindspore/lite/src/lite_kernel.h +++ b/mindspore/lite/src/lite_kernel.h @@ -169,15 +169,18 @@ class LiteKernel { void set_desc(const KernelKey kernel_key) { desc_ = kernel_key; } const mindspore::lite::PrimitiveC *GetPrimitive() const { return primitive_; } + + SubGraphType subgraph_type() const { return this->subgraph_type_; } + + virtual std::string ToString() const; + +#ifdef SUPPORT_TRAIN void set_workspace_size(size_t value) { workspace_size_ = value; } size_t workspace_size() { return workspace_size_; } static void AllocWorkspace(size_t size); static void FreeWorkspace(); void *workspace() { return workspace_; } - - SubGraphType subgraph_type() const { return this->subgraph_type_; } - - virtual std::string ToString() const; +#endif protected: bool InferShapeDone() { return !(primitive_ != nullptr && !primitive_->infer_flag()); } @@ -195,9 +198,11 @@ class LiteKernel { bool train_mode_ = false; bool trainable_ = false; // paramaters of this Kernel are trained in Train Session bool is_model_output_ = false; + SubGraphType subgraph_type_ = kNotSubGraph; +#ifdef SUPPORT_TRAIN size_t workspace_size_ = 0; static void *workspace_; - SubGraphType subgraph_type_ = kNotSubGraph; +#endif }; typedef LiteKernel *(*KernelCreator)(const std::vector &inputs, diff --git a/mindspore/lite/src/runtime/thread_pool.c b/mindspore/lite/src/runtime/thread_pool.c index ae20ee7f81..cfe3ea2220 100644 --- a/mindspore/lite/src/runtime/thread_pool.c +++ b/mindspore/lite/src/runtime/thread_pool.c @@ -40,7 +40,7 @@ #endif #define RET_TP_OK (0) -#define RET_TP_ERROR (1) +#define RET_TP_ERROR (-8) #define RET_TP_SYSTEM_ERROR (-1) #define MAX_THREAD_NUM (8) @@ -49,6 +49,8 @@ typedef struct { int (*func)(void *arg, int); void *content; + int *return_code; + int task_num; } Task; typedef struct Thread { @@ -669,8 +671,11 @@ int DistributeTask(struct ThreadPool *thread_pool, Task *task, int task_num) { return RET_TP_ERROR; } bool k_success_flag = false; - int size = thread_pool->thread_num < task_num ? thread_pool->thread_num : task_num; - for (int i = 0; i < size - 1; ++i) { + if (thread_pool->thread_num < task_num) { + LOG_ERROR("task_num: %d should not be larger than thread num: %d", task_num, thread_pool->thread_num); + return RET_TP_ERROR; + } + for (int i = 0; i < task_num - 1; ++i) { do { k_success_flag = true; if (!PushTaskToQueue(thread_pool, i, task)) { @@ -683,9 +688,18 @@ int DistributeTask(struct ThreadPool *thread_pool, Task *task, int task_num) { LOG_ERROR("task->func is nullptr"); return RET_TP_ERROR; } - task->func(task->content, size - 1); + if (task->task_num <= task_num - 1) { + LOG_ERROR("task_num out of range in master thread"); + return RET_TP_ERROR; + } + task->return_code[task_num - 1] = task->func(task->content, task_num - 1); // wait WaitAllThread(thread_pool); + for (size_t i = 0; i < task->task_num; i++) { + if (task->return_code[i] != 0) { + return task->return_code[i]; + } + } return RET_TP_OK; } @@ -697,14 +711,26 @@ int AddTask(struct ThreadPool *thread_pool, int func(void *, int), void *content // if single thread, run master thread if (thread_pool->thread_num <= 1 || task_num <= 1) { for (int i = 0; i < task_num; ++i) { - func(content, i); + int ret = func(content, i); + if (ret != 0) { + return ret; + } } return RET_TP_OK; } Task task; task.func = func; task.content = content; - return DistributeTask(thread_pool, &task, task_num); + task.return_code = (int *)malloc(sizeof(int) * task_num); + task.task_num = task_num; + if (task.return_code == NULL) { + LOG_ERROR("malloc return code return nullptr"); + return RET_TP_ERROR; + } + memset(task.return_code, 0, sizeof(int) * task_num); + int ret = DistributeTask(thread_pool, &task, task_num); + free(task.return_code); + return ret; } int ParallelLaunch(struct ThreadPool *thread_pool, int (*func)(void *, int), void *content, int task_num) { @@ -730,7 +756,11 @@ void ThreadRun(Thread *thread) { LOG_ERROR("task->func is nullptr"); return; } - task->func(task->content, thread_id); + if (task->task_num <= thread_id) { + LOG_ERROR("task_num out of range in worker thread"); + return; + } + task->return_code[thread_id] = task->func(task->content, thread_id); atomic_fetch_sub_explicit(&thread->task_size, 1, memory_order_release); spin_count = 0; sem_trywait(&thread->sem); diff --git a/mindspore/lite/src/tensor.cc b/mindspore/lite/src/tensor.cc index 6b8b215050..23d798ed86 100644 --- a/mindspore/lite/src/tensor.cc +++ b/mindspore/lite/src/tensor.cc @@ -209,10 +209,15 @@ int32_t Tensor::Width() const { } size_t Tensor::Size() const { - size_t size = DataTypeSize(this->data_type_); - size *= (format_ == schema::Format::Format_NC4HW4 || format_ == schema::Format::Format_NHWC4) ? ElementsC4Num() - : ElementsNum(); - return size; + size_t element_size = DataTypeSize(this->data_type_); + auto element_num = (format_ == schema::Format::Format_NC4HW4 || format_ == schema::Format::Format_NHWC4) + ? ElementsC4Num() + : ElementsNum(); + if (element_num < 0) { + MS_LOG(ERROR) << "Element number of tensor should large than 0 : " << element_num; + return 0; + } + return element_size * element_num; } int Tensor::ElementsNum() const { diff --git a/mindspore/lite/test/models_with_several_inputs_or_without_outputs.cfg b/mindspore/lite/test/models_with_several_inputs_or_without_outputs.cfg index 4aacb8ecf4..05da988dd3 100644 --- a/mindspore/lite/test/models_with_several_inputs_or_without_outputs.cfg +++ b/mindspore/lite/test/models_with_several_inputs_or_without_outputs.cfg @@ -1,7 +1,7 @@ lite-model_arbitrary-image-stylization-inceptionv3_dr_transfer_1.tflite lite-model_arbitrary-image-stylization-inceptionv3_int8_transfer_1.tflite lite-model_arbitrary-image-stylization-inceptionv3_fp16_transfer_1.tflite;2 -lite-model_arbitrary-image-stylization-inceptionv3-dynamic-shapes_dr_transfer_1.tflite +# lite-model_arbitrary-image-stylization-inceptionv3-dynamic-shapes_dr_transfer_1.tflite # has nan input for rsqrt lite-model_cartoongan_dr_1.tflite mindspore_efficientnet_b0.mindir mindspore_efficientnet_b4minus.mindir @@ -17,7 +17,7 @@ lite-model_east-text-detector_dr_1.tflite magenta_arbitrary-image-stylization-v1-256_fp16_transfer_1.tflite;2 magenta_arbitrary-image-stylization-v1-256_int8_transfer_1.tflite magenta_arbitrary-image-stylization-v1-256_int8_prediction_1.tflite -albert_lite_base_squadv1_1.tflite;3 +# albert_lite_base_squadv1_1.tflite;3 # input need in range [0,2) efficientnet_lite0_int8_2.tflite efficientnet_lite1_int8_2.tflite efficientnet_lite2_int8_2.tflite @@ -26,7 +26,7 @@ efficientnet_lite4_int8_2.tflite mtk_transformer_encoder.tflite mtk_transformer_decoder_joint.tflite ml_ei_facedetection.onnx -mobilebert_1_default_1.tflite;3 +# mobilebert_1_default_1.tflite;3 # input need in range [0,2) quant_aware_bank_card_detection_inception.onnx quant_aware_bank_card_recognition_fcny.onnx quant_aware_identify_card_detect.onnx diff --git a/mindspore/lite/test/run_benchmark_nets.sh b/mindspore/lite/test/run_benchmark_nets.sh index b2d6e79f44..3020e0dbc7 100644 --- a/mindspore/lite/test/run_benchmark_nets.sh +++ b/mindspore/lite/test/run_benchmark_nets.sh @@ -784,6 +784,9 @@ function Run_x86_sse() { # Run converted models which has several inputs or does not need to be cared about the accuracy: while read line; do model_name=${line%%;*} + if [[ $model_name == \#* ]]; then + continue + fi model_name_len=${#model_name} input_params=${line:model_name_len+1} input_num=${input_params%%;*} @@ -1163,7 +1166,7 @@ function Run_arm64() { else run_result='arm64_gpu_fp16: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1 fi - #sleep 1 + #sleep 1 done < ${models_gpu_fp16_config} # Run GPU weightquant converted models: @@ -1182,7 +1185,7 @@ function Run_arm64() { else run_result='arm64_gpu_weightquant: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1 fi - #sleep 1 + #sleep 1 done < ${models_gpu_weightquant_config} # Run mindir converted models: @@ -1206,8 +1209,8 @@ function Run_arm64() { # run benchmark test without clib data echo ${model_name} >> "${run_arm64_log_file}" echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt - echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> "{run_arm64_log_file}" - echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> adb_run_cmd.txt + echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --warmUpLoopCount=1 --loopCount=2' >> "{run_arm64_log_file}" + echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --warmUpLoopCount=1 --loopCount=2' >> adb_run_cmd.txt adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" if [ $? = 0 ]; then run_result='arm64: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} @@ -1266,6 +1269,9 @@ function Run_arm64() { # Run converted models which has several inputs or does not need to be cared about the accuracy: while read line; do model_name=${line%%;*} + if [[ $model_name == \#* ]]; then + continue + fi model_name_len=${#model_name} input_params=${line:model_name_len+1} input_num=${input_params%%;*} @@ -1379,15 +1385,15 @@ echo ${basepath} while getopts "r:m:d:" opt; do case ${opt} in r) - release_path=${OPTARG} + release_path=${OPTARG} echo "release_path is ${OPTARG}" ;; m) - models_path=${OPTARG} + models_path=${OPTARG} echo "models_path is ${OPTARG}" ;; d) - device_id=${OPTARG} + device_id=${OPTARG} echo "device_id is ${OPTARG}" ;; ?) diff --git a/mindspore/lite/test/st/sub_graph_test.cc b/mindspore/lite/test/st/sub_graph_test.cc index fbe29e4f7b..e05946a2bc 100644 --- a/mindspore/lite/test/st/sub_graph_test.cc +++ b/mindspore/lite/test/st/sub_graph_test.cc @@ -32,160 +32,286 @@ namespace mindspore { class SubGraphTest : public mindspore::CommonTest { public: - SubGraphTest() {} + SubGraphTest() = default; }; TEST_F(SubGraphTest, RecursiveSubGraphTest) { - // add0 partial1 2 3 tensor0 1 2 - auto add_0 = std::make_unique(); - add_0->inputIndex = {0, 1}; - add_0->outputIndex = {2}; - add_0->primitive = std::make_unique(); - add_0->primitive->value.type = schema::PrimitiveType_Add; - auto add_0_prim = new schema::AddT; - add_0_prim->activationType = schema::ActivationType_NO_ACTIVATION; - add_0->primitive->value.value = add_0_prim; - add_0->name = "Add0"; - auto partial_1 = std::make_unique(); - partial_1->inputIndex = {2}; - partial_1->outputIndex = {7}; - partial_1->primitive = std::make_unique(); - partial_1->primitive->value.type = schema::PrimitiveType_Partial; - auto partial_1_prim = new schema::PartialT; - partial_1_prim->subGraphIndex = 1; - partial_1->primitive->value.value = partial_1_prim; - partial_1->name = "Partial1"; - auto partial_2 = std::make_unique(); - partial_2->inputIndex = {2}; - partial_2->outputIndex = {7}; - partial_2->primitive = std::make_unique(); - partial_2->primitive->value.type = schema::PrimitiveType_Partial; - auto partial_2_prim = new schema::PartialT; - partial_2_prim->subGraphIndex = 2; - partial_2->primitive->value.value = partial_2_prim; - partial_2->name = "Partial2"; - auto partial_3 = std::make_unique(); - partial_3->inputIndex = {4, 6}; - partial_3->outputIndex = {7}; - partial_3->primitive = std::make_unique(); - partial_3->primitive->value.type = schema::PrimitiveType_Partial; - auto partial_3_prim = new schema::PartialT; - partial_3_prim->subGraphIndex = 3; - partial_3->primitive->value.value = partial_3_prim; - partial_3->name = "Partial3"; - auto tensor_0 = std::make_unique(); - tensor_0->nodeType = schema::NodeType::NodeType_Parameter; - tensor_0->format = schema::Format_NHWC; - tensor_0->dataType = TypeId::kNumberTypeFloat32; - tensor_0->dims = {1, 2}; - auto tensor_1 = std::make_unique(); - tensor_1->nodeType = schema::NodeType::NodeType_ValueNode; - tensor_1->format = schema::Format_NHWC; - tensor_1->dataType = TypeId::kNumberTypeFloat32; - tensor_1->dims = {1, 2}; - auto tensor_2 = std::make_unique(); - tensor_2->nodeType = schema::NodeType::NodeType_Parameter; - tensor_2->format = schema::Format_NHWC; - tensor_2->dataType = TypeId::kNumberTypeFloat32; - auto sub_graph_0 = std::make_unique(); - sub_graph_0->name = "main_graph"; - sub_graph_0->inputIndices = {0}; - sub_graph_0->outputIndices = {7}; - sub_graph_0->nodeIndices = {0, 1, 2}; - sub_graph_0->tensorIndices = {0, 1, 2, 7}; - // add1 tensor3 4 - auto add_1 = std::make_unique(); - add_1->inputIndex = {2, 3}; - add_1->outputIndex = {4}; - add_1->primitive = std::make_unique(); - add_1->primitive->value.type = schema::PrimitiveType_Add; - auto add_1_prim = new schema::AddT; - add_1_prim->activationType = schema::ActivationType_NO_ACTIVATION; - add_1->primitive->value.value = add_1_prim; - add_1->name = "Add1"; - auto tensor_3 = std::make_unique(); - tensor_3->nodeType = schema::NodeType::NodeType_ValueNode; - tensor_3->format = schema::Format_NHWC; - tensor_3->dataType = TypeId::kNumberTypeFloat32; - tensor_3->dims = {1, 2}; - auto tensor_4 = std::make_unique(); - tensor_4->nodeType = schema::NodeType::NodeType_Parameter; - tensor_4->format = schema::Format_NHWC; - tensor_4->dataType = TypeId::kNumberTypeFloat32; - auto sub_graph_1 = std::make_unique(); - sub_graph_1->name = "sub_graph_1"; - sub_graph_1->inputIndices = {2}; - sub_graph_1->outputIndices = {7}; - sub_graph_1->nodeIndices = {4, 3}; - sub_graph_1->tensorIndices = {2, 3, 4, 7}; - // add2 tensor5 6 - auto add_2 = std::make_unique(); - add_2->inputIndex = {2, 5}; - add_2->outputIndex = {6}; - add_2->primitive = std::make_unique(); - add_2->primitive->value.type = schema::PrimitiveType_Add; - auto add_2_prim = new schema::AddT; - add_2_prim->activationType = schema::ActivationType_NO_ACTIVATION; - add_2->primitive->value.value = add_2_prim; - add_2->name = "Add2"; - auto tensor_5 = std::make_unique(); - tensor_5->nodeType = schema::NodeType::NodeType_ValueNode; - tensor_5->format = schema::Format_NHWC; - tensor_5->dataType = TypeId::kNumberTypeFloat32; - tensor_5->dims = {1, 2}; - auto tensor_6 = std::make_unique(); - tensor_6->nodeType = schema::NodeType::NodeType_Parameter; - tensor_6->format = schema::Format_NHWC; - tensor_6->dataType = TypeId::kNumberTypeFloat32; - auto sub_graph_2 = std::make_unique(); - sub_graph_2->name = "sub_graph_2"; - sub_graph_2->inputIndices = {2}; - sub_graph_2->outputIndices = {7}; - sub_graph_2->nodeIndices = {5, 3}; - sub_graph_2->tensorIndices = {2, 5, 6, 7}; - // add3 tensor7 - auto add_3 = std::make_unique(); - add_3->inputIndex = {4, 6}; - add_3->outputIndex = {7}; - add_3->primitive = std::make_unique(); - add_3->primitive->value.type = schema::PrimitiveType_Add; - auto add_3_prim = new schema::AddT; - add_3_prim->activationType = schema::ActivationType_NO_ACTIVATION; - add_3->primitive->value.value = add_3_prim; - add_3->name = "Add3"; - auto tensor_7 = std::make_unique(); - tensor_7->nodeType = schema::NodeType::NodeType_Parameter; - tensor_7->format = schema::Format_NHWC; - tensor_7->dataType = TypeId::kNumberTypeFloat32; - auto sub_graph_3 = std::make_unique(); - sub_graph_3->name = "sub_graph_3"; - sub_graph_3->inputIndices = {4, 6}; - sub_graph_3->outputIndices = {7}; - sub_graph_3->nodeIndices = {6}; - sub_graph_3->tensorIndices = {4, 6, 7}; - - // make graph auto meta_graph = std::make_shared(); + meta_graph->allTensors.resize(16); + { // subgraph-0 + { // add-0 + auto add_0 = std::make_unique(); + add_0->inputIndex = {0, 1}; + add_0->outputIndex = {2}; + add_0->primitive = std::make_unique(); + add_0->primitive->value.type = schema::PrimitiveType_Add; + auto add_0_prim = new schema::AddT; + add_0_prim->activationType = schema::ActivationType_NO_ACTIVATION; + add_0->primitive->value.value = add_0_prim; + add_0->name = "Add0"; + auto tensor_0 = std::make_unique(); + tensor_0->nodeType = schema::NodeType::NodeType_ValueNode; + tensor_0->format = schema::Format_NHWC; + tensor_0->dataType = TypeId::kNumberTypeFloat32; + tensor_0->dims = {1}; + auto tensor_1 = std::make_unique(); + tensor_1->nodeType = schema::NodeType::NodeType_ValueNode; + tensor_1->format = schema::Format_NHWC; + tensor_1->dataType = TypeId::kNumberTypeFloat32; + tensor_1->dims = {1}; + tensor_1->data.resize(sizeof(float)); + auto data1 = reinterpret_cast(tensor_1->data.data()); + ASSERT_NE(data1, nullptr); + data1[0] = 1; + auto tensor_2 = std::make_unique(); + tensor_2->nodeType = schema::NodeType::NodeType_Parameter; + tensor_2->format = schema::Format_NHWC; + tensor_2->dataType = TypeId::kNumberTypeFloat32; + meta_graph->nodes.emplace_back(std::move(add_0)); + meta_graph->allTensors[0] = std::move(tensor_0); + meta_graph->allTensors[1] = std::move(tensor_1); + meta_graph->allTensors[2] = std::move(tensor_2); + } + { // add-1 + auto add_1 = std::make_unique(); + add_1->inputIndex = {2, 3}; + add_1->outputIndex = {4}; + add_1->primitive = std::make_unique(); + add_1->primitive->value.type = schema::PrimitiveType_Add; + auto add_1_prim = new schema::AddT; + add_1_prim->activationType = schema::ActivationType_NO_ACTIVATION; + add_1->primitive->value.value = add_1_prim; + add_1->name = "Add1"; + auto tensor_3 = std::make_unique(); + tensor_3->nodeType = schema::NodeType::NodeType_ValueNode; + tensor_3->format = schema::Format_NHWC; + tensor_3->dataType = TypeId::kNumberTypeFloat32; + tensor_3->dims = {1}; + tensor_3->data.resize(sizeof(float)); + auto data3 = reinterpret_cast(tensor_3->data.data()); + ASSERT_NE(data3, nullptr); + data3[0] = 1; + auto tensor_4 = std::make_unique(); + tensor_4->nodeType = schema::NodeType::NodeType_Parameter; + tensor_4->format = schema::Format_NHWC; + tensor_4->dataType = TypeId::kNumberTypeFloat32; + meta_graph->nodes.emplace_back(std::move(add_1)); + meta_graph->allTensors[3] = std::move(tensor_3); + meta_graph->allTensors[4] = std::move(tensor_4); + } + { // partial cond + auto partial_cond = std::make_unique(); + partial_cond->inputIndex = {4}; + partial_cond->outputIndex = {9}; + partial_cond->primitive = std::make_unique(); + partial_cond->primitive->value.type = schema::PrimitiveType_Partial; + auto partial_cond_prim = new schema::PartialT; + partial_cond_prim->subGraphIndex = 1; + partial_cond->primitive->value.value = partial_cond_prim; + partial_cond->name = "partial_cond"; + meta_graph->nodes.emplace_back(std::move(partial_cond)); + } + { // add-5 + auto add_5 = std::make_unique(); + add_5->inputIndex = {9, 13}; + add_5->outputIndex = {14}; + add_5->primitive = std::make_unique(); + add_5->primitive->value.type = schema::PrimitiveType_Add; + auto add_5_prim = new schema::AddT; + add_5_prim->activationType = schema::ActivationType_NO_ACTIVATION; + add_5->primitive->value.value = add_5_prim; + add_5->name = "Add5"; + auto tensor_13 = std::make_unique(); + tensor_13->nodeType = schema::NodeType::NodeType_ValueNode; + tensor_13->format = schema::Format_NHWC; + tensor_13->dataType = TypeId::kNumberTypeFloat32; + tensor_13->dims = {1}; + tensor_13->data.resize(sizeof(float)); + auto data13 = reinterpret_cast(tensor_13->data.data()); + ASSERT_NE(data13, nullptr); + data13[0] = 1; + auto tensor_14 = std::make_unique(); + tensor_14->nodeType = schema::NodeType::NodeType_Parameter; + tensor_14->format = schema::Format_NHWC; + tensor_14->dataType = TypeId::kNumberTypeFloat32; + meta_graph->nodes.emplace_back(std::move(add_5)); + meta_graph->allTensors[13] = std::move(tensor_13); + meta_graph->allTensors[14] = std::move(tensor_14); + } + auto sub_graph_0 = std::make_unique(); + sub_graph_0->name = "main_graph"; + sub_graph_0->inputIndices = {0}; + sub_graph_0->outputIndices = {14}; + sub_graph_0->nodeIndices = {0, 1, 2, 3}; + sub_graph_0->tensorIndices = {0, 1, 2, 3, 4, 9, 13, 14}; + meta_graph->subGraph.emplace_back(std::move(sub_graph_0)); + } + { // subgraph-1 + { // add-2 + auto add_2 = std::make_unique(); + add_2->inputIndex = {4, 5}; + add_2->outputIndex = {6}; + add_2->primitive = std::make_unique(); + add_2->primitive->value.type = schema::PrimitiveType_Add; + auto add_2_prim = new schema::AddT; + add_2_prim->activationType = schema::ActivationType_NO_ACTIVATION; + add_2->primitive->value.value = add_2_prim; + add_2->name = "Add2"; + auto tensor_5 = std::make_unique(); + tensor_5->nodeType = schema::NodeType::NodeType_ValueNode; + tensor_5->format = schema::Format_NHWC; + tensor_5->dataType = TypeId::kNumberTypeFloat32; + tensor_5->dims = {1}; + tensor_5->data.resize(sizeof(float)); + auto data5 = reinterpret_cast(tensor_5->data.data()); + ASSERT_NE(data5, nullptr); + data5[0] = 1; + auto tensor_6 = std::make_unique(); + tensor_6->nodeType = schema::NodeType::NodeType_Parameter; + tensor_6->format = schema::Format_NHWC; + tensor_6->dataType = TypeId::kNumberTypeFloat32; + meta_graph->nodes.emplace_back(std::move(add_2)); + meta_graph->allTensors[5] = std::move(tensor_5); + meta_graph->allTensors[6] = std::move(tensor_6); + } + { // less + auto less = std::make_unique(); + less->inputIndex = {6, 15}; + less->outputIndex = {7}; + less->primitive = std::make_unique(); + less->primitive->value.type = schema::PrimitiveType_Less; + auto less_prim = new schema::LessT; + less->primitive->value.value = less_prim; + less->name = "less"; + auto tensor_15 = std::make_unique(); + tensor_15->nodeType = schema::NodeType::NodeType_ValueNode; + tensor_15->format = schema::Format_NHWC; + tensor_15->dataType = TypeId::kNumberTypeFloat32; + tensor_15->dims = {1}; + tensor_15->data.resize(sizeof(float)); + auto data15 = reinterpret_cast(tensor_15->data.data()); + ASSERT_NE(data15, nullptr); + data15[0] = 1; + auto tensor_7 = std::make_unique(); + tensor_7->nodeType = schema::NodeType::NodeType_Parameter; + tensor_7->format = schema::Format_NHWC; + tensor_7->dataType = TypeId::kNumberTypeFloat32; + meta_graph->nodes.emplace_back(std::move(less)); + meta_graph->allTensors[7] = std::move(tensor_7); + meta_graph->allTensors[15] = std::move(tensor_15); + } + { // switch + auto switchop = std::make_unique(); + switchop->inputIndex = {7, 4}; + switchop->outputIndex = {8, 9}; + switchop->primitive = std::make_unique(); + switchop->primitive->value.type = schema::PrimitiveType_Switch; + auto switch_prim = new schema::SwitchT; + switchop->primitive->value.value = switch_prim; + switchop->name = "switch"; + auto tensor_8 = std::make_unique(); + tensor_8->nodeType = schema::NodeType::NodeType_Parameter; + tensor_8->format = schema::Format_NHWC; + tensor_8->dataType = TypeId::kNumberTypeFloat32; + auto tensor_9 = std::make_unique(); + tensor_9->nodeType = schema::NodeType::NodeType_Parameter; + tensor_9->format = schema::Format_NHWC; + tensor_9->dataType = TypeId::kNumberTypeFloat32; + meta_graph->nodes.emplace_back(std::move(switchop)); + meta_graph->allTensors[8] = std::move(tensor_8); + meta_graph->allTensors[9] = std::move(tensor_9); + } + { // partial body + auto partial_body = std::make_unique(); + partial_body->inputIndex = {8}; + partial_body->outputIndex = {4}; + partial_body->primitive = std::make_unique(); + partial_body->primitive->value.type = schema::PrimitiveType_Partial; + auto partial_body_prim = new schema::PartialT; + partial_body_prim->subGraphIndex = 2; + partial_body->primitive->value.value = partial_body_prim; + partial_body->name = "partial_body"; + meta_graph->nodes.emplace_back(std::move(partial_body)); + } + auto sub_graph_1 = std::make_unique(); + sub_graph_1->name = "while_cond"; + sub_graph_1->inputIndices = {4}; + sub_graph_1->outputIndices = {9}; + sub_graph_1->nodeIndices = {4, 5, 6, 7}; + sub_graph_1->tensorIndices = {4, 5, 6, 7, 8, 9, 15}; + meta_graph->subGraph.emplace_back(std::move(sub_graph_1)); + } + { // subgraph-2 + { // add-3 + auto add_3 = std::make_unique(); + add_3->inputIndex = {8, 10}; + add_3->outputIndex = {11}; + add_3->primitive = std::make_unique(); + add_3->primitive->value.type = schema::PrimitiveType_Add; + auto add_3_prim = new schema::AddT; + add_3_prim->activationType = schema::ActivationType_NO_ACTIVATION; + add_3->primitive->value.value = add_3_prim; + add_3->name = "Add3"; + auto tensor_10 = std::make_unique(); + tensor_10->nodeType = schema::NodeType::NodeType_ValueNode; + tensor_10->format = schema::Format_NHWC; + tensor_10->dataType = TypeId::kNumberTypeFloat32; + tensor_10->dims = {1}; + tensor_10->data.resize(sizeof(float)); + auto data10 = reinterpret_cast(tensor_10->data.data()); + ASSERT_NE(data10, nullptr); + data10[0] = 1; + auto tensor_11 = std::make_unique(); + tensor_11->nodeType = schema::NodeType::NodeType_Parameter; + tensor_11->format = schema::Format_NHWC; + tensor_11->dataType = TypeId::kNumberTypeFloat32; + meta_graph->nodes.emplace_back(std::move(add_3)); + meta_graph->allTensors[10] = std::move(tensor_10); + meta_graph->allTensors[11] = std::move(tensor_11); + } + { // add-4 + auto add_4 = std::make_unique(); + add_4->inputIndex = {11, 12}; + add_4->outputIndex = {4}; + add_4->primitive = std::make_unique(); + add_4->primitive->value.type = schema::PrimitiveType_Add; + auto add_4_prim = new schema::AddT; + add_4_prim->activationType = schema::ActivationType_NO_ACTIVATION; + add_4->primitive->value.value = add_4_prim; + add_4->name = "Add4"; + auto tensor_12 = std::make_unique(); + tensor_12->nodeType = schema::NodeType::NodeType_ValueNode; + tensor_12->format = schema::Format_NHWC; + tensor_12->dataType = TypeId::kNumberTypeFloat32; + tensor_12->dims = {1}; + tensor_12->data.resize(sizeof(float)); + auto data12 = reinterpret_cast(tensor_12->data.data()); + ASSERT_NE(data12, nullptr); + data12[0] = 1; + meta_graph->nodes.emplace_back(std::move(add_4)); + meta_graph->allTensors[12] = std::move(tensor_12); + } + { // partial cond + auto partial_cond = std::make_unique(); + partial_cond->inputIndex = {4}; + partial_cond->outputIndex = {9}; + partial_cond->primitive = std::make_unique(); + partial_cond->primitive->value.type = schema::PrimitiveType_Partial; + auto partial_cond_prim = new schema::PartialT; + partial_cond_prim->subGraphIndex = 1; + partial_cond->primitive->value.value = partial_cond_prim; + partial_cond->name = "partial_cond1"; + meta_graph->nodes.emplace_back(std::move(partial_cond)); + } + auto sub_graph_2 = std::make_unique(); + sub_graph_2->name = "while_body"; + sub_graph_2->inputIndices = {8}; + sub_graph_2->outputIndices = {9}; + sub_graph_2->nodeIndices = {8, 9, 10}; + sub_graph_2->tensorIndices = {8, 10, 11, 12, 4, 9}; + meta_graph->subGraph.emplace_back(std::move(sub_graph_2)); + } meta_graph->name = "graph"; - meta_graph->nodes.emplace_back(std::move(add_0)); - meta_graph->nodes.emplace_back(std::move(partial_1)); - meta_graph->nodes.emplace_back(std::move(partial_2)); - meta_graph->nodes.emplace_back(std::move(partial_3)); - meta_graph->nodes.emplace_back(std::move(add_1)); - meta_graph->nodes.emplace_back(std::move(add_2)); - meta_graph->nodes.emplace_back(std::move(add_3)); - meta_graph->allTensors.emplace_back(std::move(tensor_0)); - meta_graph->allTensors.emplace_back(std::move(tensor_1)); - meta_graph->allTensors.emplace_back(std::move(tensor_2)); - meta_graph->allTensors.emplace_back(std::move(tensor_3)); - meta_graph->allTensors.emplace_back(std::move(tensor_4)); - meta_graph->allTensors.emplace_back(std::move(tensor_5)); - meta_graph->allTensors.emplace_back(std::move(tensor_6)); - meta_graph->allTensors.emplace_back(std::move(tensor_7)); - meta_graph->subGraph.emplace_back(std::move(sub_graph_0)); - meta_graph->subGraph.emplace_back(std::move(sub_graph_1)); - meta_graph->subGraph.emplace_back(std::move(sub_graph_2)); - meta_graph->subGraph.emplace_back(std::move(sub_graph_3)); meta_graph->version = lite::Version(); // ----------------------------------------------------------------------- lite::Storage::Save(*meta_graph, diff --git a/mindspore/lite/tools/benchmark/benchmark.cc b/mindspore/lite/tools/benchmark/benchmark.cc index d382e850b4..2eaf256ac8 100644 --- a/mindspore/lite/tools/benchmark/benchmark.cc +++ b/mindspore/lite/tools/benchmark/benchmark.cc @@ -33,11 +33,37 @@ static const char *DELIM_COLON = ":"; static const char *DELIM_COMMA = ","; static const char *DELIM_SLASH = "/"; -int Benchmark::GenerateRandomData(size_t size, void *data) { +int Benchmark::GenerateRandomData(size_t size, void *data, TypeId data_type) { MS_ASSERT(data != nullptr); - char *casted_data = static_cast(data); - for (size_t i = 0; i < size; i++) { - casted_data[i] = static_cast(i); + switch (data_type) { + case kNumberTypeFloat32: + case kNumberTypeFloat: + FillInputData(size, data, std::uniform_real_distribution(-0.5f, 0.5f)); + break; + case kNumberTypeFloat64: + FillInputData(size, data, std::uniform_real_distribution(-0.5, 0.5)); + break; + case kNumberTypeInt64: + FillInputData(size, data, std::uniform_int_distribution(0, 99)); + break; + case kNumberTypeInt: + case kNumberTypeInt32: + FillInputData(size, data, std::uniform_int_distribution(0, 99)); + break; + case kNumberTypeInt16: + FillInputData(size, data, std::uniform_int_distribution(0, 99)); + break; + case kNumberTypeInt8: + FillInputData(size, data, std::uniform_int_distribution(-127, 127)); + break; + case kNumberTypeUInt8: + FillInputData(size, data, std::uniform_int_distribution(0, 254)); + break; + default: + char *casted_data = static_cast(data); + for (size_t i = 0; i < size; i++) { + casted_data[i] = static_cast(i); + } } return RET_OK; } @@ -54,7 +80,7 @@ int Benchmark::GenerateInputData() { if (tensor->data_type() == kObjectTypeString) { status = StringsToMSTensor({"you're the best."}, tensor); } else { - status = GenerateRandomData(tensor->Size(), input_data); + status = GenerateRandomData(tensor->Size(), input_data, tensor->data_type()); } if (status != RET_OK) { std::cerr << "GenerateRandomData for inTensor failed: " << status << std::endl; diff --git a/mindspore/lite/tools/benchmark/benchmark.h b/mindspore/lite/tools/benchmark/benchmark.h index 744653a08a..cf8d97a954 100644 --- a/mindspore/lite/tools/benchmark/benchmark.h +++ b/mindspore/lite/tools/benchmark/benchmark.h @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -124,7 +125,7 @@ class MS_API Benchmark { // call GenerateRandomData to fill inputTensors int GenerateInputData(); - int GenerateRandomData(size_t size, void *data); + int GenerateRandomData(size_t size, void *data, TypeId data_type); int ReadInputFile(); @@ -224,6 +225,14 @@ class MS_API Benchmark { } } + template + void FillInputData(int size, void *data, Distribution distribution) { + MS_ASSERT(data != nullptr); + int elements_num = size / sizeof(T); + (void)std::generate_n(static_cast(data), elements_num, + [&]() { return static_cast(distribution(random_engine_)); }); + } + int MarkPerformance(); int MarkAccuracy(); @@ -249,6 +258,7 @@ class MS_API Benchmark { KernelCallBack before_call_back_; KernelCallBack after_call_back_; + std::mt19937 random_engine_; }; int MS_API RunBenchmark(int argc, const char **argv);