From 1c76edf24c4b37e9b4d0cf8ad0783ae151f80f98 Mon Sep 17 00:00:00 2001
From: sunsuodong
Date: Tue, 3 Nov 2020 16:55:53 +0800
Subject: [PATCH] benchmark support string
---
 mindspore/lite/include/lite_utils.h         |   2 +-
 mindspore/lite/src/scheduler.cc             |   3 +
 mindspore/lite/test/models_tflite.cfg       |   1 +
 mindspore/lite/tools/benchmark/benchmark.cc | 289 +++++++++++++-------
 mindspore/lite/tools/benchmark/benchmark.h  |  29 +-
 5 files changed, 205 insertions(+), 119 deletions(-)

diff --git a/mindspore/lite/include/lite_utils.h b/mindspore/lite/include/lite_utils.h
index b97eccf23a..c059b6e346 100644
--- a/mindspore/lite/include/lite_utils.h
+++ b/mindspore/lite/include/lite_utils.h
@@ -49,6 +49,6 @@ int MS_API StringsToMSTensor(const std::vector<std::string> &inputs, tensor::MSTensor *tensor);
 /// \brief Get string vector from MSTensor.
 /// \param[in] MSTensor.
 /// \return string vector.
-std::vector<std::string> MSTensorToStrings(const tensor::MSTensor *tensor);
+std::vector<std::string> MS_API MSTensorToStrings(const tensor::MSTensor *tensor);
 }  // namespace mindspore::lite
 #endif  // MINDSPORE_LITE_INCLUDE_LITE_UTILS_H_

diff --git a/mindspore/lite/src/scheduler.cc b/mindspore/lite/src/scheduler.cc
index 38b736d544..d96dd0755b 100644
--- a/mindspore/lite/src/scheduler.cc
+++ b/mindspore/lite/src/scheduler.cc
@@ -299,6 +299,9 @@ kernel::LiteKernel *Scheduler::ScheduleNode(const std::vector<Tensor *> &in_tensors, ...)
 TypeId Scheduler::GetFirstFp32Fp16OrInt8Type(const std::vector<Tensor *> &in_tensors) {
   for (const auto &tensor : in_tensors) {
     auto dtype = tensor->data_type();
+    if (dtype == kObjectTypeString) {
+      return kNumberTypeFloat32;
+    }
     if (dtype == kNumberTypeFloat32 || dtype == kNumberTypeFloat16 || dtype == kNumberTypeInt8 ||
         dtype == kNumberTypeInt32 || dtype == kNumberTypeBool) {
       return dtype;

diff --git a/mindspore/lite/test/models_tflite.cfg b/mindspore/lite/test/models_tflite.cfg
index b984dbe566..60dd353403 100644
--- a/mindspore/lite/test/models_tflite.cfg
+++ b/mindspore/lite/test/models_tflite.cfg
@@ -131,3 +131,4 @@
mtk_276landmark_0913.tflite mtk_face_recognition.tflite mtk_convert_model.tflite mtk_model_face_dress_fp16.tflite +smartreply.tflite diff --git a/mindspore/lite/tools/benchmark/benchmark.cc b/mindspore/lite/tools/benchmark/benchmark.cc index 3ed0700f6f..0d7f10d634 100644 --- a/mindspore/lite/tools/benchmark/benchmark.cc +++ b/mindspore/lite/tools/benchmark/benchmark.cc @@ -20,6 +20,7 @@ #undef __STDC_FORMAT_MACROS #include #include +#include #include "include/context.h" #include "include/ms_tensor.h" #include "include/version.h" @@ -49,10 +50,13 @@ int Benchmark::GenerateInputData() { MS_LOG(ERROR) << "MallocData for inTensor failed"; return RET_ERROR; } - MS_ASSERT(tensor->GetData() != nullptr); - auto tensor_byte_size = tensor->Size(); - auto status = GenerateRandomData(tensor_byte_size, input_data); - if (status != 0) { + int status; + if (tensor->data_type() == kObjectTypeString) { + status = StringsToMSTensor({"you're the best."}, tensor); + } else { + status = GenerateRandomData(tensor->Size(), input_data); + } + if (status != RET_OK) { std::cerr << "GenerateRandomData for inTensor failed: " << status << std::endl; MS_LOG(ERROR) << "GenerateRandomData for inTensor failed:" << status; return status; @@ -141,39 +145,64 @@ int Benchmark::ReadCalibData() { in_file.close(); return RET_ERROR; } - - std::string line; - MS_LOG(INFO) << "Start reading calibData file"; + std::string line; std::string tensor_name; + while (!in_file.eof()) { getline(in_file, line); std::stringstream string_line1(line); size_t dim = 0; string_line1 >> tensor_name >> dim; std::vector dims; - size_t shape_size = 1; for (size_t i = 0; i < dim; i++) { size_t tmp_dim; string_line1 >> tmp_dim; dims.push_back(tmp_dim); - shape_size *= tmp_dim; } + auto ret = ReadTensorData(in_file, tensor_name, dims); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Read tensor data failed, tensor name: " << tensor_name; + return RET_ERROR; + } + } + in_file.close(); + MS_LOG(INFO) << "Finish reading calibData file"; 
+ return RET_OK; +} - getline(in_file, line); - std::stringstream string_line2(line); - std::vector tensor_data; +int Benchmark::ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name, + const std::vector &dims) { + std::string line; + getline(in_file_stream, line); + std::stringstream line_stream(line); + tensor::MSTensor *tensor = GetTensorByNodeOrTensorName(tensor_name); + if (tensor == nullptr) { + MS_LOG(ERROR) << "Get tensor failed, tensor name: " << tensor_name; + return RET_ERROR; + } + std::vector data; + std::vector strings_data; + size_t shape_size = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies()); + if (tensor->data_type() == kObjectTypeString) { + strings_data.push_back(line); + for (size_t i = 1; i < shape_size; i++) { + getline(in_file_stream, line); + strings_data.push_back(line); + } + } else { for (size_t i = 0; i < shape_size; i++) { float tmp_data; - string_line2 >> tmp_data; - tensor_data.push_back(tmp_data); + line_stream >> tmp_data; + data.push_back(tmp_data); } - - auto *check_tensor = new CheckTensor(dims, tensor_data); - this->benchmark_data_.insert(std::make_pair(tensor_name, check_tensor)); } - in_file.close(); - MS_LOG(INFO) << "Finish reading calibData file"; + auto *check_tensor = new (std::nothrow) CheckTensor(dims, data, strings_data); + if (check_tensor == nullptr) { + MS_LOG(ERROR) << "Now CheckTensor failed, tensor name: " << tensor_name; + return RET_ERROR; + } + this->benchmark_data_.insert(std::make_pair(tensor_name, check_tensor)); return RET_OK; } @@ -181,80 +210,110 @@ int Benchmark::CompareOutput() { std::cout << "================ Comparing Output data ================" << std::endl; float total_bias = 0; int total_size = 0; - bool has_error = false; for (const auto &calib_tensor : benchmark_data_) { std::string node_or_tensor_name = calib_tensor.first; - auto tensors = session_->GetOutputsByNodeName(node_or_tensor_name); - mindspore::tensor::MSTensor *tensor = nullptr; - if 
(tensors.empty() || tensors.size() != 1) { - MS_LOG(INFO) << "Cannot find output node: " << node_or_tensor_name - << " or node has more than one output tensor, switch to GetOutputByTensorName"; - tensor = session_->GetOutputByTensorName(node_or_tensor_name); - if (tensor == nullptr) { - MS_LOG(ERROR) << "Cannot find output tensor " << node_or_tensor_name << ", get model output failed"; - return RET_ERROR; - } - } else { - tensor = tensors.front(); - } - MS_ASSERT(tensor->MutableData() != nullptr); - float bias = 0; - switch (msCalibDataType) { - case TypeId::kNumberTypeFloat: { - bias = CompareData(node_or_tensor_name, tensor->shape(), static_cast(tensor->MutableData())); - break; - } - case TypeId::kNumberTypeInt8: { - bias = CompareData(node_or_tensor_name, tensor->shape(), static_cast(tensor->MutableData())); - break; - } - case TypeId::kNumberTypeUInt8: { - bias = - CompareData(node_or_tensor_name, tensor->shape(), static_cast(tensor->MutableData())); - break; - } - case TypeId::kNumberTypeInt32: { - bias = - CompareData(node_or_tensor_name, tensor->shape(), static_cast(tensor->MutableData())); - break; - } - default: - MS_LOG(ERROR) << "Datatype " << msCalibDataType << " is not supported."; - return RET_ERROR; + tensor::MSTensor *tensor = GetTensorByNodeOrTensorName(node_or_tensor_name); + if (tensor == nullptr) { + MS_LOG(ERROR) << "Get tensor failed, tensor name: " << node_or_tensor_name; + return RET_ERROR; } - if (bias >= 0) { - total_bias += bias; - total_size++; + int ret; + if (tensor->data_type() == kObjectTypeString) { + ret = CompareStringData(node_or_tensor_name, tensor); } else { - has_error = true; - break; + ret = CompareDataGetTotalBiasAndSize(node_or_tensor_name, tensor, &total_bias, &total_size); + } + if (ret != RET_OK) { + MS_LOG(ERROR) << "Error in CompareData"; + std::cerr << "Error in CompareData" << std::endl; + std::cout << "=======================================================" << std::endl << std::endl; + return ret; } } + float 
mean_bias; + if (total_size != 0) { + mean_bias = total_bias / total_size * 100; + } else { + mean_bias = 0; + } - if (!has_error) { - float mean_bias; - if (total_size != 0) { - mean_bias = total_bias / total_size * 100; - } else { - mean_bias = 0; - } + std::cout << "Mean bias of all nodes/tensors: " << mean_bias << "%" << std::endl; + std::cout << "=======================================================" << std::endl << std::endl; - std::cout << "Mean bias of all nodes/tensors: " << mean_bias << "%" << std::endl; - std::cout << "=======================================================" << std::endl << std::endl; + if (mean_bias > this->flags_->accuracy_threshold_) { + MS_LOG(ERROR) << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%"; + std::cerr << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%" << std::endl; + return RET_ERROR; + } + return RET_OK; +} - if (mean_bias > this->flags_->accuracy_threshold_) { - MS_LOG(ERROR) << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%"; - std::cerr << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%" << std::endl; - return RET_ERROR; - } else { - return RET_OK; - } +tensor::MSTensor *Benchmark::GetTensorByNodeOrTensorName(const std::string &node_or_tensor_name) { + tensor::MSTensor *tensor = nullptr; + auto tensors = session_->GetOutputsByNodeName(node_or_tensor_name); + if (tensors.empty() || tensors.size() != 1) { + MS_LOG(INFO) << "Cannot find output node: " << node_or_tensor_name + << " or node has more than one output tensor, switch to GetOutputByTensorName"; + tensor = session_->GetOutputByTensorName(node_or_tensor_name); } else { - MS_LOG(ERROR) << "Error in CompareData"; - std::cerr << "Error in CompareData" << std::endl; - std::cout << "=======================================================" << std::endl << std::endl; + tensor = tensors.front(); + } + return tensor; +} + +int Benchmark::CompareStringData(const std::string &name, tensor::MSTensor 
*tensor) { + auto iter = this->benchmark_data_.find(name); + if (iter != this->benchmark_data_.end()) { + std::vector calib_strings = iter->second->strings_data; + std::vector output_strings = MSTensorToStrings(tensor); + size_t compare_num = std::min(calib_strings.size(), output_strings.size()); + size_t print_num = std::min(compare_num, static_cast(5)); + + std::cout << "Data of node " << name << " : " << std::endl; + for (size_t i = 0; i < compare_num; i++) { + if (i < print_num) { + std::cout << " " << output_strings[i] << std::endl; + } + if (calib_strings[i] != output_strings[i]) { + MS_LOG(ERROR) << ""; + return RET_ERROR; + } + } + } + return RET_OK; +} + +int Benchmark::CompareDataGetTotalBiasAndSize(const std::string &name, tensor::MSTensor *tensor, float *total_bias, + int *total_size) { + float bias = 0; + switch (msCalibDataType) { + case TypeId::kNumberTypeFloat: { + bias = CompareData(name, tensor->shape(), tensor->MutableData()); + break; + } + case TypeId::kNumberTypeInt8: { + bias = CompareData(name, tensor->shape(), tensor->MutableData()); + break; + } + case TypeId::kNumberTypeUInt8: { + bias = CompareData(name, tensor->shape(), tensor->MutableData()); + break; + } + case TypeId::kNumberTypeInt32: { + bias = CompareData(name, tensor->shape(), tensor->MutableData()); + break; + } + default: + MS_LOG(ERROR) << "Datatype " << msCalibDataType << " is not supported."; + return RET_ERROR; + } + if (bias < 0) { + MS_LOG(ERROR) << "CompareData failed, name: " << name; return RET_ERROR; } + *total_bias += bias; + *total_size += 1; + return RET_OK; } int Benchmark::MarkPerformance() { @@ -316,42 +375,25 @@ int Benchmark::MarkPerformance() { int Benchmark::MarkAccuracy() { MS_LOG(INFO) << "MarkAccuracy"; std::cout << "MarkAccuracy" << std::endl; - for (auto &msInput : ms_inputs_) { - switch (msInput->data_type()) { - case TypeId::kNumberTypeFloat: - PrintInputData(msInput); - break; - case TypeId::kNumberTypeFloat32: - PrintInputData(msInput); - break; - 
case TypeId::kNumberTypeInt8: - PrintInputData(msInput); - break; - case TypeId::kNumberTypeUInt8: - PrintInputData(msInput); - break; - case TypeId::kNumberTypeInt32: - PrintInputData(msInput); - break; - default: - MS_LOG(ERROR) << "Datatype " << msInput->data_type() << " is not supported."; - return RET_ERROR; - } + + auto status = PrintInputData(); + if (status != RET_OK) { + MS_LOG(ERROR) << "PrintInputData error " << status; + std::cerr << "PrintInputData error " << status << std::endl; + return status; } - auto status = session_->RunGraph(); + status = session_->RunGraph(); if (status != RET_OK) { MS_LOG(ERROR) << "Inference error " << status; std::cerr << "Inference error " << status << std::endl; return status; } - status = ReadCalibData(); if (status != RET_OK) { MS_LOG(ERROR) << "Read calib data error " << status; std::cerr << "Read calib data error " << status << std::endl; return status; } - status = CompareOutput(); if (status != RET_OK) { MS_LOG(ERROR) << "Compare output error " << status; @@ -361,6 +403,43 @@ int Benchmark::MarkAccuracy() { return RET_OK; } +int Benchmark::PrintInputData() { + for (size_t i = 0; i < ms_inputs_.size(); i++) { + auto input = ms_inputs_[i]; + MS_ASSERT(input != nullptr); + auto tensor_data_type = input->data_type(); + + std::cout << "InData" << i << ": "; + if (tensor_data_type == TypeId::kObjectTypeString) { + std::vector output_strings = MSTensorToStrings(input); + size_t print_num = std::min(output_strings.size(), static_cast(20)); + for (size_t j = 0; j < print_num; j++) { + std::cout << output_strings[j] << std::endl; + } + continue; + } + size_t print_num = std::min(input->ElementsNum(), 20); + const void *in_data = input->MutableData(); + + for (size_t j = 0; j < print_num; j++) { + if (tensor_data_type == TypeId::kNumberTypeFloat32 || tensor_data_type == TypeId::kNumberTypeFloat) { + std::cout << static_cast(in_data)[j] << " "; + } else if (tensor_data_type == TypeId::kNumberTypeInt8) { + std::cout << 
static_cast(in_data)[j] << " "; + } else if (tensor_data_type == TypeId::kNumberTypeUInt8) { + std::cout << static_cast(in_data)[j] << " "; + } else if (tensor_data_type == TypeId::kNumberTypeInt32) { + std::cout << static_cast(in_data)[j] << " "; + } else { + MS_LOG(ERROR) << "Datatype: " << tensor_data_type << " is not supported."; + return RET_ERROR; + } + } + std::cout << std::endl; + } + return RET_OK; +} + int Benchmark::RunBenchmark() { auto start_prepare_time = GetTimeUs(); // Load graph diff --git a/mindspore/lite/tools/benchmark/benchmark.h b/mindspore/lite/tools/benchmark/benchmark.h index 5795e9ed3c..0d68f1a3b8 100644 --- a/mindspore/lite/tools/benchmark/benchmark.h +++ b/mindspore/lite/tools/benchmark/benchmark.h @@ -42,12 +42,15 @@ constexpr float relativeTolerance = 1e-5; constexpr float absoluteTolerance = 1e-8; struct MS_API CheckTensor { - CheckTensor(const std::vector &shape, const std::vector &data) { + CheckTensor(const std::vector &shape, const std::vector &data, + const std::vector &strings_data = {""}) { this->shape = shape; this->data = data; + this->strings_data = strings_data; } std::vector shape; std::vector data; + std::vector strings_data; }; class MS_API BenchmarkFlags : public virtual FlagParser { @@ -127,27 +130,27 @@ class MS_API Benchmark { int ReadCalibData(); + int ReadTensorData(std::ifstream &in_file_stream, const std::string &tensor_name, const std::vector &dims); + int CompareOutput(); + tensor::MSTensor *GetTensorByNodeOrTensorName(const std::string &node_or_tensor_name); + + int CompareStringData(const std::string &name, tensor::MSTensor *tensor); + + int CompareDataGetTotalBiasAndSize(const std::string &name, tensor::MSTensor *tensor, float *total_bias, + int *total_size); + int InitCallbackParameter(); int PrintResult(const std::vector &title, const std::map> &result); - template - void PrintInputData(tensor::MSTensor *input) { - MS_ASSERT(input != nullptr); - static int i = 0; - auto inData = 
reinterpret_cast(input->MutableData()); - std::cout << "InData" << i++ << ": "; - for (size_t j = 0; j < 20; j++) { - std::cout << static_cast(inData[j]) << " "; - } - std::cout << std::endl; - } + int PrintInputData(); // tensorData need to be converter first template - float CompareData(const std::string &nodeName, std::vector msShape, T *msTensorData) { + float CompareData(const std::string &nodeName, std::vector msShape, const void *tensor_data) { + const T *msTensorData = static_cast(tensor_data); auto iter = this->benchmark_data_.find(nodeName); if (iter != this->benchmark_data_.end()) { std::vector castedMSShape;