diff --git a/mindspore/lite/CMakeLists.txt b/mindspore/lite/CMakeLists.txt index 887da2c9de..3c71257eb9 100644 --- a/mindspore/lite/CMakeLists.txt +++ b/mindspore/lite/CMakeLists.txt @@ -119,4 +119,5 @@ if (BUILD_DEVICE) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/test) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/time_profile) endif() diff --git a/mindspore/lite/include/lite_session.h b/mindspore/lite/include/lite_session.h index 57683641c8..ea762f0f60 100644 --- a/mindspore/lite/include/lite_session.h +++ b/mindspore/lite/include/lite_session.h @@ -27,7 +27,8 @@ namespace mindspore { namespace session { struct CallBackParam { - std::string name_callback_aram; + std::string name_callback_param; + std::string type_callback_param; }; using KernelCallBack = std::function inputs, diff --git a/mindspore/lite/src/common/ms_tensor_utils.cc b/mindspore/lite/src/common/ms_tensor_utils.cc index 529d9f47bc..44d04afbc8 100644 --- a/mindspore/lite/src/common/ms_tensor_utils.cc +++ b/mindspore/lite/src/common/ms_tensor_utils.cc @@ -33,7 +33,7 @@ std::vector PackToMSTensors(const std::vector &in_tensors) MS_LOG(ERROR) << "new LiteTensor failed"; return ret; } - ret.emplace_back(); + ret.emplace_back(ms_tensor); } return ret; } diff --git a/mindspore/lite/src/executor.cc b/mindspore/lite/src/executor.cc index 27289dfa0b..b6159f9ba9 100644 --- a/mindspore/lite/src/executor.cc +++ b/mindspore/lite/src/executor.cc @@ -43,7 +43,8 @@ int Executor::Run(std::vector &inputs, std::vectorMallocData(); } session::CallBackParam callbackParam; - callbackParam.name_callback_aram = kernel->Name(); + callbackParam.name_callback_param = kernel->Name(); + callbackParam.type_callback_param = kernel->type_str(); if (before != nullptr) { if (!before(PackToMSTensors(kernel->GetInputs()), PackToMSTensors(kernel->GetOutputs()), callbackParam)) { diff --git a/mindspore/lite/src/runtime/opencl/opencl_executor.cc b/mindspore/lite/src/runtime/opencl/opencl_executor.cc index d59e4e2f70..216c9121fc 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_executor.cc +++ b/mindspore/lite/src/runtime/opencl/opencl_executor.cc @@ -48,7 +48,7 @@ int OpenCLExecutor::Run(std::vector &inputs, std::vectorMallocData(); } session::CallBackParam callbackParam; - callbackParam.name_callback_aram = kernel->Name(); + callbackParam.name_callback_param = kernel->Name(); if (before != nullptr) { if (!before(PackToMSTensors(kernel->GetInputs()), PackToMSTensors(kernel->GetOutputs()), callbackParam)) { diff --git a/mindspore/lite/tools/converter/quantizer/post_training.cc b/mindspore/lite/tools/converter/quantizer/post_training.cc index c1dd6f6c7b..28e61c5c3c 100644 --- a/mindspore/lite/tools/converter/quantizer/post_training.cc +++ b/mindspore/lite/tools/converter/quantizer/post_training.cc @@ -791,14 +791,14 @@ STATUS PostTrainingQuantizer::DoInference() { [&](const std::vector &beforeInputs, const std::vector &beforeOutputs, const mindspore::session::CallBackParam &callParam) -> bool { - if (PostTrainingQuantizer::CheckTensorVec(callParam.name_callback_aram, beforeInputs) != RET_OK) { + if (PostTrainingQuantizer::CheckTensorVec(callParam.name_callback_param, beforeInputs) != RET_OK) { return false; } auto tensor = beforeInputs[0]; const float *tData = static_cast(tensor->MutableData()); size_t shapeSize = tensor->ElementsNum(); vector data(tData, tData + shapeSize); - this->calibrator_->RecordMaxValue(callParam.name_callback_aram, data, this->calibrator_->GetInputDivergInfo()); + this->calibrator_->RecordMaxValue(callParam.name_callback_param, data, this->calibrator_->GetInputDivergInfo()); return true; }; // func @@ -806,14 +806,14 @@ STATUS PostTrainingQuantizer::DoInference() { const std::vector &afterInputs, const std::vector &afterOutputs, const mindspore::session::CallBackParam &callParam) -> bool { - if (PostTrainingQuantizer::CheckTensorVec(callParam.name_callback_aram, afterOutputs) != RET_OK) { + if (PostTrainingQuantizer::CheckTensorVec(callParam.name_callback_param, afterOutputs) != RET_OK) { return false; } auto tensor = afterOutputs[0]; const float *tensor_data = static_cast(tensor->MutableData()); size_t shape_size = tensor->ElementsNum(); vector data(tensor_data, tensor_data + shape_size); - this->calibrator_->RecordMaxValue(callParam.name_callback_aram, data, this->calibrator_->GetOutputDivergInfo()); + this->calibrator_->RecordMaxValue(callParam.name_callback_param, data, this->calibrator_->GetOutputDivergInfo()); return true; }; status = session_->RunGraph(beforeCallBack, afterCallBack); @@ -844,14 +844,14 @@ STATUS PostTrainingQuantizer::CollectDataFrequency() { [&](const std::vector &beforeInputs, const std::vector &beforeOutputs, const mindspore::session::CallBackParam &callParam) { - if (PostTrainingQuantizer::CheckTensorVec(callParam.name_callback_aram, beforeInputs) != RET_OK) { + if (PostTrainingQuantizer::CheckTensorVec(callParam.name_callback_param, beforeInputs) != RET_OK) { return false; } auto tensor = beforeInputs[0]; const float *tensor_data = static_cast(tensor->MutableData()); size_t shape_size = tensor->ElementsNum(); vector data(tensor_data, tensor_data + shape_size); - this->calibrator_->UpdateDataFrequency(callParam.name_callback_aram, data, tensor->shape(), + this->calibrator_->UpdateDataFrequency(callParam.name_callback_param, data, tensor->shape(), this->calibrator_->GetInputDivergInfo()); return true; }; @@ -860,14 +860,14 @@ STATUS PostTrainingQuantizer::CollectDataFrequency() { [&](const std::vector &after_inputs, const std::vector &after_outputs, const mindspore::session::CallBackParam &call_param) { - if (PostTrainingQuantizer::CheckTensorVec(call_param.name_callback_aram, after_outputs) != RET_OK) { + if (PostTrainingQuantizer::CheckTensorVec(call_param.name_callback_param, after_outputs) != RET_OK) { return false; } auto tensor = after_outputs[0]; const float *tenosr_data = static_cast(tensor->MutableData()); size_t shape_size = tensor->ElementsNum(); vector data(tenosr_data, tenosr_data + shape_size); - this->calibrator_->UpdateDataFrequency(call_param.name_callback_aram, data, tensor->shape(), + this->calibrator_->UpdateDataFrequency(call_param.name_callback_param, data, tensor->shape(), this->calibrator_->GetOutputDivergInfo()); return true; }; diff --git a/mindspore/lite/tools/time_profile/CMakeLists.txt b/mindspore/lite/tools/time_profile/CMakeLists.txt new file mode 100644 index 0000000000..4eb61c24e3 --- /dev/null +++ b/mindspore/lite/tools/time_profile/CMakeLists.txt @@ -0,0 +1,18 @@ +# add shared link library + +set(COMMON_SRC + ${CMAKE_CURRENT_SOURCE_DIR}/../common/flag_parser.cc + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/common/file_utils.cc + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/common/utils.cc + ) + +add_executable(timeprofile + ${CMAKE_CURRENT_SOURCE_DIR}/main.cc + ${CMAKE_CURRENT_SOURCE_DIR}/time_profile.cc + ${COMMON_SRC}) + +if (PLATFORM_ARM32 OR PLATFORM_ARM64) + target_link_libraries(timeprofile mindspore-lite ${SECUREC_LIBRARY}) +else() + target_link_libraries(timeprofile mindspore-lite ${SECUREC_LIBRARY} pthread) +endif() diff --git a/mindspore/lite/tools/time_profile/main.cc b/mindspore/lite/tools/time_profile/main.cc new file mode 100644 index 0000000000..73b537a1b7 --- /dev/null +++ b/mindspore/lite/tools/time_profile/main.cc @@ -0,0 +1,19 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tools/time_profile/time_profile.h" + +int main(int argc, const char **argv) { return mindspore::lite::RunTimeProfile(argc, argv); } diff --git a/mindspore/lite/tools/time_profile/time_profile.cc b/mindspore/lite/tools/time_profile/time_profile.cc new file mode 100644 index 0000000000..d17cdc2dad --- /dev/null +++ b/mindspore/lite/tools/time_profile/time_profile.cc @@ -0,0 +1,372 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tools/time_profile/time_profile.h" +#define __STDC_FORMAT_MACROS +#include +#undef __STDC_FORMAT_MACROS +#include +#include +#include +#include "include/ms_tensor.h" +#include "utils/log_adapter.h" +#include "include/context.h" + +namespace mindspore { +namespace lite { +int TimeProfile::GenerateRandomData(size_t size, void *data) { + MS_ASSERT(data != nullptr); + char *castedData = static_cast(data); + for (size_t i = 0; i < size; i++) { + castedData[i] = static_cast(i); + } + return RET_OK; +} + +int TimeProfile::GenerateInputData() { + for (auto tensor : ms_inputs_) { + MS_ASSERT(tensor != nullptr); + auto input_data = tensor->MutableData(); + if (input_data == nullptr) { + MS_LOG(ERROR) << "MallocData for inTensor failed"; + } + MS_ASSERT(tensor->GetData() != nullptr); + auto tensor_byte_size = tensor->Size(); + auto status = GenerateRandomData(tensor_byte_size, input_data); + if (status != RET_OK) { + MS_LOG(ERROR) << "Generate RandomData for inTensor failed %d" << status; + } + } + return RET_OK; +} + +int TimeProfile::ReadInputFile() { + if (ms_inputs_.empty()) { + return RET_OK; + } + + auto inTensor = ms_inputs_.at(0); + MS_ASSERT(inTensor != nullptr); + + size_t size; + char *bin_buf = ReadFile(_flags->in_data_path_.c_str(), &size); + + auto tensor_data_size = inTensor->Size(); + if (size != tensor_data_size) { + MS_LOG(ERROR) << "Input binary file size error, required: %zu, in fact: %zu" << tensor_data_size << size; + } + auto input_data = inTensor->MutableData(); + memcpy(input_data, bin_buf, tensor_data_size); + return RET_OK; +} + +int TimeProfile::LoadInput() { + ms_inputs_ = session_->GetInputs(); + if (_flags->in_data_path_.empty()) { + auto status = GenerateInputData(); + if (status != RET_OK) { + MS_LOG(ERROR) << "Generate input data error " << status; + } + } else { + auto status = ReadInputFile(); + if (status != RET_OK) { + MS_LOG(ERROR) << "ReadInputFile error, " << status; + } + } + return RET_OK; +} + +int TimeProfile::InitSession() { + size_t size = 0; + char *graph_buf = ReadFile(_flags->model_path_.c_str(), &size); + if (graph_buf == nullptr) { + MS_LOG(ERROR) << "Load graph failed, path %s" << _flags->model_path_; + } + + auto ctx = new lite::Context; + ctx->cpu_bind_mode_ = static_cast(_flags->cpu_bind_mode_); + ctx->device_ctx_.type = lite::DT_CPU; + ctx->thread_num_ = _flags->num_threads_; + + session_ = session::LiteSession::CreateSession(ctx); + if (session_ == nullptr) { + MS_LOG(ERROR) << "New session failed while running."; + } + + return RET_OK; +} + +int TimeProfile::InitCallbackParameter() { + // before callback + before_call_back_ = [&](const std::vector &before_inputs, + const std::vector &before_outputs, + const session::CallBackParam &callParam) { + if (before_inputs.empty()) { + MS_LOG(INFO) << "The num of beforeInputs is empty"; + } + if (before_outputs.empty()) { + MS_LOG(INFO) << "The num of beforeOutputs is empty"; + } + if (op_times_by_type_.find(callParam.type_callback_param) == op_times_by_type_.end()) { + op_times_by_type_.insert(std::make_pair(callParam.type_callback_param, std::make_pair(0, 0.0f))); + } + if (op_times_by_name_.find(callParam.name_callback_param) == op_times_by_name_.end()) { + op_times_by_name_.insert(std::make_pair(callParam.name_callback_param, std::make_pair(0, 0.0f))); + } + + op_call_times_total_++; + op_begin_ = GetTimeUs(); + return true; + }; + + // after callback + after_call_back_ = [&](const std::vector &after_inputs, + const std::vector &after_outputs, + const session::CallBackParam &call_param) { + uint64_t opEnd = GetTimeUs(); + + if (after_inputs.empty()) { + MS_LOG(INFO) << "The num of beforeInputs is empty"; + } + if (after_outputs.empty()) { + MS_LOG(INFO) << "The num of beforeOutputs is empty"; + } + + float cost = static_cast(opEnd - op_begin_) / 1000.0f; + op_cost_total_ += cost; + op_times_by_type_[call_param.type_callback_param].first++; + op_times_by_type_[call_param.type_callback_param].second += cost; + op_times_by_name_[call_param.name_callback_param].first++; + op_times_by_name_[call_param.name_callback_param].second += cost; + return true; + }; + + return RET_OK; +} + +int TimeProfile::Init() { + if (this->_flags == nullptr) { + return 1; + } + MS_LOG(INFO) << "ModelPath = " << _flags->model_path_; + MS_LOG(INFO) << "InDataPath = " << _flags->in_data_path_; + MS_LOG(INFO) << "LoopCount = " << _flags->loop_count_; + MS_LOG(INFO) << "NumThreads = " << _flags->num_threads_; + if (_flags->cpu_bind_mode_ == -1) { + MS_LOG(INFO) << "cpuBindMode = MID_CPU"; + } else if (_flags->cpu_bind_mode_ == 1) { + MS_LOG(INFO) << "cpuBindMode = HIGHER_CPU"; + } else { + MS_LOG(INFO) << "cpuBindMode = NO_BIND"; + } + + if (_flags->model_path_.empty()) { + MS_LOG(ERROR) << "modelPath is required"; + return 1; + } + + auto status = InitSession(); + if (status != RET_OK) { + MS_LOG(ERROR) << "Init session failed."; + return RET_ERROR; + } + + status = this->LoadInput(); + if (status != RET_OK) { + MS_LOG(ERROR) << "Load input failed."; + return RET_ERROR; + } + + status = InitCallbackParameter(); + if (status != RET_OK) { + MS_LOG(ERROR) << "Init callback Parameter failed."; + return RET_ERROR; + } + + return RET_OK; +} + +int TimeProfile::PrintResult(const std::vector &title, + const std::map> &result) { + std::vector columnLenMax(5); + std::vector> rows; + + for (auto &iter : result) { + char stringBuf[5][100] = {}; + std::vector columns; + int len; + + len = iter.first.size(); + if (len > columnLenMax.at(0)) { + columnLenMax.at(0) = len + 4; + } + columns.push_back(iter.first); + + len = sprintf_s(stringBuf[1], 100, "%f", iter.second.second / _flags->loop_count_); + if (len > columnLenMax.at(1)) { + columnLenMax.at(1) = len + 4; + } + columns.emplace_back(stringBuf[1]); + + len = sprintf_s(stringBuf[2], 100, "%f", iter.second.second / op_cost_total_); + if (len > columnLenMax.at(2)) { + columnLenMax.at(2) = len + 4; + } + columns.emplace_back(stringBuf[2]); + + len = sprintf_s(stringBuf[3], 100, "%d", iter.second.first); + if (len > columnLenMax.at(3)) { + columnLenMax.at(3) = len + 4; + } + columns.emplace_back(stringBuf[3]); + + len = sprintf_s(stringBuf[4], 100, "%f", iter.second.second); + if (len > columnLenMax.at(4)) { + columnLenMax.at(4) = len + 4; + } + columns.emplace_back(stringBuf[4]); + + rows.push_back(columns); + } + + printf("-------------------------------------------------------------------------\n"); + for (int i = 0; i < 5; i++) { + auto printBuf = title[i]; + if (printBuf.size() > columnLenMax.at(i)) { + columnLenMax.at(i) = printBuf.size(); + } + printBuf.resize(columnLenMax.at(i), ' '); + printf("%s", printBuf.c_str()); + } + printf("\n"); + for (int i = 0; i < rows.size(); i++) { + for (int j = 0; j < 5; j++) { + auto printBuf = rows[i][j]; + printBuf.resize(columnLenMax.at(j), ' '); + printf("%s\t", printBuf.c_str()); + } + printf("\n"); + } + return RET_OK; +} + +int TimeProfile::RunTimeProfile() { + uint64_t time_avg = 0; + + // Load graph + std::string modelName = _flags->model_path_.substr(_flags->model_path_.find_last_of("/") + 1); + + MS_LOG(INFO) << "start reading model file"; + size_t size = 0; + char *graphBuf = ReadFile(_flags->model_path_.c_str(), &size); + if (graphBuf == nullptr) { + MS_LOG(ERROR) << "Load graph failed while running %s", modelName.c_str(); + return 1; + } + auto model = lite::Model::Import(graphBuf, size); + + auto ret = session_->CompileGraph(model.get()); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Compile graph failed."; + return RET_ERROR; + } + + // load input + MS_LOG(INFO) << "start generate input data"; + auto status = LoadInput(); + if (status != 0) { + MS_LOG(ERROR) << "Generate input data error"; + return status; + } + + // run graph and test + for (int i = 0; i < _flags->loop_count_; i++) { + session_->BindThread(true); + uint64_t run_begin = GetTimeUs(); + + ret = session_->RunGraph(before_call_back_, after_call_back_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Run graph failed."; + } + auto outputs = session_->GetOutputs(); + + uint64_t run_end = GetTimeUs(); + uint64_t time = run_end - run_begin; + time_avg += time; + session_->BindThread(false); + /* + for(auto &output : outputs) { + for (auto &outputTensor : output.second) { + delete outputTensor; + } + }*/ + outputs.clear(); + } + + time_avg /= _flags->loop_count_; + float runCost = static_cast(time_avg) / 1000.0f; + + if (ret != RET_OK) { + MS_LOG(ERROR) << "Run session failed."; + } + + const std::vector per_op_name = {"opName", "avg(ms)", "percent", "calledTimes", "opTotalTime"}; + const std::vector per_op_type = {"opType", "avg(ms)", "percent", "calledTimes", "opTotalTime"}; + PrintResult(per_op_name, op_times_by_name_); + PrintResult(per_op_type, op_times_by_type_); + + printf("\n total time: %5.5f ms, kernel cost: %5.5f ms \n\n", runCost, op_cost_total_ / _flags->loop_count_); + printf("-------------------------------------------------------------------------\n"); + + for (auto &msInput : ms_inputs_) { + delete msInput; + } + ms_inputs_.clear(); + delete graphBuf; + return ret; +} + +int RunTimeProfile(int argc, const char **argv) { + TimeProfileFlags flags; + Option err = flags.ParseFlags(argc, argv); + + if (err.IsSome()) { + std::cerr << err.Get() << std::endl; + std::cerr << flags.Usage() << std::endl; + return -1; + } + + if (flags.help) { + std::cerr << flags.Usage() << std::endl; + return 0; + } + + TimeProfile time_profile(&flags); + auto ret = time_profile.Init(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Init TimeProfile failed."; + } + + ret = time_profile.RunTimeProfile(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Run TimeProfile failed."; + } + + return RET_OK; +} + +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/tools/time_profile/time_profile.h b/mindspore/lite/tools/time_profile/time_profile.h new file mode 100644 index 0000000000..eaad720d34 --- /dev/null +++ b/mindspore/lite/tools/time_profile/time_profile.h @@ -0,0 +1,95 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINNIE_TIMEPROFILE_TIMEPROFILE_H_ +#define MINNIE_TIMEPROFILE_TIMEPROFILE_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "tools/common/flag_parser.h" +#include "src/common/file_utils.h" +#include "src/common/utils.h" +#include "schema/model_generated.h" +#include "include/model.h" +#include "include/lite_session.h" + + +namespace mindspore { +namespace lite { + +class MS_API TimeProfileFlags : public virtual FlagParser { + public: + TimeProfileFlags() { + AddFlag(&TimeProfileFlags::model_path_, "modelPath", "Input model path", ""); + AddFlag(&TimeProfileFlags::in_data_path_, "inDataPath", "Input data path, if not set, use random input", ""); + AddFlag(&TimeProfileFlags::cpu_bind_mode_, "cpuBindMode", + "Input -1 for MID_CPU, 1 for HIGHER_CPU, 0 for NO_BIND, defalut value: 1", 1); + AddFlag(&TimeProfileFlags::loop_count_, "loopCount", "Run loop count", 10); + AddFlag(&TimeProfileFlags::num_threads_, "numThreads", "Run threads number", 2); + } + + ~TimeProfileFlags() override = default; + + public: + std::string model_path_; + std::string in_data_path_; + int cpu_bind_mode_ = 1; + int loop_count_; + int num_threads_; +}; + +class MS_API TimeProfile { + public: + explicit TimeProfile(TimeProfileFlags *flags) : _flags(flags) {} + ~TimeProfile() = default; + + int Init(); + int RunTimeProfile(); + + private: + int GenerateRandomData(size_t size, void *data); + int GenerateInputData(); + int LoadInput(); + int ReadInputFile(); + int InitCallbackParameter(); + int InitSession(); + int PrintResult(const std::vector& title, const std::map>& result); + + private: + TimeProfileFlags *_flags; + std::vector ms_inputs_; + session::LiteSession *session_; + + // callback parameters + uint64_t op_begin_ = 0; + int op_call_times_total_ = 0; + float op_cost_total_ = 0.0f; + std::map> op_times_by_type_; + std::map> op_times_by_name_; + + session::KernelCallBack before_call_back_; + session::KernelCallBack after_call_back_; +}; + +int MS_API RunTimeProfile(int argc, const char **argv); +} // namespace lite +} // namespace mindspore +#endif // MINNIE_TIMEPROFILE_TIMEPROFILE_H_