Browse Source

add time profile

tags/v0.7.0-beta
yeyunpeng 5 years ago
parent
commit
32d85c0f94
10 changed files with 519 additions and 12 deletions
  1. +1
    -0
      mindspore/lite/CMakeLists.txt
  2. +2
    -1
      mindspore/lite/include/lite_session.h
  3. +1
    -1
      mindspore/lite/src/common/ms_tensor_utils.cc
  4. +2
    -1
      mindspore/lite/src/executor.cc
  5. +1
    -1
      mindspore/lite/src/runtime/opencl/opencl_executor.cc
  6. +8
    -8
      mindspore/lite/tools/converter/quantizer/post_training.cc
  7. +18
    -0
      mindspore/lite/tools/time_profile/CMakeLists.txt
  8. +19
    -0
      mindspore/lite/tools/time_profile/main.cc
  9. +372
    -0
      mindspore/lite/tools/time_profile/time_profile.cc
  10. +95
    -0
      mindspore/lite/tools/time_profile/time_profile.h

+ 1
- 0
mindspore/lite/CMakeLists.txt View File

@@ -119,4 +119,5 @@ if (BUILD_DEVICE)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/test)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/time_profile)
endif()

+ 2
- 1
mindspore/lite/include/lite_session.h View File

@@ -27,7 +27,8 @@
namespace mindspore {
namespace session {
struct CallBackParam {
std::string name_callback_aram;
std::string name_callback_param;
std::string type_callback_param;
};

using KernelCallBack = std::function<bool(std::vector<tensor::MSTensor *> inputs,


+ 1
- 1
mindspore/lite/src/common/ms_tensor_utils.cc View File

@@ -33,7 +33,7 @@ std::vector<MSTensor *> PackToMSTensors(const std::vector<Tensor *> &in_tensors)
MS_LOG(ERROR) << "new LiteTensor failed";
return ret;
}
ret.emplace_back();
ret.emplace_back(ms_tensor);
}
return ret;
}


+ 2
- 1
mindspore/lite/src/executor.cc View File

@@ -43,7 +43,8 @@ int Executor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tensor::Ten
output->MallocData();
}
session::CallBackParam callbackParam;
callbackParam.name_callback_aram = kernel->Name();
callbackParam.name_callback_param = kernel->Name();
callbackParam.type_callback_param = kernel->type_str();

if (before != nullptr) {
if (!before(PackToMSTensors(kernel->GetInputs()), PackToMSTensors(kernel->GetOutputs()), callbackParam)) {


+ 1
- 1
mindspore/lite/src/runtime/opencl/opencl_executor.cc View File

@@ -48,7 +48,7 @@ int OpenCLExecutor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tenso
output->MallocData();
}
session::CallBackParam callbackParam;
callbackParam.name_callback_aram = kernel->Name();
callbackParam.name_callback_param = kernel->Name();

if (before != nullptr) {
if (!before(PackToMSTensors(kernel->GetInputs()), PackToMSTensors(kernel->GetOutputs()), callbackParam)) {


+ 8
- 8
mindspore/lite/tools/converter/quantizer/post_training.cc View File

@@ -791,14 +791,14 @@ STATUS PostTrainingQuantizer::DoInference() {
[&](const std::vector<mindspore::tensor::MSTensor *> &beforeInputs,
const std::vector<mindspore::tensor::MSTensor *> &beforeOutputs,
const mindspore::session::CallBackParam &callParam) -> bool {
if (PostTrainingQuantizer::CheckTensorVec(callParam.name_callback_aram, beforeInputs) != RET_OK) {
if (PostTrainingQuantizer::CheckTensorVec(callParam.name_callback_param, beforeInputs) != RET_OK) {
return false;
}
auto tensor = beforeInputs[0];
const float *tData = static_cast<const float *>(tensor->MutableData());
size_t shapeSize = tensor->ElementsNum();
vector<float> data(tData, tData + shapeSize);
this->calibrator_->RecordMaxValue(callParam.name_callback_aram, data, this->calibrator_->GetInputDivergInfo());
this->calibrator_->RecordMaxValue(callParam.name_callback_param, data, this->calibrator_->GetInputDivergInfo());
return true;
};
// func
@@ -806,14 +806,14 @@ STATUS PostTrainingQuantizer::DoInference() {
const std::vector<mindspore::tensor::MSTensor *> &afterInputs,
const std::vector<mindspore::tensor::MSTensor *> &afterOutputs,
const mindspore::session::CallBackParam &callParam) -> bool {
if (PostTrainingQuantizer::CheckTensorVec(callParam.name_callback_aram, afterOutputs) != RET_OK) {
if (PostTrainingQuantizer::CheckTensorVec(callParam.name_callback_param, afterOutputs) != RET_OK) {
return false;
}
auto tensor = afterOutputs[0];
const float *tensor_data = static_cast<const float *>(tensor->MutableData());
size_t shape_size = tensor->ElementsNum();
vector<float> data(tensor_data, tensor_data + shape_size);
this->calibrator_->RecordMaxValue(callParam.name_callback_aram, data, this->calibrator_->GetOutputDivergInfo());
this->calibrator_->RecordMaxValue(callParam.name_callback_param, data, this->calibrator_->GetOutputDivergInfo());
return true;
};
status = session_->RunGraph(beforeCallBack, afterCallBack);
@@ -844,14 +844,14 @@ STATUS PostTrainingQuantizer::CollectDataFrequency() {
[&](const std::vector<mindspore::tensor::MSTensor *> &beforeInputs,
const std::vector<mindspore::tensor::MSTensor *> &beforeOutputs,
const mindspore::session::CallBackParam &callParam) {
if (PostTrainingQuantizer::CheckTensorVec(callParam.name_callback_aram, beforeInputs) != RET_OK) {
if (PostTrainingQuantizer::CheckTensorVec(callParam.name_callback_param, beforeInputs) != RET_OK) {
return false;
}
auto tensor = beforeInputs[0];
const float *tensor_data = static_cast<const float *>(tensor->MutableData());
size_t shape_size = tensor->ElementsNum();
vector<float> data(tensor_data, tensor_data + shape_size);
this->calibrator_->UpdateDataFrequency(callParam.name_callback_aram, data, tensor->shape(),
this->calibrator_->UpdateDataFrequency(callParam.name_callback_param, data, tensor->shape(),
this->calibrator_->GetInputDivergInfo());
return true;
};
@@ -860,14 +860,14 @@ STATUS PostTrainingQuantizer::CollectDataFrequency() {
[&](const std::vector<mindspore::tensor::MSTensor *> &after_inputs,
const std::vector<mindspore::tensor::MSTensor *> &after_outputs,
const mindspore::session::CallBackParam &call_param) {
if (PostTrainingQuantizer::CheckTensorVec(call_param.name_callback_aram, after_outputs) != RET_OK) {
if (PostTrainingQuantizer::CheckTensorVec(call_param.name_callback_param, after_outputs) != RET_OK) {
return false;
}
auto tensor = after_outputs[0];
const float *tenosr_data = static_cast<const float *>(tensor->MutableData());
size_t shape_size = tensor->ElementsNum();
vector<float> data(tenosr_data, tenosr_data + shape_size);
this->calibrator_->UpdateDataFrequency(call_param.name_callback_aram, data, tensor->shape(),
this->calibrator_->UpdateDataFrequency(call_param.name_callback_param, data, tensor->shape(),
this->calibrator_->GetOutputDivergInfo());
return true;
};


+ 18
- 0
mindspore/lite/tools/time_profile/CMakeLists.txt View File

@@ -0,0 +1,18 @@
# add shared link library

# Sources shared with the other lite tools (flag parsing and file helpers),
# compiled directly into this executable rather than linked as a library.
set(COMMON_SRC
${CMAKE_CURRENT_SOURCE_DIR}/../common/flag_parser.cc
${CMAKE_CURRENT_SOURCE_DIR}/../../src/common/file_utils.cc
${CMAKE_CURRENT_SOURCE_DIR}/../../src/common/utils.cc
)

# Standalone per-op timing tool built on top of mindspore-lite.
add_executable(timeprofile
${CMAKE_CURRENT_SOURCE_DIR}/main.cc
${CMAKE_CURRENT_SOURCE_DIR}/time_profile.cc
${COMMON_SRC})

# pthread is supplied implicitly by the ARM/Android toolchains, so it is only
# linked explicitly for host (non-ARM) builds.
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
target_link_libraries(timeprofile mindspore-lite ${SECUREC_LIBRARY})
else()
target_link_libraries(timeprofile mindspore-lite ${SECUREC_LIBRARY} pthread)
endif()

+ 19
- 0
mindspore/lite/tools/time_profile/main.cc View File

@@ -0,0 +1,19 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "tools/time_profile/time_profile.h"

// Entry point: delegate straight to the time-profile driver and return its
// exit status.
int main(int argc, const char **argv) {
  return mindspore::lite::RunTimeProfile(argc, argv);
}

+ 372
- 0
mindspore/lite/tools/time_profile/time_profile.cc View File

@@ -0,0 +1,372 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "tools/time_profile/time_profile.h"
#define __STDC_FORMAT_MACROS
#include <cinttypes>
#undef __STDC_FORMAT_MACROS
#include <cmath>
#include <algorithm>
#include <utility>
#include "include/ms_tensor.h"
#include "utils/log_adapter.h"
#include "include/context.h"

namespace mindspore {
namespace lite {
// Fill `size` bytes at `data` with a deterministic byte pattern (byte i gets
// the low 8 bits of i) so that profiling runs are reproducible.
int TimeProfile::GenerateRandomData(size_t size, void *data) {
  MS_ASSERT(data != nullptr);
  auto *bytes = static_cast<char *>(data);
  for (size_t idx = 0; idx < size; ++idx) {
    bytes[idx] = static_cast<char>(idx);
  }
  return RET_OK;
}

int TimeProfile::GenerateInputData() {
for (auto tensor : ms_inputs_) {
MS_ASSERT(tensor != nullptr);
auto input_data = tensor->MutableData();
if (input_data == nullptr) {
MS_LOG(ERROR) << "MallocData for inTensor failed";
}
MS_ASSERT(tensor->GetData() != nullptr);
auto tensor_byte_size = tensor->Size();
auto status = GenerateRandomData(tensor_byte_size, input_data);
if (status != RET_OK) {
MS_LOG(ERROR) << "Generate RandomData for inTensor failed %d" << status;
}
}
return RET_OK;
}

int TimeProfile::ReadInputFile() {
if (ms_inputs_.empty()) {
return RET_OK;
}

auto inTensor = ms_inputs_.at(0);
MS_ASSERT(inTensor != nullptr);

size_t size;
char *bin_buf = ReadFile(_flags->in_data_path_.c_str(), &size);

auto tensor_data_size = inTensor->Size();
if (size != tensor_data_size) {
MS_LOG(ERROR) << "Input binary file size error, required: %zu, in fact: %zu" << tensor_data_size << size;
}
auto input_data = inTensor->MutableData();
memcpy(input_data, bin_buf, tensor_data_size);
return RET_OK;
}

// Acquire the session's input tensors and fill them — from the user-supplied
// file when inDataPath is set, otherwise with generated dummy data.
// Propagates the underlying status (the original logged failures but always
// returned RET_OK, so callers could never detect a bad input).
int TimeProfile::LoadInput() {
  ms_inputs_ = session_->GetInputs();
  if (_flags->in_data_path_.empty()) {
    auto status = GenerateInputData();
    if (status != RET_OK) {
      MS_LOG(ERROR) << "Generate input data error " << status;
      return status;
    }
  } else {
    auto status = ReadInputFile();
    if (status != RET_OK) {
      MS_LOG(ERROR) << "ReadInputFile error, " << status;
      return status;
    }
  }
  return RET_OK;
}

int TimeProfile::InitSession() {
size_t size = 0;
char *graph_buf = ReadFile(_flags->model_path_.c_str(), &size);
if (graph_buf == nullptr) {
MS_LOG(ERROR) << "Load graph failed, path %s" << _flags->model_path_;
}

auto ctx = new lite::Context;
ctx->cpu_bind_mode_ = static_cast<CpuBindMode>(_flags->cpu_bind_mode_);
ctx->device_ctx_.type = lite::DT_CPU;
ctx->thread_num_ = _flags->num_threads_;

session_ = session::LiteSession::CreateSession(ctx);
if (session_ == nullptr) {
MS_LOG(ERROR) << "New session failed while running.";
}

return RET_OK;
}

// Install the before/after kernel callbacks that accumulate per-op-type and
// per-op-name call counts and wall-clock costs (milliseconds).
int TimeProfile::InitCallbackParameter() {
  // before callback: ensure accumulator entries exist and stamp the start time
  before_call_back_ = [&](const std::vector<mindspore::tensor::MSTensor *> &before_inputs,
                          const std::vector<mindspore::tensor::MSTensor *> &before_outputs,
                          const session::CallBackParam &callParam) {
    if (before_inputs.empty()) {
      MS_LOG(INFO) << "The num of beforeInputs is empty";
    }
    if (before_outputs.empty()) {
      MS_LOG(INFO) << "The num of beforeOutputs is empty";
    }
    if (op_times_by_type_.find(callParam.type_callback_param) == op_times_by_type_.end()) {
      op_times_by_type_.insert(std::make_pair(callParam.type_callback_param, std::make_pair(0, 0.0f)));
    }
    if (op_times_by_name_.find(callParam.name_callback_param) == op_times_by_name_.end()) {
      op_times_by_name_.insert(std::make_pair(callParam.name_callback_param, std::make_pair(0, 0.0f)));
    }

    op_call_times_total_++;
    op_begin_ = GetTimeUs();
    return true;
  };

  // after callback: charge the elapsed time to this op's type and name buckets
  after_call_back_ = [&](const std::vector<mindspore::tensor::MSTensor *> &after_inputs,
                         const std::vector<mindspore::tensor::MSTensor *> &after_outputs,
                         const session::CallBackParam &call_param) {
    uint64_t opEnd = GetTimeUs();

    if (after_inputs.empty()) {
      MS_LOG(INFO) << "The num of afterInputs is empty";  // was copy-pasted "beforeInputs"
    }
    if (after_outputs.empty()) {
      MS_LOG(INFO) << "The num of afterOutputs is empty";  // was copy-pasted "beforeOutputs"
    }

    float cost = static_cast<float>(opEnd - op_begin_) / 1000.0f;  // us -> ms
    op_cost_total_ += cost;
    op_times_by_type_[call_param.type_callback_param].first++;
    op_times_by_type_[call_param.type_callback_param].second += cost;
    op_times_by_name_[call_param.name_callback_param].first++;
    op_times_by_name_[call_param.name_callback_param].second += cost;
    return true;
  };

  return RET_OK;
}

// Validate flags, log the effective configuration, then create the session,
// load the input data, and register the timing callbacks.
// Returns RET_OK on success; RET_ERROR otherwise (the original mixed bare `1`
// with the RET_* codes used everywhere else).
int TimeProfile::Init() {
  if (this->_flags == nullptr) {
    return RET_ERROR;
  }
  MS_LOG(INFO) << "ModelPath = " << _flags->model_path_;
  MS_LOG(INFO) << "InDataPath = " << _flags->in_data_path_;
  MS_LOG(INFO) << "LoopCount = " << _flags->loop_count_;
  MS_LOG(INFO) << "NumThreads = " << _flags->num_threads_;
  if (_flags->cpu_bind_mode_ == -1) {
    MS_LOG(INFO) << "cpuBindMode = MID_CPU";
  } else if (_flags->cpu_bind_mode_ == 1) {
    MS_LOG(INFO) << "cpuBindMode = HIGHER_CPU";
  } else {
    MS_LOG(INFO) << "cpuBindMode = NO_BIND";
  }

  if (_flags->model_path_.empty()) {
    MS_LOG(ERROR) << "modelPath is required";
    return RET_ERROR;
  }

  auto status = InitSession();
  if (status != RET_OK) {
    MS_LOG(ERROR) << "Init session failed.";
    return RET_ERROR;
  }

  status = this->LoadInput();
  if (status != RET_OK) {
    MS_LOG(ERROR) << "Load input failed.";
    return RET_ERROR;
  }

  status = InitCallbackParameter();
  if (status != RET_OK) {
    MS_LOG(ERROR) << "Init callback Parameter failed.";
    return RET_ERROR;
  }

  return RET_OK;
}

// Render a five-column table (op name, avg ms, share of total kernel cost,
// call count, total ms) for the accumulated per-op statistics in `result`.
// `title` supplies the five column headers. Column widths adapt to content.
int TimeProfile::PrintResult(const std::vector<std::string> &title,
                             const std::map<std::string, std::pair<int, float>> &result) {
  std::vector<size_t> columnLenMax(5);
  std::vector<std::vector<std::string>> rows;

  for (auto &iter : result) {
    char stringBuf[5][100] = {};
    std::vector<std::string> columns;

    // column 0: op name (was `int len` compared against size_t — signed/unsigned mismatch)
    size_t len = iter.first.size();
    if (len > columnLenMax.at(0)) {
      columnLenMax.at(0) = len + 4;
    }
    columns.push_back(iter.first);

    // column 1: average cost per loop iteration, ms
    int written = sprintf_s(stringBuf[1], 100, "%f", iter.second.second / _flags->loop_count_);
    if (written > 0 && static_cast<size_t>(written) > columnLenMax.at(1)) {
      columnLenMax.at(1) = static_cast<size_t>(written) + 4;
    }
    columns.emplace_back(stringBuf[1]);

    // column 2: fraction of the total kernel cost
    written = sprintf_s(stringBuf[2], 100, "%f", iter.second.second / op_cost_total_);
    if (written > 0 && static_cast<size_t>(written) > columnLenMax.at(2)) {
      columnLenMax.at(2) = static_cast<size_t>(written) + 4;
    }
    columns.emplace_back(stringBuf[2]);

    // column 3: number of times this op was called
    written = sprintf_s(stringBuf[3], 100, "%d", iter.second.first);
    if (written > 0 && static_cast<size_t>(written) > columnLenMax.at(3)) {
      columnLenMax.at(3) = static_cast<size_t>(written) + 4;
    }
    columns.emplace_back(stringBuf[3]);

    // column 4: total cost across all calls, ms
    written = sprintf_s(stringBuf[4], 100, "%f", iter.second.second);
    if (written > 0 && static_cast<size_t>(written) > columnLenMax.at(4)) {
      columnLenMax.at(4) = static_cast<size_t>(written) + 4;
    }
    columns.emplace_back(stringBuf[4]);

    rows.push_back(columns);
  }

  printf("-------------------------------------------------------------------------\n");
  // header row (was `int i` against size_t columnLenMax / rows.size())
  for (size_t i = 0; i < 5; i++) {
    auto printBuf = title[i];
    if (printBuf.size() > columnLenMax.at(i)) {
      columnLenMax.at(i) = printBuf.size();
    }
    printBuf.resize(columnLenMax.at(i), ' ');
    printf("%s", printBuf.c_str());
  }
  printf("\n");
  for (size_t i = 0; i < rows.size(); i++) {
    for (size_t j = 0; j < 5; j++) {
      auto printBuf = rows[i][j];
      printBuf.resize(columnLenMax.at(j), ' ');
      printf("%s\t", printBuf.c_str());
    }
    printf("\n");
  }
  return RET_OK;
}

int TimeProfile::RunTimeProfile() {
uint64_t time_avg = 0;

// Load graph
std::string modelName = _flags->model_path_.substr(_flags->model_path_.find_last_of("/") + 1);

MS_LOG(INFO) << "start reading model file";
size_t size = 0;
char *graphBuf = ReadFile(_flags->model_path_.c_str(), &size);
if (graphBuf == nullptr) {
MS_LOG(ERROR) << "Load graph failed while running %s", modelName.c_str();
return 1;
}
auto model = lite::Model::Import(graphBuf, size);

auto ret = session_->CompileGraph(model.get());
if (ret != RET_OK) {
MS_LOG(ERROR) << "Compile graph failed.";
return RET_ERROR;
}

// load input
MS_LOG(INFO) << "start generate input data";
auto status = LoadInput();
if (status != 0) {
MS_LOG(ERROR) << "Generate input data error";
return status;
}

// run graph and test
for (int i = 0; i < _flags->loop_count_; i++) {
session_->BindThread(true);
uint64_t run_begin = GetTimeUs();

ret = session_->RunGraph(before_call_back_, after_call_back_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Run graph failed.";
}
auto outputs = session_->GetOutputs();

uint64_t run_end = GetTimeUs();
uint64_t time = run_end - run_begin;
time_avg += time;
session_->BindThread(false);
/*
for(auto &output : outputs) {
for (auto &outputTensor : output.second) {
delete outputTensor;
}
}*/
outputs.clear();
}

time_avg /= _flags->loop_count_;
float runCost = static_cast<float>(time_avg) / 1000.0f;

if (ret != RET_OK) {
MS_LOG(ERROR) << "Run session failed.";
}

const std::vector<std::string> per_op_name = {"opName", "avg(ms)", "percent", "calledTimes", "opTotalTime"};
const std::vector<std::string> per_op_type = {"opType", "avg(ms)", "percent", "calledTimes", "opTotalTime"};
PrintResult(per_op_name, op_times_by_name_);
PrintResult(per_op_type, op_times_by_type_);

printf("\n total time: %5.5f ms, kernel cost: %5.5f ms \n\n", runCost, op_cost_total_ / _flags->loop_count_);
printf("-------------------------------------------------------------------------\n");

for (auto &msInput : ms_inputs_) {
delete msInput;
}
ms_inputs_.clear();
delete graphBuf;
return ret;
}

// Tool entry: parse the command-line flags, initialize and run the profiler.
// Propagates failures to the caller (the original always returned RET_OK and
// ran the profile even when Init had failed, operating on a half-initialized
// object).
int RunTimeProfile(int argc, const char **argv) {
  TimeProfileFlags flags;
  Option<std::string> err = flags.ParseFlags(argc, argv);

  if (err.IsSome()) {
    std::cerr << err.Get() << std::endl;
    std::cerr << flags.Usage() << std::endl;
    return -1;
  }

  if (flags.help) {
    std::cerr << flags.Usage() << std::endl;
    return 0;
  }

  TimeProfile time_profile(&flags);
  auto ret = time_profile.Init();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Init TimeProfile failed.";
    return ret;
  }

  ret = time_profile.RunTimeProfile();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Run TimeProfile failed.";
    return ret;
  }

  return RET_OK;
}

} // namespace lite
} // namespace mindspore

+ 95
- 0
mindspore/lite/tools/time_profile/time_profile.h View File

@@ -0,0 +1,95 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINNIE_TIMEPROFILE_TIMEPROFILE_H_
#define MINNIE_TIMEPROFILE_TIMEPROFILE_H_

#include <getopt.h>
#include <signal.h>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include <utility>

#include "tools/common/flag_parser.h"
#include "src/common/file_utils.h"
#include "src/common/utils.h"
#include "schema/model_generated.h"
#include "include/model.h"
#include "include/lite_session.h"


namespace mindspore {
namespace lite {

// Command-line flags for the timeprofile tool.
class MS_API TimeProfileFlags : public virtual FlagParser {
 public:
  TimeProfileFlags() {
    AddFlag(&TimeProfileFlags::model_path_, "modelPath", "Input model path", "");
    AddFlag(&TimeProfileFlags::in_data_path_, "inDataPath", "Input data path, if not set, use random input", "");
    AddFlag(&TimeProfileFlags::cpu_bind_mode_, "cpuBindMode",
            "Input -1 for MID_CPU, 1 for HIGHER_CPU, 0 for NO_BIND, default value: 1", 1);  // fixed "defalut" typo
    AddFlag(&TimeProfileFlags::loop_count_, "loopCount", "Run loop count", 10);
    AddFlag(&TimeProfileFlags::num_threads_, "numThreads", "Run threads number", 2);
  }

  ~TimeProfileFlags() override = default;

 public:
  std::string model_path_;     // path to the .ms model file (required)
  std::string in_data_path_;   // optional binary input; random data when empty
  int cpu_bind_mode_ = 1;      // -1 MID_CPU, 1 HIGHER_CPU, 0 NO_BIND
  int loop_count_ = 10;        // were uninitialized until ParseFlags ran;
  int num_threads_ = 2;        // now match the AddFlag defaults
};

// Drives a profiling run: builds a LiteSession from the parsed flags, feeds
// inputs, and accumulates per-op timing via the session's kernel callbacks.
class MS_API TimeProfile {
 public:
  explicit TimeProfile(TimeProfileFlags *flags) : _flags(flags) {}
  // Release the session created by InitSession (safe no-op when Init never ran
  // or failed, since session_ is nullptr-initialized — was uninitialized).
  ~TimeProfile() { delete session_; }

  int Init();
  int RunTimeProfile();

 private:
  int GenerateRandomData(size_t size, void *data);
  int GenerateInputData();
  int LoadInput();
  int ReadInputFile();
  int InitCallbackParameter();
  int InitSession();
  int PrintResult(const std::vector<std::string>& title, const std::map<std::string, std::pair<int, float>>& result);

 private:
  TimeProfileFlags *_flags;                             // not owned; outlives this object
  std::vector<mindspore::tensor::MSTensor *> ms_inputs_;
  session::LiteSession *session_ = nullptr;             // owned; created in InitSession

  // callback bookkeeping
  uint64_t op_begin_ = 0;        // start timestamp (us) set by the before-callback
  int op_call_times_total_ = 0;  // kernel invocations across all loops
  float op_cost_total_ = 0.0f;   // summed kernel time, ms
  std::map<std::string, std::pair<int, float>> op_times_by_type_;  // op type -> (calls, ms)
  std::map<std::string, std::pair<int, float>> op_times_by_name_;  // op name -> (calls, ms)

  session::KernelCallBack before_call_back_;
  session::KernelCallBack after_call_back_;
};

int MS_API RunTimeProfile(int argc, const char **argv);
} // namespace lite
} // namespace mindspore
#endif // MINNIE_TIMEPROFILE_TIMEPROFILE_H_

Loading…
Cancel
Save