From e0a936228692b6345b161c957e4ae8ac54fb2bfb Mon Sep 17 00:00:00 2001 From: Emir Haleva Date: Thu, 29 Apr 2021 07:26:19 +0300 Subject: [PATCH] Support TrainSession::ExportInference --- mindspore/lite/include/train/train_session.h | 10 +- mindspore/lite/schema/ops.fbs | 10 +- mindspore/lite/src/CMakeLists.txt | 1 + mindspore/lite/src/train/train_export.cc | 202 +++++++++++++++ mindspore/lite/src/train/train_export.h | 58 +++++ mindspore/lite/src/train/train_session.cc | 27 +- mindspore/lite/src/train/train_session.h | 1 + mindspore/lite/src/train/train_utils.cc | 16 ++ mindspore/lite/src/train/train_utils.h | 10 + mindspore/lite/test/CMakeLists.txt | 2 + mindspore/lite/test/run_net_train.sh | 23 +- .../lite/tools/benchmark_train/net_train.cc | 232 +++++++++++++++++- .../lite/tools/benchmark_train/net_train.h | 17 +- 13 files changed, 572 insertions(+), 37 deletions(-) create mode 100644 mindspore/lite/src/train/train_export.cc create mode 100644 mindspore/lite/src/train/train_export.h diff --git a/mindspore/lite/include/train/train_session.h b/mindspore/lite/include/train/train_session.h index 2c80b65357..fe0bb65c5d 100644 --- a/mindspore/lite/include/train/train_session.h +++ b/mindspore/lite/include/train/train_session.h @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#ifndef MINDSPORE_LITE_INCLUDE_TRAIN_SESSION_H_ -#define MINDSPORE_LITE_INCLUDE_TRAIN_SESSION_H_ +#ifndef MINDSPORE_LITE_INCLUDE_TRAIN_TRAIN_SESSION_H_ +#define MINDSPORE_LITE_INCLUDE_TRAIN_TRAIN_SESSION_H_ #include #include #include @@ -115,6 +115,10 @@ class TrainSession : public session::LiteSession { loss_name_ = loss_name; return mindspore::lite::RET_OK; } + /// \brief Save model for inference (LiteSession) + /// \param[in] fb_name pretrained model file name prefix. '.ms' is added as extension. 
+  /// \return STATUS as an error code of the export operation, STATUS is defined in errorcode.h
+  /// \note This base implementation does not export anything and always returns RET_ERROR.
+  virtual int ExportInference(std::string fb_name) { return mindspore::lite::RET_ERROR; }
  protected:
   bool train_mode_ = false;
@@ -125,4 +129,4 @@ class TrainSession : public session::LiteSession {
 };
 }  // namespace session
 }  // namespace mindspore
-#endif  // MINDSPORE_LITE_INCLUDE_TRAIN_SESSION_H_
+#endif  // MINDSPORE_LITE_INCLUDE_TRAIN_TRAIN_SESSION_H_
diff --git a/mindspore/lite/schema/ops.fbs b/mindspore/lite/schema/ops.fbs
index da0bd88273..a0db93b125 100644
--- a/mindspore/lite/schema/ops.fbs
+++ b/mindspore/lite/schema/ops.fbs
@@ -443,11 +443,6 @@ table Crop {
     offsets: [long];
 }
-table CumSum {
-    exclusive: bool = false;
-    reverse: bool = false;
-}
-
 table CustomExtractFeatures {
 }
@@ -1111,6 +1106,11 @@ table LogSoftmax {
 table Call {
 }
+table CumSum {
+    exclusive: bool;
+    reverse: bool;
+}
+
 table Custom {
     type: string;
     attr: [Attribute];
diff --git a/mindspore/lite/src/CMakeLists.txt b/mindspore/lite/src/CMakeLists.txt
index 8fec0ab48d..c7fc328ffb 100644
--- a/mindspore/lite/src/CMakeLists.txt
+++ b/mindspore/lite/src/CMakeLists.txt
@@ -123,6 +123,7 @@ if(SUPPORT_TRAIN)
             ${CMAKE_CURRENT_SOURCE_DIR}/train/accuracy_metrics.cc
             ${CMAKE_CURRENT_SOURCE_DIR}/train/accuracy_monitor.cc
             ${CMAKE_CURRENT_SOURCE_DIR}/train/classification_train_accuracy_monitor.cc
+            ${CMAKE_CURRENT_SOURCE_DIR}/train/train_export.cc
             )
     if(ENABLE_V0)
         set(LITE_SRC
diff --git a/mindspore/lite/src/train/train_export.cc b/mindspore/lite/src/train/train_export.cc
new file mode 100644
index 0000000000..5f7761d705
--- /dev/null
+++ b/mindspore/lite/src/train/train_export.cc
@@ -0,0 +1,202 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#define _STUB
+#include "src/train/train_export.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include "schema/inner/model_generated.h"
+#include "src/train/train_utils.h"
+
+namespace mindspore {
+namespace lite {
+
+// Copy the raw bytes of `tensor` (tensor->Size() bytes starting at tensor->data_c()) into a new vector.
+// Returns an empty vector when the tensor has no buffer or a zero size, instead of building a range
+// from a null pointer.
+std::vector TrainExport::CreateData(const mindspore::lite::Tensor *tensor) {
+  uint8_t *tensor_data = reinterpret_cast(tensor->data_c());
+  auto size = tensor->Size();
+  if (tensor_data == nullptr || size == 0) {
+    return {};
+  }
+  std::vector data(tensor_data, tensor_data + size);
+  return data;
+}
+
+// Build the flatbuffer object form of one tensor.
+// Shape, type, format, name and quantization info come from the runtime `tensor`; the node type comes
+// from the original schema tensor `scTensor`. Data is copied only for value nodes whose schema tensor
+// carried data.
+std::unique_ptr TrainExport::CreateTensor(const mindspore::lite::Tensor *tensor,
+                                          schema::Tensor *scTensor) {
+  auto tensorT = std::make_unique();
+  tensorT->nodeType = scTensor->nodeType();
+  tensorT->dataType = tensor->data_type();
+  tensorT->dims = tensor->shape();
+  tensorT->format = tensor->format();
+  tensorT->name = tensor->tensor_name();
+  tensorT->refCount = 0;
+  tensorT->offset = 0;
+  tensorT->enableHuffmanCode = false;
+  if ((tensorT->nodeType == NodeType_ValueNode) && (scTensor->data() != nullptr) && (scTensor->data()->size() > 0)) {
+    tensorT->data = CreateData(tensor);
+  }
+  // Bind by const reference: the quant params are only read here.
+  for (const auto &quant_param : tensor->quant_params()) {
+    auto quantParamT = std::make_unique();
+    quantParamT->scale = quant_param.scale;
+    quantParamT->zeroPoint = quant_param.zeroPoint;
+    quantParamT->min = 0;
+    quantParamT->max = 0;
+    quantParamT->narrowRange = true;
+    quantParamT->numBits = quant_param.bitNum;
+    quantParamT->inited = quant_param.inited;
+    quantParamT->varCorr = quant_param.var_corr;
+    quantParamT->meanCorr = quant_param.mean_corr;
+    quantParamT->dstDtype = quant_param.dstDtype;
+    quantParamT->roundType = quant_param.roundType;
+    quantParamT->multiplier = quant_param.multiplier;
+    tensorT->quantParams.emplace_back(std::move(quantParamT));
+  }
+  tensorT->quantClusters = tensor->quant_clusters();
+  return tensorT;
+}
+
+// Look up the model node whose name equals the kernel's name; returns nullptr when there is none.
+mindspore::lite::Model::Node *TrainExport::FindNode(const mindspore::kernel::LiteKernel *kernel) {
+  // Reference, not copy: this runs once per exported kernel.
+  const auto &nodes = model_->all_nodes_;
+  auto it = std::find_if(nodes.begin(), nodes.end(),
+                         [&kernel](mindspore::lite::Model::Node *n) { return (kernel->name() == n->name_); });
+  if (it == nodes.end()) {
+    return nullptr;
+  }
+  return *it;
+}
+
+// Build the flatbuffer object form of one kernel, wired to the given (already remapped) tensor indices.
+// The primitive is unpacked from the matching model node. Returns nullptr (after logging) when the
+// kernel has no node of the same name in the model.
+std::unique_ptr TrainExport::CreateCNode(const mindspore::kernel::LiteKernel *kernel,
+                                         std::vector inputIndex,
+                                         std::vector outputIndex) {
+  auto cnodeT = std::make_unique();
+  cnodeT->inputIndex = inputIndex;
+  cnodeT->outputIndex = outputIndex;
+  cnodeT->name = kernel->name();
+  cnodeT->quantType = schema::QuantType_QUANT_NONE;
+  // find kernel in model
+  auto *node = FindNode(kernel);
+  if (node == nullptr) {
+    MS_LOG(ERROR) << "cannot find kernel " + kernel->name() + " in model";
+    return nullptr;
+  }
+  auto primitive = reinterpret_cast(const_cast(node->primitive_));
+  cnodeT->primitive = std::unique_ptr(primitive->UnPack());
+  return cnodeT;
+}
+
+// Export the sub-graph made of `kernels` to "<file_name_>.ms".
+//   kernels      - kernels to export, in execution order
+//   tensors      - all session tensors; positions are used as indices into model_->all_tensors_
+//   output_names - names of the tensors to register as graph outputs
+// Tensors are renumbered densely in order of first use. A tensor that no exported kernel produces, is
+// a value node and carries no data is registered as a graph input.
+// Returns RET_OK on success, RET_ERROR when a tensor or kernel cannot be matched to the model or the
+// file cannot be written.
+int TrainExport::Export(const std::vector &kernels,
+                        const std::vector &tensors,
+                        const std::vector &output_names) {
+  std::map remap;        // original tensor index -> index in the exported graph
+  std::vector map_index; // original tensor indices, in exported order
+  std::set out_set;      // original indices of tensors produced by an exported kernel
+  int tensor_idx = 0;
+  auto meta_graph = std::make_unique();
+  meta_graph->fmkType = 3;
+  meta_graph->name = model_->name_;
+  meta_graph->version = model_->version_;
+  for (const auto kernel : kernels) {
+    std::vector in_idx, out_idx;
+    for (const auto tensor : kernel->in_tensors()) {
+      size_t id = TSFindTensor(tensors, tensor);
+      if (id == tensors.size()) {
+        MS_LOG(ERROR) << "cannot find tensor " + tensor->ToString() + " in model";
+        return RET_ERROR;
+      }
+      auto it = remap.find(id);
+      if (it == remap.end()) {
+        remap[id] = tensor_idx;
+        in_idx.push_back(tensor_idx);
+        map_index.push_back(id);
+        tensor_idx++;
+      } else {
+        in_idx.push_back(it->second);
+      }
+    }
+    for (const auto tensor : kernel->out_tensors()) {
+      size_t id = TSFindTensor(tensors, tensor);
+      if (id == tensors.size()) {
+        MS_LOG(ERROR) << "cannot find tensor " + tensor->ToString() + " in model";
+        return RET_ERROR;
+      }
+      // out_set is queried by ORIGINAL index below, so only original indices are stored. Also storing
+      // the remapped index would make an unrelated tensor whose original index happens to equal it
+      // look like an output and drop it from the graph inputs.
+      out_set.insert(id);
+      auto it = remap.find(id);
+      if (it == remap.end()) {
+        remap[id] = tensor_idx;
+        map_index.push_back(id);
+        out_idx.push_back(tensor_idx);
+        tensor_idx++;
+      } else {
+        out_idx.push_back(it->second);
+      }
+    }
+    auto cnode = CreateCNode(kernel, in_idx, out_idx);
+    if (cnode == nullptr) {
+      // CreateCNode already logged the reason; a null node must never reach the packer.
+      MS_LOG(ERROR) << "failed to create node for kernel " << kernel->name();
+      return RET_ERROR;
+    }
+    meta_graph->nodes.emplace_back(std::move(cnode));
+  }
+  for (auto id : map_index) {
+    mindspore::lite::Tensor *tensor = tensors.at(id);
+    schema::Tensor *scTensor = model_->all_tensors_.at(id);
+    auto tensorT = CreateTensor(tensor, scTensor);
+    // find a tensor which is not an output
+    if (out_set.find(id) == out_set.end()) {
+      if ((tensorT->nodeType == NodeType_ValueNode) && (tensorT->data.size() == 0)) {
+        meta_graph->inputIndex.push_back(remap[id]);
+      }
+    }
+    // find output tensor
+    if (std::find(output_names.begin(), output_names.end(), tensor->tensor_name()) != output_names.end()) {
+      meta_graph->outputIndex.push_back(remap[id]);
+    }
+    meta_graph->allTensors.emplace_back(std::move(tensorT));
+  }
+  // meta_graph keeps ownership; it is released automatically on every return path.
+  int err = SaveToFile(meta_graph.get(), file_name_);
+  if (err != RET_OK) {
+    MS_LOG(ERROR) << "failed to save flatbuffer file " << file_name_;
+  }
+  return err;
+}
+
+// Serialize `graph` with flatbuffers and write it to "<outputPath>.ms".
+// An existing file is made writable first; the written file is left read-only for the owner.
+// Returns RET_OK on success, RET_ERROR when the buffer is missing or the file cannot be opened/written.
+int TrainExport::SaveToFile(const schema::MetaGraphT *graph, const std::string &outputPath) {
+  flatbuffers::FlatBufferBuilder builder(1024);
+  auto offset = schema::MetaGraph::Pack(builder, graph);
+  // NOTE(review): the builder is finished twice (plain Finish, then FinishMetaGraphBuffer). Kept
+  // as-is so the produced bytes do not change; confirm whether the first call is intentional.
+  builder.Finish(offset);
+  schema::FinishMetaGraphBuffer(builder, offset);
+  int size = builder.GetSize();
+  auto content = builder.GetBufferPointer();
+  if (content == nullptr) {
+    MS_LOG(ERROR) << "GetBufferPointer nullptr";
+    return RET_ERROR;
+  }
+  const std::string file_path = outputPath + ".ms";
+  if (access(file_path.c_str(), F_OK) == 0) {
+    chmod(file_path.c_str(), S_IWUSR);
+  }
+  std::ofstream output(file_path, std::ofstream::binary);
+  if (!output.is_open()) {
+    MS_LOG(ERROR) << "Can not open output file: " << file_path;
+    return RET_ERROR;
+  }
+  output.write(reinterpret_cast<const char *>(content), size);
+  output.close();
+  // A short write (e.g. disk full) must not be reported as a successful export.
+  if (output.fail()) {
+    MS_LOG(ERROR) << "Can not write output file: " << file_path;
+    return RET_ERROR;
+  }
+  chmod(file_path.c_str(), S_IRUSR);
+  return RET_OK;
+}
+
+}  // namespace lite
+}  // namespace mindspore
diff --git a/mindspore/lite/src/train/train_export.h b/mindspore/lite/src/train/train_export.h
new file mode 100644
index 0000000000..41cac80237
--- /dev/null
+++ b/mindspore/lite/src/train/train_export.h
@@ -0,0 +1,58 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_LITE_SRC_TRAIN_TRAIN_EXPORT_H_
+#define MINDSPORE_LITE_SRC_TRAIN_TRAIN_EXPORT_H_
+#include
+#include
+#include
+#include "schema/inner/model_generated.h"
+#include "src/lite_kernel.h"
+#include "src/lite_model.h"
+
+namespace mindspore {
+#ifndef _STUB
+namespace schema {
+struct CNodeT;
+struct TensorT;
+struct MetaGraphT;
+}  // namespace schema
+#endif
+namespace lite {
+
+/// \brief Writes a set of kernels and their tensors to a flatbuffer model file.
+///
+/// The exporter does not own `model`; it only stores the pointer, so the model must outlive it.
+class TrainExport {
+ public:
+  /// \param[in] file_name output file name prefix; ".ms" is appended when the file is written.
+  /// \param[in] model     model the kernels originate from (used to look up nodes and tensors).
+  TrainExport(const std::string file_name, const mindspore::lite::Model *model)
+      : model_(model), file_name_(file_name) {}
+  virtual ~TrainExport() {}
+  /// \brief Export `kernels` and the tensors they use; tensors named in `output_names` become graph outputs.
+  /// \return RET_OK on success, an error code otherwise.
+  int Export(const std::vector &kernels,
+             const std::vector &tensors, const std::vector &output_names);
+
+ protected:
+  /// \brief Produce the bytes stored for `tensor` in the exported file (virtual: subclasses may override).
+  virtual std::vector CreateData(const mindspore::lite::Tensor *tensor);
+
+ private:
+  const Model *model_;     // not owned
+  std::string file_name_;  // output file name prefix
+  mindspore::lite::Model::Node *FindNode(const mindspore::kernel::LiteKernel *kernel);
+  std::unique_ptr CreateTensor(const mindspore::lite::Tensor *tensor, schema::Tensor *scTensor);
+  std::unique_ptr CreateCNode(const mindspore::kernel::LiteKernel *kernel,
+                              std::vector inputIndex, std::vector outputIndex);
+  int SaveToFile(const schema::MetaGraphT *graph, const std::string &outputPath);
+};
+}  // namespace lite
+}  // namespace mindspore
+
+#endif  // MINDSPORE_LITE_SRC_TRAIN_TRAIN_EXPORT_H_
diff --git a/mindspore/lite/src/train/train_session.cc b/mindspore/lite/src/train/train_session.cc
index f04d17cbcb..41b4736a65 100644
--- a/mindspore/lite/src/train/train_session.cc
+++ b/mindspore/lite/src/train/train_session.cc
@@ -37,25 +37,12 @@
 #include "src/runtime/kernel/arm/fp32_grad/convolution.h"
 #include "src/runtime/kernel/arm/fp32/batchnorm_fp32.h"
 #include "src/common/tensor_util.h"
+#include "src/train/train_utils.h"
+#include "src/train/train_export.h"
 namespace mindspore {
 namespace lite {
-static size_t TSFindTensor(const std::vector &where, const lite::Tensor *searchParameter) {
-  for (size_t i = 0; i < where.size(); i++) {
-    if
(where[i] == searchParameter) {
-      return i;
-    }
-  }
-  return where.size();
-}
-
-static kernel::LiteKernel *TSFindKernel(const std::vector &where,
-                                        const std::string &searchParameter) {
-  auto it = std::find_if(where.begin(), where.end(),
-                         [&searchParameter](const kernel::LiteKernel *k) { return (k->name() == searchParameter); });
-  return *it;
-}
 TrainSession::TrainSession() {
   is_train_session_ = true;
 #ifdef ENABLE_V0
@@ -476,6 +463,16 @@ int TrainSession::SetLossName(std::string loss_name) {
   }
   return RET_OK;
 }
+
+// Export the inference kernels of this session to "<file_name>.ms".
+// The session is switched to eval mode for the export and switched back to train mode afterwards if it
+// was training when called. Returns the status reported by TrainExport::Export.
+int TrainSession::ExportInference(std::string file_name) {
+  bool orig_train_state = IsTrain();
+  Eval();
+  TrainExport texport(file_name, model_);
+  int status = texport.Export(inference_kernels_, tensors_, GetOutputTensorNames());
+  if (orig_train_state) {
+    Train();
+  }
+  return status;
+}
+
 }  // namespace lite
 session::TrainSession *session::TrainSession::CreateSession(mindspore::lite::Model *model, lite::Context *context,
diff --git a/mindspore/lite/src/train/train_session.h b/mindspore/lite/src/train/train_session.h
index cee89e1ff2..dfeac194db 100644
--- a/mindspore/lite/src/train/train_session.h
+++ b/mindspore/lite/src/train/train_session.h
@@ -87,6 +87,7 @@ class TrainSession : virtual public session::TrainSession, virtual public lite::
     }
     return outputs;
   }
+  int ExportInference(std::string file_name) override;
  protected:
   void AllocWorkSpace();
diff --git a/mindspore/lite/src/train/train_utils.cc b/mindspore/lite/src/train/train_utils.cc
index bf3ac8af84..118207d78a 100644
--- a/mindspore/lite/src/train/train_utils.cc
+++ b/mindspore/lite/src/train/train_utils.cc
@@ -19,10 +19,26 @@
 #include "include/errorcode.h"
 #include "include/ms_tensor.h"
 #include "src/common/utils.h"
+#include "src/lite_kernel.h"
 namespace mindspore {
 namespace lite {
+// Return the position of `searchParameter` (compared by pointer) in `where`,
+// or where.size() when it is not present.
+size_t TSFindTensor(const std::vector &where, const lite::Tensor *searchParameter) {
+  for (size_t i = 0; i < where.size(); i++) {
+    if (where[i] == searchParameter) {
+      return i;
+    }
+  }
+  return where.size();
+}
+
+// Return the first kernel in `where` whose name equals `searchParameter`,
+// or nullptr when no kernel has that name.
+kernel::LiteKernel *TSFindKernel(const std::vector &where, const std::string &searchParameter) {
+  auto it = std::find_if(where.begin(), where.end(),
+                         [&searchParameter](const kernel::LiteKernel *k) { return (k->name() == searchParameter); });
+  // Dereferencing end() is undefined behaviour; report "not found" explicitly instead.
+  if (it == where.end()) {
+    return nullptr;
+  }
+  return *it;
+}
+
 float CalculateSparseClassification(tensor::MSTensor *input, tensor::MSTensor *output) {
   if ((input->shape().size() != 1) || (input->data_type() != kNumberTypeInt32) || (output->shape().size() != 2)) {
     MS_LOG(WARNING) << "SparceClassification got a " << input->shape() << "-D input tensor, " << output->shape()
diff --git a/mindspore/lite/src/train/train_utils.h b/mindspore/lite/src/train/train_utils.h
index cba2f57da3..6be9bc704b 100644
--- a/mindspore/lite/src/train/train_utils.h
+++ b/mindspore/lite/src/train/train_utils.h
@@ -16,11 +16,21 @@
 #ifndef MINDSPORE_LITE_SRC_TRAIN_TRAIN_UTILS_H_
 #define MINDSPORE_LITE_SRC_TRAIN_TRAIN_UTILS_H_
+#include
+#include
 #include "include/ms_tensor.h"
+#include "src/tensor.h"
 namespace mindspore {
+namespace kernel {
+class LiteKernel;
+}
+
 namespace lite {
+kernel::LiteKernel *TSFindKernel(const std::vector &where, const std::string &searchParameter);
+size_t TSFindTensor(const std::vector &where, const lite::Tensor *searchParameter);
+
 float CalculateSparseClassification(tensor::MSTensor *input, tensor::MSTensor *output);
 float CalculateOneHotClassification(tensor::MSTensor *input, tensor::MSTensor *output);
diff --git a/mindspore/lite/test/CMakeLists.txt b/mindspore/lite/test/CMakeLists.txt
index a3a755fa87..2c06e76b50 100644
--- a/mindspore/lite/test/CMakeLists.txt
+++ b/mindspore/lite/test/CMakeLists.txt
@@ -292,6 +292,8 @@ if(SUPPORT_TRAIN)
             ${LITE_DIR}/src/train/train_populate_parameter.cc
             ${LITE_DIR}/src/train/train_populate_parameter_v0.cc
             ${LITE_DIR}/src/train/train_session.cc
+            ${LITE_DIR}/src/train/train_export.cc
+            ${LITE_DIR}/src/train/train_utils.cc
             ${LITE_DIR}/src/train/transfer_session.cc
             ${LITE_DIR}/src/lite_session.cc
             )
diff --git
a/mindspore/lite/test/run_net_train.sh b/mindspore/lite/test/run_net_train.sh index 3824cf0bf2..c259b5ea38 100755 --- a/mindspore/lite/test/run_net_train.sh +++ b/mindspore/lite/test/run_net_train.sh @@ -89,13 +89,15 @@ function Run_x86() { model_name=${line_array[0]}'_train_quant' accuracy_limit=${line_array[2]} fi - + if [[ "${save_lite}" == "1" ]]; then + inference_file="${ms_models_path}/${model_name}_infer" + fi echo ${model_name} >> "${run_x86_log_file}" ${run_valgrind}./tools/benchmark_train/benchmark_train \ --modelFile=${ms_models_path}/${model_name}.ms \ --inDataFile=${train_io_path}/${model_prefix}_input1.bin,${train_io_path}/${model_prefix}_input2.bin \ --expectedDataFile=${train_io_path}/${model_prefix}_output --epochs=${epoch_num} --numThreads=${threads} \ - --accuracyThreshold=${accuracy_limit} >> "${run_x86_log_file}" + --accuracyThreshold=${accuracy_limit} --inferenceFile=${inference_file} >> "${run_x86_log_file}" if [ $? = 0 ]; then run_result='x86: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_train_result_file} else @@ -138,8 +140,8 @@ function Run_arm() { # If build with minddata, copy the minddata related libs cd ${benchmark_train_test_path} || exit 1 if [ -f ${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/lib/libminddata-lite.so ]; then - cp -a ${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/third_party/libjpeg-turbo/lib/libjpeg.so ${benchmark_train_test_path}/libjpeg.so || exit 1 - cp -a ${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/third_party/libjpeg-turbo/lib/libturbojpeg.so ${benchmark_train_test_path}/libturbojpeg.so || exit 1 + cp -a ${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/third_party/libjpeg-turbo/lib/libjpeg.so* ${benchmark_train_test_path}/ || exit 1 + cp -a ${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/third_party/libjpeg-turbo/lib/libturbojpeg.so* 
${benchmark_train_test_path}/ || exit 1 cp -a ${arm_path}/mindspore-lite-${version_arm}-train-android-${process_unit}/train/lib/libminddata-lite.so ${benchmark_train_test_path}/libminddata-lite.so || exit 1 fi if [ "$1" == arm64 ]; then @@ -178,8 +180,9 @@ function Run_arm() { run_result=$1': '${model_name}' irrelevant'; echo ${run_result} >> ${run_benchmark_train_result_file} continue fi - - + if [[ "${save_lite}" == "1" ]]; then + inference_file="${ms_models_path}/${model_name}_infer" + fi # run benchmark_train test without clib data echo ${model_name} >> "${run_arm_log_file}" adb -s ${device_id} push ${train_io_path}/${model_prefix}_input*.bin ${train_io_path}/${model_prefix}_output*.bin /data/local/tmp/benchmark_train_test >> ${adb_push_log_file} @@ -198,7 +201,7 @@ function Run_arm() { --modelFile=${model_name}.ms \ --inDataFile=${tmp_dir}/${model_prefix}_input1.bin,${tmp_dir}/${model_prefix}_input2.bin \ --expectedDataFile=${tmp_dir}/${model_prefix}_output \ - --numThreads=${threads} --accuracyThreshold=${accuracy_limit} + --numThreads=${threads} --accuracyThreshold=${accuracy_limit} --inferenceFile=${inference_file} ENDM ) echo "${adb_cmd}" >> ${run_arm_log_file} @@ -249,7 +252,7 @@ models_mindspore_train_config=${basepath}/models_ms_train.cfg epoch_num=1 threads=2 train_io_path="" -while getopts "r:M:c:m:d:i:e:vt:q:D" opt; do +while getopts "r:M:c:m:d:i:e:vt:q:DF" opt; do case ${opt} in r) release_path=${OPTARG} @@ -291,6 +294,8 @@ while getopts "r:M:c:m:d:i:e:vt:q:D" opt; do t) epoch_num=${OPTARG} echo "train epoch num is ${epoch_num}" + ;; + F) save_lite=1 ;; ?) 
echo "unknown para" @@ -342,7 +347,7 @@ if [[ $enable_export == 1 ]]; then Run_Export Print_Result ${export_result_file} -fi +fi # Write converter result to temp file run_converter_log_file=${logs_path}/run_converter_log.txt diff --git a/mindspore/lite/tools/benchmark_train/net_train.cc b/mindspore/lite/tools/benchmark_train/net_train.cc index 8fcaa0029b..2271defc52 100644 --- a/mindspore/lite/tools/benchmark_train/net_train.cc +++ b/mindspore/lite/tools/benchmark_train/net_train.cc @@ -20,6 +20,9 @@ #undef __STDC_FORMAT_MACROS #include #include +#ifdef ENABLE_NEON +#include +#endif #include "src/common/common.h" #include "include/ms_tensor.h" #include "include/context.h" @@ -178,6 +181,88 @@ int NetTrain::CompareOutput() { MS_LOG(ERROR) << "ReadFile return nullptr"; return RET_ERROR; } + + if (flags_->enable_fp16_ && tensor->data_type() == kNumberTypeFloat16) { + if (static_cast(size / sizeof(float)) != tensor->ElementsNum()) { + MS_LOG(ERROR) << "Output buffer and output file differ by size. Tensor size: " << tensor->Size() + << ", read size: " << size / sizeof(float); + return RET_ERROR; + } + } else { + if (size != tensor->Size()) { + MS_LOG(ERROR) << "Output buffer and output file differ by size. 
Tensor size: " << tensor->Size() + << ", read size: " << size; + return RET_ERROR; + } + } + float bias = 0.f; + if (flags_->enable_fp16_ && tensor->data_type() == kNumberTypeFloat16) { +#ifdef ENABLE_FP16 + bias = CompareData(bin_buf, tensor->ElementsNum(), reinterpret_cast(outputs)); +#endif + } else { + bias = CompareData(bin_buf, tensor->ElementsNum(), reinterpret_cast(outputs)); + } + if (bias >= 0) { + total_bias += bias; + total_size++; + } else { + has_error = true; + break; + } + i++; + delete[] bin_buf; + } + + if (!has_error) { + float mean_bias; + if (total_size != 0) { + mean_bias = total_bias / total_size * 100; + } else { + mean_bias = 0; + } + + std::cout << "Mean bias of all nodes/tensors: " << mean_bias << "%" + << " threshold is:" << this->flags_->accuracy_threshold_ << std::endl; + std::cout << "=======================================================" << std::endl << std::endl; + + if (mean_bias > this->flags_->accuracy_threshold_) { + MS_LOG(ERROR) << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%"; + std::cerr << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%" << std::endl; + return RET_ERROR; + } else { + return RET_OK; + } + } else { + MS_LOG(ERROR) << "Error in CompareData"; + std::cerr << "Error in CompareData" << std::endl; + std::cout << "=======================================================" << std::endl << std::endl; + return RET_ERROR; + } +} +int NetTrain::CompareOutputLite(const std::unique_ptr &lite_session) { + std::cout << "================ Comparing Forward Output data ================" << std::endl; + float total_bias = 0; + int total_size = 0; + bool has_error = false; + auto tensors_list = lite_session->GetOutputs(); + if (tensors_list.empty()) { + MS_LOG(ERROR) << "Cannot find output tensors, get model output failed"; + return RET_ERROR; + } + mindspore::tensor::MSTensor *tensor = nullptr; + int i = 1; + for (auto it = tensors_list.begin(); it != tensors_list.end(); ++it) { + tensor = 
lite_session->GetOutputByTensorName(it->first);
+    std::cout << "output is tensor " << it->first << "\n";
+    auto outputs = tensor->MutableData();
+    size_t size;
+    std::string output_file = flags_->data_file_ + std::to_string(i) + ".bin";
+    auto *bin_buf = ReadFileBuf(output_file.c_str(), &size);
+    if (bin_buf == nullptr) {
+      MS_LOG(ERROR) << "ReadFile return nullptr";
+      return RET_ERROR;
+    }
     if (size != tensor->Size()) {
       MS_LOG(ERROR) << "Output buffer and output file differ by size. Tensor size: " << tensor->Size()
                     << ", read size: " << size;
@@ -288,7 +373,7 @@ int NetTrain::MarkAccuracy() {
   }
   session_->Eval();
-  auto status = session_->RunGraph();
+  auto status = session_->RunGraph(before_call_back_, after_call_back_);
   if (status != RET_OK) {
     MS_LOG(ERROR) << "Inference error " << status;
     std::cerr << "Inference error " << status << std::endl;
@@ -303,6 +388,40 @@ int NetTrain::MarkAccuracy() {
   }
   return RET_OK;
 }
+// Accuracy check for an exported inference model running on a plain LiteSession.
+// Prints every tensor in ms_inputs_ (float, float32 and int32 only; any other data type is an error),
+// runs the graph once without callbacks and compares the outputs through CompareOutputLite.
+// Returns RET_OK on success, otherwise the first failing status.
+int NetTrain::MarkAccuracyLite(const std::unique_ptr &lite_session) {
+  MS_LOG(INFO) << "MarkAccuracy";
+  std::cout << "MarkAccuracy" << std::endl;
+  for (auto &msInput : ms_inputs_) {
+    switch (msInput->data_type()) {
+      case TypeId::kNumberTypeFloat:
+        PrintInputData(msInput);
+        break;
+      case TypeId::kNumberTypeFloat32:
+        PrintInputData(msInput);
+        break;
+      case TypeId::kNumberTypeInt32:
+        PrintInputData(msInput);
+        break;
+      default:
+        MS_LOG(ERROR) << "Datatype " << msInput->data_type() << " is not supported.";
+        return RET_ERROR;
+    }
+  }
+  auto status = lite_session->RunGraph();
+  if (status != RET_OK) {
+    MS_LOG(ERROR) << "Inference error " << status;
+    std::cerr << "Inference error " << status << std::endl;
+    return status;
+  }
+
+  status = CompareOutputLite(lite_session);
+  if (status != RET_OK) {
+    MS_LOG(ERROR) << "Compare output error " << status;
+    std::cerr << "Compare output error " << status << std::endl;
+    return status;
+  }
+  return RET_OK;
+}
 int NetTrain::RunExportedNet() {
   auto start_prepare_time = GetTimeUs();
@@ -375,6 +494,80 @@ int
NetTrain::RunExportedNet() {
   return RET_OK;
 }
+// Load the inference model written by ExportInference from `file_name`, compile it on a new
+// LiteSession, load the input data and, when an expected-data file was given, check its accuracy.
+// The imported model is deleted on every path that leaves this function after the import succeeded.
+// Returns RET_OK on success, an error code otherwise.
+int NetTrain::RunExportedNetLite(std::string file_name) {
+  auto start_prepare_time = GetTimeUs();
+  // Load graph
+  std::string model_name = file_name.substr(file_name.find_last_of(DELIM_SLASH) + 1);
+
+  MS_LOG(INFO) << "start reading exported model file";
+  std::cout << "reading " << file_name << std::endl;
+  auto context = std::make_shared();
+  if (context == nullptr) {
+    MS_LOG(ERROR) << "New context failed while running " << model_name.c_str();
+    std::cerr << "New context failed while running " << model_name.c_str() << std::endl;
+    return RET_ERROR;
+  }
+
+  if (flags_->cpu_bind_mode_ == 2) {
+    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = MID_CPU;
+  } else if (flags_->cpu_bind_mode_ == 1) {
+    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = HIGHER_CPU;
+  } else {
+    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = NO_BIND;
+  }
+
+  context->thread_num_ = flags_->num_threads_;
+
+  auto *model = mindspore::lite::Model::Import(file_name.c_str());
+  if (model == nullptr) {
+    MS_LOG(ERROR) << "create model for lite session failed";
+    return RET_ERROR;
+  }
+  auto lite_session = std::unique_ptr(session::LiteSession::CreateSession(context.get()));
+  if (lite_session == nullptr) {
+    MS_LOG(ERROR) << "ExportedFile CreateSession failed while running " << model_name.c_str();
+    std::cout << "CreateSession failed while running " << model_name.c_str() << std::endl;
+    // The model was already imported; release it like every other error path below does.
+    delete model;
+    return RET_ERROR;
+  }
+  if (lite_session->CompileGraph(model) != RET_OK) {
+    MS_LOG(ERROR) << "Cannot compile model";
+    delete model;
+    return RET_ERROR;
+  }
+  // NOTE(review): ms_inputs_ is a member but lite_session is local to this function, so these
+  // pointers presumably dangle once the function returns - confirm nothing reads them afterwards.
+  ms_inputs_ = lite_session->GetInputs();
+  auto end_prepare_time = GetTimeUs();
+  MS_LOG(INFO) << "Exported model PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms";
+  std::cout << "Exported model PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms" << std::endl;
+
+  // Load input
+  MS_LOG(INFO) << "start generate input data";
+  auto status = LoadInput();
+  if (status != RET_OK) {
+    MS_LOG(ERROR) << "Generate input data error";
+    delete model;
+    return status;
+  }
+  if (!flags_->data_file_.empty()) {
+    MS_LOG(INFO) << "Check accuracy for exported model";
+    std::cout << "Check accuracy for exported model " << std::endl;
+    status = MarkAccuracyLite(lite_session);
+    // Release the data_ entries whether or not the check passed.
+    for (auto &data : data_) {
+      data.second->shape.clear();
+      data.second->data.clear();
+      delete data.second;
+    }
+    data_.clear();
+    if (status != RET_OK) {
+      MS_LOG(ERROR) << "Run MarkAccuracy on exported model error: " << status;
+      std::cout << "Run MarkAccuracy on exported model error: " << status << std::endl;
+      delete model;
+      return status;
+    }
+  }
+  delete model;
+  return RET_OK;
+}
+
 int NetTrain::RunNetTrain() {
   auto start_prepare_time = GetTimeUs();
   // Load graph
@@ -451,6 +644,17 @@ int NetTrain::RunNetTrain() {
       return status;
     }
   }
+  status = CheckExecute(model);
+  if (status != RET_OK) {
+    MS_LOG(ERROR) << "Run CheckExecute error: " << status;
+    std::cout << "Run CheckExecute error: " << status << std::endl;
+    return status;
+  }
+  return RET_OK;
+}
+
+int NetTrain::CheckExecute(mindspore::lite::Model *model) {
+  int status;
   if (!flags_->export_file_.empty()) {
     auto ret = Model::Export(model, flags_->export_file_.c_str());
     if (ret != RET_OK) {
@@ -459,12 +663,33 @@ int NetTrain::RunNetTrain() {
       return RET_ERROR;
     }
     delete session_;
+    session_ = nullptr;
     status = RunExportedNet();
     if (status != RET_OK) {
       MS_LOG(ERROR) << "Run Exported model error: " << status;
       std::cout << "Run Exported model error: " << status << std::endl;
       return status;
     }
+  } else {
+    if (!flags_->inference_file_.empty()) {
+      auto tick = GetTimeUs();
+      status = session_->ExportInference(flags_->inference_file_);
+      if (status != RET_OK) {
+        MS_LOG(ERROR) << "Save model error: " << status;
+        std::cout << "Save model error: " << status << std::endl;
+        return status;
+      }
+      std::cout << "ExportInference() execution time is "
<< GetTimeUs() - tick << "us\n";
+      // The train session is no longer needed once the inference model has been written.
+      delete session_;
+      session_ = nullptr;
+
+      // ExportInference wrote "<inference_file_>.ms"; run that file on a plain LiteSession.
+      status = RunExportedNetLite(flags_->inference_file_ + ".ms");
+      if (status != RET_OK) {
+        MS_LOG(ERROR) << "Running saved model error: " << status;
+        std::cout << "Running saved model error: " << status << std::endl;
+        return status;
+      }
+    }
   }
   return RET_OK;
 }
@@ -554,6 +779,11 @@ int NetTrain::InitCallbackParameter() {
         case kNumberTypeInt32:
           std::cout << TensorSum(output, tensor_size);
           break;
+#ifdef ENABLE_FP16
+        case kNumberTypeFloat16:
+          std::cout << TensorSum(output, tensor_size);
+          break;
+#endif
         default:
           std::cout << "unsupported type:" << type;
           break;
diff --git a/mindspore/lite/tools/benchmark_train/net_train.h b/mindspore/lite/tools/benchmark_train/net_train.h
index 0d2839825e..252f3d31be 100644
--- a/mindspore/lite/tools/benchmark_train/net_train.h
+++ b/mindspore/lite/tools/benchmark_train/net_train.h
@@ -30,6 +30,7 @@
 #include
 #include
 #include
+
 #include "tools/common/flag_parser.h"
 #include "src/common/file_utils.h"
 #include "src/common/utils.h"
@@ -51,14 +52,15 @@ struct MS_API CheckTensor {
 };
+// Sum the first `size` elements of `data`, read as elements of type T.
+// Each element is converted to float before it is added, and the result is a float whatever T is.
 template
-T TensorSum(void *data, int size) {
+float TensorSum(void *data, int size) {
   T *typed_data = reinterpret_cast(data);
-  T sum = static_cast(0);
+  float sum = 0.f;
   for (int i = 0; i < size; i++) {
-    sum += typed_data[i];
+    sum += static_cast(typed_data[i]);
   }
   return sum;
 }
+
 class MS_API NetTrainFlags : public virtual FlagParser {
  public:
   NetTrainFlags() {
@@ -77,6 +79,7 @@ class MS_API NetTrainFlags : public virtual FlagParser {
     AddFlag(&NetTrainFlags::layer_checksum_, "layerCheckSum", "layer output checksum print (debug)", false);
     AddFlag(&NetTrainFlags::enable_fp16_, "enableFp16", "Enable float16", false);
     AddFlag(&NetTrainFlags::loss_name_, "lossName", "loss layer name", "");
+    AddFlag(&NetTrainFlags::inference_file_, "inferenceFile", "MS file to export inference model", "");
   }
   ~NetTrainFlags() override = default;
@@ -109,6 +112,7 @@ class MS_API NetTrainFlags : public
virtual FlagParser { bool layer_checksum_ = false; std::vector> resize_dims_; std::string loss_name_ = ""; + std::string inference_file_ = ""; }; class MS_API NetTrain { @@ -166,6 +170,7 @@ class MS_API NetTrain { for (int j = 0; j < std::min(50, size); j++) { std::cout << refOutput[j] << " "; } + std::cout << std::endl; for (int j = 0; j < size; j++) { if (std::isnan(msTensorData[j]) || std::isinf(msTensorData[j])) { std::cerr << "Output tensor has nan or inf data, compare fail" << std::endl; @@ -174,7 +179,7 @@ class MS_API NetTrain { } auto tolerance = absoluteTolerance + relativeTolerance * fabs(refOutput[j]); - auto absoluteError = std::fabs(msTensorData[j] - refOutput[j]); + auto absoluteError = std::fabs(static_cast(msTensorData[j]) - refOutput[j]); if (absoluteError > tolerance) { if (fabs(refOutput[j]) == 0) { if (absoluteError > 1e-5) { @@ -208,6 +213,10 @@ class MS_API NetTrain { int MarkAccuracy(); private: + int RunExportedNetLite(std::string file_name); + int MarkAccuracyLite(const std::unique_ptr &lite_session); + int CompareOutputLite(const std::unique_ptr &lite_session); + int CheckExecute(mindspore::lite::Model *model); NetTrainFlags *flags_; session::TrainSession *session_ = nullptr; std::vector ms_inputs_;