@@ -228,7 +228,6 @@ add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/nnacl)
 if (NOT WIN32)
     if (ENABLE_TOOLS)
         add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark)
-        add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/time_profiler)
    endif()
    if (BUILD_TESTCASES)
        add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/test)
@@ -20,18 +20,14 @@
 #include <string>
 #include <memory>
 #include "include/ms_tensor.h"
+#include "include/lite_utils.h"
 
 namespace mindspore::lite {
-/// \brief Allocator defined a memory pool for malloc memory and free memory dynamically.
-///
-/// \note List public class and interface for reference.
-class Allocator;
-
 /// \brief CpuBindMode defined for holding bind cpu strategy argument.
 typedef enum {
-  MID_CPU = -1,   /**< bind middle cpu first */
-  NO_BIND = 0,    /**< no bind */
   HIGHER_CPU = 1, /**< bind higher cpu first */
+  NO_BIND = 0,    /**< no bind */
+  MID_CPU = 2     /**< bind middle cpu first */
 } CpuBindMode;
 
 /// \brief DeviceType defined for holding user's preferred backend.
@@ -43,10 +39,10 @@ typedef enum {
 /// \brief Context defined for holding environment variables during runtime.
 struct Context {
-  bool float16_priority = false; /**< prior enable float16 inference */
+  bool enable_float16_ = false; /**< prior enable float16 inference */
   DeviceType device_type_ = DT_CPU;
   int thread_num_ = 2; /**< thread number config for thread pool */
-  std::shared_ptr<Allocator> allocator = nullptr;
+  AllocatorPtr allocator = nullptr;
   CpuBindMode cpu_bind_mode_ = MID_CPU;
 };
 } // namespace mindspore::lite
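
Note on the two renames above: `float16_priority` becomes `enable_float16_`, and the raw `std::shared_ptr<Allocator>` becomes the `AllocatorPtr` alias now defined in lite_utils.h. A minimal usage sketch against the post-change API (assumes the `LiteSession::CreateSession(lite::Context *)` factory is untouched by this patch; model loading and inference are omitted):

```cpp
#include "include/context.h"
#include "include/lite_session.h"

// Sketch of configuring the renamed Context fields (illustrative only).
mindspore::session::LiteSession *NewCpuSession() {
  mindspore::lite::Context ctx;
  ctx.enable_float16_ = true;   // formerly float16_priority
  ctx.thread_num_ = 4;
  ctx.device_type_ = mindspore::lite::DT_CPU;
  ctx.cpu_bind_mode_ = mindspore::lite::MID_CPU;  // note: now 2, no longer -1
  // ctx.allocator stays nullptr: AllocatorPtr is the new shared_ptr alias, and
  // a null allocator presumably lets the runtime create its own pool.
  return mindspore::session::LiteSession::CreateSession(&ctx);
}
```
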
@@ -25,7 +25,7 @@ using STATUS = int;
 /* Success */
 constexpr int RET_OK = 0; /**< No error occurs. */
 
-/* Common error code, range: [-1, -100]*/
+/* Common error code, range: [-1, -100)*/
 constexpr int RET_ERROR = -1;         /**< Common error code. */
 constexpr int RET_NULL_PTR = -2;      /**< NULL pointer returned.*/
 constexpr int RET_PARAM_INVALID = -3; /**< Invalid parameter.*/
@@ -34,30 +34,29 @@ constexpr int RET_SUCCESS_EXIT = -5; /**< No error but exit. */
 constexpr int RET_MEMORY_FAILED = -6; /**< Fail to create memory. */
 constexpr int RET_NOT_SUPPORT = -7;   /**< Fail to support. */
 
-/* Executor error code, range: [-101,-200] */
-constexpr int RET_OUT_OF_TENSOR_RANGE = -101; /**< Failed to check range. */
-constexpr int RET_INPUT_TENSOR_ERROR = -102;  /**< Failed to check input tensor. */
-constexpr int RET_REENTRANT_ERROR = -103;     /**< Exist executor running. */
+/* Executor error code, range: [-100,-200) */
+constexpr int RET_OUT_OF_TENSOR_RANGE = -100; /**< Failed to check range. */
+constexpr int RET_INPUT_TENSOR_ERROR = -101;  /**< Failed to check input tensor. */
+constexpr int RET_REENTRANT_ERROR = -102;     /**< Exist executor running. */
 
-/* Graph error code, range: [-201,-300] */
-constexpr int RET_GRAPH_FILE_ERR = -201; /**< Failed to verify graph file. */
+/* Graph error code, range: [-200,-300) */
+constexpr int RET_GRAPH_FILE_ERR = -200; /**< Failed to verify graph file. */
 
-/* Node error code, range: [-301,-400] */
-constexpr int RET_NOT_FIND_OP = -301;        /**< Failed to find operator. */
-constexpr int RET_INVALID_OP_NAME = -302;    /**< Invalid operator name. */
-constexpr int RET_INVALID_OP_ATTR = -303;    /**< Invalid operator attr. */
-constexpr int RET_OP_EXECUTE_FAILURE = -304; /**< Failed to execution operator. */
+/* Node error code, range: [-300,-400) */
+constexpr int RET_NOT_FIND_OP = -300;        /**< Failed to find operator. */
+constexpr int RET_INVALID_OP_NAME = -301;    /**< Invalid operator name. */
+constexpr int RET_INVALID_OP_ATTR = -302;    /**< Invalid operator attr. */
+constexpr int RET_OP_EXECUTE_FAILURE = -303; /**< Failed to execution operator. */
 
-/* Tensor error code, range: [-401,-500] */
-constexpr int RET_FORMAT_ERR = -401; /**< Failed to checking tensor format. */
+/* Tensor error code, range: [-400,-500) */
+constexpr int RET_FORMAT_ERR = -400; /**< Failed to checking tensor format. */
 
-/* InferShape error code, range: [-501,-600] */
-constexpr int RET_INFER_ERR = -501;     /**< Failed to infer shape. */
-constexpr int RET_INFER_INVALID = -502; /**< Invalid infer shape before runtime. */
+/* InferShape error code, range: [-500,-600) */
+constexpr int RET_INFER_ERR = -500;     /**< Failed to infer shape. */
+constexpr int RET_INFER_INVALID = -501; /**< Invalid infer shape before runtime. */
 
-/* User input param error code, range: [-601, 700]*/
-constexpr int RET_INPUT_PARAM_INVALID = -601; /**< Invalid input param by user. */
-constexpr int RET_INPUT_PARAM_LACK = -602;    /**< LACK input param by user. */
+/* User input param error code, range: [-600, 700)*/
+constexpr int RET_INPUT_PARAM_INVALID = -600; /**< Invalid input param by user. */
 } // namespace lite
 } // namespace mindspore
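
The renumbering above shifts each module's block to start on its boundary value and makes the documented ranges half-open. A hedged sketch of how a caller could bucket a STATUS by module under the new scheme (the helper below is our own illustration, not part of errorcode.h):

```cpp
#include <cstdio>

// Hypothetical helper that buckets a STATUS into its module under the new
// half-open ranges, e.g. executor errors now occupy [-100, -200).
const char *ErrorModule(int status) {
  if (status == 0) return "ok";
  if (status > -100) return "common";      // [-1, -100)
  if (status > -200) return "executor";    // [-100, -200)
  if (status > -300) return "graph";       // [-200, -300)
  if (status > -400) return "node";        // [-300, -400)
  if (status > -500) return "tensor";      // [-400, -500)
  if (status > -600) return "infershape";  // [-500, -600)
  return "user input";                     // [-600, ...)
}

int main() {
  std::printf("%s\n", ErrorModule(-300));  // "node": RET_NOT_FIND_OP after the shift
  return 0;
}
```
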
@@ -29,8 +29,8 @@ namespace mindspore {
 namespace session {
 /// \brief CallBackParam defined input arguments for callBack function.
 struct CallBackParam {
-  std::string name_callback_param; /**< node name argument */
-  std::string type_callback_param; /**< node type argument */
+  std::string node_name; /**< node name argument */
+  std::string node_type; /**< node type argument */
 };
 
 /// \brief KernelCallBack defined the function pointer for callBack.
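
With the CallBackParam members renamed, callbacks read `node_name`/`node_type` instead of the old `*_callback_param` fields. A sketch assuming the usual KernelCallBack signature of (inputs, outputs, param):

```cpp
#include <iostream>
#include <vector>
#include "include/lite_session.h"

// Sketch of a tracing callback using the renamed CallBackParam fields.
mindspore::session::KernelCallBack MakeTraceCallback() {
  return [](const std::vector<mindspore::tensor::MSTensor *> &inputs,
            const std::vector<mindspore::tensor::MSTensor *> &outputs,
            const mindspore::session::CallBackParam &param) {
    std::cout << "node " << param.node_name << " (" << param.node_type << ")"
              << " in=" << inputs.size() << " out=" << outputs.size() << std::endl;
    return true;  // returning false aborts the run
  };
}
```
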
@@ -69,12 +69,12 @@ class MS_API LiteSession {
   /// \return The vector of MindSpore Lite MSTensor.
   virtual std::vector<tensor::MSTensor *> GetInputs() const = 0;
 
-  /// \brief Get input MindSpore Lite MSTensors of model by node name.
+  /// \brief Get input MindSpore Lite MSTensors of model by tensor name.
   ///
-  /// \param[in] node_name Define node name.
+  /// \param[in] node_name Define tensor name.
   ///
   /// \return The vector of MindSpore Lite MSTensor.
-  virtual std::vector<tensor::MSTensor *> GetInputsByName(const std::string &node_name) const = 0;
+  virtual mindspore::tensor::MSTensor *GetInputsByTensorName(const std::string &tensor_name) const = 0;
 
   /// \brief Run session with callback.
   ///
@@ -90,8 +90,9 @@ class MS_API LiteSession {
   ///
   /// \param[in] node_name Define node name.
   ///
+  /// \note Deprecated, replace with GetOutputByTensorName
+  ///
   /// \return The vector of MindSpore Lite MSTensor.
-  /// deprecated, replace with GetOutputByTensorName
   virtual std::vector<tensor::MSTensor *> GetOutputsByNodeName(const std::string &node_name) const = 0;
 
   /// \brief Get output MindSpore Lite MSTensors of model mapped by tensor name.
@@ -117,7 +118,7 @@ class MS_API LiteSession {
   /// \param[in] dims Define the inputs new shape.
   ///
   /// \return STATUS as an error code of resize inputs, STATUS is defined in errorcode.h.
-  virtual int Resize(const std::vector<tensor::MSTensor *> &inputs, const std::vector<std::vector<int>>& dims) = 0;
+  virtual int Resize(const std::vector<tensor::MSTensor *> &inputs, const std::vector<std::vector<int>> &dims) = 0;
 };
 } // namespace session
 } // namespace mindspore
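
For reference, a minimal Resize call under this signature (the hunk above is only a whitespace fix; the per-input dims shape below is illustrative):

```cpp
#include <vector>
#include "include/lite_session.h"

// Sketch: resize the first graph input to a new batch size; returns a STATUS
// from errorcode.h. The NHWC shape is an assumption for illustration.
int ResizeToBatch(mindspore::session::LiteSession *session, int batch) {
  auto inputs = session->GetInputs();
  std::vector<std::vector<int>> dims = {{batch, 224, 224, 3}};
  return session->Resize(inputs, dims);
}
```
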
@@ -18,12 +18,19 @@
 #define MINDSPORE_LITE_INCLUDE_LITE_UTILS_H_
 #include <vector>
 #include <string>
+#include <memory>
 #include "schema/model_generated.h"
 
 namespace mindspore::lite {
+/// \brief Allocator defined a memory pool for malloc memory and free memory dynamically.
+///
+/// \note List public class and interface for reference.
+class Allocator;
+
 using TensorPtrVector = std::vector<mindspore::schema::Tensor *>;
 using Uint32Vector = std::vector<uint32_t>;
 using String = std::string;
 using NodeType = schema::NodeType;
+using AllocatorPtr = std::shared_ptr<Allocator>;
 } // namespace mindspore::lite
 #endif // MINDSPORE_LITE_INCLUDE_LITE_UTILS_H_
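
Moving the forward declaration here works because `std::shared_ptr` tolerates an incomplete element type for declaration, copy, and null comparison; only code that actually constructs or destroys the Allocator needs the full definition. A self-contained sketch of the pattern:

```cpp
#include <memory>

// Why a forward declaration suffices: std::shared_ptr can be declared,
// copied, and compared against nullptr with an incomplete element type.
class Allocator;  // definition lives elsewhere
using AllocatorPtr = std::shared_ptr<Allocator>;

struct ContextSketch {
  AllocatorPtr allocator = nullptr;  // compiles without seeing allocator.h
};
```
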
@@ -18,11 +18,11 @@
 #define MINDSPORE_LITE_SRC_LITE_KERNEL_H_
 #include <vector>
 #include <string>
-#include "src/ops/primitive_c.h"
 #include "src/common/utils.h"
 #ifdef ENABLE_ARM
 #include <arm_neon.h>
 #endif
+#include "src/ops/primitive_c.h"
 #include "nnacl/op_base.h"
 #include "src/inner_context.h"
 #include "src/tensor.h"
@@ -176,7 +176,8 @@ void LiteSession::InitGraphInputMap(const lite::Model *model) {
         MS_LOG(ERROR) << "in_tensor is null!";
         return;
       }
-      this->input_map_[in_node->name_].emplace_back(in_tensor);
+      auto tensor_name = in_node->name_ + std::to_string(i);
+      this->input_map_[tensor_name] = in_tensor;
     }
   }
 }
@@ -315,6 +316,19 @@ int LiteSession::Init(Context *context) {
   }
   MS_ASSERT(nullptr != context);
+  if (context->device_type_ == DT_NPU) {
+    MS_LOG(ERROR) << "NPU is not supported.";
+    is_running_.store(false);
+    return RET_NOT_SUPPORT;
+  }
+#ifndef SUPPORT_GPU
+  if (context->device_type_ == DT_GPU) {
+    MS_LOG(ERROR) << "GPU is not supported.";
+    is_running_.store(false);
+    return RET_NOT_SUPPORT;
+  }
+#endif
   this->context_ = new (std::nothrow) InnerContext();
   if (this->context_ == nullptr) {
     MS_LOG(ERROR) << "New Context failed";
@@ -325,7 +339,7 @@ int LiteSession::Init(Context *context) {
   this->context_->thread_num_ = context->thread_num_;
   this->context_->cpu_bind_mode_ = context->cpu_bind_mode_;
   this->context_->device_type_ = context->device_type_;
-  this->context_->float16_priority = context->float16_priority;
+  this->context_->enable_float16_ = context->enable_float16_;
   auto ret = this->context_->Init();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init Context failed";
@@ -341,7 +355,7 @@ int LiteSession::Init(Context *context) {
 #if SUPPORT_GPU
   if (context_->device_type_ == DT_GPU) {
     auto opencl_runtime = ocl_runtime_wrap_.GetInstance();
-    opencl_runtime->SetFp16Enable(context_->float16_priority);
+    opencl_runtime->SetFp16Enable(context_->enable_float16_);
     if (opencl_runtime->Init() != RET_OK) {
       context_->device_type_ = DT_CPU;
       MS_LOG(WARNING) << "Init OpenCL runtime failed, change to CPU mode.";
@@ -397,12 +411,11 @@ LiteSession::~LiteSession() {
   is_running_.store(false);
 }
 
-std::vector<mindspore::tensor::MSTensor *> LiteSession::GetInputsByName(const std::string &name) const {
+mindspore::tensor::MSTensor *LiteSession::GetInputsByTensorName(const std::string &name) const {
   auto ret = input_map_.find(name);
   if (ret == input_map_.end()) {
-    MS_LOG(WARNING) << "Node " << name << " is not an input node";
-    std::vector<mindspore::tensor::MSTensor *> empty_ret;
-    return empty_ret;
+    MS_LOG(WARNING) << "Tensor " << name << " is not exist";
+    return nullptr;
   }
   return ret->second;
 }
@@ -50,7 +50,7 @@ class LiteSession : public session::LiteSession {
   std::vector<mindspore::tensor::MSTensor *> GetInputs() const override;
 
-  std::vector<mindspore::tensor::MSTensor *> GetInputsByName(const std::string &name) const override;
+  mindspore::tensor::MSTensor *GetInputsByTensorName(const std::string &name) const override;
 
   int RunGraph(const session::KernelCallBack &before = nullptr,
                const session::KernelCallBack &after = nullptr) override;
@@ -101,8 +101,8 @@ class LiteSession : public session::LiteSession {
   std::vector<Tensor *> outputs_;
   // graph input MSTensors
   std::vector<mindspore::tensor::MSTensor *> input_vec_;
-  // graph input node name -- input tensors
-  std::unordered_map<std::string, std::vector<mindspore::tensor::MSTensor *>> input_map_;
+  // graph input tensor name -- input tensors
+  std::unordered_map<std::string, mindspore::tensor::MSTensor *> input_map_;
   // graph output node name -- output tensors
   std::unordered_map<std::string, std::vector<mindspore::tensor::MSTensor *>> output_node_map_;
@@ -27,7 +27,6 @@
 #else
 #include "schema/model_generated.h"
 #endif
-
 #include "src/tensor.h"
 #include "include/errorcode.h"
 #include "src/common/log_adapter.h"
@@ -137,9 +137,9 @@ int FullconnectionCPUKernel::Run() {
     MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
     return prepare_ret;
   }
-  auto a_ptr = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
-  auto b_ptr = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
-  c_r_ptr = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
+  auto a_ptr = reinterpret_cast<float *>(in_tensors_.at(0)->data_c());
+  auto b_ptr = reinterpret_cast<float *>(in_tensors_.at(1)->data_c());
+  c_r_ptr = reinterpret_cast<float *>(out_tensors_.at(0)->data_c());
 
   if (!fc_param_->a_const_) InitMatrixA(a_ptr, a_c12_ptr_);
   if (!fc_param_->b_const_) InitMatrixB(b_ptr, b_r8_ptr_);
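
Swapping `MutableData()` for `data_c()` here appears to skip the lazy-allocation path: by the time Run executes, Prepare has run and the buffers exist, so a plain accessor suffices. A self-contained sketch of the assumed contract (not the real Tensor class):

```cpp
#include <cstddef>
#include <cstdlib>

// Sketch of the accessor semantics assumed here: MutableData() lazily
// allocates backing memory on first use, data_c() just returns the current
// pointer, which may be null if nothing has allocated it yet.
class TensorSketch {
 public:
  void *MutableData() {  // may allocate on first use
    if (data_ == nullptr) data_ = std::malloc(size_);
    return data_;
  }
  void *data_c() const { return data_; }  // plain accessor, no allocation
  ~TensorSketch() { std::free(data_); }

 private:
  void *data_ = nullptr;
  std::size_t size_ = 1024;
};
```
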
@@ -18,10 +18,10 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_FULLCONNECTION_H_
 
 #include <vector>
-#include "src/runtime/kernel/arm/base/fullconnection_base.h"
 #include "include/errorcode.h"
 #include "include/context.h"
 #include "nnacl/fp32/matmul.h"
+#include "src/runtime/kernel/arm/base/fullconnection_base.h"
 
 using mindspore::lite::InnerContext;
@@ -18,8 +18,8 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_RESHAPE_H_
 
 #include <vector>
-#include "include/context.h"
 #include "src/lite_kernel.h"
+#include "include/context.h"
 #include "nnacl/l2_norm_parameter.h"
 #include "schema/model_generated.h"
 #include "src/kernel_registry.h"
@@ -18,8 +18,8 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_POWER_H_
 
 #include <vector>
-#include "include/context.h"
 #include "src/lite_kernel.h"
+#include "include/context.h"
 #include "nnacl/power.h"
 #include "src/runtime/kernel/arm/base/power_base.h"
@@ -18,9 +18,9 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_FULLCONNECTION_INT8_H_
 
 #include <vector>
-#include "src/runtime/kernel/arm/base/fullconnection_base.h"
 #include "include/context.h"
 #include "nnacl/quantization/quantize.h"
+#include "src/runtime/kernel/arm/base/fullconnection_base.h"
 #include "nnacl/int8/common_func.h"
 
 using mindspore::lite::InnerContext;
@@ -18,9 +18,9 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_MATMUL_INT8_H_
 
 #include <vector>
-#include "src/runtime/kernel/arm/base/matmul_base.h"
 #include "include/context.h"
 #include "nnacl/quantization/quantize.h"
+#include "src/runtime/kernel/arm/base/matmul_base.h"
 
 using mindspore::lite::InnerContext;
@@ -30,7 +30,7 @@ int OpenCLExecutor::Run(std::vector<Tensor *> &inputs, std::vector<Tensor *> &ou
   for (auto *kernel : kernels) {
     MS_ASSERT(nullptr != kernel);
     session::CallBackParam callbackParam;
-    callbackParam.name_callback_param = kernel->name();
+    callbackParam.node_name = kernel->name();
     if (before != nullptr) {
       if (!before(TensorVectorCast(kernel->in_tensors()), TensorVectorCast(kernel->out_tensors()), callbackParam)) {
@@ -195,7 +195,7 @@ void Scheduler::ConstructSubgraphs(std::vector<kernel::LiteKernel *> *kernels) {
   for (auto temp_kernels : sub_kernels_list) {
     std::vector<Tensor *> output_tensor = kernel::LiteKernelUtil::SubgraphOutputTensors(temp_kernels);
     for (auto tensor : output_tensor) {
-      if (context_->float16_priority && tensor->data_type() == kNumberTypeFloat16) {
+      if (context_->enable_float16_ && tensor->data_type() == kNumberTypeFloat16) {
         tensor->set_data_type(kNumberTypeFloat32);
       }
     }
@@ -262,7 +262,7 @@ kernel::LiteKernel *Scheduler::ScheduleNode(const std::vector<Tensor *> &in_tens
 #endif
   desc.arch = kernel::KERNEL_ARCH::kCPU;
   kernel::LiteKernel *kernel = nullptr;
-  if ((context_->float16_priority && data_type == kNumberTypeFloat32) || data_type == kNumberTypeFloat16) {
+  if ((context_->enable_float16_ && data_type == kNumberTypeFloat32) || data_type == kNumberTypeFloat16) {
     // check if support fp16
     kernel::KernelKey key{desc.arch, kNumberTypeFloat16, desc.type};
     kernel = KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, primitive, context_, key);
@@ -106,7 +106,7 @@ function Run_Converter() {
         echo ${model_name} >> "${run_converter_log_file}"
         echo 'convert mode name: '${model_name}' begin.'
         echo './converter_lite --fmk=TFLITE --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}_posttraining' --quantType=PostTraining --config_file='${models_path}'/'${model_name}'_posttraining.config' >> "${run_converter_log_file}"
-        ./converter_lite --fmk=TFLITE --modelFile=$models_path/${model_name} --outputFile=${ms_models_path}/${model_name}_posttraining --quantType=PostTraining --config_file=${models_path}/${model_name}_posttraining.config
+        ./converter_lite --fmk=TFLITE --modelFile=$models_path/${model_name} --outputFile=${ms_models_path}/${model_name}_posttraining --quantType=PostTraining --configFile=${models_path}/${model_name}_posttraining.config
         if [ $? = 0 ]; then
             converter_result='converter post_training '${model_name}' pass';echo ${converter_result} >> ${run_converter_result_file}
         else
@@ -152,8 +152,8 @@ function Run_Converter() {
             continue
         fi
         echo ${model_name} >> "${run_converter_log_file}"
-        echo './converter_lite --fmk=TFLITE --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}'--quantType=WeightQuant --bitNum=8 --quantSize=500 --convWeightQuantChannelThreshold=16' >> "${run_converter_log_file}"
-        ./converter_lite --fmk=TFLITE --modelFile=$models_path/${model_name} --outputFile=${ms_models_path}/${model_name}_weightquant --quantType=WeightQuant --bitNum=8 --quantSize=500 --convWeightQuantChannelThreshold=16
+        echo './converter_lite --fmk=TFLITE --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}'--quantType=WeightQuant --bitNum=8 --quantWeightSize=500 --quantWeightChannel=16' >> "${run_converter_log_file}"
+        ./converter_lite --fmk=TFLITE --modelFile=$models_path/${model_name} --outputFile=${ms_models_path}/${model_name}_weightquant --quantType=WeightQuant --bitNum=8 --quantWeightSize=500 --quantWeightChannel=16
         if [ $? = 0 ]; then
             converter_result='converter weight_quant '${model_name}' pass';echo ${converter_result} >> ${run_converter_result_file}
         else
@@ -173,8 +173,8 @@ function Run_x86() {
         echo ${model_name} >> "${run_x86_log_file}"
         echo 'cd '${x86_path}'/mindspore-lite-'${version}'-runtime-x86-'${process_unit_x86} >> "{run_x86_log_file}"
         cd ${x86_path}/mindspore-lite-${version}-runtime-x86-${process_unit_x86} || return 1
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelPath='${ms_models_path}'/'${model_name}'.ms --inDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --calibDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_log_file}"
-        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelPath=${ms_models_path}/${model_name}.ms --inDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --calibDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out >> "${run_x86_log_file}"
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_log_file}"
+        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out >> "${run_x86_log_file}"
         if [ $? = 0 ]; then
             run_result='x86: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
         else
@@ -191,8 +191,8 @@ function Run_x86() {
         echo ${model_name} >> "${run_x86_log_file}"
         echo 'cd '${x86_path}'/mindspore-lite-'${version}'-runtime-x86-'${process_unit_x86} >> "${run_x86_log_file}"
         cd ${x86_path}/mindspore-lite-${version}-runtime-x86-${process_unit_x86} || return 1
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelPath='${ms_models_path}'/'${model_name}'.ms --inDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --calibDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_log_file}"
-        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelPath=${ms_models_path}/${model_name}.ms --inDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --calibDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out >> "${run_x86_log_file}"
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_log_file}"
+        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out >> "${run_x86_log_file}"
         if [ $? = 0 ]; then
             run_result='x86: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
         else
@@ -209,8 +209,8 @@ function Run_x86() {
         echo ${model_name} >> "${run_x86_log_file}"
         echo 'cd '${x86_path}'/mindspore-lite-'${version}'-runtime-x86-'${process_unit_x86} >> "${run_x86_log_file}"
         cd ${x86_path}/mindspore-lite-${version}-runtime-x86-${process_unit_x86} || return 1
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelPath='${ms_models_path}'/'${model_name}'.ms --inDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --calibDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_log_file}"
-        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelPath=${ms_models_path}/${model_name}.ms --inDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --calibDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out >> "${run_x86_log_file}"
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_log_file}"
+        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out >> "${run_x86_log_file}"
         if [ $? = 0 ]; then
             run_result='x86: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
         else
@@ -227,8 +227,8 @@ function Run_x86() {
         echo ${model_name} >> "${run_x86_log_file}"
         echo 'cd '${x86_path}'/mindspore-lite-'${version}'-runtime-x86-'${process_unit_x86} >> "${run_x86_log_file}"
         cd ${x86_path}/mindspore-lite-${version}-runtime-x86-${process_unit_x86} || return 1
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelPath='${ms_models_path}'/'${model_name}'_posttraining.ms --inDataPath=/home/workspace/mindspore_dataset/mslite/quantTraining/mnist_calibration_data/00099.bin --calibDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'_posttraining.ms.out' >> "${run_x86_log_file}"
-        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelPath=${ms_models_path}/${model_name}_posttraining.ms --inDataPath=/home/workspace/mindspore_dataset/mslite/quantTraining/mnist_calibration_data/00099.bin --calibDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}_posttraining.ms.out >> "${run_x86_log_file}"
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'_posttraining.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/quantTraining/mnist_calibration_data/00099.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'_posttraining.ms.out' >> "${run_x86_log_file}"
+        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_posttraining.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/quantTraining/mnist_calibration_data/00099.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}_posttraining.ms.out >> "${run_x86_log_file}"
         if [ $? = 0 ]; then
             run_result='x86: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
         else
@@ -245,8 +245,8 @@ function Run_x86() {
         echo ${model_name} >> "${run_x86_log_file}"
         echo 'cd '${x86_path}'/mindspore-lite-'${version}'-runtime-x86-'${process_unit_x86} >> "${run_x86_log_file}"
         cd ${x86_path}/mindspore-lite-${version}-runtime-x86-${process_unit_x86} || return 1
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelPath='${ms_models_path}'/'${model_name}'.ms --inDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --calibDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_log_file}"
-        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelPath=${ms_models_path}/${model_name}.ms --inDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --calibDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out >> "${run_x86_log_file}"
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_log_file}"
+        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out >> "${run_x86_log_file}"
         if [ $? = 0 ]; then
             run_result='x86: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
         else
@@ -263,8 +263,8 @@ function Run_x86() {
         echo ${model_name}'_train' >> "${run_x86_log_file}"
         echo 'cd '${x86_path}'/mindspore-lite-'${version}'-runtime-x86-'${process_unit_x86} >> "${run_x86_log_file}"
         cd ${x86_path}/mindspore-lite-${version}-runtime-x86-${process_unit_x86} || return 1
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelPath='${ms_models_path}'/'${model_name}'_train.ms --inDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --calibDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_log_file}"
-        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelPath=${ms_models_path}/${model_name}'_train'.ms --inDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --calibDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=1.5 >> "${run_x86_log_file}"
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'_train.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_log_file}"
+        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}'_train'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=1.5 >> "${run_x86_log_file}"
         if [ $? = 0 ]; then
             run_result='x86: '${model_name}'_train pass'; echo ${run_result} >> ${run_benchmark_result_file}
         else
@@ -281,8 +281,8 @@ function Run_x86() {
         echo ${model_name} >> "${run_x86_log_file}"
         echo 'cd '${x86_path}'/mindspore-lite-'${version}'-runtime-x86-'${process_unit_x86} >> "${run_x86_log_file}"
         cd ${x86_path}/mindspore-lite-${version}-runtime-x86-${process_unit_x86} || return 1
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelPath='${ms_models_path}'/'${model_name}'.ms --inDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --calibDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_log_file}"
-        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelPath=${ms_models_path}/${model_name}.ms --inDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --calibDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=1.5 >> "${run_x86_log_file}"
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_log_file}"
+        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=1.5 >> "${run_x86_log_file}"
         if [ $? = 0 ]; then
             run_result='x86: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
         else
@@ -299,8 +299,8 @@ function Run_x86() {
         echo ${model_name} >> "${run_x86_log_file}"
         echo 'cd '${x86_path}'/mindspore-lite-'${version}'-runtime-x86-'${process_unit_x86} >> "${run_x86_log_file}"
         cd ${x86_path}/mindspore-lite-${version}-runtime-x86-${process_unit_x86} || return 1
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelPath='${ms_models_path}'/'${model_name}'.ms --inDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --calibDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_log_file}"
-        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelPath=${ms_models_path}/${model_name}_weightquant.ms --inDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --calibDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out >> "${run_x86_log_file}"
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_log_file}"
+        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out >> "${run_x86_log_file}"
         if [ $? = 0 ]; then
             run_result='x86: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
         else
@@ -349,8 +349,8 @@ function Run_arm64() {
         fi
         echo ${model_name} >> "${run_arm64_log_file}"
         echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out' >> "${run_arm64_log_file}"
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out' >> adb_run_cmd.txt
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out' >> "${run_arm64_log_file}"
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out' >> adb_run_cmd.txt
         adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}"
         if [ $? = 0 ]; then
             run_result='arm64: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
@@ -359,8 +359,8 @@ function Run_arm64() {
         fi
         # run benchmark test without clib data
         echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> "${run_arm64_log_file}"
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> adb_run_cmd.txt
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> "${run_arm64_log_file}"
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> adb_run_cmd.txt
         adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}"
         if [ $? = 0 ]; then
             run_result='arm64: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
@@ -377,8 +377,8 @@ function Run_arm64() {
         fi
         echo ${model_name} >> "${run_arm64_log_file}"
         echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out' >> "${run_arm64_log_file}"
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out' >> adb_run_cmd.txt
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out' >> "${run_arm64_log_file}"
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out' >> adb_run_cmd.txt
         adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}"
         if [ $? = 0 ]; then
             run_result='arm64: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
@@ -388,8 +388,8 @@ function Run_arm64() {
         # run benchmark test without clib data
         echo ${model_name} >> "${run_arm64_log_file}"
         echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> "${run_arm64_log_file}"
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> adb_run_cmd.txt
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> "${run_arm64_log_file}"
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> adb_run_cmd.txt
         adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}"
         if [ $? = 0 ]; then
             run_result='arm64: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
@@ -406,8 +406,8 @@ function Run_arm64() {
         fi
         echo ${model_name} >> "${run_arm64_log_file}"
         echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out' >> "${run_arm64_log_file}"
-        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out' >> adb_run_cmd.txt
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out' >> "${run_arm64_log_file}"
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out' >> adb_run_cmd.txt
         adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}"
         if [ $? = 0 ]; then
             run_result='arm64: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
| @@ -417,8 +417,8 @@ function Run_arm64() { | |||||
| # run benchmark test without clib data | # run benchmark test without clib data | ||||
| echo ${model_name} >> "${run_arm64_log_file}" | echo ${model_name} >> "${run_arm64_log_file}" | ||||
| echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | ||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> "{run_arm64_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> adb_run_cmd.txt | |||||
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> "${run_arm64_log_file}" |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> adb_run_cmd.txt | |||||
| adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | ||||
| if [ $? = 0 ]; then | if [ $? = 0 ]; then | ||||
| run_result='arm64: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | run_result='arm64: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | ||||
| @@ -435,8 +435,8 @@ function Run_arm64() { | |||||
| fi | fi | ||||
| echo ${model_name} >> "${run_arm64_log_file}" | echo ${model_name} >> "${run_arm64_log_file}" | ||||
| echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | ||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out --fp16Priority=true --accuracyThreshold=5' >> "${run_arm64_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out --fp16Priority=true --accuracyThreshold=5' >> adb_run_cmd.txt | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --enableFp16=true --accuracyThreshold=5' >> "${run_arm64_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --enableFp16=true --accuracyThreshold=5' >> adb_run_cmd.txt | |||||
| adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | ||||
| if [ $? = 0 ]; then | if [ $? = 0 ]; then | ||||
| run_result='arm64: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | run_result='arm64: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | ||||
| @@ -446,8 +446,8 @@ function Run_arm64() { | |||||
| # run benchmark test without clib data | # run benchmark test without clib data | ||||
| echo ${model_name} >> "${run_arm64_log_file}" | echo ${model_name} >> "${run_arm64_log_file}" | ||||
| echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | ||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2 --fp16Priority=true' >> "${run_arm64_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2 --fp16Priority=true' >> adb_run_cmd.txt | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2 --enableFp16=true' >> "${run_arm64_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2 --enableFp16=true' >> adb_run_cmd.txt | |||||
| adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | ||||
| if [ $? = 0 ]; then | if [ $? = 0 ]; then | ||||
| run_result='arm64: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | run_result='arm64: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | ||||
| @@ -464,8 +464,8 @@ function Run_arm64() { | |||||
| fi | fi | ||||
| echo ${model_name} >> "${run_arm64_log_file}" | echo ${model_name} >> "${run_arm64_log_file}" | ||||
| echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | ||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out' >> "${run_arm64_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out' >> adb_run_cmd.txt | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out' >> "${run_arm64_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out' >> adb_run_cmd.txt | |||||
| adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | ||||
| if [ $? = 0 ]; then | if [ $? = 0 ]; then | ||||
| run_result='arm64: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | run_result='arm64: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | ||||
| @@ -475,8 +475,8 @@ function Run_arm64() { | |||||
| # run benchmark test without clib data | # run benchmark test without clib data | ||||
| echo ${model_name} >> "${run_arm64_log_file}" | echo ${model_name} >> "${run_arm64_log_file}" | ||||
| echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | ||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> "${run_arm64_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> adb_run_cmd.txt | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> "${run_arm64_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> adb_run_cmd.txt | |||||
| adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | ||||
| if [ $? = 0 ]; then | if [ $? = 0 ]; then | ||||
| run_result='arm64: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | run_result='arm64: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | ||||
| @@ -493,8 +493,8 @@ function Run_arm64() { | |||||
| fi | fi | ||||
| echo ${model_name} >> "${run_arm64_log_file}" | echo ${model_name} >> "${run_arm64_log_file}" | ||||
| echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | ||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelPath='${model_name}'.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out' >> "${run_arm64_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelPath='${model_name}'.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out' >> adb_run_cmd.txt | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out' >> "${run_arm64_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out' >> adb_run_cmd.txt | |||||
| adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | ||||
| if [ $? = 0 ]; then | if [ $? = 0 ]; then | ||||
| run_result='arm64_gpu: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | run_result='arm64_gpu: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | ||||
| @@ -504,8 +504,8 @@ function Run_arm64() { | |||||
| # run benchmark test without clib data | # run benchmark test without clib data | ||||
| #echo ${model_name} | #echo ${model_name} | ||||
| echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | ||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelPath='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> "${run_arm64_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelPath='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> adb_run_cmd.txt | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> "${run_arm64_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> adb_run_cmd.txt | |||||
| adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | ||||
| if [ $? = 0 ]; then | if [ $? = 0 ]; then | ||||
| run_result='arm64_gpu: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | run_result='arm64_gpu: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | ||||
| @@ -522,8 +522,8 @@ function Run_arm64() { | |||||
| fi | fi | ||||
| echo ${model_name} >> "${run_arm64_log_file}" | echo ${model_name} >> "${run_arm64_log_file}" | ||||
| echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | ||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelPath='${model_name}'.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out --fp16Priority=true --accuracyThreshold=5' >> "${run_arm64_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelPath='${model_name}'.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out --fp16Priority=true --accuracyThreshold=5' >> adb_run_cmd.txt | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --enableFp16=true --accuracyThreshold=5' >> "${run_arm64_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --enableFp16=true --accuracyThreshold=5' >> adb_run_cmd.txt | |||||
| adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | ||||
| if [ $? = 0 ]; then | if [ $? = 0 ]; then | ||||
| run_result='arm64_gpu_fp16: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | run_result='arm64_gpu_fp16: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | ||||
| @@ -533,8 +533,8 @@ function Run_arm64() { | |||||
| # run benchmark test without clib data | # run benchmark test without clib data | ||||
| echo ${model_name} >> "${run_arm64_log_file}" | echo ${model_name} >> "${run_arm64_log_file}" | ||||
| echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | ||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelPath='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2 --fp16Priority=true' >> "${run_arm64_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelPath='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2 --fp16Priority=true' >> adb_run_cmd.txt | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2 --enableFp16=true' >> "${run_arm64_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2 --enableFp16=true' >> adb_run_cmd.txt | |||||
| adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | ||||
| if [ $? = 0 ]; then | if [ $? = 0 ]; then | ||||
| run_result='arm64_gpu_fp16: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | run_result='arm64_gpu_fp16: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | ||||
| @@ -552,8 +552,8 @@ function Run_arm64() { | |||||
| fi | fi | ||||
| echo ${model_name} >> "${run_arm64_log_file}" | echo ${model_name} >> "${run_arm64_log_file}" | ||||
| echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | ||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out --accuracyThreshold=1.5' >> "${run_arm64_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out --accuracyThreshold=1.5' >> adb_run_cmd.txt | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --accuracyThreshold=1.5' >> "${run_arm64_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --accuracyThreshold=1.5' >> adb_run_cmd.txt | |||||
| adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | ||||
| if [ $? = 0 ]; then | if [ $? = 0 ]; then | ||||
| run_result='arm64: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | run_result='arm64: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | ||||
| @@ -563,8 +563,8 @@ function Run_arm64() { | |||||
| # run benchmark test without clib data | # run benchmark test without clib data | ||||
| echo ${model_name} >> "${run_arm64_log_file}" | echo ${model_name} >> "${run_arm64_log_file}" | ||||
| echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | ||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> "{run_arm64_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> adb_run_cmd.txt | |||||
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> "${run_arm64_log_file}" |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> adb_run_cmd.txt | |||||
| adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | ||||
| if [ $? = 0 ]; then | if [ $? = 0 ]; then | ||||
| run_result='arm64: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | run_result='arm64: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | ||||
| @@ -581,8 +581,8 @@ function Run_arm64() { | |||||
| fi | fi | ||||
| echo ${model_name}'_train' >> "${run_arm64_log_file}" | echo ${model_name}'_train' >> "${run_arm64_log_file}" | ||||
| echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | ||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'_train.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out --accuracyThreshold=1.5' >> "${run_arm64_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'_train.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out --accuracyThreshold=1.5' >> adb_run_cmd.txt | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'_train.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --accuracyThreshold=1.5' >> "${run_arm64_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'_train.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --accuracyThreshold=1.5' >> adb_run_cmd.txt | |||||
| adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | ||||
| if [ $? = 0 ]; then | if [ $? = 0 ]; then | ||||
| run_result='arm64: '${model_name}'_train pass'; echo ${run_result} >> ${run_benchmark_result_file} | run_result='arm64: '${model_name}'_train pass'; echo ${run_result} >> ${run_benchmark_result_file} | ||||
| @@ -592,8 +592,8 @@ function Run_arm64() { | |||||
| # run benchmark test without clib data | # run benchmark test without clib data | ||||
| echo ${model_name} >> "${run_arm64_log_file}" | echo ${model_name} >> "${run_arm64_log_file}" | ||||
| echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | ||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'_train.ms --warmUpLoopCount=1 --loopCount=2' >> "{run_arm64_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'_train.ms --warmUpLoopCount=1 --loopCount=2' >> adb_run_cmd.txt | |||||
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'_train.ms --warmUpLoopCount=1 --loopCount=2' >> "${run_arm64_log_file}" |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'_train.ms --warmUpLoopCount=1 --loopCount=2' >> adb_run_cmd.txt | |||||
| adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}" | ||||
| if [ $? = 0 ]; then | if [ $? = 0 ]; then | ||||
| run_result='arm64: '${model_name}'_train pass'; echo ${run_result} >> ${run_benchmark_result_file} | run_result='arm64: '${model_name}'_train pass'; echo ${run_result} >> ${run_benchmark_result_file} | ||||
| @@ -641,8 +641,8 @@ function Run_arm32() { | |||||
| fi | fi | ||||
| echo ${model_name} >> "${run_arm32_log_file}" | echo ${model_name} >> "${run_arm32_log_file}" | ||||
| echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | ||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out' >> "${run_arm32_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out' >> adb_run_cmd.txt | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out' >> "${run_arm32_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out' >> adb_run_cmd.txt | |||||
| adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm32_log_file}" | adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm32_log_file}" | ||||
| if [ $? = 0 ]; then | if [ $? = 0 ]; then | ||||
| run_result='arm32: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | run_result='arm32: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | ||||
| @@ -652,8 +652,8 @@ function Run_arm32() { | |||||
| # run benchmark test without clib data | # run benchmark test without clib data | ||||
| echo ${model_name} >> "${run_arm32_log_file}" | echo ${model_name} >> "${run_arm32_log_file}" | ||||
| echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt | ||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> "${run_arm32_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelPath='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> adb_run_cmd.txt | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> "${run_arm32_log_file}" | |||||
| echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2' >> adb_run_cmd.txt | |||||
| adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm32_log_file}" | adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm32_log_file}" | ||||
| if [ $? = 0 ]; then | if [ $? = 0 ]; then | ||||
| run_result='arm32: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | run_result='arm32: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} | ||||
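Taken together, the script hunks above track a CLI rename in the benchmark tool: --modelPath becomes --modelFile, --inDataPath becomes --inDataFile, --calibDataPath becomes --benchmarkDataFile, and --fp16Priority becomes --enableFp16. A condensed sketch of the renamed BenchmarkFlags fields these switches map onto, inferred from the benchmark.cc hunks below (the default values are illustrative assumptions, not taken from the source):

    #include <string>

    // Hypothetical condensed view of the renamed flag fields; the names come
    // from the hunks below, the defaults are placeholders.
    struct BenchmarkFlagsSketch {
      std::string model_file_;           // --modelFile          (was modelPath)
      std::string in_data_file_;         // --inDataFile         (was inDataPath)
      std::string benchmark_data_file_;  // --benchmarkDataFile  (was calibDataPath)
      bool enable_fp16_ = false;         // --enableFp16         (was fp16Priority)
      bool time_profiling_ = false;      // per-op profiling     (was runTimeProfiler)
      int warm_up_loop_count_ = 3;       // --warmUpLoopCount
      int loop_count_ = 10;              // --loopCount
      int num_threads_ = 2;              // --numThreads
      float accuracy_threshold_ = 0.5f;  // --accuracyThreshold
    };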
| @@ -34,24 +34,24 @@ static const char *DELIM_SLASH = "/"; | |||||
| int Benchmark::GenerateRandomData(size_t size, void *data) { | int Benchmark::GenerateRandomData(size_t size, void *data) { | ||||
| MS_ASSERT(data != nullptr); | MS_ASSERT(data != nullptr); | ||||
| char *castedData = static_cast<char *>(data); | |||||
| char *casted_data = static_cast<char *>(data); | |||||
| for (size_t i = 0; i < size; i++) { | for (size_t i = 0; i < size; i++) { | ||||
| castedData[i] = static_cast<char>(i); | |||||
| casted_data[i] = static_cast<char>(i); | |||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int Benchmark::GenerateInputData() { | int Benchmark::GenerateInputData() { | ||||
| for (auto tensor : msInputs) { | |||||
| for (auto tensor : ms_inputs_) { | |||||
| MS_ASSERT(tensor != nullptr); | MS_ASSERT(tensor != nullptr); | ||||
| auto inputData = tensor->MutableData(); | |||||
| if (inputData == nullptr) { | |||||
| auto input_data = tensor->MutableData(); | |||||
| if (input_data == nullptr) { | |||||
| MS_LOG(ERROR) << "MallocData for inTensor failed"; | MS_LOG(ERROR) << "MallocData for inTensor failed"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| MS_ASSERT(tensor->GetData() != nullptr); | MS_ASSERT(tensor->GetData() != nullptr); | ||||
| auto tensorByteSize = tensor->Size(); | |||||
| auto status = GenerateRandomData(tensorByteSize, inputData); | |||||
| auto tensor_byte_size = tensor->Size(); | |||||
| auto status = GenerateRandomData(tensor_byte_size, input_data); | |||||
| if (status != 0) { | if (status != 0) { | ||||
| std::cerr << "GenerateRandomData for inTensor failed: " << status << std::endl; | std::cerr << "GenerateRandomData for inTensor failed: " << status << std::endl; | ||||
| MS_LOG(ERROR) << "GenerateRandomData for inTensor failed:" << status; | MS_LOG(ERROR) << "GenerateRandomData for inTensor failed:" << status; | ||||
| @@ -62,7 +62,7 @@ int Benchmark::GenerateInputData() { | |||||
| } | } | ||||
| int Benchmark::LoadInput() { | int Benchmark::LoadInput() { | ||||
| if (_flags->inDataPath.empty()) { | |||||
| if (flags_->in_data_file_.empty()) { | |||||
| auto status = GenerateInputData(); | auto status = GenerateInputData(); | ||||
| if (status != 0) { | if (status != 0) { | ||||
| std::cerr << "Generate input data error " << status << std::endl; | std::cerr << "Generate input data error " << status << std::endl; | ||||
| @@ -81,33 +81,34 @@ int Benchmark::LoadInput() { | |||||
| } | } | ||||
| int Benchmark::ReadInputFile() { | int Benchmark::ReadInputFile() { | ||||
| if (msInputs.empty()) { | |||||
| if (ms_inputs_.empty()) { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| if (this->_flags->inDataType == kImage) { | |||||
| if (this->flags_->in_data_type_ == kImage) { | |||||
| MS_LOG(ERROR) << "Not supported image input"; | MS_LOG(ERROR) << "Not supported image input"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } else { | } else { | ||||
| for (size_t i = 0; i < _flags->input_data_list.size(); i++) { | |||||
| auto cur_tensor = msInputs.at(i); | |||||
| for (size_t i = 0; i < flags_->input_data_list_.size(); i++) { | |||||
| auto cur_tensor = ms_inputs_.at(i); | |||||
| MS_ASSERT(cur_tensor != nullptr); | MS_ASSERT(cur_tensor != nullptr); | ||||
| size_t size; | size_t size; | ||||
| char *binBuf = ReadFile(_flags->input_data_list[i].c_str(), &size); | |||||
| if (binBuf == nullptr) { | |||||
| char *bin_buf = ReadFile(flags_->input_data_list_[i].c_str(), &size); | |||||
| if (bin_buf == nullptr) { | |||||
| MS_LOG(ERROR) << "ReadFile return nullptr"; | MS_LOG(ERROR) << "ReadFile return nullptr"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| auto tensorDataSize = cur_tensor->Size(); | |||||
| if (size != tensorDataSize) { | |||||
| std::cerr << "Input binary file size error, required: " << tensorDataSize << ", in fact: " << size << std::endl; | |||||
| MS_LOG(ERROR) << "Input binary file size error, required: " << tensorDataSize << ", in fact: " << size; | |||||
| delete binBuf; | |||||
| auto tensor_data_size = cur_tensor->Size(); | |||||
| if (size != tensor_data_size) { | |||||
| std::cerr << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size | |||||
| << std::endl; | |||||
| MS_LOG(ERROR) << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size; | |||||
| delete bin_buf; | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| auto inputData = cur_tensor->MutableData(); | |||||
| memcpy(inputData, binBuf, tensorDataSize); | |||||
| delete[](binBuf); | |||||
| auto input_data = cur_tensor->MutableData(); | |||||
| memcpy(input_data, bin_buf, tensor_data_size); | |||||
| delete[](bin_buf); | |||||
| } | } | ||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
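ReadInputFile rejects any input .bin whose byte count differs from the tensor's Size(), so input files must be produced at exactly the right length. A hedged helper for generating such a file (the path and zero fill are illustrative):

    #include <fstream>
    #include <string>
    #include <vector>

    // Writes tensor_byte_size zero bytes to path, satisfying the strict
    // size == cur_tensor->Size() check in ReadInputFile above.
    void WriteInputBin(const std::string &path, size_t tensor_byte_size) {
      std::vector<char> buf(tensor_byte_size, 0);
      std::ofstream out(path, std::ios::binary);
      out.write(buf.data(), static_cast<std::streamsize>(buf.size()));
    }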
| @@ -115,94 +116,96 @@ int Benchmark::ReadInputFile() { | |||||
| // calibData is FP32 | // calibData is FP32 | ||||
| int Benchmark::ReadCalibData() { | int Benchmark::ReadCalibData() { | ||||
| const char *calibDataPath = _flags->calibDataPath.c_str(); | |||||
| const char *calib_data_path = flags_->benchmark_data_file_.c_str(); | |||||
| // read calib data | // read calib data | ||||
| std::ifstream inFile(calibDataPath); | |||||
| if (!inFile.good()) { | |||||
| std::cerr << "file: " << calibDataPath << " is not exist" << std::endl; | |||||
| MS_LOG(ERROR) << "file: " << calibDataPath << " is not exist"; | |||||
| std::ifstream in_file(calib_data_path); | |||||
| if (!in_file.good()) { | |||||
| std::cerr << "file: " << calib_data_path << " is not exist" << std::endl; | |||||
| MS_LOG(ERROR) << "file: " << calib_data_path << " is not exist"; | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| if (!inFile.is_open()) { | |||||
| std::cerr << "file: " << calibDataPath << " open failed" << std::endl; | |||||
| MS_LOG(ERROR) << "file: " << calibDataPath << " open failed"; | |||||
| inFile.close(); | |||||
| if (!in_file.is_open()) { | |||||
| std::cerr << "file: " << calib_data_path << " open failed" << std::endl; | |||||
| MS_LOG(ERROR) << "file: " << calib_data_path << " open failed"; | |||||
| in_file.close(); | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| std::string line; | std::string line; | ||||
| MS_LOG(INFO) << "Start reading calibData file"; | MS_LOG(INFO) << "Start reading calibData file"; | ||||
| std::string tensorName; | |||||
| while (!inFile.eof()) { | |||||
| getline(inFile, line); | |||||
| std::stringstream stringLine1(line); | |||||
| std::string tensor_name; | |||||
| while (!in_file.eof()) { | |||||
| getline(in_file, line); | |||||
| std::stringstream string_line1(line); | |||||
| size_t dim = 0; | size_t dim = 0; | ||||
| stringLine1 >> tensorName >> dim; | |||||
| string_line1 >> tensor_name >> dim; | |||||
| std::vector<size_t> dims; | std::vector<size_t> dims; | ||||
| size_t shapeSize = 1; | |||||
| size_t shape_size = 1; | |||||
| for (size_t i = 0; i < dim; i++) { | for (size_t i = 0; i < dim; i++) { | ||||
| size_t tmpDim; | |||||
| stringLine1 >> tmpDim; | |||||
| dims.push_back(tmpDim); | |||||
| shapeSize *= tmpDim; | |||||
| size_t tmp_dim; | |||||
| string_line1 >> tmp_dim; | |||||
| dims.push_back(tmp_dim); | |||||
| shape_size *= tmp_dim; | |||||
| } | } | ||||
| getline(inFile, line); | |||||
| std::stringstream stringLine2(line); | |||||
| std::vector<float> tensorData; | |||||
| for (size_t i = 0; i < shapeSize; i++) { | |||||
| float tmpData; | |||||
| stringLine2 >> tmpData; | |||||
| tensorData.push_back(tmpData); | |||||
| getline(in_file, line); | |||||
| std::stringstream string_line2(line); | |||||
| std::vector<float> tensor_data; | |||||
| for (size_t i = 0; i < shape_size; i++) { | |||||
| float tmp_data; | |||||
| string_line2 >> tmp_data; | |||||
| tensor_data.push_back(tmp_data); | |||||
| } | } | ||||
| auto *checkTensor = new CheckTensor(dims, tensorData); | |||||
| this->calibData.insert(std::make_pair(tensorName, checkTensor)); | |||||
| auto *check_tensor = new CheckTensor(dims, tensor_data); | |||||
| this->benchmark_data_.insert(std::make_pair(tensor_name, check_tensor)); | |||||
| } | } | ||||
| inFile.close(); | |||||
| in_file.close(); | |||||
| MS_LOG(INFO) << "Finish reading calibData file"; | MS_LOG(INFO) << "Finish reading calibData file"; | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int Benchmark::CompareOutput() { | int Benchmark::CompareOutput() { | ||||
| std::cout << "================ Comparing Output data ================" << std::endl; | std::cout << "================ Comparing Output data ================" << std::endl; | ||||
| float totalBias = 0; | |||||
| int totalSize = 0; | |||||
| bool hasError = false; | |||||
| for (const auto &calibTensor : calibData) { | |||||
| std::string nodeOrTensorName = calibTensor.first; | |||||
| auto tensors = session->GetOutputsByNodeName(nodeOrTensorName); | |||||
| float total_bias = 0; | |||||
| int total_size = 0; | |||||
| bool has_error = false; | |||||
| for (const auto &calib_tensor : benchmark_data_) { | |||||
| std::string node_or_tensor_name = calib_tensor.first; | |||||
| auto tensors = session_->GetOutputsByNodeName(node_or_tensor_name); | |||||
| mindspore::tensor::MSTensor *tensor = nullptr; | mindspore::tensor::MSTensor *tensor = nullptr; | ||||
| if (tensors.empty() || tensors.size() != 1) { | if (tensors.empty() || tensors.size() != 1) { | ||||
| MS_LOG(INFO) << "Cannot find output node: " << nodeOrTensorName | |||||
| MS_LOG(INFO) << "Cannot find output node: " << node_or_tensor_name | |||||
| << " or node has more than one output tensor, switch to GetOutputByTensorName"; | << " or node has more than one output tensor, switch to GetOutputByTensorName"; | ||||
| tensor = session->GetOutputByTensorName(nodeOrTensorName); | |||||
| tensor = session_->GetOutputByTensorName(node_or_tensor_name); | |||||
| if (tensor == nullptr) { | if (tensor == nullptr) { | ||||
| MS_LOG(ERROR) << "Cannot find output tensor " << nodeOrTensorName << ", get model output failed"; | |||||
| MS_LOG(ERROR) << "Cannot find output tensor " << node_or_tensor_name << ", get model output failed"; | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| } else { | } else { | ||||
| tensor = tensors.front(); | tensor = tensors.front(); | ||||
| } | } | ||||
| MS_ASSERT(tensor->GetData() != nullptr); | |||||
| MS_ASSERT(tensor->MutableData() != nullptr); | |||||
| float bias = 0; | float bias = 0; | ||||
| switch (msCalibDataType) { | switch (msCalibDataType) { | ||||
| case TypeId::kNumberTypeFloat: { | case TypeId::kNumberTypeFloat: { | ||||
| bias = CompareData<float>(nodeOrTensorName, tensor->shape(), static_cast<float *>(tensor->MutableData())); | |||||
| bias = CompareData<float>(node_or_tensor_name, tensor->shape(), static_cast<float *>(tensor->MutableData())); | |||||
| break; | break; | ||||
| } | } | ||||
| case TypeId::kNumberTypeInt8: { | case TypeId::kNumberTypeInt8: { | ||||
| bias = CompareData<int8_t>(nodeOrTensorName, tensor->shape(), static_cast<int8_t *>(tensor->MutableData())); | |||||
| bias = CompareData<int8_t>(node_or_tensor_name, tensor->shape(), static_cast<int8_t *>(tensor->MutableData())); | |||||
| break; | break; | ||||
| } | } | ||||
| case TypeId::kNumberTypeUInt8: { | case TypeId::kNumberTypeUInt8: { | ||||
| bias = CompareData<uint8_t>(nodeOrTensorName, tensor->shape(), static_cast<uint8_t *>(tensor->MutableData())); | |||||
| bias = | |||||
| CompareData<uint8_t>(node_or_tensor_name, tensor->shape(), static_cast<uint8_t *>(tensor->MutableData())); | |||||
| break; | break; | ||||
| } | } | ||||
| case TypeId::kNumberTypeInt32: { | case TypeId::kNumberTypeInt32: { | ||||
| bias = CompareData<int32_t>(nodeOrTensorName, tensor->shape(), static_cast<int32_t *>(tensor->MutableData())); | |||||
| bias = | |||||
| CompareData<int32_t>(node_or_tensor_name, tensor->shape(), static_cast<int32_t *>(tensor->MutableData())); | |||||
| break; | break; | ||||
| } | } | ||||
| default: | default: | ||||
| @@ -210,28 +213,28 @@ int Benchmark::CompareOutput() { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| if (bias >= 0) { | if (bias >= 0) { | ||||
| totalBias += bias; | |||||
| totalSize++; | |||||
| total_bias += bias; | |||||
| total_size++; | |||||
| } else { | } else { | ||||
| hasError = true; | |||||
| has_error = true; | |||||
| break; | break; | ||||
| } | } | ||||
| } | } | ||||
| if (!hasError) { | |||||
| float meanBias; | |||||
| if (totalSize != 0) { | |||||
| meanBias = totalBias / totalSize * 100; | |||||
| if (!has_error) { | |||||
| float mean_bias; | |||||
| if (total_size != 0) { | |||||
| mean_bias = total_bias / total_size * 100; | |||||
| } else { | } else { | ||||
| meanBias = 0; | |||||
| mean_bias = 0; | |||||
| } | } | ||||
| std::cout << "Mean bias of all nodes/tensors: " << meanBias << "%" << std::endl; | |||||
| std::cout << "Mean bias of all nodes/tensors: " << mean_bias << "%" << std::endl; | |||||
| std::cout << "=======================================================" << std::endl << std::endl; | std::cout << "=======================================================" << std::endl << std::endl; | ||||
| if (meanBias > this->_flags->accuracyThreshold) { | |||||
| MS_LOG(ERROR) << "Mean bias of all nodes/tensors is too big: " << meanBias << "%"; | |||||
| std::cerr << "Mean bias of all nodes/tensors is too big: " << meanBias << "%" << std::endl; | |||||
| if (mean_bias > this->flags_->accuracy_threshold_) { | |||||
| MS_LOG(ERROR) << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%"; | |||||
| std::cerr << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%" << std::endl; | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } else { | } else { | ||||
| return RET_OK; | return RET_OK; | ||||
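The pass/fail rule above is plain arithmetic on per-tensor biases: each compared tensor contributes one bias value, the mean is scaled to a percentage, and the run fails when it exceeds accuracy_threshold_ (5 in the fp16 script invocations earlier, 1.5 in some others). A worked instance:

    #include <iostream>

    int main() {
      // Two output tensors with relative biases of 0.03 and 0.01.
      float total_bias = 0.03f + 0.01f;
      int total_size = 2;
      float mean_bias = (total_size != 0) ? total_bias / total_size * 100 : 0;  // = 2%
      std::cout << "Mean bias of all nodes/tensors: " << mean_bias << "%\n";
      return (mean_bias > 5.0f) ? -1 : 0;  // an --accuracyThreshold=5 run passes
    }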
| @@ -247,8 +250,8 @@ int Benchmark::CompareOutput() { | |||||
| int Benchmark::MarkPerformance() { | int Benchmark::MarkPerformance() { | ||||
| MS_LOG(INFO) << "Running warm up loops..."; | MS_LOG(INFO) << "Running warm up loops..."; | ||||
| std::cout << "Running warm up loops..." << std::endl; | std::cout << "Running warm up loops..." << std::endl; | ||||
| for (int i = 0; i < _flags->warmUpLoopCount; i++) { | |||||
| auto status = session->RunGraph(); | |||||
| for (int i = 0; i < flags_->warm_up_loop_count_; i++) { | |||||
| auto status = session_->RunGraph(); | |||||
| if (status != 0) { | if (status != 0) { | ||||
| MS_LOG(ERROR) << "Inference error " << status; | MS_LOG(ERROR) << "Inference error " << status; | ||||
| std::cerr << "Inference error " << status << std::endl; | std::cerr << "Inference error " << status << std::endl; | ||||
| @@ -258,15 +261,15 @@ int Benchmark::MarkPerformance() { | |||||
| MS_LOG(INFO) << "Running benchmark loops..."; | MS_LOG(INFO) << "Running benchmark loops..."; | ||||
| std::cout << "Running benchmark loops..." << std::endl; | std::cout << "Running benchmark loops..." << std::endl; | ||||
| uint64_t timeMin = 1000000; | |||||
| uint64_t timeMax = 0; | |||||
| uint64_t timeAvg = 0; | |||||
| uint64_t time_min = 1000000; | |||||
| uint64_t time_max = 0; | |||||
| uint64_t time_avg = 0; | |||||
| for (int i = 0; i < _flags->loopCount; i++) { | |||||
| session->BindThread(true); | |||||
| for (int i = 0; i < flags_->loop_count_; i++) { | |||||
| session_->BindThread(true); | |||||
| auto start = GetTimeUs(); | auto start = GetTimeUs(); | ||||
| auto status = | auto status = | ||||
| _flags->runTimeProfiler ? session->RunGraph(before_call_back_, after_call_back_) : session->RunGraph(); | |||||
| flags_->time_profiling_ ? session_->RunGraph(before_call_back_, after_call_back_) : session_->RunGraph(); | |||||
| if (status != 0) { | if (status != 0) { | ||||
| MS_LOG(ERROR) << "Inference error " << status; | MS_LOG(ERROR) << "Inference error " << status; | ||||
| std::cerr << "Inference error " << status; | std::cerr << "Inference error " << status; | ||||
| @@ -275,28 +278,28 @@ int Benchmark::MarkPerformance() { | |||||
| auto end = GetTimeUs(); | auto end = GetTimeUs(); | ||||
| auto time = end - start; | auto time = end - start; | ||||
| timeMin = std::min(timeMin, time); | |||||
| timeMax = std::max(timeMax, time); | |||||
| timeAvg += time; | |||||
| time_min = std::min(time_min, time); | |||||
| time_max = std::max(time_max, time); | |||||
| time_avg += time; | |||||
| session->BindThread(false); | |||||
| session_->BindThread(false); | |||||
| } | } | ||||
| if (_flags->runTimeProfiler) { | |||||
| if (flags_->time_profiling_) { | |||||
| const std::vector<std::string> per_op_name = {"opName", "avg(ms)", "percent", "calledTimes", "opTotalTime"}; | const std::vector<std::string> per_op_name = {"opName", "avg(ms)", "percent", "calledTimes", "opTotalTime"}; | ||||
| const std::vector<std::string> per_op_type = {"opType", "avg(ms)", "percent", "calledTimes", "opTotalTime"}; | const std::vector<std::string> per_op_type = {"opType", "avg(ms)", "percent", "calledTimes", "opTotalTime"}; | ||||
| PrintResult(per_op_name, op_times_by_name_); | PrintResult(per_op_name, op_times_by_name_); | ||||
| PrintResult(per_op_type, op_times_by_type_); | PrintResult(per_op_type, op_times_by_type_); | ||||
| } | } | ||||
| if (_flags->loopCount > 0) { | |||||
| timeAvg /= _flags->loopCount; | |||||
| MS_LOG(INFO) << "Model = " << _flags->modelPath.substr(_flags->modelPath.find_last_of(DELIM_SLASH) + 1).c_str() | |||||
| << ", NumThreads = " << _flags->numThreads << ", MinRunTime = " << timeMin / 1000.0f | |||||
| << ", MaxRuntime = " << timeMax / 1000.0f << ", AvgRunTime = " << timeAvg / 1000.0f; | |||||
| if (flags_->loop_count_ > 0) { | |||||
| time_avg /= flags_->loop_count_; | |||||
| MS_LOG(INFO) << "Model = " << flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1).c_str() | |||||
| << ", NumThreads = " << flags_->num_threads_ << ", MinRunTime = " << time_min / 1000.0f | |||||
| << ", MaxRuntime = " << time_max / 1000.0f << ", AvgRunTime = " << time_avg / 1000.0f; | |||||
| printf("Model = %s, NumThreads = %d, MinRunTime = %f ms, MaxRuntime = %f ms, AvgRunTime = %f ms\n", | printf("Model = %s, NumThreads = %d, MinRunTime = %f ms, MaxRuntime = %f ms, AvgRunTime = %f ms\n", | ||||
| _flags->modelPath.substr(_flags->modelPath.find_last_of(DELIM_SLASH) + 1).c_str(), _flags->numThreads, | |||||
| timeMin / 1000.0f, timeMax / 1000.0f, timeAvg / 1000.0f); | |||||
| flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1).c_str(), flags_->num_threads_, | |||||
| time_min / 1000.0f, time_max / 1000.0f, time_avg / 1000.0f); | |||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -304,7 +307,7 @@ int Benchmark::MarkPerformance() { | |||||
| int Benchmark::MarkAccuracy() { | int Benchmark::MarkAccuracy() { | ||||
| MS_LOG(INFO) << "MarkAccuracy"; | MS_LOG(INFO) << "MarkAccuracy"; | ||||
| std::cout << "MarkAccuracy" << std::endl; | std::cout << "MarkAccuracy" << std::endl; | ||||
| for (auto &msInput : msInputs) { | |||||
| for (auto &msInput : ms_inputs_) { | |||||
| switch (msInput->data_type()) { | switch (msInput->data_type()) { | ||||
| case TypeId::kNumberTypeFloat: | case TypeId::kNumberTypeFloat: | ||||
| PrintInputData<float>(msInput); | PrintInputData<float>(msInput); | ||||
| @@ -326,7 +329,7 @@ int Benchmark::MarkAccuracy() { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| } | } | ||||
| auto status = session->RunGraph(); | |||||
| auto status = session_->RunGraph(); | |||||
| if (status != RET_OK) { | if (status != RET_OK) { | ||||
| MS_LOG(ERROR) << "Inference error " << status; | MS_LOG(ERROR) << "Inference error " << status; | ||||
| std::cerr << "Inference error " << status << std::endl; | std::cerr << "Inference error " << status << std::endl; | ||||
| @@ -350,90 +353,83 @@ int Benchmark::MarkAccuracy() { | |||||
| } | } | ||||
| int Benchmark::RunBenchmark() { | int Benchmark::RunBenchmark() { | ||||
| auto startPrepareTime = GetTimeUs(); | |||||
| auto start_prepare_time = GetTimeUs(); | |||||
| // Load graph | // Load graph | ||||
| std::string modelName = _flags->modelPath.substr(_flags->modelPath.find_last_of(DELIM_SLASH) + 1); | |||||
| std::string model_name = flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1); | |||||
| MS_LOG(INFO) << "start reading model file"; | MS_LOG(INFO) << "start reading model file"; | ||||
| std::cout << "start reading model file" << std::endl; | std::cout << "start reading model file" << std::endl; | ||||
| size_t size = 0; | size_t size = 0; | ||||
| char *graphBuf = ReadFile(_flags->modelPath.c_str(), &size); | |||||
| if (graphBuf == nullptr) { | |||||
| MS_LOG(ERROR) << "Read model file failed while running " << modelName.c_str(); | |||||
| std::cerr << "Read model file failed while running " << modelName.c_str() << std::endl; | |||||
| char *graph_buf = ReadFile(flags_->model_file_.c_str(), &size); | |||||
| if (graph_buf == nullptr) { | |||||
| MS_LOG(ERROR) << "Read model file failed while running " << model_name.c_str(); | |||||
| std::cerr << "Read model file failed while running " << model_name.c_str() << std::endl; | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| auto model = lite::Model::Import(graphBuf, size); | |||||
| delete[](graphBuf); | |||||
| auto model = std::shared_ptr<Model>(lite::Model::Import(graph_buf, size)); | |||||
| delete[](graph_buf); | |||||
| if (model == nullptr) { | if (model == nullptr) { | ||||
| MS_LOG(ERROR) << "Import model file failed while running " << modelName.c_str(); | |||||
| std::cerr << "Import model file failed while running " << modelName.c_str() << std::endl; | |||||
| MS_LOG(ERROR) << "Import model file failed while running " << model_name.c_str(); | |||||
| std::cerr << "Import model file failed while running " << model_name.c_str() << std::endl; | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| auto context = new (std::nothrow) lite::Context; | |||||
| auto context = std::make_shared<Context>(); | |||||
| if (context == nullptr) { | if (context == nullptr) { | ||||
| MS_LOG(ERROR) << "New context failed while running " << modelName.c_str(); | |||||
| std::cerr << "New context failed while running " << modelName.c_str() << std::endl; | |||||
| MS_LOG(ERROR) << "New context failed while running " << model_name.c_str(); | |||||
| std::cerr << "New context failed while running " << model_name.c_str() << std::endl; | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| if (_flags->device == "CPU") { | |||||
| if (flags_->device_ == "CPU") { | |||||
| context->device_type_ = lite::DT_CPU; | context->device_type_ = lite::DT_CPU; | ||||
| } else if (_flags->device == "GPU") { | |||||
| } else if (flags_->device_ == "GPU") { | |||||
| context->device_type_ = lite::DT_GPU; | context->device_type_ = lite::DT_GPU; | ||||
| } | } | ||||
| if (_flags->cpuBindMode == -1) { | |||||
| if (flags_->cpu_bind_mode_ == -1) { | |||||
| context->cpu_bind_mode_ = MID_CPU; | context->cpu_bind_mode_ = MID_CPU; | ||||
| } else if (_flags->cpuBindMode == 0) { | |||||
| } else if (flags_->cpu_bind_mode_ == 0) { | |||||
| context->cpu_bind_mode_ = HIGHER_CPU; | context->cpu_bind_mode_ = HIGHER_CPU; | ||||
| } else { | } else { | ||||
| context->cpu_bind_mode_ = NO_BIND; | context->cpu_bind_mode_ = NO_BIND; | ||||
| } | } | ||||
| context->thread_num_ = _flags->numThreads; | |||||
| context->float16_priority = _flags->fp16Priority; | |||||
| session = session::LiteSession::CreateSession(context); | |||||
| delete (context); | |||||
| if (session == nullptr) { | |||||
| MS_LOG(ERROR) << "CreateSession failed while running ", modelName.c_str(); | |||||
| std::cout << "CreateSession failed while running ", modelName.c_str(); | |||||
| context->thread_num_ = flags_->num_threads_; | |||||
| context->enable_float16_ = flags_->enable_fp16_; | |||||
| session_ = session::LiteSession::CreateSession(context.get()); | |||||
| if (session_ == nullptr) { | |||||
| MS_LOG(ERROR) << "CreateSession failed while running ", model_name.c_str(); | |||||
| std::cout << "CreateSession failed while running ", model_name.c_str(); | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| auto ret = session->CompileGraph(model); | |||||
| auto ret = session_->CompileGraph(model.get()); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "CompileGraph failed while running ", modelName.c_str(); | |||||
| std::cout << "CompileGraph failed while running ", modelName.c_str(); | |||||
| delete (session); | |||||
| delete (model); | |||||
| MS_LOG(ERROR) << "CompileGraph failed while running ", model_name.c_str(); | |||||
| std::cout << "CompileGraph failed while running ", model_name.c_str(); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| model->Free(); | model->Free(); | ||||
| msInputs = session->GetInputs(); | |||||
| auto endPrepareTime = GetTimeUs(); | |||||
| MS_LOG(INFO) << "PrepareTime = " << (endPrepareTime - startPrepareTime) / 1000 << " ms"; | |||||
| std::cout << "PrepareTime = " << (endPrepareTime - startPrepareTime) / 1000 << " ms" << std::endl; | |||||
| ms_inputs_ = session_->GetInputs(); | |||||
| auto end_prepare_time = GetTimeUs(); | |||||
| MS_LOG(INFO) << "PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms"; | |||||
| std::cout << "PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms" << std::endl; | |||||
| // Load input | // Load input | ||||
| MS_LOG(INFO) << "start generate input data"; | MS_LOG(INFO) << "start generate input data"; | ||||
| auto status = LoadInput(); | auto status = LoadInput(); | ||||
| if (status != 0) { | if (status != 0) { | ||||
| MS_LOG(ERROR) << "Generate input data error"; | MS_LOG(ERROR) << "Generate input data error"; | ||||
| delete (session); | |||||
| delete (model); | |||||
| return status; | return status; | ||||
| } | } | ||||
| if (!_flags->calibDataPath.empty()) { | |||||
| if (!flags_->benchmark_data_file_.empty()) { | |||||
| status = MarkAccuracy(); | status = MarkAccuracy(); | ||||
| for (auto &data : calibData) { | |||||
| for (auto &data : benchmark_data_) { | |||||
| data.second->shape.clear(); | data.second->shape.clear(); | ||||
| data.second->data.clear(); | data.second->data.clear(); | ||||
| delete data.second; | delete data.second; | ||||
| } | } | ||||
| calibData.clear(); | |||||
| benchmark_data_.clear(); | |||||
| if (status != 0) { | if (status != 0) { | ||||
| MS_LOG(ERROR) << "Run MarkAccuracy error: " << status; | MS_LOG(ERROR) << "Run MarkAccuracy error: " << status; | ||||
| std::cout << "Run MarkAccuracy error: " << status << std::endl; | std::cout << "Run MarkAccuracy error: " << status << std::endl; | ||||
| delete (session); | |||||
| delete (model); | |||||
| return status; | return status; | ||||
| } | } | ||||
| } else { | } else { | ||||
| @@ -441,24 +437,20 @@ int Benchmark::RunBenchmark() { | |||||
| if (status != 0) { | if (status != 0) { | ||||
| MS_LOG(ERROR) << "Run MarkPerformance error: " << status; | MS_LOG(ERROR) << "Run MarkPerformance error: " << status; | ||||
| std::cout << "Run MarkPerformance error: " << status << std::endl; | std::cout << "Run MarkPerformance error: " << status << std::endl; | ||||
| delete (session); | |||||
| delete (model); | |||||
| return status; | return status; | ||||
| } | } | ||||
| } | } | ||||
| delete (session); | |||||
| delete (model); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| void BenchmarkFlags::InitInputDataList() { | void BenchmarkFlags::InitInputDataList() { | ||||
| char *input_list = new char[this->inDataPath.length() + 1]; | |||||
| snprintf(input_list, this->inDataPath.length() + 1, "%s", this->inDataPath.c_str()); | |||||
| char *input_list = new char[this->in_data_file_.length() + 1]; | |||||
| snprintf(input_list, this->in_data_file_.length() + 1, "%s", this->in_data_file_.c_str()); | |||||
| char *cur_input; | char *cur_input; | ||||
| const char *split_c = ","; | const char *split_c = ","; | ||||
| cur_input = strtok(input_list, split_c); | cur_input = strtok(input_list, split_c); | ||||
| while (cur_input != nullptr) { | while (cur_input != nullptr) { | ||||
| input_data_list.emplace_back(cur_input); | |||||
| input_data_list_.emplace_back(cur_input); | |||||
| cur_input = strtok(nullptr, split_c); | cur_input = strtok(nullptr, split_c); | ||||
| } | } | ||||
| delete[] input_list; | delete[] input_list; | ||||
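InitInputDataList splits the (renamed) in_data_file_ string on commas with strtok, yielding one .bin path per input tensor. An equivalent self-contained split, shown with illustrative paths:

    #include <cstring>
    #include <string>
    #include <vector>

    // e.g. SplitCommaList("in1.ms.bin,in2.ms.bin") -> {"in1.ms.bin", "in2.ms.bin"}
    std::vector<std::string> SplitCommaList(const std::string &arg) {
      std::vector<std::string> out;
      std::string copy = arg;                     // strtok mutates its input
      char *cur = std::strtok(copy.data(), ","); // non-const data() needs C++17
      while (cur != nullptr) {
        out.emplace_back(cur);
        cur = std::strtok(nullptr, ",");
      }
      return out;
    }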
| @@ -466,19 +458,19 @@ void BenchmarkFlags::InitInputDataList() { | |||||
| void BenchmarkFlags::InitResizeDimsList() { | void BenchmarkFlags::InitResizeDimsList() { | ||||
| std::string content; | std::string content; | ||||
| content = this->resizeDimsIn; | |||||
| content = this->resize_dims_in_; | |||||
| std::vector<int64_t> shape; | std::vector<int64_t> shape; | ||||
| auto shapeStrs = StringSplit(content, std::string(DELIM_COLON)); | |||||
| for (const auto &shapeStr : shapeStrs) { | |||||
| auto shape_strs = StringSplit(content, std::string(DELIM_COLON)); | |||||
| for (const auto &shape_str : shape_strs) { | |||||
| shape.clear(); | shape.clear(); | ||||
| auto dimStrs = StringSplit(shapeStr, std::string(DELIM_COMMA)); | |||||
| auto dim_strs = StringSplit(shape_str, std::string(DELIM_COMMA)); | |||||
| std::cout << "Resize Dims: "; | std::cout << "Resize Dims: "; | ||||
| for (const auto &dimStr : dimStrs) { | |||||
| std::cout << dimStr << " "; | |||||
| shape.emplace_back(static_cast<int64_t>(std::stoi(dimStr))); | |||||
| for (const auto &dim_str : dim_strs) { | |||||
| std::cout << dim_str << " "; | |||||
| shape.emplace_back(static_cast<int64_t>(std::stoi(dim_str))); | |||||
| } | } | ||||
| std::cout << std::endl; | std::cout << std::endl; | ||||
| this->resizeDims.emplace_back(shape); | |||||
| this->resize_dims_.emplace_back(shape); | |||||
| } | } | ||||
| } | } | ||||
| @@ -493,11 +485,11 @@ int Benchmark::InitCallbackParameter() { | |||||
| if (before_outputs.empty()) { | if (before_outputs.empty()) { | ||||
| MS_LOG(INFO) << "The num of beforeOutputs is empty"; | MS_LOG(INFO) << "The num of beforeOutputs is empty"; | ||||
| } | } | ||||
| if (op_times_by_type_.find(callParam.type_callback_param) == op_times_by_type_.end()) { | |||||
| op_times_by_type_.insert(std::make_pair(callParam.type_callback_param, std::make_pair(0, 0.0f))); | |||||
| if (op_times_by_type_.find(callParam.node_type) == op_times_by_type_.end()) { | |||||
| op_times_by_type_.insert(std::make_pair(callParam.node_type, std::make_pair(0, 0.0f))); | |||||
| } | } | ||||
| if (op_times_by_name_.find(callParam.name_callback_param) == op_times_by_name_.end()) { | |||||
| op_times_by_name_.insert(std::make_pair(callParam.name_callback_param, std::make_pair(0, 0.0f))); | |||||
| if (op_times_by_name_.find(callParam.node_name) == op_times_by_name_.end()) { | |||||
| op_times_by_name_.insert(std::make_pair(callParam.node_name, std::make_pair(0, 0.0f))); | |||||
| } | } | ||||
| op_call_times_total_++; | op_call_times_total_++; | ||||
| @@ -520,10 +512,10 @@ int Benchmark::InitCallbackParameter() { | |||||
| float cost = static_cast<float>(opEnd - op_begin_) / 1000.0f; | float cost = static_cast<float>(opEnd - op_begin_) / 1000.0f; | ||||
| op_cost_total_ += cost; | op_cost_total_ += cost; | ||||
| op_times_by_type_[call_param.type_callback_param].first++; | |||||
| op_times_by_type_[call_param.type_callback_param].second += cost; | |||||
| op_times_by_name_[call_param.name_callback_param].first++; | |||||
| op_times_by_name_[call_param.name_callback_param].second += cost; | |||||
| op_times_by_type_[call_param.node_type].first++; | |||||
| op_times_by_type_[call_param.node_type].second += cost; | |||||
| op_times_by_name_[call_param.node_name].first++; | |||||
| op_times_by_name_[call_param.node_name].second += cost; | |||||
| return true; | return true; | ||||
| }; | }; | ||||
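Both callbacks feed the same bookkeeping: per node name and per node type, a pair of call count and accumulated cost. A minimal sketch of that aggregation, assuming the cost has already been converted to milliseconds (names hypothetical):

```cpp
#include <string>
#include <unordered_map>
#include <utility>

// node name -> <call count, accumulated cost in ms>, mirroring op_times_by_name_.
std::unordered_map<std::string, std::pair<int, float>> op_times_by_name;

void Accumulate(const std::string &node_name, float cost_ms) {
  auto &entry = op_times_by_name[node_name];  // operator[] value-initializes to {0, 0.0f}
  entry.first += 1;
  entry.second += cost_ms;
}
```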
| @@ -531,36 +523,36 @@ int Benchmark::InitCallbackParameter() { | |||||
| } | } | ||||
| int Benchmark::Init() { | int Benchmark::Init() { | ||||
| if (this->_flags == nullptr) { | |||||
| if (this->flags_ == nullptr) { | |||||
| return 1; | return 1; | ||||
| } | } | ||||
| MS_LOG(INFO) << "ModelPath = " << this->_flags->modelPath; | |||||
| MS_LOG(INFO) << "InDataPath = " << this->_flags->inDataPath; | |||||
| MS_LOG(INFO) << "InDataType = " << this->_flags->inDataTypeIn; | |||||
| MS_LOG(INFO) << "LoopCount = " << this->_flags->loopCount; | |||||
| MS_LOG(INFO) << "DeviceType = " << this->_flags->device; | |||||
| MS_LOG(INFO) << "AccuracyThreshold = " << this->_flags->accuracyThreshold; | |||||
| MS_LOG(INFO) << "WarmUpLoopCount = " << this->_flags->warmUpLoopCount; | |||||
| MS_LOG(INFO) << "NumThreads = " << this->_flags->numThreads; | |||||
| MS_LOG(INFO) << "Fp16Priority = " << this->_flags->fp16Priority; | |||||
| MS_LOG(INFO) << "calibDataPath = " << this->_flags->calibDataPath; | |||||
| if (this->_flags->loopCount < 1) { | |||||
| MS_LOG(ERROR) << "LoopCount:" << this->_flags->loopCount << " must be greater than 0"; | |||||
| std::cerr << "LoopCount:" << this->_flags->loopCount << " must be greater than 0" << std::endl; | |||||
| MS_LOG(INFO) << "ModelPath = " << this->flags_->model_file_; | |||||
| MS_LOG(INFO) << "InDataPath = " << this->flags_->in_data_file_; | |||||
| MS_LOG(INFO) << "InDataType = " << this->flags_->in_data_type_in_; | |||||
| MS_LOG(INFO) << "LoopCount = " << this->flags_->loop_count_; | |||||
| MS_LOG(INFO) << "DeviceType = " << this->flags_->device_; | |||||
| MS_LOG(INFO) << "AccuracyThreshold = " << this->flags_->accuracy_threshold_; | |||||
| MS_LOG(INFO) << "WarmUpLoopCount = " << this->flags_->warm_up_loop_count_; | |||||
| MS_LOG(INFO) << "NumThreads = " << this->flags_->num_threads_; | |||||
| MS_LOG(INFO) << "Fp16Priority = " << this->flags_->enable_fp16_; | |||||
| MS_LOG(INFO) << "calibDataPath = " << this->flags_->benchmark_data_file_; | |||||
| if (this->flags_->loop_count_ < 1) { | |||||
| MS_LOG(ERROR) << "LoopCount:" << this->flags_->loop_count_ << " must be greater than 0"; | |||||
| std::cerr << "LoopCount:" << this->flags_->loop_count_ << " must be greater than 0" << std::endl; | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| if (this->_flags->numThreads < 1) { | |||||
| MS_LOG(ERROR) << "numThreads:" << this->_flags->numThreads << " must be greater than 0"; | |||||
| std::cerr << "numThreads:" << this->_flags->numThreads << " must be greater than 0" << std::endl; | |||||
| if (this->flags_->num_threads_ < 1) { | |||||
| MS_LOG(ERROR) << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0"; | |||||
| std::cerr << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0" << std::endl; | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| if (this->_flags->cpuBindMode == -1) { | |||||
| if (this->flags_->cpu_bind_mode_ == 2) { | |||||
| MS_LOG(INFO) << "cpuBindMode = MID_CPU"; | MS_LOG(INFO) << "cpuBindMode = MID_CPU"; | ||||
| std::cout << "cpuBindMode = MID_CPU" << std::endl; | std::cout << "cpuBindMode = MID_CPU" << std::endl; | ||||
| } else if (this->_flags->cpuBindMode == 1) { | |||||
| } else if (this->flags_->cpu_bind_mode_ == 1) { | |||||
| MS_LOG(INFO) << "cpuBindMode = HIGHER_CPU"; | MS_LOG(INFO) << "cpuBindMode = HIGHER_CPU"; | ||||
| std::cout << "cpuBindMode = HIGHER_CPU" << std::endl; | std::cout << "cpuBindMode = HIGHER_CPU" << std::endl; | ||||
| } else { | } else { | ||||
| @@ -568,38 +560,38 @@ int Benchmark::Init() { | |||||
| std::cout << "cpuBindMode = NO_BIND" << std::endl; | std::cout << "cpuBindMode = NO_BIND" << std::endl; | ||||
| } | } | ||||
| this->_flags->inDataType = this->_flags->inDataTypeIn == "img" ? kImage : kBinary; | |||||
| this->flags_->in_data_type_ = this->flags_->in_data_type_in_ == "img" ? kImage : kBinary; | |||||
| if (!_flags->calibDataType.empty()) { | |||||
| if (dataTypeMap.find(_flags->calibDataType) == dataTypeMap.end()) { | |||||
| MS_LOG(ERROR) << "CalibDataType not supported: " << _flags->calibDataType.c_str(); | |||||
| if (!flags_->benchmark_data_type_.empty()) { | |||||
| if (data_type_map_.find(flags_->benchmark_data_type_) == data_type_map_.end()) { | |||||
| MS_LOG(ERROR) << "CalibDataType not supported: " << flags_->benchmark_data_type_.c_str(); | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| msCalibDataType = dataTypeMap.at(_flags->calibDataType); | |||||
| MS_LOG(INFO) << "CalibDataType = " << _flags->calibDataType.c_str(); | |||||
| std::cout << "CalibDataType = " << _flags->calibDataType.c_str() << std::endl; | |||||
| msCalibDataType = data_type_map_.at(flags_->benchmark_data_type_); | |||||
| MS_LOG(INFO) << "CalibDataType = " << flags_->benchmark_data_type_.c_str(); | |||||
| std::cout << "CalibDataType = " << flags_->benchmark_data_type_.c_str() << std::endl; | |||||
| } | } | ||||
| if (_flags->modelPath.empty()) { | |||||
| if (flags_->model_file_.empty()) { | |||||
| MS_LOG(ERROR) << "modelPath is required"; | MS_LOG(ERROR) << "modelPath is required"; | ||||
| std::cerr << "modelPath is required" << std::endl; | std::cerr << "modelPath is required" << std::endl; | ||||
| return 1; | return 1; | ||||
| } | } | ||||
| _flags->InitInputDataList(); | |||||
| _flags->InitResizeDimsList(); | |||||
| if (!_flags->resizeDims.empty() && _flags->resizeDims.size() != _flags->input_data_list.size()) { | |||||
| flags_->InitInputDataList(); | |||||
| flags_->InitResizeDimsList(); | |||||
| if (!flags_->resize_dims_.empty() && flags_->resize_dims_.size() != flags_->input_data_list_.size()) { | |||||
| MS_LOG(ERROR) << "Size of input resizeDims should be equal to size of input inDataPath"; | MS_LOG(ERROR) << "Size of input resizeDims should be equal to size of input inDataPath"; | ||||
| std::cerr << "Size of input resizeDims should be equal to size of input inDataPath" << std::endl; | std::cerr << "Size of input resizeDims should be equal to size of input inDataPath" << std::endl; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| if (_flags->device != "CPU" && _flags->device != "GPU") { | |||||
| MS_LOG(ERROR) << "Device type:" << _flags->device << " is not supported."; | |||||
| std::cerr << "Device type:" << _flags->device << " is not supported." << std::endl; | |||||
| if (flags_->device_ != "CPU" && flags_->device_ != "GPU") { | |||||
| MS_LOG(ERROR) << "Device type:" << flags_->device_ << " is not supported."; | |||||
| std::cerr << "Device type:" << flags_->device_ << " is not supported." << std::endl; | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| if (_flags->runTimeProfiler) { | |||||
| if (flags_->time_profiling_) { | |||||
| auto status = InitCallbackParameter(); | auto status = InitCallbackParameter(); | ||||
| if (status != RET_OK) { | if (status != RET_OK) { | ||||
| MS_LOG(ERROR) << "Init callback Parameter failed."; | MS_LOG(ERROR) << "Init callback Parameter failed."; | ||||
| @@ -627,7 +619,7 @@ int Benchmark::PrintResult(const std::vector<std::string> &title, | |||||
| } | } | ||||
| columns.push_back(iter.first); | columns.push_back(iter.first); | ||||
| len = snprintf(stringBuf[1], sizeof(stringBuf[1]), "%f", iter.second.second / _flags->loopCount); | |||||
| len = snprintf(stringBuf[1], sizeof(stringBuf[1]), "%f", iter.second.second / flags_->loop_count_); | |||||
| if (len > columnLenMax.at(1)) { | if (len > columnLenMax.at(1)) { | ||||
| columnLenMax.at(1) = len + 4; | columnLenMax.at(1) = len + 4; | ||||
| } | } | ||||
| @@ -676,10 +668,11 @@ int Benchmark::PrintResult(const std::vector<std::string> &title, | |||||
| } | } | ||||
| Benchmark::~Benchmark() { | Benchmark::~Benchmark() { | ||||
| for (auto iter : this->calibData) { | |||||
| for (auto iter : this->benchmark_data_) { | |||||
| delete (iter.second); | delete (iter.second); | ||||
| } | } | ||||
| this->calibData.clear(); | |||||
| this->benchmark_data_.clear(); | |||||
| delete (session_); | |||||
| } | } | ||||
| int RunBenchmark(int argc, const char **argv) { | int RunBenchmark(int argc, const char **argv) { | ||||
| @@ -697,26 +690,27 @@ int RunBenchmark(int argc, const char **argv) { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| Benchmark mBenchmark(&flags); | |||||
| auto status = mBenchmark.Init(); | |||||
| Benchmark benchmark(&flags); | |||||
| auto status = benchmark.Init(); | |||||
| if (status != 0) { | if (status != 0) { | ||||
| MS_LOG(ERROR) << "Benchmark init Error : " << status; | MS_LOG(ERROR) << "Benchmark init Error : " << status; | ||||
| std::cerr << "Benchmark init Error : " << status << std::endl; | std::cerr << "Benchmark init Error : " << status << std::endl; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| status = mBenchmark.RunBenchmark(); | |||||
| status = benchmark.RunBenchmark(); | |||||
| if (status != 0) { | if (status != 0) { | ||||
| MS_LOG(ERROR) << "Run Benchmark " << flags.modelPath.substr(flags.modelPath.find_last_of(DELIM_SLASH) + 1).c_str() | |||||
| MS_LOG(ERROR) << "Run Benchmark " | |||||
| << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str() | |||||
| << " Failed : " << status; | << " Failed : " << status; | ||||
| std::cerr << "Run Benchmark " << flags.modelPath.substr(flags.modelPath.find_last_of(DELIM_SLASH) + 1).c_str() | |||||
| std::cerr << "Run Benchmark " << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str() | |||||
| << " Failed : " << status << std::endl; | << " Failed : " << status << std::endl; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| MS_LOG(INFO) << "Run Benchmark " << flags.modelPath.substr(flags.modelPath.find_last_of(DELIM_SLASH) + 1).c_str() | |||||
| MS_LOG(INFO) << "Run Benchmark " << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str() | |||||
| << " Success."; | << " Success."; | ||||
| std::cout << "Run Benchmark " << flags.modelPath.substr(flags.modelPath.find_last_of(DELIM_SLASH) + 1).c_str() | |||||
| std::cout << "Run Benchmark " << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str() | |||||
| << " Success." << std::endl; | << " Success." << std::endl; | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -54,22 +54,22 @@ class MS_API BenchmarkFlags : public virtual FlagParser { | |||||
| public: | public: | ||||
| BenchmarkFlags() { | BenchmarkFlags() { | ||||
| // common | // common | ||||
| AddFlag(&BenchmarkFlags::modelPath, "modelPath", "Input model path", ""); | |||||
| AddFlag(&BenchmarkFlags::inDataPath, "inDataPath", "Input data path, if not set, use random input", ""); | |||||
| AddFlag(&BenchmarkFlags::device, "device", "CPU | GPU", "CPU"); | |||||
| AddFlag(&BenchmarkFlags::cpuBindMode, "cpuBindMode", | |||||
| "Input -1 for MID_CPU, 1 for HIGHER_CPU, 0 for NO_BIND, defalut value: 1", 1); | |||||
| AddFlag(&BenchmarkFlags::model_file_, "modelFile", "Input model file", ""); | |||||
| AddFlag(&BenchmarkFlags::in_data_file_, "inDataFile", "Input data file, if not set, use random input", ""); | |||||
| AddFlag(&BenchmarkFlags::device_, "device", "CPU | GPU", "CPU"); | |||||
| AddFlag(&BenchmarkFlags::cpu_bind_mode_, "cpuBindMode", | |||||
| "Input 0 for NO_BIND, 1 for HIGHER_CPU, 2 for MID_CPU, defalut value: 1", 1); | |||||
| // MarkPerformance | // MarkPerformance | ||||
| AddFlag(&BenchmarkFlags::loopCount, "loopCount", "Run loop count", 10); | |||||
| AddFlag(&BenchmarkFlags::numThreads, "numThreads", "Run threads number", 2); | |||||
| AddFlag(&BenchmarkFlags::fp16Priority, "fp16Priority", "Priority float16", false); | |||||
| AddFlag(&BenchmarkFlags::warmUpLoopCount, "warmUpLoopCount", "Run warm up loop", 3); | |||||
| AddFlag(&BenchmarkFlags::runTimeProfiler, "runTimeProfiler", "Run time profiler", false); | |||||
| AddFlag(&BenchmarkFlags::loop_count_, "loopCount", "Run loop count", 10); | |||||
| AddFlag(&BenchmarkFlags::num_threads_, "numThreads", "Run threads number", 2); | |||||
| AddFlag(&BenchmarkFlags::enable_fp16_, "enableFp16", "Enable float16", false); | |||||
| AddFlag(&BenchmarkFlags::warm_up_loop_count_, "warmUpLoopCount", "Run warm up loop", 3); | |||||
| AddFlag(&BenchmarkFlags::time_profiling_, "timeProfiling", "Run time profiling", false); | |||||
| // MarkAccuracy | // MarkAccuracy | ||||
| AddFlag(&BenchmarkFlags::calibDataPath, "calibDataPath", "Calibration data file path", ""); | |||||
| AddFlag(&BenchmarkFlags::calibDataType, "calibDataType", "Calibration data type. FLOAT | INT32 | INT8 | UINT8", | |||||
| "FLOAT"); | |||||
| AddFlag(&BenchmarkFlags::accuracyThreshold, "accuracyThreshold", "Threshold of accuracy", 0.5); | |||||
| AddFlag(&BenchmarkFlags::benchmark_data_file_, "benchmarkDataFile", "Benchmark data file path", ""); | |||||
| AddFlag(&BenchmarkFlags::benchmark_data_type_, "benchmarkDataType", | |||||
| "Benchmark data type. FLOAT | INT32 | INT8 | UINT8", "FLOAT"); | |||||
| AddFlag(&BenchmarkFlags::accuracy_threshold_, "accuracyThreshold", "Threshold of accuracy", 0.5); | |||||
| } | } | ||||
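With these renamed options, an illustrative invocation (paths hypothetical) becomes: `./benchmark --modelFile=model.ms --inDataFile=input.bin --device=CPU --loopCount=10 --numThreads=2 --timeProfiling=true --benchmarkDataFile=expected.out`.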
| ~BenchmarkFlags() override = default; | ~BenchmarkFlags() override = default; | ||||
| @@ -80,32 +80,32 @@ class MS_API BenchmarkFlags : public virtual FlagParser { | |||||
| public: | public: | ||||
| // common | // common | ||||
| std::string modelPath; | |||||
| std::string inDataPath; | |||||
| std::vector<std::string> input_data_list; | |||||
| InDataType inDataType; | |||||
| std::string inDataTypeIn = "bin"; | |||||
| int cpuBindMode = 1; | |||||
| std::string model_file_; | |||||
| std::string in_data_file_; | |||||
| std::vector<std::string> input_data_list_; | |||||
| InDataType in_data_type_; | |||||
| std::string in_data_type_in_ = "bin"; | |||||
| int cpu_bind_mode_ = 1; | |||||
| // MarkPerformance | // MarkPerformance | ||||
| int loopCount; | |||||
| int numThreads; | |||||
| bool fp16Priority; | |||||
| int warmUpLoopCount; | |||||
| bool runTimeProfiler; | |||||
| int loop_count_; | |||||
| int num_threads_; | |||||
| bool enable_fp16_; | |||||
| int warm_up_loop_count_; | |||||
| bool time_profiling_; | |||||
| // MarkAccuracy | // MarkAccuracy | ||||
| std::string calibDataPath; | |||||
| std::string calibDataType; | |||||
| float accuracyThreshold; | |||||
| std::string benchmark_data_file_; | |||||
| std::string benchmark_data_type_; | |||||
| float accuracy_threshold_; | |||||
| // Resize | // Resize | ||||
| std::string resizeDimsIn = ""; | |||||
| std::vector<std::vector<int64_t>> resizeDims; | |||||
| std::string resize_dims_in_ = ""; | |||||
| std::vector<std::vector<int64_t>> resize_dims_; | |||||
| std::string device; | |||||
| std::string device_; | |||||
| }; | }; | ||||
| class MS_API Benchmark { | class MS_API Benchmark { | ||||
| public: | public: | ||||
| explicit Benchmark(BenchmarkFlags *flags) : _flags(flags) {} | |||||
| explicit Benchmark(BenchmarkFlags *flags) : flags_(flags) {} | |||||
| virtual ~Benchmark(); | virtual ~Benchmark(); | ||||
| @@ -146,8 +146,8 @@ class MS_API Benchmark { | |||||
| // tensorData need to be converter first | // tensorData need to be converter first | ||||
| template <typename T> | template <typename T> | ||||
| float CompareData(const std::string &nodeName, std::vector<int> msShape, T *msTensorData) { | float CompareData(const std::string &nodeName, std::vector<int> msShape, T *msTensorData) { | ||||
| auto iter = this->calibData.find(nodeName); | |||||
| if (iter != this->calibData.end()) { | |||||
| auto iter = this->benchmark_data_.find(nodeName); | |||||
| if (iter != this->benchmark_data_.end()) { | |||||
| std::vector<size_t> castedMSShape; | std::vector<size_t> castedMSShape; | ||||
| size_t shapeSize = 1; | size_t shapeSize = 1; | ||||
| for (int64_t dim : msShape) { | for (int64_t dim : msShape) { | ||||
| @@ -224,15 +224,15 @@ class MS_API Benchmark { | |||||
| int MarkAccuracy(); | int MarkAccuracy(); | ||||
| private: | private: | ||||
| BenchmarkFlags *_flags; | |||||
| session::LiteSession *session; | |||||
| std::vector<mindspore::tensor::MSTensor *> msInputs; | |||||
| std::unordered_map<std::string, std::vector<mindspore::tensor::MSTensor *>> msOutputs; | |||||
| std::unordered_map<std::string, CheckTensor *> calibData; | |||||
| std::unordered_map<std::string, TypeId> dataTypeMap{{"FLOAT", TypeId::kNumberTypeFloat}, | |||||
| {"INT8", TypeId::kNumberTypeInt8}, | |||||
| {"INT32", TypeId::kNumberTypeInt32}, | |||||
| {"UINT8", TypeId::kNumberTypeUInt8}}; | |||||
| BenchmarkFlags *flags_; | |||||
| session::LiteSession *session_; | |||||
| std::vector<mindspore::tensor::MSTensor *> ms_inputs_; | |||||
| std::unordered_map<std::string, std::vector<mindspore::tensor::MSTensor *>> ms_outputs_; | |||||
| std::unordered_map<std::string, CheckTensor *> benchmark_data_; | |||||
| std::unordered_map<std::string, TypeId> data_type_map_{{"FLOAT", TypeId::kNumberTypeFloat}, | |||||
| {"INT8", TypeId::kNumberTypeInt8}, | |||||
| {"INT32", TypeId::kNumberTypeInt32}, | |||||
| {"UINT8", TypeId::kNumberTypeUInt8}}; | |||||
| TypeId msCalibDataType = TypeId::kNumberTypeFloat; | TypeId msCalibDataType = TypeId::kNumberTypeFloat; | ||||
| // callback parameters | // callback parameters | ||||
| @@ -101,8 +101,8 @@ FuncGraphPtr AnfTransform::Transform(const FuncGraphPtr &old_graph, const conver | |||||
| ReturnCode::GetSingleReturnCode()->UpdateReturnCode(RET_ERROR); | ReturnCode::GetSingleReturnCode()->UpdateReturnCode(RET_ERROR); | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| this->mQuantizer = std::make_unique<quant::WeightQuantizer>( | |||||
| new_graph, config->quantSize, config->convWeightQuantChannelThreshold, config->bitNum); | |||||
| this->mQuantizer = std::make_unique<quant::WeightQuantizer>(new_graph, config->quantWeightSize, | |||||
| config->quantWeightChannel, config->bitNum); | |||||
| if (mQuantizer == nullptr) { | if (mQuantizer == nullptr) { | ||||
| MS_LOG(ERROR) << "New WeightQuantizer failed"; | MS_LOG(ERROR) << "New WeightQuantizer failed"; | ||||
| ReturnCode::GetSingleReturnCode()->UpdateReturnCode(RET_MEMORY_FAILED); | ReturnCode::GetSingleReturnCode()->UpdateReturnCode(RET_MEMORY_FAILED); | ||||
| @@ -14,8 +14,8 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MS_ANF_TRANSFORM_H | |||||
| #define MS_ANF_TRANSFORM_H | |||||
| #ifndef MINDSPORE_LITE_TOOLS_CONVERTER_ANF_TRANSFORM_H | |||||
| #define MINDSPORE_LITE_TOOLS_CONVERTER_ANF_TRANSFORM_H | |||||
| #include <memory> | #include <memory> | ||||
| #include "schema/inner/model_generated.h" | #include "schema/inner/model_generated.h" | ||||
| @@ -14,8 +14,8 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MS_CONVERTER_H | |||||
| #define MS_CONVERTER_H | |||||
| #ifndef MINDSPORE_LITE_TOOLS_CONVERTER_CONVERTER_H | |||||
| #define MINDSPORE_LITE_TOOLS_CONVERTER_CONVERTER_H | |||||
| #include <memory> | #include <memory> | ||||
| #include <string> | #include <string> | ||||
| @@ -14,8 +14,8 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef LITE_RETURN_CODE_H | |||||
| #define LITE_RETURN_CODE_H | |||||
| #ifndef MINDSPORE_LITE_TOOLS_CONVERTER_RETURN_CODE_H | |||||
| #define MINDSPORE_LITE_TOOLS_CONVERTER_RETURN_CODE_H | |||||
| #include <string> | #include <string> | ||||
| #include <set> | #include <set> | ||||
| @@ -70,4 +70,4 @@ class NoSupportOp { | |||||
| }; | }; | ||||
| } // namespace lite | } // namespace lite | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| #endif // LITE_RETURN_CODE_H | |||||
| #endif // MINDSPORE_LITE_TOOLS_CONVERTER_RETURN_CODE_H | |||||
| @@ -25,23 +25,20 @@ namespace converter { | |||||
| Flags::Flags() { | Flags::Flags() { | ||||
| AddFlag(&Flags::fmkIn, "fmk", "Input model framework type. TFLITE | CAFFE | MINDIR | ONNX", ""); | AddFlag(&Flags::fmkIn, "fmk", "Input model framework type. TFLITE | CAFFE | MINDIR | ONNX", ""); | ||||
| AddFlag(&Flags::modelFile, "modelFile", | AddFlag(&Flags::modelFile, "modelFile", | ||||
| "Input model file path. TFLITE: *.tflite | CAFFE: *.prototxt | MINDIR: *.mindir | ONNX: *.onnx", ""); | |||||
| "Input model file. TFLITE: *.tflite | CAFFE: *.prototxt | MINDIR: *.mindir | ONNX: *.onnx", ""); | |||||
| AddFlag(&Flags::outputFile, "outputFile", "Output model file path. Will add .ms automatically", ""); | AddFlag(&Flags::outputFile, "outputFile", "Output model file path. Will add .ms automatically", ""); | ||||
| AddFlag(&Flags::weightFile, "weightFile", | |||||
| "Input model weight file path. Needed when fmk is CAFFE. CAFFE: *.caffemodel", ""); | |||||
| AddFlag(&Flags::weightFile, "weightFile", "Input model weight file. Needed when fmk is CAFFE. CAFFE: *.caffemodel", | |||||
| ""); | |||||
| AddFlag(&Flags::inferenceTypeIn, "inferenceType", "Data type of input and output tensors. FLOAT | INT8 | UINT8", | AddFlag(&Flags::inferenceTypeIn, "inferenceType", "Data type of input and output tensors. FLOAT | INT8 | UINT8", | ||||
| "FLOAT"); | "FLOAT"); | ||||
| AddFlag(&Flags::quantTypeIn, "quantType", "Quantization Type. AwareTraining | PostTraining | WeightQuant", ""); | AddFlag(&Flags::quantTypeIn, "quantType", "Quantization Type. AwareTraining | PostTraining | WeightQuant", ""); | ||||
| AddFlag(&Flags::stdDev, "stdDev", "Standard deviation value for aware-quantization", "128"); | |||||
| AddFlag(&Flags::mean, "mean", "Mean value for aware-quantization", "-0.5"); | |||||
| AddFlag(&Flags::bitNum, "bitNum", "Weight quantization bitNum", "8"); | AddFlag(&Flags::bitNum, "bitNum", "Weight quantization bitNum", "8"); | ||||
| AddFlag(&Flags::quantSize, "quantSize", "Weight quantization size threshold", "0"); | |||||
| AddFlag(&Flags::convWeightQuantChannelThreshold, "convWeightQuantChannelThreshold", "convWeightQuantChannelThreshold", | |||||
| "16"); | |||||
| AddFlag(&Flags::configFile, "config_file", "Configuration for post-training.", ""); | |||||
| AddFlag(&Flags::quantWeightSize, "quantWeightSize", "Weight quantization size threshold", "0"); | |||||
| AddFlag(&Flags::quantWeightChannel, "quantWeightChannel", "Channel threshold for weight quantization", "16"); | |||||
| AddFlag(&Flags::configFile, "configFile", "Configuration for post-training.", ""); | |||||
| AddFlag(&Flags::trainModelIn, "trainModel", | AddFlag(&Flags::trainModelIn, "trainModel", | ||||
| "whether the model is going to be trained on device." | "whether the model is going to be trained on device." | ||||
| " true | false", | |||||
| "true | false", | |||||
| "false"); | "false"); | ||||
| } | } | ||||
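Correspondingly, an illustrative converter call under the renamed quantization options (binary and file names assumed for the example): `./converter_lite --fmk=TFLITE --modelFile=model.tflite --outputFile=model --quantType=WeightQuant --bitNum=8 --quantWeightSize=0 --quantWeightChannel=16`.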
| @@ -64,11 +61,11 @@ int Flags::Init(int argc, const char **argv) { | |||||
| } | } | ||||
| if (this->modelFile.empty()) { | if (this->modelFile.empty()) { | ||||
| std::cerr << "INPUT MISSING: model file path is necessary"; | std::cerr << "INPUT MISSING: model file path is necessary"; | ||||
| return RET_INPUT_PARAM_LACK; | |||||
| return RET_INPUT_PARAM_INVALID; | |||||
| } | } | ||||
| if (this->outputFile.empty()) { | if (this->outputFile.empty()) { | ||||
| std::cerr << "INPUT MISSING: output file path is necessary"; | std::cerr << "INPUT MISSING: output file path is necessary"; | ||||
| return RET_INPUT_PARAM_LACK; | |||||
| return RET_INPUT_PARAM_INVALID; | |||||
| } | } | ||||
| if (this->outputFile.rfind('/') == this->outputFile.length() - 1) { | if (this->outputFile.rfind('/') == this->outputFile.length() - 1) { | ||||
| @@ -78,7 +75,7 @@ int Flags::Init(int argc, const char **argv) { | |||||
| if (this->fmkIn.empty()) { | if (this->fmkIn.empty()) { | ||||
| std::cerr << "INPUT MISSING: fmk is necessary"; | std::cerr << "INPUT MISSING: fmk is necessary"; | ||||
| return RET_INPUT_PARAM_LACK; | |||||
| return RET_INPUT_PARAM_INVALID; | |||||
| } | } | ||||
| if (this->inferenceTypeIn == "FLOAT") { | if (this->inferenceTypeIn == "FLOAT") { | ||||
| @@ -14,8 +14,8 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef CONVERTER_FLAGS_H | |||||
| #define CONVERTER_FLAGS_H | |||||
| #ifndef MINDSPORE_LITE_TOOLS_CONVERTER_CONVERTER_FLAGS_H | |||||
| #define MINDSPORE_LITE_TOOLS_CONVERTER_CONVERTER_FLAGS_H | |||||
| #include <string> | #include <string> | ||||
| #include "tools/common/flag_parser.h" | #include "tools/common/flag_parser.h" | ||||
| @@ -40,9 +40,6 @@ class Flags : public virtual mindspore::lite::FlagParser { | |||||
| int Init(int argc, const char **argv); | int Init(int argc, const char **argv); | ||||
| private: | |||||
| bool ValidateString(std::string pattern, std::string input); | |||||
| public: | public: | ||||
| std::string modelFile; | std::string modelFile; | ||||
| std::string outputFile; | std::string outputFile; | ||||
| @@ -58,13 +55,11 @@ class Flags : public virtual mindspore::lite::FlagParser { | |||||
| std::string inferenceTypeIn; | std::string inferenceTypeIn; | ||||
| // used for parse aware trainning | // used for parse aware trainning | ||||
| TypeId inferenceType = TypeId::kNumberTypeFloat; | TypeId inferenceType = TypeId::kNumberTypeFloat; | ||||
| std::string stdDev; | |||||
| std::string mean; | |||||
| // used for post-trainning-weight | // used for post-trainning-weight | ||||
| std::string quantSize; | |||||
| std::string quantWeightSize; | |||||
| std::string bitNum; | std::string bitNum; | ||||
| std::string configFile; | std::string configFile; | ||||
| std::string convWeightQuantChannelThreshold; | |||||
| std::string quantWeightChannel; | |||||
| std::string trainModelIn; | std::string trainModelIn; | ||||
| bool trainModel = false; | bool trainModel = false; | ||||
| }; | }; | ||||
| @@ -48,8 +48,7 @@ void GraphDefTransform::CreateQuantizer(const converter::Flags *flags) { | |||||
| switch (type) { | switch (type) { | ||||
| case QuantType::QuantType_AwareTraining: { | case QuantType::QuantType_AwareTraining: { | ||||
| MS_LOG(INFO) << "create AwareTrainingQuantizer!"; | MS_LOG(INFO) << "create AwareTrainingQuantizer!"; | ||||
| fbQuantizer = | |||||
| std::make_unique<quant::AwareQuantizer>(graphDefT, flags->inferenceType, flags->stdDev, flags->mean); | |||||
| fbQuantizer = std::make_unique<quant::AwareQuantizer>(graphDefT, flags->inferenceType); | |||||
| break; | break; | ||||
| } | } | ||||
| default: | default: | ||||
| @@ -14,8 +14,8 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MS_GRAPHDEF_TRANSFORM_H | |||||
| #define MS_GRAPHDEF_TRANSFORM_H | |||||
| #ifndef MINDSPORE_LITE_TOOLS_CONVERTER_GRAPHDEF_TRANSFORM_H | |||||
| #define MINDSPORE_LITE_TOOLS_CONVERTER_GRAPHDEF_TRANSFORM_H | |||||
| #include <memory> | #include <memory> | ||||
| #include "tools/converter/optimizer.h" | #include "tools/converter/optimizer.h" | ||||
| @@ -14,8 +14,8 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MS_MODEL_PARSER_H | |||||
| #define MS_MODEL_PARSER_H | |||||
| #ifndef MINDSPORE_LITE_TOOLS_CONVERTER_MODEL_PARSER_H | |||||
| #define MINDSPORE_LITE_TOOLS_CONVERTER_MODEL_PARSER_H | |||||
| #include <google/protobuf/message.h> | #include <google/protobuf/message.h> | ||||
| #include <string> | #include <string> | ||||
| #include <memory> | #include <memory> | ||||
| @@ -14,8 +14,8 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MS_OPTIMIZER_H | |||||
| #define MS_OPTIMIZER_H | |||||
| #ifndef MINDSPORE_LITE_TOOLS_CONVERTER_OPTIMIZER_H | |||||
| #define MINDSPORE_LITE_TOOLS_CONVERTER_OPTIMIZER_H | |||||
| #include <vector> | #include <vector> | ||||
| #include "schema/inner/model_generated.h" | #include "schema/inner/model_generated.h" | ||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| @@ -40,49 +40,7 @@ const std::array<schema::PrimitiveType, 7> AwareQuantizer::propagatedOps = { | |||||
| schema::PrimitiveType_Squeeze, schema::PrimitiveType_RealDiv, schema::PrimitiveType_Activation, | schema::PrimitiveType_Squeeze, schema::PrimitiveType_RealDiv, schema::PrimitiveType_Activation, | ||||
| schema::PrimitiveType_DetectionPostProcess}}; | schema::PrimitiveType_DetectionPostProcess}}; | ||||
| STATUS InputArray::InitQuantParam() { | |||||
| this->quantParam = std::make_unique<schema::QuantParamT>(); | |||||
| auto status = CalQuantizationParams(quantParam.get(), mMin, mMax, narrowRange, numBits); | |||||
| if (status != RET_OK) { | |||||
| return status; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| STATUS InputArray::SetInputArrayQP(schema::MetaGraphT *graph, size_t inputTensorIdx) { | |||||
| MS_ASSERT(graph != nullptr); | |||||
| auto &tensor = graph->allTensors.at(inputTensorIdx); | |||||
| MS_ASSERT(tensor != nullptr); | |||||
| if (!tensor->quantParams.empty()) { | |||||
| auto param = GetTensorQuantParam(tensor); | |||||
| if (param != nullptr && param->inited) { | |||||
| MS_LOG(DEBUG) << "tensor " << inputTensorIdx << " already has quantParam"; | |||||
| return RET_OK; | |||||
| } | |||||
| tensor->quantParams.clear(); | |||||
| } | |||||
| std::unique_ptr<schema::QuantParamT> tmpQuantParam(new QuantParamT()); | |||||
| tmpQuantParam->inited = this->quantParam->inited; | |||||
| tmpQuantParam->scale = this->quantParam->scale; | |||||
| tmpQuantParam->zeroPoint = this->quantParam->zeroPoint; | |||||
| tmpQuantParam->min = this->quantParam->min; | |||||
| tmpQuantParam->max = this->quantParam->max; | |||||
| tensor->quantParams.push_back(std::move(tmpQuantParam)); | |||||
| return RET_OK; | |||||
| } | |||||
| AwareQuantizer::AwareQuantizer(schema::MetaGraphT *graph, const TypeId &inferType, const string &stdValues, | |||||
| const string &meanValues) | |||||
| : FbQuantizer(graph) { | |||||
| MS_ASSERT(graph != nullptr); | |||||
| string::size_type sz; | |||||
| const float stdValue = std::stof(stdValues, &sz); | |||||
| sz = 0; | |||||
| const float mean = std::stof(meanValues, &sz); | |||||
| mInputArray = new (std::nothrow) InputArray(mean, stdValue); | |||||
| mInputArray->dataType = inferType; | |||||
| mInputArray->InitQuantParam(); | |||||
| } | |||||
| AwareQuantizer::AwareQuantizer(schema::MetaGraphT *graph, const TypeId &inferType) : FbQuantizer(graph) {} | |||||
| STATUS AwareQuantizer::RemoveFakeQuant() { return RET_OK; } | STATUS AwareQuantizer::RemoveFakeQuant() { return RET_OK; } | ||||
| @@ -101,15 +59,6 @@ STATUS AwareQuantizer::GenerateDefaultQuantParam(const schema::MetaGraphT *subGr | |||||
| STATUS AwareQuantizer::SetAttrToConvolution(const schema::MetaGraphT *subGraph, schema::CNodeT *node) { return RET_OK; } | STATUS AwareQuantizer::SetAttrToConvolution(const schema::MetaGraphT *subGraph, schema::CNodeT *node) { return RET_OK; } | ||||
| STATUS AwareQuantizer::GenerateQuantParam() { | STATUS AwareQuantizer::GenerateQuantParam() { | ||||
| MS_ASSERT(graph->inputIndex.size() == 1); | |||||
| // set graphInputNode input | |||||
| for (auto graphInputIndex : graph->inputIndex) { | |||||
| auto status = mInputArray->SetInputArrayQP(graph, graphInputIndex); | |||||
| if (status != RET_OK) { | |||||
| MS_LOG(WARNING) << "SetInputArrayQP failed"; | |||||
| return status; | |||||
| } | |||||
| } | |||||
| auto *quantParamRegister = QuantParamCalcRegister::GetInstance(); | auto *quantParamRegister = QuantParamCalcRegister::GetInstance(); | ||||
| for (auto iter = graph->nodes.begin(); iter != graph->nodes.end(); iter++) { | for (auto iter = graph->nodes.begin(); iter != graph->nodes.end(); iter++) { | ||||
| @@ -379,6 +328,7 @@ STATUS AwareQuantizer::QuantConvWeight(const schema::MetaGraphT *subGraph, schem | |||||
| weightTensor->quantParams.emplace_back(weightQauntParam.release()); | weightTensor->quantParams.emplace_back(weightQauntParam.release()); | ||||
| } | } | ||||
| weightTensor->data.resize(wShapeSize * sizeof(uint8_t)); | |||||
| ::memcpy(weightTensor->data.data(), qDatas.data(), wShapeSize); | ::memcpy(weightTensor->data.data(), qDatas.data(), wShapeSize); | ||||
| weightTensor->dataType = TypeId::kNumberTypeInt8; | weightTensor->dataType = TypeId::kNumberTypeInt8; | ||||
| return RET_OK; | return RET_OK; | ||||
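The added `resize()` is the substantive fix in this hunk: the vector must own `wShapeSize` bytes before `memcpy` writes into it. A standalone illustration of the failure mode (names hypothetical):

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Without the resize, dst->data() points at a zero-length buffer and the
// memcpy writes out of bounds; resizing first makes the copy in-bounds.
void CopyQuantized(std::vector<uint8_t> *dst, const uint8_t *src, std::size_t n) {
  dst->resize(n);                    // allocate n bytes first
  std::memcpy(dst->data(), src, n);  // safe: buffer now owns n bytes
}
```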
| @@ -14,8 +14,8 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MS_AWARE_QUANTIZER_H | |||||
| #define MS_AWARE_QUANTIZER_H | |||||
| #ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_AWARE_QUANTIZER_H | |||||
| #define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_AWARE_QUANTIZER_H | |||||
| #include <array> | #include <array> | ||||
| #include <string> | #include <string> | ||||
| @@ -26,32 +26,11 @@ | |||||
| #include "tools/converter/quantizer/quantize_util.h" | #include "tools/converter/quantizer/quantize_util.h" | ||||
| namespace mindspore::lite::quant { | namespace mindspore::lite::quant { | ||||
| struct InputArray { | |||||
| std::unique_ptr<schema::QuantParamT> quantParam; | |||||
| float mMin = 0.0f; | |||||
| float mMax = 0.0f; | |||||
| bool narrowRange = false; | |||||
| int numBits = 8; | |||||
| TypeId dataType = TypeId::kTypeUnknown; | |||||
| InputArray(float mean, float stdDev, TypeId dataType = TypeId::kNumberTypeFloat) { | |||||
| this->dataType = dataType; | |||||
| constexpr float qmin = -128; | |||||
| constexpr float qmax = 127; | |||||
| mMin = (qmin - mean) / stdDev; | |||||
| mMax = (qmax - mean) / stdDev; | |||||
| } | |||||
| STATUS InitQuantParam(); | |||||
| STATUS SetInputArrayQP(schema::MetaGraphT *graph, size_t inputTensorIdx); | |||||
| }; | |||||
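For context, the removed `InputArray` derived its float range from the likewise-removed `stdDev`/`mean` converter flags: mMin = (qmin - mean) / stdDev and mMax = (qmax - mean) / stdDev. With the former defaults mean = -0.5 and stdDev = 128, that gave mMin = (-128 + 0.5) / 128 ≈ -0.996 and mMax = (127 + 0.5) / 128 ≈ 0.996.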
| class AwareQuantizer : public FbQuantizer { | class AwareQuantizer : public FbQuantizer { | ||||
| public: | public: | ||||
| AwareQuantizer(schema::MetaGraphT *graph, const TypeId &inferType, const std::string &stdValues, | |||||
| const std::string &meanValues); | |||||
| AwareQuantizer(schema::MetaGraphT *graph, const TypeId &inferType); | |||||
| ~AwareQuantizer() { delete (mInputArray); } | |||||
| ~AwareQuantizer() override = default; | |||||
| STATUS RemoveFakeQuant() override; | STATUS RemoveFakeQuant() override; | ||||
| @@ -77,8 +56,6 @@ class AwareQuantizer : public FbQuantizer { | |||||
| float inputScale = 0.0f; | float inputScale = 0.0f; | ||||
| InputArray *mInputArray; | |||||
| static const std::array<schema::PrimitiveType, 7> propagatedOps; | static const std::array<schema::PrimitiveType, 7> propagatedOps; | ||||
| }; | }; | ||||
| } // namespace mindspore::lite::quant | } // namespace mindspore::lite::quant | ||||
| @@ -14,8 +14,8 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef CALC_QUANT_PARAM_H | |||||
| #define CALC_QUANT_PARAM_H | |||||
| #ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_CALC_QUANT_PARAM_H | |||||
| #define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_CALC_QUANT_PARAM_H | |||||
| #include <unordered_map> | #include <unordered_map> | ||||
| #include <memory> | #include <memory> | ||||
| @@ -14,8 +14,8 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MINDSPORE_GENERAL_BITPACKING_H | |||||
| #define MINDSPORE_GENERAL_BITPACKING_H | |||||
| #ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER__GENERAL_BITPACKING_H | |||||
| #define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER__GENERAL_BITPACKING_H | |||||
| #include <stdint.h> | #include <stdint.h> | ||||
| #include <stack> | #include <stack> | ||||
| #include <queue> | #include <queue> | ||||
| @@ -861,14 +861,14 @@ STATUS PostTrainingQuantizer::DoInference() { | |||||
| [&](const std::vector<mindspore::tensor::MSTensor *> &beforeInputs, | [&](const std::vector<mindspore::tensor::MSTensor *> &beforeInputs, | ||||
| const std::vector<mindspore::tensor::MSTensor *> &beforeOutputs, | const std::vector<mindspore::tensor::MSTensor *> &beforeOutputs, | ||||
| const mindspore::session::CallBackParam &callParam) -> bool { | const mindspore::session::CallBackParam &callParam) -> bool { | ||||
| if (PostTrainingQuantizer::CheckFp32TensorVec(callParam.name_callback_param, beforeInputs) != RET_OK) { | |||||
| if (PostTrainingQuantizer::CheckFp32TensorVec(callParam.node_name, beforeInputs) != RET_OK) { | |||||
| return false; | return false; | ||||
| } | } | ||||
| auto tensor = beforeInputs[0]; | auto tensor = beforeInputs[0]; | ||||
| const float *tData = static_cast<const float *>(tensor->MutableData()); | const float *tData = static_cast<const float *>(tensor->MutableData()); | ||||
| size_t elem_count = tensor->ElementsNum(); | size_t elem_count = tensor->ElementsNum(); | ||||
| vector<float> data(tData, tData + elem_count); | vector<float> data(tData, tData + elem_count); | ||||
| this->calibrator_->RecordMaxValue(callParam.name_callback_param, data, this->calibrator_->GetInputDivergInfo()); | |||||
| this->calibrator_->RecordMaxValue(callParam.node_name, data, this->calibrator_->GetInputDivergInfo()); | |||||
| return true; | return true; | ||||
| }; | }; | ||||
| // func | // func | ||||
| @@ -876,14 +876,14 @@ STATUS PostTrainingQuantizer::DoInference() { | |||||
| const std::vector<mindspore::tensor::MSTensor *> &afterInputs, | const std::vector<mindspore::tensor::MSTensor *> &afterInputs, | ||||
| const std::vector<mindspore::tensor::MSTensor *> &afterOutputs, | const std::vector<mindspore::tensor::MSTensor *> &afterOutputs, | ||||
| const mindspore::session::CallBackParam &callParam) -> bool { | const mindspore::session::CallBackParam &callParam) -> bool { | ||||
| if (PostTrainingQuantizer::CheckFp32TensorVec(callParam.name_callback_param, afterOutputs) != RET_OK) { | |||||
| if (PostTrainingQuantizer::CheckFp32TensorVec(callParam.node_name, afterOutputs) != RET_OK) { | |||||
| return false; | return false; | ||||
| } | } | ||||
| auto tensor = afterOutputs[0]; | auto tensor = afterOutputs[0]; | ||||
| const float *tensor_data = static_cast<const float *>(tensor->MutableData()); | const float *tensor_data = static_cast<const float *>(tensor->MutableData()); | ||||
| size_t elem_count = tensor->ElementsNum(); | size_t elem_count = tensor->ElementsNum(); | ||||
| vector<float> data(tensor_data, tensor_data + elem_count); | vector<float> data(tensor_data, tensor_data + elem_count); | ||||
| this->calibrator_->RecordMaxValue(callParam.name_callback_param, data, this->calibrator_->GetOutputDivergInfo()); | |||||
| this->calibrator_->RecordMaxValue(callParam.node_name, data, this->calibrator_->GetOutputDivergInfo()); | |||||
| return true; | return true; | ||||
| }; | }; | ||||
| status = fp32_session_->RunGraph(beforeCallBack, afterCallBack); | status = fp32_session_->RunGraph(beforeCallBack, afterCallBack); | ||||
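Here the before-callback records each node's first input and the after-callback its first output, handing both to the calibrator. A rough sketch of the per-node range bookkeeping this implies (illustrative only, not the Calibrator API):

```cpp
#include <algorithm>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

std::unordered_map<std::string, std::pair<float, float>> ranges;  // node -> (min, max)

void Record(const std::string &node, const std::vector<float> &data) {
  if (data.empty()) return;
  auto [mn, mx] = std::minmax_element(data.begin(), data.end());
  auto &entry = ranges.try_emplace(node, *mn, *mx).first->second;
  entry.first = std::min(entry.first, *mn);    // widen the running minimum
  entry.second = std::max(entry.second, *mx);  // widen the running maximum
}
```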
| @@ -918,9 +918,9 @@ STATUS PostTrainingQuantizer::Int8Inference() { | |||||
| [this](const std::vector<mindspore::tensor::MSTensor *> &beforeInputs, | [this](const std::vector<mindspore::tensor::MSTensor *> &beforeInputs, | ||||
| const std::vector<mindspore::tensor::MSTensor *> &beforeOutputs, | const std::vector<mindspore::tensor::MSTensor *> &beforeOutputs, | ||||
| const mindspore::session::CallBackParam &callParam) -> bool { | const mindspore::session::CallBackParam &callParam) -> bool { | ||||
| if (callParam.type_callback_param == kTypeConv2D || callParam.type_callback_param == kTypeDepthwiseConv2D) { | |||||
| if (callParam.node_type == kTypeConv2D || callParam.node_type == kTypeDepthwiseConv2D) { | |||||
| vector<float> fp32_op_input; | vector<float> fp32_op_input; | ||||
| while (!OpInputDataHandle(FETCH, callParam.name_callback_param, &fp32_op_input)) { | |||||
| while (!OpInputDataHandle(FETCH, callParam.node_name, &fp32_op_input)) { | |||||
| std::this_thread::sleep_for(std::chrono::milliseconds(10)); | std::this_thread::sleep_for(std::chrono::milliseconds(10)); | ||||
| } | } | ||||
| auto tensor = beforeInputs[0]; | auto tensor = beforeInputs[0]; | ||||
| @@ -966,9 +966,9 @@ STATUS PostTrainingQuantizer::Int8Inference() { | |||||
| const std::vector<mindspore::tensor::MSTensor *> &afterInputs, | const std::vector<mindspore::tensor::MSTensor *> &afterInputs, | ||||
| const std::vector<mindspore::tensor::MSTensor *> &afterOutputs, | const std::vector<mindspore::tensor::MSTensor *> &afterOutputs, | ||||
| const mindspore::session::CallBackParam &callParam) -> bool { | const mindspore::session::CallBackParam &callParam) -> bool { | ||||
| if (callParam.type_callback_param == kTypeConv2D || callParam.type_callback_param == kTypeDepthwiseConv2D) { | |||||
| if (callParam.node_type == kTypeConv2D || callParam.node_type == kTypeDepthwiseConv2D) { | |||||
| vector<float> fp32_op_output_ch_mean; | vector<float> fp32_op_output_ch_mean; | ||||
| while (!OpOutputChMeanDataHandle(FETCH, callParam.name_callback_param, &fp32_op_output_ch_mean)) { | |||||
| while (!OpOutputChMeanDataHandle(FETCH, callParam.node_name, &fp32_op_output_ch_mean)) { | |||||
| std::this_thread::sleep_for(std::chrono::milliseconds(10)); | std::this_thread::sleep_for(std::chrono::milliseconds(10)); | ||||
| } | } | ||||
| auto tensor = afterOutputs[0]; | auto tensor = afterOutputs[0]; | ||||
| @@ -1020,12 +1020,12 @@ STATUS PostTrainingQuantizer::Int8Inference() { | |||||
| std::transform(fp32_op_output_ch_mean.begin(), fp32_op_output_ch_mean.end(), dequant_op_output_ch_mean.begin(), | std::transform(fp32_op_output_ch_mean.begin(), fp32_op_output_ch_mean.end(), dequant_op_output_ch_mean.begin(), | ||||
| dequant_op_output_ch_mean.begin(), std::minus<>()); | dequant_op_output_ch_mean.begin(), std::minus<>()); | ||||
| if (op_bias_diff_map.find(callParam.name_callback_param) != op_bias_diff_map.end()) { | |||||
| auto &bias_diff = op_bias_diff_map[callParam.name_callback_param]; | |||||
| if (op_bias_diff_map.find(callParam.node_name) != op_bias_diff_map.end()) { | |||||
| auto &bias_diff = op_bias_diff_map[callParam.node_name]; | |||||
| std::transform(bias_diff.begin(), bias_diff.end(), dequant_op_output_ch_mean.begin(), bias_diff.begin(), | std::transform(bias_diff.begin(), bias_diff.end(), dequant_op_output_ch_mean.begin(), bias_diff.begin(), | ||||
| std::plus<>()); | std::plus<>()); | ||||
| } else { | } else { | ||||
| op_bias_diff_map[callParam.name_callback_param] = dequant_op_output_ch_mean; | |||||
| op_bias_diff_map[callParam.node_name] = dequant_op_output_ch_mean; | |||||
| } | } | ||||
| } | } | ||||
| return true; | return true; | ||||
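A compact sketch of the element-wise accumulation above (assuming both vectors carry one entry per output channel and have equal length):

```cpp
#include <algorithm>
#include <functional>
#include <vector>

// bias_diff[i] += diff[i] for every channel i, via std::transform with std::plus<>.
void AccumulateBiasDiff(std::vector<float> *bias_diff, const std::vector<float> &diff) {
  std::transform(bias_diff->begin(), bias_diff->end(), diff.begin(),
                 bias_diff->begin(), std::plus<>());
}
```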
| @@ -1060,8 +1060,8 @@ STATUS PostTrainingQuantizer::BiasCorrection(FuncGraphPtr func_graph) { | |||||
| [this](const std::vector<mindspore::tensor::MSTensor *> &beforeInputs, | [this](const std::vector<mindspore::tensor::MSTensor *> &beforeInputs, | ||||
| const std::vector<mindspore::tensor::MSTensor *> &beforeOutputs, | const std::vector<mindspore::tensor::MSTensor *> &beforeOutputs, | ||||
| const mindspore::session::CallBackParam &callParam) -> bool { | const mindspore::session::CallBackParam &callParam) -> bool { | ||||
| if (callParam.type_callback_param == kTypeConv2D || callParam.type_callback_param == kTypeDepthwiseConv2D) { | |||||
| if (PostTrainingQuantizer::CheckFp32TensorVec(callParam.name_callback_param, beforeInputs) != RET_OK) { | |||||
| if (callParam.node_type == kTypeConv2D || callParam.node_type == kTypeDepthwiseConv2D) { | |||||
| if (PostTrainingQuantizer::CheckFp32TensorVec(callParam.node_name, beforeInputs) != RET_OK) { | |||||
| return false; | return false; | ||||
| } | } | ||||
| auto tensor = beforeInputs[0]; | auto tensor = beforeInputs[0]; | ||||
| @@ -1073,7 +1073,7 @@ STATUS PostTrainingQuantizer::BiasCorrection(FuncGraphPtr func_graph) { | |||||
| MS_LOG(ERROR) << "memcpy error: " << ret; | MS_LOG(ERROR) << "memcpy error: " << ret; | ||||
| return false; | return false; | ||||
| } | } | ||||
| while (!OpInputDataHandle(STORE, callParam.name_callback_param, &fp32_op_input)) { | |||||
| while (!OpInputDataHandle(STORE, callParam.node_name, &fp32_op_input)) { | |||||
| std::this_thread::sleep_for(std::chrono::milliseconds(10)); | std::this_thread::sleep_for(std::chrono::milliseconds(10)); | ||||
| } | } | ||||
| } | } | ||||
| @@ -1084,8 +1084,8 @@ STATUS PostTrainingQuantizer::BiasCorrection(FuncGraphPtr func_graph) { | |||||
| const std::vector<mindspore::tensor::MSTensor *> &afterInputs, | const std::vector<mindspore::tensor::MSTensor *> &afterInputs, | ||||
| const std::vector<mindspore::tensor::MSTensor *> &afterOutputs, | const std::vector<mindspore::tensor::MSTensor *> &afterOutputs, | ||||
| const mindspore::session::CallBackParam &callParam) -> bool { | const mindspore::session::CallBackParam &callParam) -> bool { | ||||
| if (callParam.type_callback_param == kTypeConv2D || callParam.type_callback_param == kTypeDepthwiseConv2D) { | |||||
| if (PostTrainingQuantizer::CheckFp32TensorVec(callParam.name_callback_param, afterOutputs) != RET_OK) { | |||||
| if (callParam.node_type == kTypeConv2D || callParam.node_type == kTypeDepthwiseConv2D) { | |||||
| if (PostTrainingQuantizer::CheckFp32TensorVec(callParam.node_name, afterOutputs) != RET_OK) { | |||||
| return false; | return false; | ||||
| } | } | ||||
| auto tensor = afterOutputs[0]; | auto tensor = afterOutputs[0]; | ||||
| @@ -1117,7 +1117,7 @@ STATUS PostTrainingQuantizer::BiasCorrection(FuncGraphPtr func_graph) { | |||||
| sum = sum / one_filter_size; | sum = sum / one_filter_size; | ||||
| fp32_op_output_ch_mean[i] = sum; | fp32_op_output_ch_mean[i] = sum; | ||||
| } | } | ||||
| while (!OpOutputChMeanDataHandle(STORE, callParam.name_callback_param, &fp32_op_output_ch_mean)) { | |||||
| while (!OpOutputChMeanDataHandle(STORE, callParam.node_name, &fp32_op_output_ch_mean)) { | |||||
| std::this_thread::sleep_for(std::chrono::milliseconds(10)); | std::this_thread::sleep_for(std::chrono::milliseconds(10)); | ||||
| } | } | ||||
| } | } | ||||
| @@ -1264,15 +1264,14 @@ STATUS PostTrainingQuantizer::CollectDataFrequency() { | |||||
| [&](const std::vector<mindspore::tensor::MSTensor *> &beforeInputs, | [&](const std::vector<mindspore::tensor::MSTensor *> &beforeInputs, | ||||
| const std::vector<mindspore::tensor::MSTensor *> &beforeOutputs, | const std::vector<mindspore::tensor::MSTensor *> &beforeOutputs, | ||||
| const mindspore::session::CallBackParam &callParam) { | const mindspore::session::CallBackParam &callParam) { | ||||
| if (PostTrainingQuantizer::CheckFp32TensorVec(callParam.name_callback_param, beforeInputs) != RET_OK) { | |||||
| if (PostTrainingQuantizer::CheckFp32TensorVec(callParam.node_name, beforeInputs) != RET_OK) { | |||||
| return false; | return false; | ||||
| } | } | ||||
| auto tensor = beforeInputs[0]; | auto tensor = beforeInputs[0]; | ||||
| const float *tensor_data = static_cast<const float *>(tensor->MutableData()); | const float *tensor_data = static_cast<const float *>(tensor->MutableData()); | ||||
| size_t shape_size = tensor->ElementsNum(); | size_t shape_size = tensor->ElementsNum(); | ||||
| vector<float> data(tensor_data, tensor_data + shape_size); | vector<float> data(tensor_data, tensor_data + shape_size); | ||||
| this->calibrator_->UpdateDataFrequency(callParam.name_callback_param, data, | |||||
| this->calibrator_->GetInputDivergInfo()); | |||||
| this->calibrator_->UpdateDataFrequency(callParam.node_name, data, this->calibrator_->GetInputDivergInfo()); | |||||
| return true; | return true; | ||||
| }; | }; | ||||
| @@ -1280,15 +1279,14 @@ STATUS PostTrainingQuantizer::CollectDataFrequency() { | |||||
| [&](const std::vector<mindspore::tensor::MSTensor *> &after_inputs, | [&](const std::vector<mindspore::tensor::MSTensor *> &after_inputs, | ||||
| const std::vector<mindspore::tensor::MSTensor *> &after_outputs, | const std::vector<mindspore::tensor::MSTensor *> &after_outputs, | ||||
| const mindspore::session::CallBackParam &call_param) { | const mindspore::session::CallBackParam &call_param) { | ||||
| if (PostTrainingQuantizer::CheckFp32TensorVec(call_param.name_callback_param, after_outputs) != RET_OK) { | |||||
| if (PostTrainingQuantizer::CheckFp32TensorVec(call_param.node_name, after_outputs) != RET_OK) { | |||||
| return false; | return false; | ||||
| } | } | ||||
| auto tensor = after_outputs[0]; | auto tensor = after_outputs[0]; | ||||
| const float *tenosr_data = static_cast<const float *>(tensor->MutableData()); | const float *tenosr_data = static_cast<const float *>(tensor->MutableData()); | ||||
| size_t shape_size = tensor->ElementsNum(); | size_t shape_size = tensor->ElementsNum(); | ||||
| vector<float> data(tenosr_data, tenosr_data + shape_size); | vector<float> data(tenosr_data, tenosr_data + shape_size); | ||||
| this->calibrator_->UpdateDataFrequency(call_param.name_callback_param, data, | |||||
| this->calibrator_->GetOutputDivergInfo()); | |||||
| this->calibrator_->UpdateDataFrequency(call_param.node_name, data, this->calibrator_->GetOutputDivergInfo()); | |||||
| return true; | return true; | ||||
| }; | }; | ||||
| status = fp32_session_->RunGraph(beforeCallBack, afterCallBack); | status = fp32_session_->RunGraph(beforeCallBack, afterCallBack); | ||||
| @@ -14,8 +14,8 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef POSTRAINING_QUANTIZER_H | |||||
| #define POSTRAINING_QUANTIZER_H | |||||
| #ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_POSTRAINING_QUANTIZER_H | |||||
| #define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_POSTRAINING_QUANTIZER_H | |||||
| #include <string> | #include <string> | ||||
| #include <memory> | #include <memory> | ||||
| @@ -221,4 +221,4 @@ class Calibrator { | |||||
| } // namespace quant | } // namespace quant | ||||
| } // namespace lite | } // namespace lite | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| #endif // POSTRAINING_QUANTIZER_H | |||||
| #endif // MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_POSTRAINING_QUANTIZER_H | |||||
| @@ -14,8 +14,8 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef LITE_QUANT_CAST_H | |||||
| #define LITE_QUANT_CAST_H | |||||
| #ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER__QUANT_CAST_H | |||||
| #define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER__QUANT_CAST_H | |||||
| #include "mindspore/core/ir/anf.h" | #include "mindspore/core/ir/anf.h" | ||||
| #include "mindspore/lite/include/errorcode.h" | #include "mindspore/lite/include/errorcode.h" | ||||
| @@ -36,4 +36,4 @@ class QuantCast { | |||||
| } // namespace mindspore::lite::quant | } // namespace mindspore::lite::quant | ||||
| #endif // LITE_QUANT_CAST_H | |||||
| #endif // MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER__QUANT_CAST_H | |||||
| @@ -14,8 +14,8 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef QUANTIZER_UTIL_H | |||||
| #define QUANTIZER_UTIL_H | |||||
| #ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_QUANTIZER_UTIL_H | |||||
| #define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_QUANTIZER_UTIL_H | |||||
| #include <memory> | #include <memory> | ||||
| #include <string> | #include <string> | ||||
| @@ -14,8 +14,8 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MS_QUANTIZER_H | |||||
| #define MS_QUANTIZER_H | |||||
| #ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_QUANTIZER_H | |||||
| #define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_QUANTIZER_H | |||||
| #include <unordered_map> | #include <unordered_map> | ||||
| #include <utility> | #include <utility> | ||||
| @@ -63,7 +63,7 @@ class FbQuantizer { | |||||
| public: | public: | ||||
| explicit FbQuantizer(schema::MetaGraphT *graph) : graph(graph) {} | explicit FbQuantizer(schema::MetaGraphT *graph) : graph(graph) {} | ||||
| ~FbQuantizer() = default; | |||||
| virtual ~FbQuantizer() = default; | |||||
| virtual STATUS RemoveFakeQuant(); | virtual STATUS RemoveFakeQuant(); | ||||
| @@ -40,11 +40,11 @@ bool WeightQuantizer::IsPosNum(const std::string &str) { | |||||
| } | } | ||||
| STATUS WeightQuantizer::WeightQuantInputCheck(const converter::Flags *config) { | STATUS WeightQuantizer::WeightQuantInputCheck(const converter::Flags *config) { | ||||
| MS_ASSERT(config != nullptr); | MS_ASSERT(config != nullptr); | ||||
| if (!WeightQuantizer::IsPosNum(config->convWeightQuantChannelThreshold)) { | |||||
| if (!WeightQuantizer::IsPosNum(config->quantWeightChannel)) { | |||||
| MS_LOG(ERROR) << "convWeightQuantChannelThreshold must be valid pos num."; | MS_LOG(ERROR) << "convWeightQuantChannelThreshold must be valid pos num."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| if (!WeightQuantizer::IsPosNum(config->quantSize)) { | |||||
| if (!WeightQuantizer::IsPosNum(config->quantWeightSize)) { | |||||
| MS_LOG(ERROR) << "quantSize must be valid pos num."; | MS_LOG(ERROR) << "quantSize must be valid pos num."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| @@ -14,8 +14,8 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef WEIGHT_QUANTIZER_H | |||||
| #define WEIGHT_QUANTIZER_H | |||||
| #ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_WEIGHT_QUANTIZER_H | |||||
| #define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_WEIGHT_QUANTIZER_H | |||||
| #include <memory> | #include <memory> | ||||
| #include <list> | #include <list> | ||||
| @@ -45,6 +45,7 @@ class WeightQuantizer : public Quantizer { | |||||
| static bool IsPosNum(const std::string &str); | static bool IsPosNum(const std::string &str); | ||||
| int quant_max{INT8_MAX}; | int quant_max{INT8_MAX}; | ||||
| int quant_min{INT8_MIN}; | int quant_min{INT8_MIN}; | ||||
| private: | private: | ||||
| std::unique_ptr<QuantStrategy> mStrategy; | std::unique_ptr<QuantStrategy> mStrategy; | ||||
| size_t bitNum; | size_t bitNum; | ||||
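For the default `bitNum` of 8, these initializers match the usual symmetric range: quant_max = 2^(8-1) - 1 = 127 (INT8_MAX) and quant_min = -2^(8-1) = -128 (INT8_MIN).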
| @@ -1,26 +0,0 @@ | |||||
| # add shared link library | |||||
| set(COMMON_SRC | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/../common/flag_parser.cc | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/../../src/common/file_utils.cc | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/../../src/common/utils.cc | |||||
| ) | |||||
| add_executable(timeprofiler | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/main.cc | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/time_profiler.cc | |||||
| ${COMMON_SRC}) | |||||
| if (PLATFORM_ARM32 OR PLATFORM_ARM64) | |||||
| target_link_libraries(timeprofiler mindspore-lite) | |||||
| else() | |||||
| target_link_libraries(timeprofiler mindspore-lite pthread) | |||||
| endif() | |||||
| if (PLATFORM_ARM32 OR PLATFORM_ARM64) | |||||
| install(TARGETS timeprofiler | |||||
| RUNTIME DESTINATION ${MAIN_DIR}-${COMPONENT_NAME}/time_profiler COMPONENT ${COMPONENT_NAME}) | |||||
| else() | |||||
| install(TARGETS timeprofiler | |||||
| RUNTIME DESTINATION ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/time_profiler COMPONENT ${RUN_X86_COMPONENT_NAME}) | |||||
| endif() | |||||
@@ -1,19 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tools/time_profiler/time_profiler.h"
-
-int main(int argc, const char **argv) { return mindspore::lite::RunTimeProfiler(argc, argv); }
@@ -1,415 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "tools/time_profiler/time_profiler.h"
-#define __STDC_FORMAT_MACROS
-#include <cinttypes>
-#undef __STDC_FORMAT_MACROS
-#include <cmath>
-#include <algorithm>
-#include <utility>
-#include "include/ms_tensor.h"
-#include "src/common/log_adapter.h"
-#include "include/context.h"
-
-namespace mindspore {
-namespace lite {
-int TimeProfiler::GenerateRandomData(size_t size, void *data) {
-  MS_ASSERT(data != nullptr);
-  char *castedData = static_cast<char *>(data);
-  for (size_t i = 0; i < size; i++) {
-    castedData[i] = static_cast<char>(i);
-  }
-  return RET_OK;
-}
-
-int TimeProfiler::GenerateInputData() {
-  for (auto tensor : ms_inputs_) {
-    MS_ASSERT(tensor != nullptr);
-    auto input_data = tensor->MutableData();
-    if (input_data == nullptr) {
-      MS_LOG(ERROR) << "MallocData for inTensor failed";
-      std::cerr << "MallocData for inTensor failed" << std::endl;
-      return RET_ERROR;
-    }
-    MS_ASSERT(tensor->GetData() != nullptr);
-    auto tensor_byte_size = tensor->Size();
-    auto status = GenerateRandomData(tensor_byte_size, input_data);
-    if (status != RET_OK) {
-      MS_LOG(ERROR) << "Generate RandomData for inTensor failed " << status;
-      std::cerr << "Generate RandomData for inTensor failed " << status << std::endl;
-      return RET_ERROR;
-    }
-  }
-  return RET_OK;
-}
-
-int TimeProfiler::ReadInputFile() {
-  if (ms_inputs_.empty()) {
-    return RET_OK;
-  }
-  auto inTensor = ms_inputs_.at(0);
-  MS_ASSERT(inTensor != nullptr);
-  size_t size;
-  char *bin_buf = ReadFile(_flags->in_data_path_.c_str(), &size);
-  if (bin_buf == nullptr) {
-    MS_LOG(ERROR) << "Read input data failed.";
-    std::cerr << "Read input data failed." << std::endl;
-    return RET_ERROR;
-  }
-  auto tensor_data_size = inTensor->Size();
-  if (size != tensor_data_size) {
-    MS_LOG(ERROR) << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size;
-    std::cerr << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size << std::endl;
-    return RET_ERROR;
-  }
-  auto input_data = inTensor->MutableData();
-  memcpy(input_data, bin_buf, tensor_data_size);
-  delete[] bin_buf;  // ReadFile hands back a new[]-allocated buffer, so delete[] (not delete) is required
-  return RET_OK;
-}
-
-int TimeProfiler::LoadInput() {
-  ms_inputs_ = session_->GetInputs();
-  if (_flags->in_data_path_.empty()) {
-    auto status = GenerateInputData();
-    if (status != RET_OK) {
-      MS_LOG(ERROR) << "Generate input data error " << status;
-      std::cerr << "Generate input data error " << status << std::endl;
-      return RET_ERROR;
-    }
-  } else {
-    auto status = ReadInputFile();
-    if (status != RET_OK) {
-      MS_LOG(ERROR) << "ReadInputFile error " << status;
-      std::cerr << "ReadInputFile error " << status << std::endl;
-      return RET_ERROR;
-    }
-  }
-  return RET_OK;
-}
-
-int TimeProfiler::InitSession() {
-  ctx = new (std::nothrow) lite::Context;
-  if (ctx == nullptr) {
-    return RET_ERROR;
-  }
-  ctx->cpu_bind_mode_ = static_cast<CpuBindMode>(_flags->cpu_bind_mode_);
-  ctx->device_type_ = lite::DT_CPU;
-  ctx->thread_num_ = _flags->num_threads_;
-  ctx->float16_priority = _flags->fp16_priority;
-
-  session_ = session::LiteSession::CreateSession(ctx);
-  if (session_ == nullptr) {
-    MS_LOG(ERROR) << "New session failed while running.";
-    std::cerr << "New session failed while running." << std::endl;
-    return RET_ERROR;
-  }
-  return RET_OK;
-}
-
-int TimeProfiler::InitCallbackParameter() {
-  // before callback
-  before_call_back_ = [&](const std::vector<mindspore::tensor::MSTensor *> &before_inputs,
-                          const std::vector<mindspore::tensor::MSTensor *> &before_outputs,
-                          const session::CallBackParam &callParam) {
-    if (before_inputs.empty()) {
-      MS_LOG(INFO) << "The beforeInputs list is empty";
-    }
-    if (before_outputs.empty()) {
-      MS_LOG(INFO) << "The beforeOutputs list is empty";
-    }
-    if (op_times_by_type_.find(callParam.type_callback_param) == op_times_by_type_.end()) {
-      op_times_by_type_.insert(std::make_pair(callParam.type_callback_param, std::make_pair(0, 0.0f)));
-    }
-    if (op_times_by_name_.find(callParam.name_callback_param) == op_times_by_name_.end()) {
-      op_times_by_name_.insert(std::make_pair(callParam.name_callback_param, std::make_pair(0, 0.0f)));
-    }
-    op_call_times_total_++;
-    op_begin_ = GetTimeUs();
-    return true;
-  };
-
-  // after callback
-  after_call_back_ = [&](const std::vector<mindspore::tensor::MSTensor *> &after_inputs,
-                         const std::vector<mindspore::tensor::MSTensor *> &after_outputs,
-                         const session::CallBackParam &call_param) {
-    uint64_t opEnd = GetTimeUs();
-    if (after_inputs.empty()) {
-      MS_LOG(INFO) << "The afterInputs list is empty";
-    }
-    if (after_outputs.empty()) {
-      MS_LOG(INFO) << "The afterOutputs list is empty";
-    }
-    float cost = static_cast<float>(opEnd - op_begin_) / 1000.0f;
-    op_cost_total_ += cost;
-    op_times_by_type_[call_param.type_callback_param].first++;
-    op_times_by_type_[call_param.type_callback_param].second += cost;
-    op_times_by_name_[call_param.name_callback_param].first++;
-    op_times_by_name_[call_param.name_callback_param].second += cost;
-    return true;
-  };
-  return RET_OK;
-}
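Both callbacks rely on GetTimeUs(), which lives in src/common/utils rather than in this file. A minimal stand-in, assuming the helper returns a monotonic microsecond timestamp:

    #include <chrono>
    #include <cstdint>

    // Hypothetical stand-in for the GetTimeUs() helper defined in
    // src/common/utils: a monotonic timestamp in microseconds.
    inline uint64_t GetTimeUsSketch() {
      auto elapsed = std::chrono::steady_clock::now().time_since_epoch();
      return static_cast<uint64_t>(
          std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count());
    }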
-
-int TimeProfiler::Init() {
-  if (this->_flags == nullptr) {
-    return RET_ERROR;
-  }
-  MS_LOG(INFO) << "ModelPath = " << _flags->model_path_;
-  MS_LOG(INFO) << "InDataPath = " << _flags->in_data_path_;
-  MS_LOG(INFO) << "LoopCount = " << _flags->loop_count_;
-  MS_LOG(INFO) << "NumThreads = " << _flags->num_threads_;
-  MS_LOG(INFO) << "Fp16Priority = " << _flags->fp16_priority;
-
-  if (_flags->num_threads_ < 1) {
-    MS_LOG(ERROR) << "NumThreads: " << _flags->num_threads_ << " must be greater than or equal to 1";
-    std::cerr << "NumThreads: " << _flags->num_threads_ << " must be greater than or equal to 1" << std::endl;
-    return RET_ERROR;
-  }
-  if (_flags->loop_count_ < 1) {
-    MS_LOG(ERROR) << "LoopCount: " << _flags->loop_count_ << " must be greater than or equal to 1";
-    std::cerr << "LoopCount: " << _flags->loop_count_ << " must be greater than or equal to 1" << std::endl;
-    return RET_ERROR;
-  }
-  if (_flags->cpu_bind_mode_ == CpuBindMode::MID_CPU) {
-    MS_LOG(INFO) << "cpuBindMode = MID_CPU";
-  } else if (_flags->cpu_bind_mode_ == CpuBindMode::HIGHER_CPU) {
-    MS_LOG(INFO) << "cpuBindMode = HIGHER_CPU";
-  } else if (_flags->cpu_bind_mode_ == CpuBindMode::NO_BIND) {
-    MS_LOG(INFO) << "cpuBindMode = NO_BIND";
-  } else {
-    MS_LOG(ERROR) << "cpuBindMode Error";
-    return RET_ERROR;
-  }
-  if (_flags->model_path_.empty()) {
-    MS_LOG(ERROR) << "modelPath is required";
-    std::cerr << "modelPath is required" << std::endl;
-    return RET_ERROR;
-  }
-
-  auto status = InitSession();
-  if (status != RET_OK) {
-    MS_LOG(ERROR) << "Init session failed.";
-    std::cerr << "Init session failed." << std::endl;
-    return RET_ERROR;
-  }
-  status = this->LoadInput();
-  if (status != RET_OK) {
-    MS_LOG(ERROR) << "Load input failed.";
-    std::cerr << "Load input failed." << std::endl;
-    return RET_ERROR;
-  }
-  status = InitCallbackParameter();
-  if (status != RET_OK) {
-    MS_LOG(ERROR) << "Init callback parameter failed.";
-    std::cerr << "Init callback parameter failed." << std::endl;
-    return RET_ERROR;
-  }
-  return RET_OK;
-}
-
-int TimeProfiler::PrintResult(const std::vector<std::string> &title,
-                              const std::map<std::string, std::pair<int, float>> &result) {
-  std::vector<size_t> columnLenMax(5);
-  std::vector<std::vector<std::string>> rows;
-
-  for (auto &iter : result) {
-    char stringBuf[5][100] = {};
-    std::vector<std::string> columns;
-    size_t len;
-
-    len = iter.first.size();
-    if (len > columnLenMax.at(0)) {
-      columnLenMax.at(0) = len + 4;
-    }
-    columns.push_back(iter.first);
-
-    len = snprintf(stringBuf[1], sizeof(stringBuf[1]), "%f", iter.second.second / _flags->loop_count_);
-    if (len > columnLenMax.at(1)) {
-      columnLenMax.at(1) = len + 4;
-    }
-    columns.emplace_back(stringBuf[1]);
-
-    len = snprintf(stringBuf[2], sizeof(stringBuf[2]), "%f", iter.second.second / op_cost_total_);
-    if (len > columnLenMax.at(2)) {
-      columnLenMax.at(2) = len + 4;
-    }
-    columns.emplace_back(stringBuf[2]);
-
-    len = snprintf(stringBuf[3], sizeof(stringBuf[3]), "%d", iter.second.first);
-    if (len > columnLenMax.at(3)) {
-      columnLenMax.at(3) = len + 4;
-    }
-    columns.emplace_back(stringBuf[3]);
-
-    len = snprintf(stringBuf[4], sizeof(stringBuf[4]), "%f", iter.second.second);
-    if (len > columnLenMax.at(4)) {
-      columnLenMax.at(4) = len + 4;
-    }
-    columns.emplace_back(stringBuf[4]);
-
-    rows.push_back(columns);
-  }
-
-  printf("-------------------------------------------------------------------------\n");
-  for (int i = 0; i < 5; i++) {
-    auto printBuf = title[i];
-    if (printBuf.size() > columnLenMax.at(i)) {
-      columnLenMax.at(i) = printBuf.size();
-    }
-    printBuf.resize(columnLenMax.at(i), ' ');
-    printf("%s\t", printBuf.c_str());
-  }
-  printf("\n");
-  for (size_t i = 0; i < rows.size(); i++) {
-    for (int j = 0; j < 5; j++) {
-      auto printBuf = rows[i][j];
-      printBuf.resize(columnLenMax.at(j), ' ');
-      printf("%s\t", printBuf.c_str());
-    }
-    printf("\n");
-  }
-  return RET_OK;
-}
-
-int TimeProfiler::RunTimeProfiler() {
-  uint64_t time_avg = 0;
-
-  // Load graph
-  std::string modelName = _flags->model_path_.substr(_flags->model_path_.find_last_of("/") + 1);
-  MS_LOG(INFO) << "start reading model file";
-  size_t size = 0;
-  char *graphBuf = ReadFile(_flags->model_path_.c_str(), &size);
-  if (graphBuf == nullptr) {
-    MS_LOG(ERROR) << "Load graph failed while running " << modelName.c_str();
-    std::cerr << "Load graph failed while running " << modelName.c_str() << std::endl;
-    delete session_;
-    return RET_ERROR;
-  }
-  auto model = lite::Model::Import(graphBuf, size);
-  delete[] graphBuf;  // buffer from ReadFile is new[]-allocated
-  if (model == nullptr) {
-    MS_LOG(ERROR) << "Import model file failed while running " << modelName.c_str();
-    std::cerr << "Import model file failed while running " << modelName.c_str() << std::endl;
-    delete session_;
-    return RET_ERROR;
-  }
-  auto ret = session_->CompileGraph(model);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Compile graph failed.";
-    std::cerr << "Compile graph failed." << std::endl;
-    delete session_;
-    delete model;
-    return RET_ERROR;
-  }
-
-  // load input
-  MS_LOG(INFO) << "start generate input data";
-  auto status = LoadInput();
-  if (status != RET_OK) {
-    MS_LOG(ERROR) << "Generate input data error";
-    std::cerr << "Generate input data error" << std::endl;
-    delete session_;
-    delete model;
-    return status;
-  }
-
-  // run graph and test
-  for (int i = 0; i < _flags->loop_count_; i++) {
-    session_->BindThread(true);
-    uint64_t run_begin = GetTimeUs();
-    ret = session_->RunGraph(before_call_back_, after_call_back_);
-    if (ret != RET_OK) {
-      MS_LOG(ERROR) << "Run graph failed.";
-      std::cerr << "Run graph failed." << std::endl;
-      delete session_;
-      delete model;
-      return RET_ERROR;
-    }
-    auto outputs = session_->GetOutputs();
-    uint64_t run_end = GetTimeUs();
-    uint64_t time = run_end - run_begin;
-    time_avg += time;
-    session_->BindThread(false);
-    outputs.clear();
-  }
-  time_avg /= _flags->loop_count_;
-  float runCost = static_cast<float>(time_avg) / 1000.0f;
-
-  const std::vector<std::string> per_op_name = {"opName", "avg(ms)", "percent", "calledTimes", "opTotalTime"};
-  const std::vector<std::string> per_op_type = {"opType", "avg(ms)", "percent", "calledTimes", "opTotalTime"};
-  PrintResult(per_op_name, op_times_by_name_);
-  PrintResult(per_op_type, op_times_by_type_);
-  printf("\n total time: %5.5f ms, kernel cost: %5.5f ms \n\n", runCost, op_cost_total_ / _flags->loop_count_);
-  printf("-------------------------------------------------------------------------\n");
-
-  delete model;
-  delete session_;
-  return ret;
-}
-
-int RunTimeProfiler(int argc, const char **argv) {
-  TimeProfilerFlags flags;
-  Option<std::string> err = flags.ParseFlags(argc, argv);
-  if (err.IsSome()) {
-    std::cerr << err.Get() << std::endl;
-    std::cerr << flags.Usage() << std::endl;
-    return -1;
-  }
-  if (flags.help) {
-    std::cerr << flags.Usage() << std::endl;
-    return 0;
-  }
-
-  TimeProfiler time_profiler(&flags);
-  auto ret = time_profiler.Init();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init TimeProfiler failed.";
-    std::cerr << "Init TimeProfiler failed." << std::endl;
-    return RET_ERROR;
-  }
-  ret = time_profiler.RunTimeProfiler();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Run TimeProfiler failed.";
-    std::cerr << "Run TimeProfiler failed." << std::endl;
-    return RET_ERROR;
-  }
-  return RET_OK;
-}
-}  // namespace lite
-}  // namespace mindspore
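The core of the removed tool is the before/after callback pair that brackets each kernel. Restated as a standalone sketch, using the API names from the code above and the GetTimeUsSketch() helper shown earlier (ProfileOneRun and times_by_type are illustrative names):

    #include <cstdint>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>
    #include "include/lite_session.h"

    using mindspore::session::CallBackParam;
    using mindspore::tensor::MSTensor;

    // Bracket every kernel with a before/after callback and accumulate
    // per-op-type cost. Assumes a session whose graph is already compiled.
    void ProfileOneRun(mindspore::session::LiteSession *session,
                       std::map<std::string, std::pair<int, float>> *times_by_type) {
      uint64_t begin_us = 0;
      auto before = [&](const std::vector<MSTensor *> &, const std::vector<MSTensor *> &,
                        const CallBackParam &) {
        begin_us = GetTimeUsSketch();
        return true;
      };
      auto after = [&](const std::vector<MSTensor *> &, const std::vector<MSTensor *> &,
                       const CallBackParam &param) {
        float cost_ms = static_cast<float>(GetTimeUsSketch() - begin_us) / 1000.0f;
        (*times_by_type)[param.type_callback_param].first++;            // call count
        (*times_by_type)[param.type_callback_param].second += cost_ms;  // total ms
        return true;
      };
      session->RunGraph(before, after);
    }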
@@ -1,98 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef MINNIE_TIMEPROFILE_TIMEPROFILE_H_
-#define MINNIE_TIMEPROFILE_TIMEPROFILE_H_
-
-#include <getopt.h>
-#include <signal.h>
-#include <iostream>
-#include <map>
-#include <string>
-#include <vector>
-#include <utility>
-#include "include/lite_session.h"
-#include "tools/common/flag_parser.h"
-#include "src/common/file_utils.h"
-#include "src/common/utils.h"
-#include "include/model.h"
-
-namespace mindspore {
-namespace lite {
-class MS_API TimeProfilerFlags : public virtual FlagParser {
- public:
-  TimeProfilerFlags() {
-    AddFlag(&TimeProfilerFlags::model_path_, "modelPath", "Input model path", "");
-    AddFlag(&TimeProfilerFlags::in_data_path_, "inDataPath", "Input data path, if not set, use random input", "");
-    AddFlag(&TimeProfilerFlags::cpu_bind_mode_, "cpuBindMode",
-            "Input -1 for MID_CPU, 1 for HIGHER_CPU, 0 for NO_BIND, default value: 1", 1);
-    AddFlag(&TimeProfilerFlags::loop_count_, "loopCount", "Run loop count", 10);
-    AddFlag(&TimeProfilerFlags::num_threads_, "numThreads", "Run threads number", 2);
-    AddFlag(&TimeProfilerFlags::fp16_priority, "fp16Priority", "Run fp16 ops prior", false);
-  }
-
-  ~TimeProfilerFlags() override = default;
-
- public:
-  std::string model_path_;
-  std::string in_data_path_;
-  int cpu_bind_mode_ = 1;
-  int loop_count_;
-  int num_threads_;
-  bool fp16_priority;
-};
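Given the flags above, a typical invocation of the (since removed) tool looked like the following; the model and data paths are illustrative:

    ./timeprofiler --modelPath=/path/to/model.ms --inDataPath=/path/to/input.bin --loopCount=10 --numThreads=2 --cpuBindMode=1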
-
-class MS_API TimeProfiler {
- public:
-  explicit TimeProfiler(TimeProfilerFlags *flags) : _flags(flags) {}
-  ~TimeProfiler() {
-    if (ctx != nullptr) {
-      delete ctx;
-    }
-  }
-
-  int Init();
-  int RunTimeProfiler();
-
- private:
-  int GenerateRandomData(size_t size, void *data);
-  int GenerateInputData();
-  int LoadInput();
-  int ReadInputFile();
-  int InitCallbackParameter();
-  int InitSession();
-  int PrintResult(const std::vector<std::string> &title, const std::map<std::string, std::pair<int, float>> &result);
-
- private:
-  Context *ctx = nullptr;
-  TimeProfilerFlags *_flags;
-  std::vector<mindspore::tensor::MSTensor *> ms_inputs_;
-  session::LiteSession *session_ = nullptr;
-  // callback parameters
-  uint64_t op_begin_ = 0;
-  int op_call_times_total_ = 0;
-  float op_cost_total_ = 0.0f;
-  std::map<std::string, std::pair<int, float>> op_times_by_type_;
-  std::map<std::string, std::pair<int, float>> op_times_by_name_;
-  session::KernelCallBack before_call_back_;
-  session::KernelCallBack after_call_back_;
-};
-
-int MS_API RunTimeProfiler(int argc, const char **argv);
-}  // namespace lite
-}  // namespace mindspore
-
-#endif  // MINNIE_TIMEPROFILE_TIMEPROFILE_H_