@@ -34,7 +34,7 @@ typedef enum {
 typedef enum {
   DT_CPU, /**< CPU device type */
   DT_GPU, /**< GPU device type */
-  DT_NPU  /**< NPU device type, not supported yet */
+  DT_NPU  /**< NPU device type */
 } DeviceType;
 
 /// \brief CpuDeviceInfo defined for CPU's configuration information.
@@ -88,10 +88,11 @@ int NPUExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<
     if (std::find(trans_tensors.begin(), trans_tensors.end(), out_tensors[i]) != trans_tensors.end()) {
       // Change data&tensor shape nc->nh
-      PackNCHWToNHWCFp32(npu_output_tensors_[i]->GetBuffer(), data, out_tensors[i]->Batch(),
-                         out_tensors[i]->Width() * out_tensors[i]->Height(), out_tensors[i]->Channel());
-      out_tensors[i]->set_shape({out_tensors[i]->shape()[0], out_tensors[i]->shape()[2], out_tensors[i]->shape()[3],
-                                 out_tensors[i]->shape()[1]});
+      PackNCHWToNHWCFp32(npu_output_tensors_[i]->GetBuffer(), data,
+                         npu_output_tensors_[i]->GetTensorDimension().GetNumber(),
+                         npu_output_tensors_[i]->GetTensorDimension().GetWidth() *
+                           npu_output_tensors_[i]->GetTensorDimension().GetHeight(),
+                         npu_output_tensors_[i]->GetTensorDimension().GetChannel());
     } else {
       memcpy(data, npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
     }
     out_tensors[i]->ResetRefCount();
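For context on the layout conversion above: the NPU hands back buffers in NCHW order, and the change reads N/H/W/C from the NPU tensor's own dimensions instead of from the lite output tensor, which also removes the need to mutate the output tensor's shape with `set_shape`. A minimal sketch of what an NCHW→NHWC repack like `PackNCHWToNHWCFp32` does (illustrative only; the real implementation is not part of this diff, and the parameter order is assumed from the call site):

```cpp
// Illustrative NCHW -> NHWC repack for float buffers. Parameter order
// (src, dst, batch, plane, channel) is assumed from the call site above,
// where plane == height * width.
void PackNCHWToNHWCFp32Sketch(const float *src, float *dst, int batch, int plane, int channel) {
  for (int n = 0; n < batch; ++n) {
    for (int p = 0; p < plane; ++p) {
      for (int c = 0; c < channel; ++c) {
        // NCHW offset: ((n * channel + c) * plane + p)
        // NHWC offset: ((n * plane + p) * channel + c)
        dst[(n * plane + p) * channel + c] = src[(n * channel + c) * plane + p];
      }
    }
  }
}
```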
@@ -47,7 +47,7 @@ bool NPUManager::CheckEMUIVersion() {
   int pos = emui_str.find('_');
   if (pos != std::string::npos) {
     auto version = emui_str.substr(pos + 1);
-    int ret = CompareVersion(version, "11.0.0");
+    int ret = CompareVersion(version, "10.0.0");
     if (ret < 0) {
       return false;
     }
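The EMUI gate is relaxed here from 11.0.0 to 10.0.0, so EMUI 10 devices can take the NPU path. The call site assumes a strcmp-style contract from `CompareVersion`; a hypothetical sketch of such a dotted-version comparison (the repo's actual helper may differ in detail):

```cpp
#include <cstdlib>
#include <sstream>
#include <string>

// Hypothetical dotted-version comparison with the contract assumed above:
// negative if lhs < rhs, zero if equal, positive if lhs > rhs.
int CompareVersionSketch(const std::string &lhs, const std::string &rhs) {
  std::istringstream ls(lhs), rs(rhs);
  std::string lseg, rseg;
  while (true) {
    bool lok = static_cast<bool>(std::getline(ls, lseg, '.'));
    bool rok = static_cast<bool>(std::getline(rs, rseg, '.'));
    if (!lok && !rok) return 0;                    // all segments matched
    int lnum = lok ? std::atoi(lseg.c_str()) : 0;  // missing segment counts as 0
    int rnum = rok ? std::atoi(rseg.c_str()) : 0;
    if (lnum != rnum) return lnum < rnum ? -1 : 1;
  }
}
```

With the relaxed threshold, a property value like `EmotionUI_10.1.0` yields `version = "10.1.0"` and passes the check, while `9.1.0` still returns false.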
@@ -138,6 +138,7 @@ int NPUManager::AddModel(domi::ModelBufferData *model_buffer_data, const std::st
   index_++;
   return RET_OK;
 }
+
 std::shared_ptr<hiai::AiModelMngerClient> NPUManager::CreateAiModelMngerClient() {
   auto client = std::make_shared<hiai::AiModelMngerClient>();
   if (client == nullptr) {
@@ -151,6 +152,7 @@ std::shared_ptr<hiai::AiModelMngerClient> NPUManager::CreateAiModelMngerClient()
   }
   return client;
 }
+
 int NPUManager::LoadOMModel() {
   std::vector<std::shared_ptr<hiai::AiModelDescription>> models_desc;
   std::shared_ptr<hiai::AiModelMngerClient> client = nullptr;
@@ -78,7 +78,7 @@ domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() {
 }
 
 int SubGraphNpuKernel::Run() {
-  return reinterpret_cast<lite::NPUExecutor *>(this->executor_)->Run(in_tensors_, out_tensors_, out_kernels_, nodes_);
+  return reinterpret_cast<lite::NPUExecutor *>(this->executor_)->Run(in_tensors_, out_tensors_, out_nodes_, nodes_);
 }
 
 int SubGraphNpuKernel::BuildNPUInputOp() {
@@ -0,0 +1,3 @@
+mobilenet_v1_1.0_224.tflite 3
+squeezenet.tflite 3
+inception_v3.tflite 3
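Each line of the new `models_npu.cfg` pairs a model file with an accuracy limit; the `Run_arm64` loop added below splits the line on whitespace, so `mobilenet_v1_1.0_224.tflite 3` runs that model with `--accuracyThreshold=3`.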
@@ -1314,6 +1314,22 @@ function Run_arm64() {
         fi
     done < ${models_mindspore_weightquant_config}
 
+    # Run npu converted models:
+    while read line; do
+        model_name=`echo ${line}|awk -F ' ' '{print $1}'`
+        accuracy_limit=`echo ${line}|awk -F ' ' '{print $2}'`
+        echo "mindspore run npu: ${model_name}, accuracy limit:${accuracy_limit}" >> "${run_arm64_log_file}"
+        echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=NPU --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --accuracyThreshold='${accuracy_limit} >> "${run_arm64_log_file}"
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=NPU --modelFile='${model_name}'.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --accuracyThreshold='${accuracy_limit} >> adb_run_cmd.txt
+        adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}"
+        if [ $? = 0 ]; then
+            run_result='arm64_npu: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
+        else
+            run_result='arm64_npu: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
+        fi
+    done < ${models_npu_config}
+
     # Run converted models which has several inputs or does not need to be cared about the accuracy:
     while read line; do
         model_name=${line%%;*}
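Note on the loop above: it reads into `line`, so `model_name` and `accuracy_limit` are extracted from `${line}` (the `${mindspore_line_info}` variable from the earlier mindspore loop would be stale here). The benchmark command is intentionally echoed twice, once into `${run_arm64_log_file}` so the log records the exact invocation and once into `adb_run_cmd.txt`, which is then piped to `adb shell`; a non-zero exit marks the model as failed and aborts the run via `return 1`, matching the other model loops in this script.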
@@ -1492,6 +1508,7 @@ models_gpu_fp16_config=${basepath}/models_gpu_fp16.cfg
 models_gpu_weightquant_config=${basepath}/models_gpu_weightquant.cfg
 models_mindspore_weightquant_config=${basepath}/models_mindspore_weightquant.cfg
 models_arm32_config=${basepath}/models_arm32.cfg
+models_npu_config=${basepath}/models_npu.cfg
 models_compatibility_config=${basepath}/models_compatibility.cfg
 models_only_for_process_config=${basepath}/models_with_several_inputs_or_without_outputs.cfg