@@ -527,7 +527,7 @@ build_lite()
 {
     echo "start build mindspore lite project"
-    if [[ "${ENABLE_GPU}" == "on" ]]; then
+    if [ "${ENABLE_GPU}" == "on" ] || [ "${LITE_PLATFORM}" == "arm64" ]; then
         echo "start build opencl"
         build_opencl
     fi
@@ -554,7 +554,7 @@ build_lite()
               -DANDROID_NDK="${ANDROID_NDK}" -DANDROID_ABI="arm64-v8a" -DANDROID_TOOLCHAIN_NAME="aarch64-linux-android-clang" \
               -DANDROID_STL="c++_shared" -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DSUPPORT_TRAIN=${SUPPORT_TRAIN} \
               -DBUILD_DEVICE=on -DPLATFORM_ARM64=on -DBUILD_CONVERTER=off -DENABLE_NEON=on -DENABLE_FP16="off" \
-              -DSUPPORT_GPU=${ENABLE_GPU} -DOFFLINE_COMPILE=${OPENCL_OFFLINE_COMPILE} "${BASEPATH}/mindspore/lite"
+              -DSUPPORT_GPU=on -DOFFLINE_COMPILE=${OPENCL_OFFLINE_COMPILE} "${BASEPATH}/mindspore/lite"
    elif [[ "${LITE_PLATFORM}" == "arm32" ]]; then
        checkndk
        cmake -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" -DANDROID_NATIVE_API_LEVEL="19" \
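Net effect of the two build.sh hunks: an arm64 lite build now always compiles the OpenCL kernels and passes -DSUPPORT_GPU=on to CMake, so the GPU runtime is baked into every arm64 package rather than only into builds where ENABLE_GPU was set.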
@@ -50,12 +50,12 @@ int SubGraphOpenCLKernel::UnInit() {
   }
   for (const auto tensor : inputs_) {
     if (tensor != nullptr) {
-      tensor->FreeData(allocator_);
+      tensor->FreeData();
     }
   }
   for (const auto tensor : outputs_) {
     if (tensor != nullptr) {
-      tensor->FreeData(allocator_);
+      tensor->FreeData();
     }
   }
   return 0;
@@ -44,7 +44,7 @@ int OpenCLExecutor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tenso
   auto &outputs = kernel->GetOutputs();
   for (auto *output : outputs) {
     MS_ASSERT(nullptr != output);
-    output->MallocData(allocator_);
+    output->MallocData();
   }
   kernel::CallBackParam callbackParam;
   callbackParam.name_callback_aram = kernel->Name();
@@ -67,7 +67,7 @@ int OpenCLExecutor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tenso
   }
   for (auto input_kernel : kernel->GetInKernels()) {
     MS_EXCEPTION_IF_NULL(input_kernel);
-    ret = input_kernel->DecOutTensorRefCount(allocator_);
+    ret = input_kernel->DecOutTensorRefCount();
     if (0 != ret) {
       MS_LOG(WARNING) << "DecOutTensorRefCount for kernel" << kernel->Name() << " failed";
     }
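The three hunks above share one pattern: FreeData, MallocData, and DecOutTensorRefCount all drop their explicit allocator argument, which only works if each tensor records the allocator that owns its buffer. Below is a minimal sketch of that ownership pattern; the Allocator and Tensor types, the allocator_ member, and the defaulted MallocData parameter are illustrative assumptions, not the actual MindSpore Lite classes.

```cpp
#include <cstdlib>

// Illustrative stand-in, not the real MindSpore Lite allocator.
struct Allocator {
  void *Malloc(size_t size) { return std::malloc(size); }
  void Free(void *ptr) { std::free(ptr); }
};

class Tensor {
 public:
  explicit Tensor(size_t size) : size_(size) {}

  // Record the allocator at allocation time...
  void MallocData(Allocator *allocator = nullptr) {
    allocator_ = allocator;
    data_ = (allocator_ != nullptr) ? allocator_->Malloc(size_) : std::malloc(size_);
  }

  // ...so freeing no longer requires the caller to pass it back in.
  void FreeData() {
    if (data_ == nullptr) {
      return;
    }
    if (allocator_ != nullptr) {
      allocator_->Free(data_);
    } else {
      std::free(data_);
    }
    data_ = nullptr;
  }

 private:
  size_t size_;
  void *data_ = nullptr;
  Allocator *allocator_ = nullptr;  // not owned; null means the default heap
};
```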
@@ -41,6 +41,14 @@ TEST_F(BenchmarkTest, TestOCR_02) {
   ASSERT_EQ(status, RET_OK);
 }
 
+TEST_F(BenchmarkTest, TestOCR_02_GPU) {
+  const char *argv[] = {"./benchmark", "--modelPath=./hiai/hiai_cv_focusShootOCRMOdel_02.ms"
+                        "--inDataPath=./hiai/hiai_cv_focusShootOCRMOdel_02.bin"
+                        "--calibDataPath=./hiai/hiai_cv_focusShootOCRMOdel_02.txt"};
+  auto status = RunBenchmark(2, argv);
+  ASSERT_EQ(status, RET_OK);
+}
+
 TEST_F(BenchmarkTest, TestHebing) {
   const char *argv[] = {"./benchmark", "--modelPath=./hiai/model_hebing_3branch.ms"
                         "--inDataPath=./hiai/model_hebing_3branch.bin"
@@ -52,7 +52,7 @@ TEST_F(TestMatMulOpenCL, MatMulFp32) {
   auto *arith_kernel = new kernel::MatMulOpenCLKernel(nullptr, inputs, outputs, false);
   arith_kernel->Init();
-  std::vector<LiteKernel *> kernels{arith_kernel};
+  std::vector<kernel::LiteKernel *> kernels{arith_kernel};
   auto *pGraph = new kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
   pGraph->Init();
@@ -51,7 +51,7 @@ TEST_F(TestSoftmaxOpenCL, SoftmaxFp32) {
   MS_LOG(INFO) << "create OpenCL Kernel";
   auto *Softmax_kernel = new kernel::SoftmaxOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
   Softmax_kernel->Init();
-  std::vector<LiteKernel *> kernels{Softmax_kernel};
+  std::vector<kernel::LiteKernel *> kernels{Softmax_kernel};
   MS_LOG(INFO) << "create SubGraphOpenCLKernel";
   auto *pGraph = new kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
@@ -351,6 +351,8 @@ int Benchmark::RunBenchmark(const std::string &deviceType) {
   auto context = new lite::Context;
   if (_flags->device == "CPU") {
     context->device_ctx_.type = lite::DT_CPU;
+  } else if (_flags->device == "GPU") {
+    context->device_ctx_.type = lite::DT_GPU;
   } else {
     context->device_ctx_.type = lite::DT_NPU;
   }
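The same three-way selection is what any embedder of the lite runtime would write. A hedged sketch of wiring the new DT_GPU value into session creation follows; the include paths, the CreateSessionFor helper, and the assumption that LiteSession::CreateSession copies the context are mine, not confirmed by this diff.

```cpp
#include <string>

#include "include/context.h"       // assumed header for lite::Context
#include "include/lite_session.h"  // assumed header for session::LiteSession

using namespace mindspore;

session::LiteSession *CreateSessionFor(const std::string &device) {
  auto *context = new lite::Context;
  if (device == "CPU") {
    context->device_ctx_.type = lite::DT_CPU;
  } else if (device == "GPU") {
    context->device_ctx_.type = lite::DT_GPU;  // new in this change
  } else {
    context->device_ctx_.type = lite::DT_NPU;
  }
  auto *session = session::LiteSession::CreateSession(context);
  delete context;  // assumes the session copies the settings it needs
  return session;
}
```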
@@ -57,7 +57,7 @@ class MS_API BenchmarkFlags : public virtual FlagParser {
     AddFlag(&BenchmarkFlags::inDataPath, "inDataPath", "Input data path, if not set, use random input", "");
     AddFlag(&BenchmarkFlags::inDataTypeIn, "inDataType", "Input data type. img | bin", "bin");
     AddFlag(&BenchmarkFlags::omModelPath, "omModelPath", "OM model path, only required when device is NPU", "");
-    AddFlag(&BenchmarkFlags::device, "device", "CPU | NPU", "CPU");
+    AddFlag(&BenchmarkFlags::device, "device", "CPU | NPU | GPU", "CPU");
     AddFlag(&BenchmarkFlags::cpuBindMode, "cpuBindMode",
             "Input -1 for MID_CPU, 1 for HIGHER_CPU, 0 for NO_BIND, default value: 1", 1);
     // MarkPerformance
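With the flag registered, a GPU run is requested the same way as the other devices, for example ./benchmark --modelPath=model.ms --device=GPU (paths illustrative). Note that any value other than CPU or GPU still falls through to the NPU branch in RunBenchmark above; the flag value is not validated against the listed set.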