diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc index 49a99d1150..adde3fe982 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc @@ -44,6 +44,7 @@ int ActivationOpenClKernel::Init() { MS_LOG(ERROR) << "Activate fun only support dim=4, but your dim=" << in_tensors_[0]->shape().size(); return RET_ERROR; } + ori_format_ = out_tensors_[0]->GetFormat(); std::string program_name = ""; std::string kernel_name = ""; std::string source = activation_source_fp32; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc index 6f7b9498ab..ffdc3b076e 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc @@ -111,6 +111,7 @@ int ArithmeticOpenCLKernel::Init() { runtime_->LoadSource(program_name, source); runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif + ori_format_ = out_tensors_[0]->GetFormat(); out_tensors_[0]->SetFormat(schema::Format_NHWC4); Image2dGetWorkGroupSize(); return 0; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc index ecb8cb15da..7339b5bbae 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc @@ -56,6 +56,7 @@ int Conv2dTransposeOpenCLKernel::Init() { ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif PadWeight(); + ori_format_ = out_tensors_[0]->GetFormat(); out_tensors_[0]->SetFormat(schema::Format_NHWC4); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc 
b/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc index 726dc0ac36..f881b7c6f7 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc @@ -40,6 +40,7 @@ int ConvolutionOpenCLKernel::Init() { ocl_runtime->LoadSource(program_name, source); ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); this->InitBuffer(); + ori_format_ = out_tensors_[0]->GetFormat(); out_tensors_[0]->SetFormat(schema::Format_NHWC4); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc index 76445d0744..7194f2b775 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc @@ -43,6 +43,7 @@ int DepthwiseConv2dOpenCLKernel::Init() { auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); std::string kernel_name = "DepthwiseConv2d"; auto in_format = in_tensors_[0]->GetFormat(); + ori_format_ = out_tensors_[0]->GetFormat(); out_tensors_[0]->SetFormat(in_format); if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) { MS_LOG(ERROR) << "input format(" << in_format << ") " diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc index a6ae60ea72..06e993dc45 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc @@ -68,6 +68,7 @@ int MatMulOpenCLKernel::Init() { PadWeight(); allocator->UnmapBuffer(padWeight_); allocator->UnmapBuffer(bias_); + ori_format_ = out_tensors_[0]->GetFormat(); out_tensors_[0]->SetFormat(schema::Format_NHWC4); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return 0; diff --git 
a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h index a1e3e6cbfa..ffd279e28f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h @@ -19,7 +19,6 @@ #include <vector> - #include "src/runtime/kernel/opencl/opencl_kernel.h" #include "src/runtime/kernel/arm/nnacl/conv_parameter.h" #include "src/runtime/opencl/opencl_runtime.h" @@ -32,11 +31,11 @@ using FLOAT_T = float; namespace mindspore::kernel { -class MatMulOpenCLKernel : public LiteKernel { +class MatMulOpenCLKernel : public OpenCLKernel { public: explicit MatMulOpenCLKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, const std::vector<lite::tensor::Tensor *> &outputs, bool hasBias) - : LiteKernel(parameter, inputs, outputs, nullptr, nullptr) { + : OpenCLKernel(parameter, inputs, outputs) { hasBias_ = hasBias; } ~MatMulOpenCLKernel() override{}; @@ -57,4 +56,3 @@ class MatMulOpenCLKernel : public LiteKernel { } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_BACKEND_OPENCL_MATMUL_H_ - diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc index 99607ff43e..fcee02ce94 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc @@ -73,6 +73,7 @@ int PoolingOpenCLKernel::Init() { ocl_runtime->LoadSource(program_name, source); ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif + ori_format_ = out_tensors_[0]->GetFormat(); out_tensors_[0]->SetFormat(schema::Format_NHWC4); MS_LOG(DEBUG) << kernel_name << " Init Done!"; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc index 4394cde16b..a0fc919a6b 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc +++ 
b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc @@ -48,6 +48,7 @@ int ReshapeOpenCLKernel::Init() { ocl_runtime->LoadSource(program_name, source); ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif + ori_format_ = out_tensors_[0]->GetFormat(); out_tensors_[0]->SetFormat(schema::Format_NHWC); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc index 1396cae004..0f27337159 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc @@ -115,9 +115,10 @@ int SoftmaxOpenCLKernel::Init() { } std::set<std::string> build_options; runtime_->LoadSource(program_name, source); - out_tensors_[0]->SetFormat(schema::Format_NHWC4); runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); #endif + ori_format_ = out_tensors_[0]->GetFormat(); + out_tensors_[0]->SetFormat(schema::Format_NHWC4); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return lite::RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc index c76920c47f..200aed88ef 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc @@ -60,6 +60,7 @@ int TransposeOpenCLKernel::Init() { MS_LOG(ERROR) << "input H * W % 4 != 0 not support!"; return RET_ERROR; } + ori_format_ = out_tensors_[0]->GetFormat(); out_tensors_[0]->SetFormat(schema::Format_NCHW); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h index d1facd0bc1..295638bc58 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h +++ 
b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h @@ -49,9 +49,11 @@ class OpenCLKernel : public LiteKernel { } OpenCLMemType GetMemType() { return out_mem_type_; } void SetMemType(OpenCLMemType mem_type) { out_mem_type_ = mem_type; } + schema::Format GetOriFormat() { return ori_format_; } protected: OpenCLMemType out_mem_type_{OpenCLMemType::IMG}; + schema::Format ori_format_{schema::Format_NHWC4}; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc index 05514815d7..3542538cc5 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc @@ -34,6 +34,15 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vectorclear(); out_convert_ops->clear(); for (size_t i = 0; i < in_tensors.size(); ++i) { + OpenCLKernel* cur_opencl_op = reinterpret_cast<OpenCLKernel *>(in_kernels[i]); + schema::Format ori_format = cur_opencl_op->GetOriFormat(); + if (mem_type == cur_opencl_op->GetMemType() && in_tensors[i]->GetFormat() == ori_format) { continue; } + auto dst_format = + (mem_type == OpenCLMemType::IMG) ? in_kernels[i]->out_tensors()[0]->GetFormat() : ori_format; + auto src_format = + (mem_type == OpenCLMemType::IMG) ? in_tensors[i]->GetFormat() : in_kernels[i]->out_tensors()[0]->GetFormat(); lite::tensor::Tensor *new_tensor = new (std::nothrow) lite::tensor::Tensor(); MS_ASSERT(new_tensor); if (new_tensor == nullptr) { @@ -41,10 +50,6 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vectorCopyTensor(*in_tensors[i]); - auto dst_format = - (mem_type == OpenCLMemType::IMG) ? in_kernels.back()->out_tensors()[0]->GetFormat() : in_tensors[i]->GetFormat(); - auto src_format = - (mem_type == OpenCLMemType::IMG) ? 
in_tensors[i]->GetFormat() : in_kernels.front()->out_tensors()[0]->GetFormat(); if ((dst_format == schema::Format_NCHW || dst_format == schema::Format_NC4HW4) && (src_format == schema::Format_NHWC || src_format == schema::Format_NHWC4)) { auto &shape = new_tensor->shape(); diff --git a/mindspore/lite/test/st/benchmark_test.cc b/mindspore/lite/test/st/benchmark_test.cc index bc28a4a79a..99c2e42666 100644 --- a/mindspore/lite/test/st/benchmark_test.cc +++ b/mindspore/lite/test/st/benchmark_test.cc @@ -49,6 +49,13 @@ const char *argv[] = {"./benchmark", "--modelPath=./hiai/model_02.ms", auto status = RunBenchmark(5, argv); ASSERT_EQ(status, RET_OK); } +TEST_F(BenchmarkTest, TestOCR_02_GPU_PERF) { +const char *argv[] = {"./benchmark", "--modelPath=./hiai/model_02.ms", + "--inDataPath=./hiai/model_02_in.bin", + "--device=GPU"}; +auto status = RunBenchmark(4, argv); +ASSERT_EQ(status, RET_OK); +} TEST_F(BenchmarkTest, Test_MV2_GPU) { const char *argv[] = {"./benchmark", "--modelPath=./hiai/mobilenet_v2.ms", @@ -59,6 +66,14 @@ auto status = RunBenchmark(5, argv); ASSERT_EQ(status, RET_OK); } +TEST_F(BenchmarkTest, Test_MV2_GPU_PERF) { + const char *argv[] = {"./benchmark", "--modelPath=./hiai/mobilenet_v2.ms", + "--inDataPath=./hiai/mobilenet_v2_in.bin", + "--device=GPU"}; + auto status = RunBenchmark(4, argv); + ASSERT_EQ(status, RET_OK); +} + TEST_F(BenchmarkTest, TestHebing) { const char *argv[] = {"./benchmark", "--modelPath=./hiai/model_hebing_3branch.ms", "--inDataPath=./hiai/model_hebing_3branch.bin",