Merge pull request !4553 from wandongdong/uptags/v0.7.0-beta
| @@ -44,6 +44,7 @@ int ActivationOpenClKernel::Init() { | |||||
| MS_LOG(ERROR) << "Activate fun only support dim=4, but your dim=" << in_tensors_[0]->shape().size(); | MS_LOG(ERROR) << "Activate fun only support dim=4, but your dim=" << in_tensors_[0]->shape().size(); | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| ori_format_ = out_tensors_[0]->GetFormat(); | |||||
| std::string program_name = ""; | std::string program_name = ""; | ||||
| std::string kernel_name = ""; | std::string kernel_name = ""; | ||||
| std::string source = activation_source_fp32; | std::string source = activation_source_fp32; | ||||
| @@ -111,6 +111,7 @@ int ArithmeticOpenCLKernel::Init() { | |||||
| runtime_->LoadSource(program_name, source); | runtime_->LoadSource(program_name, source); | ||||
| runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | ||||
| #endif | #endif | ||||
| ori_format_ = out_tensors_[0]->GetFormat(); | |||||
| out_tensors_[0]->SetFormat(schema::Format_NHWC4); | out_tensors_[0]->SetFormat(schema::Format_NHWC4); | ||||
| Image2dGetWorkGroupSize(); | Image2dGetWorkGroupSize(); | ||||
| return 0; | return 0; | ||||
| @@ -56,6 +56,7 @@ int Conv2dTransposeOpenCLKernel::Init() { | |||||
| ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); | ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); | ||||
| #endif | #endif | ||||
| PadWeight(); | PadWeight(); | ||||
| ori_format_ = out_tensors_[0]->GetFormat(); | |||||
| out_tensors_[0]->SetFormat(schema::Format_NHWC4); | out_tensors_[0]->SetFormat(schema::Format_NHWC4); | ||||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | MS_LOG(DEBUG) << kernel_name << " Init Done!"; | ||||
| return RET_OK; | return RET_OK; | ||||
| @@ -40,6 +40,7 @@ int ConvolutionOpenCLKernel::Init() { | |||||
| ocl_runtime->LoadSource(program_name, source); | ocl_runtime->LoadSource(program_name, source); | ||||
| ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); | ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); | ||||
| this->InitBuffer(); | this->InitBuffer(); | ||||
| ori_format_ = out_tensors_[0]->GetFormat(); | |||||
| out_tensors_[0]->SetFormat(schema::Format_NHWC4); | out_tensors_[0]->SetFormat(schema::Format_NHWC4); | ||||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | MS_LOG(DEBUG) << kernel_name << " Init Done!"; | ||||
| return RET_OK; | return RET_OK; | ||||
| @@ -43,6 +43,7 @@ int DepthwiseConv2dOpenCLKernel::Init() { | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | ||||
| std::string kernel_name = "DepthwiseConv2d"; | std::string kernel_name = "DepthwiseConv2d"; | ||||
| auto in_format = in_tensors_[0]->GetFormat(); | auto in_format = in_tensors_[0]->GetFormat(); | ||||
| ori_format_ = out_tensors_[0]->GetFormat(); | |||||
| out_tensors_[0]->SetFormat(in_format); | out_tensors_[0]->SetFormat(in_format); | ||||
| if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) { | if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) { | ||||
| MS_LOG(ERROR) << "input format(" << in_format << ") " | MS_LOG(ERROR) << "input format(" << in_format << ") " | ||||
| @@ -68,6 +68,7 @@ int MatMulOpenCLKernel::Init() { | |||||
| PadWeight(); | PadWeight(); | ||||
| allocator->UnmapBuffer(padWeight_); | allocator->UnmapBuffer(padWeight_); | ||||
| allocator->UnmapBuffer(bias_); | allocator->UnmapBuffer(bias_); | ||||
| ori_format_ = out_tensors_[0]->GetFormat(); | |||||
| out_tensors_[0]->SetFormat(schema::Format_NHWC4); | out_tensors_[0]->SetFormat(schema::Format_NHWC4); | ||||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | MS_LOG(DEBUG) << kernel_name << " Init Done!"; | ||||
| return 0; | return 0; | ||||
| @@ -19,7 +19,6 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | #include "src/runtime/kernel/opencl/opencl_kernel.h" | ||||
| #include "src/runtime/kernel/arm/nnacl/conv_parameter.h" | #include "src/runtime/kernel/arm/nnacl/conv_parameter.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | #include "src/runtime/opencl/opencl_runtime.h" | ||||
| @@ -32,11 +31,11 @@ using FLOAT_T = float; | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| class MatMulOpenCLKernel : public LiteKernel { | |||||
| class MatMulOpenCLKernel : public OpenCLKernel { | |||||
| public: | public: | ||||
| explicit MatMulOpenCLKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | explicit MatMulOpenCLKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, bool hasBias) | const std::vector<lite::tensor::Tensor *> &outputs, bool hasBias) | ||||
| : LiteKernel(parameter, inputs, outputs, nullptr, nullptr) { | |||||
| : OpenCLKernel(parameter, inputs, outputs) { | |||||
| hasBias_ = hasBias; | hasBias_ = hasBias; | ||||
| } | } | ||||
| ~MatMulOpenCLKernel() override{}; | ~MatMulOpenCLKernel() override{}; | ||||
| @@ -57,4 +56,3 @@ class MatMulOpenCLKernel : public LiteKernel { | |||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| #endif // MINDSPORE_LITE_SRC_BACKEND_OPENCL_MATMUL_H_ | #endif // MINDSPORE_LITE_SRC_BACKEND_OPENCL_MATMUL_H_ | ||||
| @@ -73,6 +73,7 @@ int PoolingOpenCLKernel::Init() { | |||||
| ocl_runtime->LoadSource(program_name, source); | ocl_runtime->LoadSource(program_name, source); | ||||
| ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); | ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); | ||||
| #endif | #endif | ||||
| ori_format_ = out_tensors_[0]->GetFormat(); | |||||
| out_tensors_[0]->SetFormat(schema::Format_NHWC4); | out_tensors_[0]->SetFormat(schema::Format_NHWC4); | ||||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | MS_LOG(DEBUG) << kernel_name << " Init Done!"; | ||||
| @@ -48,6 +48,7 @@ int ReshapeOpenCLKernel::Init() { | |||||
| ocl_runtime->LoadSource(program_name, source); | ocl_runtime->LoadSource(program_name, source); | ||||
| ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); | ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); | ||||
| #endif | #endif | ||||
| ori_format_ = out_tensors_[0]->GetFormat(); | |||||
| out_tensors_[0]->SetFormat(schema::Format_NHWC); | out_tensors_[0]->SetFormat(schema::Format_NHWC); | ||||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | MS_LOG(DEBUG) << kernel_name << " Init Done!"; | ||||
| return RET_OK; | return RET_OK; | ||||
| @@ -115,9 +115,10 @@ int SoftmaxOpenCLKernel::Init() { | |||||
| } | } | ||||
| std::set<std::string> build_options; | std::set<std::string> build_options; | ||||
| runtime_->LoadSource(program_name, source); | runtime_->LoadSource(program_name, source); | ||||
| out_tensors_[0]->SetFormat(schema::Format_NHWC4); | |||||
| runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | ||||
| #endif | #endif | ||||
| ori_format_ = out_tensors_[0]->GetFormat(); | |||||
| out_tensors_[0]->SetFormat(schema::Format_NHWC4); | |||||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | MS_LOG(DEBUG) << kernel_name << " Init Done!"; | ||||
| return lite::RET_OK; | return lite::RET_OK; | ||||
| } | } | ||||
| @@ -60,6 +60,7 @@ int TransposeOpenCLKernel::Init() { | |||||
| MS_LOG(ERROR) << "input H * W % 4 != 0 not support!"; | MS_LOG(ERROR) << "input H * W % 4 != 0 not support!"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| ori_format_ = out_tensors_[0]->GetFormat(); | |||||
| out_tensors_[0]->SetFormat(schema::Format_NCHW); | out_tensors_[0]->SetFormat(schema::Format_NCHW); | ||||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | MS_LOG(DEBUG) << kernel_name << " Init Done!"; | ||||
| return RET_OK; | return RET_OK; | ||||
| @@ -49,9 +49,11 @@ class OpenCLKernel : public LiteKernel { | |||||
| } | } | ||||
| OpenCLMemType GetMemType() { return out_mem_type_; } | OpenCLMemType GetMemType() { return out_mem_type_; } | ||||
| void SetMemType(OpenCLMemType mem_type) { out_mem_type_ = mem_type; } | void SetMemType(OpenCLMemType mem_type) { out_mem_type_ = mem_type; } | ||||
| schema::Format GetOriFormat() { return ori_format_;} | |||||
| protected: | protected: | ||||
| OpenCLMemType out_mem_type_{OpenCLMemType::IMG}; | OpenCLMemType out_mem_type_{OpenCLMemType::IMG}; | ||||
| schema::Format ori_format_{schema::Format_NHWC4}; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -34,6 +34,15 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::tensor::Tensor * | |||||
| out_parameters->clear(); | out_parameters->clear(); | ||||
| out_convert_ops->clear(); | out_convert_ops->clear(); | ||||
| for (size_t i = 0; i < in_tensors.size(); ++i) { | for (size_t i = 0; i < in_tensors.size(); ++i) { | ||||
| OpenCLKernel* cur_opencl_op = reinterpret_cast<OpenCLKernel*>(in_kernels[i]); | |||||
| schema::Format ori_format = cur_opencl_op->GetOriFormat(); | |||||
| if (mem_type == cur_opencl_op->GetMemType() && in_tensors[i]->GetFormat() == ori_format) { | |||||
| continue; | |||||
| } | |||||
| auto dst_format = | |||||
| (mem_type == OpenCLMemType::IMG) ? in_kernels[i]->out_tensors()[0]->GetFormat() : ori_format; | |||||
| auto src_format = | |||||
| (mem_type == OpenCLMemType::IMG) ? in_tensors[i]->GetFormat() : in_kernels[i]->out_tensors()[0]->GetFormat(); | |||||
| lite::tensor::Tensor *new_tensor = new (std::nothrow) lite::tensor::Tensor(); | lite::tensor::Tensor *new_tensor = new (std::nothrow) lite::tensor::Tensor(); | ||||
| MS_ASSERT(new_tensor); | MS_ASSERT(new_tensor); | ||||
| if (new_tensor == nullptr) { | if (new_tensor == nullptr) { | ||||
| @@ -41,10 +50,6 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::tensor::Tensor * | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| new_tensor->CopyTensor(*in_tensors[i]); | new_tensor->CopyTensor(*in_tensors[i]); | ||||
| auto dst_format = | |||||
| (mem_type == OpenCLMemType::IMG) ? in_kernels.back()->out_tensors()[0]->GetFormat() : in_tensors[i]->GetFormat(); | |||||
| auto src_format = | |||||
| (mem_type == OpenCLMemType::IMG) ? in_tensors[i]->GetFormat() : in_kernels.front()->out_tensors()[0]->GetFormat(); | |||||
| if ((dst_format == schema::Format_NCHW || dst_format == schema::Format_NC4HW4) && | if ((dst_format == schema::Format_NCHW || dst_format == schema::Format_NC4HW4) && | ||||
| (src_format == schema::Format_NHWC || src_format == schema::Format_NHWC4)) { | (src_format == schema::Format_NHWC || src_format == schema::Format_NHWC4)) { | ||||
| auto &shape = new_tensor->shape(); | auto &shape = new_tensor->shape(); | ||||
| @@ -49,6 +49,13 @@ const char *argv[] = {"./benchmark", "--modelPath=./hiai/model_02.ms", | |||||
| auto status = RunBenchmark(5, argv); | auto status = RunBenchmark(5, argv); | ||||
| ASSERT_EQ(status, RET_OK); | ASSERT_EQ(status, RET_OK); | ||||
| } | } | ||||
| TEST_F(BenchmarkTest, TestOCR_02_GPU_PERF) { | |||||
| const char *argv[] = {"./benchmark", "--modelPath=./hiai/model_02.ms", | |||||
| "--inDataPath=./hiai/model_02_in.bin", | |||||
| "--device=GPU"}; | |||||
| auto status = RunBenchmark(4, argv); | |||||
| ASSERT_EQ(status, RET_OK); | |||||
| } | |||||
| TEST_F(BenchmarkTest, Test_MV2_GPU) { | TEST_F(BenchmarkTest, Test_MV2_GPU) { | ||||
| const char *argv[] = {"./benchmark", "--modelPath=./hiai/mobilenet_v2.ms", | const char *argv[] = {"./benchmark", "--modelPath=./hiai/mobilenet_v2.ms", | ||||
| @@ -59,6 +66,14 @@ auto status = RunBenchmark(5, argv); | |||||
| ASSERT_EQ(status, RET_OK); | ASSERT_EQ(status, RET_OK); | ||||
| } | } | ||||
| TEST_F(BenchmarkTest, Test_MV2_GPU_PERF) { | |||||
| const char *argv[] = {"./benchmark", "--modelPath=./hiai/mobilenet_v2.ms", | |||||
| "--inDataPath=./hiai/mobilenet_v2_in.bin", | |||||
| "--device=GPU"}; | |||||
| auto status = RunBenchmark(4, argv); | |||||
| ASSERT_EQ(status, RET_OK); | |||||
| } | |||||
| TEST_F(BenchmarkTest, TestHebing) { | TEST_F(BenchmarkTest, TestHebing) { | ||||
| const char *argv[] = {"./benchmark", "--modelPath=./hiai/model_hebing_3branch.ms", | const char *argv[] = {"./benchmark", "--modelPath=./hiai/model_hebing_3branch.ms", | ||||
| "--inDataPath=./hiai/model_hebing_3branch.bin", | "--inDataPath=./hiai/model_hebing_3branch.bin", | ||||