Merge pull request !4752 from chenzhongming/lite (tags/v0.7.0-beta)
| @@ -112,7 +112,6 @@ int PoolingOpenCLKernel::Run() { | |||||
| MS_LOG(DEBUG) << this->name() << " Running!"; | MS_LOG(DEBUG) << this->name() << " Running!"; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | ||||
| // attribute | |||||
| int slices = UP_DIV(out_tensors_[0]->Channel(), C4NUM); | int slices = UP_DIV(out_tensors_[0]->Channel(), C4NUM); | ||||
| cl_int4 input_shape = {in_tensors_[0]->Height(), in_tensors_[0]->Width(), in_tensors_[0]->Channel(), slices}; | cl_int4 input_shape = {in_tensors_[0]->Height(), in_tensors_[0]->Width(), in_tensors_[0]->Channel(), slices}; | ||||
| cl_int4 output_shape = {out_tensors_[0]->Height(), out_tensors_[0]->Width(), out_tensors_[0]->Channel(), slices}; | cl_int4 output_shape = {out_tensors_[0]->Height(), out_tensors_[0]->Width(), out_tensors_[0]->Channel(), slices}; | ||||
| @@ -120,7 +119,6 @@ int PoolingOpenCLKernel::Run() { | |||||
| cl_int2 kernel_size = {parameter_->window_h_, parameter_->window_w_}; | cl_int2 kernel_size = {parameter_->window_h_, parameter_->window_w_}; | ||||
| cl_int2 padding = {parameter_->pad_u_, parameter_->pad_l_}; | cl_int2 padding = {parameter_->pad_u_, parameter_->pad_l_}; | ||||
| // binding parameters | |||||
| int arg_idx = 0; | int arg_idx = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->Data()); | ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->Data()); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->Data()); | ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->Data()); | ||||
| @@ -130,14 +128,12 @@ int PoolingOpenCLKernel::Run() { | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, kernel_size); | ocl_runtime->SetKernelArg(kernel_, arg_idx++, kernel_size); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, padding); | ocl_runtime->SetKernelArg(kernel_, arg_idx++, padding); | ||||
| // set work group size | |||||
| std::vector<size_t> local_size; | std::vector<size_t> local_size; | ||||
| std::vector<size_t> global_size = InitGlobalSize(); | std::vector<size_t> global_size = InitGlobalSize(); | ||||
| int max_work_group_size = ocl_runtime->GetKernelMaxWorkGroupSize(kernel_(), (*ocl_runtime->Device())()); | int max_work_group_size = ocl_runtime->GetKernelMaxWorkGroupSize(kernel_(), (*ocl_runtime->Device())()); | ||||
| local_size = GetCommonLocalSize(global_size, max_work_group_size); | local_size = GetCommonLocalSize(global_size, max_work_group_size); | ||||
| global_size = GetCommonGlobalSize(local_size, global_size); | global_size = GetCommonGlobalSize(local_size, global_size); | ||||
| // run opengl kernel | |||||
| ocl_runtime->RunKernel(kernel_, global_size, local_size, nullptr); | ocl_runtime->RunKernel(kernel_, global_size, local_size, nullptr); | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -171,6 +171,7 @@ kernel::LiteKernel *OpenCLSoftMaxKernelCreator(const std::vector<lite::tensor::T | |||||
| auto *kernel = new (std::nothrow) SoftmaxOpenCLKernel(reinterpret_cast<OpParameter *>(opParameter), inputs, outputs); | auto *kernel = new (std::nothrow) SoftmaxOpenCLKernel(reinterpret_cast<OpParameter *>(opParameter), inputs, outputs); | ||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "kernel " << opParameter->name_ << "is nullptr."; | MS_LOG(ERROR) << "kernel " << opParameter->name_ << "is nullptr."; | ||||
| delete kernel; | |||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| if (inputs[0]->shape()[0] > 1) { | if (inputs[0]->shape()[0] > 1) { | ||||
| @@ -58,7 +58,7 @@ TEST_F(TestAvgPoolingOpenCL, AvgPoolFp32) { | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| MS_LOG(INFO) << "create PoolingParameter"; | MS_LOG(INFO) << "create PoolingParameter"; | ||||
| auto param = new PoolingParameter(); | |||||
| auto param = new (std::nothrow) PoolingParameter(); | |||||
| InitAvgPoolingParam(param); | InitAvgPoolingParam(param); | ||||
| MS_LOG(INFO) << "create Tensors"; | MS_LOG(INFO) << "create Tensors"; | ||||
| @@ -76,18 +76,37 @@ TEST_F(TestAvgPoolingOpenCL, AvgPoolFp32) { | |||||
| }; | }; | ||||
| auto data_type = kNumberTypeFloat32; | auto data_type = kNumberTypeFloat32; | ||||
| auto tensorType = schema::NodeType_ValueNode; | auto tensorType = schema::NodeType_ValueNode; | ||||
| lite::tensor::Tensor *tensor_in = new lite::tensor::Tensor(data_type, shape_in, schema::Format_NHWC, tensorType); | |||||
| lite::tensor::Tensor *tensor_out = new lite::tensor::Tensor(data_type, shape_out, schema::Format_NHWC, tensorType); | |||||
| lite::tensor::Tensor *tensor_in = | |||||
| new (std::nothrow) lite::tensor::Tensor(data_type, shape_in, schema::Format_NHWC, tensorType); | |||||
| lite::tensor::Tensor *tensor_out = | |||||
| new (std::nothrow) lite::tensor::Tensor(data_type, shape_out, schema::Format_NHWC, tensorType); | |||||
| if (tensor_in == nullptr) { | |||||
| MS_LOG(ERROR) << "tensor_in null"; | |||||
| return; | |||||
| } | |||||
| if (tensor_out == nullptr) { | |||||
| MS_LOG(ERROR) << "tensor_out null"; | |||||
| return; | |||||
| } | |||||
| std::vector<lite::tensor::Tensor *> inputs{tensor_in}; | std::vector<lite::tensor::Tensor *> inputs{tensor_in}; | ||||
| std::vector<lite::tensor::Tensor *> outputs{tensor_out}; | std::vector<lite::tensor::Tensor *> outputs{tensor_out}; | ||||
| MS_LOG(INFO) << "create OpenCL Kernel"; | MS_LOG(INFO) << "create OpenCL Kernel"; | ||||
| auto *pooling_kernel = new kernel::PoolingOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs); | |||||
| auto *pooling_kernel = | |||||
| new (std::nothrow) kernel::PoolingOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs); | |||||
| if (pooling_kernel == nullptr) { | |||||
| MS_LOG(ERROR) << "pooling_kernel null"; | |||||
| return; | |||||
| } | |||||
| pooling_kernel->Init(); | pooling_kernel->Init(); | ||||
| std::vector<kernel::LiteKernel *> kernels{pooling_kernel}; | std::vector<kernel::LiteKernel *> kernels{pooling_kernel}; | ||||
| MS_LOG(INFO) << "create SubGraphOpenCLKernel"; | MS_LOG(INFO) << "create SubGraphOpenCLKernel"; | ||||
| auto *pGraph = new kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); | |||||
| auto *pGraph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); | |||||
| if (pGraph == nullptr) { | |||||
| MS_LOG(ERROR) << "pGraph null"; | |||||
| return; | |||||
| } | |||||
| pGraph->Init(); | pGraph->Init(); | ||||
| MS_LOG(INFO) << "initialize data"; | MS_LOG(INFO) << "initialize data"; | ||||
| @@ -46,7 +46,7 @@ TEST_F(TestMaxPoolingOpenCL, MaxPool_1_32_512_96) { | |||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| MS_LOG(INFO) << "PoolingParameter"; | MS_LOG(INFO) << "PoolingParameter"; | ||||
| auto param = new PoolingParameter; | |||||
| auto param = new (std::nothrow) PoolingParameter; | |||||
| InitParameter(param); | InitParameter(param); | ||||
| // define tensor | // define tensor | ||||
| @@ -56,21 +56,39 @@ TEST_F(TestMaxPoolingOpenCL, MaxPool_1_32_512_96) { | |||||
| auto data_type = kNumberTypeFloat32; | auto data_type = kNumberTypeFloat32; | ||||
| auto tensorType = schema::NodeType_ValueNode; | auto tensorType = schema::NodeType_ValueNode; | ||||
| MS_LOG(INFO) << "define tensor2"; | MS_LOG(INFO) << "define tensor2"; | ||||
| auto input_tensor = new lite::tensor::Tensor(data_type, input_shape, schema::Format_NHWC4, tensorType); | |||||
| auto output_tensor = new lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC4, tensorType); | |||||
| auto input_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, schema::Format_NHWC4, tensorType); | |||||
| auto output_tensor = | |||||
| new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC4, tensorType); | |||||
| if (input_tensor == nullptr) { | |||||
| MS_LOG(ERROR) << "input_tensor null"; | |||||
| return; | |||||
| } | |||||
| if (output_tensor == nullptr) { | |||||
| MS_LOG(ERROR) << "output_tensor null"; | |||||
| return; | |||||
| } | |||||
| MS_LOG(INFO) << "define input"; | MS_LOG(INFO) << "define input"; | ||||
| std::vector<lite::tensor::Tensor *> inputs{input_tensor}; | std::vector<lite::tensor::Tensor *> inputs{input_tensor}; | ||||
| std::vector<lite::tensor::Tensor *> outputs{output_tensor}; | std::vector<lite::tensor::Tensor *> outputs{output_tensor}; | ||||
| // run | // run | ||||
| MS_LOG(INFO) << "pooling_kernel"; | MS_LOG(INFO) << "pooling_kernel"; | ||||
| auto *pooling_kernel = new kernel::PoolingOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs); | |||||
| auto *pooling_kernel = | |||||
| new (std::nothrow) kernel::PoolingOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs); | |||||
| if (pooling_kernel == nullptr) { | |||||
| MS_LOG(ERROR) << "pooling_kernel null"; | |||||
| return; | |||||
| } | |||||
| MS_LOG(INFO) << "pooling_kernel init"; | MS_LOG(INFO) << "pooling_kernel init"; | ||||
| pooling_kernel->Init(); | pooling_kernel->Init(); | ||||
| std::vector<kernel::LiteKernel *> kernels{pooling_kernel}; | std::vector<kernel::LiteKernel *> kernels{pooling_kernel}; | ||||
| inputs[0]->MallocData(allocator); | inputs[0]->MallocData(allocator); | ||||
| auto *pGraph = new kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); | |||||
| auto *pGraph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); | |||||
| if (pGraph == nullptr) { | |||||
| MS_LOG(ERROR) << "pGraph null"; | |||||
| return; | |||||
| } | |||||
| MS_LOG(INFO) << "pGraph init"; | MS_LOG(INFO) << "pGraph init"; | ||||
| pGraph->Init(); | pGraph->Init(); | ||||
| @@ -28,41 +28,49 @@ class TestSoftmaxOpenCL : public mindspore::CommonTest {}; | |||||
| void RunTestCase(std::vector<int> input_shape, std::vector<int> output_shape, std::string input_file, | void RunTestCase(std::vector<int> input_shape, std::vector<int> output_shape, std::string input_file, | ||||
| std::string expect_file, SoftmaxParameter *param, schema::Format format) { | std::string expect_file, SoftmaxParameter *param, schema::Format format) { | ||||
| std::cout << "runtime" << std::endl; | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | ||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| // define tensor | // define tensor | ||||
| MS_LOG(INFO) << "defineTensor"; | MS_LOG(INFO) << "defineTensor"; | ||||
| std::cout << "defineTensor" << std::endl; | |||||
| auto data_type = kNumberTypeFloat32; | auto data_type = kNumberTypeFloat32; | ||||
| auto tensorType = schema::NodeType_ValueNode; | auto tensorType = schema::NodeType_ValueNode; | ||||
| auto input_tensor = new lite::tensor::Tensor(data_type, input_shape, format, tensorType); | |||||
| auto output_tensor = new lite::tensor::Tensor(data_type, output_shape, format, tensorType); | |||||
| auto input_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, format, tensorType); | |||||
| auto output_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, format, tensorType); | |||||
| if (input_tensor == nullptr) { | |||||
| MS_LOG(ERROR) << "input tensor null"; | |||||
| return; | |||||
| } | |||||
| if (output_tensor == nullptr) { | |||||
| MS_LOG(ERROR) << "output tensor null"; | |||||
| return; | |||||
| } | |||||
| std::vector<lite::tensor::Tensor *> inputs{input_tensor}; | std::vector<lite::tensor::Tensor *> inputs{input_tensor}; | ||||
| std::vector<lite::tensor::Tensor *> outputs{output_tensor}; | std::vector<lite::tensor::Tensor *> outputs{output_tensor}; | ||||
| // run | // run | ||||
| MS_LOG(INFO) << "NewOpenCLKernel"; | MS_LOG(INFO) << "NewOpenCLKernel"; | ||||
| std::cout << "NewOpenCLKernel" << std::endl; | |||||
| auto *kernel = new kernel::SoftmaxOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs); | auto *kernel = new kernel::SoftmaxOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs); | ||||
| if (kernel == nullptr) { | |||||
| MS_LOG(ERROR) << "kernel null"; | |||||
| return; | |||||
| } | |||||
| MS_LOG(INFO) << "KernelInit"; | MS_LOG(INFO) << "KernelInit"; | ||||
| std::cout << "KernelInit" << std::endl; | |||||
| kernel->Init(); | kernel->Init(); | ||||
| std::cout << "LiteKernel" << std::endl; | |||||
| std::vector<kernel::LiteKernel *> kernels{kernel}; | std::vector<kernel::LiteKernel *> kernels{kernel}; | ||||
| inputs[0]->MallocData(allocator); | inputs[0]->MallocData(allocator); | ||||
| std::cout << "SubGraphOpenCLKernel" << std::endl; | |||||
| auto *pGraph = new kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); | |||||
| auto *pGraph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); | |||||
| if (pGraph == nullptr) { | |||||
| MS_LOG(ERROR) << "pGraph null"; | |||||
| return; | |||||
| } | |||||
| MS_LOG(INFO) << "pGraphinit"; | MS_LOG(INFO) << "pGraphinit"; | ||||
| pGraph->Init(); | pGraph->Init(); | ||||
| // load data | // load data | ||||
| MS_LOG(INFO) << "load data1"; | MS_LOG(INFO) << "load data1"; | ||||
| LoadTestData(input_tensor->Data(), input_tensor->Size(), input_file); | LoadTestData(input_tensor->Data(), input_tensor->Size(), input_file); | ||||
| auto *input_data = reinterpret_cast<float *>(input_tensor->Data()); | auto *input_data = reinterpret_cast<float *>(input_tensor->Data()); | ||||
| printf("\ninput[0:10]:"); | printf("\ninput[0:10]:"); | ||||
| @@ -75,7 +83,6 @@ void RunTestCase(std::vector<int> input_shape, std::vector<int> output_shape, st | |||||
| pGraph->Run(); | pGraph->Run(); | ||||
| MS_LOG(INFO) << "compare result"; | MS_LOG(INFO) << "compare result"; | ||||
| std::cout << "compare result" << std::endl; | |||||
| CompareOutput(output_tensor, expect_file); | CompareOutput(output_tensor, expect_file); | ||||
| for (auto tensor : inputs) { | for (auto tensor : inputs) { | ||||
| delete tensor; | delete tensor; | ||||
| @@ -93,23 +100,11 @@ TEST_F(TestSoftmaxOpenCL, Softmax_1) { | |||||
| std::vector<int> output_shape = {1, 2, 2, 8}; | std::vector<int> output_shape = {1, 2, 2, 8}; | ||||
| std::string input_file = "softmax_in.bin"; | std::string input_file = "softmax_in.bin"; | ||||
| std::string expect_file = "softmax_out.bin"; | std::string expect_file = "softmax_out.bin"; | ||||
| auto param = new SoftmaxParameter; | |||||
| auto param = new (std::nothrow) SoftmaxParameter; | |||||
| param->axis_ = 3; | param->axis_ = 3; | ||||
| schema::Format format = schema::Format_NHWC4; | schema::Format format = schema::Format_NHWC4; | ||||
| RunTestCase(input_shape, output_shape, input_file, expect_file, param, format); | RunTestCase(input_shape, output_shape, input_file, expect_file, param, format); | ||||
| } | } | ||||
| // TEST_F(TestSoftmaxOpenCL, Softmax_1x1) { | |||||
| // std::vector<int> input_shape = {1, 100}; | |||||
| // std::vector<int> output_shape = {1, 100}; | |||||
| // std::string input_file = "softmax1x1_in.bin"; | |||||
| // std::string expect_file = "softmax1x1_out.bin"; | |||||
| // auto param = new SoftmaxParameter; | |||||
| // param->axis_ = 1; | |||||
| // schema::Format format = schema::Format_NHWC4; | |||||
| // | |||||
| // RunTestCase(input_shape, output_shape, input_file, expect_file, param, format); | |||||
| //} | |||||
| } // namespace mindspore | } // namespace mindspore | ||||