Merge pull request !4752 from chenzhongming/lite (tags/v0.7.0-beta)
| @@ -112,7 +112,6 @@ int PoolingOpenCLKernel::Run() { | |||
| MS_LOG(DEBUG) << this->name() << " Running!"; | |||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||
| // attribute | |||
| int slices = UP_DIV(out_tensors_[0]->Channel(), C4NUM); | |||
| cl_int4 input_shape = {in_tensors_[0]->Height(), in_tensors_[0]->Width(), in_tensors_[0]->Channel(), slices}; | |||
| cl_int4 output_shape = {out_tensors_[0]->Height(), out_tensors_[0]->Width(), out_tensors_[0]->Channel(), slices}; | |||
| @@ -120,7 +119,6 @@ int PoolingOpenCLKernel::Run() { | |||
| cl_int2 kernel_size = {parameter_->window_h_, parameter_->window_w_}; | |||
| cl_int2 padding = {parameter_->pad_u_, parameter_->pad_l_}; | |||
| // binding parameters | |||
| int arg_idx = 0; | |||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->Data()); | |||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->Data()); | |||
| @@ -130,14 +128,12 @@ int PoolingOpenCLKernel::Run() { | |||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, kernel_size); | |||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, padding); | |||
| // set work group size | |||
| std::vector<size_t> local_size; | |||
| std::vector<size_t> global_size = InitGlobalSize(); | |||
| int max_work_group_size = ocl_runtime->GetKernelMaxWorkGroupSize(kernel_(), (*ocl_runtime->Device())()); | |||
| local_size = GetCommonLocalSize(global_size, max_work_group_size); | |||
| global_size = GetCommonGlobalSize(local_size, global_size); | |||
| // run opencl kernel | |||
| ocl_runtime->RunKernel(kernel_, global_size, local_size, nullptr); | |||
| return RET_OK; | |||
| } | |||
| @@ -171,6 +171,7 @@ kernel::LiteKernel *OpenCLSoftMaxKernelCreator(const std::vector<lite::tensor::T | |||
| auto *kernel = new (std::nothrow) SoftmaxOpenCLKernel(reinterpret_cast<OpParameter *>(opParameter), inputs, outputs); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "kernel " << opParameter->name_ << "is nullptr."; | |||
| delete kernel; | |||
| return nullptr; | |||
| } | |||
| if (inputs[0]->shape()[0] > 1) { | |||
| @@ -58,7 +58,7 @@ TEST_F(TestAvgPoolingOpenCL, AvgPoolFp32) { | |||
| ocl_runtime->Init(); | |||
| MS_LOG(INFO) << "create PoolingParameter"; | |||
| auto param = new PoolingParameter(); | |||
| auto param = new (std::nothrow) PoolingParameter(); | |||
| InitAvgPoolingParam(param); | |||
| MS_LOG(INFO) << "create Tensors"; | |||
| @@ -76,18 +76,37 @@ TEST_F(TestAvgPoolingOpenCL, AvgPoolFp32) { | |||
| }; | |||
| auto data_type = kNumberTypeFloat32; | |||
| auto tensorType = schema::NodeType_ValueNode; | |||
| lite::tensor::Tensor *tensor_in = new lite::tensor::Tensor(data_type, shape_in, schema::Format_NHWC, tensorType); | |||
| lite::tensor::Tensor *tensor_out = new lite::tensor::Tensor(data_type, shape_out, schema::Format_NHWC, tensorType); | |||
| lite::tensor::Tensor *tensor_in = | |||
| new (std::nothrow) lite::tensor::Tensor(data_type, shape_in, schema::Format_NHWC, tensorType); | |||
| lite::tensor::Tensor *tensor_out = | |||
| new (std::nothrow) lite::tensor::Tensor(data_type, shape_out, schema::Format_NHWC, tensorType); | |||
| if (tensor_in == nullptr) { | |||
| MS_LOG(ERROR) << "tensor_in null"; | |||
| return; | |||
| } | |||
| if (tensor_out == nullptr) { | |||
| MS_LOG(ERROR) << "tensor_out null"; | |||
| return; | |||
| } | |||
| std::vector<lite::tensor::Tensor *> inputs{tensor_in}; | |||
| std::vector<lite::tensor::Tensor *> outputs{tensor_out}; | |||
| MS_LOG(INFO) << "create OpenCL Kernel"; | |||
| auto *pooling_kernel = new kernel::PoolingOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs); | |||
| auto *pooling_kernel = | |||
| new (std::nothrow) kernel::PoolingOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs); | |||
| if (pooling_kernel == nullptr) { | |||
| MS_LOG(ERROR) << "pooling_kernel null"; | |||
| return; | |||
| } | |||
| pooling_kernel->Init(); | |||
| std::vector<kernel::LiteKernel *> kernels{pooling_kernel}; | |||
| MS_LOG(INFO) << "create SubGraphOpenCLKernel"; | |||
| auto *pGraph = new kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); | |||
| auto *pGraph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); | |||
| if (pGraph == nullptr) { | |||
| MS_LOG(ERROR) << "pGraph null"; | |||
| return; | |||
| } | |||
| pGraph->Init(); | |||
| MS_LOG(INFO) << "initialize data"; | |||
| @@ -46,7 +46,7 @@ TEST_F(TestMaxPoolingOpenCL, MaxPool_1_32_512_96) { | |||
| auto allocator = ocl_runtime->GetAllocator(); | |||
| MS_LOG(INFO) << "PoolingParameter"; | |||
| auto param = new PoolingParameter; | |||
| auto param = new (std::nothrow) PoolingParameter; | |||
| InitParameter(param); | |||
| // define tensor | |||
| @@ -56,21 +56,39 @@ TEST_F(TestMaxPoolingOpenCL, MaxPool_1_32_512_96) { | |||
| auto data_type = kNumberTypeFloat32; | |||
| auto tensorType = schema::NodeType_ValueNode; | |||
| MS_LOG(INFO) << "define tensor2"; | |||
| auto input_tensor = new lite::tensor::Tensor(data_type, input_shape, schema::Format_NHWC4, tensorType); | |||
| auto output_tensor = new lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC4, tensorType); | |||
| auto input_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, schema::Format_NHWC4, tensorType); | |||
| auto output_tensor = | |||
| new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC4, tensorType); | |||
| if (input_tensor == nullptr) { | |||
| MS_LOG(ERROR) << "input_tensor null"; | |||
| return; | |||
| } | |||
| if (output_tensor == nullptr) { | |||
| MS_LOG(ERROR) << "output_tensor null"; | |||
| return; | |||
| } | |||
| MS_LOG(INFO) << "define input"; | |||
| std::vector<lite::tensor::Tensor *> inputs{input_tensor}; | |||
| std::vector<lite::tensor::Tensor *> outputs{output_tensor}; | |||
| // run | |||
| MS_LOG(INFO) << "pooling_kernel"; | |||
| auto *pooling_kernel = new kernel::PoolingOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs); | |||
| auto *pooling_kernel = | |||
| new (std::nothrow) kernel::PoolingOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs); | |||
| if (pooling_kernel == nullptr) { | |||
| MS_LOG(ERROR) << "pooling_kernel null"; | |||
| return; | |||
| } | |||
| MS_LOG(INFO) << "pooling_kernel init"; | |||
| pooling_kernel->Init(); | |||
| std::vector<kernel::LiteKernel *> kernels{pooling_kernel}; | |||
| inputs[0]->MallocData(allocator); | |||
| auto *pGraph = new kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); | |||
| auto *pGraph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); | |||
| if (pGraph == nullptr) { | |||
| MS_LOG(ERROR) << "pGraph null"; | |||
| return; | |||
| } | |||
| MS_LOG(INFO) << "pGraph init"; | |||
| pGraph->Init(); | |||
| @@ -28,41 +28,49 @@ class TestSoftmaxOpenCL : public mindspore::CommonTest {}; | |||
| void RunTestCase(std::vector<int> input_shape, std::vector<int> output_shape, std::string input_file, | |||
| std::string expect_file, SoftmaxParameter *param, schema::Format format) { | |||
| std::cout << "runtime" << std::endl; | |||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||
| ocl_runtime->Init(); | |||
| auto allocator = ocl_runtime->GetAllocator(); | |||
| // define tensor | |||
| MS_LOG(INFO) << "defineTensor"; | |||
| std::cout << "defineTensor" << std::endl; | |||
| auto data_type = kNumberTypeFloat32; | |||
| auto tensorType = schema::NodeType_ValueNode; | |||
| auto input_tensor = new lite::tensor::Tensor(data_type, input_shape, format, tensorType); | |||
| auto output_tensor = new lite::tensor::Tensor(data_type, output_shape, format, tensorType); | |||
| auto input_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, format, tensorType); | |||
| auto output_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, format, tensorType); | |||
| if (input_tensor == nullptr) { | |||
| MS_LOG(ERROR) << "input tensor null"; | |||
| return; | |||
| } | |||
| if (output_tensor == nullptr) { | |||
| MS_LOG(ERROR) << "output tensor null"; | |||
| return; | |||
| } | |||
| std::vector<lite::tensor::Tensor *> inputs{input_tensor}; | |||
| std::vector<lite::tensor::Tensor *> outputs{output_tensor}; | |||
| // run | |||
| MS_LOG(INFO) << "NewOpenCLKernel"; | |||
| std::cout << "NewOpenCLKernel" << std::endl; | |||
| auto *kernel = new kernel::SoftmaxOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "kernel null"; | |||
| return; | |||
| } | |||
| MS_LOG(INFO) << "KernelInit"; | |||
| std::cout << "KernelInit" << std::endl; | |||
| kernel->Init(); | |||
| std::cout << "LiteKernel" << std::endl; | |||
| std::vector<kernel::LiteKernel *> kernels{kernel}; | |||
| inputs[0]->MallocData(allocator); | |||
| std::cout << "SubGraphOpenCLKernel" << std::endl; | |||
| auto *pGraph = new kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); | |||
| auto *pGraph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); | |||
| if (pGraph == nullptr) { | |||
| MS_LOG(ERROR) << "pGraph null"; | |||
| return; | |||
| } | |||
| MS_LOG(INFO) << "pGraphinit"; | |||
| pGraph->Init(); | |||
| // load data | |||
| MS_LOG(INFO) << "load data1"; | |||
| LoadTestData(input_tensor->Data(), input_tensor->Size(), input_file); | |||
| auto *input_data = reinterpret_cast<float *>(input_tensor->Data()); | |||
| printf("\ninput[0:10]:"); | |||
| @@ -75,7 +83,6 @@ void RunTestCase(std::vector<int> input_shape, std::vector<int> output_shape, st | |||
| pGraph->Run(); | |||
| MS_LOG(INFO) << "compare result"; | |||
| std::cout << "compare result" << std::endl; | |||
| CompareOutput(output_tensor, expect_file); | |||
| for (auto tensor : inputs) { | |||
| delete tensor; | |||
| @@ -93,23 +100,11 @@ TEST_F(TestSoftmaxOpenCL, Softmax_1) { | |||
| std::vector<int> output_shape = {1, 2, 2, 8}; | |||
| std::string input_file = "softmax_in.bin"; | |||
| std::string expect_file = "softmax_out.bin"; | |||
| auto param = new SoftmaxParameter; | |||
| auto param = new (std::nothrow) SoftmaxParameter; | |||
| param->axis_ = 3; | |||
| schema::Format format = schema::Format_NHWC4; | |||
| RunTestCase(input_shape, output_shape, input_file, expect_file, param, format); | |||
| } | |||
| // TEST_F(TestSoftmaxOpenCL, Softmax_1x1) { | |||
| // std::vector<int> input_shape = {1, 100}; | |||
| // std::vector<int> output_shape = {1, 100}; | |||
| // std::string input_file = "softmax1x1_in.bin"; | |||
| // std::string expect_file = "softmax1x1_out.bin"; | |||
| // auto param = new SoftmaxParameter; | |||
| // param->axis_ = 1; | |||
| // schema::Format format = schema::Format_NHWC4; | |||
| // | |||
| // RunTestCase(input_shape, output_shape, input_file, expect_file, param, format); | |||
| //} | |||
| } // namespace mindspore | |||