| @@ -55,8 +55,7 @@ int ActivationOpenClKernel::Init() { | |||||
| c = in_tensors_[0]->shape()[3]; | c = in_tensors_[0]->shape()[3]; | ||||
| } | } | ||||
| nhwc_shape_ = {n, h, w, c}; | nhwc_shape_ = {n, h, w, c}; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| enable_fp16_ = ocl_runtime->GetFp16Enable(); | |||||
| enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||||
| fp_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float); | fp_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float); | ||||
| if (in_size_ != 2 && in_size_ != 4) { | if (in_size_ != 2 && in_size_ != 4) { | ||||
| MS_LOG(ERROR) << "Activate fun only support dim=4 or 2, but your dim=" << in_size_; | MS_LOG(ERROR) << "Activate fun only support dim=4 or 2, but your dim=" << in_size_; | ||||
| @@ -75,9 +74,9 @@ int ActivationOpenClKernel::Init() { | |||||
| std::string source = activation_source; | std::string source = activation_source; | ||||
| std::set<std::string> build_options; | std::set<std::string> build_options; | ||||
| ocl_runtime->LoadSource(Program_Kernel[type_][0], source); | |||||
| ocl_runtime_->LoadSource(Program_Kernel[type_][0], source); | |||||
| std::string kernel_name = Program_Kernel[type_][1]; | std::string kernel_name = Program_Kernel[type_][1]; | ||||
| ocl_runtime->BuildKernel(kernel_, Program_Kernel[type_][0], kernel_name, build_options); | |||||
| ocl_runtime_->BuildKernel(kernel_, Program_Kernel[type_][0], kernel_name, build_options); | |||||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | in_ori_format_ = in_tensors_[0]->GetFormat(); | ||||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | out_ori_format_ = out_tensors_[0]->GetFormat(); | ||||
| in_tensors_[0]->SetFormat(op_format_); | in_tensors_[0]->SetFormat(op_format_); | ||||
| @@ -89,17 +88,16 @@ int ActivationOpenClKernel::Init() { | |||||
| int ActivationOpenClKernel::Run() { | int ActivationOpenClKernel::Run() { | ||||
| MS_LOG(DEBUG) << op_parameter_->name_ << " begin running!"; | MS_LOG(DEBUG) << op_parameter_->name_ << " begin running!"; | ||||
| cl_int4 img2d_shape = GetImg2dShape(); | cl_int4 img2d_shape = GetImg2dShape(); | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| int arg_idx = 0; | int arg_idx = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, img2d_shape); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, img2d_shape); | |||||
| if (type_ == ActivationType_LEAKY_RELU) { | if (type_ == ActivationType_LEAKY_RELU) { | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, alpha_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, alpha_); | |||||
| } | } | ||||
| std::vector<size_t> local = {}; | std::vector<size_t> local = {}; | ||||
| std::vector<size_t> global = {static_cast<size_t>(img2d_shape.s[1]), static_cast<size_t>(img2d_shape.s[2])}; | std::vector<size_t> global = {static_cast<size_t>(img2d_shape.s[1]), static_cast<size_t>(img2d_shape.s[2])}; | ||||
| auto ret = ocl_runtime->RunKernel(kernel_, global, local, nullptr); | |||||
| auto ret = ocl_runtime_->RunKernel(kernel_, global, local, nullptr); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Run kernel:" << op_parameter_->name_ << " fail."; | MS_LOG(ERROR) << "Run kernel:" << op_parameter_->name_ << " fail."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -19,7 +19,6 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | #include "src/runtime/kernel/opencl/opencl_kernel.h" | ||||
| #include "nnacl/fp32/activation.h" | #include "nnacl/fp32/activation.h" | ||||
| @@ -34,7 +34,7 @@ namespace mindspore::kernel { | |||||
| ArithmeticOpenCLKernel::~ArithmeticOpenCLKernel() { | ArithmeticOpenCLKernel::~ArithmeticOpenCLKernel() { | ||||
| if (weight_ptr_ != nullptr) { | if (weight_ptr_ != nullptr) { | ||||
| auto allocator = runtime_->GetAllocator(); | |||||
| auto allocator = ocl_runtime_->GetAllocator(); | |||||
| allocator->Free(weight_ptr_); | allocator->Free(weight_ptr_); | ||||
| weight_ptr_ = nullptr; | weight_ptr_ = nullptr; | ||||
| } | } | ||||
| @@ -106,7 +106,7 @@ int ArithmeticOpenCLKernel::InitBuffer() { | |||||
| const ArithmeticParameter *arithmetic_parameter = reinterpret_cast<const ArithmeticParameter *>(op_parameter_); | const ArithmeticParameter *arithmetic_parameter = reinterpret_cast<const ArithmeticParameter *>(op_parameter_); | ||||
| if (!arithmetic_parameter->broadcasting_) { | if (!arithmetic_parameter->broadcasting_) { | ||||
| if (in_tensors_[1]->category() == lite::Tensor::Category::CONST && in_tensors_[1]->data_c() != nullptr) { | if (in_tensors_[1]->category() == lite::Tensor::Category::CONST && in_tensors_[1]->data_c() != nullptr) { | ||||
| auto allocator = runtime_->GetAllocator(); | |||||
| auto allocator = ocl_runtime_->GetAllocator(); | |||||
| std::vector<size_t> img_size; | std::vector<size_t> img_size; | ||||
| GetImageSize(0, &img_size); | GetImageSize(0, &img_size); | ||||
| int pack_weight_size = in_tensors_[1]->ElementsC4Num(); | int pack_weight_size = in_tensors_[1]->ElementsC4Num(); | ||||
| @@ -194,7 +194,6 @@ int ArithmeticOpenCLKernel::InitBuffer() { | |||||
| } | } | ||||
| int ArithmeticOpenCLKernel::Init() { | int ArithmeticOpenCLKernel::Init() { | ||||
| runtime_ = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| std::string kernel_name; | std::string kernel_name; | ||||
| const ArithmeticParameter *arithmetic_parameter = reinterpret_cast<const ArithmeticParameter *>(op_parameter_); | const ArithmeticParameter *arithmetic_parameter = reinterpret_cast<const ArithmeticParameter *>(op_parameter_); | ||||
| @@ -265,7 +264,7 @@ int ArithmeticOpenCLKernel::Init() { | |||||
| lite::STATUS error_code = RET_OK; | lite::STATUS error_code = RET_OK; | ||||
| #ifdef PROGRAM_WITH_IL | #ifdef PROGRAM_WITH_IL | ||||
| kernel_ = runtime_->GetKernelFromBinary(kernel_name); | |||||
| kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); | |||||
| #else | #else | ||||
| if (out_mem_type_ == OpenCLMemType::IMG) { | if (out_mem_type_ == OpenCLMemType::IMG) { | ||||
| kernel_name += "_IMG"; | kernel_name += "_IMG"; | ||||
| @@ -275,8 +274,8 @@ int ArithmeticOpenCLKernel::Init() { | |||||
| std::string program_name = "Arithmetic"; | std::string program_name = "Arithmetic"; | ||||
| std::set<std::string> build_options; | std::set<std::string> build_options; | ||||
| std::string source = arithmetic_source; | std::string source = arithmetic_source; | ||||
| runtime_->LoadSource(program_name, source); | |||||
| error_code = runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| ocl_runtime_->LoadSource(program_name, source); | |||||
| error_code = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| #endif | #endif | ||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| return error_code; | return error_code; | ||||
| @@ -302,10 +301,10 @@ int ArithmeticOpenCLKernel::Run() { | |||||
| MS_LOG(DEBUG) << this->name() << " Running!"; | MS_LOG(DEBUG) << this->name() << " Running!"; | ||||
| int arg_idx = 0; | int arg_idx = 0; | ||||
| runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| if (element_flag_) { | if (element_flag_) { | ||||
| void *weight = weight_ptr_ == nullptr ? in_tensors_[1]->data_c() : weight_ptr_; | void *weight = weight_ptr_ == nullptr ? in_tensors_[1]->data_c() : weight_ptr_; | ||||
| runtime_->SetKernelArg(kernel_, arg_idx++, weight); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight); | |||||
| } else { | } else { | ||||
| float weight = 0.f; | float weight = 0.f; | ||||
| if (in_tensors_[1]->data_type() == kNumberTypeFloat32) { | if (in_tensors_[1]->data_type() == kNumberTypeFloat32) { | ||||
| @@ -316,9 +315,9 @@ int ArithmeticOpenCLKernel::Run() { | |||||
| MS_LOG(ERROR) << "Unsupport data type " << in_tensors_[1]->data_type(); | MS_LOG(ERROR) << "Unsupport data type " << in_tensors_[1]->data_type(); | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| runtime_->SetKernelArg(kernel_, arg_idx++, weight); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight); | |||||
| } | } | ||||
| runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| int H = 0; | int H = 0; | ||||
| int W = 0; | int W = 0; | ||||
| @@ -336,8 +335,8 @@ int ArithmeticOpenCLKernel::Run() { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| cl_int2 output_shape{W, H}; | cl_int2 output_shape{W, H}; | ||||
| runtime_->SetKernelArg(kernel_, arg_idx++, output_shape); | |||||
| runtime_->RunKernel(kernel_, global_size_, local_size_, nullptr); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape); | |||||
| ocl_runtime_->RunKernel(kernel_, global_size_, local_size_, nullptr); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -19,7 +19,6 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "src/runtime/kernel/arm/fp32/arithmetic.h" | #include "src/runtime/kernel/arm/fp32/arithmetic.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | #include "src/runtime/kernel/opencl/opencl_kernel.h" | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| @@ -42,7 +41,6 @@ class ArithmeticOpenCLKernel : public OpenCLKernel { | |||||
| int InitBuffer(); | int InitBuffer(); | ||||
| cl::Kernel kernel_; | cl::Kernel kernel_; | ||||
| lite::opencl::OpenCLRuntime *runtime_; | |||||
| bool element_flag_{true}; | bool element_flag_{true}; | ||||
| void *weight_ptr_{nullptr}; | void *weight_ptr_{nullptr}; | ||||
| @@ -17,7 +17,6 @@ | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include <set> | #include <set> | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "src/runtime/kernel/opencl/kernel/arithmetic_self.h" | #include "src/runtime/kernel/opencl/kernel/arithmetic_self.h" | ||||
| #include "src/runtime/kernel/opencl/utils.h" | #include "src/runtime/kernel/opencl/utils.h" | ||||
| #include "src/runtime/kernel/opencl/cl/arithmeticself.cl.inc" | #include "src/runtime/kernel/opencl/cl/arithmeticself.cl.inc" | ||||
| @@ -51,8 +50,7 @@ int ArithmeticSelfOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *im | |||||
| im_dst_x = out_tensors_[0]->Width(); | im_dst_x = out_tensors_[0]->Width(); | ||||
| } | } | ||||
| size_t img_dtype = CL_FLOAT; | size_t img_dtype = CL_FLOAT; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto enable_fp16_ = ocl_runtime->GetFp16Enable(); | |||||
| auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||||
| if (enable_fp16_) { | if (enable_fp16_) { | ||||
| img_dtype = CL_HALF_FLOAT; | img_dtype = CL_HALF_FLOAT; | ||||
| } | } | ||||
| @@ -136,9 +134,8 @@ int ArithmeticSelfOpenCLKernel::Init() { | |||||
| std::set<std::string> build_options; | std::set<std::string> build_options; | ||||
| std::string source = arithmeticself_source; | std::string source = arithmeticself_source; | ||||
| std::string program_name = "ArithmeticSelf"; | std::string program_name = "ArithmeticSelf"; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| ocl_runtime->LoadSource(program_name, source); | |||||
| ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| ocl_runtime_->LoadSource(program_name, source); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -162,7 +159,6 @@ void ArithmeticSelfGetWorkGroup(const std::vector<size_t> &global, std::vector<s | |||||
| int ArithmeticSelfOpenCLKernel::Run() { | int ArithmeticSelfOpenCLKernel::Run() { | ||||
| MS_LOG(DEBUG) << this->name() << " Running! "; | MS_LOG(DEBUG) << this->name() << " Running! "; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto output_shape = out_tensors_[0]->shape(); | auto output_shape = out_tensors_[0]->shape(); | ||||
| cl_int4 output_shape_ = {output_shape[0], output_shape[1], output_shape[2], UP_DIV(output_shape[3], C4NUM)}; | cl_int4 output_shape_ = {output_shape[0], output_shape[1], output_shape[2], UP_DIV(output_shape[3], C4NUM)}; | ||||
| @@ -170,17 +166,17 @@ int ArithmeticSelfOpenCLKernel::Run() { | |||||
| uint32_t OW = output_shape[2]; | uint32_t OW = output_shape[2]; | ||||
| uint32_t OC = UP_DIV(output_shape[3], C4NUM); | uint32_t OC = UP_DIV(output_shape[3], C4NUM); | ||||
| const std::vector<size_t> &max_global = ocl_runtime->GetWorkItemSize(); | |||||
| const std::vector<size_t> &max_global = ocl_runtime_->GetWorkItemSize(); | |||||
| std::vector<size_t> local = {1, 1, 1}; // init local | std::vector<size_t> local = {1, 1, 1}; // init local | ||||
| std::vector<size_t> global = {OH, OW, OC}; | std::vector<size_t> global = {OH, OW, OC}; | ||||
| ArithmeticSelfGetWorkGroup(global, &local, max_global[0]); | ArithmeticSelfGetWorkGroup(global, &local, max_global[0]); | ||||
| int arg_cn = 0; | int arg_cn = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_); | |||||
| ocl_runtime->RunKernel(kernel_, global, local, nullptr); | |||||
| ocl_runtime_->RunKernel(kernel_, global, local, nullptr); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -21,7 +21,6 @@ | |||||
| #include <string> | #include <string> | ||||
| #include "ir/anf.h" | #include "ir/anf.h" | ||||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | #include "src/runtime/kernel/opencl/opencl_kernel.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "nnacl/arithmetic_self_parameter.h" | #include "nnacl/arithmetic_self_parameter.h" | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| @@ -18,7 +18,6 @@ | |||||
| #include <set> | #include <set> | ||||
| #include <string> | #include <string> | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "src/runtime/kernel/opencl/kernel/batchnorm.h" | #include "src/runtime/kernel/opencl/kernel/batchnorm.h" | ||||
| #include "src/runtime/kernel/opencl/utils.h" | #include "src/runtime/kernel/opencl/utils.h" | ||||
| #include "src/runtime/kernel/opencl/cl/batchnorm.cl.inc" | #include "src/runtime/kernel/opencl/cl/batchnorm.cl.inc" | ||||
| @@ -40,8 +39,7 @@ int BatchNormOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_siz | |||||
| im_dst_x = out_tensors_[0]->Width(); | im_dst_x = out_tensors_[0]->Width(); | ||||
| } | } | ||||
| size_t img_dtype = CL_FLOAT; | size_t img_dtype = CL_FLOAT; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto enable_fp16_ = ocl_runtime->GetFp16Enable(); | |||||
| auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||||
| if (enable_fp16_) { | if (enable_fp16_) { | ||||
| img_dtype = CL_HALF_FLOAT; | img_dtype = CL_HALF_FLOAT; | ||||
| } | } | ||||
| @@ -72,9 +70,8 @@ int BatchNormOpenCLKernel::Init() { | |||||
| std::set<std::string> build_options; | std::set<std::string> build_options; | ||||
| std::string source = batchnorm_source; | std::string source = batchnorm_source; | ||||
| std::string program_name = "Batch_normalization"; | std::string program_name = "Batch_normalization"; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| ocl_runtime->LoadSource(program_name, source); | |||||
| ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| ocl_runtime_->LoadSource(program_name, source); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -98,7 +95,6 @@ void BatchNormGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t | |||||
| int BatchNormOpenCLKernel::Run() { | int BatchNormOpenCLKernel::Run() { | ||||
| MS_LOG(DEBUG) << this->name() << " Running! "; | MS_LOG(DEBUG) << this->name() << " Running! "; | ||||
| auto param = reinterpret_cast<BatchNormParameter *>(this->op_parameter_); | auto param = reinterpret_cast<BatchNormParameter *>(this->op_parameter_); | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto input0_shape = in_tensors_[0]->shape(); | auto input0_shape = in_tensors_[0]->shape(); | ||||
| auto output_shape = out_tensors_[0]->shape(); | auto output_shape = out_tensors_[0]->shape(); | ||||
| cl_int4 input_shape_ = {input0_shape[0], input0_shape[1], input0_shape[2], UP_DIV(input0_shape[3], C4NUM)}; | cl_int4 input_shape_ = {input0_shape[0], input0_shape[1], input0_shape[2], UP_DIV(input0_shape[3], C4NUM)}; | ||||
| @@ -107,20 +103,20 @@ int BatchNormOpenCLKernel::Run() { | |||||
| uint32_t OW = output_shape[2]; | uint32_t OW = output_shape[2]; | ||||
| uint32_t OC = UP_DIV(output_shape[3], C4NUM); | uint32_t OC = UP_DIV(output_shape[3], C4NUM); | ||||
| const std::vector<size_t> &max_global = ocl_runtime->GetWorkItemSize(); | |||||
| const std::vector<size_t> &max_global = ocl_runtime_->GetWorkItemSize(); | |||||
| std::vector<size_t> local = {1, 1, 1}; // init local | std::vector<size_t> local = {1, 1, 1}; // init local | ||||
| std::vector<size_t> global = {OH, OW, OC}; | std::vector<size_t> global = {OH, OW, OC}; | ||||
| BatchNormGetWorkGroup(global, &local, max_global[0]); | BatchNormGetWorkGroup(global, &local, max_global[0]); | ||||
| int arg_cn = 0; | int arg_cn = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); // input tensor | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); // scale | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c()); // offest | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[3]->data_c()); // mean | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[4]->data_c()); // variance | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); // out tensor | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape_); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->epsilon_); | |||||
| ocl_runtime->RunKernel(kernel_, global, local, nullptr); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); // input tensor | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); // scale | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c()); // offset | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[3]->data_c()); // mean | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[4]->data_c()); // variance | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); // out tensor | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->epsilon_); | |||||
| ocl_runtime_->RunKernel(kernel_, global, local, nullptr); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -20,7 +20,6 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "ir/anf.h" | #include "ir/anf.h" | ||||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | #include "src/runtime/kernel/opencl/opencl_kernel.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "nnacl/fp32/batchnorm.h" | #include "nnacl/fp32/batchnorm.h" | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| @@ -16,6 +16,7 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #include "src/runtime/kernel/opencl/kernel/biasadd.h" | |||||
| #include <string> | #include <string> | ||||
| #include <map> | #include <map> | ||||
| #include <set> | #include <set> | ||||
| @@ -23,7 +24,6 @@ | |||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| #include "src/runtime/kernel/opencl/kernel/biasadd.h" | |||||
| #include "src/runtime/opencl/opencl_runtime.h" | #include "src/runtime/opencl/opencl_runtime.h" | ||||
| #include "src/runtime/kernel/opencl/cl/biasadd.cl.inc" | #include "src/runtime/kernel/opencl/cl/biasadd.cl.inc" | ||||
| @@ -38,7 +38,7 @@ namespace mindspore::kernel { | |||||
| void BiasAddOpenCLKernel::InitBuffer() { | void BiasAddOpenCLKernel::InitBuffer() { | ||||
| int C = in_tensors_[1]->shape()[0]; | int C = in_tensors_[1]->shape()[0]; | ||||
| int div_ci = UP_DIV(C, C4NUM); | int div_ci = UP_DIV(C, C4NUM); | ||||
| auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator(); | |||||
| auto allocator = ocl_runtime_->GetAllocator(); | |||||
| size_t img_dtype = CL_FLOAT; | size_t img_dtype = CL_FLOAT; | ||||
| if (enable_fp16_) { | if (enable_fp16_) { | ||||
| img_dtype = CL_HALF_FLOAT; | img_dtype = CL_HALF_FLOAT; | ||||
| @@ -57,8 +57,7 @@ int BiasAddOpenCLKernel::Init() { | |||||
| for (int i = 0; i < in_size_; ++i) { | for (int i = 0; i < in_size_; ++i) { | ||||
| input_shape_.s[i + 4 - in_size_] = in_tensors_[0]->shape()[i]; | input_shape_.s[i + 4 - in_size_] = in_tensors_[0]->shape()[i]; | ||||
| } | } | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| enable_fp16_ = ocl_runtime->GetFp16Enable(); | |||||
| enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||||
| fp_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float); | fp_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float); | ||||
| if (in_size_ != 4 && in_size_ != 2) { | if (in_size_ != 4 && in_size_ != 2) { | ||||
| MS_LOG(ERROR) << "BiasAdd only support dim=4 or 2, but your dim=" << in_size_; | MS_LOG(ERROR) << "BiasAdd only support dim=4 or 2, but your dim=" << in_size_; | ||||
| @@ -75,8 +74,8 @@ int BiasAddOpenCLKernel::Init() { | |||||
| std::string source = biasadd_source; | std::string source = biasadd_source; | ||||
| std::string program_name = "BiasAdd"; | std::string program_name = "BiasAdd"; | ||||
| std::string kernel_name = "BiasAdd"; | std::string kernel_name = "BiasAdd"; | ||||
| ocl_runtime->LoadSource(program_name, source); | |||||
| ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| ocl_runtime_->LoadSource(program_name, source); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | in_ori_format_ = in_tensors_[0]->GetFormat(); | ||||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | out_ori_format_ = out_tensors_[0]->GetFormat(); | ||||
| @@ -89,18 +88,17 @@ int BiasAddOpenCLKernel::Init() { | |||||
| int BiasAddOpenCLKernel::Run() { | int BiasAddOpenCLKernel::Run() { | ||||
| cl_int4 global_size = GetGlobalshape(); | cl_int4 global_size = GetGlobalshape(); | ||||
| MS_LOG(DEBUG) << op_parameter_->name_ << " Running!"; | MS_LOG(DEBUG) << op_parameter_->name_ << " Running!"; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| int arg_idx = 0; | int arg_idx = 0; | ||||
| std::map<schema::Format, int> data_type{ | std::map<schema::Format, int> data_type{ | ||||
| {schema::Format::Format_NC4, 1}, {schema::Format::Format_NHWC4, 2}, {schema::Format::Format_NC4HW4, 3}}; | {schema::Format::Format_NC4, 1}, {schema::Format::Format_NHWC4, 2}, {schema::Format::Format_NC4HW4, 3}}; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, input_shape_); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, BiasAdd_); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_shape_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, BiasAdd_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]); | |||||
| std::vector<size_t> local = {1, 1}; | std::vector<size_t> local = {1, 1}; | ||||
| std::vector<size_t> global = {static_cast<size_t>(global_size.s[1]), static_cast<size_t>(global_size.s[2])}; | std::vector<size_t> global = {static_cast<size_t>(global_size.s[1]), static_cast<size_t>(global_size.s[2])}; | ||||
| auto ret = ocl_runtime->RunKernel(kernel_, global, local, nullptr); | |||||
| auto ret = ocl_runtime_->RunKernel(kernel_, global, local, nullptr); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Run kernel " << op_parameter_->name_ << " error."; | MS_LOG(ERROR) << "Run kernel " << op_parameter_->name_ << " error."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -23,7 +23,6 @@ | |||||
| #include "src/tensor.h" | #include "src/tensor.h" | ||||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | #include "src/runtime/kernel/opencl/opencl_kernel.h" | ||||
| #include "schema/model_generated.h" | #include "schema/model_generated.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| @@ -13,13 +13,13 @@ | |||||
| * See the License for the specific language governing permissions and | * See the License for the specific language governing permissions and | ||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #include "src/runtime/kernel/opencl/kernel/concat.h" | |||||
| #include <cstring> | #include <cstring> | ||||
| #include <string> | #include <string> | ||||
| #include <algorithm> | #include <algorithm> | ||||
| #include <set> | #include <set> | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "src/runtime/kernel/opencl/kernel/concat.h" | |||||
| #include "src/runtime/kernel/opencl/utils.h" | #include "src/runtime/kernel/opencl/utils.h" | ||||
| #include "src/runtime/kernel/opencl/cl/concat.cl.inc" | #include "src/runtime/kernel/opencl/cl/concat.cl.inc" | ||||
| @@ -40,8 +40,7 @@ int ConcatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) | |||||
| im_dst_x = out_tensors_[0]->Width(); | im_dst_x = out_tensors_[0]->Width(); | ||||
| } | } | ||||
| size_t img_dtype = CL_FLOAT; | size_t img_dtype = CL_FLOAT; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto enable_fp16_ = ocl_runtime->GetFp16Enable(); | |||||
| auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||||
| if (enable_fp16_) { | if (enable_fp16_) { | ||||
| img_dtype = CL_HALF_FLOAT; | img_dtype = CL_HALF_FLOAT; | ||||
| } | } | ||||
| @@ -52,8 +51,7 @@ int ConcatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) | |||||
| } | } | ||||
| int ConcatOpenCLKernel::RunAxis0() { | int ConcatOpenCLKernel::RunAxis0() { | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto allocator_ = ocl_runtime->GetAllocator(); | |||||
| auto allocator_ = ocl_runtime_->GetAllocator(); | |||||
| std::vector<size_t> img_size; | std::vector<size_t> img_size; | ||||
| auto dst_data = out_tensors_[0]->data_c(); | auto dst_data = out_tensors_[0]->data_c(); | ||||
| auto dst_origin = cl::array<cl::size_type, 3U>{0, 0, 0}; | auto dst_origin = cl::array<cl::size_type, 3U>{0, 0, 0}; | ||||
| @@ -64,7 +62,7 @@ int ConcatOpenCLKernel::RunAxis0() { | |||||
| auto src_origin = cl::array<cl::size_type, 3U>{0, 0, 0}; | auto src_origin = cl::array<cl::size_type, 3U>{0, 0, 0}; | ||||
| auto region = cl::array<cl::size_type, 3U>{img_size[0], img_size[1], 1}; | auto region = cl::array<cl::size_type, 3U>{img_size[0], img_size[1], 1}; | ||||
| cl::Image2D *input_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(src_data)); | cl::Image2D *input_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(src_data)); | ||||
| ocl_runtime->GetDefaultCommandQueue()->enqueueCopyImage(*input_image, *out_image, src_origin, dst_origin, region); | |||||
| ocl_runtime_->GetDefaultCommandQueue()->enqueueCopyImage(*input_image, *out_image, src_origin, dst_origin, region); | |||||
| dst_origin[1] += region[1]; | dst_origin[1] += region[1]; | ||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| @@ -112,9 +110,8 @@ int ConcatOpenCLKernel::Init() { | |||||
| std::set<std::string> build_options; | std::set<std::string> build_options; | ||||
| std::string source = concat_source; | std::string source = concat_source; | ||||
| std::string program_name = "Concat"; | std::string program_name = "Concat"; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| ocl_runtime->LoadSource(program_name, source); | |||||
| ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| ocl_runtime_->LoadSource(program_name, source); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -155,7 +152,6 @@ int ConcatOpenCLKernel::Run() { | |||||
| return RunAxis0(); | return RunAxis0(); | ||||
| } | } | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto input1_shape = in_tensors_[0]->shape(); | auto input1_shape = in_tensors_[0]->shape(); | ||||
| auto input2_shape = in_tensors_[1]->shape(); | auto input2_shape = in_tensors_[1]->shape(); | ||||
| auto output_shape = out_tensors_[0]->shape(); | auto output_shape = out_tensors_[0]->shape(); | ||||
| @@ -168,7 +164,7 @@ int ConcatOpenCLKernel::Run() { | |||||
| uint32_t OW = output_shape[2]; | uint32_t OW = output_shape[2]; | ||||
| uint32_t OC = UP_DIV(output_shape[3], C4NUM); | uint32_t OC = UP_DIV(output_shape[3], C4NUM); | ||||
| const std::vector<size_t> &max_global = ocl_runtime->GetWorkItemSize(); | |||||
| const std::vector<size_t> &max_global = ocl_runtime_->GetWorkItemSize(); | |||||
| std::vector<size_t> local = {1, 1, 1}; // init local | std::vector<size_t> local = {1, 1, 1}; // init local | ||||
| std::vector<size_t> global = {OH, OW, OC}; | std::vector<size_t> global = {OH, OW, OC}; | ||||
| ConcatGetWorkGroup(global, &local, max_global[0]); | ConcatGetWorkGroup(global, &local, max_global[0]); | ||||
| @@ -176,48 +172,48 @@ int ConcatOpenCLKernel::Run() { | |||||
| int arg_cn = 0; | int arg_cn = 0; | ||||
| if (in_tensors_.size() == 2) { | if (in_tensors_.size() == 2) { | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape1_); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape2_); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->axis_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape1_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape2_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->axis_); | |||||
| } else if (in_tensors_.size() == 3) { | } else if (in_tensors_.size() == 3) { | ||||
| auto input3_shape = in_tensors_[2]->shape(); | auto input3_shape = in_tensors_[2]->shape(); | ||||
| cl_int4 input_shape3_ = {input3_shape[0], input3_shape[1], input3_shape[2], UP_DIV(input3_shape[3], C4NUM)}; | cl_int4 input_shape3_ = {input3_shape[0], input3_shape[1], input3_shape[2], UP_DIV(input3_shape[3], C4NUM)}; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape1_); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape2_); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape3_); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->axis_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape1_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape2_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape3_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->axis_); | |||||
| } else if (in_tensors_.size() == 4) { | } else if (in_tensors_.size() == 4) { | ||||
| auto input3_shape = in_tensors_[2]->shape(); | auto input3_shape = in_tensors_[2]->shape(); | ||||
| auto input4_shape = in_tensors_[3]->shape(); | auto input4_shape = in_tensors_[3]->shape(); | ||||
| cl_int4 input_shape3_ = {input3_shape[0], input3_shape[1], input3_shape[2], UP_DIV(input3_shape[3], C4NUM)}; | cl_int4 input_shape3_ = {input3_shape[0], input3_shape[1], input3_shape[2], UP_DIV(input3_shape[3], C4NUM)}; | ||||
| cl_int4 input_shape4_ = {input4_shape[0], input4_shape[1], input4_shape[2], UP_DIV(input4_shape[3], C4NUM)}; | cl_int4 input_shape4_ = {input4_shape[0], input4_shape[1], input4_shape[2], UP_DIV(input4_shape[3], C4NUM)}; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[3]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape1_); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape2_); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape3_); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape4_); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->axis_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[3]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape1_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape2_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape3_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape4_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->axis_); | |||||
| } else { | } else { | ||||
| MS_LOG(ERROR) << " input sizes must 2 or 3 or 4"; | MS_LOG(ERROR) << " input sizes must 2 or 3 or 4"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| ocl_runtime->RunKernel(kernel_, global, local, nullptr); | |||||
| ocl_runtime_->RunKernel(kernel_, global, local, nullptr); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -20,7 +20,6 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "ir/anf.h" | #include "ir/anf.h" | ||||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | #include "src/runtime/kernel/opencl/opencl_kernel.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "src/runtime/kernel/arm/base/concat_base.h" | #include "src/runtime/kernel/arm/base/concat_base.h" | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| @@ -14,12 +14,11 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #include "src/runtime/kernel/opencl/kernel/conv2d_transpose.h" | |||||
| #include <string> | #include <string> | ||||
| #include <set> | #include <set> | ||||
| #include "nnacl/fp32/common_func.h" | #include "nnacl/fp32/common_func.h" | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "src/runtime/kernel/opencl/kernel/conv2d_transpose.h" | |||||
| #ifndef PROGRAM_WITH_IL | #ifndef PROGRAM_WITH_IL | ||||
| #include "src/runtime/kernel/opencl/cl/conv2d_transpose2x2.cl.inc" | #include "src/runtime/kernel/opencl/cl/conv2d_transpose2x2.cl.inc" | ||||
| #endif | #endif | ||||
| @@ -41,16 +40,15 @@ int Conv2dTransposeOpenCLKernel::Init() { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| std::string kernel_name = "conv2d_transpose2x2_" + std::string(EnumNameFormat(op_format_)); | std::string kernel_name = "conv2d_transpose2x2_" + std::string(EnumNameFormat(op_format_)); | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| enable_fp16_ = ocl_runtime->GetFp16Enable(); | |||||
| enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||||
| #ifdef PROGRAM_WITH_IL | #ifdef PROGRAM_WITH_IL | ||||
| kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); | |||||
| kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); | |||||
| #else | #else | ||||
| std::string source = conv2d_transpose2x2_source; | std::string source = conv2d_transpose2x2_source; | ||||
| std::set<std::string> build_options; | std::set<std::string> build_options; | ||||
| std::string program_name = "conv2d_transpose2x2"; | std::string program_name = "conv2d_transpose2x2"; | ||||
| ocl_runtime->LoadSource(program_name, source); | |||||
| ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| ocl_runtime_->LoadSource(program_name, source); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| #endif | #endif | ||||
| PadWeight(); | PadWeight(); | ||||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | in_ori_format_ = in_tensors_[0]->GetFormat(); | ||||
| @@ -71,7 +69,7 @@ void Conv2dTransposeOpenCLKernel::PadWeight() { | |||||
| int kw = param->kernel_w_; | int kw = param->kernel_w_; | ||||
| int div_ci = UP_DIV(ci, C4NUM); | int div_ci = UP_DIV(ci, C4NUM); | ||||
| int div_co = UP_DIV(co, C4NUM); | int div_co = UP_DIV(co, C4NUM); | ||||
| auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator(); | |||||
| auto allocator = ocl_runtime_->GetAllocator(); | |||||
| auto data_size = enable_fp16_ ? sizeof(int16_t) : sizeof(float); | auto data_size = enable_fp16_ ? sizeof(int16_t) : sizeof(float); | ||||
| // IHWO to OHWI4(I)4(O)(converter format is IHWO) | // IHWO to OHWI4(I)4(O)(converter format is IHWO) | ||||
| @@ -188,7 +186,6 @@ int Conv2dTransposeOpenCLKernel::Run() { | |||||
| int ow = out_tensors_[0]->shape()[2]; | int ow = out_tensors_[0]->shape()[2]; | ||||
| int h = in_tensors_[0]->shape()[1]; | int h = in_tensors_[0]->shape()[1]; | ||||
| int w = in_tensors_[0]->shape()[2]; | int w = in_tensors_[0]->shape()[2]; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| // local size should less than MAX_GROUP_SIZE | // local size should less than MAX_GROUP_SIZE | ||||
| std::vector<size_t> local = {16, 1, 16}; | std::vector<size_t> local = {16, 1, 16}; | ||||
| std::vector<size_t> global = {UP_ROUND((size_t)UP_ROUND(oh / 2, 2), local[0]), | std::vector<size_t> global = {UP_ROUND((size_t)UP_ROUND(oh / 2, 2), local[0]), | ||||
| @@ -200,16 +197,16 @@ int Conv2dTransposeOpenCLKernel::Run() { | |||||
| cl_int4 src_size = {h, w, UP_DIV(ci, C4NUM), 1}; | cl_int4 src_size = {h, w, UP_DIV(ci, C4NUM), 1}; | ||||
| cl_int4 dst_size = {oh, ow, UP_DIV(co, C4NUM), 1}; | cl_int4 dst_size = {oh, ow, UP_DIV(co, C4NUM), 1}; | ||||
| int arg_cnt = 0; | int arg_cnt = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, padWeight_, lite::opencl::MemType::BUF); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, bias_); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, kernel_size); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, stride); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, padding); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, src_size); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, dst_size); | |||||
| ocl_runtime->RunKernel(kernel_, global, local, nullptr); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padWeight_, lite::opencl::MemType::BUF); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size); | |||||
| ocl_runtime_->RunKernel(kernel_, global, local, nullptr); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -21,7 +21,6 @@ | |||||
| #include "src/lite_kernel.h" | #include "src/lite_kernel.h" | ||||
| #include "nnacl/conv_parameter.h" | #include "nnacl/conv_parameter.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | #include "src/runtime/kernel/opencl/opencl_kernel.h" | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| @@ -39,12 +39,11 @@ constexpr size_t CO_TILE = C4NUM; | |||||
| int ConvolutionOpenCLKernel::Init() { | int ConvolutionOpenCLKernel::Init() { | ||||
| static int init_count = 0; | static int init_count = 0; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto allocator = ocl_runtime->GetAllocator(); | |||||
| auto allocator = ocl_runtime_->GetAllocator(); | |||||
| auto param = reinterpret_cast<ConvParameter *>(op_parameter_); | auto param = reinterpret_cast<ConvParameter *>(op_parameter_); | ||||
| std::set<std::string> build_options; | std::set<std::string> build_options; | ||||
| init_count++; | init_count++; | ||||
| use_fp16_ = ocl_runtime->GetFp16Enable(); | |||||
| use_fp16_ = ocl_runtime_->GetFp16Enable(); | |||||
| if (op_format_ != Format_NHWC4 && op_format_ != Format_NC4HW4) { | if (op_format_ != Format_NHWC4 && op_format_ != Format_NC4HW4) { | ||||
| MS_LOG(ERROR) << "op_format_ " << op_format_ << " not support!"; | MS_LOG(ERROR) << "op_format_ " << op_format_ << " not support!"; | ||||
| @@ -76,21 +75,21 @@ int ConvolutionOpenCLKernel::Init() { | |||||
| MS_LOG(DEBUG) << "use winograd"; | MS_LOG(DEBUG) << "use winograd"; | ||||
| std::string program_name; | std::string program_name; | ||||
| program_name = "Winograd4x4To36" + std::to_string(init_count); | program_name = "Winograd4x4To36" + std::to_string(init_count); | ||||
| ocl_runtime->LoadSource(program_name, CodeGenWinograd4x4To36()); | |||||
| ocl_runtime->BuildKernel(kernel_4x4to36_, program_name, "Winograd4x4To36", build_options); | |||||
| ocl_runtime_->LoadSource(program_name, CodeGenWinograd4x4To36()); | |||||
| ocl_runtime_->BuildKernel(kernel_4x4to36_, program_name, "Winograd4x4To36", build_options); | |||||
| program_name = "WinogradConvolution" + std::to_string(init_count); | program_name = "WinogradConvolution" + std::to_string(init_count); | ||||
| ocl_runtime->LoadSource(program_name, CodeGenWinogradConvolution()); | |||||
| ocl_runtime->BuildKernel(kernel_conv_, program_name, "WinogradConvolution", build_options); | |||||
| ocl_runtime_->LoadSource(program_name, CodeGenWinogradConvolution()); | |||||
| ocl_runtime_->BuildKernel(kernel_conv_, program_name, "WinogradConvolution", build_options); | |||||
| program_name = "Winograd36To4x4" + std::to_string(init_count); | program_name = "Winograd36To4x4" + std::to_string(init_count); | ||||
| ocl_runtime->LoadSource(program_name, CodeGenWinograd36To4x4()); | |||||
| ocl_runtime->BuildKernel(kernel_36to4x4_, program_name, "Winograd36To4x4", build_options); | |||||
| ocl_runtime_->LoadSource(program_name, CodeGenWinograd36To4x4()); | |||||
| ocl_runtime_->BuildKernel(kernel_36to4x4_, program_name, "Winograd36To4x4", build_options); | |||||
| } else { | } else { | ||||
| std::string program_name = "convolution" + std::to_string(init_count); | std::string program_name = "convolution" + std::to_string(init_count); | ||||
| std::string source = op_format_ == Format_NHWC4 ? CodeGenConvolutionNHWC4() : CodeGenConvolutionNC4HW4(); | std::string source = op_format_ == Format_NHWC4 ? CodeGenConvolutionNHWC4() : CodeGenConvolutionNC4HW4(); | ||||
| ocl_runtime->LoadSource(program_name, source); | |||||
| ocl_runtime->BuildKernel(kernel_conv_, program_name, "Convolution", build_options); | |||||
| ocl_runtime_->LoadSource(program_name, source); | |||||
| ocl_runtime_->BuildKernel(kernel_conv_, program_name, "Convolution", build_options); | |||||
| } | } | ||||
| // allocate winograd memory | // allocate winograd memory | ||||
| @@ -167,7 +166,7 @@ int ConvolutionOpenCLKernel::GenerateWinogradWeight() { | |||||
| } | } | ||||
| int ConvolutionOpenCLKernel::InitWeight() { | int ConvolutionOpenCLKernel::InitWeight() { | ||||
| auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator(); | |||||
| auto allocator = ocl_runtime_->GetAllocator(); | |||||
| // allocate memory | // allocate memory | ||||
| size_t packed_weight_size; | size_t packed_weight_size; | ||||
| @@ -205,8 +204,7 @@ int ConvolutionOpenCLKernel::InitWeight() { | |||||
| } | } | ||||
| int ConvolutionOpenCLKernel::InitBias() { | int ConvolutionOpenCLKernel::InitBias() { | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto allocator = ocl_runtime->GetAllocator(); | |||||
| auto allocator = ocl_runtime_->GetAllocator(); | |||||
| // align bias from C to C4 | // align bias from C to C4 | ||||
| auto bias_tensor = in_tensors_[2]; | auto bias_tensor = in_tensors_[2]; | ||||
| @@ -272,57 +270,56 @@ int ConvolutionOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_s | |||||
| int ConvolutionOpenCLKernel::Run() { | int ConvolutionOpenCLKernel::Run() { | ||||
| MS_LOG(DEBUG) << this->name() << " Running!"; | MS_LOG(DEBUG) << this->name() << " Running!"; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| int arg_cn = 0; | int arg_cn = 0; | ||||
| if (use_winograd_) { | if (use_winograd_) { | ||||
| arg_cn = 0; | arg_cn = 0; | ||||
| cl_int4 _4x4to36_in_shape = {1, IH_, IW_, CI_SLICES_}; | cl_int4 _4x4to36_in_shape = {1, IH_, IW_, CI_SLICES_}; | ||||
| cl_int4 _4x4to36_out_shape = {1, 36, TILES_XY_, CI_SLICES_}; | cl_int4 _4x4to36_out_shape = {1, 36, TILES_XY_, CI_SLICES_}; | ||||
| ocl_runtime->SetKernelArg(kernel_4x4to36_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG); | |||||
| ocl_runtime->SetKernelArg(kernel_4x4to36_, arg_cn++, winograd_mem0_, lite::opencl::MemType::IMG); | |||||
| ocl_runtime->SetKernelArg(kernel_4x4to36_, arg_cn++, _4x4to36_in_shape); | |||||
| ocl_runtime->SetKernelArg(kernel_4x4to36_, arg_cn++, _4x4to36_out_shape); | |||||
| ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG); | |||||
| ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, winograd_mem0_, lite::opencl::MemType::IMG); | |||||
| ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, _4x4to36_in_shape); | |||||
| ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, _4x4to36_out_shape); | |||||
| arg_cn = 0; | arg_cn = 0; | ||||
| cl_int4 conv_in_shape = {1, 36, TILES_XY_, CI_SLICES_}; | cl_int4 conv_in_shape = {1, 36, TILES_XY_, CI_SLICES_}; | ||||
| cl_int4 conv_out_shape = {1, 36, TILES_XY_, CO_SLICES_}; | cl_int4 conv_out_shape = {1, 36, TILES_XY_, CO_SLICES_}; | ||||
| ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, winograd_mem0_, lite::opencl::MemType::IMG); | |||||
| ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, winograd_mem1_, lite::opencl::MemType::IMG); | |||||
| ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, packed_weight_, lite::opencl::MemType::BUF); | |||||
| ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, conv_in_shape); | |||||
| ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, conv_out_shape); | |||||
| ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, winograd_mem0_, lite::opencl::MemType::IMG); | |||||
| ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, winograd_mem1_, lite::opencl::MemType::IMG); | |||||
| ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, packed_weight_, lite::opencl::MemType::BUF); | |||||
| ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, conv_in_shape); | |||||
| ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, conv_out_shape); | |||||
| arg_cn = 0; | arg_cn = 0; | ||||
| cl_int4 _36to4x4_in_shape = {1, 16, TILES_XY_, CO_SLICES_}; | cl_int4 _36to4x4_in_shape = {1, 16, TILES_XY_, CO_SLICES_}; | ||||
| cl_int4 _36to4x4_out_shape = {1, OH_, OW_, CO_SLICES_}; | cl_int4 _36to4x4_out_shape = {1, OH_, OW_, CO_SLICES_}; | ||||
| ocl_runtime->SetKernelArg(kernel_36to4x4_, arg_cn++, winograd_mem1_, lite::opencl::MemType::IMG); | |||||
| ocl_runtime->SetKernelArg(kernel_36to4x4_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG); | |||||
| ocl_runtime->SetKernelArg(kernel_36to4x4_, arg_cn++, packed_bias_, lite::opencl::MemType::BUF); | |||||
| ocl_runtime->SetKernelArg(kernel_36to4x4_, arg_cn++, _36to4x4_in_shape); | |||||
| ocl_runtime->SetKernelArg(kernel_36to4x4_, arg_cn++, _36to4x4_out_shape); | |||||
| ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, winograd_mem1_, lite::opencl::MemType::IMG); | |||||
| ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG); | |||||
| ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, packed_bias_, lite::opencl::MemType::BUF); | |||||
| ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, _36to4x4_in_shape); | |||||
| ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, _36to4x4_out_shape); | |||||
| } else { | } else { | ||||
| arg_cn = 0; | arg_cn = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG); | |||||
| ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG); | |||||
| ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, packed_weight_, lite::opencl::MemType::BUF); | |||||
| ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, packed_bias_, lite::opencl::MemType::BUF); | |||||
| ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG); | |||||
| ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG); | |||||
| ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, packed_weight_, lite::opencl::MemType::BUF); | |||||
| ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, packed_bias_, lite::opencl::MemType::BUF); | |||||
| if (op_format_ == Format_NC4HW4) { | if (op_format_ == Format_NC4HW4) { | ||||
| cl_int4 input_shape = {1, IH_, IW_, CI_SLICES_}; | cl_int4 input_shape = {1, IH_, IW_, CI_SLICES_}; | ||||
| cl_int4 output_shape = {1, OH_, OW_, CO_SLICES_}; | cl_int4 output_shape = {1, OH_, OW_, CO_SLICES_}; | ||||
| ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, input_shape); | |||||
| ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, output_shape); | |||||
| ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, input_shape); | |||||
| ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, output_shape); | |||||
| } | } | ||||
| } | } | ||||
| if (use_winograd_) { | if (use_winograd_) { | ||||
| ocl_runtime->RunKernel(kernel_4x4to36_, {size_t(TILES_XY_), 6, size_t(CI_SLICES_)}, {8, 6, 4}, nullptr); | |||||
| ocl_runtime->RunKernel(kernel_conv_, {size_t(TILES_XY_ / 2), 36, size_t(CO_SLICES_ / 2)}, {8, 6, 2}, nullptr); | |||||
| ocl_runtime->RunKernel(kernel_36to4x4_, {size_t(TILES_XY_), 4, size_t(CO_SLICES_)}, {32, 4, 2}, nullptr); | |||||
| ocl_runtime_->RunKernel(kernel_4x4to36_, {size_t(TILES_XY_), 6, size_t(CI_SLICES_)}, {8, 6, 4}, nullptr); | |||||
| ocl_runtime_->RunKernel(kernel_conv_, {size_t(TILES_XY_ / 2), 36, size_t(CO_SLICES_ / 2)}, {8, 6, 2}, nullptr); | |||||
| ocl_runtime_->RunKernel(kernel_36to4x4_, {size_t(TILES_XY_), 4, size_t(CO_SLICES_)}, {32, 4, 2}, nullptr); | |||||
| } else { | } else { | ||||
| std::vector<size_t> global, local; | std::vector<size_t> global, local; | ||||
| SetGlobalLocalConv(&global, &local); | SetGlobalLocalConv(&global, &local); | ||||
| ocl_runtime->RunKernel(kernel_conv_, global, local, nullptr); | |||||
| ocl_runtime_->RunKernel(kernel_conv_, global, local, nullptr); | |||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| @@ -819,10 +816,9 @@ std::string ConvolutionOpenCLKernel::CodeGenWinograd36To4x4() { | |||||
| } | } | ||||
| int ConvolutionOpenCLKernel::SetGlobalLocalConv(std::vector<size_t> *global, std::vector<size_t> *local) { | int ConvolutionOpenCLKernel::SetGlobalLocalConv(std::vector<size_t> *global, std::vector<size_t> *local) { | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| constexpr size_t work_group_size[] = {4, 4, 1}; | constexpr size_t work_group_size[] = {4, 4, 1}; | ||||
| auto max_work_item_sizes = ocl_runtime->GetWorkItemSize(); | |||||
| size_t max_work_group_size = ocl_runtime->GetKernelMaxWorkGroupSize(kernel_conv_(), (*ocl_runtime->Device())()); | |||||
| auto max_work_item_sizes = ocl_runtime_->GetWorkItemSize(); | |||||
| size_t max_work_group_size = ocl_runtime_->GetKernelMaxWorkGroupSize(kernel_conv_(), (*ocl_runtime_->Device())()); | |||||
| const size_t max_z_size = std::min<size_t>(16, max_work_item_sizes[2]); | const size_t max_z_size = std::min<size_t>(16, max_work_item_sizes[2]); | ||||
| size_t global_h = UP_DIV(OH_, work_group_size[0]) * work_group_size[0]; | size_t global_h = UP_DIV(OH_, work_group_size[0]) * work_group_size[0]; | ||||
| @@ -22,7 +22,6 @@ | |||||
| #include "src/tensor.h" | #include "src/tensor.h" | ||||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | #include "src/runtime/kernel/opencl/opencl_kernel.h" | ||||
| #include "schema/model_generated.h" | #include "schema/model_generated.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "nnacl/conv_parameter.h" | #include "nnacl/conv_parameter.h" | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| @@ -21,7 +21,6 @@ | |||||
| #include <map> | #include <map> | ||||
| #include <utility> | #include <utility> | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "src/runtime/kernel/opencl/utils.h" | #include "src/runtime/kernel/opencl/utils.h" | ||||
| #include "nnacl/fp32/common_func.h" | #include "nnacl/fp32/common_func.h" | ||||
| #include "nnacl/op_base.h" | #include "nnacl/op_base.h" | ||||
| @@ -42,7 +41,6 @@ using mindspore::schema::PrimitiveType_DepthwiseConv2D; | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| int DepthwiseConv2dOpenCLKernel::Init() { | int DepthwiseConv2dOpenCLKernel::Init() { | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| std::string kernel_name = "DepthwiseConv2d"; | std::string kernel_name = "DepthwiseConv2d"; | ||||
| auto in_format = op_format_; | auto in_format = op_format_; | ||||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | in_ori_format_ = in_tensors_[0]->GetFormat(); | ||||
| @@ -69,13 +67,13 @@ int DepthwiseConv2dOpenCLKernel::Init() { | |||||
| kernel_name += "_1x1"; | kernel_name += "_1x1"; | ||||
| } | } | ||||
| #ifdef PROGRAM_WITH_IL | #ifdef PROGRAM_WITH_IL | ||||
| kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); | |||||
| kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); | |||||
| #else | #else | ||||
| std::string program_name = "DepthwiseConv2d"; | std::string program_name = "DepthwiseConv2d"; | ||||
| std::set<std::string> build_options; | std::set<std::string> build_options; | ||||
| std::string source = depthwise_conv2d_source; | std::string source = depthwise_conv2d_source; | ||||
| ocl_runtime->LoadSource(program_name, source); | |||||
| ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| ocl_runtime_->LoadSource(program_name, source); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| #endif | #endif | ||||
| this->InitBuffer(); | this->InitBuffer(); | ||||
| MS_LOG(DEBUG) << kernel_name << " Init Done! mem type=" << static_cast<int>(out_mem_type_); | MS_LOG(DEBUG) << kernel_name << " Init Done! mem type=" << static_cast<int>(out_mem_type_); | ||||
| @@ -84,9 +82,8 @@ int DepthwiseConv2dOpenCLKernel::Init() { | |||||
| int DepthwiseConv2dOpenCLKernel::InitBuffer() { | int DepthwiseConv2dOpenCLKernel::InitBuffer() { | ||||
| auto parameter = reinterpret_cast<ConvParameter *>(op_parameter_); | auto parameter = reinterpret_cast<ConvParameter *>(op_parameter_); | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto allocator = ocl_runtime->GetAllocator(); | |||||
| bool is_fp16 = ocl_runtime->GetFp16Enable(); | |||||
| auto allocator = ocl_runtime_->GetAllocator(); | |||||
| bool is_fp16 = ocl_runtime_->GetFp16Enable(); | |||||
| // weight: o, h, w, i; o == group, i == 1 | // weight: o, h, w, i; o == group, i == 1 | ||||
| void *origin_weight = in_tensors_.at(kWeightIndex)->data_c(); | void *origin_weight = in_tensors_.at(kWeightIndex)->data_c(); | ||||
| @@ -162,7 +159,7 @@ int DepthwiseConv2dOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *i | |||||
| im_dst_x = out_tensors_[0]->Width(); | im_dst_x = out_tensors_[0]->Width(); | ||||
| } | } | ||||
| size_t img_dtype = CL_FLOAT; | size_t img_dtype = CL_FLOAT; | ||||
| if (lite::opencl::OpenCLRuntime::GetInstance()->GetFp16Enable()) { | |||||
| if (ocl_runtime_->GetFp16Enable()) { | |||||
| img_dtype = CL_HALF_FLOAT; | img_dtype = CL_HALF_FLOAT; | ||||
| } | } | ||||
| img_size->clear(); | img_size->clear(); | ||||
| @@ -189,7 +186,6 @@ int DepthwiseConv2dOpenCLKernel::GetLocalSize(size_t idx, const std::vector<size | |||||
| int DepthwiseConv2dOpenCLKernel::Run() { | int DepthwiseConv2dOpenCLKernel::Run() { | ||||
| MS_LOG(DEBUG) << this->name() << " Running!"; | MS_LOG(DEBUG) << this->name() << " Running!"; | ||||
| auto parameter = reinterpret_cast<ConvParameter *>(op_parameter_); | auto parameter = reinterpret_cast<ConvParameter *>(op_parameter_); | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); | size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); | ||||
| size_t CI4 = UP_DIV(in_tensors_[0]->Channel(), C4NUM); | size_t CI4 = UP_DIV(in_tensors_[0]->Channel(), C4NUM); | ||||
| std::vector<size_t> global = {(size_t)out_tensors_[0]->Width(), (size_t)out_tensors_[0]->Height(), CO4}; | std::vector<size_t> global = {(size_t)out_tensors_[0]->Width(), (size_t)out_tensors_[0]->Height(), CO4}; | ||||
| @@ -207,19 +203,19 @@ int DepthwiseConv2dOpenCLKernel::Run() { | |||||
| (cl_int)out_tensors_[0]->Batch()}; | (cl_int)out_tensors_[0]->Batch()}; | ||||
| int arg_cnt = 0; | int arg_cnt = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, packed_weight_, lite::opencl::MemType::BUF); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, bias_data_, lite::opencl::MemType::BUF); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, kernel_size); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, stride); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, padding); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, dilation); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, src_size); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, dst_size); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].first); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].second); | |||||
| ocl_runtime->RunKernel(kernel_, global, local, nullptr); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, packed_weight_, lite::opencl::MemType::BUF); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_data_, lite::opencl::MemType::BUF); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dilation); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].first); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].second); | |||||
| ocl_runtime_->RunKernel(kernel_, global, local, nullptr); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -20,7 +20,6 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | #include "src/runtime/kernel/opencl/opencl_kernel.h" | ||||
| #include "nnacl/conv_parameter.h" | #include "nnacl/conv_parameter.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| @@ -19,7 +19,6 @@ | |||||
| #include <set> | #include <set> | ||||
| #include <utility> | #include <utility> | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "src/runtime/kernel/opencl/kernel/gather.h" | #include "src/runtime/kernel/opencl/kernel/gather.h" | ||||
| #include "src/runtime/kernel/opencl/cl/gather.cl.inc" | #include "src/runtime/kernel/opencl/cl/gather.cl.inc" | ||||
| @@ -49,9 +48,8 @@ int GatherOpenCLKernel::Init() { | |||||
| std::set<std::string> build_options; | std::set<std::string> build_options; | ||||
| std::string source = gather_source; | std::string source = gather_source; | ||||
| std::string program_name = "gather"; | std::string program_name = "gather"; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| ocl_runtime->LoadSource(program_name, source); | |||||
| ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| ocl_runtime_->LoadSource(program_name, source); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| // init indices_data_ | // init indices_data_ | ||||
| auto indices_tensor = in_tensors_.at(1); | auto indices_tensor = in_tensors_.at(1); | ||||
| int indices_num = indices_tensor->ElementsNum(); | int indices_num = indices_tensor->ElementsNum(); | ||||
| @@ -104,8 +102,7 @@ int GatherOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) | |||||
| im_dst_x = out_tensors_[0]->Width(); | im_dst_x = out_tensors_[0]->Width(); | ||||
| } | } | ||||
| size_t img_dtype = CL_FLOAT; | size_t img_dtype = CL_FLOAT; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto enable_fp16_ = ocl_runtime->GetFp16Enable(); | |||||
| auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||||
| if (enable_fp16_) { | if (enable_fp16_) { | ||||
| img_dtype = CL_HALF_FLOAT; | img_dtype = CL_HALF_FLOAT; | ||||
| } | } | ||||
| @@ -117,7 +114,6 @@ int GatherOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) | |||||
| int GatherOpenCLKernel::Run() { | int GatherOpenCLKernel::Run() { | ||||
| MS_LOG(DEBUG) << this->name() << " Running! "; | MS_LOG(DEBUG) << this->name() << " Running! "; | ||||
| auto param = reinterpret_cast<GatherParameter *>(this->op_parameter_); | auto param = reinterpret_cast<GatherParameter *>(this->op_parameter_); | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| if (InitBuffer() != RET_OK) { | if (InitBuffer() != RET_OK) { | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -134,14 +130,14 @@ int GatherOpenCLKernel::Run() { | |||||
| std::vector<size_t> local = {1, 1, 1}; | std::vector<size_t> local = {1, 1, 1}; | ||||
| std::vector<size_t> global = {(size_t)out_tensors_[0]->Width(), (size_t)out_tensors_[0]->Height(), CO4}; | std::vector<size_t> global = {(size_t)out_tensors_[0]->Width(), (size_t)out_tensors_[0]->Height(), CO4}; | ||||
| int arg_cn = 0; | int arg_cn = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, indices_data_, lite::opencl::MemType::BUF); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, src_size); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, dst_size); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, indices_num); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->axis_); | |||||
| ocl_runtime->RunKernel(kernel_, global, local, nullptr); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, indices_data_, lite::opencl::MemType::BUF); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, src_size); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, dst_size); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, indices_num); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->axis_); | |||||
| ocl_runtime_->RunKernel(kernel_, global, local, nullptr); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -20,7 +20,6 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "ir/anf.h" | #include "ir/anf.h" | ||||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | #include "src/runtime/kernel/opencl/opencl_kernel.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "nnacl/gather_parameter.h" | #include "nnacl/gather_parameter.h" | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| @@ -19,7 +19,6 @@ | |||||
| #include <map> | #include <map> | ||||
| #include "nnacl/fp32/common_func.h" | #include "nnacl/fp32/common_func.h" | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "src/runtime/kernel/opencl/kernel/matmul.h" | #include "src/runtime/kernel/opencl/kernel/matmul.h" | ||||
| #ifndef PROGRAM_WITH_IL | #ifndef PROGRAM_WITH_IL | ||||
| #include "src/runtime/kernel/opencl/cl/matmul.cl.inc" | #include "src/runtime/kernel/opencl/cl/matmul.cl.inc" | ||||
| @@ -35,7 +34,6 @@ namespace mindspore::kernel { | |||||
| int MatMulOpenCLKernel::Init() { | int MatMulOpenCLKernel::Init() { | ||||
| std::string kernel_name = "MatMul"; | std::string kernel_name = "MatMul"; | ||||
| kernel_name += "_" + std::string(EnumNameFormat(op_format_)); | kernel_name += "_" + std::string(EnumNameFormat(op_format_)); | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto param = reinterpret_cast<MatMulParameter *>(op_parameter_); | auto param = reinterpret_cast<MatMulParameter *>(op_parameter_); | ||||
| transposeA = param->a_transpose_; | transposeA = param->a_transpose_; | ||||
| if (transposeA) { | if (transposeA) { | ||||
| @@ -43,7 +41,7 @@ int MatMulOpenCLKernel::Init() { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| transposeB = param->b_transpose_; | transposeB = param->b_transpose_; | ||||
| enable_fp16_ = ocl_runtime->GetFp16Enable(); | |||||
| enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||||
| if (in_tensors_[0]->shape().size() != out_tensors_[0]->shape().size() || | if (in_tensors_[0]->shape().size() != out_tensors_[0]->shape().size() || | ||||
| (in_tensors_[0]->shape().size() != 2 && in_tensors_[0]->shape().size() != 4)) { | (in_tensors_[0]->shape().size() != 2 && in_tensors_[0]->shape().size() != 4)) { | ||||
| MS_LOG(ERROR) << "matmul only support input shape size=2 or 4."; | MS_LOG(ERROR) << "matmul only support input shape size=2 or 4."; | ||||
| @@ -57,13 +55,13 @@ int MatMulOpenCLKernel::Init() { | |||||
| std::map<int, std::string> dims2str = {{2, "_2d"}, {4, "_4d"}}; | std::map<int, std::string> dims2str = {{2, "_2d"}, {4, "_4d"}}; | ||||
| kernel_name += dims2str[dims]; | kernel_name += dims2str[dims]; | ||||
| #ifdef PROGRAM_WITH_IL | #ifdef PROGRAM_WITH_IL | ||||
| kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); | |||||
| kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); | |||||
| #else | #else | ||||
| std::set<std::string> build_options; | std::set<std::string> build_options; | ||||
| std::string source = matmul_source; | std::string source = matmul_source; | ||||
| std::string program_name = "MatMul"; | std::string program_name = "MatMul"; | ||||
| ocl_runtime->LoadSource(program_name, source); | |||||
| ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| ocl_runtime_->LoadSource(program_name, source); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| #endif | #endif | ||||
| PadWeight(); | PadWeight(); | ||||
| @@ -79,7 +77,7 @@ int MatMulOpenCLKernel::ReSize() { return RET_OK; } | |||||
| void MatMulOpenCLKernel::PadWeight() { | void MatMulOpenCLKernel::PadWeight() { | ||||
| // ABMCI @ ABCICO = ABMCO | // ABMCI @ ABCICO = ABMCO | ||||
| auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator(); | |||||
| auto allocator = ocl_runtime_->GetAllocator(); | |||||
| int ci = inShape[3]; | int ci = inShape[3]; | ||||
| int ci4 = UP_DIV(ci, C4NUM); | int ci4 = UP_DIV(ci, C4NUM); | ||||
| int co = outShape[3]; | int co = outShape[3]; | ||||
| @@ -201,7 +199,6 @@ int MatMulOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) | |||||
| int MatMulOpenCLKernel::Run() { | int MatMulOpenCLKernel::Run() { | ||||
| MS_LOG(DEBUG) << this->name() << " Running!"; | MS_LOG(DEBUG) << this->name() << " Running!"; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| // local size should less than MAX_GROUP_SIZE | // local size should less than MAX_GROUP_SIZE | ||||
| std::vector<size_t> local = {32, 4, 1}; | std::vector<size_t> local = {32, 4, 1}; | ||||
| std::vector<size_t> global = {UP_DIV(static_cast<size_t>(outShape[3]), C4NUM), | std::vector<size_t> global = {UP_DIV(static_cast<size_t>(outShape[3]), C4NUM), | ||||
| @@ -210,14 +207,14 @@ int MatMulOpenCLKernel::Run() { | |||||
| int arg_count = 0; | int arg_count = 0; | ||||
| cl_int4 in_shape = {inShape[0], inShape[1], inShape[2], inShape[3]}; | cl_int4 in_shape = {inShape[0], inShape[1], inShape[2], inShape[3]}; | ||||
| cl_int4 out_shape = {outShape[0], outShape[1], outShape[2], outShape[3]}; | cl_int4 out_shape = {outShape[0], outShape[1], outShape[2], outShape[3]}; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_count++, padWeight_, lite::opencl::MemType::BUF); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_count++, bias_); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_count++, in_shape); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_count++, out_shape); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_count++, hasBias_ ? 1 : 0); | |||||
| ocl_runtime->RunKernel(kernel_, global, local, nullptr); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_count++, padWeight_, lite::opencl::MemType::BUF); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_count++, bias_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_shape); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_shape); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_count++, hasBias_ ? 1 : 0); | |||||
| ocl_runtime_->RunKernel(kernel_, global, local, nullptr); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -21,7 +21,6 @@ | |||||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | #include "src/runtime/kernel/opencl/opencl_kernel.h" | ||||
| #include "nnacl/matmul_parameter.h" | #include "nnacl/matmul_parameter.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| @@ -20,8 +20,6 @@ | |||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "src/runtime/kernel/opencl/utils.h" | #include "src/runtime/kernel/opencl/utils.h" | ||||
| #include "src/runtime/opencl/opencl_wrapper.h" | |||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "src/runtime/kernel/opencl/image_format.h" | #include "src/runtime/kernel/opencl/image_format.h" | ||||
| #ifndef PROGRAM_WITH_IL | #ifndef PROGRAM_WITH_IL | ||||
| #include "src/runtime/kernel/opencl/cl/avg_pool2d.cl.inc" | #include "src/runtime/kernel/opencl/cl/avg_pool2d.cl.inc" | ||||
| @@ -59,10 +57,9 @@ int PoolingOpenCLKernel::Init() { | |||||
| MS_LOG(ERROR) << "Init `Pooling2d` kernel failed!"; | MS_LOG(ERROR) << "Init `Pooling2d` kernel failed!"; | ||||
| return RET_INVALID_OP_NAME; | return RET_INVALID_OP_NAME; | ||||
| } | } | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| enable_fp16_ = ocl_runtime->GetFp16Enable(); | |||||
| enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||||
| #ifdef PROGRAM_WITH_IL | #ifdef PROGRAM_WITH_IL | ||||
| kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); | |||||
| kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); | |||||
| #else | #else | ||||
| kernel_name += "_" + std::string(EnumNameFormat(op_format_)); | kernel_name += "_" + std::string(EnumNameFormat(op_format_)); | ||||
| if (out_mem_type_ == OpenCLMemType::BUF) { | if (out_mem_type_ == OpenCLMemType::BUF) { | ||||
| @@ -72,8 +69,8 @@ int PoolingOpenCLKernel::Init() { | |||||
| kernel_name += "_IMG"; | kernel_name += "_IMG"; | ||||
| } | } | ||||
| std::set<std::string> build_options; | std::set<std::string> build_options; | ||||
| ocl_runtime->LoadSource(program_name, source); | |||||
| ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| ocl_runtime_->LoadSource(program_name, source); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| #endif | #endif | ||||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | in_ori_format_ = in_tensors_[0]->GetFormat(); | ||||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | out_ori_format_ = out_tensors_[0]->GetFormat(); | ||||
| @@ -124,7 +121,6 @@ int PoolingOpenCLKernel::ReSize() { return RET_OK; } | |||||
| int PoolingOpenCLKernel::Run() { | int PoolingOpenCLKernel::Run() { | ||||
| MS_LOG(DEBUG) << this->name() << " Running!"; | MS_LOG(DEBUG) << this->name() << " Running!"; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| int slices = UP_DIV(out_tensors_[0]->shape()[3], C4NUM); | int slices = UP_DIV(out_tensors_[0]->shape()[3], C4NUM); | ||||
| cl_int4 input_shape = {in_tensors_[0]->shape()[1], in_tensors_[0]->shape()[2], in_tensors_[0]->shape()[3], slices}; | cl_int4 input_shape = {in_tensors_[0]->shape()[1], in_tensors_[0]->shape()[2], in_tensors_[0]->shape()[3], slices}; | ||||
| @@ -135,21 +131,21 @@ int PoolingOpenCLKernel::Run() { | |||||
| cl_int2 padding = {parameter_->pad_u_, parameter_->pad_l_}; | cl_int2 padding = {parameter_->pad_u_, parameter_->pad_l_}; | ||||
| int arg_idx = 0; | int arg_idx = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, input_shape); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, output_shape); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, stride); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, kernel_size); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, padding); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_shape); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, stride); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, kernel_size); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, padding); | |||||
| std::vector<size_t> local_size; | std::vector<size_t> local_size; | ||||
| std::vector<size_t> global_size = InitGlobalSize(); | std::vector<size_t> global_size = InitGlobalSize(); | ||||
| int max_work_group_size = ocl_runtime->GetKernelMaxWorkGroupSize(kernel_(), (*ocl_runtime->Device())()); | |||||
| int max_work_group_size = ocl_runtime_->GetKernelMaxWorkGroupSize(kernel_(), (*ocl_runtime_->Device())()); | |||||
| local_size = GetCommonLocalSize(global_size, max_work_group_size); | local_size = GetCommonLocalSize(global_size, max_work_group_size); | ||||
| global_size = GetCommonGlobalSize(local_size, global_size); | global_size = GetCommonGlobalSize(local_size, global_size); | ||||
| ocl_runtime->RunKernel(kernel_, global_size, local_size, nullptr); | |||||
| ocl_runtime_->RunKernel(kernel_, global_size, local_size, nullptr); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -21,7 +21,6 @@ | |||||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | #include "src/runtime/kernel/opencl/opencl_kernel.h" | ||||
| #include "nnacl/fp32/pooling.h" | #include "nnacl/fp32/pooling.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| @@ -24,7 +24,6 @@ | |||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| #include "nnacl/fp32/common_func.h" | #include "nnacl/fp32/common_func.h" | ||||
| #include "src/runtime/kernel/opencl/kernel/prelu.h" | #include "src/runtime/kernel/opencl/kernel/prelu.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "src/runtime/kernel/opencl/cl/prelu.cl.inc" | #include "src/runtime/kernel/opencl/cl/prelu.cl.inc" | ||||
| using mindspore::kernel::KERNEL_ARCH::kGPU; | using mindspore::kernel::KERNEL_ARCH::kGPU; | ||||
| @@ -36,7 +35,7 @@ using mindspore::schema::PrimitiveType_PReLU; | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| void PReluOpenCLKernel::InitBuffer() { | void PReluOpenCLKernel::InitBuffer() { | ||||
| auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator(); | |||||
| auto allocator = ocl_runtime_->GetAllocator(); | |||||
| int elem_num = in_tensors_[0]->shape().size() == 2 ? in_tensors_[0]->shape()[1] : in_tensors_[0]->shape()[3]; | int elem_num = in_tensors_[0]->shape().size() == 2 ? in_tensors_[0]->shape()[1] : in_tensors_[0]->shape()[3]; | ||||
| int elem_num_c4 = UP_DIV(elem_num, C4NUM); | int elem_num_c4 = UP_DIV(elem_num, C4NUM); | ||||
| size_t img_dtype = CL_FLOAT; | size_t img_dtype = CL_FLOAT; | ||||
| @@ -91,12 +90,11 @@ int PReluOpenCLKernel::Init() { | |||||
| std::string source = prelu_source; | std::string source = prelu_source; | ||||
| std::string program_name = "PRelu"; | std::string program_name = "PRelu"; | ||||
| std::string kernel_name = "PRelu"; | std::string kernel_name = "PRelu"; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| enable_fp16_ = ocl_runtime->GetFp16Enable(); | |||||
| enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||||
| fp_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float); | fp_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float); | ||||
| InitBuffer(); | InitBuffer(); | ||||
| ocl_runtime->LoadSource(program_name, source); | |||||
| ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| ocl_runtime_->LoadSource(program_name, source); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | in_ori_format_ = in_tensors_[0]->GetFormat(); | ||||
| in_tensors_[0]->SetFormat(op_format_); | in_tensors_[0]->SetFormat(op_format_); | ||||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | out_ori_format_ = out_tensors_[0]->GetFormat(); | ||||
| @@ -107,18 +105,17 @@ int PReluOpenCLKernel::Init() { | |||||
| int PReluOpenCLKernel::Run() { | int PReluOpenCLKernel::Run() { | ||||
| MS_LOG(DEBUG) << op_parameter_->name_ << " Running!"; | MS_LOG(DEBUG) << op_parameter_->name_ << " Running!"; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| std::map<schema::Format, int> data_type{{schema::Format::Format_NHWC4, 1}, {schema::Format::Format_NC4HW4, 2}}; | std::map<schema::Format, int> data_type{{schema::Format::Format_NHWC4, 1}, {schema::Format::Format_NC4HW4, 2}}; | ||||
| int arg_idx = 0; | int arg_idx = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, input_shape_); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, PReluWeight_); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, reinterpret_cast<int>(in_tensors_[1]->shape()[0])); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_shape_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, PReluWeight_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, reinterpret_cast<int>(in_tensors_[1]->shape()[0])); | |||||
| std::vector<size_t> local = {1, 1}; | std::vector<size_t> local = {1, 1}; | ||||
| std::vector<size_t> global = {static_cast<size_t>(global_shape_.s[1]), static_cast<size_t>(global_shape_.s[2])}; | std::vector<size_t> global = {static_cast<size_t>(global_shape_.s[1]), static_cast<size_t>(global_shape_.s[2])}; | ||||
| auto ret = ocl_runtime->RunKernel(kernel_, global, local, nullptr); | |||||
| auto ret = ocl_runtime_->RunKernel(kernel_, global, local, nullptr); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Run kernel " << op_parameter_->name_ << " error."; | MS_LOG(ERROR) << "Run kernel " << op_parameter_->name_ << " error."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -22,7 +22,6 @@ | |||||
| #include "src/tensor.h" | #include "src/tensor.h" | ||||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | #include "src/runtime/kernel/opencl/opencl_kernel.h" | ||||
| #include "schema/model_generated.h" | #include "schema/model_generated.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| @@ -19,7 +19,6 @@ | |||||
| #include <map> | #include <map> | ||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "src/runtime/kernel/opencl/kernel/reduce.h" | #include "src/runtime/kernel/opencl/kernel/reduce.h" | ||||
| #include "src/runtime/kernel/opencl/cl/reduce.cl.inc" | #include "src/runtime/kernel/opencl/cl/reduce.cl.inc" | ||||
| @@ -59,8 +58,7 @@ int ReduceOpenCLKernel::Init() { | |||||
| } | } | ||||
| std::string kernel_name = reduce_type2str.at(reduce_param->mode_); | std::string kernel_name = reduce_type2str.at(reduce_param->mode_); | ||||
| kernel_name += "_" + std::string(EnumNameFormat(op_format_)); | kernel_name += "_" + std::string(EnumNameFormat(op_format_)); | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| enable_fp16_ = ocl_runtime->GetFp16Enable(); | |||||
| enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||||
| if (in_tensors_[0]->shape().back() != out_tensors_[0]->shape().back()) { | if (in_tensors_[0]->shape().back() != out_tensors_[0]->shape().back()) { | ||||
| MS_LOG(ERROR) << "Reduce input channel " << in_tensors_[0]->shape().back() << " should equal output channel" | MS_LOG(ERROR) << "Reduce input channel " << in_tensors_[0]->shape().back() << " should equal output channel" | ||||
| @@ -68,12 +66,12 @@ int ReduceOpenCLKernel::Init() { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| #ifdef PROGRAM_WITH_IL | #ifdef PROGRAM_WITH_IL | ||||
| kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); | |||||
| kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); | |||||
| #else | #else | ||||
| std::set<std::string> build_options; | std::set<std::string> build_options; | ||||
| std::string source = reduce_source; | std::string source = reduce_source; | ||||
| ocl_runtime->LoadSource(kernel_name, source); | |||||
| ocl_runtime->BuildKernel(kernel_, kernel_name, kernel_name, build_options); | |||||
| ocl_runtime_->LoadSource(kernel_name, source); | |||||
| ocl_runtime_->BuildKernel(kernel_, kernel_name, kernel_name, build_options); | |||||
| #endif | #endif | ||||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | in_ori_format_ = in_tensors_[0]->GetFormat(); | ||||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | out_ori_format_ = out_tensors_[0]->GetFormat(); | ||||
| @@ -130,15 +128,14 @@ int ReduceOpenCLKernel::Run() { | |||||
| int w = shapex[2]; | int w = shapex[2]; | ||||
| int c = shapex[3]; | int c = shapex[3]; | ||||
| int c4 = UP_DIV(c, C4NUM); | int c4 = UP_DIV(c, C4NUM); | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| std::vector<size_t> local = {}; | std::vector<size_t> local = {}; | ||||
| std::vector<size_t> global = {static_cast<size_t>(c4)}; | std::vector<size_t> global = {static_cast<size_t>(c4)}; | ||||
| cl_int4 size = {h, w, c4, 1}; | cl_int4 size = {h, w, c4, 1}; | ||||
| int arg_idx = 0; | int arg_idx = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, size); | |||||
| ocl_runtime->RunKernel(kernel_, global, local, nullptr); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size); | |||||
| ocl_runtime_->RunKernel(kernel_, global, local, nullptr); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -20,7 +20,6 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "src/lite_kernel.h" | #include "src/lite_kernel.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | #include "src/runtime/kernel/opencl/opencl_kernel.h" | ||||
| #include "nnacl/reduce_parameter.h" | #include "nnacl/reduce_parameter.h" | ||||
| @@ -18,7 +18,6 @@ | |||||
| #include <string> | #include <string> | ||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "src/runtime/kernel/opencl/kernel/reshape.h" | #include "src/runtime/kernel/opencl/kernel/reshape.h" | ||||
| #include "src/runtime/kernel/opencl/cl/reshape.cl.inc" | #include "src/runtime/kernel/opencl/cl/reshape.cl.inc" | ||||
| @@ -34,8 +33,7 @@ namespace mindspore::kernel { | |||||
| int ReshapeOpenCLKernel::Init() { | int ReshapeOpenCLKernel::Init() { | ||||
| std::string kernel_name = "reshape"; | std::string kernel_name = "reshape"; | ||||
| kernel_name += "_" + std::string(EnumNameFormat(op_format_)); | kernel_name += "_" + std::string(EnumNameFormat(op_format_)); | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| enable_fp16_ = ocl_runtime->GetFp16Enable(); | |||||
| enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||||
| if (out_tensors_[0]->shape().size() != 2 && out_tensors_[0]->shape().size() != 4) { | if (out_tensors_[0]->shape().size() != 2 && out_tensors_[0]->shape().size() != 4) { | ||||
| MS_LOG(ERROR) << "Reshape output size should in 2,4"; | MS_LOG(ERROR) << "Reshape output size should in 2,4"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -46,13 +44,13 @@ int ReshapeOpenCLKernel::Init() { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| #ifdef PROGRAM_WITH_IL | #ifdef PROGRAM_WITH_IL | ||||
| kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); | |||||
| kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); | |||||
| #else | #else | ||||
| std::set<std::string> build_options; | std::set<std::string> build_options; | ||||
| std::string source = reshape_source; | std::string source = reshape_source; | ||||
| std::string program_name = "reshape"; | std::string program_name = "reshape"; | ||||
| ocl_runtime->LoadSource(program_name, source); | |||||
| ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| ocl_runtime_->LoadSource(program_name, source); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| #endif | #endif | ||||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | in_ori_format_ = in_tensors_[0]->GetFormat(); | ||||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | out_ori_format_ = out_tensors_[0]->GetFormat(); | ||||
| @@ -112,17 +110,16 @@ int ReshapeOpenCLKernel::Run() { | |||||
| oh = out_tensors_[0]->shape()[1]; | oh = out_tensors_[0]->shape()[1]; | ||||
| ow = out_tensors_[0]->shape()[2]; | ow = out_tensors_[0]->shape()[2]; | ||||
| } | } | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| std::vector<size_t> local = {}; | std::vector<size_t> local = {}; | ||||
| std::vector<size_t> global = {(size_t)oh, (size_t)ow, (size_t)c4}; | std::vector<size_t> global = {(size_t)oh, (size_t)ow, (size_t)c4}; | ||||
| cl_int4 size = {h, w, c4, 1}; | cl_int4 size = {h, w, c4, 1}; | ||||
| cl_int4 size_out = {oh, ow, c4, 1}; | cl_int4 size_out = {oh, ow, c4, 1}; | ||||
| int arg_idx = 0; | int arg_idx = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, size); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, size_out); | |||||
| ocl_runtime->RunKernel(kernel_, global, local, nullptr); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size_out); | |||||
| ocl_runtime_->RunKernel(kernel_, global, local, nullptr); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -20,7 +20,6 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "src/lite_kernel.h" | #include "src/lite_kernel.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | #include "src/runtime/kernel/opencl/opencl_kernel.h" | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| @@ -245,7 +245,6 @@ int ScaleOpenCLKernel::InitBuffer() { | |||||
| } | } | ||||
| int ScaleOpenCLKernel::Init() { | int ScaleOpenCLKernel::Init() { | ||||
| ocl_runtime_ = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| std::string kernel_name; | std::string kernel_name; | ||||
| const ScaleParameter *scale_param = reinterpret_cast<const ScaleParameter *>(op_parameter_); | const ScaleParameter *scale_param = reinterpret_cast<const ScaleParameter *>(op_parameter_); | ||||
| @@ -19,7 +19,6 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "nnacl/scale.h" | #include "nnacl/scale.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | #include "src/runtime/kernel/opencl/opencl_kernel.h" | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| @@ -42,7 +41,6 @@ class ScaleOpenCLKernel : public OpenCLKernel { | |||||
| int InitBuffer(); | int InitBuffer(); | ||||
| cl::Kernel kernel_; | cl::Kernel kernel_; | ||||
| lite::opencl::OpenCLRuntime *ocl_runtime_; | |||||
| bool element_flag_{true}; | bool element_flag_{true}; | ||||
| void *scale_ptr_{nullptr}; | void *scale_ptr_{nullptr}; | ||||
| void *offset_ptr_{nullptr}; | void *offset_ptr_{nullptr}; | ||||
| @@ -18,7 +18,6 @@ | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include <set> | #include <set> | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "src/runtime/kernel/opencl/kernel/slice.h" | #include "src/runtime/kernel/opencl/kernel/slice.h" | ||||
| #include "src/runtime/kernel/opencl/utils.h" | #include "src/runtime/kernel/opencl/utils.h" | ||||
| #include "src/runtime/kernel/opencl/cl/slice.cl.inc" | #include "src/runtime/kernel/opencl/cl/slice.cl.inc" | ||||
| @@ -40,8 +39,7 @@ int SliceOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||||
| im_dst_x = out_tensors_[0]->Width(); | im_dst_x = out_tensors_[0]->Width(); | ||||
| } | } | ||||
| size_t img_dtype = CL_FLOAT; | size_t img_dtype = CL_FLOAT; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto enable_fp16_ = ocl_runtime->GetFp16Enable(); | |||||
| auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||||
| if (enable_fp16_) { | if (enable_fp16_) { | ||||
| img_dtype = CL_HALF_FLOAT; | img_dtype = CL_HALF_FLOAT; | ||||
| } | } | ||||
| @@ -71,9 +69,8 @@ int SliceOpenCLKernel::Init() { | |||||
| std::set<std::string> build_options; | std::set<std::string> build_options; | ||||
| std::string source = slice_source; | std::string source = slice_source; | ||||
| std::string program_name = "slice"; | std::string program_name = "slice"; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| ocl_runtime->LoadSource(program_name, source); | |||||
| ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| ocl_runtime_->LoadSource(program_name, source); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -96,7 +93,6 @@ void SlcieGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t> *l | |||||
| int SliceOpenCLKernel::Run() { | int SliceOpenCLKernel::Run() { | ||||
| MS_LOG(DEBUG) << this->name() << " Running! "; | MS_LOG(DEBUG) << this->name() << " Running! "; | ||||
| auto param = reinterpret_cast<SliceParameter *>(this->op_parameter_); | auto param = reinterpret_cast<SliceParameter *>(this->op_parameter_); | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto input_shape = in_tensors_[0]->shape(); | auto input_shape = in_tensors_[0]->shape(); | ||||
| cl_int4 input_shape_ = {input_shape[0], input_shape[1], input_shape[2], UP_DIV(input_shape[3], C4NUM)}; | cl_int4 input_shape_ = {input_shape[0], input_shape[1], input_shape[2], UP_DIV(input_shape[3], C4NUM)}; | ||||
| cl_int4 size_ = {param->size_[0], param->size_[1], param->size_[2], UP_DIV(param->size_[3], C4NUM)}; | cl_int4 size_ = {param->size_[0], param->size_[1], param->size_[2], UP_DIV(param->size_[3], C4NUM)}; | ||||
| @@ -105,18 +101,18 @@ int SliceOpenCLKernel::Run() { | |||||
| uint32_t OH = param->size_[1]; | uint32_t OH = param->size_[1]; | ||||
| uint32_t OW = param->size_[2]; | uint32_t OW = param->size_[2]; | ||||
| const std::vector<size_t> &max_global = ocl_runtime->GetWorkItemSize(); | |||||
| const std::vector<size_t> &max_global = ocl_runtime_->GetWorkItemSize(); | |||||
| std::vector<size_t> local = {1, 1, 1}; // init local | std::vector<size_t> local = {1, 1, 1}; // init local | ||||
| std::vector<size_t> global = {1, OH, OW}; | std::vector<size_t> global = {1, OH, OW}; | ||||
| SlcieGetWorkGroup(global, &local, max_global[0]); | SlcieGetWorkGroup(global, &local, max_global[0]); | ||||
| int arg_cn = 0; | int arg_cn = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); // input tensor | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); // out tensor | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape_); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, size_); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, begin_); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, sharedNoUpdiv); | |||||
| ocl_runtime->RunKernel(kernel_, global, local, nullptr); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); // input tensor | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); // out tensor | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, size_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, begin_); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, sharedNoUpdiv); | |||||
| ocl_runtime_->RunKernel(kernel_, global, local, nullptr); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -20,7 +20,6 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "ir/anf.h" | #include "ir/anf.h" | ||||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | #include "src/runtime/kernel/opencl/opencl_kernel.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "nnacl/fp32/slice.h" | #include "nnacl/fp32/slice.h" | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| @@ -19,7 +19,6 @@ | |||||
| #include <set> | #include <set> | ||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "src/runtime/kernel/opencl/utils.h" | #include "src/runtime/kernel/opencl/utils.h" | ||||
| #ifndef PROGRAM_WITH_IL | #ifndef PROGRAM_WITH_IL | ||||
| #include "src/runtime/kernel/opencl/cl/softmax.cl.inc" | #include "src/runtime/kernel/opencl/cl/softmax.cl.inc" | ||||
| @@ -51,7 +50,7 @@ int SoftmaxOpenCLKernel::InitGlobalSize() { | |||||
| int SoftmaxOpenCLKernel::SetWorkGroupSize() { | int SoftmaxOpenCLKernel::SetWorkGroupSize() { | ||||
| // set work group size | // set work group size | ||||
| InitGlobalSize(); | InitGlobalSize(); | ||||
| int max_work_group_size = runtime_->GetKernelMaxWorkGroupSize(kernel_(), (*runtime_->Device())()); | |||||
| int max_work_group_size = ocl_runtime_->GetKernelMaxWorkGroupSize(kernel_(), (*ocl_runtime_->Device())()); | |||||
| local_size_ = GetCommonLocalSize(global_size_, max_work_group_size); | local_size_ = GetCommonLocalSize(global_size_, max_work_group_size); | ||||
| global_size_ = GetCommonGlobalSize(local_size_, global_size_); | global_size_ = GetCommonGlobalSize(local_size_, global_size_); | ||||
| return lite::RET_OK; | return lite::RET_OK; | ||||
| @@ -101,8 +100,7 @@ int SoftmaxOpenCLKernel::Init() { | |||||
| std::string program_name = "SoftMax"; | std::string program_name = "SoftMax"; | ||||
| std::string source = softmax_source; | std::string source = softmax_source; | ||||
| runtime_ = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| enable_fp16_ = runtime_->GetFp16Enable(); | |||||
| enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||||
| // framework not set this param yet! just use default. | // framework not set this param yet! just use default. | ||||
| if (in_tensors_[0]->shape().size() == 4) { | if (in_tensors_[0]->shape().size() == 4) { | ||||
| // support 4d tensor | // support 4d tensor | ||||
| @@ -133,8 +131,8 @@ int SoftmaxOpenCLKernel::Init() { | |||||
| program_name += "_IMG"; | program_name += "_IMG"; | ||||
| } | } | ||||
| std::set<std::string> build_options; | std::set<std::string> build_options; | ||||
| runtime_->LoadSource(program_name, source); | |||||
| runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| ocl_runtime_->LoadSource(program_name, source); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| #endif | #endif | ||||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | in_ori_format_ = in_tensors_[0]->GetFormat(); | ||||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | out_ori_format_ = out_tensors_[0]->GetFormat(); | ||||
| @@ -158,32 +156,32 @@ int SoftmaxOpenCLKernel::Run() { | |||||
| auto mask_ = GetMaskForLastChannel(channel_size); | auto mask_ = GetMaskForLastChannel(channel_size); | ||||
| cl_float4 mask = {mask_[0], mask_[1], mask_[2], mask_[3]}; | cl_float4 mask = {mask_[0], mask_[1], mask_[2], mask_[3]}; | ||||
| runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| if (is_image_out_) { | if (is_image_out_) { | ||||
| runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| } else { | } else { | ||||
| runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); | |||||
| } | } | ||||
| runtime_->SetKernelArg(kernel_, arg_idx++, mask); | |||||
| runtime_->SetKernelArg(kernel_, arg_idx++, slices); | |||||
| runtime_->SetKernelArg(kernel_, arg_idx, slices_x32); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, mask); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, slices); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx, slices_x32); | |||||
| SetWorkGroupSize1x1(); | SetWorkGroupSize1x1(); | ||||
| } else { | } else { | ||||
| int slices = UP_DIV(out_tensors_[0]->shape()[3], C4NUM); | int slices = UP_DIV(out_tensors_[0]->shape()[3], C4NUM); | ||||
| cl_int4 input_shape = {in_tensors_[0]->shape()[1], in_tensors_[0]->shape()[2], in_tensors_[0]->shape()[3], slices}; | cl_int4 input_shape = {in_tensors_[0]->shape()[1], in_tensors_[0]->shape()[2], in_tensors_[0]->shape()[3], slices}; | ||||
| runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| if (is_image_out_) { | if (is_image_out_) { | ||||
| runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| } else { | } else { | ||||
| runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); | |||||
| } | } | ||||
| runtime_->SetKernelArg(kernel_, arg_idx, input_shape); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx, input_shape); | |||||
| SetWorkGroupSize(); | SetWorkGroupSize(); | ||||
| } | } | ||||
| // run opengl kernel | // run opengl kernel | ||||
| runtime_->RunKernel(kernel_, global_size_, local_size_, nullptr); | |||||
| ocl_runtime_->RunKernel(kernel_, global_size_, local_size_, nullptr); | |||||
| return lite::RET_OK; | return lite::RET_OK; | ||||
| } | } | ||||
| @@ -21,7 +21,6 @@ | |||||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | #include "src/runtime/kernel/opencl/opencl_kernel.h" | ||||
| #include "nnacl/fp32/softmax.h" | #include "nnacl/fp32/softmax.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| @@ -46,7 +45,6 @@ class SoftmaxOpenCLKernel : public OpenCLKernel { | |||||
| private: | private: | ||||
| cl::Kernel kernel_; | cl::Kernel kernel_; | ||||
| SoftmaxParameter *parameter_; | SoftmaxParameter *parameter_; | ||||
| lite::opencl::OpenCLRuntime *runtime_; | |||||
| bool onexone_flag_{false}; | bool onexone_flag_{false}; | ||||
| std::vector<size_t> local_size_; | std::vector<size_t> local_size_; | ||||
| @@ -21,7 +21,6 @@ | |||||
| #include <utility> | #include <utility> | ||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "src/runtime/kernel/opencl/cl/to_format.cl.inc" | #include "src/runtime/kernel/opencl/cl/to_format.cl.inc" | ||||
| using mindspore::kernel::KERNEL_ARCH::kGPU; | using mindspore::kernel::KERNEL_ARCH::kGPU; | ||||
| @@ -33,7 +32,6 @@ using mindspore::schema::PrimitiveType_ToFormat; | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| int ToFormatOpenCLKernel::Init() { | int ToFormatOpenCLKernel::Init() { | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto parameter = reinterpret_cast<OpenCLToFormatParameter *>(op_parameter_); | auto parameter = reinterpret_cast<OpenCLToFormatParameter *>(op_parameter_); | ||||
| out_mem_type_ = parameter->out_mem_type; | out_mem_type_ = parameter->out_mem_type; | ||||
| std::string program_name = "to_format"; | std::string program_name = "to_format"; | ||||
| @@ -53,12 +51,12 @@ int ToFormatOpenCLKernel::Init() { | |||||
| this->set_name(kernel_name); | this->set_name(kernel_name); | ||||
| #ifdef PROGRAM_WITH_IL | #ifdef PROGRAM_WITH_IL | ||||
| kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); | |||||
| kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); | |||||
| #else | #else | ||||
| std::set<std::string> build_options; | std::set<std::string> build_options; | ||||
| std::string source = to_format_source; | std::string source = to_format_source; | ||||
| ocl_runtime->LoadSource(program_name, source); | |||||
| ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| ocl_runtime_->LoadSource(program_name, source); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| #endif | #endif | ||||
| InitNHWCShape(); | InitNHWCShape(); | ||||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | MS_LOG(DEBUG) << kernel_name << " Init Done!"; | ||||
| @@ -147,7 +145,7 @@ int ToFormatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| img_size->clear(); | img_size->clear(); | ||||
| auto enable_fp16_ = lite::opencl::OpenCLRuntime::GetInstance()->GetFp16Enable(); | |||||
| auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||||
| size_t img_dtype = CL_FLOAT; | size_t img_dtype = CL_FLOAT; | ||||
| if (enable_fp16_) { | if (enable_fp16_) { | ||||
| img_dtype = CL_HALF_FLOAT; | img_dtype = CL_HALF_FLOAT; | ||||
| @@ -158,7 +156,6 @@ int ToFormatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size | |||||
| } | } | ||||
| int ToFormatOpenCLKernel::Run() { | int ToFormatOpenCLKernel::Run() { | ||||
| MS_LOG(DEBUG) << this->name() << " Running!"; | MS_LOG(DEBUG) << this->name() << " Running!"; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| std::vector<size_t> local = {}; | std::vector<size_t> local = {}; | ||||
| std::vector<size_t> global; | std::vector<size_t> global; | ||||
| GetGlobalSize(0, &global); | GetGlobalSize(0, &global); | ||||
| @@ -167,11 +164,11 @@ int ToFormatOpenCLKernel::Run() { | |||||
| cl_int4 gsize{(cl_int)global[0], (cl_int)global[1], (cl_int)global[2], 1}; | cl_int4 gsize{(cl_int)global[0], (cl_int)global[1], (cl_int)global[2], 1}; | ||||
| auto src_mem_type = (out_mem_type_ == OpenCLMemType::IMG) ? lite::opencl::MemType::BUF : lite::opencl::MemType::IMG; | auto src_mem_type = (out_mem_type_ == OpenCLMemType::IMG) ? lite::opencl::MemType::BUF : lite::opencl::MemType::IMG; | ||||
| auto dst_mem_type = (out_mem_type_ == OpenCLMemType::IMG) ? lite::opencl::MemType::IMG : lite::opencl::MemType::BUF; | auto dst_mem_type = (out_mem_type_ == OpenCLMemType::IMG) ? lite::opencl::MemType::IMG : lite::opencl::MemType::BUF; | ||||
| ocl_runtime->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c(), src_mem_type); | |||||
| ocl_runtime->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c(), dst_mem_type); | |||||
| ocl_runtime->SetKernelArg(kernel_, 2, gsize); | |||||
| ocl_runtime->SetKernelArg(kernel_, 3, shape); | |||||
| ocl_runtime->RunKernel(kernel_, global, local, nullptr); | |||||
| ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c(), src_mem_type); | |||||
| ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c(), dst_mem_type); | |||||
| ocl_runtime_->SetKernelArg(kernel_, 2, gsize); | |||||
| ocl_runtime_->SetKernelArg(kernel_, 3, shape); | |||||
| ocl_runtime_->RunKernel(kernel_, global, local, nullptr); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -20,7 +20,6 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "src/lite_kernel.h" | #include "src/lite_kernel.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | #include "src/runtime/kernel/opencl/opencl_kernel.h" | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| @@ -18,7 +18,6 @@ | |||||
| #include <string> | #include <string> | ||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "src/runtime/kernel/opencl/kernel/transpose.h" | #include "src/runtime/kernel/opencl/kernel/transpose.h" | ||||
| #ifndef PROGRAM_WITH_IL | #ifndef PROGRAM_WITH_IL | ||||
| #include "src/runtime/kernel/opencl/cl/transpose.cl.inc" | #include "src/runtime/kernel/opencl/cl/transpose.cl.inc" | ||||
| @@ -34,8 +33,7 @@ namespace mindspore::kernel { | |||||
| int TransposeOpenCLKernel::Init() { | int TransposeOpenCLKernel::Init() { | ||||
| std::string kernel_name = "transpose"; | std::string kernel_name = "transpose"; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| enable_fp16_ = ocl_runtime->GetFp16Enable(); | |||||
| enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||||
| auto param = reinterpret_cast<TransposeParameter *>(op_parameter_); | auto param = reinterpret_cast<TransposeParameter *>(op_parameter_); | ||||
| if (param->num_axes_ == 4 && param->perm_[0] == 0 && param->perm_[1] == 3 && param->perm_[2] == 1 && | if (param->num_axes_ == 4 && param->perm_[0] == 0 && param->perm_[1] == 3 && param->perm_[2] == 1 && | ||||
| param->perm_[3] == 2) { | param->perm_[3] == 2) { | ||||
| @@ -52,13 +50,13 @@ int TransposeOpenCLKernel::Init() { | |||||
| kernel_name += "_IMG"; | kernel_name += "_IMG"; | ||||
| } | } | ||||
| #ifdef PROGRAM_WITH_IL | #ifdef PROGRAM_WITH_IL | ||||
| kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); | |||||
| kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); | |||||
| #else | #else | ||||
| std::set<std::string> build_options; | std::set<std::string> build_options; | ||||
| std::string source = transpose_source; | std::string source = transpose_source; | ||||
| std::string program_name = "transpose"; | std::string program_name = "transpose"; | ||||
| ocl_runtime->LoadSource(program_name, source); | |||||
| ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| ocl_runtime_->LoadSource(program_name, source); | |||||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||||
| #endif | #endif | ||||
| if ((in_tensors_[0]->shape()[1] * in_tensors_[0]->shape()[2]) % 4 != 0) { | if ((in_tensors_[0]->shape()[1] * in_tensors_[0]->shape()[2]) % 4 != 0) { | ||||
| MS_LOG(ERROR) << "input H * W % 4 != 0 not support!"; | MS_LOG(ERROR) << "input H * W % 4 != 0 not support!"; | ||||
| @@ -114,24 +112,23 @@ int TransposeOpenCLKernel::Run() { | |||||
| int c = shapex[3]; | int c = shapex[3]; | ||||
| int c4 = UP_DIV(c, 4); | int c4 = UP_DIV(c, 4); | ||||
| int hw4 = UP_DIV(h * w, 4); | int hw4 = UP_DIV(h * w, 4); | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| std::vector<size_t> local = {16, 16}; | std::vector<size_t> local = {16, 16}; | ||||
| std::vector<size_t> global = {UP_ROUND(hw4, local[0]), UP_ROUND(c4, local[1])}; | std::vector<size_t> global = {UP_ROUND(hw4, local[0]), UP_ROUND(c4, local[1])}; | ||||
| cl_int2 HW = {h * w, hw4}; | cl_int2 HW = {h * w, hw4}; | ||||
| cl_int2 C = {c, c4}; | cl_int2 C = {c, c4}; | ||||
| int arg_idx = 0; | int arg_idx = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| if (out_mem_type_ == OpenCLMemType::BUF) { | if (out_mem_type_ == OpenCLMemType::BUF) { | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); | |||||
| } else { | } else { | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| } | } | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, HW); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, C); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, w); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, h); | |||||
| ocl_runtime->RunKernel(kernel_, global, local, nullptr); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, HW); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, C); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, w); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, h); | |||||
| ocl_runtime_->RunKernel(kernel_, global, local, nullptr); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -21,7 +21,6 @@ | |||||
| #include "src/lite_kernel.h" | #include "src/lite_kernel.h" | ||||
| #include "nnacl/transpose.h" | #include "nnacl/transpose.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | #include "src/runtime/kernel/opencl/opencl_kernel.h" | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| @@ -20,6 +20,7 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include "src/lite_kernel.h" | #include "src/lite_kernel.h" | ||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| @@ -36,7 +37,16 @@ class OpenCLKernel : public LiteKernel { | |||||
| public: | public: | ||||
| explicit OpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | explicit OpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs) | const std::vector<lite::Tensor *> &outputs) | ||||
| : LiteKernel(parameter, inputs, outputs, nullptr, nullptr) {} | |||||
| : LiteKernel(parameter, inputs, outputs, nullptr, nullptr) { | |||||
| ocl_runtime_ = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| } | |||||
| ~OpenCLKernel() { | |||||
| if (ocl_runtime_ != nullptr) { | |||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| ocl_runtime_ = nullptr; | |||||
| } | |||||
| } | |||||
| virtual int Init() { return RET_ERROR; } | virtual int Init() { return RET_ERROR; } | ||||
| virtual int Prepare() { return RET_ERROR; } | virtual int Prepare() { return RET_ERROR; } | ||||
| @@ -59,6 +69,7 @@ class OpenCLKernel : public LiteKernel { | |||||
| schema::Format in_ori_format_{schema::Format::Format_NHWC}; | schema::Format in_ori_format_{schema::Format::Format_NHWC}; | ||||
| schema::Format out_ori_format_{schema::Format::Format_NHWC4}; | schema::Format out_ori_format_{schema::Format::Format_NHWC4}; | ||||
| schema::Format op_format_{schema::Format::Format_NHWC4}; | schema::Format op_format_{schema::Format::Format_NHWC4}; | ||||
| lite::opencl::OpenCLRuntime *ocl_runtime_{nullptr}; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -99,7 +99,7 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_te | |||||
| out_tensors->emplace_back(new_tensor); | out_tensors->emplace_back(new_tensor); | ||||
| KernelKey desc{kGPU, kNumberTypeFloat32, schema::PrimitiveType_ToFormat}; | KernelKey desc{kGPU, kNumberTypeFloat32, schema::PrimitiveType_ToFormat}; | ||||
| if (mem_type == OpenCLMemType::IMG && lite::opencl::OpenCLRuntime::GetInstance()->GetFp16Enable()) { | |||||
| if (mem_type == OpenCLMemType::IMG && ocl_runtime_->GetFp16Enable()) { | |||||
| desc.data_type = kNumberTypeFloat16; | desc.data_type = kNumberTypeFloat16; | ||||
| new_tensor->set_data_type(kNumberTypeFloat16); | new_tensor->set_data_type(kNumberTypeFloat16); | ||||
| } | } | ||||
| @@ -160,7 +160,8 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_te | |||||
| } | } | ||||
| int SubGraphOpenCLKernel::Init() { | int SubGraphOpenCLKernel::Init() { | ||||
| allocator_ = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator(); | |||||
| ocl_runtime_ = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| allocator_ = ocl_runtime_->GetAllocator(); | |||||
| MS_LOG(DEBUG) << "input num=" << in_tensors_.size() << ", output num=" << out_tensors_.size(); | MS_LOG(DEBUG) << "input num=" << in_tensors_.size() << ", output num=" << out_tensors_.size(); | ||||
| for (const auto tensor : in_tensors_) { | for (const auto tensor : in_tensors_) { | ||||
| tensor->set_allocator(allocator_); | tensor->set_allocator(allocator_); | ||||
| @@ -195,8 +196,7 @@ int SubGraphOpenCLKernel::Init() { | |||||
| } | } | ||||
| int SubGraphOpenCLKernel::UpdateTensorDataType() { | int SubGraphOpenCLKernel::UpdateTensorDataType() { | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| bool is_fp16 = ocl_runtime->GetFp16Enable(); | |||||
| bool is_fp16 = ocl_runtime_->GetFp16Enable(); | |||||
| if (is_fp16 && (in_tensors_[0]->data_type() == kNumberTypeFloat32)) { | if (is_fp16 && (in_tensors_[0]->data_type() == kNumberTypeFloat32)) { | ||||
| std::set<lite::Tensor *> out_set; | std::set<lite::Tensor *> out_set; | ||||
| out_set.insert(in_tensors_.begin(), in_tensors_.end()); | out_set.insert(in_tensors_.begin(), in_tensors_.end()); | ||||
| @@ -292,16 +292,25 @@ int SubGraphOpenCLKernel::UnInit() { | |||||
| delete tensor; | delete tensor; | ||||
| } | } | ||||
| } | } | ||||
| in_convert_tensors_.clear(); | |||||
| for (const auto &tensor : out_convert_tensors_) { | for (const auto &tensor : out_convert_tensors_) { | ||||
| if (tensor != nullptr) { | if (tensor != nullptr) { | ||||
| delete tensor; | delete tensor; | ||||
| } | } | ||||
| } | } | ||||
| for (const auto &op : in_convert_ops_) { | |||||
| out_convert_tensors_.clear(); | |||||
| for (const auto &op : nodes_) { | |||||
| if (op != nullptr) { | if (op != nullptr) { | ||||
| delete op; | delete op; | ||||
| } | } | ||||
| } | } | ||||
| nodes_.clear(); | |||||
| in_convert_ops_.clear(); | |||||
| out_convert_ops_.clear(); | |||||
| if (ocl_runtime_ != nullptr) { | |||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| ocl_runtime_ = nullptr; | |||||
| } | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -310,14 +319,13 @@ int SubGraphOpenCLKernel::InferShape() { return RET_OK; } | |||||
| int SubGraphOpenCLKernel::ReSize() { return RET_OK; } | int SubGraphOpenCLKernel::ReSize() { return RET_OK; } | ||||
| int SubGraphOpenCLKernel::Run() { | int SubGraphOpenCLKernel::Run() { | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| for (auto &tensor : in_tensors_) { | for (auto &tensor : in_tensors_) { | ||||
| allocator_->UnmapBuffer(tensor->data_c()); | allocator_->UnmapBuffer(tensor->data_c()); | ||||
| } | } | ||||
| lite::opencl::OpenCLExecutor executor; | lite::opencl::OpenCLExecutor executor; | ||||
| executor.Run(in_tensors_, out_tensors_, nodes_, allocator_); | executor.Run(in_tensors_, out_tensors_, nodes_, allocator_); | ||||
| ocl_runtime->SyncCommandQueue(); | |||||
| ocl_runtime_->SyncCommandQueue(); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -64,6 +64,7 @@ class SubGraphOpenCLKernel : public SubGraphKernel { | |||||
| std::vector<OpenCLToFormatParameter *> out_parameters_; | std::vector<OpenCLToFormatParameter *> out_parameters_; | ||||
| std::vector<LiteKernel *> in_convert_ops_; | std::vector<LiteKernel *> in_convert_ops_; | ||||
| std::vector<LiteKernel *> out_convert_ops_; | std::vector<LiteKernel *> out_convert_ops_; | ||||
| lite::opencl::OpenCLRuntime *ocl_runtime_{nullptr}; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -23,8 +23,6 @@ | |||||
| namespace mindspore::lite::opencl { | namespace mindspore::lite::opencl { | ||||
| OpenCLAllocator::OpenCLAllocator() {} | |||||
| OpenCLAllocator::OpenCLAllocator(OpenCLRuntime *ocl_runtime) : ocl_runtime_(ocl_runtime) {} | OpenCLAllocator::OpenCLAllocator(OpenCLRuntime *ocl_runtime) : ocl_runtime_(ocl_runtime) {} | ||||
| OpenCLAllocator::~OpenCLAllocator() { Clear(); } | OpenCLAllocator::~OpenCLAllocator() { Clear(); } | ||||
| @@ -49,9 +47,6 @@ void OpenCLAllocator::UnLock() { | |||||
| void *OpenCLAllocator::Malloc(size_t size) { return Malloc(size, std::vector<size_t>{}); } | void *OpenCLAllocator::Malloc(size_t size) { return Malloc(size, std::vector<size_t>{}); } | ||||
| void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t> &img_size) { | void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t> &img_size) { | ||||
| if (ocl_runtime_ == nullptr) { | |||||
| ocl_runtime_ = opencl::OpenCLRuntime::GetInstance(); | |||||
| } | |||||
| auto svm_capabilities = ocl_runtime_->GetSVMCapabilities(); | auto svm_capabilities = ocl_runtime_->GetSVMCapabilities(); | ||||
| size_t img_pitch = 0; | size_t img_pitch = 0; | ||||
| @@ -144,9 +139,6 @@ void *OpenCLAllocator::CreateImageFromHost(void *data, size_t size, const std::v | |||||
| MS_LOG(ERROR) << "MallocData out of max_size, size: " << size; | MS_LOG(ERROR) << "MallocData out of max_size, size: " << size; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| if (ocl_runtime_ == nullptr) { | |||||
| ocl_runtime_ = opencl::OpenCLRuntime::GetInstance(); | |||||
| } | |||||
| Lock(); | Lock(); | ||||
| auto iter = free_list_.lower_bound(size); | auto iter = free_list_.lower_bound(size); | ||||
| while (iter != free_list_.end() && (iter->second->size_ >= size) && (iter->second->size_ < (size << shift_factor_))) { | while (iter != free_list_.end() && (iter->second->size_ >= size) && (iter->second->size_ < (size << shift_factor_))) { | ||||
| @@ -258,9 +250,6 @@ void *OpenCLAllocator::GetBuffer(void *buffer) { | |||||
| void OpenCLAllocator::Clear() { | void OpenCLAllocator::Clear() { | ||||
| Lock(); | Lock(); | ||||
| if (ocl_runtime_ == nullptr) { | |||||
| ocl_runtime_ = opencl::OpenCLRuntime::GetInstance(); | |||||
| } | |||||
| auto svm_capabilities = ocl_runtime_->GetSVMCapabilities(); | auto svm_capabilities = ocl_runtime_->GetSVMCapabilities(); | ||||
| for (auto it = allocated_list_.begin(); it != allocated_list_.end(); it++) { | for (auto it = allocated_list_.begin(); it != allocated_list_.end(); it++) { | ||||
| if (svm_capabilities) { | if (svm_capabilities) { | ||||
| @@ -306,9 +295,6 @@ void OpenCLAllocator::Clear() { | |||||
| } | } | ||||
| void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue, bool sync) { | void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue, bool sync) { | ||||
| if (ocl_runtime_ == nullptr) { | |||||
| ocl_runtime_ = opencl::OpenCLRuntime::GetInstance(); | |||||
| } | |||||
| auto svm_capabilities = ocl_runtime_->GetSVMCapabilities(); | auto svm_capabilities = ocl_runtime_->GetSVMCapabilities(); | ||||
| if (svm_capabilities) { | if (svm_capabilities) { | ||||
| if (!(svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)) { | if (!(svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)) { | ||||
| @@ -362,9 +348,6 @@ void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue, | |||||
| } | } | ||||
| int OpenCLAllocator::UnmapBuffer(void *host_ptr, void *command_queue) { | int OpenCLAllocator::UnmapBuffer(void *host_ptr, void *command_queue) { | ||||
| if (ocl_runtime_ == nullptr) { | |||||
| ocl_runtime_ = opencl::OpenCLRuntime::GetInstance(); | |||||
| } | |||||
| auto svm_capabilities = ocl_runtime_->GetSVMCapabilities(); | auto svm_capabilities = ocl_runtime_->GetSVMCapabilities(); | ||||
| if (svm_capabilities) { | if (svm_capabilities) { | ||||
| if (!(svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)) { | if (!(svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)) { | ||||
| @@ -45,7 +45,6 @@ enum class MemType : char { SVM, BUF, IMG }; | |||||
| class OpenCLAllocator : public Allocator { | class OpenCLAllocator : public Allocator { | ||||
| public: | public: | ||||
| OpenCLAllocator(); | |||||
| explicit OpenCLAllocator(OpenCLRuntime *ocl_runtime); | explicit OpenCLAllocator(OpenCLRuntime *ocl_runtime); | ||||
| ~OpenCLAllocator() override; | ~OpenCLAllocator() override; | ||||
| void SetContext(const AllocatorContext &ctx) override; | void SetContext(const AllocatorContext &ctx) override; | ||||
| @@ -27,7 +27,11 @@ | |||||
| namespace mindspore::lite::opencl { | namespace mindspore::lite::opencl { | ||||
| class OpenCLExecutor : Executor { | class OpenCLExecutor : Executor { | ||||
| public: | public: | ||||
| OpenCLExecutor() : Executor() { allocator_ = OpenCLRuntime::GetInstance()->GetAllocator(); } | |||||
| OpenCLExecutor() : Executor() { | |||||
| auto ocl_runtime = OpenCLRuntime::GetInstance(); | |||||
| allocator_ = ocl_runtime->GetAllocator(); | |||||
| OpenCLRuntime::DeleteInstance(); | |||||
| } | |||||
| int Prepare(const std::vector<kernel::LiteKernel *> &kernels); | int Prepare(const std::vector<kernel::LiteKernel *> &kernels); | ||||
| @@ -244,7 +244,7 @@ kernel::LiteKernel *Scheduler::ScheduleNode(const std::vector<Tensor *> &in_tens | |||||
| TypeId data_type = GetFirstFp32Fp16OrInt8Type(in_tensors); | TypeId data_type = GetFirstFp32Fp16OrInt8Type(in_tensors); | ||||
| kernel::KernelKey desc{kernel::KERNEL_ARCH::kCPU, data_type, static_cast<schema::PrimitiveType>(primitive->Type())}; | kernel::KernelKey desc{kernel::KERNEL_ARCH::kCPU, data_type, static_cast<schema::PrimitiveType>(primitive->Type())}; | ||||
| #if SUPPORT_GPU | #if SUPPORT_GPU | ||||
| if (context_->device_type_ == DT_GPU && lite::opencl::OpenCLRuntime::GetInstance()->IsInitOK()) { | |||||
| if (context_->device_type_ == DT_GPU) { | |||||
| desc.arch = kernel::KERNEL_ARCH::kGPU; | desc.arch = kernel::KERNEL_ARCH::kGPU; | ||||
| auto *kernel = KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, primitive, context_, desc); | auto *kernel = KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, primitive, context_, desc); | ||||
| if (kernel != nullptr) { | if (kernel != nullptr) { | ||||
| @@ -157,7 +157,6 @@ TEST_F(TestActivationOpenCL, ReluFp_dim4) { | |||||
| ret = sub_graph->Init(); | ret = sub_graph->Init(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Init sub_graph error."; | MS_LOG(ERROR) << "Init sub_graph error."; | ||||
| delete kernel; | |||||
| delete param; | delete param; | ||||
| delete input_tensor; | delete input_tensor; | ||||
| delete output_tensor; | delete output_tensor; | ||||
| @@ -167,7 +166,6 @@ TEST_F(TestActivationOpenCL, ReluFp_dim4) { | |||||
| MS_LOG(INFO) << "Run SubGraphOpenCLKernel."; | MS_LOG(INFO) << "Run SubGraphOpenCLKernel."; | ||||
| ret = sub_graph->Run(); | ret = sub_graph->Run(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| delete kernel; | |||||
| delete param; | delete param; | ||||
| delete input_tensor; | delete input_tensor; | ||||
| delete output_tensor; | delete output_tensor; | ||||
| @@ -182,7 +180,6 @@ TEST_F(TestActivationOpenCL, ReluFp_dim4) { | |||||
| printf_tensor<float>("ReluFp32--output data--", outputs[0]); | printf_tensor<float>("ReluFp32--output data--", outputs[0]); | ||||
| CompareRes<float>(output_tensor, out_file); | CompareRes<float>(output_tensor, out_file); | ||||
| } | } | ||||
| delete kernel; | |||||
| delete param; | delete param; | ||||
| delete input_tensor; | delete input_tensor; | ||||
| delete output_tensor; | delete output_tensor; | ||||
| @@ -271,7 +268,6 @@ TEST_F(TestActivationOpenCL, Relu6Fp_dim4) { | |||||
| ret = sub_graph->Init(); | ret = sub_graph->Init(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Init sub_graph error."; | MS_LOG(ERROR) << "Init sub_graph error."; | ||||
| delete kernel; | |||||
| delete param; | delete param; | ||||
| delete input_tensor; | delete input_tensor; | ||||
| delete output_tensor; | delete output_tensor; | ||||
| @@ -281,7 +277,6 @@ TEST_F(TestActivationOpenCL, Relu6Fp_dim4) { | |||||
| MS_LOG(INFO) << "Run SubGraphOpenCLKernel."; | MS_LOG(INFO) << "Run SubGraphOpenCLKernel."; | ||||
| ret = sub_graph->Run(); | ret = sub_graph->Run(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| delete kernel; | |||||
| delete param; | delete param; | ||||
| delete input_tensor; | delete input_tensor; | ||||
| delete output_tensor; | delete output_tensor; | ||||
| @@ -297,7 +292,6 @@ TEST_F(TestActivationOpenCL, Relu6Fp_dim4) { | |||||
| printf_tensor<float>("Relu6:FP32--output data---", outputs[0]); | printf_tensor<float>("Relu6:FP32--output data---", outputs[0]); | ||||
| CompareRes<float>(output_tensor, out_file); | CompareRes<float>(output_tensor, out_file); | ||||
| } | } | ||||
| delete kernel; | |||||
| delete param; | delete param; | ||||
| delete input_tensor; | delete input_tensor; | ||||
| delete output_tensor; | delete output_tensor; | ||||
| @@ -386,7 +380,6 @@ TEST_F(TestActivationOpenCL, SigmoidFp_dim4) { | |||||
| ret = sub_graph->Init(); | ret = sub_graph->Init(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Init sub_graph error."; | MS_LOG(ERROR) << "Init sub_graph error."; | ||||
| delete kernel; | |||||
| delete param; | delete param; | ||||
| delete input_tensor; | delete input_tensor; | ||||
| delete output_tensor; | delete output_tensor; | ||||
| @@ -396,7 +389,6 @@ TEST_F(TestActivationOpenCL, SigmoidFp_dim4) { | |||||
| MS_LOG(INFO) << "Run SubGraphOpenCLKernel."; | MS_LOG(INFO) << "Run SubGraphOpenCLKernel."; | ||||
| ret = sub_graph->Run(); | ret = sub_graph->Run(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| delete kernel; | |||||
| delete param; | delete param; | ||||
| delete input_tensor; | delete input_tensor; | ||||
| delete output_tensor; | delete output_tensor; | ||||
| @@ -412,7 +404,6 @@ TEST_F(TestActivationOpenCL, SigmoidFp_dim4) { | |||||
| printf_tensor<float>("Sigmoid:FP32--output data---", outputs[0]); | printf_tensor<float>("Sigmoid:FP32--output data---", outputs[0]); | ||||
| CompareRes<float>(output_tensor, out_file); | CompareRes<float>(output_tensor, out_file); | ||||
| } | } | ||||
| delete kernel; | |||||
| delete param; | delete param; | ||||
| delete input_tensor; | delete input_tensor; | ||||
| delete output_tensor; | delete output_tensor; | ||||
| @@ -502,7 +493,6 @@ TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) { | |||||
| ret = sub_graph->Init(); | ret = sub_graph->Init(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Init sub_graph error."; | MS_LOG(ERROR) << "Init sub_graph error."; | ||||
| delete kernel; | |||||
| delete param; | delete param; | ||||
| delete input_tensor; | delete input_tensor; | ||||
| delete output_tensor; | delete output_tensor; | ||||
| @@ -512,7 +502,6 @@ TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) { | |||||
| MS_LOG(INFO) << "Run SubGraphOpenCLKernel."; | MS_LOG(INFO) << "Run SubGraphOpenCLKernel."; | ||||
| ret = sub_graph->Run(); | ret = sub_graph->Run(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| delete kernel; | |||||
| delete param; | delete param; | ||||
| delete input_tensor; | delete input_tensor; | ||||
| delete output_tensor; | delete output_tensor; | ||||
| @@ -527,7 +516,6 @@ TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) { | |||||
| printf_tensor<float>("Leaky Relu:FP32--output data---", outputs[0]); | printf_tensor<float>("Leaky Relu:FP32--output data---", outputs[0]); | ||||
| CompareRes<float>(output_tensor, out_file); | CompareRes<float>(output_tensor, out_file); | ||||
| } | } | ||||
| delete kernel; | |||||
| delete param; | delete param; | ||||
| delete input_tensor; | delete input_tensor; | ||||
| delete output_tensor; | delete output_tensor; | ||||
| @@ -616,7 +604,6 @@ TEST_F(TestActivationOpenCLTanh, TanhFp_dim4) { | |||||
| ret = sub_graph->Init(); | ret = sub_graph->Init(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Init sub_graph error."; | MS_LOG(ERROR) << "Init sub_graph error."; | ||||
| delete kernel; | |||||
| delete param; | delete param; | ||||
| delete input_tensor; | delete input_tensor; | ||||
| delete output_tensor; | delete output_tensor; | ||||
| @@ -626,7 +613,6 @@ TEST_F(TestActivationOpenCLTanh, TanhFp_dim4) { | |||||
| MS_LOG(INFO) << "Run SubGraphOpenCLKernel."; | MS_LOG(INFO) << "Run SubGraphOpenCLKernel."; | ||||
| ret = sub_graph->Run(); | ret = sub_graph->Run(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| delete kernel; | |||||
| delete param; | delete param; | ||||
| delete input_tensor; | delete input_tensor; | ||||
| delete output_tensor; | delete output_tensor; | ||||
| @@ -642,7 +628,6 @@ TEST_F(TestActivationOpenCLTanh, TanhFp_dim4) { | |||||
| printf_tensor<float>("Tanh:FP32--output data---", outputs[0]); | printf_tensor<float>("Tanh:FP32--output data---", outputs[0]); | ||||
| CompareRes<float>(output_tensor, out_file); | CompareRes<float>(output_tensor, out_file); | ||||
| } | } | ||||
| delete kernel; | |||||
| delete param; | delete param; | ||||
| delete input_tensor; | delete input_tensor; | ||||
| delete output_tensor; | delete output_tensor; | ||||
| @@ -127,7 +127,6 @@ TEST_F(TestArithmeticSelfOpenCLfp16, ArithmeticSelfOpenCLFp16) { | |||||
| delete tensor; | delete tensor; | ||||
| } | } | ||||
| delete param; | delete param; | ||||
| delete arithmeticself_kernel; | |||||
| delete sub_graph; | delete sub_graph; | ||||
| } | } | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -203,7 +203,6 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh | |||||
| delete[] data_c_ocl; | delete[] data_c_ocl; | ||||
| delete kernel; | delete kernel; | ||||
| delete arith_kernel; | |||||
| delete param; | delete param; | ||||
| for (auto tensor : inputs) { | for (auto tensor : inputs) { | ||||
| delete tensor; | delete tensor; | ||||
| @@ -147,7 +147,6 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) { | |||||
| delete tensor; | delete tensor; | ||||
| } | } | ||||
| delete param; | delete param; | ||||
| delete batchnorm_kernel; | |||||
| delete sub_graph; | delete sub_graph; | ||||
| } | } | ||||
| TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) { | TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) { | ||||
| @@ -174,7 +174,6 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) { | |||||
| delete weight_tensor; | delete weight_tensor; | ||||
| delete sub_graph; | delete sub_graph; | ||||
| delete param; | delete param; | ||||
| delete biasadd_kernel; | |||||
| return; | return; | ||||
| } | } | ||||
| MS_LOG(INFO) << "Sub graph begin running!"; | MS_LOG(INFO) << "Sub graph begin running!"; | ||||
| @@ -186,7 +185,6 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) { | |||||
| delete weight_tensor; | delete weight_tensor; | ||||
| delete sub_graph; | delete sub_graph; | ||||
| delete param; | delete param; | ||||
| delete biasadd_kernel; | |||||
| return; | return; | ||||
| } | } | ||||
| @@ -202,7 +200,6 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) { | |||||
| delete output_tensor; | delete output_tensor; | ||||
| delete sub_graph; | delete sub_graph; | ||||
| delete param; | delete param; | ||||
| delete biasadd_kernel; | |||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | lite::opencl::OpenCLRuntime::DeleteInstance(); | ||||
| } | } | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -164,7 +164,6 @@ TEST_F(TestConcatOpenCLfp16, ConcatFp16_2input_dim4_axis3) { | |||||
| delete tensor; | delete tensor; | ||||
| } | } | ||||
| delete param; | delete param; | ||||
| delete concat_kernel; | |||||
| delete sub_graph; | delete sub_graph; | ||||
| } | } | ||||
| @@ -284,7 +283,6 @@ TEST_F(TestConcatOpenCLfp32, ConcatFp32_2input_dim4_axis3) { | |||||
| delete tensor; | delete tensor; | ||||
| } | } | ||||
| delete param; | delete param; | ||||
| delete concat_kernel; | |||||
| delete sub_graph; | delete sub_graph; | ||||
| } | } | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -78,7 +78,6 @@ void test_main_gather(void *input_data, void *correct_data, const std::vector<in | |||||
| std::cout << "==================output data================" << std::endl; | std::cout << "==================output data================" << std::endl; | ||||
| auto *output_data = reinterpret_cast<T *>(outputs[0]->data_c()); | auto *output_data = reinterpret_cast<T *>(outputs[0]->data_c()); | ||||
| CommonTest::CompareOutputData<T>(output_data, static_cast<T*>(correct_data), outputs[0]->ElementsNum(), 0.0001); | CommonTest::CompareOutputData<T>(output_data, static_cast<T*>(correct_data), outputs[0]->ElementsNum(), 0.0001); | ||||
| delete pkernel; | |||||
| delete sub_graph; | delete sub_graph; | ||||
| } | } | ||||
| TEST_F(TestGatherOpenCL, Axis1Fp32) { | TEST_F(TestGatherOpenCL, Axis1Fp32) { | ||||
| @@ -167,7 +167,6 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) { | |||||
| delete output_tensor; | delete output_tensor; | ||||
| delete weight_tensor; | delete weight_tensor; | ||||
| delete param; | delete param; | ||||
| delete prelu_kernel; | |||||
| delete sub_graph; | delete sub_graph; | ||||
| return; | return; | ||||
| } | } | ||||
| @@ -179,7 +178,6 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) { | |||||
| delete output_tensor; | delete output_tensor; | ||||
| delete weight_tensor; | delete weight_tensor; | ||||
| delete param; | delete param; | ||||
| delete prelu_kernel; | |||||
| delete sub_graph; | delete sub_graph; | ||||
| return; | return; | ||||
| } | } | ||||
| @@ -195,7 +193,6 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) { | |||||
| delete output_tensor; | delete output_tensor; | ||||
| delete weight_tensor; | delete weight_tensor; | ||||
| delete param; | delete param; | ||||
| delete prelu_kernel; | |||||
| delete sub_graph; | delete sub_graph; | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | lite::opencl::OpenCLRuntime::DeleteInstance(); | ||||
| } | } | ||||
| @@ -223,7 +223,6 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh | |||||
| delete[] data_out_ocl; | delete[] data_out_ocl; | ||||
| delete kernel; | delete kernel; | ||||
| delete scale_kernel; | |||||
| delete param; | delete param; | ||||
| for (auto tensor : inputs) { | for (auto tensor : inputs) { | ||||
| delete tensor; | delete tensor; | ||||
| @@ -143,7 +143,6 @@ TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) { | |||||
| for (auto tensor : outputs) { | for (auto tensor : outputs) { | ||||
| delete tensor; | delete tensor; | ||||
| } | } | ||||
| delete slice_kernel; | |||||
| delete sub_graph; | delete sub_graph; | ||||
| } | } | ||||
| TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) { | TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) { | ||||
| @@ -251,7 +250,6 @@ TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) { | |||||
| for (auto tensor : outputs) { | for (auto tensor : outputs) { | ||||
| delete tensor; | delete tensor; | ||||
| } | } | ||||
| delete slice_kernel; | |||||
| delete sub_graph; | delete sub_graph; | ||||
| } | } | ||||
| } // namespace mindspore | } // namespace mindspore | ||||