| @@ -123,7 +123,7 @@ __kernel void transpose_0231_NHWC4(__read_only image2d_t src_data, __write_only | |||
| if (X >= shape.y || 4 * Y >= shape.z || 4 * Z >= shape.w) { | |||
| return; | |||
| } | |||
| int W4 = UP_DIV(shape.y, 4); | |||
| int W4 = UP_DIV(shape.z, 4); | |||
| int C4 = UP_DIV(shape.w, 4); | |||
| FLT4 src0 = READ_IMAGE(src_data, smp_zero, (int2)(X * W4 + Y, 4 * Z)); | |||
| FLT4 src1 = (FLT4)0.f; | |||
| @@ -61,12 +61,12 @@ int ActivationOpenClKernel::Init() { | |||
| MS_LOG(ERROR) << "Activate fun only support dim=4 or 2, but your dim=" << in_size_; | |||
| return mindspore::lite::RET_ERROR; | |||
| } | |||
| std::map<int, std::string> Program_Kernel{{ActivationType_LEAKY_RELU, "LeakyRelu"}, | |||
| {ActivationType_RELU, "Relu"}, | |||
| {ActivationType_SIGMOID, "Sigmoid"}, | |||
| {ActivationType_RELU6, "Relu6"}, | |||
| {ActivationType_TANH, "Tanh"}}; | |||
| if (Program_Kernel.count(type_) == 0) { | |||
| std::map<int, std::string> kernel_names{{ActivationType_LEAKY_RELU, "LeakyRelu"}, | |||
| {ActivationType_RELU, "Relu"}, | |||
| {ActivationType_SIGMOID, "Sigmoid"}, | |||
| {ActivationType_RELU6, "Relu6"}, | |||
| {ActivationType_TANH, "Tanh"}}; | |||
| if (kernel_names.count(type_) == 0) { | |||
| MS_LOG(ERROR) << "schema::ActivationType:" << type_ << "not found"; | |||
| return mindspore::lite::RET_ERROR; | |||
| } | |||
| @@ -75,12 +75,8 @@ int ActivationOpenClKernel::Init() { | |||
| std::set<std::string> build_options; | |||
| std::string program_name = "Activation"; | |||
| ocl_runtime_->LoadSource(program_name, source); | |||
| std::string kernel_name = Program_Kernel[type_]; | |||
| std::string kernel_name = kernel_names[type_]; | |||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | |||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | |||
| in_tensors_[0]->SetFormat(op_format_); | |||
| out_tensors_[0]->SetFormat(op_format_); | |||
| MS_LOG(DEBUG) << op_parameter_->name_ << " init Done!"; | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| @@ -107,32 +103,12 @@ int ActivationOpenClKernel::Run() { | |||
| cl_int4 ActivationOpenClKernel::GetImg2dShape() { | |||
| cl_int4 img2d_shape = {1, 1, 1, 1}; | |||
| if (op_format_ == schema::Format_NHWC4) { | |||
| img2d_shape.s[1] = nhwc_shape_[1]; | |||
| img2d_shape.s[2] = nhwc_shape_[2] * UP_DIV(nhwc_shape_[3], C4NUM); | |||
| img2d_shape.s[3] = C4NUM; | |||
| } | |||
| if (op_format_ == schema::Format_NC4HW4) { | |||
| img2d_shape.s[1] = UP_DIV(nhwc_shape_[3], C4NUM) * nhwc_shape_[1]; | |||
| img2d_shape.s[2] = nhwc_shape_[2]; | |||
| img2d_shape.s[3] = C4NUM; | |||
| } | |||
| img2d_shape.s[1] = nhwc_shape_[1]; | |||
| img2d_shape.s[2] = nhwc_shape_[2] * UP_DIV(nhwc_shape_[3], C4NUM); | |||
| img2d_shape.s[3] = C4NUM; | |||
| return img2d_shape; | |||
| } | |||
| int ActivationOpenClKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| cl_int4 img_shape = GetImg2dShape(); | |||
| size_t img_dtype = CL_FLOAT; | |||
| if (enable_fp16_) { | |||
| img_dtype = CL_HALF_FLOAT; | |||
| } | |||
| img_size->clear(); | |||
| img_size->push_back(img_shape.s[2]); | |||
| img_size->push_back(img_shape.s[1]); | |||
| img_size->push_back(img_dtype); | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| kernel::LiteKernel *OpenClActivationKernelCreator(const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter, | |||
| const lite::InnerContext *ctx, const kernel::KernelKey &desc, | |||
| @@ -26,27 +26,25 @@ namespace mindspore::kernel { | |||
| class ActivationOpenClKernel : public OpenCLKernel { | |||
| public: | |||
| explicit ActivationOpenClKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs) { | |||
| type_ = (reinterpret_cast<ActivationParameter *>(parameter))->type_; | |||
| alpha_ = (reinterpret_cast<ActivationParameter *>(parameter))->alpha_; | |||
| } | |||
| ~ActivationOpenClKernel() override{}; | |||
| ActivationOpenClKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs), | |||
| type_(reinterpret_cast<ActivationParameter *>(parameter)->type_), | |||
| alpha_(reinterpret_cast<ActivationParameter *>(parameter)->alpha_) {} | |||
| ~ActivationOpenClKernel() override = default; | |||
| int Init() override; | |||
| int Run() override; | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| cl_int4 GetImg2dShape(); | |||
| void InitBuffer() {} | |||
| private: | |||
| cl_int4 GetImg2dShape(); | |||
| cl::Kernel kernel_; | |||
| int type_; | |||
| float alpha_; | |||
| int in_size_; | |||
| int out_size_; | |||
| size_t fp_size; | |||
| int in_size_{}; | |||
| int out_size_{}; | |||
| size_t fp_size{}; | |||
| bool enable_fp16_{false}; | |||
| std::vector<size_t> nhwc_shape_; | |||
| }; | |||
| @@ -34,79 +34,35 @@ using mindspore::schema::PrimitiveType_Eltwise; | |||
| namespace mindspore::kernel { | |||
| ArithmeticOpenCLKernel::~ArithmeticOpenCLKernel() {} | |||
| std::vector<size_t> ArithmeticOpenCLKernel::InitGlobalSize() const { | |||
| const size_t global_x = out_tensors_[0]->Width(); | |||
| const size_t global_y = out_tensors_[0]->Height(); | |||
| const size_t global_z = UP_ROUND_DIV(out_tensors_[0]->Channel(), 4); | |||
| std::vector<size_t> global = {global_x, global_y, global_z}; | |||
| return global; | |||
| auto out_shape = out_tensors_[0]->shape(); | |||
| if (out_shape.size() == 2) { | |||
| const size_t global_x = 1; | |||
| const size_t global_y = 1; | |||
| const size_t global_z = UP_ROUND_DIV(out_shape[1], C4NUM); | |||
| std::vector<size_t> global = {global_x, global_y, global_z}; | |||
| return global; | |||
| } else { | |||
| const size_t global_x = out_shape[2]; | |||
| const size_t global_y = out_shape[1]; | |||
| const size_t global_z = UP_ROUND_DIV(out_shape[3], C4NUM); | |||
| std::vector<size_t> global = {global_x, global_y, global_z}; | |||
| return global; | |||
| } | |||
| } | |||
| void ArithmeticOpenCLKernel::Image2dGetWorkGroupSize() { | |||
| local_size_ = {16, 16}; | |||
| if (out_tensors_[0]->shape().size() == 2) { | |||
| size_t H = out_tensors_[0]->shape()[0]; | |||
| size_t W = UP_DIV(out_tensors_[0]->shape()[1], C4NUM); | |||
| global_size_ = {W, H}; | |||
| return; | |||
| } | |||
| if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) { | |||
| size_t H = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * UP_DIV(out_tensors_[0]->Channel(), C4NUM); | |||
| size_t W = out_tensors_[0]->Width(); | |||
| global_size_ = {W, H}; | |||
| } else if (out_tensors_[0]->GetFormat() == schema::Format_NHWC4) { | |||
| size_t H = out_tensors_[0]->Batch() * out_tensors_[0]->Height(); | |||
| size_t W = out_tensors_[0]->Width() * UP_DIV(out_tensors_[0]->Channel(), C4NUM); | |||
| global_size_ = {W, H}; | |||
| } else if (out_tensors_[0]->GetFormat() == schema::Format_NC4) { | |||
| size_t H = out_tensors_[0]->Batch(); | |||
| size_t W = UP_DIV(out_tensors_[0]->Channel(), C4NUM); | |||
| auto out_shape = out_tensors_[0]->shape(); | |||
| if (out_shape.size() == 2) { | |||
| size_t H = out_shape[0]; | |||
| size_t W = UP_DIV(out_shape[1], C4NUM); | |||
| global_size_ = {W, H}; | |||
| } else { | |||
| MS_LOG(ERROR) << "Unsupport data format " << out_tensors_[0]->GetFormat(); | |||
| } | |||
| } | |||
| void ArithmeticOpenCLKernel::BufferGetWorkGroupSize() { | |||
| uint32_t element_num = out_tensors_[0]->ElementsC4Num(); | |||
| global_size_ = {element_num}; | |||
| } | |||
| int ArithmeticOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| size_t im_dst_x, im_dst_y; | |||
| if (out_tensors_[0]->shape().size() == 2) { | |||
| im_dst_x = UP_DIV(out_tensors_[0]->shape()[1], C4NUM); | |||
| im_dst_y = out_tensors_[0]->shape()[0]; | |||
| } else { | |||
| if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) { | |||
| im_dst_x = out_tensors_[0]->Width(); | |||
| im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * UP_DIV(out_tensors_[0]->Channel(), C4NUM); | |||
| } else if (out_tensors_[0]->GetFormat() == schema::Format_NHWC4) { | |||
| im_dst_x = out_tensors_[0]->Width() * UP_DIV(out_tensors_[0]->Channel(), C4NUM); | |||
| im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height(); | |||
| } else if (out_tensors_[0]->GetFormat() == schema::Format_NC4) { | |||
| im_dst_y = out_tensors_[0]->Batch(); | |||
| im_dst_x = UP_DIV(out_tensors_[0]->Channel(), C4NUM); | |||
| } else { | |||
| MS_LOG(ERROR) << "Unsupport data format " << out_tensors_[0]->GetFormat(); | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| size_t img_dtype = CL_FLOAT; | |||
| if (in_tensors_[0]->data_type() == kNumberTypeFloat16) { | |||
| img_dtype = CL_HALF_FLOAT; | |||
| } else if (in_tensors_[0]->data_type() == kNumberTypeFloat32) { | |||
| img_dtype = CL_FLOAT; | |||
| } else { | |||
| MS_LOG(ERROR) << "Unsupport data type " << in_tensors_[0]->data_type(); | |||
| size_t H = out_shape[0] * out_shape[1]; | |||
| size_t W = out_shape[2] * UP_DIV(out_shape[3], C4NUM); | |||
| global_size_ = {W, H}; | |||
| } | |||
| img_size->clear(); | |||
| std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype}; | |||
| *img_size = vec; | |||
| return RET_OK; | |||
| } | |||
| int ArithmeticOpenCLKernel::InitBuffer() { | |||
| @@ -119,7 +75,7 @@ int ArithmeticOpenCLKernel::InitBuffer() { | |||
| inputs_weight_ptrs_.push_back(nullptr); | |||
| } else { | |||
| auto allocator = ocl_runtime_->GetAllocator(); | |||
| std::vector<size_t> img_size = GetImage2dShapeFromNHWC(nhwc_shape, op_format_); | |||
| std::vector<size_t> img_size = GetImage2dShapeFromNHWC(nhwc_shape, schema::Format_NHWC4); | |||
| int pack_weight_size = img_size[0] * img_size[1] * C4NUM; | |||
| int plane = nhwc_shape[1] * nhwc_shape[2]; | |||
| int channel = nhwc_shape[3]; | |||
| @@ -132,22 +88,12 @@ int ArithmeticOpenCLKernel::InitBuffer() { | |||
| return RET_ERROR; | |||
| } | |||
| memset(weight, 0x00, pack_weight_size * data_size); | |||
| if (op_format_ == schema::Format_NHWC4) { | |||
| if (in_tensor_->data_type() == kNumberTypeFloat32) { | |||
| std::function<float(float)> to_dtype = [](float x) -> float { return x; }; | |||
| PackNHWCToNHWC4<float, float>(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype); | |||
| } else if (in_tensor_->data_type() == kNumberTypeFloat16) { | |||
| std::function<float(float16_t)> to_dtype = [](float16_t x) -> float { return static_cast<float>(x); }; | |||
| PackNHWCToNHWC4<float16_t, float>(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype); | |||
| } | |||
| } else if (op_format_ == schema::Format_NC4HW4) { | |||
| if (in_tensor_->data_type() == kNumberTypeFloat32) { | |||
| std::function<float(float)> to_dtype = [](float x) -> float { return x; }; | |||
| PackNHWCToNC4HW4<float, float>(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype); | |||
| } else if (in_tensor_->data_type() == kNumberTypeFloat16) { | |||
| std::function<float(float16_t)> to_dtype = [](float16_t x) -> float { return static_cast<float>(x); }; | |||
| PackNHWCToNC4HW4<float16_t, float>(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype); | |||
| } | |||
| if (in_tensor_->data_type() == kNumberTypeFloat32) { | |||
| std::function<float(float)> to_dtype = [](float x) -> float { return x; }; | |||
| PackNHWCToNHWC4<float, float>(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype); | |||
| } else if (in_tensor_->data_type() == kNumberTypeFloat16) { | |||
| std::function<float(float16_t)> to_dtype = [](float16_t x) -> float { return static_cast<float>(x); }; | |||
| PackNHWCToNHWC4<float16_t, float>(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype); | |||
| } | |||
| if (batch * plane * channel == 1) { | |||
| // scalar | |||
| @@ -163,22 +109,12 @@ int ArithmeticOpenCLKernel::InitBuffer() { | |||
| return RET_ERROR; | |||
| } | |||
| memset(weight, 0x00, pack_weight_size * data_size); | |||
| if (op_format_ == schema::Format_NHWC4) { | |||
| if (in_tensor_->data_type() == kNumberTypeFloat32) { | |||
| std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); }; | |||
| PackNHWCToNHWC4<float, float16_t>(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype); | |||
| } else if (in_tensor_->data_type() == kNumberTypeFloat16) { | |||
| std::function<float16_t(float16_t)> to_dtype = [](float16_t x) -> float16_t { return x; }; | |||
| PackNHWCToNHWC4<float16_t, float16_t>(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype); | |||
| } | |||
| } else if (op_format_ == schema::Format_NC4HW4) { | |||
| if (in_tensor_->data_type() == kNumberTypeFloat32) { | |||
| std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); }; | |||
| PackNHWCToNC4HW4<float, float16_t>(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype); | |||
| } else if (in_tensor_->data_type() == kNumberTypeFloat16) { | |||
| std::function<float16_t(float16_t)> to_dtype = [](float16_t x) -> float16_t { return x; }; | |||
| PackNHWCToNC4HW4<float16_t, float16_t>(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype); | |||
| } | |||
| if (in_tensor_->data_type() == kNumberTypeFloat32) { | |||
| std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); }; | |||
| PackNHWCToNHWC4<float, float16_t>(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype); | |||
| } else if (in_tensor_->data_type() == kNumberTypeFloat16) { | |||
| std::function<float16_t(float16_t)> to_dtype = [](float16_t x) -> float16_t { return x; }; | |||
| PackNHWCToNHWC4<float16_t, float16_t>(in_tensor_->data_c(), weight, batch, plane, channel, to_dtype); | |||
| } | |||
| if (batch * plane * channel == 1) { | |||
| // scalar | |||
| @@ -195,18 +131,11 @@ int ArithmeticOpenCLKernel::InitBuffer() { | |||
| int ArithmeticOpenCLKernel::Init() { | |||
| std::string kernel_name; | |||
| const ArithmeticParameter *arithmetic_parameter = reinterpret_cast<const ArithmeticParameter *>(op_parameter_); | |||
| auto *arithmetic_parameter = reinterpret_cast<const ArithmeticParameter *>(op_parameter_); | |||
| if (arithmetic_parameter->broadcasting_) { | |||
| element_flag_ = false; | |||
| if (op_format_ == schema::Format_NHWC4) { | |||
| kernel_name = "BroadcastNHWC4"; | |||
| } else { | |||
| kernel_name = "BroadcastNC4HW4"; | |||
| MS_LOG(ERROR) << "Don't support BroadcastNC4HW4 yet"; | |||
| return RET_ERROR; | |||
| } | |||
| kernel_name = "BroadcastNHWC4"; | |||
| } else { | |||
| kernel_name = "Element"; | |||
| } | |||
| @@ -302,17 +231,6 @@ int ArithmeticOpenCLKernel::Init() { | |||
| return error_code; | |||
| } | |||
| auto format = schema::Format::Format_NHWC4; | |||
| if (arithmetic_parameter->ndim_ == 2) { | |||
| format = schema::Format::Format_NC4; | |||
| } | |||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | |||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | |||
| in_tensors_[0]->SetFormat(format); | |||
| if (element_flag_ && in_tensors_[1]->category() != lite::Tensor::Category::CONST) { | |||
| in_tensors_[1]->SetFormat(format); | |||
| } | |||
| out_tensors_[0]->SetFormat(format); | |||
| Image2dGetWorkGroupSize(); | |||
| InitBuffer(); | |||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | |||
| @@ -25,20 +25,18 @@ namespace mindspore::kernel { | |||
| class ArithmeticOpenCLKernel : public OpenCLKernel { | |||
| public: | |||
| explicit ArithmeticOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) | |||
| ArithmeticOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) | |||
| : OpenCLKernel(parameter, inputs, outputs) {} | |||
| ~ArithmeticOpenCLKernel() override; | |||
| ~ArithmeticOpenCLKernel() override = default; | |||
| int Init() override; | |||
| int Run() override; | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| int InitBuffer() override; | |||
| private: | |||
| std::vector<size_t> InitGlobalSize() const; | |||
| void Image2dGetWorkGroupSize(); | |||
| void BufferGetWorkGroupSize(); | |||
| int InitBuffer(); | |||
| cl::Kernel kernel_; | |||
| bool element_flag_{true}; | |||
| @@ -41,38 +41,6 @@ using mindspore::schema::PrimitiveType_Square; | |||
| namespace mindspore::kernel { | |||
| int ArithmeticSelfOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| auto out_shape = out_tensors_[0]->shape(); | |||
| size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); | |||
| size_t im_dst_x, im_dst_y; | |||
| if (in_tensors_[0]->GetFormat() == schema::Format_NHWC4) { | |||
| if (in_tensors_[0]->shape().size() == 4) { | |||
| im_dst_x = out_tensors_[0]->Width() * CO4; | |||
| im_dst_y = out_tensors_[0]->Height() * out_tensors_[0]->Batch(); | |||
| } else { | |||
| im_dst_x = UP_DIV(out_shape[1], C4NUM); | |||
| im_dst_y = out_tensors_[0]->Batch(); | |||
| } | |||
| } else { | |||
| if (in_tensors_[0]->shape().size() == 4) { | |||
| im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * CO4; | |||
| im_dst_x = out_tensors_[0]->Width(); | |||
| } else { | |||
| im_dst_y = out_tensors_[0]->Batch() * UP_DIV(out_shape[1], C4NUM); | |||
| im_dst_x = 1; | |||
| } | |||
| } | |||
| size_t img_dtype = CL_FLOAT; | |||
| auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||
| if (enable_fp16_) { | |||
| img_dtype = CL_HALF_FLOAT; | |||
| } | |||
| img_size->clear(); | |||
| std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype}; | |||
| *img_size = vec; | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| void ArithmeticSelfOpenCLKernel::GetKernelName(std::string *kernel_name, ArithmeticSelfParameter *param) { | |||
| switch (param->op_parameter_.type_) { | |||
| case PrimitiveType_Abs: | |||
| @@ -126,24 +94,9 @@ int ArithmeticSelfOpenCLKernel::Init() { | |||
| } | |||
| auto param = reinterpret_cast<ArithmeticSelfParameter *>(this->op_parameter_); | |||
| auto in_format = op_format_; | |||
| if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4 && in_format != schema::Format_NC4) { | |||
| MS_LOG(ERROR) << "input format(" << in_format << ") " | |||
| << "format not support!"; | |||
| return mindspore::lite::RET_ERROR; | |||
| } | |||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | |||
| in_tensors_[0]->SetFormat(op_format_); | |||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | |||
| out_tensors_[0]->SetFormat(op_format_); | |||
| std::string kernel_name = "ArithmeticSelf"; | |||
| GetKernelName(&kernel_name, param); | |||
| if (in_format == schema::Format_NC4HW4) { | |||
| kernel_name += "_NC4HW4"; | |||
| } else if (in_format == schema::Format_NHWC4) { | |||
| kernel_name += "_NHWC4"; | |||
| } | |||
| kernel_name += "_NHWC4"; | |||
| MS_LOG(DEBUG) << "execute kernel name : " << kernel_name; | |||
| std::set<std::string> build_options; | |||
| std::string source = arithmeticself_source; | |||
| @@ -154,8 +107,6 @@ int ArithmeticSelfOpenCLKernel::Init() { | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| int ArithmeticSelfOpenCLKernel::ReSize() { return mindspore::lite::RET_OK; } | |||
| void ArithmeticSelfGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t> *local, int max_size) { | |||
| const int max_divider = 8; | |||
| const int max_x = 4, max_y = 8; | |||
| @@ -26,23 +26,19 @@ namespace mindspore::kernel { | |||
| class ArithmeticSelfOpenCLKernel : public OpenCLKernel { | |||
| public: | |||
| explicit ArithmeticSelfOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| ArithmeticSelfOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs) {} | |||
| ~ArithmeticSelfOpenCLKernel() override{}; | |||
| ~ArithmeticSelfOpenCLKernel() override = default; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| private: | |||
| void GetKernelName(std::string *kernel_name, ArithmeticSelfParameter *param); | |||
| private: | |||
| cl::Kernel kernel_; | |||
| }; | |||
| @@ -32,18 +32,12 @@ using mindspore::schema::PrimitiveType_BatchToSpaceND; | |||
| namespace mindspore::kernel { | |||
| int BatchToSpaceNDOpenCLKernel::Init() { | |||
| std::string kernel_name = "batch_to_space_nd"; | |||
| auto in_format = op_format_; | |||
| std::string kernel_name = "batch_to_space_nd_NHWC4"; | |||
| if (in_tensors_[0]->shape().size() != 4 && out_tensors_[0]->shape().size() != 4) { | |||
| MS_LOG(ERROR) << "input/output shape size must be 4, actual: " << in_tensors_[0]->shape().size() << ", " | |||
| << out_tensors_[0]->shape().size(); | |||
| return RET_ERROR; | |||
| } | |||
| if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) { | |||
| MS_LOG(ERROR) << "input format(" << in_format << ") " | |||
| << "format not support!"; | |||
| return RET_ERROR; | |||
| } | |||
| auto *param = reinterpret_cast<BatchToSpaceParameter *>(this->op_parameter_); | |||
| if (param->block_shape_[0] < 1 || param->block_shape_[1] < 1) { | |||
| MS_LOG(ERROR) << "block_sizes_ must > 1, actual " << param->block_shape_[0] << ", " << param->block_shape_[1]; | |||
| @@ -55,18 +49,10 @@ int BatchToSpaceNDOpenCLKernel::Init() { | |||
| return RET_ERROR; | |||
| } | |||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | |||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | |||
| in_tensors_[0]->SetFormat(op_format_); | |||
| out_tensors_[0]->SetFormat(op_format_); | |||
| #ifdef PROGRAM_WITH_IL | |||
| kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); | |||
| #else | |||
| if (in_format == schema::Format_NC4HW4) { | |||
| kernel_name += "_NC4HW4"; | |||
| } else { | |||
| kernel_name += "_NHWC4"; | |||
| } | |||
| std::set<std::string> build_options; | |||
| std::string source = batch_to_space_nd_source; | |||
| std::string program_name = "batch_to_space_nd"; | |||
| @@ -76,28 +62,7 @@ int BatchToSpaceNDOpenCLKernel::Init() { | |||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | |||
| return RET_OK; | |||
| } | |||
| int BatchToSpaceNDOpenCLKernel::InitBuffer() { return RET_OK; } | |||
| int BatchToSpaceNDOpenCLKernel::ReSize() { return RET_OK; } | |||
| int BatchToSpaceNDOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); | |||
| size_t im_dst_x, im_dst_y; | |||
| if (in_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) { | |||
| im_dst_x = out_tensors_[0]->Width() * CO4; | |||
| im_dst_y = out_tensors_[0]->Height() * out_tensors_[0]->Batch(); | |||
| } else { | |||
| im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * CO4; | |||
| im_dst_x = out_tensors_[0]->Width(); | |||
| } | |||
| size_t img_dtype = CL_FLOAT; | |||
| auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||
| if (enable_fp16_) { | |||
| img_dtype = CL_HALF_FLOAT; | |||
| } | |||
| img_size->clear(); | |||
| std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype}; | |||
| *img_size = std::move(vec); | |||
| return RET_OK; | |||
| } | |||
| int BatchToSpaceNDOpenCLKernel::Run() { | |||
| MS_LOG(DEBUG) << this->name() << " Running! "; | |||
| auto param = reinterpret_cast<BatchToSpaceParameter *>(this->op_parameter_); | |||
| @@ -25,22 +25,16 @@ namespace mindspore::kernel { | |||
| class BatchToSpaceNDOpenCLKernel : public OpenCLKernel { | |||
| public: | |||
| explicit BatchToSpaceNDOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| BatchToSpaceNDOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs) {} | |||
| ~BatchToSpaceNDOpenCLKernel() override{}; | |||
| ~BatchToSpaceNDOpenCLKernel() override = default; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| int InitBuffer(); | |||
| private: | |||
| cl::Kernel kernel_; | |||
| }; | |||
| @@ -30,45 +30,8 @@ using mindspore::schema::PrimitiveType_BatchNorm; | |||
| namespace mindspore::kernel { | |||
| int BatchNormOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); | |||
| size_t im_dst_x, im_dst_y; | |||
| if (in_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) { | |||
| im_dst_x = out_tensors_[0]->Width() * CO4; | |||
| im_dst_y = out_tensors_[0]->Height(); | |||
| } else { | |||
| im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * CO4; | |||
| im_dst_x = out_tensors_[0]->Width(); | |||
| } | |||
| size_t img_dtype = CL_FLOAT; | |||
| auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||
| if (enable_fp16_) { | |||
| img_dtype = CL_HALF_FLOAT; | |||
| } | |||
| img_size->clear(); | |||
| std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype}; | |||
| *img_size = vec; | |||
| return RET_OK; | |||
| } | |||
| int BatchNormOpenCLKernel::Init() { | |||
| auto in_format = op_format_; | |||
| if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) { | |||
| MS_LOG(ERROR) << "input format(" << in_format << ") " | |||
| << "format not support!"; | |||
| return RET_ERROR; | |||
| } | |||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | |||
| in_tensors_[0]->SetFormat(op_format_); | |||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | |||
| out_tensors_[0]->SetFormat(op_format_); | |||
| std::string kernel_name = "Batch_normalization"; | |||
| if (in_format == schema::Format_NC4HW4) { | |||
| kernel_name += "_NC4HW4"; | |||
| } else if (in_format == schema::Format_NHWC4) { | |||
| kernel_name += "_NHWC4"; | |||
| } | |||
| std::string kernel_name = "Batch_normalization_NHWC4"; | |||
| std::set<std::string> build_options; | |||
| std::string source = batchnorm_source; | |||
| std::string program_name = "Batch_normalization"; | |||
| @@ -78,8 +41,6 @@ int BatchNormOpenCLKernel::Init() { | |||
| return RET_OK; | |||
| } | |||
| int BatchNormOpenCLKernel::ReSize() { return RET_OK; } | |||
| void BatchNormGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t> *local, int max_size) { | |||
| const int max_divider = 8; | |||
| const int max_x = 4, max_y = 8; | |||
| @@ -25,20 +25,16 @@ namespace mindspore::kernel { | |||
| class BatchNormOpenCLKernel : public OpenCLKernel { | |||
| public: | |||
| explicit BatchNormOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| BatchNormOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs) {} | |||
| ~BatchNormOpenCLKernel() override{}; | |||
| ~BatchNormOpenCLKernel() override = default; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| private: | |||
| cl::Kernel kernel_; | |||
| }; | |||
| @@ -35,7 +35,7 @@ using mindspore::schema::PrimitiveType_BiasAdd; | |||
| namespace mindspore::kernel { | |||
| void BiasAddOpenCLKernel::InitBuffer() { | |||
| int BiasAddOpenCLKernel::InitBuffer() { | |||
| int C = in_tensors_[1]->shape()[0]; | |||
| int div_ci = UP_DIV(C, C4NUM); | |||
| auto allocator = ocl_runtime_->GetAllocator(); | |||
| @@ -49,6 +49,7 @@ void BiasAddOpenCLKernel::InitBuffer() { | |||
| memset(BiasAdd_, 0x00, div_ci * C4NUM * fp_size); | |||
| memcpy(BiasAdd_, in_tensors_[1]->data_c(), C * fp_size); | |||
| allocator->UnmapBuffer(BiasAdd_); | |||
| return RET_OK; | |||
| } | |||
| int BiasAddOpenCLKernel::Init() { | |||
| @@ -77,10 +78,6 @@ int BiasAddOpenCLKernel::Init() { | |||
| ocl_runtime_->LoadSource(program_name, source); | |||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | |||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | |||
| in_tensors_[0]->SetFormat(op_format_); | |||
| out_tensors_[0]->SetFormat(op_format_); | |||
| MS_LOG(DEBUG) << program_name << " Init Done!"; | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| @@ -95,7 +92,7 @@ int BiasAddOpenCLKernel::Run() { | |||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_shape_); | |||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, BiasAdd_); | |||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]); | |||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, data_type[schema::Format::Format_NHWC4]); | |||
| std::vector<size_t> local = {1, 1}; | |||
| std::vector<size_t> global = {static_cast<size_t>(global_size.s[1]), static_cast<size_t>(global_size.s[2])}; | |||
| auto ret = ocl_runtime_->RunKernel(kernel_, global, local, nullptr); | |||
| @@ -108,32 +105,10 @@ int BiasAddOpenCLKernel::Run() { | |||
| cl_int4 BiasAddOpenCLKernel::GetGlobalshape() { | |||
| cl_int4 global_shape = input_shape_; | |||
| if (op_format_ == schema::Format::Format_NC4) { | |||
| global_shape.s[1] = global_shape.s[2]; | |||
| global_shape.s[2] = UP_DIV(global_shape.s[3], C4NUM); | |||
| } | |||
| if (op_format_ == schema::Format::Format_NC4HW4) { | |||
| global_shape.s[1] = UP_DIV(global_shape.s[3], C4NUM) * global_shape.s[1]; // c / 4 * H | |||
| } | |||
| if (op_format_ == schema::Format::Format_NHWC4) { | |||
| global_shape.s[2] = UP_DIV(global_shape.s[3], C4NUM) * global_shape.s[2]; | |||
| } | |||
| global_shape.s[2] = UP_DIV(global_shape.s[3], C4NUM) * global_shape.s[2]; | |||
| return global_shape; | |||
| } | |||
| int BiasAddOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| cl_int4 img_shape = GetGlobalshape(); | |||
| size_t img_dtype = CL_FLOAT; | |||
| if (enable_fp16_) { | |||
| img_dtype = CL_HALF_FLOAT; | |||
| } | |||
| img_size->clear(); | |||
| img_size->push_back(img_shape.s[2]); | |||
| img_size->push_back(img_shape.s[1]); | |||
| img_size->push_back(img_dtype); | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| kernel::LiteKernel *OpenCLBiasAddKernelCreator(const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter, | |||
| const lite::InnerContext *ctx, const kernel::KernelKey &desc, | |||
| @@ -28,25 +28,25 @@ namespace mindspore::kernel { | |||
| class BiasAddOpenCLKernel : public OpenCLKernel { | |||
| public: | |||
| explicit BiasAddOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| BiasAddOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs) {} | |||
| ~BiasAddOpenCLKernel() override{}; | |||
| ~BiasAddOpenCLKernel() override = default; | |||
| int Init() override; | |||
| int Run() override; | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| void InitBuffer(); | |||
| cl_int4 GetGlobalshape(); | |||
| int InitBuffer() override; | |||
| private: | |||
| cl_int4 GetGlobalshape(); | |||
| cl::Kernel kernel_; | |||
| void *BiasAdd_; | |||
| int in_size_; | |||
| int out_size_; | |||
| size_t fp_size; | |||
| cl_int4 input_shape_; | |||
| bool enable_fp16_{false}; | |||
| void *BiasAdd_{nullptr}; | |||
| int in_size_{}; | |||
| int out_size_{}; | |||
| size_t fp_size{}; | |||
| cl_int4 input_shape_{}; | |||
| bool enable_fp16_{}; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -30,27 +30,6 @@ using mindspore::schema::PrimitiveType_Cast; | |||
| namespace mindspore::kernel { | |||
| int CastOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); | |||
| size_t im_dst_x, im_dst_y; | |||
| if (in_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) { | |||
| im_dst_x = out_tensors_[0]->Width() * CO4; | |||
| im_dst_y = out_tensors_[0]->Height(); | |||
| } else { | |||
| im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * CO4; | |||
| im_dst_x = out_tensors_[0]->Width(); | |||
| } | |||
| size_t img_dtype = CL_FLOAT; | |||
| auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||
| if (enable_fp16_) { | |||
| img_dtype = CL_HALF_FLOAT; | |||
| } | |||
| img_size->clear(); | |||
| std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype}; | |||
| *img_size = vec; | |||
| return RET_OK; | |||
| } | |||
| int CastOpenCLKernel::GetKernelName(std::string *kernel_name, CastParameter *param) { | |||
| if (param->src_type_ == kNumberTypeFloat32 && param->dst_type_ == kNumberTypeFloat16) { | |||
| kernel_name[0] += "_Fp32ToFp16"; | |||
| @@ -65,23 +44,9 @@ int CastOpenCLKernel::GetKernelName(std::string *kernel_name, CastParameter *par | |||
| int CastOpenCLKernel::Init() { | |||
| auto param = reinterpret_cast<CastParameter *>(this->op_parameter_); | |||
| auto in_format = op_format_; | |||
| if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) { | |||
| MS_LOG(ERROR) << "input format(" << in_format << ") " | |||
| << "format not support!"; | |||
| return RET_ERROR; | |||
| } | |||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | |||
| in_tensors_[0]->SetFormat(op_format_); | |||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | |||
| out_tensors_[0]->SetFormat(op_format_); | |||
| std::string kernel_name = "Cast"; | |||
| GetKernelName(&kernel_name, param); | |||
| if (in_format == schema::Format_NC4HW4) { | |||
| kernel_name += "_NC4HW4"; | |||
| } else if (in_format == schema::Format_NHWC4) { | |||
| kernel_name += "_NHWC4"; | |||
| } | |||
| kernel_name += "_NHWC4"; | |||
| std::set<std::string> build_options; | |||
| std::string source = cast_source; | |||
| std::string program_name = "cast"; | |||
| @@ -91,8 +56,6 @@ int CastOpenCLKernel::Init() { | |||
| return RET_OK; | |||
| } | |||
| int CastOpenCLKernel::ReSize() { return RET_OK; } | |||
| void CastGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t> *local, int max_size) { | |||
| const int max_divider = 8; | |||
| const int max_x = 4, max_y = 8; | |||
| @@ -26,23 +26,19 @@ namespace mindspore::kernel { | |||
| class CastOpenCLKernel : public OpenCLKernel { | |||
| public: | |||
| explicit CastOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| CastOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs) {} | |||
| ~CastOpenCLKernel() override{}; | |||
| ~CastOpenCLKernel() override = default; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| private: | |||
| int GetKernelName(std::string *kernel_name, CastParameter *param); | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| private: | |||
| cl::Kernel kernel_; | |||
| }; | |||
| @@ -31,27 +31,6 @@ using mindspore::schema::PrimitiveType_Concat; | |||
| namespace mindspore::kernel { | |||
| int ConcatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); | |||
| size_t im_dst_x, im_dst_y; | |||
| if (in_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) { | |||
| im_dst_x = out_tensors_[0]->Width() * CO4; | |||
| im_dst_y = out_tensors_[0]->Height() * out_tensors_[0]->Batch(); | |||
| } else { | |||
| im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * CO4; | |||
| im_dst_x = out_tensors_[0]->Width(); | |||
| } | |||
| size_t img_dtype = CL_FLOAT; | |||
| auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||
| if (enable_fp16_) { | |||
| img_dtype = CL_HALF_FLOAT; | |||
| } | |||
| img_size->clear(); | |||
| std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype}; | |||
| *img_size = vec; | |||
| return RET_OK; | |||
| } | |||
| int ConcatOpenCLKernel::RunAxis0() { | |||
| auto allocator_ = ocl_runtime_->GetAllocator(); | |||
| std::vector<size_t> img_size; | |||
| @@ -85,39 +64,15 @@ int ConcatOpenCLKernel::Init() { | |||
| MS_LOG(ERROR) << " only support axis >= 0 and axis <= 3 "; | |||
| return RET_ERROR; | |||
| } | |||
| auto in_format = op_format_; | |||
| if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) { | |||
| MS_LOG(ERROR) << "input format(" << in_format << ") " | |||
| << "format not support!"; | |||
| return RET_ERROR; | |||
| } | |||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | |||
| in_tensors_[0]->SetFormat(op_format_); | |||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | |||
| out_tensors_[0]->SetFormat(op_format_); | |||
| std::string kernel_name = "Concat"; | |||
| if (in_tensors_.size() == 2) { | |||
| kernel_name += "2inputaxis"; | |||
| kernel_name += std::to_string(param->axis_); | |||
| } else if (in_tensors_.size() == 3) { | |||
| kernel_name += "3inputaxis"; | |||
| kernel_name += std::to_string(param->axis_); | |||
| } else if (in_tensors_.size() == 4) { | |||
| kernel_name += "4inputaxis"; | |||
| kernel_name += std::to_string(param->axis_); | |||
| } else if (in_tensors_.size() == 6) { | |||
| kernel_name += "6inputaxis"; | |||
| kernel_name += std::to_string(param->axis_); | |||
| if (in_tensors_.size() == 2 || in_tensors_.size() == 3 || in_tensors_.size() == 4 || in_tensors_.size() == 4) { | |||
| kernel_name += std::to_string(in_tensors_.size()) + "inputaxis" + std::to_string(param->axis_); | |||
| } else { | |||
| MS_LOG(ERROR) << " input must be 2 , 3 , 4 or 6"; | |||
| return RET_ERROR; | |||
| } | |||
| if (in_format == schema::Format_NC4HW4) { | |||
| kernel_name += "_NC4HW4"; | |||
| } else if (in_format == schema::Format_NHWC4) { | |||
| kernel_name += "_NHWC4"; | |||
| } | |||
| kernel_name += "_NHWC4"; | |||
| MS_LOG(DEBUG) << "kernel_name=: " << kernel_name; | |||
| std::set<std::string> build_options; | |||
| std::string source = concat_source; | |||
| @@ -128,16 +83,13 @@ int ConcatOpenCLKernel::Init() { | |||
| return RET_OK; | |||
| } | |||
| int ConcatOpenCLKernel::ReSize() { return RET_OK; } | |||
| int ConcatOpenCLKernel::IntegraShapeToXYZ() { | |||
| auto in_format = op_format_; | |||
| if (out_tensors_[0]->shape().size() > 4 || out_tensors_[0]->shape().size() <= 0) { | |||
| if (out_tensors_[0]->shape().size() > 4 || out_tensors_[0]->shape().empty()) { | |||
| MS_LOG(ERROR) << "in_tensors_.shape() must between 0~4"; | |||
| return RET_ERROR; | |||
| } | |||
| if (in_format == schema::Format_NHWC4 || in_format == schema::Format_NC4HW4) { | |||
| if (out_tensors_[0]->shape().size() == 4) { | |||
| for (int i = 0; i < in_tensors_.size(); ++i) { | |||
| cl_int4 temp_cl; | |||
| auto temp = in_tensors_[i]->shape(); | |||
| @@ -25,28 +25,24 @@ namespace mindspore::kernel { | |||
| class ConcatOpenCLKernel : public OpenCLKernel { | |||
| public: | |||
| explicit ConcatOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| ConcatOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs) {} | |||
| ~ConcatOpenCLKernel() override{}; | |||
| ~ConcatOpenCLKernel() override = default; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| private: | |||
| int RunAxis0(); | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| int IntegraShapeToXYZ(); | |||
| private: | |||
| cl::Kernel kernel_; | |||
| std::vector<cl_int3> XYZShape; | |||
| cl_int4 shape_nhwc; | |||
| cl_int4 shape_nhwc{}; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -38,8 +38,7 @@ int Conv2dTransposeOpenCLKernel::Init() { | |||
| MS_LOG(ERROR) << "only support kernel - stride == 2 * pad"; | |||
| return RET_ERROR; | |||
| } | |||
| std::string kernel_name = "conv2d_transpose"; | |||
| kernel_name += "_" + std::string(EnumNameFormat(op_format_)); | |||
| std::string kernel_name = "conv2d_transpose_NHWC4"; | |||
| enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||
| #ifdef PROGRAM_WITH_IL | |||
| kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); | |||
| @@ -51,20 +50,14 @@ int Conv2dTransposeOpenCLKernel::Init() { | |||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||
| #endif | |||
| PadWeight(); | |||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | |||
| in_tensors_[0]->SetFormat(op_format_); | |||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | |||
| out_tensors_[0]->SetFormat(op_format_); | |||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| int Conv2dTransposeOpenCLKernel::ReSize() { return mindspore::lite::RET_OK; } | |||
| void Conv2dTransposeOpenCLKernel::PadWeight() { | |||
| ConvParameter *param = reinterpret_cast<ConvParameter *>(op_parameter_); | |||
| int ci = in_tensors_[0]->Channel(); | |||
| int co = out_tensors_[0]->Channel(); | |||
| int ci = in_tensors_[0]->shape()[3]; | |||
| int co = out_tensors_[0]->shape()[3]; | |||
| int kh = param->kernel_h_; | |||
| int kw = param->kernel_w_; | |||
| int div_ci = UP_DIV(ci, C4NUM); | |||
| @@ -147,32 +140,6 @@ void Conv2dTransposeOpenCLKernel::PadWeight() { | |||
| allocator->UnmapBuffer(bias_); | |||
| } | |||
| int Conv2dTransposeOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| size_t im_dst_x, im_dst_y; | |||
| int n = out_tensors_[0]->shape()[0]; | |||
| int h = out_tensors_[0]->shape()[1]; | |||
| int w = out_tensors_[0]->shape()[2]; | |||
| int c = out_tensors_[0]->shape()[3]; | |||
| if (op_format_ == schema::Format::Format_NHWC4) { | |||
| im_dst_x = w * UP_DIV(c, C4NUM); | |||
| im_dst_y = n * h; | |||
| } else if (op_format_ == schema::Format::Format_NC4HW4) { | |||
| im_dst_x = w; | |||
| im_dst_y = n * UP_DIV(c, C4NUM) * h; | |||
| } else { | |||
| MS_LOG(ERROR) << "not support op format:" << EnumNameFormat(op_format_); | |||
| return mindspore::lite::RET_ERROR; | |||
| } | |||
| size_t img_dtype = CL_FLOAT; | |||
| if (enable_fp16_) { | |||
| img_dtype = CL_HALF_FLOAT; | |||
| } | |||
| img_size->clear(); | |||
| std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype}; | |||
| *img_size = vec; | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| int Conv2dTransposeOpenCLKernel::Run() { | |||
| MS_LOG(DEBUG) << this->name() << " Running!"; | |||
| ConvParameter *param = reinterpret_cast<ConvParameter *>(op_parameter_); | |||
| @@ -27,21 +27,20 @@ namespace mindspore::kernel { | |||
| class Conv2dTransposeOpenCLKernel : public OpenCLKernel { | |||
| public: | |||
| explicit Conv2dTransposeOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| Conv2dTransposeOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs) {} | |||
| ~Conv2dTransposeOpenCLKernel() override{}; | |||
| ~Conv2dTransposeOpenCLKernel() override = default; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| void PadWeight(); | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| private: | |||
| void PadWeight(); | |||
| cl::Kernel kernel_; | |||
| void *padWeight_; | |||
| void *bias_; | |||
| void *padWeight_{nullptr}; | |||
| void *bias_{nullptr}; | |||
| bool enable_fp16_{false}; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -29,8 +29,6 @@ using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_Conv2D; | |||
| using mindspore::schema::Format::Format_NC4HW4; | |||
| using mindspore::schema::Format::Format_NCHW; | |||
| using mindspore::schema::Format::Format_NHWC; | |||
| using mindspore::schema::Format::Format_NHWC4; | |||
| namespace mindspore::kernel { | |||
| @@ -46,14 +44,6 @@ int ConvolutionOpenCLKernel::Init() { | |||
| auto input_tensor = in_tensors_[0]; | |||
| auto output_tensor = out_tensors_[0]; | |||
| in_ori_format_ = input_tensor->GetFormat(); | |||
| out_ori_format_ = output_tensor->GetFormat(); | |||
| if (op_format_ != Format_NHWC4 && op_format_ != Format_NC4HW4) { | |||
| MS_LOG(ERROR) << "op_format_ " << op_format_ << " not support!"; | |||
| return RET_ERROR; | |||
| } | |||
| input_tensor->SetFormat(op_format_); | |||
| output_tensor->SetFormat(op_format_); | |||
| batch_size_ = input_tensor->Batch(); | |||
| CI_ = input_tensor->Channel(); | |||
| @@ -112,7 +102,7 @@ int ConvolutionOpenCLKernel::Init() { | |||
| winograd_mem1_ = allocator->Malloc(size, {width, height, img_dtype}); | |||
| } | |||
| this->InitBuffer(); | |||
| InitBuffer(); | |||
| MS_LOG(DEBUG) << "Convolution Init Done!"; | |||
| return RET_OK; | |||
| @@ -251,30 +241,6 @@ int ConvolutionOpenCLKernel::InitBuffer() { | |||
| return RET_OK; | |||
| } | |||
| int ConvolutionOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| size_t im_dst_x, im_dst_y; | |||
| if (in_tensors_[0]->GetFormat() == Format_NHWC4) { | |||
| if (OW_ * CO_SLICES_ <= MAX_IMAGE2D_SIZE) { | |||
| { | |||
| im_dst_y = batch_size_ * OH_; | |||
| im_dst_x = OW_ * CO_SLICES_; | |||
| } | |||
| } else { | |||
| im_dst_y = OW_; | |||
| im_dst_x = batch_size_ * OH_ * CO_SLICES_; | |||
| } | |||
| } else { | |||
| im_dst_y = batch_size_ * CO_SLICES_ * OH_; | |||
| im_dst_x = OW_; | |||
| } | |||
| size_t img_dtype = use_fp16_ ? CL_HALF_FLOAT : CL_FLOAT; | |||
| img_size->clear(); | |||
| img_size->push_back(im_dst_x); | |||
| img_size->push_back(im_dst_y); | |||
| img_size->push_back(img_dtype); | |||
| return RET_OK; | |||
| } | |||
| int ConvolutionOpenCLKernel::Run() { | |||
| MS_LOG(DEBUG) << this->name() << " Running!"; | |||
| @@ -23,22 +23,22 @@ | |||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | |||
| #include "schema/model_generated.h" | |||
| #include "nnacl/conv_parameter.h" | |||
| #include "schema/ops_generated.h" | |||
| namespace mindspore::kernel { | |||
| class ConvolutionOpenCLKernel : public OpenCLKernel { | |||
| public: | |||
| explicit ConvolutionOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| ConvolutionOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs) {} | |||
| ~ConvolutionOpenCLKernel() override{}; | |||
| ~ConvolutionOpenCLKernel() override = default; | |||
| int Init() override; | |||
| int Run() override; | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| int InitBuffer() override; | |||
| private: | |||
| int InitBuffer(); | |||
| int InitWeight(); | |||
| int InitBias(); | |||
| int GenerateWinogradWeight(); | |||
| @@ -92,7 +92,8 @@ class ConvolutionOpenCLKernel : public OpenCLKernel { | |||
| return code_id; | |||
| } | |||
| bool use_fp16_ = false; | |||
| bool use_fp16_{false}; | |||
| const schema::Format op_format_{schema::Format_NHWC4}; | |||
| int batch_size_{}; | |||
| int CI_{}; | |||
| @@ -105,16 +106,16 @@ class ConvolutionOpenCLKernel : public OpenCLKernel { | |||
| int CO_SLICES_{}; | |||
| int KH_{}; | |||
| int KW_{}; | |||
| void *packed_weight_ = nullptr; | |||
| void *packed_bias_ = nullptr; | |||
| bool has_bias_ = false; | |||
| void *packed_weight_{nullptr}; | |||
| void *packed_bias_{nullptr}; | |||
| bool has_bias_{false}; | |||
| bool use_winograd_ = false; | |||
| bool use_winograd_{false}; | |||
| int TILES_X_{}; | |||
| int TILES_Y_{}; | |||
| int TILES_XY_{}; | |||
| void *winograd_mem0_ = nullptr; | |||
| void *winograd_mem1_ = nullptr; | |||
| void *winograd_mem0_{nullptr}; | |||
| void *winograd_mem1_{nullptr}; | |||
| cl::Kernel kernel_4x4to36_; | |||
| cl::Kernel kernel_conv_; | |||
| @@ -42,26 +42,12 @@ namespace mindspore::kernel { | |||
| int DepthwiseConv2dOpenCLKernel::Init() { | |||
| std::string kernel_name = "DepthwiseConv2d"; | |||
| auto in_format = op_format_; | |||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | |||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | |||
| if (in_format != schema::Format::Format_NHWC4 && in_format != schema::Format::Format_NC4HW4) { | |||
| MS_LOG(ERROR) << "input format(" << in_format << ") " | |||
| << "format not support!"; | |||
| return mindspore::lite::RET_ERROR; | |||
| } | |||
| in_tensors_[0]->SetFormat(in_format); | |||
| out_tensors_[0]->SetFormat(in_format); | |||
| if (out_mem_type_ == OpenCLMemType::BUF) { | |||
| kernel_name += "_BUF"; | |||
| } else { | |||
| kernel_name += "_IMG"; | |||
| } | |||
| if (in_format == schema::Format::Format_NC4HW4) { | |||
| kernel_name += "_NC4HW4"; | |||
| } else if (in_format == schema::Format::Format_NHWC4) { | |||
| kernel_name += "_NHWC4"; | |||
| } | |||
| kernel_name += "_NHWC4"; | |||
| auto parameter = reinterpret_cast<ConvParameter *>(op_parameter_); | |||
| if (parameter->kernel_h_ == 1) { | |||
| kernel_name += "_1x1"; | |||
| @@ -75,7 +61,7 @@ int DepthwiseConv2dOpenCLKernel::Init() { | |||
| ocl_runtime_->LoadSource(program_name, source); | |||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||
| #endif | |||
| this->InitBuffer(); | |||
| InitBuffer(); | |||
| MS_LOG(DEBUG) << kernel_name << " Init Done! mem type=" << static_cast<int>(out_mem_type_); | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| @@ -146,28 +132,6 @@ int DepthwiseConv2dOpenCLKernel::InitBuffer() { | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| int DepthwiseConv2dOpenCLKernel::ReSize() { return mindspore::lite::RET_OK; } | |||
| int DepthwiseConv2dOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); | |||
| size_t im_dst_x, im_dst_y; | |||
| if (in_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) { | |||
| im_dst_x = out_tensors_[0]->Width() * CO4; | |||
| im_dst_y = out_tensors_[0]->Height(); | |||
| } else { | |||
| im_dst_y = out_tensors_[0]->Height() * CO4; | |||
| im_dst_x = out_tensors_[0]->Width(); | |||
| } | |||
| size_t img_dtype = CL_FLOAT; | |||
| if (ocl_runtime_->GetFp16Enable()) { | |||
| img_dtype = CL_HALF_FLOAT; | |||
| } | |||
| img_size->clear(); | |||
| std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype}; | |||
| *img_size = vec; | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| int DepthwiseConv2dOpenCLKernel::GetGlobalSize(size_t idx, std::vector<size_t> *global_size) { | |||
| size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); | |||
| std::vector<size_t> global = {(size_t)out_tensors_[0]->Width(), (size_t)out_tensors_[0]->Height(), CO4}; | |||
| @@ -25,27 +25,25 @@ namespace mindspore::kernel { | |||
| class DepthwiseConv2dOpenCLKernel : public OpenCLKernel { | |||
| public: | |||
| explicit DepthwiseConv2dOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs), packed_weight_(nullptr), bias_data_(nullptr), kernel_(nullptr) {} | |||
| DepthwiseConv2dOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs) {} | |||
| ~DepthwiseConv2dOpenCLKernel() override{}; | |||
| ~DepthwiseConv2dOpenCLKernel() override = default; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int InitBuffer(); | |||
| int InitBuffer() override; | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| int GetGlobalSize(size_t idx, std::vector<size_t> *global_size) override; | |||
| int GetLocalSize(size_t idx, const std::vector<size_t> &global_size, std::vector<size_t> *local_size) override; | |||
| private: | |||
| void *packed_weight_; | |||
| void *bias_data_; | |||
| void *packed_weight_{nullptr}; | |||
| void *bias_data_{nullptr}; | |||
| cl::Kernel kernel_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -33,8 +33,7 @@ using mindspore::schema::PrimitiveType_FullConnection; | |||
| namespace mindspore::kernel { | |||
| int FullConnectionOpenCLKernel::Init() { | |||
| std::string kernel_name = "FullConnection"; | |||
| kernel_name += "_" + std::string(EnumNameFormat(op_format_)); | |||
| std::string kernel_name = "FullConnection_NHWC4"; | |||
| auto param = reinterpret_cast<MatMulParameter *>(op_parameter_); | |||
| transposeA = param->a_transpose_; | |||
| if (transposeA) { | |||
| @@ -77,16 +76,10 @@ int FullConnectionOpenCLKernel::Init() { | |||
| #endif | |||
| PadWeight(); | |||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | |||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | |||
| in_tensors_[0]->SetFormat(op_format_); | |||
| out_tensors_[0]->SetFormat(op_format_); | |||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | |||
| return RET_OK; | |||
| } | |||
| int FullConnectionOpenCLKernel::ReSize() { return RET_OK; } | |||
| void FullConnectionOpenCLKernel::PadWeight() { | |||
| // ABMCI @ ABCICO = ABMCO | |||
| auto allocator = ocl_runtime_->GetAllocator(); | |||
| @@ -177,39 +170,6 @@ void FullConnectionOpenCLKernel::PadWeight() { | |||
| allocator->UnmapBuffer(bias_); | |||
| } | |||
| int FullConnectionOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| size_t im_dst_x, im_dst_y; | |||
| auto out_shape = out_tensors_[0]->shape(); | |||
| int n = 1, h = 1, w = 1, c = 1; | |||
| if (out_tensors_[0]->shape().size() == 2) { | |||
| n = out_shape[0]; | |||
| c = out_shape[1]; | |||
| } else { | |||
| n = out_shape[0]; | |||
| h = out_shape[1]; | |||
| w = out_shape[2]; | |||
| c = out_shape[3]; | |||
| } | |||
| if (op_format_ == schema::Format_NHWC4) { | |||
| im_dst_x = w * UP_DIV(c, C4NUM); | |||
| im_dst_y = n * h; | |||
| } else if (op_format_ == schema::Format_NC4HW4) { | |||
| im_dst_x = w; | |||
| im_dst_y = n * UP_DIV(c, C4NUM) * h; | |||
| } else { | |||
| MS_LOG(ERROR) << "not support op format:" << EnumNameFormat(op_format_); | |||
| return RET_ERROR; | |||
| } | |||
| size_t img_dtype = CL_FLOAT; | |||
| if (enable_fp16_) { | |||
| img_dtype = CL_HALF_FLOAT; | |||
| } | |||
| img_size->clear(); | |||
| std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype}; | |||
| *img_size = vec; | |||
| return RET_OK; | |||
| } | |||
| int FullConnectionOpenCLKernel::Run() { | |||
| MS_LOG(DEBUG) << this->name() << " Running!"; | |||
| // local size should less than MAX_GROUP_SIZE | |||
| @@ -26,21 +26,20 @@ namespace mindspore::kernel { | |||
| class FullConnectionOpenCLKernel : public OpenCLKernel { | |||
| public: | |||
| explicit FullConnectionOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| FullConnectionOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs) {} | |||
| ~FullConnectionOpenCLKernel() override{}; | |||
| ~FullConnectionOpenCLKernel() override = default; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| void PadWeight(); | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| private: | |||
| void PadWeight(); | |||
| cl::Kernel kernel_; | |||
| void *padWeight_; | |||
| void *bias_; | |||
| void *padWeight_{nullptr}; | |||
| void *bias_{nullptr}; | |||
| bool enable_fp16_{false}; | |||
| bool transposeA{false}; | |||
| bool transposeB{true}; | |||
| @@ -31,25 +31,10 @@ using mindspore::schema::PrimitiveType_Gather; | |||
| namespace mindspore::kernel { | |||
| int GatherOpenCLKernel::Init() { | |||
| std::string kernel_name = "gather"; | |||
| auto in_format = op_format_; | |||
| if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) { | |||
| MS_LOG(ERROR) << "input format(" << in_format << ") " | |||
| << "format not support!"; | |||
| return RET_ERROR; | |||
| } | |||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | |||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | |||
| in_tensors_[0]->SetFormat(op_format_); | |||
| out_tensors_[0]->SetFormat(op_format_); | |||
| std::string kernel_name = "gather_NHWC4"; | |||
| #ifdef PROGRAM_WITH_IL | |||
| kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); | |||
| #else | |||
| if (in_format == schema::Format_NC4HW4) { | |||
| kernel_name += "_NC4HW4"; | |||
| } else { | |||
| kernel_name += "_NHWC4"; | |||
| } | |||
| std::set<std::string> build_options; | |||
| std::string source = gather_source; | |||
| std::string program_name = "gather"; | |||
| @@ -99,29 +84,6 @@ int GatherOpenCLKernel::InitBuffer() { | |||
| return RET_OK; | |||
| } | |||
| int GatherOpenCLKernel::ReSize() { return RET_OK; } | |||
| int GatherOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); | |||
| size_t im_dst_x, im_dst_y; | |||
| if (in_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) { | |||
| im_dst_x = out_tensors_[0]->Width() * CO4; | |||
| im_dst_y = out_tensors_[0]->Height() * out_tensors_[0]->Batch(); | |||
| } else { | |||
| im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * CO4; | |||
| im_dst_x = out_tensors_[0]->Width(); | |||
| } | |||
| size_t img_dtype = CL_FLOAT; | |||
| auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||
| if (enable_fp16_) { | |||
| img_dtype = CL_HALF_FLOAT; | |||
| } | |||
| img_size->clear(); | |||
| std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype}; | |||
| *img_size = std::move(vec); | |||
| return RET_OK; | |||
| } | |||
| int GatherOpenCLKernel::Run() { | |||
| MS_LOG(DEBUG) << this->name() << " Running! "; | |||
| auto param = reinterpret_cast<GatherParameter *>(this->op_parameter_); | |||
| @@ -25,25 +25,19 @@ namespace mindspore::kernel { | |||
| class GatherOpenCLKernel : public OpenCLKernel { | |||
| public: | |||
| explicit GatherOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs), indices_data_(nullptr) {} | |||
| GatherOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs) {} | |||
| ~GatherOpenCLKernel() override{}; | |||
| ~GatherOpenCLKernel() override = default; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| int InitBuffer(); | |||
| int InitBuffer() override; | |||
| private: | |||
| cl::Kernel kernel_; | |||
| int32_t *indices_data_; | |||
| int32_t *indices_data_{nullptr}; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif | |||
| @@ -31,8 +31,7 @@ using mindspore::schema::PrimitiveType_MatMul; | |||
| namespace mindspore::kernel { | |||
| int MatMulOpenCLKernel::Init() { | |||
| std::string kernel_name = "MatMul"; | |||
| kernel_name += "_" + std::string(EnumNameFormat(op_format_)); | |||
| std::string kernel_name = "MatMul_NHWC4"; | |||
| auto param = reinterpret_cast<MatMulParameter *>(op_parameter_); | |||
| transposeA = param->a_transpose_; | |||
| if (transposeA) { | |||
| @@ -64,16 +63,10 @@ int MatMulOpenCLKernel::Init() { | |||
| #endif | |||
| PadWeight(); | |||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | |||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | |||
| in_tensors_[0]->SetFormat(op_format_); | |||
| out_tensors_[0]->SetFormat(op_format_); | |||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| int MatMulOpenCLKernel::ReSize() { return mindspore::lite::RET_OK; } | |||
| void MatMulOpenCLKernel::PadWeight() { | |||
| // ABMCI @ ABCICO = ABMCO | |||
| auto allocator = ocl_runtime_->GetAllocator(); | |||
| @@ -137,39 +130,6 @@ void MatMulOpenCLKernel::PadWeight() { | |||
| allocator->UnmapBuffer(padWeight_); | |||
| } | |||
| int MatMulOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| size_t im_dst_x, im_dst_y; | |||
| auto out_shape = out_tensors_[0]->shape(); | |||
| int n = 1, h = 1, w = 1, c = 1; | |||
| if (dims == 2) { | |||
| n = out_shape[0]; | |||
| c = out_shape[1]; | |||
| } else if (dims == 4) { | |||
| n = out_shape[0]; | |||
| h = out_shape[1]; | |||
| w = out_shape[2]; | |||
| c = out_shape[3]; | |||
| } | |||
| if (op_format_ == schema::Format_NHWC4) { | |||
| im_dst_x = w * UP_DIV(c, C4NUM); | |||
| im_dst_y = n * h; | |||
| } else if (op_format_ == schema::Format_NC4HW4) { | |||
| im_dst_x = w; | |||
| im_dst_y = n * UP_DIV(c, C4NUM) * h; | |||
| } else { | |||
| MS_LOG(ERROR) << "not support op format:" << EnumNameFormat(op_format_); | |||
| return mindspore::lite::RET_ERROR; | |||
| } | |||
| size_t img_dtype = CL_FLOAT; | |||
| if (enable_fp16_) { | |||
| img_dtype = CL_HALF_FLOAT; | |||
| } | |||
| img_size->clear(); | |||
| std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype}; | |||
| *img_size = vec; | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| int MatMulOpenCLKernel::Run() { | |||
| MS_LOG(DEBUG) << this->name() << " Running!"; | |||
| // local size should less than MAX_GROUP_SIZE | |||
| @@ -26,27 +26,26 @@ namespace mindspore::kernel { | |||
| class MatMulOpenCLKernel : public OpenCLKernel { | |||
| public: | |||
| explicit MatMulOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs), inShape(MAX_DIMS, 1), outShape(MAX_DIMS, 1) {} | |||
| ~MatMulOpenCLKernel() override{}; | |||
| MatMulOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs) {} | |||
| ~MatMulOpenCLKernel() override = default; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| void PadWeight(); | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| private: | |||
| void PadWeight(); | |||
| cl::Kernel kernel_; | |||
| void *padWeight_; | |||
| void *padWeight_{nullptr}; | |||
| bool enable_fp16_{false}; | |||
| bool transposeA{false}; | |||
| bool transposeB{true}; | |||
| int dims; | |||
| static constexpr int MAX_DIMS = 4; // max supported matmul dims | |||
| std::vector<int> inShape; | |||
| std::vector<int> outShape; | |||
| int dims{}; | |||
| static constexpr int MAX_DIMS{4}; // max supported matmul dims | |||
| std::vector<int> inShape{std::vector<int>(MAX_DIMS, 1)}; | |||
| std::vector<int> outShape{std::vector<int>(MAX_DIMS, 1)}; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -30,10 +30,6 @@ using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PaddingMode_CONSTANT; | |||
| using mindspore::schema::PrimitiveType_Pad; | |||
| using mindspore::schema::Format::Format_NC4HW4; | |||
| using mindspore::schema::Format::Format_NCHW; | |||
| using mindspore::schema::Format::Format_NHWC; | |||
| using mindspore::schema::Format::Format_NHWC4; | |||
| namespace mindspore::kernel { | |||
| @@ -41,9 +37,6 @@ int PadOpenCLKernel::Init() { | |||
| auto param = reinterpret_cast<PadParameter *>(op_parameter_); | |||
| std::set<std::string> build_options; | |||
| if (op_format_ != Format_NHWC4 && op_format_ != Format_NC4HW4) { | |||
| MS_LOG(ERROR) << "op_format_ " << op_format_ << " not support!"; | |||
| } | |||
| if (in_tensors_.empty()) { | |||
| MS_LOG(ERROR) << "PadOpenCLKernel in_tensors is empty"; | |||
| return RET_ERROR; | |||
| @@ -63,10 +56,6 @@ int PadOpenCLKernel::Init() { | |||
| auto input_tensor = in_tensors_[0]; | |||
| auto output_tensor = out_tensors_[0]; | |||
| in_ori_format_ = input_tensor->GetFormat(); | |||
| out_ori_format_ = output_tensor->GetFormat(); | |||
| input_tensor->SetFormat(op_format_); | |||
| output_tensor->SetFormat(op_format_); | |||
| CI_ = input_tensor->Channel(); | |||
| IH_ = input_tensor->Height(); | |||
| @@ -78,8 +67,8 @@ int PadOpenCLKernel::Init() { | |||
| CO_SLICES_ = UP_DIV(CO_, C4NUM); | |||
| const std::string source = pad_source; | |||
| const std::string kernel_name = op_format_ == Format_NHWC4 ? "Pad_NHWC4" : "Pad_NC4HW4"; | |||
| const std::string program_name = "Pad"; | |||
| const std::string kernel_name = "Pad_NHWC4"; | |||
| ocl_runtime_->LoadSource(program_name, source); | |||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||
| @@ -87,30 +76,6 @@ int PadOpenCLKernel::Init() { | |||
| return RET_OK; | |||
| } | |||
| int PadOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| size_t im_dst_x, im_dst_y; | |||
| if (in_tensors_[0]->GetFormat() == Format_NHWC4) { | |||
| if (OW_ * CO_SLICES_ <= MAX_IMAGE2D_SIZE) { | |||
| { | |||
| im_dst_x = OW_ * CO_SLICES_; | |||
| im_dst_y = OH_; | |||
| } | |||
| } else { | |||
| im_dst_x = OH_ * CO_SLICES_; | |||
| im_dst_y = OW_; | |||
| } | |||
| } else { | |||
| im_dst_y = OH_ * CO_SLICES_; | |||
| im_dst_x = OW_; | |||
| } | |||
| size_t img_dtype = ocl_runtime_->GetFp16Enable() ? CL_HALF_FLOAT : CL_FLOAT; | |||
| img_size->clear(); | |||
| img_size->push_back(im_dst_x); | |||
| img_size->push_back(im_dst_y); | |||
| img_size->push_back(img_dtype); | |||
| return RET_OK; | |||
| } | |||
| int PadOpenCLKernel::Run() { | |||
| MS_LOG(DEBUG) << this->name() << " Running!"; | |||
| @@ -28,16 +28,16 @@ namespace mindspore::kernel { | |||
| class PadOpenCLKernel : public OpenCLKernel { | |||
| public: | |||
| explicit PadOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| PadOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs) {} | |||
| ~PadOpenCLKernel() override{}; | |||
| ~PadOpenCLKernel() override = default; | |||
| int Init() override; | |||
| int Run() override; | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| private: | |||
| cl::Kernel kernel_; | |||
| int CI_{}; | |||
| int IH_{}; | |||
| int IW_{}; | |||
| @@ -46,7 +46,6 @@ class PadOpenCLKernel : public OpenCLKernel { | |||
| int OW_{}; | |||
| int CI_SLICES_{}; | |||
| int CO_SLICES_{}; | |||
| cl::Kernel kernel_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -72,7 +72,7 @@ int PoolingOpenCLKernel::Init() { | |||
| #ifdef PROGRAM_WITH_IL | |||
| kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); | |||
| #else | |||
| kernel_name += "_" + std::string(EnumNameFormat(op_format_)); | |||
| kernel_name += "_NHWC4"; | |||
| if (out_mem_type_ == OpenCLMemType::BUF) { | |||
| MS_LOG(ERROR) << "buffer output not support yet."; | |||
| return mindspore::lite::RET_ERROR; | |||
| @@ -83,10 +83,6 @@ int PoolingOpenCLKernel::Init() { | |||
| ocl_runtime_->LoadSource(program_name, source); | |||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||
| #endif | |||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | |||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | |||
| in_tensors_[0]->SetFormat(op_format_); | |||
| out_tensors_[0]->SetFormat(op_format_); | |||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | |||
| return mindspore::lite::RET_OK; | |||
| @@ -100,36 +96,6 @@ std::vector<size_t> PoolingOpenCLKernel::InitGlobalSize() const { | |||
| return global; | |||
| } | |||
| int PoolingOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| size_t im_dst_x, im_dst_y; | |||
| int n = out_tensors_[0]->shape()[0]; | |||
| int h = out_tensors_[0]->shape()[1]; | |||
| int w = out_tensors_[0]->shape()[2]; | |||
| int c = out_tensors_[0]->shape()[3]; | |||
| if (op_format_ == schema::Format::Format_NHWC4) { | |||
| im_dst_x = w * UP_DIV(c, C4NUM); | |||
| im_dst_y = n * h; | |||
| } else if (op_format_ == schema::Format::Format_NC4HW4) { | |||
| im_dst_x = w; | |||
| im_dst_y = n * UP_DIV(c, C4NUM) * h; | |||
| } else { | |||
| MS_LOG(ERROR) << "not support op format:" << EnumNameFormat(op_format_); | |||
| return mindspore::lite::RET_ERROR; | |||
| } | |||
| size_t img_dtype = CL_FLOAT; | |||
| if (enable_fp16_) { | |||
| img_dtype = CL_HALF_FLOAT; | |||
| } | |||
| img_size->clear(); | |||
| std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype}; | |||
| *img_size = vec; | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| int PoolingOpenCLKernel::InitBuffer() { return mindspore::lite::RET_OK; } | |||
| int PoolingOpenCLKernel::ReSize() { return mindspore::lite::RET_OK; } | |||
| int PoolingOpenCLKernel::Run() { | |||
| MS_LOG(DEBUG) << this->name() << " Running!"; | |||
| @@ -26,18 +26,13 @@ namespace mindspore::kernel { | |||
| class PoolingOpenCLKernel : public OpenCLKernel { | |||
| public: | |||
| explicit PoolingOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs) { | |||
| parameter_ = reinterpret_cast<PoolingParameter *>(parameter); | |||
| } | |||
| ~PoolingOpenCLKernel() override{}; | |||
| PoolingOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs), parameter_(reinterpret_cast<PoolingParameter *>(parameter)) {} | |||
| ~PoolingOpenCLKernel() override = default; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int InitBuffer(); | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| private: | |||
| std::vector<size_t> InitGlobalSize() const; | |||
| @@ -33,7 +33,7 @@ using mindspore::schema::PrimitiveType_PReLU; | |||
| namespace mindspore::kernel { | |||
| void PReluOpenCLKernel::InitBuffer() { | |||
| int PReluOpenCLKernel::InitBuffer() { | |||
| auto allocator = ocl_runtime_->GetAllocator(); | |||
| auto weight_tensor = in_tensors_[1]; | |||
| if (weight_is_scalar) { | |||
| @@ -71,6 +71,7 @@ void PReluOpenCLKernel::InitBuffer() { | |||
| } | |||
| allocator->UnmapBuffer(weight_vector_); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int PReluOpenCLKernel::Init() { | |||
| @@ -84,10 +85,6 @@ int PReluOpenCLKernel::Init() { | |||
| C_ = input_tensor->Channel(); | |||
| H_ = input_tensor->Height(); | |||
| W_ = input_tensor->Width(); | |||
| if (input_tensor->GetFormat() != schema::Format_NC4HW4 && input_tensor->GetFormat() != schema::Format_NHWC4) { | |||
| MS_LOG(ERROR) << "PRelu only support Format_NC4HW4 and Format_NHWC4"; | |||
| return mindspore::lite::RET_ERROR; | |||
| } | |||
| if (batch_size_ != 1) { | |||
| MS_LOG(ERROR) << "Init PRelu kernel failed: Unsupported multi-batch."; | |||
| return RET_ERROR; | |||
| @@ -104,12 +101,7 @@ int PReluOpenCLKernel::Init() { | |||
| MS_LOG(ERROR) << "PRelu weight must be float32 or float16"; | |||
| return RET_ERROR; | |||
| } | |||
| enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||
| in_ori_format_ = input_tensor->GetFormat(); | |||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | |||
| input_tensor->SetFormat(op_format_); | |||
| out_tensors_[0]->SetFormat(op_format_); | |||
| std::set<std::string> build_options; | |||
| std::string source = prelu_source; | |||
| @@ -137,11 +129,7 @@ int PReluOpenCLKernel::Run() { | |||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_vector_); | |||
| } | |||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, shape); | |||
| if (op_format_ == schema::Format_NHWC4) { | |||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 2); | |||
| } else { // Format_NC4HW4 = 100 | |||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 100); | |||
| } | |||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 2); | |||
| std::vector<size_t> local = {4, 4, 1}; | |||
| std::vector<size_t> global = {static_cast<size_t>(H_), static_cast<size_t>(W_), static_cast<size_t>(CO_SLICES_)}; | |||
| @@ -153,31 +141,6 @@ int PReluOpenCLKernel::Run() { | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| int PReluOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| size_t im_dst_x, im_dst_y; | |||
| auto CO_SLICES_ = UP_DIV(C_, C4NUM); | |||
| if (in_tensors_[0]->GetFormat() == schema::Format_NHWC4) { | |||
| if (W_ * CO_SLICES_ <= MAX_IMAGE2D_SIZE) { | |||
| { | |||
| im_dst_y = batch_size_ * H_; | |||
| im_dst_x = W_ * CO_SLICES_; | |||
| } | |||
| } else { | |||
| im_dst_y = W_; | |||
| im_dst_x = batch_size_ * H_ * CO_SLICES_; | |||
| } | |||
| } else { | |||
| im_dst_y = batch_size_ * CO_SLICES_ * H_; | |||
| im_dst_x = W_; | |||
| } | |||
| size_t img_dtype = enable_fp16_ ? CL_HALF_FLOAT : CL_FLOAT; | |||
| img_size->clear(); | |||
| img_size->push_back(im_dst_x); | |||
| img_size->push_back(im_dst_y); | |||
| img_size->push_back(img_dtype); | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| kernel::LiteKernel *OpenCLPReluKernelCreator(const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter, | |||
| const lite::InnerContext *ctx, const kernel::KernelKey &desc, | |||
| @@ -27,15 +27,14 @@ namespace mindspore::kernel { | |||
| class PReluOpenCLKernel : public OpenCLKernel { | |||
| public: | |||
| explicit PReluOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| PReluOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs) {} | |||
| ~PReluOpenCLKernel() override{}; | |||
| ~PReluOpenCLKernel() override = default; | |||
| int Init() override; | |||
| int Run() override; | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| void InitBuffer(); | |||
| int InitBuffer() override; | |||
| private: | |||
| cl::Kernel kernel_; | |||
| @@ -57,7 +57,7 @@ int ReduceOpenCLKernel::Init() { | |||
| return RET_PARAM_INVALID; | |||
| } | |||
| std::string kernel_name = reduce_type2str.at(reduce_param->mode_); | |||
| kernel_name += "_" + std::string(EnumNameFormat(op_format_)); | |||
| kernel_name += "_NHWC4"; | |||
| enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||
| if (in_tensors_[0]->shape().back() != out_tensors_[0]->shape().back()) { | |||
| @@ -74,10 +74,6 @@ int ReduceOpenCLKernel::Init() { | |||
| ocl_runtime_->LoadSource(program_name, source); | |||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||
| #endif | |||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | |||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | |||
| in_tensors_[0]->SetFormat(op_format_); | |||
| out_tensors_[0]->SetFormat(op_format_); | |||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| @@ -97,31 +93,6 @@ void ReduceOpenCLKernel::InitNHWCShape() { | |||
| nhwc_shape_ = {n, h, w, c}; | |||
| } | |||
| int ReduceOpenCLKernel::ReSize() { return mindspore::lite::RET_OK; } | |||
| int ReduceOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| size_t im_dst_x, im_dst_y; | |||
| if (op_format_ == schema::Format_NHWC4) { | |||
| im_dst_x = nhwc_shape_[2] * UP_DIV(nhwc_shape_[3], C4NUM); | |||
| im_dst_y = nhwc_shape_[0] * nhwc_shape_[1]; | |||
| } else if (op_format_ == schema::Format_NC4HW4) { | |||
| im_dst_x = nhwc_shape_[2]; | |||
| im_dst_y = nhwc_shape_[0] * UP_DIV(nhwc_shape_[3], C4NUM) * nhwc_shape_[1]; | |||
| } else { | |||
| MS_LOG(ERROR) << "not support op format:" << EnumNameFormat(op_format_); | |||
| return mindspore::lite::RET_ERROR; | |||
| } | |||
| size_t img_dtype = CL_FLOAT; | |||
| if (enable_fp16_) { | |||
| img_dtype = CL_HALF_FLOAT; | |||
| } | |||
| img_size->clear(); | |||
| std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype}; | |||
| *img_size = vec; | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| int ReduceOpenCLKernel::Run() { | |||
| MS_LOG(DEBUG) << this->name() << " Running!"; | |||
| std::vector<int> shapex = in_tensors_[0]->shape(); | |||
| @@ -26,15 +26,13 @@ | |||
| namespace mindspore::kernel { | |||
| class ReduceOpenCLKernel : public OpenCLKernel { | |||
| public: | |||
| explicit ReduceOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| ReduceOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs) {} | |||
| ~ReduceOpenCLKernel() override{}; | |||
| ~ReduceOpenCLKernel() override = default; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| void InitNHWCShape(); | |||
| private: | |||
| @@ -31,25 +31,11 @@ using mindspore::schema::PrimitiveType_Squeeze; | |||
| namespace mindspore::kernel { | |||
| int ReshapeOpenCLKernel::Init() { | |||
| std::string kernel_name = "reshape"; | |||
| kernel_name += "_" + std::string(EnumNameFormat(op_format_)); | |||
| enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||
| std::string kernel_name = "reshape_NHWC4"; | |||
| if (out_tensors_[0]->shape().size() != 2 && out_tensors_[0]->shape().size() != 4) { | |||
| MS_LOG(ERROR) << "Reshape output size should in 2,4"; | |||
| return RET_ERROR; | |||
| } | |||
| if (in_tensors_[0]->shape().size() == 2) { | |||
| inShape = {in_tensors_[0]->shape()[0], 1, 1, in_tensors_[0]->shape()[1]}; | |||
| } else { | |||
| inShape = {in_tensors_[0]->shape()[0], in_tensors_[0]->shape()[1], in_tensors_[0]->shape()[2], | |||
| in_tensors_[0]->shape()[3]}; | |||
| } | |||
| if (out_tensors_[0]->shape().size() == 2) { | |||
| outShape = {out_tensors_[0]->shape()[0], 1, 1, out_tensors_[0]->shape()[1]}; | |||
| } else { | |||
| outShape = {out_tensors_[0]->shape()[0], out_tensors_[0]->shape()[1], out_tensors_[0]->shape()[2], | |||
| out_tensors_[0]->shape()[3]}; | |||
| } | |||
| #ifdef PROGRAM_WITH_IL | |||
| kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); | |||
| #else | |||
| @@ -59,55 +45,20 @@ int ReshapeOpenCLKernel::Init() { | |||
| ocl_runtime_->LoadSource(program_name, source); | |||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||
| #endif | |||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | |||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | |||
| in_tensors_[0]->SetFormat(op_format_); | |||
| out_tensors_[0]->SetFormat(op_format_); | |||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | |||
| return RET_OK; | |||
| } | |||
| int ReshapeOpenCLKernel::ReSize() { return RET_OK; } | |||
| int ReshapeOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| size_t im_dst_x, im_dst_y; | |||
| int n = outShape[0]; | |||
| int h = outShape[1]; | |||
| int w = outShape[2]; | |||
| int c = outShape[3]; | |||
| if (img_size_.size() == OpenCLImageSizeIndex::IDX_NUM) { | |||
| *img_size = img_size_; | |||
| return RET_OK; | |||
| } | |||
| if (op_format_ == schema::Format::Format_NHWC4) { | |||
| im_dst_x = w * UP_DIV(c, C4NUM); | |||
| im_dst_y = n * h; | |||
| } else if (op_format_ == schema::Format::Format_NC4HW4) { | |||
| im_dst_x = w; | |||
| im_dst_y = n * UP_DIV(c, C4NUM) * h; | |||
| } else { | |||
| MS_LOG(ERROR) << "not support op format:" << EnumNameFormat(op_format_); | |||
| return RET_ERROR; | |||
| } | |||
| size_t img_dtype = CL_FLOAT; | |||
| if (enable_fp16_) { | |||
| img_dtype = CL_HALF_FLOAT; | |||
| } | |||
| img_size->clear(); | |||
| std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype}; | |||
| *img_size = vec; | |||
| img_size_ = vec; | |||
| return RET_OK; | |||
| } | |||
| int ReshapeOpenCLKernel::Run() { | |||
| MS_LOG(DEBUG) << this->name() << " Running!"; | |||
| auto in = Image2DInfo(in_tensors_.front()); | |||
| auto out = Image2DInfo(out_tensors_.front()); | |||
| std::vector<size_t> local = {}; | |||
| std::vector<size_t> global{img_size_[0], img_size_[1]}; | |||
| cl_int4 src_size = {inShape[3], inShape[2], inShape[1], inShape[0]}; | |||
| cl_int4 dst_size = {static_cast<cl_int>(img_size_[0]), static_cast<cl_int>(img_size_[1]), outShape[3], | |||
| outShape[3] * outShape[2]}; | |||
| std::vector<size_t> global{out.width, out.height}; | |||
| cl_int4 src_size = {cl_int(in.C), cl_int(in.W), cl_int(in.H), cl_int(in.N)}; | |||
| cl_int4 dst_size = {cl_int(out.width), cl_int(out.height), cl_int(out.C), cl_int(out.C * out.W)}; | |||
| int arg_idx = 0; | |||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||
| @@ -25,21 +25,16 @@ | |||
| namespace mindspore::kernel { | |||
| class ReshapeOpenCLKernel : public OpenCLKernel { | |||
| public: | |||
| explicit ReshapeOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| ReshapeOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs) {} | |||
| ~ReshapeOpenCLKernel() override{}; | |||
| ~ReshapeOpenCLKernel() override = default; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| private: | |||
| cl::Kernel kernel_; | |||
| bool enable_fp16_{false}; | |||
| std::vector<int> inShape; | |||
| std::vector<int> outShape; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -54,7 +54,7 @@ int ResizeOpenCLKernel::Init() { | |||
| MS_LOG(ERROR) << "unsupported resize method:" << resize_param->method_; | |||
| return RET_PARAM_INVALID; | |||
| } | |||
| kernel_name += "_" + std::string(EnumNameFormat(op_format_)); | |||
| kernel_name += "_NHWC4"; | |||
| #ifdef PROGRAM_WITH_IL | |||
| kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); | |||
| #else | |||
| @@ -64,39 +64,10 @@ int ResizeOpenCLKernel::Init() { | |||
| ocl_runtime_->LoadSource(program_name, source); | |||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||
| #endif | |||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | |||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | |||
| in_tensors_[0]->SetFormat(op_format_); | |||
| out_tensors_[0]->SetFormat(op_format_); | |||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | |||
| return RET_OK; | |||
| } | |||
| int ResizeOpenCLKernel::ReSize() { return RET_OK; } | |||
| int ResizeOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| size_t im_dst_x, im_dst_y; | |||
| auto nhwc_shape_ = out_tensors_[0]->shape(); | |||
| if (op_format_ == schema::Format_NHWC4) { | |||
| im_dst_x = nhwc_shape_[2] * UP_DIV(nhwc_shape_[3], C4NUM); | |||
| im_dst_y = nhwc_shape_[0] * nhwc_shape_[1]; | |||
| } else if (op_format_ == schema::Format_NC4HW4) { | |||
| im_dst_x = nhwc_shape_[2]; | |||
| im_dst_y = nhwc_shape_[0] * UP_DIV(nhwc_shape_[3], C4NUM) * nhwc_shape_[1]; | |||
| } else { | |||
| MS_LOG(ERROR) << "not support op format:" << EnumNameFormat(op_format_); | |||
| return RET_ERROR; | |||
| } | |||
| size_t img_dtype = CL_FLOAT; | |||
| if (ocl_runtime_->GetFp16Enable()) { | |||
| img_dtype = CL_HALF_FLOAT; | |||
| } | |||
| img_size->clear(); | |||
| std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype}; | |||
| *img_size = vec; | |||
| return RET_OK; | |||
| } | |||
| float ResizeOpenCLKernel::getResizeScaleFactor(int input_size, int output_size) { | |||
| return input_size > 1 && output_size > 1 && alignCorner | |||
| ? static_cast<float>(input_size - 1) / static_cast<float>(output_size - 1) | |||
| @@ -26,21 +26,20 @@ | |||
| namespace mindspore::kernel { | |||
| class ResizeOpenCLKernel : public OpenCLKernel { | |||
| public: | |||
| explicit ResizeOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| ResizeOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs) {} | |||
| ~ResizeOpenCLKernel() override{}; | |||
| ~ResizeOpenCLKernel() override = default; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| float getResizeScaleFactor(int input_size, int output_size); | |||
| private: | |||
| float getResizeScaleFactor(int input_size, int output_size); | |||
| cl::Kernel kernel_; | |||
| bool alignCorner; | |||
| bool preserveAspectRatio; | |||
| bool alignCorner{false}; | |||
| bool preserveAspectRatio{false}; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -46,79 +46,19 @@ ScaleOpenCLKernel::~ScaleOpenCLKernel() { | |||
| } | |||
| } | |||
| std::vector<size_t> ScaleOpenCLKernel::InitGlobalSize() const { | |||
| const size_t global_x = out_tensors_[0]->Width(); | |||
| const size_t global_y = out_tensors_[0]->Height(); | |||
| const size_t global_z = UP_ROUND_DIV(out_tensors_[0]->Channel(), C4NUM); | |||
| std::vector<size_t> global = {global_x, global_y, global_z}; | |||
| return global; | |||
| } | |||
| void ScaleOpenCLKernel::Image2dGetWorkGroupSize() { | |||
| local_size_ = {16, 16}; | |||
| if (out_tensors_[0]->shape().size() == 2) { | |||
| size_t H = out_tensors_[0]->shape()[0]; | |||
| size_t W = UP_DIV(out_tensors_[0]->shape()[1], C4NUM); | |||
| global_size_ = {W, H}; | |||
| return; | |||
| } | |||
| if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) { | |||
| size_t H = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * UP_DIV(out_tensors_[0]->Channel(), C4NUM); | |||
| size_t W = out_tensors_[0]->Width(); | |||
| global_size_ = {W, H}; | |||
| } else if (out_tensors_[0]->GetFormat() == schema::Format_NHWC4) { | |||
| } else { | |||
| size_t H = out_tensors_[0]->Batch() * out_tensors_[0]->Height(); | |||
| size_t W = out_tensors_[0]->Width() * UP_DIV(out_tensors_[0]->Channel(), C4NUM); | |||
| global_size_ = {W, H}; | |||
| } else if (out_tensors_[0]->GetFormat() == schema::Format_NC4) { | |||
| size_t H = out_tensors_[0]->Batch(); | |||
| size_t W = UP_DIV(out_tensors_[0]->Channel(), C4NUM); | |||
| global_size_ = {W, H}; | |||
| } else { | |||
| MS_LOG(ERROR) << "Unsupport data format " << out_tensors_[0]->GetFormat(); | |||
| } | |||
| } | |||
| void ScaleOpenCLKernel::BufferGetWorkGroupSize() { | |||
| uint32_t element_num = out_tensors_[0]->ElementsC4Num(); | |||
| global_size_ = {element_num}; | |||
| } | |||
| int ScaleOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| size_t im_dst_x, im_dst_y; | |||
| if (out_tensors_[0]->shape().size() == 2) { | |||
| im_dst_x = UP_DIV(out_tensors_[0]->shape()[1], C4NUM); | |||
| im_dst_y = out_tensors_[0]->shape()[0]; | |||
| } else { | |||
| if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) { | |||
| im_dst_x = out_tensors_[0]->Width(); | |||
| im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * UP_DIV(out_tensors_[0]->Channel(), C4NUM); | |||
| } else if (out_tensors_[0]->GetFormat() == schema::Format_NHWC4) { | |||
| im_dst_x = out_tensors_[0]->Width() * UP_DIV(out_tensors_[0]->Channel(), C4NUM); | |||
| im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height(); | |||
| } else if (out_tensors_[0]->GetFormat() == schema::Format_NC4) { | |||
| im_dst_y = out_tensors_[0]->Batch(); | |||
| im_dst_x = UP_DIV(out_tensors_[0]->Channel(), C4NUM); | |||
| } else { | |||
| MS_LOG(ERROR) << "Unsupport data format " << out_tensors_[0]->GetFormat(); | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| size_t img_dtype = CL_FLOAT; | |||
| if (in_tensors_[0]->data_type() == kNumberTypeFloat16) { | |||
| img_dtype = CL_HALF_FLOAT; | |||
| } else if (in_tensors_[0]->data_type() == kNumberTypeFloat32) { | |||
| img_dtype = CL_FLOAT; | |||
| } else { | |||
| MS_LOG(ERROR) << "Unsupport data type " << in_tensors_[0]->data_type(); | |||
| } | |||
| img_size->clear(); | |||
| std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype}; | |||
| *img_size = vec; | |||
| return RET_OK; | |||
| } | |||
| int ScaleOpenCLKernel::InitBuffer() { | |||
| if (!element_flag_) { | |||
| return RET_OK; | |||
| @@ -127,6 +67,7 @@ int ScaleOpenCLKernel::InitBuffer() { | |||
| auto allocator = ocl_runtime_->GetAllocator(); | |||
| std::vector<size_t> img_size; | |||
| GetImageSize(0, &img_size); | |||
| img_size[2] = in_tensors_[1]->data_type() == kNumberTypeFloat16 ? CL_HALF_FLOAT : CL_FLOAT; | |||
| if (scale_C_flag_) { | |||
| img_size[1] = 1; | |||
| img_size[0] = UP_DIV(in_tensors_[1]->shape()[0], C4NUM); | |||
| @@ -147,57 +88,7 @@ int ScaleOpenCLKernel::InitBuffer() { | |||
| << in_tensors_[0]->data_type(); | |||
| return RET_ERROR; | |||
| } | |||
| } else if (in_tensors_[0]->GetFormat() == schema::Format_NC4HW4) { | |||
| if (in_tensors_[1]->GetFormat() == schema::Format_NHWC) { | |||
| if (in_tensors_[0]->data_type() == kNumberTypeFloat32) { | |||
| float *scale = new (std::nothrow) float[pack_weight_size]; | |||
| if (scale == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc buffer failed!"; | |||
| return RET_ERROR; | |||
| } | |||
| float *offset = new (std::nothrow) float[pack_weight_size]; | |||
| if (offset == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc buffer failed!"; | |||
| delete[] scale; | |||
| return RET_ERROR; | |||
| } | |||
| std::function<float(float)> to_dtype = [](float x) -> float { return x; }; | |||
| PackNHWCToNC4HW4<float, float>(in_tensors_[1]->data_c(), scale, batch, plane, channel, to_dtype); | |||
| PackNHWCToNC4HW4<float, float>(in_tensors_[2]->data_c(), offset, batch, plane, channel, to_dtype); | |||
| scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size); | |||
| offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size); | |||
| delete[] scale; | |||
| delete[] offset; | |||
| } else if (in_tensors_[0]->data_type() == kNumberTypeFloat16) { | |||
| float16_t *scale = new (std::nothrow) float16_t[pack_weight_size]; | |||
| if (scale == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc buffer failed!"; | |||
| return RET_ERROR; | |||
| } | |||
| float16_t *offset = new (std::nothrow) float16_t[pack_weight_size]; | |||
| if (offset == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc buffer failed!"; | |||
| delete[] scale; | |||
| return RET_ERROR; | |||
| } | |||
| std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); }; | |||
| PackNHWCToNC4HW4<float, float16_t>(in_tensors_[1]->data_c(), scale, batch, plane, channel, to_dtype); | |||
| PackNHWCToNC4HW4<float, float16_t>(in_tensors_[2]->data_c(), offset, batch, plane, channel, to_dtype); | |||
| scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size); | |||
| offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size); | |||
| delete[] scale; | |||
| delete[] offset; | |||
| } else { | |||
| MS_LOG(ERROR) << "Unsupport data type transpose from " << in_tensors_[1]->data_type() << "to " | |||
| << in_tensors_[0]->data_type(); | |||
| return RET_ERROR; | |||
| } | |||
| } else { | |||
| MS_LOG(ERROR) << "Unsupport format transpose from " << in_tensors_[1]->GetFormat() << "to " | |||
| << in_tensors_[0]->GetFormat(); | |||
| return RET_ERROR; | |||
| } | |||
| } else if (in_tensors_[0]->GetFormat() == schema::Format_NHWC4) { | |||
| } else if (in_tensors_[0]->GetFormat() == schema::Format_NHWC) { | |||
| if (in_tensors_[1]->GetFormat() == schema::Format_NHWC) { | |||
| if (in_tensors_[0]->data_type() == kNumberTypeFloat32) { | |||
| float *scale = new (std::nothrow) float[pack_weight_size]; | |||
| @@ -298,18 +189,6 @@ int ScaleOpenCLKernel::Init() { | |||
| return error_code; | |||
| } | |||
| auto format = op_format_; | |||
| if (out_tensors_[0]->shape().size() == 2) { | |||
| format = schema::Format_NC4; | |||
| } | |||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | |||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | |||
| in_tensors_[0]->SetFormat(format); | |||
| if (element_flag_ && in_tensors_[1]->category() != lite::Tensor::Category::CONST) { | |||
| in_tensors_[1]->SetFormat(format); | |||
| in_tensors_[2]->SetFormat(format); | |||
| } | |||
| out_tensors_[0]->SetFormat(format); | |||
| Image2dGetWorkGroupSize(); | |||
| InitBuffer(); | |||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | |||
| @@ -25,20 +25,17 @@ namespace mindspore::kernel { | |||
| class ScaleOpenCLKernel : public OpenCLKernel { | |||
| public: | |||
| explicit ScaleOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) | |||
| ScaleOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) | |||
| : OpenCLKernel(parameter, inputs, outputs) {} | |||
| ~ScaleOpenCLKernel() override; | |||
| int Init() override; | |||
| int Run() override; | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| int InitBuffer() override; | |||
| private: | |||
| std::vector<size_t> InitGlobalSize() const; | |||
| void Image2dGetWorkGroupSize(); | |||
| void BufferGetWorkGroupSize(); | |||
| int InitBuffer(); | |||
| cl::Kernel kernel_; | |||
| bool element_flag_{true}; | |||
| @@ -30,55 +30,17 @@ using mindspore::schema::PrimitiveType_Slice; | |||
| namespace mindspore::kernel { | |||
| int SliceOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); | |||
| size_t im_dst_x, im_dst_y; | |||
| if (in_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) { | |||
| im_dst_x = out_tensors_[0]->Width() * CO4; | |||
| im_dst_y = out_tensors_[0]->Height(); | |||
| } else { | |||
| im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * CO4; | |||
| im_dst_x = out_tensors_[0]->Width(); | |||
| } | |||
| size_t img_dtype = CL_FLOAT; | |||
| auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||
| if (enable_fp16_) { | |||
| img_dtype = CL_HALF_FLOAT; | |||
| } | |||
| img_size->clear(); | |||
| std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype}; | |||
| *img_size = vec; | |||
| return RET_OK; | |||
| } | |||
| int SliceOpenCLKernel::Init() { | |||
| std::string kernel_name = "slice"; | |||
| auto in_format = op_format_; | |||
| if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) { | |||
| MS_LOG(ERROR) << "input format(" << in_format << ") " | |||
| << "format not support!"; | |||
| return RET_ERROR; | |||
| } | |||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | |||
| in_tensors_[0]->SetFormat(op_format_); | |||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | |||
| out_tensors_[0]->SetFormat(op_format_); | |||
| if (in_format == schema::Format_NC4HW4) { | |||
| kernel_name += "_NC4HW4"; | |||
| } else if (in_format == schema::Format_NHWC4) { | |||
| kernel_name += "_NHWC4"; | |||
| } | |||
| std::set<std::string> build_options; | |||
| std::string source = slice_source; | |||
| std::string program_name = "slice"; | |||
| std::string kernel_name = "slice_NHWC4"; | |||
| ocl_runtime_->LoadSource(program_name, source); | |||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | |||
| return RET_OK; | |||
| } | |||
| int SliceOpenCLKernel::ReSize() { return RET_OK; } | |||
| void SlcieGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t> *local, int max_size) { | |||
| const int max_divider = 8; | |||
| const int max_x = 4, max_y = 8; | |||
| @@ -25,20 +25,16 @@ namespace mindspore::kernel { | |||
| class SliceOpenCLKernel : public OpenCLKernel { | |||
| public: | |||
| explicit SliceOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| SliceOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs) {} | |||
| ~SliceOpenCLKernel() override{}; | |||
| ~SliceOpenCLKernel() override = default; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| private: | |||
| cl::Kernel kernel_; | |||
| }; | |||
| @@ -77,30 +77,6 @@ int SoftmaxOpenCLKernel::SetWorkGroupSize1x1() { | |||
| return lite::RET_OK; | |||
| } | |||
| int SoftmaxOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| size_t im_dst_x, im_dst_y; | |||
| auto out_shape = out_tensors_[0]->shape(); | |||
| int n = nhwc_shape_[0], h = nhwc_shape_[1], w = nhwc_shape_[2], c = nhwc_shape_[3]; | |||
| if (op_format_ == schema::Format_NHWC4) { | |||
| im_dst_x = w * UP_DIV(c, C4NUM); | |||
| im_dst_y = n * h; | |||
| } else if (op_format_ == schema::Format_NC4HW4) { | |||
| im_dst_x = w; | |||
| im_dst_y = n * UP_DIV(c, C4NUM) * h; | |||
| } else { | |||
| MS_LOG(ERROR) << "not support op format:" << EnumNameFormat(op_format_); | |||
| return mindspore::lite::RET_ERROR; | |||
| } | |||
| size_t img_dtype = CL_FLOAT; | |||
| if (enable_fp16_) { | |||
| img_dtype = CL_HALF_FLOAT; | |||
| } | |||
| img_size->clear(); | |||
| std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype}; | |||
| *img_size = vec; | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| int SoftmaxOpenCLKernel::Init() { | |||
| std::string kernel_name = "SoftMax"; | |||
| std::string program_name = "SoftMax"; | |||
| @@ -131,7 +107,7 @@ int SoftmaxOpenCLKernel::Init() { | |||
| onexone_flag_ = false; | |||
| kernel_name += "Axis" + std::to_string(axis_); | |||
| } | |||
| kernel_name += "_" + std::string(EnumNameFormat(op_format_)); | |||
| kernel_name += "_NHWC4"; | |||
| #ifdef PROGRAM_WITH_IL | |||
| kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); | |||
| #else | |||
| @@ -139,10 +115,6 @@ int SoftmaxOpenCLKernel::Init() { | |||
| ocl_runtime_->LoadSource(program_name, source); | |||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||
| #endif | |||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | |||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | |||
| in_tensors_[0]->SetFormat(op_format_); | |||
| out_tensors_[0]->SetFormat(op_format_); | |||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | |||
| return lite::RET_OK; | |||
| } | |||
| @@ -26,26 +26,22 @@ namespace mindspore::kernel { | |||
| class SoftmaxOpenCLKernel : public OpenCLKernel { | |||
| public: | |||
| explicit SoftmaxOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs) { | |||
| parameter_ = reinterpret_cast<SoftmaxParameter *>(parameter); | |||
| } | |||
| SoftmaxOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs), parameter_(reinterpret_cast<SoftmaxParameter *>(parameter)) {} | |||
| ~SoftmaxOpenCLKernel() override{}; | |||
| ~SoftmaxOpenCLKernel() override = default; | |||
| int Init() override; | |||
| int Run() override; | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| private: | |||
| int InitGlobalSize(); | |||
| int SetWorkGroupSize1x1(); | |||
| int SetWorkGroupSize(); | |||
| std::vector<float> GetMaskForLastChannel(int channels); | |||
| private: | |||
| cl::Kernel kernel_; | |||
| SoftmaxParameter *parameter_; | |||
| bool onexone_flag_{false}; | |||
| std::vector<size_t> local_size_; | |||
| std::vector<size_t> global_size_; | |||
| @@ -32,18 +32,12 @@ using mindspore::schema::PrimitiveType_SpaceToBatchND; | |||
| namespace mindspore::kernel { | |||
| int SpaceToBatchNDOpenCLKernel::Init() { | |||
| std::string kernel_name = "space_to_batch_nd"; | |||
| auto in_format = op_format_; | |||
| std::string kernel_name = "space_to_batch_nd_NHWC4"; | |||
| if (in_tensors_[0]->shape().size() != 4 && out_tensors_[0]->shape().size() != 4) { | |||
| MS_LOG(ERROR) << "input/output shape size must be 4, actual: " << in_tensors_[0]->shape().size() << ", " | |||
| << out_tensors_[0]->shape().size(); | |||
| return RET_ERROR; | |||
| } | |||
| if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) { | |||
| MS_LOG(ERROR) << "input format(" << in_format << ") " | |||
| << "format not support!"; | |||
| return RET_ERROR; | |||
| } | |||
| auto *param = reinterpret_cast<SpaceToBatchParameter *>(this->op_parameter_); | |||
| param->need_paddings_ = (param->paddings_[0] | param->paddings_[1] | param->paddings_[2] | param->paddings_[3]); | |||
| param->padded_in_shape_[kNHWC_N] = in_tensors_[0]->shape().at(kNHWC_N); | |||
| @@ -60,18 +54,10 @@ int SpaceToBatchNDOpenCLKernel::Init() { | |||
| return RET_ERROR; | |||
| } | |||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | |||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | |||
| in_tensors_[0]->SetFormat(op_format_); | |||
| out_tensors_[0]->SetFormat(op_format_); | |||
| #ifdef PROGRAM_WITH_IL | |||
| kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); | |||
| #else | |||
| if (in_format == schema::Format_NC4HW4) { | |||
| kernel_name += "_NC4HW4"; | |||
| } else { | |||
| kernel_name += "_NHWC4"; | |||
| } | |||
| std::set<std::string> build_options; | |||
| std::string source = space_to_batch_nd_source; | |||
| std::string program_name = "space_to_batch_nd"; | |||
| @@ -81,28 +67,7 @@ int SpaceToBatchNDOpenCLKernel::Init() { | |||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | |||
| return RET_OK; | |||
| } | |||
| int SpaceToBatchNDOpenCLKernel::InitBuffer() { return RET_OK; } | |||
| int SpaceToBatchNDOpenCLKernel::ReSize() { return RET_OK; } | |||
| int SpaceToBatchNDOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); | |||
| size_t im_dst_x, im_dst_y; | |||
| if (in_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4) { | |||
| im_dst_x = out_tensors_[0]->Width() * CO4; | |||
| im_dst_y = out_tensors_[0]->Height() * out_tensors_[0]->Batch(); | |||
| } else { | |||
| im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * CO4; | |||
| im_dst_x = out_tensors_[0]->Width(); | |||
| } | |||
| size_t img_dtype = CL_FLOAT; | |||
| auto enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||
| if (enable_fp16_) { | |||
| img_dtype = CL_HALF_FLOAT; | |||
| } | |||
| img_size->clear(); | |||
| std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype}; | |||
| *img_size = std::move(vec); | |||
| return RET_OK; | |||
| } | |||
| int SpaceToBatchNDOpenCLKernel::Run() { | |||
| MS_LOG(DEBUG) << this->name() << " Running! "; | |||
| auto param = reinterpret_cast<SpaceToBatchParameter *>(this->op_parameter_); | |||
| @@ -25,22 +25,16 @@ namespace mindspore::kernel { | |||
| class SpaceToBatchNDOpenCLKernel : public OpenCLKernel { | |||
| public: | |||
| explicit SpaceToBatchNDOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| SpaceToBatchNDOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs) {} | |||
| ~SpaceToBatchNDOpenCLKernel() override{}; | |||
| ~SpaceToBatchNDOpenCLKernel() override = default; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| int InitBuffer(); | |||
| private: | |||
| cl::Kernel kernel_; | |||
| }; | |||
| @@ -84,14 +84,6 @@ int ToFormatOpenCLKernel::InitNHWC() { | |||
| return RET_OK; | |||
| } | |||
| int ToFormatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| size_t img_height = N_ * H_; | |||
| size_t img_width = W_ * UP_DIV(C_, C4NUM); | |||
| size_t img_dtype = ocl_runtime_->GetFp16Enable() ? CL_HALF_FLOAT : CL_FLOAT; | |||
| *img_size = {img_width, img_height, img_dtype}; | |||
| return RET_OK; | |||
| } | |||
| int ToFormatOpenCLKernel::Run() { | |||
| MS_LOG(DEBUG) << this->name() << " Running!"; | |||
| std::vector<size_t> global = {N_ * H_, W_, UP_DIV(C_, C4NUM)}; | |||
| @@ -25,15 +25,13 @@ | |||
| namespace mindspore::kernel { | |||
| class ToFormatOpenCLKernel : public OpenCLKernel { | |||
| public: | |||
| explicit ToFormatOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| ToFormatOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs) {} | |||
| ~ToFormatOpenCLKernel() override{}; | |||
| ~ToFormatOpenCLKernel() override = default; | |||
| int Init() override; | |||
| int ReSize() override { return mindspore::lite::RET_OK; }; | |||
| int Run() override; | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| private: | |||
| int InitNHWC(); | |||
| @@ -57,7 +57,8 @@ int TransposeOpenCLKernel::Init() { | |||
| // just for input | |||
| kernel_name += "_oversize"; | |||
| } | |||
| kernel_name += "_" + std::string(EnumNameFormat(op_format_)); | |||
| kernel_name += "_NHWC4"; | |||
| #ifdef PROGRAM_WITH_IL | |||
| kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); | |||
| #else | |||
| @@ -67,37 +68,11 @@ int TransposeOpenCLKernel::Init() { | |||
| ocl_runtime_->LoadSource(program_name, source); | |||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); | |||
| #endif | |||
| in_ori_format_ = in_tensors_[0]->GetFormat(); | |||
| out_ori_format_ = out_tensors_[0]->GetFormat(); | |||
| in_tensors_[0]->SetFormat(op_format_); | |||
| out_tensors_[0]->SetFormat(op_format_); | |||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| int TransposeOpenCLKernel::ReSize() { return mindspore::lite::RET_OK; } | |||
| int TransposeOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| size_t im_dst_x = 1, im_dst_y = 1; | |||
| auto out_shape = out_tensors_[0]->shape(); | |||
| if (op_format_ == schema::Format_NHWC4) { | |||
| im_dst_x = out_shape[2] * UP_DIV(out_shape[3], C4NUM); // W * C4 | |||
| im_dst_y = out_shape[0] * out_shape[1]; // N * H | |||
| } else if (op_format_ == schema::Format_NC4HW4) { | |||
| im_dst_x = out_shape[2]; // W | |||
| im_dst_y = out_shape[0] * UP_DIV(out_shape[3], C4NUM) * out_shape[1]; // N * C4 * H | |||
| } | |||
| size_t img_dtype = CL_FLOAT; | |||
| if (enable_fp16_) { | |||
| img_dtype = CL_HALF_FLOAT; | |||
| } | |||
| img_size->clear(); | |||
| std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype}; | |||
| *img_size = vec; | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| int TransposeOpenCLKernel::Run() { | |||
| MS_LOG(DEBUG) << this->name() << " Running!"; | |||
| std::vector<int> shapex = out_tensors_[0]->shape(); | |||
| @@ -108,12 +83,11 @@ int TransposeOpenCLKernel::Run() { | |||
| size_t c4 = UP_DIV(c, 4); | |||
| std::vector<size_t> local = {}; | |||
| std::vector<size_t> global; | |||
| if (type == TransposeType::AXIS0312) { | |||
| if (type == TransposeType::AXIS0312) { // NHWC -> NCHW | |||
| global = {UP_DIV(h, C4NUM), w, c4}; | |||
| } else if (type == TransposeType::AXIS0231) { | |||
| } else if (type == TransposeType::AXIS0231) { // NCHW -> NHWC | |||
| global = {h, UP_DIV(w, C4NUM), c4}; | |||
| } | |||
| cl_int4 shape = {static_cast<int>(n), static_cast<int>(h), static_cast<int>(w), static_cast<int>(c)}; | |||
| int arg_idx = 0; | |||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||
| @@ -29,15 +29,13 @@ enum class TransposeType { AXIS0312, AXIS0231 }; | |||
| class TransposeOpenCLKernel : public OpenCLKernel { | |||
| public: | |||
| explicit TransposeOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| TransposeOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : OpenCLKernel(parameter, inputs, outputs) {} | |||
| ~TransposeOpenCLKernel() override{}; | |||
| ~TransposeOpenCLKernel() override = default; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) override; | |||
| private: | |||
| cl::Kernel kernel_; | |||
| @@ -22,52 +22,120 @@ | |||
| #include "include/errorcode.h" | |||
| #include "src/runtime/opencl/opencl_runtime.h" | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| namespace mindspore::kernel { | |||
| enum class OpenCLMemType { BUF, IMG }; | |||
| enum OpenCLImageSizeIndex { IDX_X = 0, IDX_Y, IDX_DTYPE, IDX_NUM }; | |||
| struct OpenCLToFormatParameter { | |||
| OpParameter op_parameter; | |||
| OpParameter op_parameter{}; | |||
| schema::Format src_format{schema::Format::Format_NHWC}; | |||
| schema::Format dst_format{schema::Format::Format_NHWC4}; | |||
| OpenCLMemType out_mem_type{OpenCLMemType::IMG}; | |||
| }; | |||
| struct Image2DInfo { | |||
| explicit Image2DInfo(const lite::Tensor *tensor) { | |||
| if (tensor) { | |||
| auto shape = tensor->shape(); | |||
| if (shape.size() == 1) { | |||
| N = shape[0]; | |||
| } else if (shape.size() == 2) { | |||
| N = shape[0]; | |||
| C = shape[1]; | |||
| } else if (shape.size() == 3) { | |||
| N = shape[0]; | |||
| W = shape[1]; | |||
| C = shape[2]; | |||
| } else if (shape.size() == 4) { | |||
| N = shape[0]; | |||
| H = shape[1]; | |||
| W = shape[2]; | |||
| C = shape[3]; | |||
| } else if (shape.size() >= 5) { | |||
| MS_LOG(ERROR) << "GPU dont't support Tensor with dim=" << shape.size(); | |||
| } | |||
| FLT_size = tensor->data_type() == kNumberTypeFloat16 ? sizeof(cl_half) : sizeof(cl_float); | |||
| } else { | |||
| FLT_size = sizeof(cl_float); | |||
| } | |||
| FLT4_size = FLT_size * 4; | |||
| Slice = UP_DIV(C, C4NUM); | |||
| if (W * Slice <= MAX_IMAGE2D_SIZE) { | |||
| height = N * H; | |||
| width = W * Slice; | |||
| } else { | |||
| height = W; | |||
| width = N * H * Slice; | |||
| } | |||
| auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); | |||
| int alignment = runtime_wrapper.GetInstance()->GetImagePitchAlignment(); | |||
| row_pitch = (width + alignment - 1) / alignment * alignment * FLT4_size; | |||
| ElementsNum = N * H * W * C; | |||
| ElementsC4Num = N * H * W * Slice * C4NUM; | |||
| OriginSize = ElementsNum * FLT_size; | |||
| Image2DSize = height * width * FLT4_size; | |||
| } | |||
| size_t N{1}; | |||
| size_t H{1}; | |||
| size_t W{1}; | |||
| size_t C{1}; | |||
| size_t Slice{}; | |||
| size_t width{}; | |||
| size_t height{}; | |||
| size_t FLT_size{}; | |||
| size_t FLT4_size{}; | |||
| size_t row_pitch{}; | |||
| size_t ElementsNum{}; | |||
| size_t ElementsC4Num{}; | |||
| size_t OriginSize{}; | |||
| size_t Image2DSize{}; | |||
| }; | |||
| class OpenCLKernel : public LiteKernel { | |||
| public: | |||
| explicit OpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| OpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs) | |||
| : LiteKernel(parameter, inputs, outputs, nullptr, nullptr) { | |||
| ocl_runtime_ = ocl_runtime_wrap_.GetInstance(); | |||
| } | |||
| ~OpenCLKernel() override = default; | |||
| ~OpenCLKernel() {} | |||
| int Init() override { return RET_ERROR; } | |||
| int PreProcess() override { return RET_ERROR; } | |||
| int ReSize() override { return RET_ERROR; } | |||
| int Run() override { return RET_ERROR; } | |||
| virtual int Init() { return mindspore::lite::RET_ERROR; } | |||
| virtual int PreProcess() { return mindspore::lite::RET_ERROR; } | |||
| virtual int InferShape() { return mindspore::lite::RET_ERROR; } | |||
| virtual int ReSize() { return mindspore::lite::RET_ERROR; } | |||
| virtual int Run() { return mindspore::lite::RET_ERROR; } | |||
| virtual int GetImageSize(size_t idx, std::vector<size_t> *img_size) { return mindspore::lite::RET_ERROR; } | |||
| virtual int GetGlobalSize(size_t idx, std::vector<size_t> *global_size) { return mindspore::lite::RET_ERROR; } | |||
| virtual int InitBuffer() { return RET_OK; } | |||
| virtual int GetGlobalSize(size_t idx, std::vector<size_t> *global_size) { return RET_ERROR; } | |||
| virtual int GetLocalSize(size_t idx, const std::vector<size_t> &global_size, std::vector<size_t> *local_size) { | |||
| return mindspore::lite::RET_ERROR; | |||
| return RET_ERROR; | |||
| } | |||
| int GetImageSize(size_t idx, std::vector<size_t> *img_size) { | |||
| if (idx >= out_tensors_.size()) { | |||
| return RET_ERROR; | |||
| } | |||
| auto img_info = Image2DInfo(out_tensors_[idx]); | |||
| size_t img_dtype = ocl_runtime_->GetFp16Enable() ? CL_HALF_FLOAT : CL_FLOAT; | |||
| *img_size = {img_info.width, img_info.height, img_dtype}; | |||
| return RET_OK; | |||
| } | |||
| OpenCLMemType GetMemType() { return out_mem_type_; } | |||
| void SetMemType(OpenCLMemType mem_type) { out_mem_type_ = mem_type; } | |||
| void SetFormatType(schema::Format format_type) { op_format_ = format_type; } | |||
| schema::Format GetInOriFormat() { return in_ori_format_; } | |||
| schema::Format GetOutOriFormat() { return out_ori_format_; } | |||
| protected: | |||
| lite::opencl::OpenCLRuntime *ocl_runtime_; | |||
| OpenCLMemType out_mem_type_{OpenCLMemType::IMG}; | |||
| schema::Format in_ori_format_{schema::Format::Format_NHWC}; | |||
| schema::Format out_ori_format_{schema::Format::Format_NHWC4}; | |||
| schema::Format op_format_{schema::Format::Format_NHWC4}; | |||
| private: | |||
| lite::opencl::OpenCLRuntimeWrapper ocl_runtime_wrap_; | |||
| lite::opencl::OpenCLRuntime *ocl_runtime_; | |||
| std::vector<size_t> img_size_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -27,7 +27,7 @@ using mindspore::lite::RET_OK; | |||
| SubGraphOpenCLKernel::~SubGraphOpenCLKernel() { UnInit(); } | |||
| int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_tensors, | |||
| const std::vector<std::vector<kernel::LiteKernel *>> in_kernels, | |||
| const std::vector<std::vector<kernel::LiteKernel *>> &in_kernels, | |||
| std::vector<lite::Tensor *> *out_tensors, | |||
| std::vector<OpenCLToFormatParameter *> *out_parameters, | |||
| std::vector<LiteKernel *> *out_convert_ops, OpenCLMemType mem_type) { | |||
| @@ -66,7 +66,7 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_te | |||
| } | |||
| auto dst_format = (mem_type == OpenCLMemType::IMG) ? schema::Format::Format_NHWC4 : schema::Format::Format_NHWC; | |||
| auto src_format = (mem_type == OpenCLMemType::IMG) ? schema::Format::Format_NHWC : schema::Format::Format_NHWC4; | |||
| lite::Tensor *new_tensor = new (std::nothrow) lite::Tensor(); | |||
| auto *new_tensor = new (std::nothrow) lite::Tensor(); | |||
| MS_ASSERT(new_tensor); | |||
| if (new_tensor == nullptr) { | |||
| MS_LOG(ERROR) << "SubGraphOpenCLKernel new tensor failed!"; | |||
| @@ -87,8 +87,7 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_te | |||
| desc.data_type = kNumberTypeFloat16; | |||
| new_tensor->set_data_type(kNumberTypeFloat16); | |||
| } | |||
| OpenCLToFormatParameter *parameter = | |||
| static_cast<OpenCLToFormatParameter *>(malloc(sizeof(OpenCLToFormatParameter))); | |||
| auto *parameter = static_cast<OpenCLToFormatParameter *>(malloc(sizeof(OpenCLToFormatParameter))); | |||
| MS_ASSERT(parameter); | |||
| if (parameter == nullptr) { | |||
| MS_LOG(ERROR) << "SubGraphOpenCLKernel new parameter failed!"; | |||
| @@ -196,11 +195,12 @@ int SubGraphOpenCLKernel::UpdateTensorDataType() { | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int SubGraphOpenCLKernel::MallocTensorWithReuse() { | |||
| kernel::LiteKernelUtil::InitTensorRefCount(nodes_); | |||
| for (auto *kernel : nodes_) { | |||
| MS_ASSERT(nullptr != kernel); | |||
| kernel::OpenCLKernel *op_kernel = reinterpret_cast<kernel::OpenCLKernel *>(kernel); | |||
| auto *op_kernel = reinterpret_cast<kernel::OpenCLKernel *>(kernel); | |||
| auto outputs = kernel->out_tensors(); | |||
| for (auto i = 0; i < outputs.size(); ++i) { | |||
| auto *output = outputs.at(i); | |||
| @@ -258,10 +258,10 @@ int SubGraphOpenCLKernel::GetKernelFromToTensor(const std::vector<lite::Tensor * | |||
| kset.insert(tens.begin(), tens.end()); | |||
| ksets.emplace_back(kset); | |||
| } | |||
| for (size_t i = 0; i < in_tensors.size(); ++i) { | |||
| for (auto in_tensor : in_tensors) { | |||
| std::vector<kernel::LiteKernel *> kvec; | |||
| for (size_t j = 0; j < in_kernels.size(); ++j) { | |||
| if (ksets[j].count(in_tensors[i])) { | |||
| if (ksets[j].count(in_tensor)) { | |||
| kvec.emplace_back(in_kernels[j]); | |||
| } | |||
| } | |||
| @@ -272,21 +272,15 @@ int SubGraphOpenCLKernel::GetKernelFromToTensor(const std::vector<lite::Tensor * | |||
| int SubGraphOpenCLKernel::UnInit() { | |||
| for (const auto &tensor : in_convert_tensors_) { | |||
| if (tensor != nullptr) { | |||
| delete tensor; | |||
| } | |||
| delete tensor; | |||
| } | |||
| in_convert_tensors_.clear(); | |||
| for (const auto &tensor : out_convert_tensors_) { | |||
| if (tensor != nullptr) { | |||
| delete tensor; | |||
| } | |||
| delete tensor; | |||
| } | |||
| out_convert_tensors_.clear(); | |||
| for (const auto &op : nodes_) { | |||
| if (op != nullptr) { | |||
| delete op; | |||
| } | |||
| delete op; | |||
| } | |||
| nodes_.clear(); | |||
| in_convert_ops_.clear(); | |||
| @@ -32,10 +32,10 @@ struct SubGraphOpenCLParameter { | |||
| class SubGraphOpenCLKernel : public SubGraphKernel { | |||
| public: | |||
| explicit SubGraphOpenCLKernel(const std::vector<lite::Tensor *> inputs, const std::vector<lite::Tensor *> outputs, | |||
| const std::vector<kernel::LiteKernel *> inKernels, | |||
| const std::vector<kernel::LiteKernel *> outKernels, | |||
| const std::vector<kernel::LiteKernel *> nodes, const lite::InnerContext *ctx = nullptr) | |||
| SubGraphOpenCLKernel(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, | |||
| const std::vector<kernel::LiteKernel *> &inKernels, | |||
| const std::vector<kernel::LiteKernel *> &outKernels, | |||
| const std::vector<kernel::LiteKernel *> &nodes, const lite::InnerContext *ctx = nullptr) | |||
| : SubGraphKernel(inputs, outputs, inKernels, outKernels, nodes, ctx) { | |||
| ocl_runtime_ = ocl_runtime_wrap_.GetInstance(); | |||
| subgraph_type_ = kGpuSubGraph; | |||
| @@ -50,21 +50,19 @@ class SubGraphOpenCLKernel : public SubGraphKernel { | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int Run(const KernelCallBack &before, const KernelCallBack &after) override { return this->Run(); }; | |||
| int UnInit(); | |||
| protected: | |||
| private: | |||
| int UnInit(); | |||
| int UpdateTensorDataType(); | |||
| int MallocTensorWithReuse(); | |||
| int GenToFormatOp(const std::vector<lite::Tensor *> &in_tensors, | |||
| const std::vector<std::vector<kernel::LiteKernel *>> in_kernels, | |||
| const std::vector<std::vector<kernel::LiteKernel *>> &in_kernels, | |||
| std::vector<lite::Tensor *> *out_tensors, std::vector<OpenCLToFormatParameter *> *out_parameters, | |||
| std::vector<LiteKernel *> *out_convert_ops, OpenCLMemType mem_type); | |||
| int GetKernelFromToTensor(const std::vector<lite::Tensor *> &in_tensors, | |||
| const std::vector<kernel::LiteKernel *> &in_kernels, | |||
| std::vector<std::vector<kernel::LiteKernel *>> *out_kernels, bool is_from); | |||
| private: | |||
| lite::opencl::OpenCLAllocator *allocator_; | |||
| lite::opencl::OpenCLAllocator *allocator_{nullptr}; | |||
| std::vector<lite::Tensor *> in_convert_tensors_; | |||
| std::vector<lite::Tensor *> out_convert_tensors_; | |||
| std::vector<OpenCLToFormatParameter *> in_parameters_; | |||
| @@ -20,6 +20,8 @@ | |||
| #include <vector> | |||
| #include "src/kernel_registry.h" | |||
| #include "src/runtime/opencl/opencl_runtime.h" | |||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | |||
| #include "src/common/file_utils.h" | |||
| using mindspore::lite::KernelRegistrar; | |||
| @@ -224,57 +226,77 @@ std::string CLErrorCode(cl_int error_code) { | |||
| } | |||
| } | |||
| void Write2File(void *mem, const std::string &file_name, int size) { | |||
| std::fstream os; | |||
| os.open(file_name, std::ios::out | std::ios::binary); | |||
| os.write(static_cast<char *>(mem), size); | |||
| os.close(); | |||
| int WriteToBin(const std::string &file_path, void *data, size_t size) { | |||
| std::ofstream out_file; | |||
| out_file.open(file_path.c_str(), std::ios::binary); | |||
| if (!out_file.good()) { | |||
| MS_LOG(ERROR) << "file is bad"; | |||
| return -1; | |||
| } | |||
| if (!out_file.is_open()) { | |||
| MS_LOG(ERROR) << "file open failed"; | |||
| return -1; | |||
| } | |||
| out_file.write(reinterpret_cast<char *>(data), size); | |||
| return 0; | |||
| } | |||
| void PrintTensor(lite::Tensor *tensor, int num, const std::string &out_file) { | |||
| void PrintTensor(const lite::Tensor *tensor, OpenCLMemType mem_type, int n, const std::string &out_file) { | |||
| if (tensor->data_c() == nullptr) { | |||
| return; | |||
| } | |||
| Image2DInfo img_info(tensor); | |||
| auto size = mem_type == OpenCLMemType::BUF ? img_info.OriginSize : img_info.Image2DSize; | |||
| std::vector<char> data(size); | |||
| auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); | |||
| auto runtime = runtime_wrapper.GetInstance(); | |||
| auto allocator = runtime->GetAllocator(); | |||
| auto origin_data = tensor->data_c(); | |||
| runtime->SyncCommandQueue(); | |||
| allocator->MapBuffer(origin_data, CL_MAP_READ, nullptr, true); | |||
| allocator->MapBuffer(tensor->data_c(), CL_MAP_READ, nullptr, true); | |||
| if (mem_type == OpenCLMemType::BUF) { | |||
| memcpy(data.data(), tensor->data_c(), img_info.OriginSize); | |||
| } else { | |||
| auto row_size = img_info.width * img_info.FLT4_size; | |||
| for (int i = 0; i < img_info.height; ++i) { | |||
| memcpy(reinterpret_cast<char *>(data.data()) + i * row_size, | |||
| static_cast<char *>(tensor->data_c()) + i * img_info.row_pitch, row_size); | |||
| } | |||
| } | |||
| allocator->UnmapBuffer(tensor->data_c()); | |||
| printf("shape=("); | |||
| auto shape = tensor->shape(); | |||
| auto N = shape.size() > 0 ? shape[0] : 1; | |||
| auto H = shape.size() > 1 ? shape[1] : 1; | |||
| auto W = shape.size() > 2 ? shape[2] : 1; | |||
| auto C = shape.size() > 3 ? shape[3] : 1; | |||
| auto SLICES = UP_DIV(C, C4NUM); | |||
| auto ElementsC4Num = N * H * W * UP_ROUND(C, C4NUM); | |||
| auto alignment = runtime->GetImagePitchAlignment(); | |||
| auto FLT4_size = tensor->data_type() == kNumberTypeFloat16 ? sizeof(cl_half4) : sizeof(cl_float4); | |||
| auto row_pitch = (W * SLICES + alignment - 1) / alignment * alignment * FLT4_size; | |||
| auto row_size = W * SLICES * FLT4_size; | |||
| std::vector<char> data(N * H * row_size); | |||
| for (int i = 0; i < N * H; ++i) { | |||
| memcpy(static_cast<char *>(data.data()) + i * row_size, static_cast<char *>(origin_data) + i * row_pitch, row_size); | |||
| for (int i = 0; i < shape.size(); ++i) { | |||
| printf("%4d", shape[i]); | |||
| if (i + 1 < shape.size()) { | |||
| printf(","); | |||
| } | |||
| } | |||
| printf(") "); | |||
| std::cout << "shape=("; | |||
| for (auto x : shape) { | |||
| printf("%3d,", x); | |||
| } | |||
| printf("): "); | |||
| for (size_t i = 0; i < num && i < ElementsC4Num; ++i) { | |||
| if (tensor->data_type() == kNumberTypeFloat16) | |||
| printf("%zu %6.3f | ", i, (reinterpret_cast<float16_t *>(data.data()))[i]); | |||
| else | |||
| printf("%zu %6.3f | ", i, (reinterpret_cast<float *>(data.data()))[i]); | |||
| auto num = mem_type == OpenCLMemType::BUF ? img_info.ElementsNum : img_info.ElementsC4Num; | |||
| for (int i = 0; i < n && i < num; ++i) { | |||
| if (tensor->data_type() == kNumberTypeFloat16) { | |||
| printf("%d %7.3f | ", i, reinterpret_cast<float16_t *>(data.data())[i]); | |||
| } else { | |||
| printf("%d %7.3f | ", i, reinterpret_cast<float *>(data.data())[i]); | |||
| } | |||
| } | |||
| printf("\n"); | |||
| if (!out_file.empty()) { | |||
| Write2File(data.data(), out_file, data.size()); | |||
| WriteToBin(out_file, data.data(), data.size()); | |||
| } | |||
| } | |||
| void PrintKernelOutput(OpenCLKernel *kernel, int n, const std::string &out_file) { | |||
| printf("%-30s", kernel->name().c_str()); | |||
| if (!kernel->out_tensors().empty()) { | |||
| PrintTensor(kernel->out_tensors()[0], kernel->GetMemType(), n, out_file); | |||
| } | |||
| allocator->UnmapBuffer(origin_data); | |||
| } | |||
| std::vector<int> GetNHWCShape(const std::vector<int> &tensor_shape) { | |||
| @@ -24,6 +24,8 @@ | |||
| #include "nnacl/op_base.h" | |||
| #include "src/lite_kernel.h" | |||
| #include "src/common/utils.h" | |||
| #include "src/runtime/opencl/opencl_runtime.h" | |||
| #include "src/runtime/kernel/opencl/opencl_kernel.h" | |||
| namespace mindspore::lite { | |||
| kernel::LiteKernel *GetOpenCLKernel(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | |||
| @@ -44,9 +46,11 @@ std::vector<size_t> GetCommonLocalSize(const std::vector<size_t> &global, int ma | |||
| std::string CLErrorCode(cl_int error_code); | |||
| void Write2File(void *mem, const std::string &file_name, int size); | |||
| int WriteToBin(const std::string &file_path, void *data, size_t size); | |||
| void PrintTensor(lite::Tensor *tensor, int num = 10, const std::string &out_file = ""); | |||
| void PrintTensor(const lite::Tensor *tensor, OpenCLMemType mem_type, int n = 10, const std::string &out_file = ""); | |||
| void PrintKernelOutput(OpenCLKernel *kernel, int n = 10, const std::string &out_file = ""); | |||
| std::vector<int> GetNHWCShape(const std::vector<int> &tensor_shape); | |||
| @@ -37,7 +37,7 @@ int OpenCLExecutor::Run(std::vector<Tensor *> &inputs, std::vector<Tensor *> &ou | |||
| MS_LOG(ERROR) << "run kernel before_callback failed, name: " << kernel->name(); | |||
| } | |||
| } | |||
| kernel::OpenCLKernel *op_kernel = reinterpret_cast<kernel::OpenCLKernel *>(kernel); | |||
| auto *op_kernel = reinterpret_cast<kernel::OpenCLKernel *>(kernel); | |||
| auto cur_outputs = kernel->out_tensors(); | |||
| for (auto i = 0; i < cur_outputs.size(); ++i) { | |||
| auto *output = cur_outputs.at(i); | |||
| @@ -23,7 +23,6 @@ | |||
| #include <vector> | |||
| #include <iostream> | |||
| #include "src/common/log_adapter.h" | |||
| #include "src/runtime/opencl/opencl_runtime.h" | |||
| namespace mindspore::lite::opencl { | |||
| @@ -124,7 +124,6 @@ TEST_F(TestActivationOpenCL, ReluFp_dim4) { | |||
| param->type_ = ActivationType_RELU; | |||
| auto *kernel = | |||
| new (std::nothrow) kernel::ActivationOpenClKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs); | |||
| kernel->SetFormatType(op_format); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "Kernel:Relu create fail."; | |||
| delete param; | |||
| @@ -241,7 +240,6 @@ TEST_F(TestActivationOpenCL, Relu6Fp_dim4) { | |||
| delete output_tensor; | |||
| return; | |||
| } | |||
| kernel->SetFormatType(op_format); | |||
| auto ret = kernel->Init(); | |||
| if (ret != RET_OK) { | |||
| delete param; | |||
| @@ -352,7 +350,6 @@ TEST_F(TestActivationOpenCL, SigmoidFp_dim4) { | |||
| delete output_tensor; | |||
| return; | |||
| } | |||
| kernel->SetFormatType(op_format); | |||
| auto ret = kernel->Init(); | |||
| if (ret != RET_OK) { | |||
| delete param; | |||
| @@ -464,7 +461,6 @@ TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) { | |||
| delete output_tensor; | |||
| return; | |||
| } | |||
| kernel->SetFormatType(op_format); | |||
| auto ret = kernel->Init(); | |||
| if (ret != RET_OK) { | |||
| delete param; | |||
| @@ -573,7 +569,6 @@ TEST_F(TestActivationOpenCLTanh, TanhFp_dim4) { | |||
| delete output_tensor; | |||
| return; | |||
| } | |||
| kernel->SetFormatType(op_format); | |||
| auto ret = kernel->Init(); | |||
| if (ret != RET_OK) { | |||
| delete param; | |||
| @@ -96,7 +96,6 @@ TEST_F(TestArithmeticSelfOpenCLfp16, ArithmeticSelfOpenCLFp16) { | |||
| delete param; | |||
| return; | |||
| } | |||
| arithmeticself_kernel->SetFormatType(schema::Format_NC4HW4); | |||
| arithmeticself_kernel->Init(); | |||
| // to do allocate memory for inputs and outputs | |||
| for (auto &input_tensor : inputs) { | |||
| @@ -185,7 +184,6 @@ TEST_F(TestArithmeticSelfOpenCLCI, ArithmeticSelfRound) { | |||
| delete param; | |||
| return; | |||
| } | |||
| arithmeticself_kernel->SetFormatType(schema::Format_NC4HW4); | |||
| arithmeticself_kernel->Init(); | |||
| // to do allocate memory for inputs and outputs | |||
| for (auto &input_tensor : inputs) { | |||
| @@ -280,7 +278,6 @@ TEST_F(TestArithmeticSelfOpenCLfp16, ArithmeticSelfdim2Fp16) { | |||
| delete param; | |||
| return; | |||
| } | |||
| arithmeticself_kernel->SetFormatType(schema::Format_NC4HW4); | |||
| arithmeticself_kernel->Init(); | |||
| // to do allocate memory for inputs and outputs | |||
| for (auto &input_tensor : inputs) { | |||
| @@ -142,7 +142,6 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) { | |||
| delete param; | |||
| return; | |||
| } | |||
| biasadd_kernel->SetFormatType(op_format); | |||
| auto ret = biasadd_kernel->Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "biasadd kernel init error."; | |||
| @@ -84,7 +84,6 @@ TEST_F(TestCastSelfOpenCL, Castfp32tofp16) { | |||
| delete param; | |||
| return; | |||
| } | |||
| cast_kernel->SetFormatType(schema::Format_NC4HW4); | |||
| cast_kernel->Init(); | |||
| // to do allocate memory for inputs and outputs | |||
| for (auto &input_tensor : inputs) { | |||
| @@ -171,7 +170,6 @@ TEST_F(TestCastSelfOpenCL, Castfp16tofp32) { | |||
| delete param; | |||
| return; | |||
| } | |||
| cast_kernel->SetFormatType(schema::Format_NC4HW4); | |||
| cast_kernel->Init(); | |||
| // to do allocate memory for inputs and outputs | |||
| for (auto &input_tensor : inputs) { | |||
| @@ -103,7 +103,6 @@ TEST_F(TestConcatOpenCLCI, ConcatFp32_2inputforCI) { | |||
| delete param; | |||
| return; | |||
| } | |||
| concat_kernel->SetFormatType(schema::Format_NC4HW4); | |||
| concat_kernel->Init(); | |||
| // to do allocate memory for inputs | |||
| for (auto &input_tensor : inputs) { | |||
| @@ -220,7 +219,6 @@ TEST_F(TestConcatOpenCLfp16, ConcatFp16_4input_dim4_axis1) { | |||
| delete param; | |||
| return; | |||
| } | |||
| concat_kernel->SetFormatType(schema::Format_NC4HW4); | |||
| concat_kernel->Init(); | |||
| // to do allocate memory for inputs and outputs | |||
| for (auto &input_tensor : inputs) { | |||
| @@ -344,7 +342,6 @@ TEST_F(TestConcatOpenCLfp32, ConcatFp32_3input_dim4_axis1) { | |||
| delete param; | |||
| return; | |||
| } | |||
| concat_kernel->SetFormatType(schema::Format_NC4HW4); | |||
| concat_kernel->Init(); | |||
| // to do allocate memory for inputs | |||
| for (auto &input_tensor : inputs) { | |||
| @@ -473,7 +470,6 @@ TEST_F(TestConcatOpenCLfp16, ConcatFp16_6input_dim4_axis1) { | |||
| delete param; | |||
| return; | |||
| } | |||
| concat_kernel->SetFormatType(schema::Format_NC4HW4); | |||
| concat_kernel->Init(); | |||
| // to do allocate memory for inputs and outputs | |||
| for (auto &input_tensor : inputs) { | |||
| @@ -170,7 +170,6 @@ void TEST_MAIN(const std::string &attr, Format input_format, Format output_forma | |||
| std::vector<lite::Tensor *> inputs{&input, &weight, &bias}; | |||
| std::vector<lite::Tensor *> outputs{&output}; | |||
| auto kernel = std::make_unique<ConvolutionOpenCLKernel>(reinterpret_cast<OpParameter *>(param), inputs, outputs); | |||
| kernel->SetFormatType(op_format); | |||
| kernel->Init(); | |||
| MS_LOG(DEBUG) << "create SubGraph"; | |||
| @@ -101,7 +101,6 @@ void DepthWiseTestMain(ConvParameter *conv_param, T2 *input_data, T1 *weight_dat | |||
| delete[] packed_input; | |||
| return; | |||
| } | |||
| pKernel->SetFormatType(format); | |||
| pKernel->Init(); | |||
| std::vector<kernel::LiteKernel *> kernels{pKernel.release()}; | |||
| @@ -55,7 +55,6 @@ void test_main_gather(void *input_data, void *correct_data, const std::vector<in | |||
| MS_LOG(INFO) << "new GatherOpenCLKernel failed "; | |||
| return; | |||
| } | |||
| pkernel->SetFormatType(schema::Format_NC4HW4); | |||
| pkernel->Init(); | |||
| // to do allocate memory for inputs and outputs | |||
| @@ -60,7 +60,6 @@ void TEST_MAIN(PadParameter *param, Format input_format, Format output_format, F | |||
| if (kernel == nullptr) { | |||
| return; | |||
| } | |||
| kernel->SetFormatType(op_format); | |||
| kernel->Init(); | |||
| MS_LOG(DEBUG) << "create SubGraph"; | |||
| @@ -141,7 +141,6 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) { | |||
| delete param; | |||
| return; | |||
| } | |||
| prelu_kernel->SetFormatType(op_format); | |||
| auto ret = prelu_kernel->Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init prelu kernel error"; | |||