|
|
|
@@ -48,19 +48,12 @@ ScaleOpenCLKernel::~ScaleOpenCLKernel() { |
|
|
|
|
|
|
|
void ScaleOpenCLKernel::Image2dGetWorkGroupSize() { |
|
|
|
local_size_ = {16, 16}; |
|
|
|
if (out_tensors_[0]->shape().size() == 2) { |
|
|
|
size_t H = out_tensors_[0]->shape()[0]; |
|
|
|
size_t W = UP_DIV(out_tensors_[0]->shape()[1], C4NUM); |
|
|
|
global_size_ = {W, H}; |
|
|
|
} else { |
|
|
|
size_t H = out_tensors_[0]->Batch() * out_tensors_[0]->Height(); |
|
|
|
size_t W = out_tensors_[0]->Width() * UP_DIV(out_tensors_[0]->Channel(), C4NUM); |
|
|
|
global_size_ = {W, H}; |
|
|
|
} |
|
|
|
auto image2d_info = Image2DInfo(out_tensors_[0]); |
|
|
|
global_size_ = {image2d_info.width, image2d_info.height}; |
|
|
|
} |
|
|
|
|
|
|
|
int ScaleOpenCLKernel::InitBuffer() { |
|
|
|
if (!element_flag_) { |
|
|
|
if (!weight_vector_flag_) { |
|
|
|
return RET_OK; |
|
|
|
} |
|
|
|
if (in_tensors_[1]->IsConst()) { |
|
|
|
@@ -68,17 +61,18 @@ int ScaleOpenCLKernel::InitBuffer() { |
|
|
|
std::vector<size_t> img_size; |
|
|
|
GetImageSize(0, &img_size); |
|
|
|
img_size[2] = in_tensors_[1]->data_type() == kNumberTypeFloat16 ? CL_HALF_FLOAT : CL_FLOAT; |
|
|
|
if (scale_C_flag_) { |
|
|
|
if (broadcast_flag_) { |
|
|
|
img_size[1] = 1; |
|
|
|
img_size[0] = UP_DIV(in_tensors_[1]->shape()[0], C4NUM); |
|
|
|
scale_ptr_ = allocator->CreateImageFromHost(in_tensors_[1]->data_c(), in_tensors_[1]->ElementsNum(), img_size); |
|
|
|
offset_ptr_ = allocator->CreateImageFromHost(in_tensors_[2]->data_c(), in_tensors_[2]->ElementsNum(), img_size); |
|
|
|
return RET_OK; |
|
|
|
} |
|
|
|
int pack_weight_size = in_tensors_[1]->ElementsC4Num(); |
|
|
|
int plane = in_tensors_[1]->Height() * in_tensors_[1]->Width(); |
|
|
|
int channel = in_tensors_[1]->Channel(); |
|
|
|
int batch = in_tensors_[1]->Batch(); |
|
|
|
auto image2d_info = Image2DInfo(in_tensors_[1]); |
|
|
|
int pack_weight_size = image2d_info.ElementsC4Num; |
|
|
|
int plane = image2d_info.H * image2d_info.W; |
|
|
|
int channel = image2d_info.C; |
|
|
|
int batch = image2d_info.N; |
|
|
|
if (in_tensors_[0]->GetFormat() == in_tensors_[1]->GetFormat()) { |
|
|
|
if (in_tensors_[0]->data_type() == in_tensors_[1]->data_type()) { |
|
|
|
scale_ptr_ = allocator->CreateImageFromHost(in_tensors_[1]->data_c(), in_tensors_[1]->ElementsNum(), img_size); |
|
|
|
@@ -157,16 +151,27 @@ int ScaleOpenCLKernel::Init() { |
|
|
|
} |
|
|
|
if (scale_shape.size() != in_shape.size()) { |
|
|
|
if (scale_tensor->ElementsNum() == 1) { |
|
|
|
element_flag_ = false; |
|
|
|
weight_vector_flag_ = false; |
|
|
|
kernel_name = "BoardcastScale"; |
|
|
|
} else if (((in_shape.size() == 4 && axis_ == 3) || (in_shape.size() == 2 && axis_ == 1)) && |
|
|
|
scale_shape.size() == 1) { |
|
|
|
element_flag_ = true; |
|
|
|
scale_C_flag_ = true; |
|
|
|
kernel_name = "Scale_C"; |
|
|
|
} else if (scale_shape.size() == 1) { |
|
|
|
weight_vector_flag_ = true; |
|
|
|
broadcast_flag_ = true; |
|
|
|
if ((in_shape.size() == 4 && axis_ == 3) || (in_shape.size() == 2 && axis_ == 1)) { |
|
|
|
kernel_name = "Scale_C"; |
|
|
|
} else if (in_shape.size() == 4 && axis_ == 1) { |
|
|
|
kernel_name = "Scale_H"; |
|
|
|
broadcast_H_flag_ = true; |
|
|
|
} else { |
|
|
|
MS_LOG(ERROR) << "unsupported scale axis " << axis_; |
|
|
|
return RET_ERROR; |
|
|
|
} |
|
|
|
} else { |
|
|
|
MS_LOG(ERROR) << "unsupported scale axis " << axis_ << ", in shape " << in_shape << ", scale shape" |
|
|
|
<< scale_shape; |
|
|
|
return RET_ERROR; |
|
|
|
} |
|
|
|
} else { |
|
|
|
element_flag_ = true; |
|
|
|
weight_vector_flag_ = true; |
|
|
|
kernel_name = "Scale"; |
|
|
|
} |
|
|
|
lite::STATUS error_code; |
|
|
|
@@ -206,7 +211,7 @@ int ScaleOpenCLKernel::Run() { |
|
|
|
|
|
|
|
int arg_idx = 0; |
|
|
|
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); |
|
|
|
if (element_flag_) { |
|
|
|
if (weight_vector_flag_) { |
|
|
|
void *scale = scale_ptr_ == nullptr ? in_tensors_[1]->data_c() : scale_ptr_; |
|
|
|
void *offset = offset_ptr_ == nullptr ? in_tensors_[2]->data_c() : offset_ptr_; |
|
|
|
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); |
|
|
|
@@ -230,8 +235,12 @@ int ScaleOpenCLKernel::Run() { |
|
|
|
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); |
|
|
|
cl_int2 output_shape{static_cast<int>(global_size_[0]), static_cast<int>(global_size_[1])}; |
|
|
|
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape); |
|
|
|
if (element_flag_ && scale_C_flag_) { |
|
|
|
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, UP_DIV(in_tensors_[1]->shape()[0], C4NUM)); |
|
|
|
if (weight_vector_flag_ && broadcast_flag_) { |
|
|
|
if (broadcast_H_flag_) { |
|
|
|
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[1]->shape()[0]); |
|
|
|
} else { |
|
|
|
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, UP_DIV(in_tensors_[1]->shape()[0], C4NUM)); |
|
|
|
} |
|
|
|
} |
|
|
|
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, act_type); |
|
|
|
ocl_runtime_->RunKernel(kernel_, global_size_, local_size_, nullptr); |
|
|
|
|