|
|
|
@@ -110,15 +110,17 @@ int ScaleOpenCLKernel::InitBuffer() { |
|
|
|
if (!element_flag_) { |
|
|
|
return RET_OK; |
|
|
|
} |
|
|
|
if (in_tensors_[1]->TensorType() == schema::NodeType_ValueNode && in_tensors_[1]->Data() != nullptr) { |
|
|
|
if (in_tensors_[1]->category() == lite::Tensor::Category::CONST && in_tensors_[1]->MutableData() != nullptr) { |
|
|
|
auto allocator = ocl_runtime_->GetAllocator(); |
|
|
|
std::vector<size_t> img_size; |
|
|
|
GetImageSize(0, &img_size); |
|
|
|
if (in_tensors_[1]->shape().size() == 1 && axis_ == 3) { |
|
|
|
img_size[0] = 1; |
|
|
|
img_size[1] = UP_DIV(in_tensors_[1]->shape()[0], C4NUM); |
|
|
|
scale_ptr_ = allocator->CreateImageFromHost(in_tensors_[1]->Data(), in_tensors_[1]->ElementsNum(), img_size); |
|
|
|
offset_ptr_ = allocator->CreateImageFromHost(in_tensors_[2]->Data(), in_tensors_[2]->ElementsNum(), img_size); |
|
|
|
scale_ptr_ = |
|
|
|
allocator->CreateImageFromHost(in_tensors_[1]->MutableData(), in_tensors_[1]->ElementsNum(), img_size); |
|
|
|
offset_ptr_ = |
|
|
|
allocator->CreateImageFromHost(in_tensors_[2]->MutableData(), in_tensors_[2]->ElementsNum(), img_size); |
|
|
|
return RET_OK; |
|
|
|
} |
|
|
|
int pack_weight_size = in_tensors_[1]->ElementsC4Num(); |
|
|
|
@@ -127,8 +129,10 @@ int ScaleOpenCLKernel::InitBuffer() { |
|
|
|
int batch = in_tensors_[1]->Batch(); |
|
|
|
if (in_tensors_[0]->GetFormat() == in_tensors_[1]->GetFormat()) { |
|
|
|
if (in_tensors_[0]->data_type() == in_tensors_[1]->data_type()) { |
|
|
|
scale_ptr_ = allocator->CreateImageFromHost(in_tensors_[1]->Data(), in_tensors_[1]->ElementsNum(), img_size); |
|
|
|
offset_ptr_ = allocator->CreateImageFromHost(in_tensors_[2]->Data(), in_tensors_[2]->ElementsNum(), img_size); |
|
|
|
scale_ptr_ = |
|
|
|
allocator->CreateImageFromHost(in_tensors_[1]->MutableData(), in_tensors_[1]->ElementsNum(), img_size); |
|
|
|
offset_ptr_ = |
|
|
|
allocator->CreateImageFromHost(in_tensors_[2]->MutableData(), in_tensors_[2]->ElementsNum(), img_size); |
|
|
|
} else { |
|
|
|
MS_LOG(ERROR) << "Unsupport data type transpose from " << in_tensors_[1]->data_type() << "to " |
|
|
|
<< in_tensors_[0]->data_type(); |
|
|
|
@@ -149,8 +153,8 @@ int ScaleOpenCLKernel::InitBuffer() { |
|
|
|
return RET_ERROR; |
|
|
|
} |
|
|
|
std::function<float(float)> to_dtype = [](float x) -> float { return (float)x; }; |
|
|
|
PackNHWCToNC4HW4<float, float>(in_tensors_[1]->Data(), scale, batch, plane, channel, to_dtype); |
|
|
|
PackNHWCToNC4HW4<float, float>(in_tensors_[2]->Data(), offset, batch, plane, channel, to_dtype); |
|
|
|
PackNHWCToNC4HW4<float, float>(in_tensors_[1]->MutableData(), scale, batch, plane, channel, to_dtype); |
|
|
|
PackNHWCToNC4HW4<float, float>(in_tensors_[2]->MutableData(), offset, batch, plane, channel, to_dtype); |
|
|
|
scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size); |
|
|
|
offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size); |
|
|
|
delete[] scale; |
|
|
|
@@ -168,8 +172,8 @@ int ScaleOpenCLKernel::InitBuffer() { |
|
|
|
return RET_ERROR; |
|
|
|
} |
|
|
|
std::function<int16_t(float)> to_dtype = Float32ToShort; |
|
|
|
PackNHWCToNC4HW4<float, int16_t>(in_tensors_[1]->Data(), scale, batch, plane, channel, to_dtype); |
|
|
|
PackNHWCToNC4HW4<float, int16_t>(in_tensors_[2]->Data(), offset, batch, plane, channel, to_dtype); |
|
|
|
PackNHWCToNC4HW4<float, int16_t>(in_tensors_[1]->MutableData(), scale, batch, plane, channel, to_dtype); |
|
|
|
PackNHWCToNC4HW4<float, int16_t>(in_tensors_[2]->MutableData(), offset, batch, plane, channel, to_dtype); |
|
|
|
scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size); |
|
|
|
offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size); |
|
|
|
delete[] scale; |
|
|
|
@@ -199,8 +203,8 @@ int ScaleOpenCLKernel::InitBuffer() { |
|
|
|
return RET_ERROR; |
|
|
|
} |
|
|
|
std::function<float(float)> to_dtype = [](float x) -> float { return (float)x; }; |
|
|
|
PackNHWCToNHWC4<float, float>(in_tensors_[1]->Data(), scale, batch, plane, channel, to_dtype); |
|
|
|
PackNHWCToNHWC4<float, float>(in_tensors_[2]->Data(), offset, batch, plane, channel, to_dtype); |
|
|
|
PackNHWCToNHWC4<float, float>(in_tensors_[1]->MutableData(), scale, batch, plane, channel, to_dtype); |
|
|
|
PackNHWCToNHWC4<float, float>(in_tensors_[2]->MutableData(), offset, batch, plane, channel, to_dtype); |
|
|
|
scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size); |
|
|
|
offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size); |
|
|
|
delete[] scale; |
|
|
|
@@ -218,8 +222,8 @@ int ScaleOpenCLKernel::InitBuffer() { |
|
|
|
return RET_ERROR; |
|
|
|
} |
|
|
|
std::function<int16_t(float)> to_dtype = Float32ToShort; |
|
|
|
PackNHWCToNHWC4<float, int16_t>(in_tensors_[1]->Data(), scale, batch, plane, channel, to_dtype); |
|
|
|
PackNHWCToNHWC4<float, int16_t>(in_tensors_[2]->Data(), offset, batch, plane, channel, to_dtype); |
|
|
|
PackNHWCToNHWC4<float, int16_t>(in_tensors_[1]->MutableData(), scale, batch, plane, channel, to_dtype); |
|
|
|
PackNHWCToNHWC4<float, int16_t>(in_tensors_[2]->MutableData(), offset, batch, plane, channel, to_dtype); |
|
|
|
scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size); |
|
|
|
offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size); |
|
|
|
delete[] scale; |
|
|
|
@@ -291,7 +295,7 @@ int ScaleOpenCLKernel::Init() { |
|
|
|
in_ori_format_ = in_tensors_[0]->GetFormat(); |
|
|
|
out_ori_format_ = out_tensors_[0]->GetFormat(); |
|
|
|
in_tensors_[0]->SetFormat(format); |
|
|
|
if (element_flag_ && in_tensors_[1]->TensorType() != schema::NodeType_ValueNode) { |
|
|
|
if (element_flag_ && in_tensors_[1]->category() != lite::Tensor::Category::CONST) { |
|
|
|
in_tensors_[1]->SetFormat(format); |
|
|
|
in_tensors_[2]->SetFormat(format); |
|
|
|
} |
|
|
|
@@ -305,27 +309,27 @@ int ScaleOpenCLKernel::Run() { |
|
|
|
MS_LOG(DEBUG) << this->name() << " Running!"; |
|
|
|
|
|
|
|
int arg_idx = 0; |
|
|
|
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->Data()); |
|
|
|
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->MutableData()); |
|
|
|
if (element_flag_) { |
|
|
|
void *scale = scale_ptr_ == nullptr ? in_tensors_[1]->Data() : scale_ptr_; |
|
|
|
void *offset = offset_ptr_ == nullptr ? in_tensors_[2]->Data() : offset_ptr_; |
|
|
|
void *scale = scale_ptr_ == nullptr ? in_tensors_[1]->MutableData() : scale_ptr_; |
|
|
|
void *offset = offset_ptr_ == nullptr ? in_tensors_[2]->MutableData() : offset_ptr_; |
|
|
|
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); |
|
|
|
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset); |
|
|
|
} else { |
|
|
|
if (in_tensors_[0]->data_type() == kNumberTypeFloat32) { |
|
|
|
float scale = static_cast<float *>(in_tensors_[1]->Data())[0]; |
|
|
|
float offset = static_cast<float *>(in_tensors_[2]->Data())[0]; |
|
|
|
float scale = static_cast<float *>(in_tensors_[1]->MutableData())[0]; |
|
|
|
float offset = static_cast<float *>(in_tensors_[2]->MutableData())[0]; |
|
|
|
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); |
|
|
|
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset); |
|
|
|
} else if (in_tensors_[0]->data_type() == kNumberTypeFloat16) { |
|
|
|
if (in_tensors_[1]->data_type() == kNumberTypeFloat32) { |
|
|
|
float scale = static_cast<float *>(in_tensors_[1]->Data())[0]; |
|
|
|
float offset = static_cast<float *>(in_tensors_[2]->Data())[0]; |
|
|
|
float scale = static_cast<float *>(in_tensors_[1]->MutableData())[0]; |
|
|
|
float offset = static_cast<float *>(in_tensors_[2]->MutableData())[0]; |
|
|
|
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(scale)); |
|
|
|
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(offset)); |
|
|
|
} else if (in_tensors_[1]->data_type() == kNumberTypeFloat16) { |
|
|
|
int16_t scale = static_cast<int16_t *>(in_tensors_[1]->Data())[0]; |
|
|
|
int16_t offset = static_cast<int16_t *>(in_tensors_[2]->Data())[0]; |
|
|
|
int16_t scale = static_cast<int16_t *>(in_tensors_[1]->MutableData())[0]; |
|
|
|
int16_t offset = static_cast<int16_t *>(in_tensors_[2]->MutableData())[0]; |
|
|
|
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(scale)); |
|
|
|
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(offset)); |
|
|
|
} else { |
|
|
|
@@ -334,7 +338,7 @@ int ScaleOpenCLKernel::Run() { |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->Data()); |
|
|
|
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData()); |
|
|
|
int H = 0; |
|
|
|
int W = 0; |
|
|
|
if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) { |
|
|
|
@@ -359,10 +363,9 @@ int ScaleOpenCLKernel::Run() { |
|
|
|
return RET_OK; |
|
|
|
} |
|
|
|
|
|
|
|
kernel::LiteKernel *OpenCLScaleKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, |
|
|
|
const std::vector<lite::tensor::Tensor *> &outputs, |
|
|
|
OpParameter *opParameter, const lite::Context *ctx, |
|
|
|
const kernel::KernelKey &desc, |
|
|
|
kernel::LiteKernel *OpenCLScaleKernelCreator(const std::vector<lite::Tensor *> &inputs, |
|
|
|
const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter, |
|
|
|
const lite::Context *ctx, const kernel::KernelKey &desc, |
|
|
|
const mindspore::lite::PrimitiveC *primitive) { |
|
|
|
auto *kernel = |
|
|
|
new (std::nothrow) ScaleOpenCLKernel(reinterpret_cast<OpParameter *>(opParameter), inputs, outputs, ctx); |
|
|
|
|