Browse Source

Fixed OpenCL program getting stuck on some devices

tags/v1.0.0
Corleone 5 years ago
parent
commit
c018938e9a
57 changed files with 306 additions and 414 deletions
  1. +8
    -10
      mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc
  2. +0
    -1
      mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h
  3. +11
    -12
      mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc
  4. +0
    -2
      mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h
  5. +8
    -12
      mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc
  6. +0
    -1
      mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h
  7. +13
    -17
      mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc
  8. +0
    -1
      mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h
  9. +11
    -13
      mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc
  10. +0
    -1
      mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.h
  11. +36
    -40
      mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc
  12. +0
    -1
      mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h
  13. +16
    -19
      mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc
  14. +0
    -1
      mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h
  15. +38
    -42
      mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc
  16. +0
    -1
      mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.h
  17. +19
    -23
      mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc
  18. +0
    -1
      mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h
  19. +11
    -15
      mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc
  20. +0
    -1
      mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h
  21. +13
    -16
      mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
  22. +0
    -1
      mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h
  23. +13
    -17
      mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc
  24. +0
    -1
      mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h
  25. +11
    -14
      mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc
  26. +0
    -1
      mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h
  27. +8
    -11
      mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc
  28. +0
    -1
      mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h
  29. +9
    -12
      mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc
  30. +0
    -1
      mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h
  31. +0
    -1
      mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc
  32. +0
    -2
      mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h
  33. +11
    -15
      mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc
  34. +0
    -1
      mindspore/lite/src/runtime/kernel/opencl/kernel/slice.h
  35. +15
    -17
      mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc
  36. +0
    -2
      mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h
  37. +9
    -12
      mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc
  38. +0
    -1
      mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h
  39. +12
    -15
      mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc
  40. +0
    -1
      mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h
  41. +12
    -1
      mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h
  42. +15
    -7
      mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc
  43. +1
    -0
      mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h
  44. +0
    -17
      mindspore/lite/src/runtime/opencl/opencl_allocator.cc
  45. +0
    -1
      mindspore/lite/src/runtime/opencl/opencl_allocator.h
  46. +5
    -1
      mindspore/lite/src/runtime/opencl/opencl_executor.h
  47. +1
    -1
      mindspore/lite/src/scheduler.cc
  48. +0
    -15
      mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc
  49. +0
    -1
      mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_self_tests.cc
  50. +0
    -1
      mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc
  51. +0
    -1
      mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc
  52. +0
    -3
      mindspore/lite/test/ut/src/runtime/kernel/opencl/biasadd_tests.cc
  53. +0
    -2
      mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc
  54. +0
    -1
      mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc
  55. +0
    -3
      mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc
  56. +0
    -1
      mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc
  57. +0
    -2
      mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc

+ 8
- 10
mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc View File

@@ -55,8 +55,7 @@ int ActivationOpenClKernel::Init() {
c = in_tensors_[0]->shape()[3];
}
nhwc_shape_ = {n, h, w, c};
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
enable_fp16_ = ocl_runtime->GetFp16Enable();
enable_fp16_ = ocl_runtime_->GetFp16Enable();
fp_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float);
if (in_size_ != 2 && in_size_ != 4) {
MS_LOG(ERROR) << "Activate fun only support dim=4 or 2, but your dim=" << in_size_;
@@ -75,9 +74,9 @@ int ActivationOpenClKernel::Init() {

std::string source = activation_source;
std::set<std::string> build_options;
ocl_runtime->LoadSource(Program_Kernel[type_][0], source);
ocl_runtime_->LoadSource(Program_Kernel[type_][0], source);
std::string kernel_name = Program_Kernel[type_][1];
ocl_runtime->BuildKernel(kernel_, Program_Kernel[type_][0], kernel_name, build_options);
ocl_runtime_->BuildKernel(kernel_, Program_Kernel[type_][0], kernel_name, build_options);
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
in_tensors_[0]->SetFormat(op_format_);
@@ -89,17 +88,16 @@ int ActivationOpenClKernel::Init() {
int ActivationOpenClKernel::Run() {
MS_LOG(DEBUG) << op_parameter_->name_ << " begin running!";
cl_int4 img2d_shape = GetImg2dShape();
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
int arg_idx = 0;
ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, img2d_shape);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, img2d_shape);
if (type_ == ActivationType_LEAKY_RELU) {
ocl_runtime->SetKernelArg(kernel_, arg_idx++, alpha_);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, alpha_);
}
std::vector<size_t> local = {};
std::vector<size_t> global = {static_cast<size_t>(img2d_shape.s[1]), static_cast<size_t>(img2d_shape.s[2])};
auto ret = ocl_runtime->RunKernel(kernel_, global, local, nullptr);
auto ret = ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Run kernel:" << op_parameter_->name_ << " fail.";
return RET_ERROR;


+ 0
- 1
mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h View File

@@ -19,7 +19,6 @@

#include <vector>

#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "nnacl/fp32/activation.h"



+ 11
- 12
mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc View File

@@ -34,7 +34,7 @@ namespace mindspore::kernel {

ArithmeticOpenCLKernel::~ArithmeticOpenCLKernel() {
if (weight_ptr_ != nullptr) {
auto allocator = runtime_->GetAllocator();
auto allocator = ocl_runtime_->GetAllocator();
allocator->Free(weight_ptr_);
weight_ptr_ = nullptr;
}
@@ -106,7 +106,7 @@ int ArithmeticOpenCLKernel::InitBuffer() {
const ArithmeticParameter *arithmetic_parameter = reinterpret_cast<const ArithmeticParameter *>(op_parameter_);
if (!arithmetic_parameter->broadcasting_) {
if (in_tensors_[1]->category() == lite::Tensor::Category::CONST && in_tensors_[1]->data_c() != nullptr) {
auto allocator = runtime_->GetAllocator();
auto allocator = ocl_runtime_->GetAllocator();
std::vector<size_t> img_size;
GetImageSize(0, &img_size);
int pack_weight_size = in_tensors_[1]->ElementsC4Num();
@@ -194,7 +194,6 @@ int ArithmeticOpenCLKernel::InitBuffer() {
}

int ArithmeticOpenCLKernel::Init() {
runtime_ = lite::opencl::OpenCLRuntime::GetInstance();
std::string kernel_name;

const ArithmeticParameter *arithmetic_parameter = reinterpret_cast<const ArithmeticParameter *>(op_parameter_);
@@ -265,7 +264,7 @@ int ArithmeticOpenCLKernel::Init() {

lite::STATUS error_code = RET_OK;
#ifdef PROGRAM_WITH_IL
kernel_ = runtime_->GetKernelFromBinary(kernel_name);
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
if (out_mem_type_ == OpenCLMemType::IMG) {
kernel_name += "_IMG";
@@ -275,8 +274,8 @@ int ArithmeticOpenCLKernel::Init() {
std::string program_name = "Arithmetic";
std::set<std::string> build_options;
std::string source = arithmetic_source;
runtime_->LoadSource(program_name, source);
error_code = runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
error_code = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
if (error_code != RET_OK) {
return error_code;
@@ -302,10 +301,10 @@ int ArithmeticOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";

int arg_idx = 0;
runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
if (element_flag_) {
void *weight = weight_ptr_ == nullptr ? in_tensors_[1]->data_c() : weight_ptr_;
runtime_->SetKernelArg(kernel_, arg_idx++, weight);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight);
} else {
float weight = 0.f;
if (in_tensors_[1]->data_type() == kNumberTypeFloat32) {
@@ -316,9 +315,9 @@ int ArithmeticOpenCLKernel::Run() {
MS_LOG(ERROR) << "Unsupport data type " << in_tensors_[1]->data_type();
return RET_ERROR;
}
runtime_->SetKernelArg(kernel_, arg_idx++, weight);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight);
}
runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());

int H = 0;
int W = 0;
@@ -336,8 +335,8 @@ int ArithmeticOpenCLKernel::Run() {
return RET_ERROR;
}
cl_int2 output_shape{W, H};
runtime_->SetKernelArg(kernel_, arg_idx++, output_shape);
runtime_->RunKernel(kernel_, global_size_, local_size_, nullptr);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape);
ocl_runtime_->RunKernel(kernel_, global_size_, local_size_, nullptr);
return RET_OK;
}



+ 0
- 2
mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h View File

@@ -19,7 +19,6 @@

#include <vector>
#include "src/runtime/kernel/arm/fp32/arithmetic.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"

namespace mindspore::kernel {
@@ -42,7 +41,6 @@ class ArithmeticOpenCLKernel : public OpenCLKernel {
int InitBuffer();

cl::Kernel kernel_;
lite::opencl::OpenCLRuntime *runtime_;
bool element_flag_{true};
void *weight_ptr_{nullptr};



+ 8
- 12
mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc View File

@@ -17,7 +17,6 @@
#include <algorithm>
#include <set>
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/kernel/arithmetic_self.h"
#include "src/runtime/kernel/opencl/utils.h"
#include "src/runtime/kernel/opencl/cl/arithmeticself.cl.inc"
@@ -51,8 +50,7 @@ int ArithmeticSelfOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *im
im_dst_x = out_tensors_[0]->Width();
}
size_t img_dtype = CL_FLOAT;
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto enable_fp16_ = ocl_runtime->GetFp16Enable();
auto enable_fp16_ = ocl_runtime_->GetFp16Enable();
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
@@ -136,9 +134,8 @@ int ArithmeticSelfOpenCLKernel::Init() {
std::set<std::string> build_options;
std::string source = arithmeticself_source;
std::string program_name = "ArithmeticSelf";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);

return RET_OK;
}
@@ -162,7 +159,6 @@ void ArithmeticSelfGetWorkGroup(const std::vector<size_t> &global, std::vector<s
int ArithmeticSelfOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running! ";

auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto output_shape = out_tensors_[0]->shape();
cl_int4 output_shape_ = {output_shape[0], output_shape[1], output_shape[2], UP_DIV(output_shape[3], C4NUM)};

@@ -170,17 +166,17 @@ int ArithmeticSelfOpenCLKernel::Run() {
uint32_t OW = output_shape[2];
uint32_t OC = UP_DIV(output_shape[3], C4NUM);

const std::vector<size_t> &max_global = ocl_runtime->GetWorkItemSize();
const std::vector<size_t> &max_global = ocl_runtime_->GetWorkItemSize();
std::vector<size_t> local = {1, 1, 1}; // init local
std::vector<size_t> global = {OH, OW, OC};
ArithmeticSelfGetWorkGroup(global, &local, max_global[0]);

int arg_cn = 0;
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_);

ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);

return RET_OK;
}


+ 0
- 1
mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h View File

@@ -21,7 +21,6 @@
#include <string>
#include "ir/anf.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "nnacl/arithmetic_self_parameter.h"

namespace mindspore::kernel {


+ 13
- 17
mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc View File

@@ -18,7 +18,6 @@
#include <set>
#include <string>
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/kernel/batchnorm.h"
#include "src/runtime/kernel/opencl/utils.h"
#include "src/runtime/kernel/opencl/cl/batchnorm.cl.inc"
@@ -40,8 +39,7 @@ int BatchNormOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_siz
im_dst_x = out_tensors_[0]->Width();
}
size_t img_dtype = CL_FLOAT;
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto enable_fp16_ = ocl_runtime->GetFp16Enable();
auto enable_fp16_ = ocl_runtime_->GetFp16Enable();
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
@@ -72,9 +70,8 @@ int BatchNormOpenCLKernel::Init() {
std::set<std::string> build_options;
std::string source = batchnorm_source;
std::string program_name = "Batch_normalization";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);

return RET_OK;
}
@@ -98,7 +95,6 @@ void BatchNormGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t
int BatchNormOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running! ";
auto param = reinterpret_cast<BatchNormParameter *>(this->op_parameter_);
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto input0_shape = in_tensors_[0]->shape();
auto output_shape = out_tensors_[0]->shape();
cl_int4 input_shape_ = {input0_shape[0], input0_shape[1], input0_shape[2], UP_DIV(input0_shape[3], C4NUM)};
@@ -107,20 +103,20 @@ int BatchNormOpenCLKernel::Run() {
uint32_t OW = output_shape[2];
uint32_t OC = UP_DIV(output_shape[3], C4NUM);

const std::vector<size_t> &max_global = ocl_runtime->GetWorkItemSize();
const std::vector<size_t> &max_global = ocl_runtime_->GetWorkItemSize();
std::vector<size_t> local = {1, 1, 1}; // init local
std::vector<size_t> global = {OH, OW, OC};
BatchNormGetWorkGroup(global, &local, max_global[0]);
int arg_cn = 0;
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); // input tensor
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); // scale
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c()); // offest
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[3]->data_c()); // mean
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[4]->data_c()); // variance
ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); // out tensor
ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->epsilon_);
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); // input tensor
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); // scale
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c()); // offest
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[3]->data_c()); // mean
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[4]->data_c()); // variance
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); // out tensor
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->epsilon_);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);

return RET_OK;
}


+ 0
- 1
mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h View File

@@ -20,7 +20,6 @@
#include <vector>
#include "ir/anf.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "nnacl/fp32/batchnorm.h"

namespace mindspore::kernel {


+ 11
- 13
mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc View File

@@ -16,6 +16,7 @@
* limitations under the License.
*/

#include "src/runtime/kernel/opencl/kernel/biasadd.h"
#include <string>
#include <map>
#include <set>
@@ -23,7 +24,6 @@

#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "src/runtime/kernel/opencl/kernel/biasadd.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/cl/biasadd.cl.inc"

@@ -38,7 +38,7 @@ namespace mindspore::kernel {
void BiasAddOpenCLKernel::InitBuffer() {
int C = in_tensors_[1]->shape()[0];
int div_ci = UP_DIV(C, C4NUM);
auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator();
auto allocator = ocl_runtime_->GetAllocator();
size_t img_dtype = CL_FLOAT;
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
@@ -57,8 +57,7 @@ int BiasAddOpenCLKernel::Init() {
for (int i = 0; i < in_size_; ++i) {
input_shape_.s[i + 4 - in_size_] = in_tensors_[0]->shape()[i];
}
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
enable_fp16_ = ocl_runtime->GetFp16Enable();
enable_fp16_ = ocl_runtime_->GetFp16Enable();
fp_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float);
if (in_size_ != 4 && in_size_ != 2) {
MS_LOG(ERROR) << "BiasAdd only support dim=4 or 2, but your dim=" << in_size_;
@@ -75,8 +74,8 @@ int BiasAddOpenCLKernel::Init() {
std::string source = biasadd_source;
std::string program_name = "BiasAdd";
std::string kernel_name = "BiasAdd";
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);

in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
@@ -89,18 +88,17 @@ int BiasAddOpenCLKernel::Init() {
int BiasAddOpenCLKernel::Run() {
cl_int4 global_size = GetGlobalshape();
MS_LOG(DEBUG) << op_parameter_->name_ << " Running!";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
int arg_idx = 0;
std::map<schema::Format, int> data_type{
{schema::Format::Format_NC4, 1}, {schema::Format::Format_NHWC4, 2}, {schema::Format::Format_NC4HW4, 3}};
ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, input_shape_);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, BiasAdd_);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_shape_);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, BiasAdd_);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]);
std::vector<size_t> local = {1, 1};
std::vector<size_t> global = {static_cast<size_t>(global_size.s[1]), static_cast<size_t>(global_size.s[2])};
auto ret = ocl_runtime->RunKernel(kernel_, global, local, nullptr);
auto ret = ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Run kernel " << op_parameter_->name_ << " error.";
return RET_ERROR;


+ 0
- 1
mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.h View File

@@ -23,7 +23,6 @@
#include "src/tensor.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "schema/model_generated.h"
#include "src/runtime/opencl/opencl_runtime.h"

namespace mindspore::kernel {



+ 36
- 40
mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc View File

@@ -13,13 +13,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "src/runtime/kernel/opencl/kernel/concat.h"
#include <cstring>
#include <string>
#include <algorithm>
#include <set>
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/kernel/concat.h"
#include "src/runtime/kernel/opencl/utils.h"
#include "src/runtime/kernel/opencl/cl/concat.cl.inc"

@@ -40,8 +40,7 @@ int ConcatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size)
im_dst_x = out_tensors_[0]->Width();
}
size_t img_dtype = CL_FLOAT;
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto enable_fp16_ = ocl_runtime->GetFp16Enable();
auto enable_fp16_ = ocl_runtime_->GetFp16Enable();
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
@@ -52,8 +51,7 @@ int ConcatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size)
}

int ConcatOpenCLKernel::RunAxis0() {
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto allocator_ = ocl_runtime->GetAllocator();
auto allocator_ = ocl_runtime_->GetAllocator();
std::vector<size_t> img_size;
auto dst_data = out_tensors_[0]->data_c();
auto dst_origin = cl::array<cl::size_type, 3U>{0, 0, 0};
@@ -64,7 +62,7 @@ int ConcatOpenCLKernel::RunAxis0() {
auto src_origin = cl::array<cl::size_type, 3U>{0, 0, 0};
auto region = cl::array<cl::size_type, 3U>{img_size[0], img_size[1], 1};
cl::Image2D *input_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(src_data));
ocl_runtime->GetDefaultCommandQueue()->enqueueCopyImage(*input_image, *out_image, src_origin, dst_origin, region);
ocl_runtime_->GetDefaultCommandQueue()->enqueueCopyImage(*input_image, *out_image, src_origin, dst_origin, region);
dst_origin[1] += region[1];
}
return RET_OK;
@@ -112,9 +110,8 @@ int ConcatOpenCLKernel::Init() {
std::set<std::string> build_options;
std::string source = concat_source;
std::string program_name = "Concat";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);

return RET_OK;
}
@@ -155,7 +152,6 @@ int ConcatOpenCLKernel::Run() {
return RunAxis0();
}

auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto input1_shape = in_tensors_[0]->shape();
auto input2_shape = in_tensors_[1]->shape();
auto output_shape = out_tensors_[0]->shape();
@@ -168,7 +164,7 @@ int ConcatOpenCLKernel::Run() {
uint32_t OW = output_shape[2];
uint32_t OC = UP_DIV(output_shape[3], C4NUM);

const std::vector<size_t> &max_global = ocl_runtime->GetWorkItemSize();
const std::vector<size_t> &max_global = ocl_runtime_->GetWorkItemSize();
std::vector<size_t> local = {1, 1, 1}; // init local
std::vector<size_t> global = {OH, OW, OC};
ConcatGetWorkGroup(global, &local, max_global[0]);
@@ -176,48 +172,48 @@ int ConcatOpenCLKernel::Run() {

int arg_cn = 0;
if (in_tensors_.size() == 2) {
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape1_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape2_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->axis_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape1_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape2_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->axis_);
} else if (in_tensors_.size() == 3) {
auto input3_shape = in_tensors_[2]->shape();
cl_int4 input_shape3_ = {input3_shape[0], input3_shape[1], input3_shape[2], UP_DIV(input3_shape[3], C4NUM)};

ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape1_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape2_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape3_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->axis_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape1_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape2_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape3_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->axis_);
} else if (in_tensors_.size() == 4) {
auto input3_shape = in_tensors_[2]->shape();
auto input4_shape = in_tensors_[3]->shape();
cl_int4 input_shape3_ = {input3_shape[0], input3_shape[1], input3_shape[2], UP_DIV(input3_shape[3], C4NUM)};
cl_int4 input_shape4_ = {input4_shape[0], input4_shape[1], input4_shape[2], UP_DIV(input4_shape[3], C4NUM)};

ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[3]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape1_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape2_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape3_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape4_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->axis_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[3]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape1_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape2_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape3_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape4_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->axis_);
} else {
MS_LOG(ERROR) << " input sizes must 2 or 3 or 4";
return RET_ERROR;
}
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);

return RET_OK;
}


+ 0
- 1
mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h View File

@@ -20,7 +20,6 @@
#include <vector>
#include "ir/anf.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/arm/base/concat_base.h"

namespace mindspore::kernel {


+ 16
- 19
mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc View File

@@ -14,12 +14,11 @@
* limitations under the License.
*/

#include "src/runtime/kernel/opencl/kernel/conv2d_transpose.h"
#include <string>
#include <set>
#include "nnacl/fp32/common_func.h"
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/kernel/conv2d_transpose.h"
#ifndef PROGRAM_WITH_IL
#include "src/runtime/kernel/opencl/cl/conv2d_transpose2x2.cl.inc"
#endif
@@ -41,16 +40,15 @@ int Conv2dTransposeOpenCLKernel::Init() {
return RET_ERROR;
}
std::string kernel_name = "conv2d_transpose2x2_" + std::string(EnumNameFormat(op_format_));
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
enable_fp16_ = ocl_runtime->GetFp16Enable();
enable_fp16_ = ocl_runtime_->GetFp16Enable();
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
std::string source = conv2d_transpose2x2_source;
std::set<std::string> build_options;
std::string program_name = "conv2d_transpose2x2";
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
PadWeight();
in_ori_format_ = in_tensors_[0]->GetFormat();
@@ -71,7 +69,7 @@ void Conv2dTransposeOpenCLKernel::PadWeight() {
int kw = param->kernel_w_;
int div_ci = UP_DIV(ci, C4NUM);
int div_co = UP_DIV(co, C4NUM);
auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator();
auto allocator = ocl_runtime_->GetAllocator();
auto data_size = enable_fp16_ ? sizeof(int16_t) : sizeof(float);

// IHWO to OHWI4(I)4(O)(converter format is IHWO)
@@ -188,7 +186,6 @@ int Conv2dTransposeOpenCLKernel::Run() {
int ow = out_tensors_[0]->shape()[2];
int h = in_tensors_[0]->shape()[1];
int w = in_tensors_[0]->shape()[2];
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
// local size should less than MAX_GROUP_SIZE
std::vector<size_t> local = {16, 1, 16};
std::vector<size_t> global = {UP_ROUND((size_t)UP_ROUND(oh / 2, 2), local[0]),
@@ -200,16 +197,16 @@ int Conv2dTransposeOpenCLKernel::Run() {
cl_int4 src_size = {h, w, UP_DIV(ci, C4NUM), 1};
cl_int4 dst_size = {oh, ow, UP_DIV(co, C4NUM), 1};
int arg_cnt = 0;
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, padWeight_, lite::opencl::MemType::BUF);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, bias_);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, kernel_size);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, stride);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, padding);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, src_size);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, dst_size);
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padWeight_, lite::opencl::MemType::BUF);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
return RET_OK;
}



+ 0
- 1
mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h View File

@@ -21,7 +21,6 @@

#include "src/lite_kernel.h"
#include "nnacl/conv_parameter.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"

namespace mindspore::kernel {


+ 38
- 42
mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc View File

@@ -39,12 +39,11 @@ constexpr size_t CO_TILE = C4NUM;

int ConvolutionOpenCLKernel::Init() {
static int init_count = 0;
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto allocator = ocl_runtime->GetAllocator();
auto allocator = ocl_runtime_->GetAllocator();
auto param = reinterpret_cast<ConvParameter *>(op_parameter_);
std::set<std::string> build_options;
init_count++;
use_fp16_ = ocl_runtime->GetFp16Enable();
use_fp16_ = ocl_runtime_->GetFp16Enable();

if (op_format_ != Format_NHWC4 && op_format_ != Format_NC4HW4) {
MS_LOG(ERROR) << "op_format_ " << op_format_ << " not support!";
@@ -76,21 +75,21 @@ int ConvolutionOpenCLKernel::Init() {
MS_LOG(DEBUG) << "use winograd";
std::string program_name;
program_name = "Winograd4x4To36" + std::to_string(init_count);
ocl_runtime->LoadSource(program_name, CodeGenWinograd4x4To36());
ocl_runtime->BuildKernel(kernel_4x4to36_, program_name, "Winograd4x4To36", build_options);
ocl_runtime_->LoadSource(program_name, CodeGenWinograd4x4To36());
ocl_runtime_->BuildKernel(kernel_4x4to36_, program_name, "Winograd4x4To36", build_options);

program_name = "WinogradConvolution" + std::to_string(init_count);
ocl_runtime->LoadSource(program_name, CodeGenWinogradConvolution());
ocl_runtime->BuildKernel(kernel_conv_, program_name, "WinogradConvolution", build_options);
ocl_runtime_->LoadSource(program_name, CodeGenWinogradConvolution());
ocl_runtime_->BuildKernel(kernel_conv_, program_name, "WinogradConvolution", build_options);

program_name = "Winograd36To4x4" + std::to_string(init_count);
ocl_runtime->LoadSource(program_name, CodeGenWinograd36To4x4());
ocl_runtime->BuildKernel(kernel_36to4x4_, program_name, "Winograd36To4x4", build_options);
ocl_runtime_->LoadSource(program_name, CodeGenWinograd36To4x4());
ocl_runtime_->BuildKernel(kernel_36to4x4_, program_name, "Winograd36To4x4", build_options);
} else {
std::string program_name = "convolution" + std::to_string(init_count);
std::string source = op_format_ == Format_NHWC4 ? CodeGenConvolutionNHWC4() : CodeGenConvolutionNC4HW4();
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_conv_, program_name, "Convolution", build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_conv_, program_name, "Convolution", build_options);
}

// allocate winograd memory
@@ -167,7 +166,7 @@ int ConvolutionOpenCLKernel::GenerateWinogradWeight() {
}

int ConvolutionOpenCLKernel::InitWeight() {
auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator();
auto allocator = ocl_runtime_->GetAllocator();

// allocate memory
size_t packed_weight_size;
@@ -205,8 +204,7 @@ int ConvolutionOpenCLKernel::InitWeight() {
}

int ConvolutionOpenCLKernel::InitBias() {
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto allocator = ocl_runtime->GetAllocator();
auto allocator = ocl_runtime_->GetAllocator();

// align bias from C to C4
auto bias_tensor = in_tensors_[2];
@@ -272,57 +270,56 @@ int ConvolutionOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_s

int ConvolutionOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();

int arg_cn = 0;
if (use_winograd_) {
arg_cn = 0;
cl_int4 _4x4to36_in_shape = {1, IH_, IW_, CI_SLICES_};
cl_int4 _4x4to36_out_shape = {1, 36, TILES_XY_, CI_SLICES_};
ocl_runtime->SetKernelArg(kernel_4x4to36_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG);
ocl_runtime->SetKernelArg(kernel_4x4to36_, arg_cn++, winograd_mem0_, lite::opencl::MemType::IMG);
ocl_runtime->SetKernelArg(kernel_4x4to36_, arg_cn++, _4x4to36_in_shape);
ocl_runtime->SetKernelArg(kernel_4x4to36_, arg_cn++, _4x4to36_out_shape);
ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG);
ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, winograd_mem0_, lite::opencl::MemType::IMG);
ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, _4x4to36_in_shape);
ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, _4x4to36_out_shape);

arg_cn = 0;
cl_int4 conv_in_shape = {1, 36, TILES_XY_, CI_SLICES_};
cl_int4 conv_out_shape = {1, 36, TILES_XY_, CO_SLICES_};
ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, winograd_mem0_, lite::opencl::MemType::IMG);
ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, winograd_mem1_, lite::opencl::MemType::IMG);
ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, packed_weight_, lite::opencl::MemType::BUF);
ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, conv_in_shape);
ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, conv_out_shape);
ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, winograd_mem0_, lite::opencl::MemType::IMG);
ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, winograd_mem1_, lite::opencl::MemType::IMG);
ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, packed_weight_, lite::opencl::MemType::BUF);
ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, conv_in_shape);
ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, conv_out_shape);

arg_cn = 0;
cl_int4 _36to4x4_in_shape = {1, 16, TILES_XY_, CO_SLICES_};
cl_int4 _36to4x4_out_shape = {1, OH_, OW_, CO_SLICES_};
ocl_runtime->SetKernelArg(kernel_36to4x4_, arg_cn++, winograd_mem1_, lite::opencl::MemType::IMG);
ocl_runtime->SetKernelArg(kernel_36to4x4_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG);
ocl_runtime->SetKernelArg(kernel_36to4x4_, arg_cn++, packed_bias_, lite::opencl::MemType::BUF);
ocl_runtime->SetKernelArg(kernel_36to4x4_, arg_cn++, _36to4x4_in_shape);
ocl_runtime->SetKernelArg(kernel_36to4x4_, arg_cn++, _36to4x4_out_shape);
ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, winograd_mem1_, lite::opencl::MemType::IMG);
ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG);
ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, packed_bias_, lite::opencl::MemType::BUF);
ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, _36to4x4_in_shape);
ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, _36to4x4_out_shape);
} else {
arg_cn = 0;
ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG);
ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG);
ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, packed_weight_, lite::opencl::MemType::BUF);
ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, packed_bias_, lite::opencl::MemType::BUF);
ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG);
ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG);
ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, packed_weight_, lite::opencl::MemType::BUF);
ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, packed_bias_, lite::opencl::MemType::BUF);
if (op_format_ == Format_NC4HW4) {
cl_int4 input_shape = {1, IH_, IW_, CI_SLICES_};
cl_int4 output_shape = {1, OH_, OW_, CO_SLICES_};
ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, input_shape);
ocl_runtime->SetKernelArg(kernel_conv_, arg_cn++, output_shape);
ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, input_shape);
ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, output_shape);
}
}

if (use_winograd_) {
ocl_runtime->RunKernel(kernel_4x4to36_, {size_t(TILES_XY_), 6, size_t(CI_SLICES_)}, {8, 6, 4}, nullptr);
ocl_runtime->RunKernel(kernel_conv_, {size_t(TILES_XY_ / 2), 36, size_t(CO_SLICES_ / 2)}, {8, 6, 2}, nullptr);
ocl_runtime->RunKernel(kernel_36to4x4_, {size_t(TILES_XY_), 4, size_t(CO_SLICES_)}, {32, 4, 2}, nullptr);
ocl_runtime_->RunKernel(kernel_4x4to36_, {size_t(TILES_XY_), 6, size_t(CI_SLICES_)}, {8, 6, 4}, nullptr);
ocl_runtime_->RunKernel(kernel_conv_, {size_t(TILES_XY_ / 2), 36, size_t(CO_SLICES_ / 2)}, {8, 6, 2}, nullptr);
ocl_runtime_->RunKernel(kernel_36to4x4_, {size_t(TILES_XY_), 4, size_t(CO_SLICES_)}, {32, 4, 2}, nullptr);
} else {
std::vector<size_t> global, local;
SetGlobalLocalConv(&global, &local);
ocl_runtime->RunKernel(kernel_conv_, global, local, nullptr);
ocl_runtime_->RunKernel(kernel_conv_, global, local, nullptr);
}

return RET_OK;
@@ -819,10 +816,9 @@ std::string ConvolutionOpenCLKernel::CodeGenWinograd36To4x4() {
}

int ConvolutionOpenCLKernel::SetGlobalLocalConv(std::vector<size_t> *global, std::vector<size_t> *local) {
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
constexpr size_t work_group_size[] = {4, 4, 1};
auto max_work_item_sizes = ocl_runtime->GetWorkItemSize();
size_t max_work_group_size = ocl_runtime->GetKernelMaxWorkGroupSize(kernel_conv_(), (*ocl_runtime->Device())());
auto max_work_item_sizes = ocl_runtime_->GetWorkItemSize();
size_t max_work_group_size = ocl_runtime_->GetKernelMaxWorkGroupSize(kernel_conv_(), (*ocl_runtime_->Device())());
const size_t max_z_size = std::min<size_t>(16, max_work_item_sizes[2]);

size_t global_h = UP_DIV(OH_, work_group_size[0]) * work_group_size[0];


+ 0
- 1
mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.h View File

@@ -22,7 +22,6 @@
#include "src/tensor.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "schema/model_generated.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "nnacl/conv_parameter.h"

namespace mindspore::kernel {


+ 19
- 23
mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc View File

@@ -21,7 +21,6 @@
#include <map>
#include <utility>
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/utils.h"
#include "nnacl/fp32/common_func.h"
#include "nnacl/op_base.h"
@@ -42,7 +41,6 @@ using mindspore::schema::PrimitiveType_DepthwiseConv2D;
namespace mindspore::kernel {

int DepthwiseConv2dOpenCLKernel::Init() {
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
std::string kernel_name = "DepthwiseConv2d";
auto in_format = op_format_;
in_ori_format_ = in_tensors_[0]->GetFormat();
@@ -69,13 +67,13 @@ int DepthwiseConv2dOpenCLKernel::Init() {
kernel_name += "_1x1";
}
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
std::string program_name = "DepthwiseConv2d";
std::set<std::string> build_options;
std::string source = depthwise_conv2d_source;
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
this->InitBuffer();
MS_LOG(DEBUG) << kernel_name << " Init Done! mem type=" << static_cast<int>(out_mem_type_);
@@ -84,9 +82,8 @@ int DepthwiseConv2dOpenCLKernel::Init() {

int DepthwiseConv2dOpenCLKernel::InitBuffer() {
auto parameter = reinterpret_cast<ConvParameter *>(op_parameter_);
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto allocator = ocl_runtime->GetAllocator();
bool is_fp16 = ocl_runtime->GetFp16Enable();
auto allocator = ocl_runtime_->GetAllocator();
bool is_fp16 = ocl_runtime_->GetFp16Enable();

// weight: o, h, w, i; o == group, i == 1
void *origin_weight = in_tensors_.at(kWeightIndex)->data_c();
@@ -162,7 +159,7 @@ int DepthwiseConv2dOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *i
im_dst_x = out_tensors_[0]->Width();
}
size_t img_dtype = CL_FLOAT;
if (lite::opencl::OpenCLRuntime::GetInstance()->GetFp16Enable()) {
if (ocl_runtime_->GetFp16Enable()) {
img_dtype = CL_HALF_FLOAT;
}
img_size->clear();
@@ -189,7 +186,6 @@ int DepthwiseConv2dOpenCLKernel::GetLocalSize(size_t idx, const std::vector<size
int DepthwiseConv2dOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";
auto parameter = reinterpret_cast<ConvParameter *>(op_parameter_);
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
size_t CI4 = UP_DIV(in_tensors_[0]->Channel(), C4NUM);
std::vector<size_t> global = {(size_t)out_tensors_[0]->Width(), (size_t)out_tensors_[0]->Height(), CO4};
@@ -207,19 +203,19 @@ int DepthwiseConv2dOpenCLKernel::Run() {
(cl_int)out_tensors_[0]->Batch()};

int arg_cnt = 0;
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, packed_weight_, lite::opencl::MemType::BUF);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, bias_data_, lite::opencl::MemType::BUF);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, kernel_size);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, stride);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, padding);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, dilation);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, src_size);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, dst_size);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].first);
ocl_runtime->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].second);
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, packed_weight_, lite::opencl::MemType::BUF);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_data_, lite::opencl::MemType::BUF);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dilation);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].first);
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].second);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
return RET_OK;
}



+ 0
- 1
mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h View File

@@ -20,7 +20,6 @@
#include <vector>
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "nnacl/conv_parameter.h"
#include "src/runtime/opencl/opencl_runtime.h"

namespace mindspore::kernel {



+ 11
- 15
mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc View File

@@ -19,7 +19,6 @@
#include <set>
#include <utility>
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/kernel/gather.h"
#include "src/runtime/kernel/opencl/cl/gather.cl.inc"

@@ -49,9 +48,8 @@ int GatherOpenCLKernel::Init() {
std::set<std::string> build_options;
std::string source = gather_source;
std::string program_name = "gather";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
// init indices_data_
auto indices_tensor = in_tensors_.at(1);
int indices_num = indices_tensor->ElementsNum();
@@ -104,8 +102,7 @@ int GatherOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size)
im_dst_x = out_tensors_[0]->Width();
}
size_t img_dtype = CL_FLOAT;
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto enable_fp16_ = ocl_runtime->GetFp16Enable();
auto enable_fp16_ = ocl_runtime_->GetFp16Enable();
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
@@ -117,7 +114,6 @@ int GatherOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size)
int GatherOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running! ";
auto param = reinterpret_cast<GatherParameter *>(this->op_parameter_);
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();

if (InitBuffer() != RET_OK) {
return RET_ERROR;
@@ -134,14 +130,14 @@ int GatherOpenCLKernel::Run() {
std::vector<size_t> local = {1, 1, 1};
std::vector<size_t> global = {(size_t)out_tensors_[0]->Width(), (size_t)out_tensors_[0]->Height(), CO4};
int arg_cn = 0;
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, indices_data_, lite::opencl::MemType::BUF);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, src_size);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, dst_size);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, indices_num);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->axis_);
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, indices_data_, lite::opencl::MemType::BUF);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, src_size);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, dst_size);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, indices_num);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->axis_);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);

return RET_OK;
}


+ 0
- 1
mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h View File

@@ -20,7 +20,6 @@
#include <vector>
#include "ir/anf.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "nnacl/gather_parameter.h"

namespace mindspore::kernel {


+ 13
- 16
mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc View File

@@ -19,7 +19,6 @@
#include <map>
#include "nnacl/fp32/common_func.h"
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/kernel/matmul.h"
#ifndef PROGRAM_WITH_IL
#include "src/runtime/kernel/opencl/cl/matmul.cl.inc"
@@ -35,7 +34,6 @@ namespace mindspore::kernel {
int MatMulOpenCLKernel::Init() {
std::string kernel_name = "MatMul";
kernel_name += "_" + std::string(EnumNameFormat(op_format_));
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto param = reinterpret_cast<MatMulParameter *>(op_parameter_);
transposeA = param->a_transpose_;
if (transposeA) {
@@ -43,7 +41,7 @@ int MatMulOpenCLKernel::Init() {
return RET_ERROR;
}
transposeB = param->b_transpose_;
enable_fp16_ = ocl_runtime->GetFp16Enable();
enable_fp16_ = ocl_runtime_->GetFp16Enable();
if (in_tensors_[0]->shape().size() != out_tensors_[0]->shape().size() ||
(in_tensors_[0]->shape().size() != 2 && in_tensors_[0]->shape().size() != 4)) {
MS_LOG(ERROR) << "matmul only support input shape size=2 or 4.";
@@ -57,13 +55,13 @@ int MatMulOpenCLKernel::Init() {
std::map<int, std::string> dims2str = {{2, "_2d"}, {4, "_4d"}};
kernel_name += dims2str[dims];
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
std::set<std::string> build_options;
std::string source = matmul_source;
std::string program_name = "MatMul";
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif

PadWeight();
@@ -79,7 +77,7 @@ int MatMulOpenCLKernel::ReSize() { return RET_OK; }

void MatMulOpenCLKernel::PadWeight() {
// ABMCI @ ABCICO = ABMCO
auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator();
auto allocator = ocl_runtime_->GetAllocator();
int ci = inShape[3];
int ci4 = UP_DIV(ci, C4NUM);
int co = outShape[3];
@@ -201,7 +199,6 @@ int MatMulOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size)

int MatMulOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
// local size should less than MAX_GROUP_SIZE
std::vector<size_t> local = {32, 4, 1};
std::vector<size_t> global = {UP_DIV(static_cast<size_t>(outShape[3]), C4NUM),
@@ -210,14 +207,14 @@ int MatMulOpenCLKernel::Run() {
int arg_count = 0;
cl_int4 in_shape = {inShape[0], inShape[1], inShape[2], inShape[3]};
cl_int4 out_shape = {outShape[0], outShape[1], outShape[2], outShape[3]};
ocl_runtime->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_count++, padWeight_, lite::opencl::MemType::BUF);
ocl_runtime->SetKernelArg(kernel_, arg_count++, bias_);
ocl_runtime->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_count++, in_shape);
ocl_runtime->SetKernelArg(kernel_, arg_count++, out_shape);
ocl_runtime->SetKernelArg(kernel_, arg_count++, hasBias_ ? 1 : 0);
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_count++, padWeight_, lite::opencl::MemType::BUF);
ocl_runtime_->SetKernelArg(kernel_, arg_count++, bias_);
ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_shape);
ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_shape);
ocl_runtime_->SetKernelArg(kernel_, arg_count++, hasBias_ ? 1 : 0);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
return RET_OK;
}



+ 0
- 1
mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h View File

@@ -21,7 +21,6 @@

#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "nnacl/matmul_parameter.h"
#include "src/runtime/opencl/opencl_runtime.h"

namespace mindspore::kernel {



+ 13
- 17
mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc View File

@@ -20,8 +20,6 @@
#include "include/errorcode.h"
#include "src/kernel_registry.h"
#include "src/runtime/kernel/opencl/utils.h"
#include "src/runtime/opencl/opencl_wrapper.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/image_format.h"
#ifndef PROGRAM_WITH_IL
#include "src/runtime/kernel/opencl/cl/avg_pool2d.cl.inc"
@@ -59,10 +57,9 @@ int PoolingOpenCLKernel::Init() {
MS_LOG(ERROR) << "Init `Pooling2d` kernel failed!";
return RET_INVALID_OP_NAME;
}
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
enable_fp16_ = ocl_runtime->GetFp16Enable();
enable_fp16_ = ocl_runtime_->GetFp16Enable();
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
kernel_name += "_" + std::string(EnumNameFormat(op_format_));
if (out_mem_type_ == OpenCLMemType::BUF) {
@@ -72,8 +69,8 @@ int PoolingOpenCLKernel::Init() {
kernel_name += "_IMG";
}
std::set<std::string> build_options;
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
@@ -124,7 +121,6 @@ int PoolingOpenCLKernel::ReSize() { return RET_OK; }

int PoolingOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();

int slices = UP_DIV(out_tensors_[0]->shape()[3], C4NUM);
cl_int4 input_shape = {in_tensors_[0]->shape()[1], in_tensors_[0]->shape()[2], in_tensors_[0]->shape()[3], slices};
@@ -135,21 +131,21 @@ int PoolingOpenCLKernel::Run() {
cl_int2 padding = {parameter_->pad_u_, parameter_->pad_l_};

int arg_idx = 0;
ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, input_shape);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, output_shape);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, stride);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, kernel_size);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, padding);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_shape);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, stride);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, kernel_size);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, padding);

std::vector<size_t> local_size;
std::vector<size_t> global_size = InitGlobalSize();
int max_work_group_size = ocl_runtime->GetKernelMaxWorkGroupSize(kernel_(), (*ocl_runtime->Device())());
int max_work_group_size = ocl_runtime_->GetKernelMaxWorkGroupSize(kernel_(), (*ocl_runtime_->Device())());
local_size = GetCommonLocalSize(global_size, max_work_group_size);
global_size = GetCommonGlobalSize(local_size, global_size);

ocl_runtime->RunKernel(kernel_, global_size, local_size, nullptr);
ocl_runtime_->RunKernel(kernel_, global_size, local_size, nullptr);
return RET_OK;
}



+ 0
- 1
mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h View File

@@ -21,7 +21,6 @@

#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "nnacl/fp32/pooling.h"
#include "src/runtime/opencl/opencl_runtime.h"

namespace mindspore::kernel {



+ 11
- 14
mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc View File

@@ -24,7 +24,6 @@
#include "include/errorcode.h"
#include "nnacl/fp32/common_func.h"
#include "src/runtime/kernel/opencl/kernel/prelu.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/cl/prelu.cl.inc"

using mindspore::kernel::KERNEL_ARCH::kGPU;
@@ -36,7 +35,7 @@ using mindspore::schema::PrimitiveType_PReLU;
namespace mindspore::kernel {

void PReluOpenCLKernel::InitBuffer() {
auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator();
auto allocator = ocl_runtime_->GetAllocator();
int elem_num = in_tensors_[0]->shape().size() == 2 ? in_tensors_[0]->shape()[1] : in_tensors_[0]->shape()[3];
int elem_num_c4 = UP_DIV(elem_num, C4NUM);
size_t img_dtype = CL_FLOAT;
@@ -91,12 +90,11 @@ int PReluOpenCLKernel::Init() {
std::string source = prelu_source;
std::string program_name = "PRelu";
std::string kernel_name = "PRelu";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
enable_fp16_ = ocl_runtime->GetFp16Enable();
enable_fp16_ = ocl_runtime_->GetFp16Enable();
fp_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float);
InitBuffer();
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
in_ori_format_ = in_tensors_[0]->GetFormat();
in_tensors_[0]->SetFormat(op_format_);
out_ori_format_ = out_tensors_[0]->GetFormat();
@@ -107,18 +105,17 @@ int PReluOpenCLKernel::Init() {

int PReluOpenCLKernel::Run() {
MS_LOG(DEBUG) << op_parameter_->name_ << " Running!";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
std::map<schema::Format, int> data_type{{schema::Format::Format_NHWC4, 1}, {schema::Format::Format_NC4HW4, 2}};
int arg_idx = 0;
ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, input_shape_);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, PReluWeight_);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, reinterpret_cast<int>(in_tensors_[1]->shape()[0]));
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_shape_);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, PReluWeight_);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, reinterpret_cast<int>(in_tensors_[1]->shape()[0]));
std::vector<size_t> local = {1, 1};
std::vector<size_t> global = {static_cast<size_t>(global_shape_.s[1]), static_cast<size_t>(global_shape_.s[2])};
auto ret = ocl_runtime->RunKernel(kernel_, global, local, nullptr);
auto ret = ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Run kernel " << op_parameter_->name_ << " error.";
return RET_ERROR;


+ 0
- 1
mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h View File

@@ -22,7 +22,6 @@
#include "src/tensor.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "schema/model_generated.h"
#include "src/runtime/opencl/opencl_runtime.h"

namespace mindspore::kernel {



+ 8
- 11
mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc View File

@@ -19,7 +19,6 @@
#include <map>
#include "include/errorcode.h"
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/kernel/reduce.h"
#include "src/runtime/kernel/opencl/cl/reduce.cl.inc"

@@ -59,8 +58,7 @@ int ReduceOpenCLKernel::Init() {
}
std::string kernel_name = reduce_type2str.at(reduce_param->mode_);
kernel_name += "_" + std::string(EnumNameFormat(op_format_));
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
enable_fp16_ = ocl_runtime->GetFp16Enable();
enable_fp16_ = ocl_runtime_->GetFp16Enable();

if (in_tensors_[0]->shape().back() != out_tensors_[0]->shape().back()) {
MS_LOG(ERROR) << "Reduce input channel " << in_tensors_[0]->shape().back() << " should equal output channel"
@@ -68,12 +66,12 @@ int ReduceOpenCLKernel::Init() {
return RET_ERROR;
}
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
std::set<std::string> build_options;
std::string source = reduce_source;
ocl_runtime->LoadSource(kernel_name, source);
ocl_runtime->BuildKernel(kernel_, kernel_name, kernel_name, build_options);
ocl_runtime_->LoadSource(kernel_name, source);
ocl_runtime_->BuildKernel(kernel_, kernel_name, kernel_name, build_options);
#endif
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
@@ -130,15 +128,14 @@ int ReduceOpenCLKernel::Run() {
int w = shapex[2];
int c = shapex[3];
int c4 = UP_DIV(c, C4NUM);
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
std::vector<size_t> local = {};
std::vector<size_t> global = {static_cast<size_t>(c4)};
cl_int4 size = {h, w, c4, 1};
int arg_idx = 0;
ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, size);
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
return RET_OK;
}



+ 0
- 1
mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h View File

@@ -20,7 +20,6 @@
#include <vector>

#include "src/lite_kernel.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "nnacl/reduce_parameter.h"



+ 9
- 12
mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc View File

@@ -18,7 +18,6 @@
#include <string>
#include "include/errorcode.h"
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/kernel/reshape.h"
#include "src/runtime/kernel/opencl/cl/reshape.cl.inc"

@@ -34,8 +33,7 @@ namespace mindspore::kernel {
int ReshapeOpenCLKernel::Init() {
std::string kernel_name = "reshape";
kernel_name += "_" + std::string(EnumNameFormat(op_format_));
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
enable_fp16_ = ocl_runtime->GetFp16Enable();
enable_fp16_ = ocl_runtime_->GetFp16Enable();
if (out_tensors_[0]->shape().size() != 2 && out_tensors_[0]->shape().size() != 4) {
MS_LOG(ERROR) << "Reshape output size should in 2,4";
return RET_ERROR;
@@ -46,13 +44,13 @@ int ReshapeOpenCLKernel::Init() {
return RET_ERROR;
}
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
std::set<std::string> build_options;
std::string source = reshape_source;
std::string program_name = "reshape";
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
@@ -112,17 +110,16 @@ int ReshapeOpenCLKernel::Run() {
oh = out_tensors_[0]->shape()[1];
ow = out_tensors_[0]->shape()[2];
}
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
std::vector<size_t> local = {};
std::vector<size_t> global = {(size_t)oh, (size_t)ow, (size_t)c4};
cl_int4 size = {h, w, c4, 1};
cl_int4 size_out = {oh, ow, c4, 1};
int arg_idx = 0;
ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, size);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, size_out);
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size_out);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
return RET_OK;
}



+ 0
- 1
mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h View File

@@ -20,7 +20,6 @@
#include <vector>

#include "src/lite_kernel.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"

namespace mindspore::kernel {


+ 0
- 1
mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc View File

@@ -245,7 +245,6 @@ int ScaleOpenCLKernel::InitBuffer() {
}

int ScaleOpenCLKernel::Init() {
ocl_runtime_ = lite::opencl::OpenCLRuntime::GetInstance();
std::string kernel_name;

const ScaleParameter *scale_param = reinterpret_cast<const ScaleParameter *>(op_parameter_);


+ 0
- 2
mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h View File

@@ -19,7 +19,6 @@

#include <vector>
#include "nnacl/scale.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"

namespace mindspore::kernel {
@@ -42,7 +41,6 @@ class ScaleOpenCLKernel : public OpenCLKernel {
int InitBuffer();

cl::Kernel kernel_;
lite::opencl::OpenCLRuntime *ocl_runtime_;
bool element_flag_{true};
void *scale_ptr_{nullptr};
void *offset_ptr_{nullptr};


+ 11
- 15
mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc View File

@@ -18,7 +18,6 @@
#include <algorithm>
#include <set>
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/kernel/slice.h"
#include "src/runtime/kernel/opencl/utils.h"
#include "src/runtime/kernel/opencl/cl/slice.cl.inc"
@@ -40,8 +39,7 @@ int SliceOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
im_dst_x = out_tensors_[0]->Width();
}
size_t img_dtype = CL_FLOAT;
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto enable_fp16_ = ocl_runtime->GetFp16Enable();
auto enable_fp16_ = ocl_runtime_->GetFp16Enable();
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
}
@@ -71,9 +69,8 @@ int SliceOpenCLKernel::Init() {
std::set<std::string> build_options;
std::string source = slice_source;
std::string program_name = "slice";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
return RET_OK;
}

@@ -96,7 +93,6 @@ void SlcieGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t> *l
int SliceOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running! ";
auto param = reinterpret_cast<SliceParameter *>(this->op_parameter_);
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto input_shape = in_tensors_[0]->shape();
cl_int4 input_shape_ = {input_shape[0], input_shape[1], input_shape[2], UP_DIV(input_shape[3], C4NUM)};
cl_int4 size_ = {param->size_[0], param->size_[1], param->size_[2], UP_DIV(param->size_[3], C4NUM)};
@@ -105,18 +101,18 @@ int SliceOpenCLKernel::Run() {
uint32_t OH = param->size_[1];
uint32_t OW = param->size_[2];

const std::vector<size_t> &max_global = ocl_runtime->GetWorkItemSize();
const std::vector<size_t> &max_global = ocl_runtime_->GetWorkItemSize();
std::vector<size_t> local = {1, 1, 1}; // init local
std::vector<size_t> global = {1, OH, OW};
SlcieGetWorkGroup(global, &local, max_global[0]);
int arg_cn = 0;
ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); // input tensor
ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); // out tensor
ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, size_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, begin_);
ocl_runtime->SetKernelArg(kernel_, arg_cn++, sharedNoUpdiv);
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); // input tensor
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); // out tensor
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, size_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, begin_);
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, sharedNoUpdiv);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);

return RET_OK;
}


+ 0
- 1
mindspore/lite/src/runtime/kernel/opencl/kernel/slice.h View File

@@ -20,7 +20,6 @@
#include <vector>
#include "ir/anf.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "nnacl/fp32/slice.h"

namespace mindspore::kernel {


+ 15
- 17
mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc View File

@@ -19,7 +19,6 @@
#include <set>
#include "include/errorcode.h"
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/utils.h"
#ifndef PROGRAM_WITH_IL
#include "src/runtime/kernel/opencl/cl/softmax.cl.inc"
@@ -51,7 +50,7 @@ int SoftmaxOpenCLKernel::InitGlobalSize() {
int SoftmaxOpenCLKernel::SetWorkGroupSize() {
// set work group size
InitGlobalSize();
int max_work_group_size = runtime_->GetKernelMaxWorkGroupSize(kernel_(), (*runtime_->Device())());
int max_work_group_size = ocl_runtime_->GetKernelMaxWorkGroupSize(kernel_(), (*ocl_runtime_->Device())());
local_size_ = GetCommonLocalSize(global_size_, max_work_group_size);
global_size_ = GetCommonGlobalSize(local_size_, global_size_);
return lite::RET_OK;
@@ -101,8 +100,7 @@ int SoftmaxOpenCLKernel::Init() {
std::string program_name = "SoftMax";

std::string source = softmax_source;
runtime_ = lite::opencl::OpenCLRuntime::GetInstance();
enable_fp16_ = runtime_->GetFp16Enable();
enable_fp16_ = ocl_runtime_->GetFp16Enable();
// framework not set this param yet! just use default.
if (in_tensors_[0]->shape().size() == 4) {
// support 4d tensor
@@ -133,8 +131,8 @@ int SoftmaxOpenCLKernel::Init() {
program_name += "_IMG";
}
std::set<std::string> build_options;
runtime_->LoadSource(program_name, source);
runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
in_ori_format_ = in_tensors_[0]->GetFormat();
out_ori_format_ = out_tensors_[0]->GetFormat();
@@ -158,32 +156,32 @@ int SoftmaxOpenCLKernel::Run() {
auto mask_ = GetMaskForLastChannel(channel_size);
cl_float4 mask = {mask_[0], mask_[1], mask_[2], mask_[3]};

runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
if (is_image_out_) {
runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
} else {
runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF);
}
runtime_->SetKernelArg(kernel_, arg_idx++, mask);
runtime_->SetKernelArg(kernel_, arg_idx++, slices);
runtime_->SetKernelArg(kernel_, arg_idx, slices_x32);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, mask);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, slices);
ocl_runtime_->SetKernelArg(kernel_, arg_idx, slices_x32);
SetWorkGroupSize1x1();
} else {
int slices = UP_DIV(out_tensors_[0]->shape()[3], C4NUM);
cl_int4 input_shape = {in_tensors_[0]->shape()[1], in_tensors_[0]->shape()[2], in_tensors_[0]->shape()[3], slices};

runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
if (is_image_out_) {
runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
} else {
runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF);
}
runtime_->SetKernelArg(kernel_, arg_idx, input_shape);
ocl_runtime_->SetKernelArg(kernel_, arg_idx, input_shape);
SetWorkGroupSize();
}

// run opengl kernel
runtime_->RunKernel(kernel_, global_size_, local_size_, nullptr);
ocl_runtime_->RunKernel(kernel_, global_size_, local_size_, nullptr);
return lite::RET_OK;
}



+ 0
- 2
mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h View File

@@ -21,7 +21,6 @@

#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "nnacl/fp32/softmax.h"
#include "src/runtime/opencl/opencl_runtime.h"

namespace mindspore::kernel {

@@ -46,7 +45,6 @@ class SoftmaxOpenCLKernel : public OpenCLKernel {
private:
cl::Kernel kernel_;
SoftmaxParameter *parameter_;
lite::opencl::OpenCLRuntime *runtime_;

bool onexone_flag_{false};
std::vector<size_t> local_size_;


+ 9
- 12
mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc View File

@@ -21,7 +21,6 @@
#include <utility>
#include "include/errorcode.h"
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/cl/to_format.cl.inc"

using mindspore::kernel::KERNEL_ARCH::kGPU;
@@ -33,7 +32,6 @@ using mindspore::schema::PrimitiveType_ToFormat;
namespace mindspore::kernel {

int ToFormatOpenCLKernel::Init() {
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
auto parameter = reinterpret_cast<OpenCLToFormatParameter *>(op_parameter_);
out_mem_type_ = parameter->out_mem_type;
std::string program_name = "to_format";
@@ -53,12 +51,12 @@ int ToFormatOpenCLKernel::Init() {

this->set_name(kernel_name);
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
std::set<std::string> build_options;
std::string source = to_format_source;
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
InitNHWCShape();
MS_LOG(DEBUG) << kernel_name << " Init Done!";
@@ -147,7 +145,7 @@ int ToFormatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size
return RET_ERROR;
}
img_size->clear();
auto enable_fp16_ = lite::opencl::OpenCLRuntime::GetInstance()->GetFp16Enable();
auto enable_fp16_ = ocl_runtime_->GetFp16Enable();
size_t img_dtype = CL_FLOAT;
if (enable_fp16_) {
img_dtype = CL_HALF_FLOAT;
@@ -158,7 +156,6 @@ int ToFormatOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size
}
int ToFormatOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
std::vector<size_t> local = {};
std::vector<size_t> global;
GetGlobalSize(0, &global);
@@ -167,11 +164,11 @@ int ToFormatOpenCLKernel::Run() {
cl_int4 gsize{(cl_int)global[0], (cl_int)global[1], (cl_int)global[2], 1};
auto src_mem_type = (out_mem_type_ == OpenCLMemType::IMG) ? lite::opencl::MemType::BUF : lite::opencl::MemType::IMG;
auto dst_mem_type = (out_mem_type_ == OpenCLMemType::IMG) ? lite::opencl::MemType::IMG : lite::opencl::MemType::BUF;
ocl_runtime->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c(), src_mem_type);
ocl_runtime->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c(), dst_mem_type);
ocl_runtime->SetKernelArg(kernel_, 2, gsize);
ocl_runtime->SetKernelArg(kernel_, 3, shape);
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c(), src_mem_type);
ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c(), dst_mem_type);
ocl_runtime_->SetKernelArg(kernel_, 2, gsize);
ocl_runtime_->SetKernelArg(kernel_, 3, shape);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
return RET_OK;
}



+ 0
- 1
mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h View File

@@ -20,7 +20,6 @@
#include <vector>

#include "src/lite_kernel.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"

namespace mindspore::kernel {


+ 12
- 15
mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc View File

@@ -18,7 +18,6 @@
#include <string>
#include "include/errorcode.h"
#include "src/kernel_registry.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/kernel/transpose.h"
#ifndef PROGRAM_WITH_IL
#include "src/runtime/kernel/opencl/cl/transpose.cl.inc"
@@ -34,8 +33,7 @@ namespace mindspore::kernel {

int TransposeOpenCLKernel::Init() {
std::string kernel_name = "transpose";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
enable_fp16_ = ocl_runtime->GetFp16Enable();
enable_fp16_ = ocl_runtime_->GetFp16Enable();
auto param = reinterpret_cast<TransposeParameter *>(op_parameter_);
if (param->num_axes_ == 4 && param->perm_[0] == 0 && param->perm_[1] == 3 && param->perm_[2] == 1 &&
param->perm_[3] == 2) {
@@ -52,13 +50,13 @@ int TransposeOpenCLKernel::Init() {
kernel_name += "_IMG";
}
#ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name);
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else
std::set<std::string> build_options;
std::string source = transpose_source;
std::string program_name = "transpose";
ocl_runtime->LoadSource(program_name, source);
ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
#endif
if ((in_tensors_[0]->shape()[1] * in_tensors_[0]->shape()[2]) % 4 != 0) {
MS_LOG(ERROR) << "input H * W % 4 != 0 not support!";
@@ -114,24 +112,23 @@ int TransposeOpenCLKernel::Run() {
int c = shapex[3];
int c4 = UP_DIV(c, 4);
int hw4 = UP_DIV(h * w, 4);
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
std::vector<size_t> local = {16, 16};
std::vector<size_t> global = {UP_ROUND(hw4, local[0]), UP_ROUND(c4, local[1])};

cl_int2 HW = {h * w, hw4};
cl_int2 C = {c, c4};
int arg_idx = 0;
ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
if (out_mem_type_ == OpenCLMemType::BUF) {
ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF);
} else {
ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
}
ocl_runtime->SetKernelArg(kernel_, arg_idx++, HW);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, C);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, w);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, h);
ocl_runtime->RunKernel(kernel_, global, local, nullptr);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, HW);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, C);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, w);
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, h);
ocl_runtime_->RunKernel(kernel_, global, local, nullptr);
return RET_OK;
}



+ 0
- 1
mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h View File

@@ -21,7 +21,6 @@

#include "src/lite_kernel.h"
#include "nnacl/transpose.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"

namespace mindspore::kernel {


+ 12
- 1
mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h View File

@@ -20,6 +20,7 @@
#include <vector>
#include "src/lite_kernel.h"
#include "include/errorcode.h"
#include "src/runtime/opencl/opencl_runtime.h"

namespace mindspore::kernel {

@@ -36,7 +37,16 @@ class OpenCLKernel : public LiteKernel {
public:
explicit OpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs)
: LiteKernel(parameter, inputs, outputs, nullptr, nullptr) {}
: LiteKernel(parameter, inputs, outputs, nullptr, nullptr) {
ocl_runtime_ = lite::opencl::OpenCLRuntime::GetInstance();
}

~OpenCLKernel() {
if (ocl_runtime_ != nullptr) {
lite::opencl::OpenCLRuntime::DeleteInstance();
ocl_runtime_ = nullptr;
}
}

virtual int Init() { return RET_ERROR; }
virtual int Prepare() { return RET_ERROR; }
@@ -59,6 +69,7 @@ class OpenCLKernel : public LiteKernel {
schema::Format in_ori_format_{schema::Format::Format_NHWC};
schema::Format out_ori_format_{schema::Format::Format_NHWC4};
schema::Format op_format_{schema::Format::Format_NHWC4};
lite::opencl::OpenCLRuntime *ocl_runtime_{nullptr};
};
} // namespace mindspore::kernel



+ 15
- 7
mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc View File

@@ -99,7 +99,7 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_te

out_tensors->emplace_back(new_tensor);
KernelKey desc{kGPU, kNumberTypeFloat32, schema::PrimitiveType_ToFormat};
if (mem_type == OpenCLMemType::IMG && lite::opencl::OpenCLRuntime::GetInstance()->GetFp16Enable()) {
if (mem_type == OpenCLMemType::IMG && ocl_runtime_->GetFp16Enable()) {
desc.data_type = kNumberTypeFloat16;
new_tensor->set_data_type(kNumberTypeFloat16);
}
@@ -160,7 +160,8 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_te
}

int SubGraphOpenCLKernel::Init() {
allocator_ = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator();
ocl_runtime_ = lite::opencl::OpenCLRuntime::GetInstance();
allocator_ = ocl_runtime_->GetAllocator();
MS_LOG(DEBUG) << "input num=" << in_tensors_.size() << ", output num=" << out_tensors_.size();
for (const auto tensor : in_tensors_) {
tensor->set_allocator(allocator_);
@@ -195,8 +196,7 @@ int SubGraphOpenCLKernel::Init() {
}

int SubGraphOpenCLKernel::UpdateTensorDataType() {
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
bool is_fp16 = ocl_runtime->GetFp16Enable();
bool is_fp16 = ocl_runtime_->GetFp16Enable();
if (is_fp16 && (in_tensors_[0]->data_type() == kNumberTypeFloat32)) {
std::set<lite::Tensor *> out_set;
out_set.insert(in_tensors_.begin(), in_tensors_.end());
@@ -292,16 +292,25 @@ int SubGraphOpenCLKernel::UnInit() {
delete tensor;
}
}
in_convert_tensors_.clear();
for (const auto &tensor : out_convert_tensors_) {
if (tensor != nullptr) {
delete tensor;
}
}
for (const auto &op : in_convert_ops_) {
out_convert_tensors_.clear();
for (const auto &op : nodes_) {
if (op != nullptr) {
delete op;
}
}
nodes_.clear();
in_convert_ops_.clear();
out_convert_ops_.clear();
if (ocl_runtime_ != nullptr) {
lite::opencl::OpenCLRuntime::DeleteInstance();
ocl_runtime_ = nullptr;
}
return RET_OK;
}

@@ -310,14 +319,13 @@ int SubGraphOpenCLKernel::InferShape() { return RET_OK; }
int SubGraphOpenCLKernel::ReSize() { return RET_OK; }

int SubGraphOpenCLKernel::Run() {
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
for (auto &tensor : in_tensors_) {
allocator_->UnmapBuffer(tensor->data_c());
}

lite::opencl::OpenCLExecutor executor;
executor.Run(in_tensors_, out_tensors_, nodes_, allocator_);
ocl_runtime->SyncCommandQueue();
ocl_runtime_->SyncCommandQueue();

return RET_OK;
}


+ 1
- 0
mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h View File

@@ -64,6 +64,7 @@ class SubGraphOpenCLKernel : public SubGraphKernel {
std::vector<OpenCLToFormatParameter *> out_parameters_;
std::vector<LiteKernel *> in_convert_ops_;
std::vector<LiteKernel *> out_convert_ops_;
lite::opencl::OpenCLRuntime *ocl_runtime_{nullptr};
};
} // namespace mindspore::kernel



+ 0
- 17
mindspore/lite/src/runtime/opencl/opencl_allocator.cc View File

@@ -23,8 +23,6 @@

namespace mindspore::lite::opencl {

OpenCLAllocator::OpenCLAllocator() {}

OpenCLAllocator::OpenCLAllocator(OpenCLRuntime *ocl_runtime) : ocl_runtime_(ocl_runtime) {}

OpenCLAllocator::~OpenCLAllocator() { Clear(); }
@@ -49,9 +47,6 @@ void OpenCLAllocator::UnLock() {
void *OpenCLAllocator::Malloc(size_t size) { return Malloc(size, std::vector<size_t>{}); }

void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t> &img_size) {
if (ocl_runtime_ == nullptr) {
ocl_runtime_ = opencl::OpenCLRuntime::GetInstance();
}
auto svm_capabilities = ocl_runtime_->GetSVMCapabilities();

size_t img_pitch = 0;
@@ -144,9 +139,6 @@ void *OpenCLAllocator::CreateImageFromHost(void *data, size_t size, const std::v
MS_LOG(ERROR) << "MallocData out of max_size, size: " << size;
return nullptr;
}
if (ocl_runtime_ == nullptr) {
ocl_runtime_ = opencl::OpenCLRuntime::GetInstance();
}
Lock();
auto iter = free_list_.lower_bound(size);
while (iter != free_list_.end() && (iter->second->size_ >= size) && (iter->second->size_ < (size << shift_factor_))) {
@@ -258,9 +250,6 @@ void *OpenCLAllocator::GetBuffer(void *buffer) {

void OpenCLAllocator::Clear() {
Lock();
if (ocl_runtime_ == nullptr) {
ocl_runtime_ = opencl::OpenCLRuntime::GetInstance();
}
auto svm_capabilities = ocl_runtime_->GetSVMCapabilities();
for (auto it = allocated_list_.begin(); it != allocated_list_.end(); it++) {
if (svm_capabilities) {
@@ -306,9 +295,6 @@ void OpenCLAllocator::Clear() {
}

void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue, bool sync) {
if (ocl_runtime_ == nullptr) {
ocl_runtime_ = opencl::OpenCLRuntime::GetInstance();
}
auto svm_capabilities = ocl_runtime_->GetSVMCapabilities();
if (svm_capabilities) {
if (!(svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)) {
@@ -362,9 +348,6 @@ void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue,
}

int OpenCLAllocator::UnmapBuffer(void *host_ptr, void *command_queue) {
if (ocl_runtime_ == nullptr) {
ocl_runtime_ = opencl::OpenCLRuntime::GetInstance();
}
auto svm_capabilities = ocl_runtime_->GetSVMCapabilities();
if (svm_capabilities) {
if (!(svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)) {


+ 0
- 1
mindspore/lite/src/runtime/opencl/opencl_allocator.h View File

@@ -45,7 +45,6 @@ enum class MemType : char { SVM, BUF, IMG };

class OpenCLAllocator : public Allocator {
public:
OpenCLAllocator();
explicit OpenCLAllocator(OpenCLRuntime *ocl_runtime);
~OpenCLAllocator() override;
void SetContext(const AllocatorContext &ctx) override;


+ 5
- 1
mindspore/lite/src/runtime/opencl/opencl_executor.h View File

@@ -27,7 +27,11 @@
namespace mindspore::lite::opencl {
class OpenCLExecutor : Executor {
public:
OpenCLExecutor() : Executor() { allocator_ = OpenCLRuntime::GetInstance()->GetAllocator(); }
OpenCLExecutor() : Executor() {
auto ocl_runtime = OpenCLRuntime::GetInstance();
allocator_ = ocl_runtime->GetAllocator();
OpenCLRuntime::DeleteInstance();
}

int Prepare(const std::vector<kernel::LiteKernel *> &kernels);



+ 1
- 1
mindspore/lite/src/scheduler.cc View File

@@ -244,7 +244,7 @@ kernel::LiteKernel *Scheduler::ScheduleNode(const std::vector<Tensor *> &in_tens
TypeId data_type = GetFirstFp32Fp16OrInt8Type(in_tensors);
kernel::KernelKey desc{kernel::KERNEL_ARCH::kCPU, data_type, static_cast<schema::PrimitiveType>(primitive->Type())};
#if SUPPORT_GPU
if (context_->device_type_ == DT_GPU && lite::opencl::OpenCLRuntime::GetInstance()->IsInitOK()) {
if (context_->device_type_ == DT_GPU) {
desc.arch = kernel::KERNEL_ARCH::kGPU;
auto *kernel = KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, primitive, context_, desc);
if (kernel != nullptr) {


+ 0
- 15
mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc View File

@@ -157,7 +157,6 @@ TEST_F(TestActivationOpenCL, ReluFp_dim4) {
ret = sub_graph->Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init sub_graph error.";
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@@ -167,7 +166,6 @@ TEST_F(TestActivationOpenCL, ReluFp_dim4) {
MS_LOG(INFO) << "Run SubGraphOpenCLKernel.";
ret = sub_graph->Run();
if (ret != RET_OK) {
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@@ -182,7 +180,6 @@ TEST_F(TestActivationOpenCL, ReluFp_dim4) {
printf_tensor<float>("ReluFp32--output data--", outputs[0]);
CompareRes<float>(output_tensor, out_file);
}
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@@ -271,7 +268,6 @@ TEST_F(TestActivationOpenCL, Relu6Fp_dim4) {
ret = sub_graph->Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init sub_graph error.";
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@@ -281,7 +277,6 @@ TEST_F(TestActivationOpenCL, Relu6Fp_dim4) {
MS_LOG(INFO) << "Run SubGraphOpenCLKernel.";
ret = sub_graph->Run();
if (ret != RET_OK) {
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@@ -297,7 +292,6 @@ TEST_F(TestActivationOpenCL, Relu6Fp_dim4) {
printf_tensor<float>("Relu6:FP32--output data---", outputs[0]);
CompareRes<float>(output_tensor, out_file);
}
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@@ -386,7 +380,6 @@ TEST_F(TestActivationOpenCL, SigmoidFp_dim4) {
ret = sub_graph->Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init sub_graph error.";
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@@ -396,7 +389,6 @@ TEST_F(TestActivationOpenCL, SigmoidFp_dim4) {
MS_LOG(INFO) << "Run SubGraphOpenCLKernel.";
ret = sub_graph->Run();
if (ret != RET_OK) {
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@@ -412,7 +404,6 @@ TEST_F(TestActivationOpenCL, SigmoidFp_dim4) {
printf_tensor<float>("Sigmoid:FP32--output data---", outputs[0]);
CompareRes<float>(output_tensor, out_file);
}
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@@ -502,7 +493,6 @@ TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) {
ret = sub_graph->Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init sub_graph error.";
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@@ -512,7 +502,6 @@ TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) {
MS_LOG(INFO) << "Run SubGraphOpenCLKernel.";
ret = sub_graph->Run();
if (ret != RET_OK) {
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@@ -527,7 +516,6 @@ TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) {
printf_tensor<float>("Leaky Relu:FP32--output data---", outputs[0]);
CompareRes<float>(output_tensor, out_file);
}
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@@ -616,7 +604,6 @@ TEST_F(TestActivationOpenCLTanh, TanhFp_dim4) {
ret = sub_graph->Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init sub_graph error.";
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@@ -626,7 +613,6 @@ TEST_F(TestActivationOpenCLTanh, TanhFp_dim4) {
MS_LOG(INFO) << "Run SubGraphOpenCLKernel.";
ret = sub_graph->Run();
if (ret != RET_OK) {
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;
@@ -642,7 +628,6 @@ TEST_F(TestActivationOpenCLTanh, TanhFp_dim4) {
printf_tensor<float>("Tanh:FP32--output data---", outputs[0]);
CompareRes<float>(output_tensor, out_file);
}
delete kernel;
delete param;
delete input_tensor;
delete output_tensor;


+ 0
- 1
mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_self_tests.cc View File

@@ -127,7 +127,6 @@ TEST_F(TestArithmeticSelfOpenCLfp16, ArithmeticSelfOpenCLFp16) {
delete tensor;
}
delete param;
delete arithmeticself_kernel;
delete sub_graph;
}
} // namespace mindspore

+ 0
- 1
mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc View File

@@ -203,7 +203,6 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh
delete[] data_c_ocl;

delete kernel;
delete arith_kernel;
delete param;
for (auto tensor : inputs) {
delete tensor;


+ 0
- 1
mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc View File

@@ -147,7 +147,6 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) {
delete tensor;
}
delete param;
delete batchnorm_kernel;
delete sub_graph;
}
TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) {


+ 0
- 3
mindspore/lite/test/ut/src/runtime/kernel/opencl/biasadd_tests.cc View File

@@ -174,7 +174,6 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) {
delete weight_tensor;
delete sub_graph;
delete param;
delete biasadd_kernel;
return;
}
MS_LOG(INFO) << "Sub graph begin running!";
@@ -186,7 +185,6 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) {
delete weight_tensor;
delete sub_graph;
delete param;
delete biasadd_kernel;
return;
}

@@ -202,7 +200,6 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) {
delete output_tensor;
delete sub_graph;
delete param;
delete biasadd_kernel;
lite::opencl::OpenCLRuntime::DeleteInstance();
}
} // namespace mindspore

+ 0
- 2
mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc View File

@@ -164,7 +164,6 @@ TEST_F(TestConcatOpenCLfp16, ConcatFp16_2input_dim4_axis3) {
delete tensor;
}
delete param;
delete concat_kernel;
delete sub_graph;
}

@@ -284,7 +283,6 @@ TEST_F(TestConcatOpenCLfp32, ConcatFp32_2input_dim4_axis3) {
delete tensor;
}
delete param;
delete concat_kernel;
delete sub_graph;
}
} // namespace mindspore

+ 0
- 1
mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc View File

@@ -78,7 +78,6 @@ void test_main_gather(void *input_data, void *correct_data, const std::vector<in
std::cout << "==================output data================" << std::endl;
auto *output_data = reinterpret_cast<T *>(outputs[0]->data_c());
CommonTest::CompareOutputData<T>(output_data, static_cast<T*>(correct_data), outputs[0]->ElementsNum(), 0.0001);
delete pkernel;
delete sub_graph;
}
TEST_F(TestGatherOpenCL, Axis1Fp32) {


+ 0
- 3
mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc View File

@@ -167,7 +167,6 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) {
delete output_tensor;
delete weight_tensor;
delete param;
delete prelu_kernel;
delete sub_graph;
return;
}
@@ -179,7 +178,6 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) {
delete output_tensor;
delete weight_tensor;
delete param;
delete prelu_kernel;
delete sub_graph;
return;
}
@@ -195,7 +193,6 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) {
delete output_tensor;
delete weight_tensor;
delete param;
delete prelu_kernel;
delete sub_graph;
lite::opencl::OpenCLRuntime::DeleteInstance();
}


+ 0
- 1
mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc View File

@@ -223,7 +223,6 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh
delete[] data_out_ocl;

delete kernel;
delete scale_kernel;
delete param;
for (auto tensor : inputs) {
delete tensor;


+ 0
- 2
mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc View File

@@ -143,7 +143,6 @@ TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) {
for (auto tensor : outputs) {
delete tensor;
}
delete slice_kernel;
delete sub_graph;
}
TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) {
@@ -251,7 +250,6 @@ TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) {
for (auto tensor : outputs) {
delete tensor;
}
delete slice_kernel;
delete sub_graph;
}
} // namespace mindspore

Loading…
Cancel
Save