diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/conv2d.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/conv2d.cl index cdf8e06205..a89146d904 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/cl/conv2d.cl +++ b/mindspore/lite/src/runtime/kernel/opencl/cl/conv2d.cl @@ -67,7 +67,7 @@ __kernel void Conv2D_H1W1C1(__read_only image2d_t input, __write_only image2d_t } } - if (bias) { + if (bias != 0) { out_h0_w0_c0 += bias[co_slice0]; } @@ -135,7 +135,7 @@ __kernel void Conv2D_H2W1C1(__read_only image2d_t input, __write_only image2d_t } } - if (bias) { + if (bias != 0) { out_h0_w0_c0 += bias[co_slice0]; out_h1_w0_c0 += bias[co_slice0]; } @@ -224,7 +224,7 @@ __kernel void Conv2D_H2W1C2(__read_only image2d_t input, __write_only image2d_t } } - if (bias) { + if (bias != 0) { out_h0_w0_c0 += bias[co_slice0]; out_h1_w0_c0 += bias[co_slice0]; out_h0_w0_c1 += bias[co_slice1]; @@ -357,7 +357,7 @@ __kernel void Conv2D_H2W2C2(__read_only image2d_t input, __write_only image2d_t } } - if (bias) { + if (bias != 0) { out_h0_w0_c0 += bias[co_slice0]; out_h0_w1_c0 += bias[co_slice0]; out_h1_w0_c0 += bias[co_slice0]; diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/space_to_depth.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/space_to_depth.cl index a968cec81e..f1aaf13a45 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/cl/space_to_depth.cl +++ b/mindspore/lite/src/runtime/kernel/opencl/cl/space_to_depth.cl @@ -10,6 +10,7 @@ __kernel void SpaceToDepth(__read_only image2d_t src_data, __write_only image2d_ int Y = get_global_id(1); // W int Z = get_global_id(2); // H * N if (X >= out_shape.w || Y >= out_shape.z || Z >= out_shape.x * out_shape.y) return; + if (out_shape.y == 0 || ci_size == 0 || block_size == 0) return; int N = Z / out_shape.y; int H = Z % out_shape.y; int co_base = X * C4NUM; @@ -43,6 +44,7 @@ __kernel void SpaceToDepthAlign(__read_only image2d_t src_data, __write_only ima int Y = get_global_id(1); // W int Z = get_global_id(2); // H * N if (X >= out_shape.w || Y >= out_shape.z || Z >= out_shape.x * out_shape.y) return; + if (out_shape.y == 0 || in_shape.w == 0 || block_size == 0) return; int N = Z / out_shape.y; int H = Z % out_shape.y; diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/winograd.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/winograd.cl index 8e2029532a..fad2a98e96 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/cl/winograd.cl +++ b/mindspore/lite/src/runtime/kernel/opencl/cl/winograd.cl @@ -171,7 +171,7 @@ __kernel void Winograd36To4x4(__read_only image2d_t input, __write_only image2d_ acc += AtM_row[y] * At[idx]; } - if (bias) { + if (bias != 0) { acc += bias[slice]; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc index 2a711722d1..3ebb7e90b2 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc @@ -42,6 +42,8 @@ using mindspore::schema::PrimitiveType_Square; namespace mindspore::kernel { void ArithmeticSelfOpenCLKernel::GetKernelName(std::string *kernel_name, ArithmeticSelfParameter *param) { + MS_ASSERT(kernel_name); + MS_ASSERT(param); switch (param->op_parameter_.type_) { case PrimitiveType_Abs: kernel_name[0] += "_ElementAbs"; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc index 9776508fc6..aa4388b0ea 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc @@ -51,6 +51,8 @@ int SpaceToBatchNDOpenCLKernel::CheckSpecs() { MS_LOG(ERROR) << "block_sizes_ must > 1, actual " << param->block_sizes_[0] << ", " << param->block_sizes_[1]; return RET_ERROR; } + MS_ASSERT(param->block_sizes_[0]); + MS_ASSERT(param->block_sizes_[1]); if (param->padded_in_shape_[kNHWC_H] % param->block_sizes_[0] || param->padded_in_shape_[kNHWC_W] % param->block_sizes_[1]) { MS_LOG(ERROR) << "padded shape must be multiple of block!"; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc index a39edd1b6d..482d0ea351 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc @@ -169,6 +169,7 @@ void StridedSliceOpenCLKernel::SetGlobalLocal() { const int max_divider = 8; auto max_work_group_size = ocl_runtime_->DeviceMaxWorkGroupSize(); size_t local_c = GetMaxDivisorStrategy0(global[2], max_divider); + local_c = std::max(local_c, 1); size_t local_hw = max_work_group_size / local_c; size_t local_h = std::min(UP_DIV(global[0], 2), local_hw); size_t local_w = std::min(local_hw / local_h, global[1]); diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h index beae7c39f9..c96935afba 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h @@ -36,6 +36,8 @@ struct OpenCLToFormatParameter { template void Broadcast2GpuShape(DstT *dst, const SrcT *src, int src_num) { + MS_ASSERT(dst); + MS_ASSERT(src); auto *N = dst; auto *H = dst + 1; auto *W = dst + 2; @@ -54,13 +56,15 @@ void Broadcast2GpuShape(DstT *dst, const SrcT *src, int src_num) { *H = src[1]; *W = src[2]; *C = src[3]; - } else if (src_num >= 5) { + } else if (src_num > 4) { MS_LOG(ERROR) << "GPU doesn't support ndim>=" << src_num; } } template void Broadcast2GpuShape(DstT *dst, const SrcT *src, int src_num, DstT default_value) { + MS_ASSERT(dst); + MS_ASSERT(src); for (int i = 0; i < 4; ++i) { dst[i] = default_value; } @@ -101,6 +105,7 @@ struct GpuTensorInfo { size_t RowPitch() const { auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); int alignment = runtime_wrapper.GetInstance()->GetImagePitchAlignment(); + MS_ASSERT(alignment); size_t row_pitch = UP_ROUND(width, alignment) * FLT4_size; return row_pitch; } @@ -143,31 +148,31 @@ class OpenCLKernel : public LiteKernel { int AlignGlobalLocal(const std::vector &global, const std::vector &local) { std::vector internal_global_ws = global; for (size_t i = 0; i < local.size(); ++i) { - internal_global_ws[i] = UP_ROUND(global[i], local[i]); + internal_global_ws.at(i) = UP_ROUND(global.at(i), local.at(i)); } MS_LOG(DEBUG) << "global size: " << global.size() << ", local size: " << local.size(); for (size_t i = 0; i < global.size(); i++) { - MS_LOG(DEBUG) << "global[" << i << "] = " << global[i]; + MS_LOG(DEBUG) << "global[" << i << "] = " << global.at(i); } for (size_t i = 0; i < local.size(); i++) { - MS_LOG(DEBUG) << "local[" << i << "] = " << local[i]; + MS_LOG(DEBUG) << "local[" << i << "] = " << local.at(i); } if (global.size() == 1) { - global_range_ = cl::NDRange(internal_global_ws[0]); + global_range_ = cl::NDRange(internal_global_ws.at(0)); if (!local.empty()) { - local_range_ = cl::NDRange(local[0]); + local_range_ = cl::NDRange(local.at(0)); } } else if (global.size() == 2) { - global_range_ = cl::NDRange(internal_global_ws[0], internal_global_ws[1]); + global_range_ = cl::NDRange(internal_global_ws.at(0), internal_global_ws.at(1)); if (!local.empty()) { - local_range_ = cl::NDRange(local[0], local[1]); + local_range_ = cl::NDRange(local.at(0), local.at(1)); } } else if (global.size() == 3) { - global_range_ = cl::NDRange(internal_global_ws[0], internal_global_ws[1], internal_global_ws[2]); + global_range_ = cl::NDRange(internal_global_ws.at(0), internal_global_ws.at(1), internal_global_ws.at(2)); if (!local.empty()) { - local_range_ = cl::NDRange(local[0], local[1], local[2]); + local_range_ = cl::NDRange(local.at(0), local.at(1), local.at(2)); } } else { MS_LOG(ERROR) << "Not supported NDRange!"; @@ -191,6 +196,7 @@ class OpenCLKernel : public LiteKernel { return RET_ERROR; } int GetImageSize(size_t idx, std::vector *img_size) { + MS_ASSERT(img_size); if (idx >= out_tensors_.size()) { return RET_ERROR; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc index 3fcb8aab6c..7e77bfd125 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc @@ -28,11 +28,12 @@ using mindspore::lite::opencl::MemType; SubGraphOpenCLKernel::~SubGraphOpenCLKernel() { UnInit(); } -int SubGraphOpenCLKernel::ReplaceOutTensorAndKernelToNull( +void SubGraphOpenCLKernel::ReplaceOutTensorAndKernelToNull( const std::vector &in_tensors, const std::vector> &in_kernels, MemType mem_type) { for (size_t i = 0; i < in_tensors.size(); ++i) { for (auto &jv : in_kernels.at(i)) { + MS_ASSERT(jv); auto tensors = (mem_type == MemType::IMG) ? jv->in_tensors() : jv->out_tensors(); auto ft = std::find_if(tensors.begin(), tensors.end(), [&in_tensors, &i](lite::Tensor *kv) { return kv == in_tensors.at(i); }); @@ -43,6 +44,7 @@ int SubGraphOpenCLKernel::ReplaceOutTensorAndKernelToNull( std::replace_if( kernels.begin(), kernels.end(), [this, &in_tensors, &i](kernel::LiteKernel *kv) { + MS_ASSERT(kv); return std::find_if(kv->in_tensors().begin(), kv->in_tensors().end(), [&in_tensors, &i](lite::Tensor *xv) { return xv == in_tensors.at(i); }) != kv->in_tensors().end() && @@ -58,14 +60,16 @@ int SubGraphOpenCLKernel::ReplaceOutTensorAndKernelToNull( } } } - return RET_OK; } -int SubGraphOpenCLKernel::ReplaceOutTensorAndKernelToConvert(const lite::Tensor *in_tensor, - const std::vector &in_kernels, - lite::Tensor *new_tensor, - kernel::LiteKernel *in_convert_op, MemType mem_type) { + +void SubGraphOpenCLKernel::ReplaceOutTensorAndKernelToConvert(const lite::Tensor *in_tensor, + const std::vector &in_kernels, + lite::Tensor *new_tensor, + kernel::LiteKernel *in_convert_op, MemType mem_type) { + MS_ASSERT(in_convert_op); auto in_opencl_op = reinterpret_cast(in_convert_op); for (auto &iv : in_kernels) { + MS_ASSERT(iv); auto kernels = (mem_type == MemType::IMG) ? iv->in_kernels() : iv->out_kernels(); auto fk = std::find_if(kernels.begin(), kernels.end(), [&](kernel::LiteKernel *kv) { return kv == nullptr; }); if (fk != kernels.end()) { @@ -90,13 +94,16 @@ int SubGraphOpenCLKernel::ReplaceOutTensorAndKernelToConvert(const lite::Tensor in_convert_op->AddInKernel(iv); } } - return RET_OK; } + int SubGraphOpenCLKernel::GenToFormatOp(const std::vector &in_tensors, const std::vector> &in_kernels, std::vector *out_tensors, std::vector *out_parameters, std::vector *out_convert_ops, MemType mem_type) { + MS_ASSERT(out_tensors); + MS_ASSERT(out_parameters); + MS_ASSERT(out_convert_ops); out_tensors->clear(); out_parameters->clear(); out_convert_ops->clear(); @@ -167,6 +174,7 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector &in_te // replace in_tensor of inner kernel which use out tensor if (mem_type == MemType::BUF) { for (auto &iv : loop_kernels[i]) { + MS_ASSERT(iv); auto tensors = iv->in_tensors(); auto jv = std::find(tensors.begin(), tensors.end(), in_tensors.at(i)); if (jv != tensors.end()) { @@ -185,9 +193,11 @@ int SubGraphOpenCLKernel::Init() { allocator_ = ocl_runtime_->GetAllocator(); MS_LOG(DEBUG) << "input num=" << in_tensors_.size() << ", output num=" << out_tensors_.size(); for (const auto tensor : in_tensors_) { + MS_ASSERT(tensor); tensor->set_allocator(allocator_); } for (const auto tensor : out_tensors_) { + MS_ASSERT(tensor); tensor->set_allocator(allocator_); } @@ -223,72 +233,83 @@ int SubGraphOpenCLKernel::Init() { return RET_OK; } -int SubGraphOpenCLKernel::UpdateTensorDataType() { +void SubGraphOpenCLKernel::UpdateTensorDataType() { bool is_fp16 = ocl_runtime_->GetFp16Enable(); + MS_ASSERT(in_tensors_[0]); if (is_fp16 && (in_tensors_[0]->data_type() == kNumberTypeFloat32)) { std::set out_set; out_set.insert(in_tensors_.begin(), in_tensors_.end()); out_set.insert(out_tensors_.begin(), out_tensors_.end()); for (auto iv : nodes_) { + MS_ASSERT(iv); auto cur_outs = iv->out_tensors(); for (auto jv : cur_outs) { if (out_set.count(jv) == 0) { + MS_ASSERT(jv); jv->set_data_type(kNumberTypeFloat16); } } } } - return RET_OK; } int SubGraphOpenCLKernel::MallocTensorWithReuse() { + int ret; kernel::LiteKernelUtil::InitTensorRefCount(nodes_); for (auto *kernel : nodes_) { - MS_ASSERT(nullptr != kernel); + MS_ASSERT(kernel); auto *op_kernel = reinterpret_cast(kernel); auto outputs = kernel->out_tensors(); for (auto i = 0; i < outputs.size(); ++i) { auto *output = outputs.at(i); - MS_ASSERT(nullptr != output); + MS_ASSERT(output); if (op_kernel->GetMemType() == MemType::IMG) { std::vector img_size; - op_kernel->GetImageSize(i, &img_size); + ret = op_kernel->GetImageSize(i, &img_size); + if (ret != RET_OK) { + MS_LOG(WARNING) << "GetImageSize failed"; + } auto data_ptr = allocator_->Malloc(output->Size(), img_size); output->set_data(data_ptr); } else { - output->MallocData(allocator_); + ret = output->MallocData(allocator_); + if (ret != RET_OK) { + MS_LOG(WARNING) << "MallocData failed"; + } } output->set_allocator(allocator_); } for (auto input_kernel : kernel->in_kernels()) { - MS_ASSERT(nullptr != input_kernel); - auto ret = input_kernel->DecOutTensorRefCount(); - if (0 != ret) { + MS_ASSERT(input_kernel); + ret = input_kernel->DecOutTensorRefCount(); + if (ret != RET_OK) { MS_LOG(WARNING) << "DecOutTensorRefCount for kernel" << kernel->name() << " failed"; } } } for (auto kernel : out_kernels_) { - MS_ASSERT(nullptr != kernel); - auto ret = kernel->DecOutTensorRefCount(); - if (0 != ret) { + MS_ASSERT(kernel); + ret = kernel->DecOutTensorRefCount(); + if (ret != RET_OK) { MS_LOG(WARNING) << "DecOutTensorRefCount for kernel" << kernel->name() << " failed"; } } return RET_OK; } -int SubGraphOpenCLKernel::GetKernelFromToTensor(const std::vector &in_tensors, - const std::vector &in_kernels, - std::vector> *out_kernels, - bool is_from) { +void SubGraphOpenCLKernel::GetKernelFromToTensor(const std::vector &in_tensors, + const std::vector &in_kernels, + std::vector> *out_kernels, + bool is_from) { std::vector> ksets; for (auto jv : in_kernels) { + MS_ASSERT(jv); auto tens = is_from ? jv->in_tensors() : jv->out_tensors(); std::set kset; kset.insert(tens.begin(), tens.end()); ksets.emplace_back(kset); } + MS_ASSERT(out_kernels); for (auto in_tensor : in_tensors) { std::vector kvec; for (size_t j = 0; j < in_kernels.size(); ++j) { @@ -298,13 +319,13 @@ int SubGraphOpenCLKernel::GetKernelFromToTensor(const std::vectoremplace_back(kvec); } - return RET_OK; } -int SubGraphOpenCLKernel::GetInOutNodes() { +void SubGraphOpenCLKernel::GetInOutNodes() { std::vector> ksets_in; std::vector> ksets_out; for (auto jv : nodes_) { + MS_ASSERT(jv); std::set kset; kset.insert(jv->in_tensors().begin(), jv->in_tensors().end()); ksets_in.emplace_back(kset); @@ -323,10 +344,15 @@ int SubGraphOpenCLKernel::GetInOutNodes() { out_nodes_.emplace_back(nodes_.at(j)); } } - return RET_OK; } int SubGraphOpenCLKernel::Prepare() { + executor_ = new (std::nothrow) lite::opencl::OpenCLExecutor(); + if (executor_ == nullptr) { + MS_LOG(ERROR) << "Create OpenCLExecutor fail"; + return RET_ERROR; + } + auto ret = Init(); if (ret != RET_OK) { MS_LOG(ERROR) << "OpenCL subgraph init fail"; @@ -335,7 +361,7 @@ int SubGraphOpenCLKernel::Prepare() { return RET_OK; } -int SubGraphOpenCLKernel::UnInit() { +void SubGraphOpenCLKernel::UnInit() { for (const auto &tensor : in_convert_tensors_) { delete tensor; } @@ -351,7 +377,6 @@ int SubGraphOpenCLKernel::UnInit() { in_convert_ops_.clear(); out_convert_ops_.clear(); delete this->executor_; - return RET_OK; } int SubGraphOpenCLKernel::InferShape() { return RET_OK; } @@ -363,21 +388,28 @@ int SubGraphOpenCLKernel::Run() { MS_LOG(ERROR) << "executor is nullptr"; return RET_ERROR; } + int ret; for (auto &tensor : in_tensors_) { + MS_ASSERT(tensor); if (tensor->data_c() == nullptr) { MS_LOG(ERROR) << "OpenCL subgraph input tensor data is null"; return RET_ERROR; } allocator_->UnmapBuffer(tensor->data_c()); + ret = allocator_->UnmapBuffer(tensor->data_c()); + if (ret != RET_OK) { + return ret; + } } - auto ret = executor_->Run(in_tensors_, out_tensors_, nodes_, allocator_); - if (RET_OK != ret) { + ret = executor_->Run(in_tensors_, out_tensors_, nodes_, allocator_); + if (ret != RET_OK) { MS_LOG(ERROR) << "Run opencl executor failed: " << ret; return ret; } - ocl_runtime_->SyncCommandQueue(); - + if (!ocl_runtime_->SyncCommandQueue()) { + return RET_ERROR; + } return RET_OK; } } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h index 34de3605c5..630273204a 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h +++ b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h @@ -41,7 +41,6 @@ class SubGraphOpenCLKernel : public SubGraphKernel { ocl_runtime_ = ocl_runtime_wrap_.GetInstance(); subgraph_type_ = kGpuSubGraph; this->name_ = "GpuSubGraph"; - this->executor_ = new lite::opencl::OpenCLExecutor(); nodes_set_.insert(nodes.begin(), nodes.end()); } ~SubGraphOpenCLKernel() override; @@ -56,23 +55,23 @@ class SubGraphOpenCLKernel : public SubGraphKernel { int Run(const KernelCallBack &before, const KernelCallBack &after) override { return this->Run(); }; private: - int UnInit(); - int UpdateTensorDataType(); + void UnInit(); + void UpdateTensorDataType(); int MallocTensorWithReuse(); - int ReplaceOutTensorAndKernelToNull(const std::vector &in_tensors, - const std::vector> &in_kernels, - lite::opencl::MemType mem_type); - int ReplaceOutTensorAndKernelToConvert(const lite::Tensor *in_tensor, - const std::vector &in_kernels, lite::Tensor *new_tensor, - kernel::LiteKernel *in_convert_op, lite::opencl::MemType mem_type); - int GetInOutNodes(); + void ReplaceOutTensorAndKernelToNull(const std::vector &in_tensors, + const std::vector> &in_kernels, + lite::opencl::MemType mem_type); + void ReplaceOutTensorAndKernelToConvert(const lite::Tensor *in_tensor, + const std::vector &in_kernels, lite::Tensor *new_tensor, + kernel::LiteKernel *in_convert_op, lite::opencl::MemType mem_type); + void GetInOutNodes(); int GenToFormatOp(const std::vector &in_tensors, const std::vector> &in_kernels, std::vector *out_tensors, std::vector *out_parameters, std::vector *out_convert_ops, lite::opencl::MemType mem_type); - int GetKernelFromToTensor(const std::vector &in_tensors, - const std::vector &in_kernels, - std::vector> *out_kernels, bool is_from); + void GetKernelFromToTensor(const std::vector &in_tensors, + const std::vector &in_kernels, + std::vector> *out_kernels, bool is_from); lite::opencl::OpenCLAllocator *allocator_{nullptr}; std::vector in_convert_tensors_; std::vector out_convert_tensors_; diff --git a/mindspore/lite/src/runtime/kernel/opencl/utils.cc b/mindspore/lite/src/runtime/kernel/opencl/utils.cc index fa3e8fc5c5..6904c800f4 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/utils.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/utils.cc @@ -87,6 +87,7 @@ int GetMaxDivisorStrategy1(int x, int divisor) { } std::vector GetCommonGlobalSize(const std::vector &local, const std::vector &global) { + MS_ASSERT(local.size() == global.size() && local.size() == 3); std::vector result(3); for (int i = 0; i < 3; ++i) { result[i] = UP_ROUND(global[i], local[i]); @@ -95,6 +96,7 @@ std::vector GetCommonGlobalSize(const std::vector &local, const } std::vector GetCommonLocalSize(const std::vector &global, int max_size) { + MS_ASSERT(global.size() == 3); size_t local_z = GetMaxDivisorStrategy0(global[2], 8); if (local_z == 0) { MS_LOG(ERROR) << "Divide by zero"; @@ -239,6 +241,7 @@ std::string CLErrorCode(cl_int error_code) { } int WriteToBin(const std::string &file_path, void *data, size_t size) { + MS_ASSERT(data); std::ofstream out_file; out_file.open(file_path.c_str(), std::ios::binary); @@ -256,7 +259,7 @@ int WriteToBin(const std::string &file_path, void *data, size_t size) { } void PrintTensor(const lite::Tensor *tensor, MemType mem_type, int n, const std::string &out_file) { - if (tensor->data_c() == nullptr) { + if (tensor == nullptr || tensor->data_c() == nullptr) { return; } @@ -305,6 +308,9 @@ void PrintTensor(const lite::Tensor *tensor, MemType mem_type, int n, const std: } void PrintKernelOutput(OpenCLKernel *kernel, int n, const std::string &out_file) { + if (kernel == nullptr) { + return; + } printf("%-30s", kernel->name().c_str()); if (!kernel->out_tensors().empty()) { PrintTensor(kernel->out_tensors()[0], kernel->GetMemType(), n, out_file); diff --git a/mindspore/lite/src/runtime/kernel/opencl/utils.h b/mindspore/lite/src/runtime/kernel/opencl/utils.h index 228682358c..5e71f8bb1d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/utils.h +++ b/mindspore/lite/src/runtime/kernel/opencl/utils.h @@ -61,6 +61,8 @@ std::vector GetImage2dShapeFromNHWC(const std::vector &tensor_shape template void PackNCHWToNC4HW4(void *src, void *dst, int batch, int plane, int channel, const std::function &to_dtype) { + MS_ASSERT(src); + MS_ASSERT(dst); int c4 = UP_DIV(channel, C4NUM); for (int b = 0; b < batch; b++) { int src_offset = b * plane * channel; @@ -81,6 +83,8 @@ void PackNCHWToNC4HW4(void *src, void *dst, int batch, int plane, int channel, c template void PackNHWCToNHWC4(void *src, void *dst, int batch, int plane, int channel, const std::function &to_dtype) { + MS_ASSERT(src); + MS_ASSERT(dst); int c4 = UP_DIV(channel, C4NUM); int nhwc4_batch_unit_offset = c4 * C4NUM * plane; int ic_remainder_ = channel % C4NUM; @@ -106,6 +110,8 @@ void PackNHWCToNHWC4(void *src, void *dst, int batch, int plane, int channel, co template void PackNHWCToNC4HW4(void *src, void *dst, int batch, int plane, int channel, const std::function &to_dtype) { + MS_ASSERT(src); + MS_ASSERT(dst); int c4 = UP_DIV(channel, C4NUM); for (int b = 0; b < batch; b++) { int src_oc_offset = b * plane * channel; @@ -142,6 +148,11 @@ std::vector MatrixMultiply(const T A[], const T B[], int M, int N, int K) { template void ConvertConvWeight4DTo7D(void *src, void *dst, size_t CO, size_t KH, size_t KW, size_t CI, size_t OGroup = 1, const size_t CI_TILE = 4, const size_t CO_TILE = 4) { + MS_ASSERT(src); + MS_ASSERT(dst); + MS_ASSERT(CI_TILE); + MS_ASSERT(CO_TILE); + MS_ASSERT(OGroup); if (CO_TILE == 0 || CI_TILE == 0) return; auto origin_weight = reinterpret_cast(src); auto packed_weight = reinterpret_cast(dst); diff --git a/mindspore/lite/src/runtime/opencl/opencl_allocator.cc b/mindspore/lite/src/runtime/opencl/opencl_allocator.cc index 998399a1a9..ce98df79e1 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_allocator.cc +++ b/mindspore/lite/src/runtime/opencl/opencl_allocator.cc @@ -66,6 +66,7 @@ void *OpenCLAllocator::MinimumFit(size_t size, const std::vector &img_si void *OpenCLAllocator::CreateBuffer(size_t size, void *data, size_t flags, cl::Buffer **buffer) { cl_int ret = CL_SUCCESS; + MS_ASSERT(buffer); *buffer = new (std::nothrow) cl::Buffer(*ocl_runtime_->Context(), flags, size, data, &ret); if (*buffer == nullptr) { MS_LOG(ERROR) << "Create OpenCL buffer failed! (ERROR CODE: " << ret << ")"; @@ -78,7 +79,11 @@ void *OpenCLAllocator::CreateBuffer(size_t size, void *data, size_t flags, cl::B return nullptr; } cl::Memory *mem = *buffer; - ocl_runtime_->UnmapBuffer(*mem, host_ptr); + MS_ASSERT(mem); + ret = ocl_runtime_->UnmapBuffer(*mem, host_ptr); + if (ret != RET_OK) { + MS_LOG(WARNING) << "UnmapBuffer failed."; + } return host_ptr; } @@ -110,7 +115,10 @@ void *OpenCLAllocator::CreateImage2D(size_t size, const std::vector &img return nullptr; } cl::Memory *mem = *image; - ocl_runtime_->UnmapBuffer(*mem, host_ptr); + ret = ocl_runtime_->UnmapBuffer(*mem, host_ptr); + if (ret != CL_SUCCESS) { + MS_LOG(WARNING) << "UnmapBuffer failed."; + } } return host_ptr; } @@ -131,7 +139,7 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector &img_size, } Lock(); void *host_ptr = MinimumFit(size, img_size); - if ((host_ptr != nullptr) && (data == nullptr)) { + if (host_ptr != nullptr && data == nullptr) { UnLock(); return host_ptr; } @@ -188,7 +196,10 @@ void OpenCLAllocator::Free(void *buf) { auto iter = allocated_list_.find(buf); if (iter != allocated_list_.end()) { if (iter->second->map_flags) { - UnmapBuffer(buf); + int ret = UnmapBuffer(buf); + if (ret != RET_OK) { + MS_LOG(WARNING) << "UnmapBuffer failed."; + } iter->second->map_flags = false; } auto mem_buf = iter->second; @@ -240,7 +251,10 @@ void OpenCLAllocator::Clear() { auto svm_capabilities = ocl_runtime_->GetSVMCapabilities(); for (auto it = allocated_list_.begin(); it != allocated_list_.end(); it++) { if (it->second->map_flags) { - UnmapBuffer(it->second->host_ptr_); + int ret = UnmapBuffer(it->second->host_ptr_); + if (ret != RET_OK) { + MS_LOG(WARNING) << "UnmapBuffer failed."; + } } if (svm_capabilities) { clSVMFree((*ocl_runtime_->Context())(), it->second->host_ptr_); @@ -295,7 +309,11 @@ void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue, MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << host_ptr; return nullptr; } - ocl_runtime_->MapBuffer(host_ptr, flags, it->second->size_, static_cast(command_queue), sync); + int ret = ocl_runtime_->MapBuffer(host_ptr, flags, it->second->size_, + static_cast(command_queue), sync); + if (ret != RET_OK) { + MS_LOG(WARNING) << "MapBuffer failed."; + } } return host_ptr; } @@ -313,14 +331,16 @@ void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue, return host_ptr; } MemBuf *mem_buf = it->second; - void *new_host_ptr{nullptr}; + MS_ASSERT(mem_buf); + void *new_host_ptr; if (mem_buf->img_size.empty()) { cl::Buffer *buffer = static_cast(mem_buf->device_ptr_); + MS_ASSERT(buffer); new_host_ptr = ocl_runtime_->MapBuffer(*buffer, flags, mem_buf->size_, nullptr, sync); } else { - cl::ImageFormat image_format(CL_RGBA, mem_buf->img_size[2]); std::vector region{mem_buf->img_size[0], mem_buf->img_size[1], 1}; cl::Image2D *image = static_cast(mem_buf->image_ptr_); + MS_ASSERT(image); new_host_ptr = ocl_runtime_->MapBuffer(*image, 0, CL_MAP_READ | CL_MAP_WRITE, region); } if (new_host_ptr == nullptr) { @@ -373,6 +393,7 @@ MemType OpenCLAllocator::GetMemType(void *host_ptr) { return mem_type; } MemBuf *mem_buf = it->second; + MS_ASSERT(mem_buf); if (mem_buf->img_size.empty()) { mem_type = MemType::BUF; } else { @@ -383,6 +404,7 @@ MemType OpenCLAllocator::GetMemType(void *host_ptr) { } int OpenCLAllocator::GetImageSize(void *host_ptr, std::vector *img_size) { + MS_ASSERT(img_size); Lock(); auto it = allocated_list_.find(host_ptr); if (it == allocated_list_.end()) { @@ -391,6 +413,7 @@ int OpenCLAllocator::GetImageSize(void *host_ptr, std::vector *img_size) return RET_OK; } MemBuf *mem_buf = it->second; + MS_ASSERT(mem_buf); if (!mem_buf->img_size.empty()) { *img_size = mem_buf->img_size; } diff --git a/mindspore/lite/src/runtime/opencl/opencl_allocator.h b/mindspore/lite/src/runtime/opencl/opencl_allocator.h index b1ebafbeb1..f8a5f6e59a 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_allocator.h +++ b/mindspore/lite/src/runtime/opencl/opencl_allocator.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef MINDSPORE_LITE_SRC_OPENCL_ALLOCATOR_H_ -#define MINDSPORE_LITE_SRC_OPENCL_ALLOCATOR_H_ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_OPENCL_ALLOCATOR_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_OPENCL_ALLOCATOR_H_ #include #include @@ -29,17 +29,6 @@ namespace mindspore::lite::opencl { -#define MS_HOST_BUFFER 0 -#define MS_CL_BUFFER (1 << 1) -#define MS_CL_IMAGE2D (1 << 2) -typedef int32_t OpenCLMemoryType; - -struct OpenclMemory { - void *host_ptr{nullptr}; - void *device_ptr{nullptr}; - OpenCLMemoryType mem_type{MS_HOST_BUFFER | MS_CL_BUFFER}; -}; - class OpenCLRuntime; enum class MemType : char { BUF, IMG }; @@ -95,4 +84,4 @@ class OpenCLAllocator : public Allocator { } // namespace mindspore::lite::opencl -#endif // MINDSPORE_LITE_SRC_OPENCL_ALLOCATOR_H_ +#endif // MINDSPORE_LITE_SRC_RUNTIME_OPENCL_ALLOCATOR_H_ diff --git a/mindspore/lite/src/runtime/opencl/opencl_executor.cc b/mindspore/lite/src/runtime/opencl/opencl_executor.cc index 5110067d5f..67e5fbb979 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_executor.cc +++ b/mindspore/lite/src/runtime/opencl/opencl_executor.cc @@ -21,14 +21,13 @@ namespace mindspore::lite::opencl { -int OpenCLExecutor::Prepare(const std::vector &kernels) { return RET_OK; } - int OpenCLExecutor::Run(std::vector &inputs, std::vector &outputs, std::vector &kernels, Allocator *allocator, const KernelCallBack &before, const KernelCallBack &after) { + int ret; kernel::LiteKernelUtil::InitTensorRefCount(kernels); for (auto *kernel : kernels) { - MS_ASSERT(nullptr != kernel); + MS_ASSERT(kernel); CallBackParam callbackParam; callbackParam.node_name = kernel->name(); @@ -41,19 +40,27 @@ int OpenCLExecutor::Run(std::vector &inputs, std::vector &ou auto cur_outputs = kernel->out_tensors(); for (auto i = 0; i < cur_outputs.size(); ++i) { auto *output = cur_outputs.at(i); - MS_ASSERT(nullptr != output); + MS_ASSERT(output); if (op_kernel->GetMemType() == lite::opencl::MemType::IMG) { std::vector img_size; - op_kernel->GetImageSize(i, &img_size); + ret = op_kernel->GetImageSize(i, &img_size); + if (ret != RET_OK) { + MS_LOG(ERROR) << "GetImageSize failed"; + return ret; + } auto data_ptr = allocator_->Malloc(output->Size(), img_size); output->set_data(data_ptr); } else { - output->MallocData(allocator_); + ret = output->MallocData(allocator_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "MallocData failed"; + return ret; + } } } - auto ret = kernel->Run(); - if (0 != ret) { + ret = kernel->Run(); + if (ret != RET_OK) { MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name(); return ret; } @@ -64,9 +71,9 @@ int OpenCLExecutor::Run(std::vector &inputs, std::vector &ou } } for (auto input_kernel : kernel->in_kernels()) { - MS_ASSERT(nullptr != input_kernel); + MS_ASSERT(input_kernel); ret = input_kernel->DecOutTensorRefCount(); - if (0 != ret) { + if (ret != RET_OK) { MS_LOG(WARNING) << "DecOutTensorRefCount for kernel" << kernel->name() << " failed"; } } diff --git a/mindspore/lite/src/runtime/opencl/opencl_executor.h b/mindspore/lite/src/runtime/opencl/opencl_executor.h index 58d4e06733..fcf692f19b 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_executor.h +++ b/mindspore/lite/src/runtime/opencl/opencl_executor.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef MINDSPORE_LITE_SRC_OPENCL_EXECUTOR_H_ -#define MINDSPORE_LITE_SRC_OPENCL_EXECUTOR_H_ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_OPENCL_EXECUTOR_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_OPENCL_EXECUTOR_H_ #include #include "src/runtime/opencl/opencl_runtime.h" @@ -29,7 +29,7 @@ class OpenCLExecutor : public Executor { public: OpenCLExecutor() : Executor() { allocator_ = ocl_runtime.GetInstance()->GetAllocator(); } - int Prepare(const std::vector &kernels) override; + int Prepare(const std::vector &kernels) override { return RET_OK; } int Run(std::vector &inputs, std::vector &outputs, std::vector &kernels, Allocator *allocator = nullptr, const KernelCallBack &before = nullptr, diff --git a/mindspore/lite/src/runtime/opencl/opencl_runtime.cc b/mindspore/lite/src/runtime/opencl/opencl_runtime.cc index 70137d416d..881005522c 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_runtime.cc +++ b/mindspore/lite/src/runtime/opencl/opencl_runtime.cc @@ -100,8 +100,14 @@ int OpenCLRuntime::Init() { std::vector devices; for (auto it = platforms.begin(); it != platforms.end(); ++it) { std::string platform_name; - it->getInfo(CL_PLATFORM_NAME, &platform_name); - it->getDevices(CL_DEVICE_TYPE_GPU, &devices); + ret = it->getInfo(CL_PLATFORM_NAME, &platform_name); + if (ret != CL_SUCCESS) { + MS_LOG(WARNING) << CLErrorCode(ret); + } + ret = it->getDevices(CL_DEVICE_TYPE_GPU, &devices); + if (ret != CL_SUCCESS) { + MS_LOG(WARNING) << CLErrorCode(ret); + } MS_LOG(INFO) << "Platform (" << platform_name << ") has " << devices.size() << " GPUs"; if (devices.size() > 0) { @@ -178,9 +184,18 @@ int OpenCLRuntime::Init() { } // get cache size, compute units and frequency. - device_->getInfo(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, &global_memery_cachesize_); - device_->getInfo(CL_DEVICE_MAX_COMPUTE_UNITS, &compute_units_); - device_->getInfo(CL_DEVICE_MAX_CLOCK_FREQUENCY, &max_freq_); + ret = device_->getInfo(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, &global_memery_cachesize_); + if (ret != CL_SUCCESS) { + MS_LOG(WARNING) << CLErrorCode(ret); + } + ret = device_->getInfo(CL_DEVICE_MAX_COMPUTE_UNITS, &compute_units_); + if (ret != CL_SUCCESS) { + MS_LOG(WARNING) << CLErrorCode(ret); + } + ret = device_->getInfo(CL_DEVICE_MAX_CLOCK_FREQUENCY, &max_freq_); + if (ret != CL_SUCCESS) { + MS_LOG(WARNING) << CLErrorCode(ret); + } cl_device_fp_config fp_config; auto success = device_->getInfo(CL_DEVICE_HALF_FP_CONFIG, &fp_config); support_fp16_ = CL_SUCCESS == success && fp_config > 0; @@ -281,7 +296,9 @@ uint32_t OpenCLRuntime::DeviceMaxFreq() const { return max_freq_; } uint64_t OpenCLRuntime::GetMaxWorkGroupSize(const cl::Kernel &kernel) { uint64_t max_workgroup_size = 0; int ret = kernel.getWorkGroupInfo(*device_, CL_KERNEL_WORK_GROUP_SIZE, &max_workgroup_size); - if (ret != 0) max_workgroup_size = 0; + if (ret != CL_SUCCESS) { + max_workgroup_size = 0; + } return max_workgroup_size; } @@ -421,7 +438,10 @@ int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector static int cnt = 0; const int flush_period = 10; if (cnt % flush_period == 0) { - command_queue->flush(); + auto flush_ret = command_queue->flush(); + if (flush_ret != CL_SUCCESS) { + MS_LOG(WARNING) << "CL Flush failed:" << CLErrorCode(ret); + } } cnt++; MS_LOG(DEBUG) << "RunKernel success!"; @@ -454,7 +474,10 @@ int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const cl::NDRange &global static int cnt = 0; const int flush_period = 10; if (cnt % flush_period == 0) { - command_queue->flush(); + auto flush_ret = command_queue->flush(); + if (flush_ret != CL_SUCCESS) { + MS_LOG(WARNING) << "CL Flush failed:" << CLErrorCode(ret); + } } cnt++; MS_LOG(DEBUG) << "RunKernel success!";