diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc index 633bc67bc6..7fb0a15a58 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc @@ -70,7 +70,7 @@ int ArithmeticOpenCLKernel::GetImageSize(size_t idx, std::vector *img_si img_size->clear(); std::vector vec{im_dst_x, im_dst_y, img_dtype}; *img_size = vec; - return 0; + return RET_OK; } int ArithmeticOpenCLKernel::Init() { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc index f5f7ae1e0a..f14a00103c 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc @@ -59,7 +59,7 @@ int Conv2dTransposeOpenCLKernel::Init() { return RET_OK; } -int Conv2dTransposeOpenCLKernel::ReSize() { return 0; } +int Conv2dTransposeOpenCLKernel::ReSize() { return RET_OK; } void Conv2dTransposeOpenCLKernel::PadWeight() { ConvParameter *param = reinterpret_cast(op_parameter_); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc index db2bbd638c..88673b9dfc 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc @@ -67,10 +67,10 @@ int MatMulOpenCLKernel::Init() { in_tensors_[0]->SetFormat(schema::Format_NC4); } MS_LOG(DEBUG) << kernel_name << " Init Done!"; - return 0; + return RET_OK; } -int MatMulOpenCLKernel::ReSize() { return 0; } +int MatMulOpenCLKernel::ReSize() { return RET_OK; } void MatMulOpenCLKernel::PadWeight() { auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator(); @@ -147,7 +147,7 @@ int MatMulOpenCLKernel::Run() { ocl_runtime->SetKernelArg(kernel_, arg_count++, sizeCO); ocl_runtime->SetKernelArg(kernel_, arg_count++, hasBias_ ? 1 : 0); ocl_runtime->RunKernel(kernel_, global, local, nullptr); - return 0; + return RET_OK; } kernel::LiteKernel *OpenCLMatMulKernelCreator(const std::vector &inputs, diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc index cc5055be1a..6fc2f4ce31 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc @@ -63,7 +63,7 @@ int ReshapeOpenCLKernel::Init() { return RET_OK; } -int ReshapeOpenCLKernel::ReSize() { return 0; } +int ReshapeOpenCLKernel::ReSize() { return RET_OK; } int ReshapeOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { size_t im_dst_x, im_dst_y; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc index 328db29850..9c2df7bc7b 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc @@ -64,7 +64,7 @@ int TransposeOpenCLKernel::Init() { return RET_OK; } -int TransposeOpenCLKernel::ReSize() { return 0; } +int TransposeOpenCLKernel::ReSize() { return RET_OK; } int TransposeOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { size_t im_dst_x, im_dst_y; @@ -100,7 +100,7 @@ int TransposeOpenCLKernel::Run() { ocl_runtime->SetKernelArg(kernel_, 2, HW); ocl_runtime->SetKernelArg(kernel_, 3, C); ocl_runtime->RunKernel(kernel_, global, local, nullptr); - return 0; + return RET_OK; } kernel::LiteKernel *OpenCLTransposeKernelCreator(const std::vector &inputs, diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h index 167841e0af..f7acc71fcb 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h @@ -19,6 +19,7 @@ #include #include "src/lite_kernel.h" +#include "include/errorcode.h" namespace mindspore::kernel { @@ -37,15 +38,15 @@ class OpenCLKernel : public LiteKernel { const std::vector &outputs) : LiteKernel(parameter, inputs, outputs, nullptr, nullptr) {} - virtual int Init() { return -1; } - virtual int Prepare() { return -1; } - virtual int InferShape() { return -1; } - virtual int ReSize() { return -1; } - virtual int Run() { return -1; } - virtual int GetImageSize(size_t idx, std::vector *img_size) { return -1; } - virtual int GetGlobalSize(size_t idx, std::vector *global_size) { return -1; } + virtual int Init() { return RET_ERROR; } + virtual int Prepare() { return RET_ERROR; } + virtual int InferShape() { return RET_ERROR; } + virtual int ReSize() { return RET_ERROR; } + virtual int Run() { return RET_ERROR; } + virtual int GetImageSize(size_t idx, std::vector *img_size) { return RET_ERROR; } + virtual int GetGlobalSize(size_t idx, std::vector *global_size) { return RET_ERROR; } virtual int GetLocalSize(size_t idx, const std::vector &global_size, std::vector *local_size) { - return -1; + return RET_ERROR; } OpenCLMemType GetMemType() { return out_mem_type_; } void SetMemType(OpenCLMemType mem_type) { out_mem_type_ = mem_type; } diff --git a/mindspore/lite/src/runtime/opencl/opencl_allocator.cc b/mindspore/lite/src/runtime/opencl/opencl_allocator.cc index 445b7f7163..e39dcdf277 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_allocator.cc +++ b/mindspore/lite/src/runtime/opencl/opencl_allocator.cc @@ -91,15 +91,15 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector &img_size) cl::Buffer *buffer = new (std::nothrow) cl::Buffer(*ocl_runtime->Context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, size, NULL, &ret); if (buffer == nullptr || ret != CL_SUCCESS) { - MS_LOG(ERROR) << "Create OpenCL buffer failed! (ERROR CODE: " << ret << ")"; UnLock(); + MS_LOG(ERROR) << "Create OpenCL buffer failed! (ERROR CODE: " << ret << ")"; return nullptr; } device_ptr = static_cast(buffer); host_ptr = ocl_runtime->MapBuffer(*buffer, CL_MAP_READ | CL_MAP_WRITE, size); if (host_ptr == nullptr) { - MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << device_ptr << ", host_ptr=" << host_ptr; UnLock(); + MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << device_ptr << ", host_ptr=" << host_ptr; return nullptr; } cl::Memory *mem = buffer; @@ -199,12 +199,15 @@ void OpenCLAllocator::Free(void *buf) { Lock(); auto iter = allocated_list_.find(buf); if (iter != allocated_list_.end()) { + if (iter->second->map_flags) { + UnmapBuffer(buf); + iter->second->map_flags = false; + } auto mem_buf = iter->second; allocated_list_.erase(iter); free_list_.insert(std::make_pair(mem_buf->size_, mem_buf)); UnLock(); - buf = nullptr; - MS_LOG(DEBUG) << "Free a new Image2D. size: " << mem_buf->size_ << ", host addr: " << mem_buf->host_ptr_ + MS_LOG(DEBUG) << "Free device buffer. size: " << mem_buf->size_ << ", host addr: " << mem_buf->host_ptr_ << ", device addr: " << mem_buf->device_ptr_ << ", image addr: " << mem_buf->image_ptr_ << ", free list size: " << free_list_.size(); return; @@ -291,10 +294,16 @@ void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue, Lock(); auto it = allocated_list_.find(host_ptr); if (it == allocated_list_.end()) { - MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << host_ptr; UnLock(); + MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << host_ptr; return nullptr; } + + if (it->second->map_flags) { + UnLock(); + MS_LOG(WARNING) << "Host ptr " << host_ptr << " has mapped"; + return host_ptr; + } MemBuf *mem_buf = it->second; void *new_host_ptr{nullptr}; if (mem_buf->img_size.empty()) { @@ -307,11 +316,13 @@ void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue, new_host_ptr = ocl_runtime->MapBuffer(*image, 0, CL_MAP_READ | CL_MAP_WRITE, region); } if (new_host_ptr == nullptr) { + UnLock(); MS_LOG(WARNING) << "Map buffer failed, can not found buffer or already mapped, dev_ptr=" << mem_buf->device_ptr_ << ", host_ptr=" << host_ptr; - UnLock(); return nullptr; } + + mem_buf->map_flags = true; mem_buf->host_ptr_ = new_host_ptr; allocated_list_.erase(it); allocated_list_[new_host_ptr] = mem_buf; @@ -327,16 +338,22 @@ int OpenCLAllocator::UnmapBuffer(void *host_ptr, void *command_queue) { if (!(svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)) { return ocl_runtime->UnmapBuffer(host_ptr); } - return 0; + return RET_OK; } auto it = allocated_list_.find(host_ptr); if (it == allocated_list_.end()) { MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << host_ptr; - return 1; + return RET_ERROR; + } + if (it->second->map_flags) { + it->second->map_flags = false; + cl::Memory *mem = + static_cast(it->second->img_size.empty() ? it->second->device_ptr_ : it->second->image_ptr_); + return ocl_runtime->UnmapBuffer(*mem, it->second->host_ptr_, static_cast(command_queue)); + } else { + MS_LOG(WARNING) << "Host ptr " << host_ptr << " do not mapped"; + return RET_OK; } - cl::Memory *mem = - static_cast(it->second->img_size.empty() ? it->second->device_ptr_ : it->second->image_ptr_); - return ocl_runtime->UnmapBuffer(*mem, it->second->host_ptr_, static_cast(command_queue)); } MEM_TYPE OpenCLAllocator::GetMemType(void *host_ptr) { @@ -344,8 +361,8 @@ MEM_TYPE OpenCLAllocator::GetMemType(void *host_ptr) { Lock(); auto it = allocated_list_.find(host_ptr); if (it == allocated_list_.end()) { - MS_LOG(ERROR) << "Can not found buffer :" << host_ptr; UnLock(); + MS_LOG(ERROR) << "Can not found buffer :" << host_ptr; return mem_type; } MemBuf *mem_buf = it->second; @@ -362,8 +379,8 @@ int OpenCLAllocator::GetImageSize(void *host_ptr, std::vector *img_size) Lock(); auto it = allocated_list_.find(host_ptr); if (it == allocated_list_.end()) { - MS_LOG(ERROR) << "Can not found buffer :" << host_ptr; UnLock(); + MS_LOG(ERROR) << "Can not found buffer :" << host_ptr; return RET_OK; } MemBuf *mem_buf = it->second; diff --git a/mindspore/lite/src/runtime/opencl/opencl_allocator.h b/mindspore/lite/src/runtime/opencl/opencl_allocator.h index f76d832616..87bf03cc5c 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_allocator.h +++ b/mindspore/lite/src/runtime/opencl/opencl_allocator.h @@ -76,6 +76,7 @@ class OpenCLAllocator : public Allocator { void *host_ptr_; void *image_ptr_; std::vector img_size; + bool map_flags{false}; }; std::mutex lock; diff --git a/mindspore/lite/src/runtime/opencl/opencl_executor.cc b/mindspore/lite/src/runtime/opencl/opencl_executor.cc index aa95bd0922..92a6e47e46 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_executor.cc +++ b/mindspore/lite/src/runtime/opencl/opencl_executor.cc @@ -21,6 +21,9 @@ #include "include/errorcode.h" namespace mindspore::lite::opencl { + +int OpenCLExecutor::Prepare(const std::vector &kernels) { return RET_OK; } + int OpenCLExecutor::Run(std::vector &inputs, std::vector &outputs, std::vector &kernels, Allocator *allocator, const session::KernelCallBack &before, const session::KernelCallBack &after) { @@ -71,136 +74,4 @@ int OpenCLExecutor::Run(std::vector &inputs, std::vectorshape().size()); - auto data_type = tensor->data_type(); - switch (data_type) { - case kNumberTypeInt8: - return TransformTensorLayoutUint8(tensor, src_format, dst_format, trans_dir); - case kNumberTypeFloat32: - return TransformTensorLayoutFp32(tensor, src_format, dst_format, trans_dir); - default: - MS_LOG(ERROR) << "Unsupported layout transform: " << schema::EnumNameFormat(tensor->GetFormat()) << " to " - << schema::EnumNameFormat(dst_format); - return RET_ERROR; - } -} - -int OpenCLExecutor::TransformTensorLayoutFp32(tensor::Tensor *tensor, schema::Format src_format, - schema::Format dst_format, bool trans_dir) { - MS_ASSERT(nullptr != tensor); - MS_ASSERT(nullptr != allocator_); - MS_ASSERT(4 == tensor->shape().size()); - if (trans_dir) { - if (is_image2d_out_) { - return TransformTensorLayoutToImage(tensor, src_format, dst_format); - } else { - return TransformTensorLayoutToBuffer(tensor, src_format, dst_format); - } - } else { - if (is_image2d_out_) { - return TransformTensorLayoutFromImage(tensor, src_format, dst_format); - } else { - return TransformTensorLayoutToBuffer(tensor, src_format, dst_format); - } - } -} - -int OpenCLExecutor::TransformTensorLayoutToBuffer(tensor::Tensor *tensor, schema::Format src_format, - schema::Format dst_format) { - if (dst_format == schema::Format_NHWC4) { - auto *src_data = tensor->Data(); - size_t C4 = UP_DIV(tensor->Channel(), C4NUM); - std::vector img_size{tensor->Width() * C4, (size_t)tensor->Height(), CL_FLOAT}; - if (src_format == schema::Format_NHWC) { - auto *dst_data = allocator_->Malloc(tensor->Size(), img_size); - if (dst_data == nullptr) { - MS_LOG(ERROR) << "Malloc data failed"; - return RET_ERROR; - } - dst_data = reinterpret_cast(allocator_->MapBuffer(dst_data, CL_MAP_WRITE, nullptr, true)); - PackNHWCToNHWC4Fp32(src_data, dst_data, tensor->Batch(), tensor->Height() * tensor->Width(), tensor->Channel()); - tensor->SetData(dst_data); - allocator_->Free(src_data); - allocator_->UnmapBuffer(dst_data); - } - tensor->SetFormat(dst_format); - return RET_OK; - } else if (dst_format == schema::Format_NHWC) { - return RET_OK; - } else { - MS_LOG(ERROR) << "Unsupported layout transform: " << schema::EnumNameFormat(tensor->GetFormat()) << " to " - << schema::EnumNameFormat(dst_format) << " in float32"; - return RET_ERROR; - } -} - -int OpenCLExecutor::TransformTensorLayoutToImage(tensor::Tensor *tensor, schema::Format src_format, - schema::Format dst_format) { - if (dst_format == schema::Format_NHWC4) { - tensor->SetFormat(schema::Format_NHWC4); - // convert to nhwc4 - auto *src_data = tensor->Data(); - auto *dst_data{src_data}; - if (src_format == schema::Format_NHWC) { - dst_data = allocator_->Malloc(tensor->Size()); - if (dst_data == nullptr) { - MS_LOG(ERROR) << "Malloc data failed"; - return RET_ERROR; - } - dst_data = reinterpret_cast(allocator_->MapBuffer(dst_data, CL_MAP_WRITE, nullptr, true)); - PackNHWCToNHWC4Fp32(src_data, dst_data, tensor->Batch(), tensor->Height() * tensor->Width(), tensor->Channel()); - tensor->SetData(dst_data); - allocator_->Free(src_data); - allocator_->UnmapBuffer(dst_data); - } - // copy to image2d - src_data = dst_data; - size_t C4 = UP_DIV(tensor->Channel(), C4NUM); - std::vector img_size{tensor->Width() * C4, (size_t)tensor->Height(), CL_FLOAT}; - dst_data = allocator_->CreateImageFromHost(src_data, tensor->Size(), img_size); - tensor->SetData(dst_data); - allocator_->Free(src_data); - return RET_OK; - } else { - MS_LOG(ERROR) << "Unsupported layout transform: " << schema::EnumNameFormat(tensor->GetFormat()) << " to " - << schema::EnumNameFormat(dst_format) << " in float32"; - return RET_ERROR; - } -} - -int OpenCLExecutor::TransformTensorLayoutFromImage(tensor::Tensor *tensor, schema::Format src_format, - schema::Format dst_format) { - if (dst_format == schema::Format_NHWC) { - auto src_data = tensor->Data(); - auto dst_data = allocator_->Malloc(tensor->Size()); - cl::Image2D *out_mem = reinterpret_cast(allocator_->GetImage(src_data)); - std::vector img_size; - allocator_->GetImageSize(src_data, &img_size); - auto origin = cl::array{0, 0, 0}; - auto region = cl::array{img_size[0], img_size[1], 1}; - auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); - ocl_runtime->GetDefaultCommandQueue()->enqueueReadImage(*out_mem, CL_TRUE, origin, region, 0, 0, dst_data); - tensor->SetData(dst_data); - allocator_->Free(src_data); - return RET_OK; - } else { - MS_LOG(ERROR) << "Unsupported layout transform: " << schema::EnumNameFormat(tensor->GetFormat()) << " to " - << schema::EnumNameFormat(dst_format) << " in float32"; - return RET_ERROR; - } -} - -int OpenCLExecutor::TransformTensorLayoutUint8(tensor::Tensor *tensor, schema::Format src_format, - schema::Format dst_format, bool is_image) { - MS_ASSERT(nullptr != tensor); - MS_ASSERT(4 == tensor->shape().size()); - // auto src_format = tensor->GetFormat(); - MS_LOG(ERROR) << "Unsupported layout transform: " << schema::EnumNameFormat(tensor->GetFormat()) << " to " - << schema::EnumNameFormat(dst_format) << " in uint8"; - return RET_ERROR; -} } // namespace mindspore::lite::opencl diff --git a/mindspore/lite/src/runtime/opencl/opencl_executor.h b/mindspore/lite/src/runtime/opencl/opencl_executor.h index d40a13574f..02058a8ab2 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_executor.h +++ b/mindspore/lite/src/runtime/opencl/opencl_executor.h @@ -27,38 +27,17 @@ namespace mindspore::lite::opencl { class OpenCLExecutor : Executor { public: - OpenCLExecutor() : Executor() { - allocator_ = OpenCLRuntime::GetInstance()->GetAllocator(); - } + OpenCLExecutor() : Executor() { allocator_ = OpenCLRuntime::GetInstance()->GetAllocator(); } - int Prepare(const std::vector &kernels) { return 0; } + int Prepare(const std::vector &kernels); int Run(std::vector &inputs, std::vector &outputs, std::vector &kernels, Allocator *allocator = nullptr, const session::KernelCallBack &before = nullptr, const session::KernelCallBack &after = nullptr); - protected: - int TransformTensorLayoutFp32(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format, - bool trans_dir = false); - - int TransformTensorLayoutUint8(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format, - bool trans_dir = false); - - int TransformTensorLayout(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format, - bool trans_dir = false); - - int TransformTensorLayoutToBuffer(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format); - - int TransformTensorLayoutToImage(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format); - - int TransformTensorLayoutFromImage(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format); - protected: Context *context = nullptr; OpenCLAllocator *allocator_; - bool is_image2d_out_{true}; }; - } // namespace mindspore::lite::opencl #endif - diff --git a/mindspore/lite/src/runtime/opencl/opencl_runtime.cc b/mindspore/lite/src/runtime/opencl/opencl_runtime.cc index 96229e8ada..af6ed55904 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_runtime.cc +++ b/mindspore/lite/src/runtime/opencl/opencl_runtime.cc @@ -20,6 +20,7 @@ #ifdef SHARING_MEM_WITH_OPENGL #include #endif +#include "include/errorcode.h" #include "src/runtime/kernel/opencl/utils.h" #include "src/runtime/opencl/opencl_allocator.h" #ifdef PROGRAM_WITH_IL @@ -80,7 +81,7 @@ int OpenCLRuntime::Init() { std::unique_lock lck(g_init_mtx); if (init_done_) { - return 0; + return RET_OK; } MS_LOG(INFO) << "OpenCL version: CL_TARGET_OPENCL_VERSION " << CL_TARGET_OPENCL_VERSION; MS_LOG(INFO) << "CL_HPP_TARGET_OPENCL_VERSION " << CL_HPP_TARGET_OPENCL_VERSION; @@ -89,7 +90,7 @@ int OpenCLRuntime::Init() { #ifdef USE_OPENCL_WRAPPER if (false == OpenCLWrapper::GetInstance()->LoadOpenCLLibrary()) { MS_LOG(ERROR) << "Load OpenCL symbols failed!"; - return 1; + return RET_ERROR; } #endif // USE_OPENCL_WRAPPER @@ -97,7 +98,7 @@ int OpenCLRuntime::Init() { cl::Platform::get(&platforms); if (platforms.size() == 0) { MS_LOG(ERROR) << "OpenCL Platform not found!"; - return 1; + return RET_ERROR; } // search GPU @@ -119,7 +120,7 @@ int OpenCLRuntime::Init() { // not found, return error code. if (devices.size() == 0) { MS_LOG(ERROR) << "OpenCL Device not found!"; - return 1; + return RET_ERROR; } device_ = std::make_shared(); @@ -158,7 +159,7 @@ int OpenCLRuntime::Init() { #endif if (err != CL_SUCCESS) { MS_LOG(ERROR) << "Context create failed: " << CLErrorCode(err); - return 1; + return RET_ERROR; } // get cache size, compute units and frequency. @@ -206,7 +207,7 @@ int OpenCLRuntime::Init() { default_command_queue_ = std::make_shared(*context_, *device_, properties, &err); if (err != CL_SUCCESS) { MS_LOG(ERROR) << "Command Queue create failed: " << CLErrorCode(err); - return 1; + return RET_ERROR; } allocator_ = std::make_shared(); @@ -217,7 +218,7 @@ int OpenCLRuntime::Init() { init_done_ = true; MS_LOG(INFO) << "OpenCLRuntime init done!"; - return 0; + return RET_OK; } OpenCLRuntime::~OpenCLRuntime() { @@ -314,12 +315,12 @@ int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_na auto status = this->LoadProgram(program_name, &program); if (!status) { MS_LOG(ERROR) << "load program (" << program_name << ") failed!"; - return 1; + return RET_ERROR; } status = this->BuildProgram(build_options_str, &program); if (!status) { MS_LOG(ERROR) << program_name << " build failed!"; - return 1; + return RET_ERROR; } program_map_.emplace(build_program_key, program); } @@ -328,9 +329,9 @@ int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_na kernel = cl::Kernel(program, kernel_name.c_str(), &err); if (err != CL_SUCCESS) { MS_LOG(ERROR) << kernel_name << " Kernel create failed:" << CLErrorCode(err); - return 1; + return RET_ERROR; } - return 0; + return RET_OK; } // Run Kernel with 1D, 2D, 3D group size, and local size can be empty. @@ -365,10 +366,10 @@ int OpenCLRuntime::RunKernel(const cl_kernel &kernel, const std::vector if (error != CL_SUCCESS) { MS_LOG(ERROR) << "Kernel execute failed:" << CLErrorCode(error); - return 1; + return RET_ERROR; } MS_LOG(DEBUG) << "RunKernel success!"; - return 0; + return RET_OK; } // Run Kernel with 1D, 2D, 3D group size, and local size can be empty. @@ -413,14 +414,14 @@ int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector } } else { MS_LOG(ERROR) << "Not supported NDRange!"; - return 1; + return RET_ERROR; } err = command_queue->enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, nullptr, &event); if (err != CL_SUCCESS) { MS_LOG(ERROR) << "Kernel execute failed:" << CLErrorCode(err); - return 1; + return RET_ERROR; } MS_LOG(DEBUG) << "RunKernel success!"; #if MS_OPENCL_PROFILE @@ -432,7 +433,7 @@ int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector double nanoSeconds = time_end - time_start; MS_LOG(INFO) << "OpenCl Execution time is: " << nanoSeconds / 1000000.0 << "ms"; #endif - return 0; + return RET_OK; } // get gpu divce type @@ -534,7 +535,7 @@ void *OpenCLRuntime::MapBuffer(const cl::Buffer buffer, int flags, size_t size, int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::CommandQueue *command_queue, bool sync) const { if (svm_capabilities_ & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) { - return 0; + return RET_OK; } if (command_queue == nullptr) { command_queue = default_command_queue_.get(); @@ -563,7 +564,7 @@ int OpenCLRuntime::UnmapBuffer(const cl::Memory buffer, void *host_ptr, cl::Comm int OpenCLRuntime::UnmapBuffer(void *host_ptr, cl::CommandQueue *command_queue) const { if (svm_capabilities_ & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) { - return 0; + return RET_OK; } if (command_queue == nullptr) { command_queue = default_command_queue_.get(); @@ -578,7 +579,7 @@ bool OpenCLRuntime::SyncCommandQueue(cl::CommandQueue *command_queue) { cl_int ret = command_queue->finish(); if (ret != CL_SUCCESS) { MS_LOG(ERROR) << "Command queue sync failed: " << CLErrorCode(ret); - return 1; + return RET_ERROR; } return ret == CL_SUCCESS; } diff --git a/mindspore/lite/src/runtime/opencl/opencl_wrapper.cc b/mindspore/lite/src/runtime/opencl/opencl_wrapper.cc index 084afc344a..b97f0259ed 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_wrapper.cc +++ b/mindspore/lite/src/runtime/opencl/opencl_wrapper.cc @@ -41,11 +41,14 @@ static const std::vector g_opencl_library_paths = { "/system/lib64/libOpenCL.so", #else // Qualcomm Adreno - "/system/vendor/lib/libOpenCL.so", "/system/lib/libOpenCL.so", + "/system/vendor/lib/libOpenCL.so", + "/system/lib/libOpenCL.so", // Mali - "/system/vendor/lib/egl/libGLES_mali.so", "/system/lib/egl/libGLES_mali.so", + "/system/vendor/lib/egl/libGLES_mali.so", + "/system/lib/egl/libGLES_mali.so", // other - "/system/vendor/lib/libPVROCL.so", "/data/data/org.pocl.libs/files/lib/libpocl.so" + "/system/vendor/lib/libPVROCL.so", + "/data/data/org.pocl.libs/files/lib/libpocl.so" #endif "libOpenCL.so", "libGLES_mali.so", @@ -680,4 +683,3 @@ cl_int clSetKernelArgSVMPointer(cl_kernel kernel, cl_uint index, const void *hos #endif #endif // USE_OPENCL_WRAPPER - diff --git a/mindspore/lite/src/runtime/opencl/opencl_wrapper.h b/mindspore/lite/src/runtime/opencl/opencl_wrapper.h index d4f0d98f9a..fa56d86b43 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_wrapper.h +++ b/mindspore/lite/src/runtime/opencl/opencl_wrapper.h @@ -237,4 +237,3 @@ class OpenCLWrapper { } // namespace mindspore::lite::opencl #endif // USE_OPENCL_WRAPPER #endif // MINDSPORE_LITE_SRC_OPENCL_WRAPPER_H_ - diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc index 4c068c8b82..a752594142 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc @@ -179,13 +179,13 @@ void TestCase(const std::vector &shape_a, const std::vector &shape_b) memcpy(data_c_ocl, outputs[0]->Data(), sizeof(float) * element_num); - // ocl_runtime->SyncCommandQueue(); LogData(data_a, 10, "Data A : "); LogData(data_b, tensor_b->shape().empty() ? 1 : 10, "Data B : "); LogData(data_c_cpu, 10, "Expect compute : "); LogData(outputs[0]->Data(), 10, "OpenCL compute : "); bool cmp = DataCompare(data_c_cpu, data_c_ocl, element_num); MS_LOG(INFO) << "Compare " << (cmp ? "success!" : "failed!"); + EXPECT_EQ(true, cmp); // free delete[] data_a;