@@ -70,7 +70,7 @@ int ArithmeticOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
   img_size->clear();
   std::vector<size_t> vec{im_dst_x, im_dst_y, img_dtype};
   *img_size = vec;
-  return 0;
+  return RET_OK;
 }
 int ArithmeticOpenCLKernel::Init() {
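These hunks replace bare numeric returns with the named codes from include/errorcode.h. For orientation, a minimal sketch of the convention they adopt; the values below are illustrative assumptions, not copied from the actual header:

```cpp
// Sketch of the return-code convention assumed by these hunks.
#include <iostream>

namespace mindspore::lite {
constexpr int RET_OK = 0;      // assumed success value
constexpr int RET_ERROR = -1;  // assumed generic-failure value
}  // namespace mindspore::lite

int main() {
  // Callers compare against a named constant instead of a magic 0 or 1.
  int status = mindspore::lite::RET_OK;
  std::cout << (status == mindspore::lite::RET_OK ? "ok" : "error") << "\n";
  return 0;
}
```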
@@ -59,7 +59,7 @@ int Conv2dTransposeOpenCLKernel::Init() {
   return RET_OK;
 }
-int Conv2dTransposeOpenCLKernel::ReSize() { return 0; }
+int Conv2dTransposeOpenCLKernel::ReSize() { return RET_OK; }
 void Conv2dTransposeOpenCLKernel::PadWeight() {
   ConvParameter *param = reinterpret_cast<ConvParameter *>(op_parameter_);
@@ -67,10 +67,10 @@ int MatMulOpenCLKernel::Init() {
     in_tensors_[0]->SetFormat(schema::Format_NC4);
   }
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
-  return 0;
+  return RET_OK;
 }
-int MatMulOpenCLKernel::ReSize() { return 0; }
+int MatMulOpenCLKernel::ReSize() { return RET_OK; }
 void MatMulOpenCLKernel::PadWeight() {
   auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator();
@@ -147,7 +147,7 @@ int MatMulOpenCLKernel::Run() {
   ocl_runtime->SetKernelArg(kernel_, arg_count++, sizeCO);
   ocl_runtime->SetKernelArg(kernel_, arg_count++, hasBias_ ? 1 : 0);
   ocl_runtime->RunKernel(kernel_, global, local, nullptr);
-  return 0;
+  return RET_OK;
 }
 kernel::LiteKernel *OpenCLMatMulKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
@@ -63,7 +63,7 @@ int ReshapeOpenCLKernel::Init() {
   return RET_OK;
 }
-int ReshapeOpenCLKernel::ReSize() { return 0; }
+int ReshapeOpenCLKernel::ReSize() { return RET_OK; }
 int ReshapeOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
   size_t im_dst_x, im_dst_y;
@@ -64,7 +64,7 @@ int TransposeOpenCLKernel::Init() {
   return RET_OK;
 }
-int TransposeOpenCLKernel::ReSize() { return 0; }
+int TransposeOpenCLKernel::ReSize() { return RET_OK; }
 int TransposeOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_size) {
   size_t im_dst_x, im_dst_y;
@@ -100,7 +100,7 @@ int TransposeOpenCLKernel::Run() {
   ocl_runtime->SetKernelArg(kernel_, 2, HW);
   ocl_runtime->SetKernelArg(kernel_, 3, C);
   ocl_runtime->RunKernel(kernel_, global, local, nullptr);
-  return 0;
+  return RET_OK;
 }
 kernel::LiteKernel *OpenCLTransposeKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
@@ -19,6 +19,7 @@
 #include <vector>
 #include "src/lite_kernel.h"
+#include "include/errorcode.h"
 namespace mindspore::kernel {
@@ -37,15 +38,15 @@ class OpenCLKernel : public LiteKernel {
               const std::vector<lite::tensor::Tensor *> &outputs)
       : LiteKernel(parameter, inputs, outputs, nullptr, nullptr) {}
-  virtual int Init() { return -1; }
-  virtual int Prepare() { return -1; }
-  virtual int InferShape() { return -1; }
-  virtual int ReSize() { return -1; }
-  virtual int Run() { return -1; }
-  virtual int GetImageSize(size_t idx, std::vector<size_t> *img_size) { return -1; }
-  virtual int GetGlobalSize(size_t idx, std::vector<size_t> *global_size) { return -1; }
+  virtual int Init() { return RET_ERROR; }
+  virtual int Prepare() { return RET_ERROR; }
+  virtual int InferShape() { return RET_ERROR; }
+  virtual int ReSize() { return RET_ERROR; }
+  virtual int Run() { return RET_ERROR; }
+  virtual int GetImageSize(size_t idx, std::vector<size_t> *img_size) { return RET_ERROR; }
+  virtual int GetGlobalSize(size_t idx, std::vector<size_t> *global_size) { return RET_ERROR; }
   virtual int GetLocalSize(size_t idx, const std::vector<size_t> &global_size, std::vector<size_t> *local_size) {
-    return -1;
+    return RET_ERROR;
   }
   OpenCLMemType GetMemType() { return out_mem_type_; }
   void SetMemType(OpenCLMemType mem_type) { out_mem_type_ = mem_type; }
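The base class now reports unimplemented hooks with RET_ERROR instead of a bare -1. A self-contained sketch of the override pattern; Base and MyKernel are stand-ins for illustration, not classes from the tree:

```cpp
// Minimal stand-in for the OpenCLKernel hook pattern above.
constexpr int RET_OK = 0;
constexpr int RET_ERROR = -1;  // assumed errorcode.h values

struct Base {
  virtual ~Base() = default;
  // Defaults mean "not implemented", now spelled with a named code.
  virtual int Init() { return RET_ERROR; }
  virtual int Run() { return RET_ERROR; }
};

struct MyKernel : Base {
  int Init() override { return RET_OK; }  // set up state, report success
  int Run() override { return RET_OK; }   // enqueue work, report success
};

int main() {
  MyKernel k;
  return (k.Init() == RET_OK && k.Run() == RET_OK) ? 0 : 1;
}
```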
@@ -91,15 +91,15 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t> &img_size) {
     cl::Buffer *buffer = new (std::nothrow)
       cl::Buffer(*ocl_runtime->Context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, size, NULL, &ret);
     if (buffer == nullptr || ret != CL_SUCCESS) {
-      MS_LOG(ERROR) << "Create OpenCL buffer failed! (ERROR CODE: " << ret << ")";
       UnLock();
+      MS_LOG(ERROR) << "Create OpenCL buffer failed! (ERROR CODE: " << ret << ")";
       return nullptr;
     }
     device_ptr = static_cast<void *>(buffer);
     host_ptr = ocl_runtime->MapBuffer(*buffer, CL_MAP_READ | CL_MAP_WRITE, size);
     if (host_ptr == nullptr) {
-      MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << device_ptr << ", host_ptr=" << host_ptr;
       UnLock();
+      MS_LOG(ERROR) << "Map buffer failed, cannot find buffer: " << device_ptr << ", host_ptr=" << host_ptr;
       return nullptr;
     }
     cl::Memory *mem = buffer;
@@ -199,12 +199,15 @@ void OpenCLAllocator::Free(void *buf) {
   Lock();
   auto iter = allocated_list_.find(buf);
   if (iter != allocated_list_.end()) {
+    if (iter->second->map_flags) {
+      UnmapBuffer(buf);
+      iter->second->map_flags = false;
+    }
     auto mem_buf = iter->second;
     allocated_list_.erase(iter);
     free_list_.insert(std::make_pair(mem_buf->size_, mem_buf));
     UnLock();
     buf = nullptr;
-    MS_LOG(DEBUG) << "Free a new Image2D. size: " << mem_buf->size_ << ", host addr: " << mem_buf->host_ptr_
+    MS_LOG(DEBUG) << "Free device buffer. size: " << mem_buf->size_ << ", host addr: " << mem_buf->host_ptr_
                   << ", device addr: " << mem_buf->device_ptr_ << ", image addr: " << mem_buf->image_ptr_
                   << ", free list size: " << free_list_.size();
     return;
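Free() now unmaps a still-mapped buffer before recycling it, so blocks re-enter the size-keyed free list in an unmapped state. A simplified, self-contained sketch of that flow; the containers are stand-ins for allocated_list_ and free_list_:

```cpp
#include <cstdio>
#include <map>
#include <unordered_map>

struct MemBuf {
  size_t size = 0;
  bool map_flags = false;  // mirrors the new MemBuf::map_flags member
};

std::unordered_map<void *, MemBuf *> allocated_list;
std::multimap<size_t, MemBuf *> free_list;  // keyed by size for reuse

void Free(void *buf) {
  auto iter = allocated_list.find(buf);
  if (iter == allocated_list.end()) return;
  MemBuf *mem_buf = iter->second;
  if (mem_buf->map_flags) {
    // The real code calls UnmapBuffer(buf) here before recycling.
    mem_buf->map_flags = false;
  }
  allocated_list.erase(iter);
  free_list.insert({mem_buf->size, mem_buf});
  std::printf("free list size: %zu\n", free_list.size());
}

int main() {
  MemBuf b{1024, true};
  allocated_list[&b] = &b;
  Free(&b);  // unmapped first, then recycled
  return 0;
}
```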
@@ -291,10 +294,16 @@ void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue, bool sync) {
   Lock();
   auto it = allocated_list_.find(host_ptr);
   if (it == allocated_list_.end()) {
-    MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << host_ptr;
     UnLock();
+    MS_LOG(ERROR) << "Map buffer failed, cannot find buffer: " << host_ptr;
     return nullptr;
   }
+  if (it->second->map_flags) {
+    UnLock();
+    MS_LOG(WARNING) << "Host ptr " << host_ptr << " has already been mapped";
+    return host_ptr;
+  }
   MemBuf *mem_buf = it->second;
   void *new_host_ptr{nullptr};
   if (mem_buf->img_size.empty()) {
@@ -307,11 +316,13 @@ void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue, bool sync) {
     new_host_ptr = ocl_runtime->MapBuffer(*image, 0, CL_MAP_READ | CL_MAP_WRITE, region);
   }
   if (new_host_ptr == nullptr) {
+    UnLock();
     MS_LOG(WARNING) << "Map buffer failed, can not found buffer or already mapped, dev_ptr=" << mem_buf->device_ptr_
                     << ", host_ptr=" << host_ptr;
-    UnLock();
     return nullptr;
   }
+  mem_buf->map_flags = true;
   mem_buf->host_ptr_ = new_host_ptr;
   allocated_list_.erase(it);
   allocated_list_[new_host_ptr] = mem_buf;
@@ -327,16 +338,22 @@ int OpenCLAllocator::UnmapBuffer(void *host_ptr, void *command_queue) {
     if (!(svm_capabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER)) {
       return ocl_runtime->UnmapBuffer(host_ptr);
     }
-    return 0;
+    return RET_OK;
   }
   auto it = allocated_list_.find(host_ptr);
   if (it == allocated_list_.end()) {
     MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << host_ptr;
-    return 1;
+    return RET_ERROR;
   }
+  if (it->second->map_flags) {
+    it->second->map_flags = false;
+    cl::Memory *mem =
+      static_cast<cl::Memory *>(it->second->img_size.empty() ? it->second->device_ptr_ : it->second->image_ptr_);
+    return ocl_runtime->UnmapBuffer(*mem, it->second->host_ptr_, static_cast<cl::CommandQueue *>(command_queue));
+  } else {
+    MS_LOG(WARNING) << "Host ptr " << host_ptr << " is not mapped";
+    return RET_OK;
+  }
-  cl::Memory *mem =
-    static_cast<cl::Memory *>(it->second->img_size.empty() ? it->second->device_ptr_ : it->second->image_ptr_);
-  return ocl_runtime->UnmapBuffer(*mem, it->second->host_ptr_, static_cast<cl::CommandQueue *>(command_queue));
 }
 MEM_TYPE OpenCLAllocator::GetMemType(void *host_ptr) {
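Together with the new map_flags member, MapBuffer/UnmapBuffer become idempotent: a second map returns the existing host pointer with a warning, and an unmap of an unmapped buffer is a warning no-op. A self-contained sketch of that bookkeeping, with simplified stand-ins and no real CL calls:

```cpp
#include <cstdio>

constexpr int RET_OK = 0;  // assumed errorcode.h value

struct MemBuf {
  void *host_ptr = nullptr;
  bool map_flags = false;
};

void *Map(MemBuf *buf) {
  if (buf->map_flags) {
    std::printf("already mapped, returning existing host ptr\n");
    return buf->host_ptr;  // no second driver-level map
  }
  buf->map_flags = true;
  buf->host_ptr = buf;  // stand-in for the driver's mapped pointer
  return buf->host_ptr;
}

int Unmap(MemBuf *buf) {
  if (!buf->map_flags) {
    std::printf("not mapped, nothing to do\n");
    return RET_OK;
  }
  buf->map_flags = false;  // real code forwards to ocl_runtime->UnmapBuffer
  return RET_OK;
}

int main() {
  MemBuf b;
  Map(&b);
  Map(&b);    // second map: warning + same pointer
  Unmap(&b);
  Unmap(&b);  // second unmap: warning + RET_OK
  return 0;
}
```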
@@ -344,8 +361,8 @@ MEM_TYPE OpenCLAllocator::GetMemType(void *host_ptr) {
   Lock();
   auto it = allocated_list_.find(host_ptr);
   if (it == allocated_list_.end()) {
-    MS_LOG(ERROR) << "Can not found buffer :" << host_ptr;
     UnLock();
+    MS_LOG(ERROR) << "Cannot find buffer: " << host_ptr;
     return mem_type;
   }
   MemBuf *mem_buf = it->second;
@@ -362,8 +379,8 @@ int OpenCLAllocator::GetImageSize(void *host_ptr, std::vector<size_t> *img_size) {
   Lock();
   auto it = allocated_list_.find(host_ptr);
   if (it == allocated_list_.end()) {
-    MS_LOG(ERROR) << "Can not found buffer :" << host_ptr;
     UnLock();
+    MS_LOG(ERROR) << "Cannot find buffer: " << host_ptr;
     return RET_OK;
   }
   MemBuf *mem_buf = it->second;
@@ -76,6 +76,7 @@ class OpenCLAllocator : public Allocator {
     void *host_ptr_;
     void *image_ptr_;
     std::vector<size_t> img_size;
+    bool map_flags{false};
   };
   std::mutex lock;
@@ -21,6 +21,9 @@
+#include "include/errorcode.h"
 namespace mindspore::lite::opencl {
+int OpenCLExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) { return RET_OK; }
 int OpenCLExecutor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tensor::Tensor *> &outputs,
                         std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator,
                         const session::KernelCallBack &before, const session::KernelCallBack &after) {
@@ -71,136 +74,4 @@ int OpenCLExecutor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tensor::Tensor *> &outputs,
   }
   return RET_OK;
 }
-int OpenCLExecutor::TransformTensorLayout(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format,
-                                          bool trans_dir) {
-  MS_ASSERT(nullptr != tensor);
-  MS_ASSERT(4 == tensor->shape().size());
-  auto data_type = tensor->data_type();
-  switch (data_type) {
-    case kNumberTypeInt8:
-      return TransformTensorLayoutUint8(tensor, src_format, dst_format, trans_dir);
-    case kNumberTypeFloat32:
-      return TransformTensorLayoutFp32(tensor, src_format, dst_format, trans_dir);
-    default:
-      MS_LOG(ERROR) << "Unsupported layout transform: " << schema::EnumNameFormat(tensor->GetFormat()) << " to "
-                    << schema::EnumNameFormat(dst_format);
-      return RET_ERROR;
-  }
-}
-int OpenCLExecutor::TransformTensorLayoutFp32(tensor::Tensor *tensor, schema::Format src_format,
-                                              schema::Format dst_format, bool trans_dir) {
-  MS_ASSERT(nullptr != tensor);
-  MS_ASSERT(nullptr != allocator_);
-  MS_ASSERT(4 == tensor->shape().size());
-  if (trans_dir) {
-    if (is_image2d_out_) {
-      return TransformTensorLayoutToImage(tensor, src_format, dst_format);
-    } else {
-      return TransformTensorLayoutToBuffer(tensor, src_format, dst_format);
-    }
-  } else {
-    if (is_image2d_out_) {
-      return TransformTensorLayoutFromImage(tensor, src_format, dst_format);
-    } else {
-      return TransformTensorLayoutToBuffer(tensor, src_format, dst_format);
-    }
-  }
-}
-int OpenCLExecutor::TransformTensorLayoutToBuffer(tensor::Tensor *tensor, schema::Format src_format,
-                                                  schema::Format dst_format) {
-  if (dst_format == schema::Format_NHWC4) {
-    auto *src_data = tensor->Data();
-    size_t C4 = UP_DIV(tensor->Channel(), C4NUM);
-    std::vector<size_t> img_size{tensor->Width() * C4, (size_t)tensor->Height(), CL_FLOAT};
-    if (src_format == schema::Format_NHWC) {
-      auto *dst_data = allocator_->Malloc(tensor->Size(), img_size);
-      if (dst_data == nullptr) {
-        MS_LOG(ERROR) << "Malloc data failed";
-        return RET_ERROR;
-      }
-      dst_data = reinterpret_cast<FLOAT_t *>(allocator_->MapBuffer(dst_data, CL_MAP_WRITE, nullptr, true));
-      PackNHWCToNHWC4Fp32(src_data, dst_data, tensor->Batch(), tensor->Height() * tensor->Width(), tensor->Channel());
-      tensor->SetData(dst_data);
-      allocator_->Free(src_data);
-      allocator_->UnmapBuffer(dst_data);
-    }
-    tensor->SetFormat(dst_format);
-    return RET_OK;
-  } else if (dst_format == schema::Format_NHWC) {
-    return RET_OK;
-  } else {
-    MS_LOG(ERROR) << "Unsupported layout transform: " << schema::EnumNameFormat(tensor->GetFormat()) << " to "
-                  << schema::EnumNameFormat(dst_format) << " in float32";
-    return RET_ERROR;
-  }
-}
-int OpenCLExecutor::TransformTensorLayoutToImage(tensor::Tensor *tensor, schema::Format src_format,
-                                                 schema::Format dst_format) {
-  if (dst_format == schema::Format_NHWC4) {
-    tensor->SetFormat(schema::Format_NHWC4);
-    // convert to nhwc4
-    auto *src_data = tensor->Data();
-    auto *dst_data{src_data};
-    if (src_format == schema::Format_NHWC) {
-      dst_data = allocator_->Malloc(tensor->Size());
-      if (dst_data == nullptr) {
-        MS_LOG(ERROR) << "Malloc data failed";
-        return RET_ERROR;
-      }
-      dst_data = reinterpret_cast<FLOAT_t *>(allocator_->MapBuffer(dst_data, CL_MAP_WRITE, nullptr, true));
-      PackNHWCToNHWC4Fp32(src_data, dst_data, tensor->Batch(), tensor->Height() * tensor->Width(), tensor->Channel());
-      tensor->SetData(dst_data);
-      allocator_->Free(src_data);
-      allocator_->UnmapBuffer(dst_data);
-    }
-    // copy to image2d
-    src_data = dst_data;
-    size_t C4 = UP_DIV(tensor->Channel(), C4NUM);
-    std::vector<size_t> img_size{tensor->Width() * C4, (size_t)tensor->Height(), CL_FLOAT};
-    dst_data = allocator_->CreateImageFromHost(src_data, tensor->Size(), img_size);
-    tensor->SetData(dst_data);
-    allocator_->Free(src_data);
-    return RET_OK;
-  } else {
-    MS_LOG(ERROR) << "Unsupported layout transform: " << schema::EnumNameFormat(tensor->GetFormat()) << " to "
-                  << schema::EnumNameFormat(dst_format) << " in float32";
-    return RET_ERROR;
-  }
-}
-int OpenCLExecutor::TransformTensorLayoutFromImage(tensor::Tensor *tensor, schema::Format src_format,
-                                                   schema::Format dst_format) {
-  if (dst_format == schema::Format_NHWC) {
-    auto src_data = tensor->Data();
-    auto dst_data = allocator_->Malloc(tensor->Size());
-    cl::Image2D *out_mem = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(src_data));
-    std::vector<size_t> img_size;
-    allocator_->GetImageSize(src_data, &img_size);
-    auto origin = cl::array<cl::size_type, 3U>{0, 0, 0};
-    auto region = cl::array<cl::size_type, 3U>{img_size[0], img_size[1], 1};
-    auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
-    ocl_runtime->GetDefaultCommandQueue()->enqueueReadImage(*out_mem, CL_TRUE, origin, region, 0, 0, dst_data);
-    tensor->SetData(dst_data);
-    allocator_->Free(src_data);
-    return RET_OK;
-  } else {
-    MS_LOG(ERROR) << "Unsupported layout transform: " << schema::EnumNameFormat(tensor->GetFormat()) << " to "
-                  << schema::EnumNameFormat(dst_format) << " in float32";
-    return RET_ERROR;
-  }
-}
-int OpenCLExecutor::TransformTensorLayoutUint8(tensor::Tensor *tensor, schema::Format src_format,
-                                               schema::Format dst_format, bool is_image) {
-  MS_ASSERT(nullptr != tensor);
-  MS_ASSERT(4 == tensor->shape().size());
-  // auto src_format = tensor->GetFormat();
-  MS_LOG(ERROR) << "Unsupported layout transform: " << schema::EnumNameFormat(tensor->GetFormat()) << " to "
-                << schema::EnumNameFormat(dst_format) << " in uint8";
-  return RET_ERROR;
-}
 }  // namespace mindspore::lite::opencl
@@ -27,38 +27,17 @@
 namespace mindspore::lite::opencl {
 class OpenCLExecutor : Executor {
  public:
-  OpenCLExecutor() : Executor() {
-    allocator_ = OpenCLRuntime::GetInstance()->GetAllocator();
-  }
+  OpenCLExecutor() : Executor() { allocator_ = OpenCLRuntime::GetInstance()->GetAllocator(); }
-  int Prepare(const std::vector<kernel::LiteKernel *> &kernels) { return 0; }
+  int Prepare(const std::vector<kernel::LiteKernel *> &kernels);
   int Run(std::vector<tensor::Tensor *> &inputs, std::vector<tensor::Tensor *> &outputs,
           std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator = nullptr,
           const session::KernelCallBack &before = nullptr, const session::KernelCallBack &after = nullptr);
- protected:
-  int TransformTensorLayoutFp32(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format,
-                                bool trans_dir = false);
-  int TransformTensorLayoutUint8(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format,
-                                 bool trans_dir = false);
-  int TransformTensorLayout(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format,
-                            bool trans_dir = false);
-  int TransformTensorLayoutToBuffer(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format);
-  int TransformTensorLayoutToImage(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format);
-  int TransformTensorLayoutFromImage(tensor::Tensor *tensor, schema::Format src_format, schema::Format dst_format);
  protected:
   Context *context = nullptr;
   OpenCLAllocator *allocator_;
   bool is_image2d_out_{true};
 };
 }  // namespace mindspore::lite::opencl
 #endif
@@ -20,6 +20,7 @@
 #ifdef SHARING_MEM_WITH_OPENGL
 #include <EGL/egl.h>
 #endif
+#include "include/errorcode.h"
 #include "src/runtime/kernel/opencl/utils.h"
 #include "src/runtime/opencl/opencl_allocator.h"
 #ifdef PROGRAM_WITH_IL
@@ -80,7 +81,7 @@ int OpenCLRuntime::Init() {
   std::unique_lock<std::mutex> lck(g_init_mtx);
   if (init_done_) {
-    return 0;
+    return RET_OK;
   }
   MS_LOG(INFO) << "OpenCL version: CL_TARGET_OPENCL_VERSION " << CL_TARGET_OPENCL_VERSION;
   MS_LOG(INFO) << "CL_HPP_TARGET_OPENCL_VERSION " << CL_HPP_TARGET_OPENCL_VERSION;
@@ -89,7 +90,7 @@ int OpenCLRuntime::Init() {
 #ifdef USE_OPENCL_WRAPPER
   if (false == OpenCLWrapper::GetInstance()->LoadOpenCLLibrary()) {
     MS_LOG(ERROR) << "Load OpenCL symbols failed!";
-    return 1;
+    return RET_ERROR;
   }
 #endif  // USE_OPENCL_WRAPPER
@@ -97,7 +98,7 @@ int OpenCLRuntime::Init() {
   cl::Platform::get(&platforms);
   if (platforms.size() == 0) {
     MS_LOG(ERROR) << "OpenCL Platform not found!";
-    return 1;
+    return RET_ERROR;
   }
   // search GPU
@@ -119,7 +120,7 @@ int OpenCLRuntime::Init() {
   // not found, return error code.
   if (devices.size() == 0) {
     MS_LOG(ERROR) << "OpenCL Device not found!";
-    return 1;
+    return RET_ERROR;
   }
   device_ = std::make_shared<cl::Device>();
@@ -158,7 +159,7 @@ int OpenCLRuntime::Init() {
 #endif
   if (err != CL_SUCCESS) {
     MS_LOG(ERROR) << "Context create failed: " << CLErrorCode(err);
-    return 1;
+    return RET_ERROR;
   }
   // get cache size, compute units and frequency.
@@ -206,7 +207,7 @@ int OpenCLRuntime::Init() {
   default_command_queue_ = std::make_shared<cl::CommandQueue>(*context_, *device_, properties, &err);
   if (err != CL_SUCCESS) {
     MS_LOG(ERROR) << "Command Queue create failed: " << CLErrorCode(err);
-    return 1;
+    return RET_ERROR;
   }
   allocator_ = std::make_shared<OpenCLAllocator>();
@@ -217,7 +218,7 @@ int OpenCLRuntime::Init() {
   init_done_ = true;
   MS_LOG(INFO) << "OpenCLRuntime init done!";
-  return 0;
+  return RET_OK;
 }
 OpenCLRuntime::~OpenCLRuntime() {
@@ -314,12 +315,12 @@ int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_name,
     auto status = this->LoadProgram(program_name, &program);
     if (!status) {
       MS_LOG(ERROR) << "load program (" << program_name << ") failed!";
-      return 1;
+      return RET_ERROR;
     }
     status = this->BuildProgram(build_options_str, &program);
     if (!status) {
       MS_LOG(ERROR) << program_name << " build failed!";
-      return 1;
+      return RET_ERROR;
     }
     program_map_.emplace(build_program_key, program);
   }
@@ -328,9 +329,9 @@ int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_name,
   kernel = cl::Kernel(program, kernel_name.c_str(), &err);
   if (err != CL_SUCCESS) {
     MS_LOG(ERROR) << kernel_name << " Kernel create failed:" << CLErrorCode(err);
-    return 1;
+    return RET_ERROR;
   }
-  return 0;
+  return RET_OK;
 }
 // Run Kernel with 1D, 2D, 3D group size, and local size can be empty.
@@ -365,10 +366,10 @@ int OpenCLRuntime::RunKernel(const cl_kernel &kernel, const std::vector<size_t> &global,
   if (error != CL_SUCCESS) {
     MS_LOG(ERROR) << "Kernel execute failed:" << CLErrorCode(error);
-    return 1;
+    return RET_ERROR;
   }
   MS_LOG(DEBUG) << "RunKernel success!";
-  return 0;
+  return RET_OK;
 }
 // Run Kernel with 1D, 2D, 3D group size, and local size can be empty.
| // Run Kernel with 1D, 2D, 3D group size, and local size can be empty. | |||
| @@ -413,14 +414,14 @@ int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector<size_t> | |||
| } | |||
| } else { | |||
| MS_LOG(ERROR) << "Not supported NDRange!"; | |||
| return 1; | |||
| return RET_ERROR; | |||
| } | |||
| err = command_queue->enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, nullptr, &event); | |||
| if (err != CL_SUCCESS) { | |||
| MS_LOG(ERROR) << "Kernel execute failed:" << CLErrorCode(err); | |||
| return 1; | |||
| return RET_ERROR; | |||
| } | |||
| MS_LOG(DEBUG) << "RunKernel success!"; | |||
| #if MS_OPENCL_PROFILE | |||
@@ -432,7 +433,7 @@ int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector<size_t> &global,
   double nanoSeconds = time_end - time_start;
   MS_LOG(INFO) << "OpenCl Execution time is: " << nanoSeconds / 1000000.0 << "ms";
 #endif
-  return 0;
+  return RET_OK;
 }
 // get gpu divce type
@@ -534,7 +535,7 @@ void *OpenCLRuntime::MapBuffer(const cl::Buffer buffer, int flags, size_t size, cl::CommandQueue *command_queue) const {
 int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::CommandQueue *command_queue, bool sync) const {
   if (svm_capabilities_ & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) {
-    return 0;
+    return RET_OK;
   }
   if (command_queue == nullptr) {
     command_queue = default_command_queue_.get();
@@ -563,7 +564,7 @@ int OpenCLRuntime::UnmapBuffer(const cl::Memory buffer, void *host_ptr, cl::CommandQueue *command_queue) const {
 int OpenCLRuntime::UnmapBuffer(void *host_ptr, cl::CommandQueue *command_queue) const {
   if (svm_capabilities_ & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) {
-    return 0;
+    return RET_OK;
   }
   if (command_queue == nullptr) {
     command_queue = default_command_queue_.get();
@@ -578,7 +579,7 @@ bool OpenCLRuntime::SyncCommandQueue(cl::CommandQueue *command_queue) {
   cl_int ret = command_queue->finish();
   if (ret != CL_SUCCESS) {
     MS_LOG(ERROR) << "Command queue sync failed: " << CLErrorCode(ret);
-    return 1;
+    return false;
   }
   return ret == CL_SUCCESS;
 }
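SyncCommandQueue returns bool, so its failure path returns false here rather than an int error code: a nonzero value such as RET_ERROR would convert to true and report success. A short illustration of the implicit conversion:

```cpp
#include <iostream>

constexpr int RET_ERROR = -1;  // assumed errorcode.h value

int main() {
  // Any nonzero int converts to true, which would mask the failure.
  std::cout << std::boolalpha
            << static_cast<bool>(RET_ERROR) << "\n"  // true
            << static_cast<bool>(0) << "\n";         // false
  return 0;
}
```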
@@ -41,11 +41,14 @@ static const std::vector<std::string> g_opencl_library_paths = {
   "/system/lib64/libOpenCL.so",
 #else
   // Qualcomm Adreno
-  "/system/vendor/lib/libOpenCL.so", "/system/lib/libOpenCL.so",
+  "/system/vendor/lib/libOpenCL.so",
+  "/system/lib/libOpenCL.so",
   // Mali
-  "/system/vendor/lib/egl/libGLES_mali.so", "/system/lib/egl/libGLES_mali.so",
+  "/system/vendor/lib/egl/libGLES_mali.so",
+  "/system/lib/egl/libGLES_mali.so",
   // other
-  "/system/vendor/lib/libPVROCL.so", "/data/data/org.pocl.libs/files/lib/libpocl.so"
+  "/system/vendor/lib/libPVROCL.so",
+  "/data/data/org.pocl.libs/files/lib/libpocl.so",
 #endif
   "libOpenCL.so",
   "libGLES_mali.so",
@@ -680,4 +683,3 @@ cl_int clSetKernelArgSVMPointer(cl_kernel kernel, cl_uint index, const void *host_ptr) {
 #endif
 #endif  // USE_OPENCL_WRAPPER
@@ -237,4 +237,3 @@ class OpenCLWrapper {
 }  // namespace mindspore::lite::opencl
 #endif  // USE_OPENCL_WRAPPER
 #endif  // MINDSPORE_LITE_SRC_OPENCL_WRAPPER_H_
@@ -179,13 +179,13 @@ void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b) {
   memcpy(data_c_ocl, outputs[0]->Data(), sizeof(float) * element_num);
   // ocl_runtime->SyncCommandQueue();
   LogData(data_a, 10, "Data A : ");
   LogData(data_b, tensor_b->shape().empty() ? 1 : 10, "Data B : ");
   LogData(data_c_cpu, 10, "Expect compute : ");
   LogData(outputs[0]->Data(), 10, "OpenCL compute : ");
   bool cmp = DataCompare(data_c_cpu, data_c_ocl, element_num);
   MS_LOG(INFO) << "Compare " << (cmp ? "success!" : "failed!");
   EXPECT_EQ(true, cmp);
   // free
   delete[] data_a;