@@ -318,6 +318,7 @@ size_t LiteTensor::Size() const {
 void *LiteTensor::MutableData() const {
   MS_ASSERT(this->tensor_impl_ != nullptr);
+  this->tensor_impl_->Prepare();
   auto data = this->tensor_impl_->Data();
   if (nullptr == data) {
     auto ret = tensor_impl_->MallocData();
@@ -177,6 +177,12 @@ class Tensor : public mindspore::tensor::MetaTensor {
   std::vector<tensor::QuantArg> GetQuantParams() const;

+  void Prepare() {
+    if (allocator_ != nullptr) {
+      data_ = allocator_->Prepare(data_);
+    }
+  }
+
 protected:
   void *data_ = nullptr;
   void *device_data_ = nullptr;
@@ -41,6 +41,7 @@ class Allocator {
   virtual size_t GetTotalSize() { return 0; }
   virtual void Clear() {}
   static std::shared_ptr<Allocator> Create();
+  virtual void *Prepare(void *ptr) { return ptr; }
   std::string name;
 };
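Taken together, the three hunks above wire a lazy "prepare before access" hook through the tensor API: `LiteTensor::MutableData()` now calls `Tensor::Prepare()`, which forwards to the allocator's new virtual `Prepare(void *)`, a no-op in the base `Allocator`. Below is a minimal standalone sketch of that hook pattern; class names and members are illustrative stand-ins, not the actual MindSpore Lite definitions.

```cpp
#include <iostream>

// Simplified stand-ins for the classes touched above (illustrative only).
class Allocator {
 public:
  virtual ~Allocator() = default;
  // Default hook: CPU-style allocators need no preparation, so the pointer
  // is returned unchanged.
  virtual void *Prepare(void *ptr) { return ptr; }
};

class GpuLikeAllocator : public Allocator {
 public:
  // A device-aware allocator overrides Prepare() to make the buffer
  // host-accessible before the caller dereferences it.
  void *Prepare(void *ptr) override {
    if (ptr != nullptr) {
      std::cout << "mapping device buffer for host write\n";
      // real code would do: ptr = MapBuffer(ptr, CL_MAP_WRITE, nullptr, true);
    }
    return ptr;
  }
};

class Tensor {
 public:
  explicit Tensor(Allocator *allocator) : allocator_(allocator) {}
  void *MutableData() {
    // Mirrors LiteTensor::MutableData(): prepare before handing out data_.
    if (allocator_ != nullptr) {
      data_ = allocator_->Prepare(data_);
    }
    return data_;
  }

 private:
  Allocator *allocator_;
  void *data_ = nullptr;
};

int main() {
  GpuLikeAllocator alloc;
  Tensor t(&alloc);
  t.MutableData();  // triggers the Prepare() hook; nullptr data stays nullptr
  return 0;
}
```

Device-aware allocators can override the hook to map memory on first host access, while CPU allocators simply inherit the pass-through default.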
@@ -165,17 +165,6 @@ int SubGraphOpenCLKernel::Init() {
   MallocTensorWithReuse();

-  // Map buffer for write, it is not necessary for fine-grained
-  for (auto &tensor : in_tensors_) {
-    void *data = tensor->Data();
-    // It is required with coarse-grained SVM
-    if (data != nullptr) {
-      data = allocator_->MapBuffer(data, CL_MAP_WRITE, nullptr, true);
-      tensor->SetData(data);
-    } else {
-      MS_LOG(ERROR) << "SubGraphOpenCLKernel input nullptr!";
-    }
-  }
   return RET_OK;
 }
@@ -254,26 +243,13 @@ int SubGraphOpenCLKernel::GetKernelFromToTensor(const std::vector<lite::tensor::
 }

 int SubGraphOpenCLKernel::UnInit() {
-  for (const auto tensor : in_tensors_) {
-    if (tensor != nullptr) {
-      tensor->FreeData();
-    }
-  }
-  for (const auto tensor : out_tensors_) {
-    if (tensor != nullptr) {
-      allocator_->UnmapBuffer(tensor->Data());
-      tensor->FreeData();
-    }
-  }
   for (const auto tensor : in_convert_tensors_) {
     if (tensor != nullptr) {
-      tensor->FreeData();
       delete tensor;
     }
   }
   for (const auto tensor : out_convert_tensors_) {
     if (tensor != nullptr) {
-      tensor->FreeData();
       delete tensor;
     }
   }
@@ -202,13 +202,14 @@ void OpenCLAllocator::Free(void *buf) {
     allocated_list_.erase(iter);
     free_list_.insert(std::make_pair(mem_buf->size_, mem_buf));
     UnLock();
+    buf = nullptr;
     MS_LOG(DEBUG) << "Free a new Image2D. size: " << mem_buf->size_ << ", host addr: " << mem_buf->host_ptr_
-                  << ", device addr: " << mem_buf->device_ptr_ << ", image addr: " << mem_buf->image_ptr_;
+                  << ", device addr: " << mem_buf->device_ptr_ << ", image addr: " << mem_buf->image_ptr_
+                  << ", free list size: " << free_list_.size();
     return;
   }
   UnLock();
-  free(buf);
-  MS_LOG(DEBUG) << "Free host ptr: " << buf;
+  MS_LOG(WARNING) << "Host ptr " << buf << " has been freed";
 }

 size_t OpenCLAllocator::GetTotalSize() {
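For context on the `Free()` change: a buffer found in `allocated_list_` is returned to the size-keyed `free_list_` for reuse, while a pointer the allocator does not recognize is no longer passed to `free()` and only triggers a warning. The sketch below shows that free-list reuse policy in isolation; it is a generic illustration, not the real `OpenCLAllocator` API.

```cpp
#include <cstdlib>
#include <iostream>
#include <map>
#include <unordered_map>

// Illustrative free-list allocator: Free() parks a known buffer on a
// size-keyed free list for later reuse and only warns about pointers it
// does not own, instead of calling free().
class FreeListAllocator {
 public:
  void *Malloc(size_t size) {
    auto it = free_list_.lower_bound(size);
    if (it != free_list_.end()) {
      void *buf = it->second;
      allocated_[buf] = it->first;
      free_list_.erase(it);
      return buf;  // reuse a previously freed block
    }
    void *buf = std::malloc(size);
    allocated_[buf] = size;
    return buf;
  }

  void Free(void *buf) {
    auto it = allocated_.find(buf);
    if (it != allocated_.end()) {
      free_list_.emplace(it->second, buf);  // keep the block for reuse
      allocated_.erase(it);
      return;
    }
    // Unknown pointer: warn instead of freeing memory we do not manage.
    std::cout << "Host ptr " << buf << " has been freed or is not managed\n";
  }

  ~FreeListAllocator() {
    for (auto &kv : allocated_) std::free(kv.first);
    for (auto &kv : free_list_) std::free(kv.second);
  }

 private:
  std::unordered_map<void *, size_t> allocated_;
  std::multimap<size_t, void *> free_list_;
};

int main() {
  FreeListAllocator alloc;
  void *a = alloc.Malloc(64);
  alloc.Free(a);               // goes to the free list
  void *b = alloc.Malloc(32);  // reuses the 64-byte block
  alloc.Free(b);
  int x;
  alloc.Free(&x);              // unmanaged pointer: warning only
  return 0;
}
```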
@@ -305,7 +306,8 @@ void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue,
     new_host_ptr = ocl_runtime->MapBuffer(*image, 0, CL_MAP_READ | CL_MAP_WRITE, region);
   }
   if (new_host_ptr == nullptr) {
-    MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << mem_buf->device_ptr_ << ", host_ptr=" << host_ptr;
+    MS_LOG(WARNING) << "Map buffer failed, cannot find buffer or already mapped, dev_ptr=" << mem_buf->device_ptr_
+                    << ", host_ptr=" << host_ptr;
     UnLock();
     return nullptr;
   }
@@ -25,6 +25,7 @@
 #include <unordered_map>
 #include <unordered_set>
 #include "src/runtime/allocator.h"
+#include "CL/cl2.hpp"

 namespace mindspore::lite::opencl {
@@ -59,6 +60,12 @@ class OpenCLAllocator : public Allocator {
   int UnmapBuffer(void *host_ptr, void *command_queue = nullptr);
   MEM_TYPE GetMemType(void *host_ptr);
   int GetImageSize(void *host_ptr, std::vector<size_t> *img_size);
+  void *Prepare(void *ptr) override {
+    if (ptr != nullptr) {
+      ptr = MapBuffer(ptr, CL_MAP_WRITE, nullptr, true);
+    }
+    return ptr;
+  }

 private:
  void Lock();
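With this override, the chain `LiteTensor::MutableData()` → `Tensor::Prepare()` → `OpenCLAllocator::Prepare()` maps a coarse-grained buffer for host writes on demand, replacing the eager mapping removed from `SubGraphOpenCLKernel::Init()`. The snippet below shows the underlying raw-OpenCL map-for-write pattern in isolation; it assumes an OpenCL runtime is available and is not MindSpore code (error handling trimmed).

```cpp
#define CL_TARGET_OPENCL_VERSION 120
#include <CL/cl.h>
#include <cstring>

int main() {
  cl_platform_id platform;
  clGetPlatformIDs(1, &platform, nullptr);
  cl_device_id device;
  clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, nullptr);
  cl_int err = CL_SUCCESS;
  cl_context ctx = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &err);
  cl_command_queue queue = clCreateCommandQueue(ctx, device, 0, &err);

  const size_t size = 1024;
  cl_mem buf = clCreateBuffer(ctx, CL_MEM_READ_WRITE, size, nullptr, &err);

  // "Prepare" step: map the device buffer so the host can write input data.
  void *host_ptr = clEnqueueMapBuffer(queue, buf, CL_TRUE, CL_MAP_WRITE, 0,
                                      size, 0, nullptr, nullptr, &err);
  if (host_ptr != nullptr) {
    std::memset(host_ptr, 0, size);  // host fills the tensor data here
    // Hand the buffer back to the device before kernels consume it.
    clEnqueueUnmapMemObject(queue, buf, host_ptr, 0, nullptr, nullptr);
    clFinish(queue);
  }

  clReleaseMemObject(buf);
  clReleaseCommandQueue(queue);
  clReleaseContext(ctx);
  return 0;
}
```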
@@ -201,26 +201,10 @@ kernel::LiteKernel *Scheduler::CreateSubKernel(const std::vector<kernel::LiteKer
   kernel::LiteKernel *sub_kernel = nullptr;
 #if SUPPORT_GPU
   if (arch == kernel::KERNEL_ARCH::kGPU) {
-    auto head_kernel = kernels.front();
-    auto tail_kernel = kernels.back();
-    std::vector<kernel::LiteKernel *> input_kernels{head_kernel};
-    std::vector<kernel::LiteKernel *> output_kernels{tail_kernel};
-    std::vector<tensor::Tensor *> input_tensors;
-    std::vector<tensor::Tensor *> output_tensors;
-    for (auto tensor : head_kernel->in_tensors()) {
-      if (tensor->Data() == nullptr) {
-        input_tensors.emplace_back(tensor);
-      }
-    }
-    for (auto tensor : tail_kernel->out_tensors()) {
-      if (tensor->Data() == nullptr) {
-        output_tensors.emplace_back(tensor);
-      }
-    }
-    // std::vector<tensor::Tensor *> input_tensors = kernel::LiteKernelUtil::SubgraphInputTensors(kernels);
-    // std::vector<tensor::Tensor *> output_tensors = kernel::LiteKernelUtil::SubgraphOutputTensors(kernels);
-    // std::vector<kernel::LiteKernel *> input_kernels = kernel::LiteKernelUtil::SubgraphInputKernels(kernels);
-    // std::vector<kernel::LiteKernel *> output_kernels = kernel::LiteKernelUtil::SubgraphOutputKernels(kernels);
+    std::vector<tensor::Tensor *> input_tensors = kernel::LiteKernelUtil::SubgraphInputTensors(kernels);
+    std::vector<tensor::Tensor *> output_tensors = kernel::LiteKernelUtil::SubgraphOutputTensors(kernels);
+    std::vector<kernel::LiteKernel *> input_kernels = kernel::LiteKernelUtil::SubgraphInputKernels(kernels);
+    std::vector<kernel::LiteKernel *> output_kernels = kernel::LiteKernelUtil::SubgraphOutputKernels(kernels);
     sub_kernel =
       new kernel::SubGraphOpenCLKernel(input_tensors, output_tensors, input_kernels, output_kernels, kernels);
     sub_kernel->Init();
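The scheduler now derives the subgraph boundary from the whole kernel list via the `LiteKernelUtil` helpers rather than inspecting only the head and tail kernels. The sketch below is one plausible model of what such a helper computes; the semantics ("consumed inside the subgraph but produced outside it") are an assumption for illustration, not the actual implementation.

```cpp
#include <set>
#include <vector>

// Hypothetical minimal kernel-graph model (not the MindSpore classes).
struct Tensor {};
struct Kernel {
  std::vector<Tensor *> inputs;
  std::vector<Tensor *> outputs;
};

// Assumed semantics: a subgraph input is a tensor consumed by some kernel in
// the subgraph but not produced by any kernel inside it.
std::vector<Tensor *> SubgraphInputTensors(const std::vector<Kernel *> &kernels) {
  std::set<Tensor *> produced;
  for (auto *k : kernels) {
    produced.insert(k->outputs.begin(), k->outputs.end());
  }
  std::set<Tensor *> seen;
  std::vector<Tensor *> result;
  for (auto *k : kernels) {
    for (auto *t : k->inputs) {
      if (produced.count(t) == 0 && seen.insert(t).second) {
        result.push_back(t);  // crosses the subgraph boundary
      }
    }
  }
  return result;
}

// SubgraphOutputTensors would mirror this: tensors produced inside the
// subgraph that no kernel inside it consumes.
```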