Browse Source

add Prepare for tensor read/write

tags/v0.7.0-beta
wandongdong 5 years ago
parent
commit
db06da7cc1
7 changed files with 25 additions and 48 deletions
  1. +1
    -0
      mindspore/lite/src/ir/tensor.cc
  2. +6
    -0
      mindspore/lite/src/ir/tensor.h
  3. +1
    -0
      mindspore/lite/src/runtime/allocator.h
  4. +0
    -24
      mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc
  5. +6
    -4
      mindspore/lite/src/runtime/opencl/opencl_allocator.cc
  6. +7
    -0
      mindspore/lite/src/runtime/opencl/opencl_allocator.h
  7. +4
    -20
      mindspore/lite/src/scheduler.cc

+ 1
- 0
mindspore/lite/src/ir/tensor.cc View File

@@ -318,6 +318,7 @@ size_t LiteTensor::Size() const {

void *LiteTensor::MutableData() const {
MS_ASSERT(this->tensor_impl_ != nullptr);
this->tensor_impl_->Prepare();
auto data = this->tensor_impl_->Data();
if (nullptr == data) {
auto ret = tensor_impl_->MallocData();


+ 6
- 0
mindspore/lite/src/ir/tensor.h View File

@@ -177,6 +177,12 @@ class Tensor : public mindspore::tensor::MetaTensor {

std::vector<tensor::QuantArg> GetQuantParams() const;

// Lets the tensor's allocator, when one is set, prepare the data buffer:
// data_ is replaced by whatever pointer allocator_->Prepare(data_) returns.
// Without an allocator this is a no-op.
// NOTE(review): a nullptr handed back by the allocator overwrites data_
// unchanged — confirm callers are fine with that.
void Prepare() {
if (allocator_ == nullptr) {
return;
}
data_ = allocator_->Prepare(data_);
}

protected:
void *data_ = nullptr;
void *device_data_ = nullptr;


+ 1
- 0
mindspore/lite/src/runtime/allocator.h View File

@@ -41,6 +41,7 @@ class Allocator {
// Base implementation reports a total size of 0; subclasses may override.
virtual size_t GetTotalSize() { return 0; }
// Base implementation does nothing; subclasses may override.
virtual void Clear() {}
// Static factory returning a shared Allocator (definition not visible here).
static std::shared_ptr<Allocator> Create();
// Preparation hook for a data pointer. The default returns ptr unchanged;
// an override may return a different pointer for the caller to use instead.
virtual void *Prepare(void *ptr) { return ptr; }
// Allocator name (presumably for identification/logging — TODO confirm).
std::string name;
};



+ 0
- 24
mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc View File

@@ -165,17 +165,6 @@ int SubGraphOpenCLKernel::Init() {

MallocTensorWithReuse();

// Map buffer for write, it is not necessary for fine-grained
for (auto &tensor : in_tensors_) {
void *data = tensor->Data();
// It is required with coarse-grained SVM
if (data != nullptr) {
data = allocator_->MapBuffer(data, CL_MAP_WRITE, nullptr, true);
tensor->SetData(data);
} else {
MS_LOG(ERROR) << "SubGraphOpenCLKernel input nullptr!";
}
}
return RET_OK;
}

@@ -254,26 +243,13 @@ int SubGraphOpenCLKernel::GetKernelFromToTensor(const std::vector<lite::tensor::
}

int SubGraphOpenCLKernel::UnInit() {
for (const auto tensor : in_tensors_) {
if (tensor != nullptr) {
tensor->FreeData();
}
}
for (const auto tensor : out_tensors_) {
if (tensor != nullptr) {
allocator_->UnmapBuffer(tensor->Data());
tensor->FreeData();
}
}
for (const auto tensor : in_convert_tensors_) {
if (tensor != nullptr) {
tensor->FreeData();
delete tensor;
}
}
for (const auto tensor : out_convert_tensors_) {
if (tensor != nullptr) {
tensor->FreeData();
delete tensor;
}
}


+ 6
- 4
mindspore/lite/src/runtime/opencl/opencl_allocator.cc View File

@@ -202,13 +202,14 @@ void OpenCLAllocator::Free(void *buf) {
allocated_list_.erase(iter);
free_list_.insert(std::make_pair(mem_buf->size_, mem_buf));
UnLock();
buf = nullptr;
MS_LOG(DEBUG) << "Free a new Image2D. size: " << mem_buf->size_ << ", host addr: " << mem_buf->host_ptr_
<< ", device addr: " << mem_buf->device_ptr_ << ", image addr: " << mem_buf->image_ptr_;
<< ", device addr: " << mem_buf->device_ptr_ << ", image addr: " << mem_buf->image_ptr_
<< ", free list size: " << free_list_.size();
return;
}
UnLock();
free(buf);
MS_LOG(DEBUG) << "Free host ptr: " << buf;
MS_LOG(WARNING) << "Host ptr " << buf << " has freed";
}

size_t OpenCLAllocator::GetTotalSize() {
@@ -305,7 +306,8 @@ void *OpenCLAllocator::MapBuffer(void *host_ptr, int flags, void *command_queue,
new_host_ptr = ocl_runtime->MapBuffer(*image, 0, CL_MAP_READ | CL_MAP_WRITE, region);
}
if (new_host_ptr == nullptr) {
MS_LOG(ERROR) << "Map buffer failed, can not found buffer :" << mem_buf->device_ptr_ << ", host_ptr=" << host_ptr;
MS_LOG(WARNING) << "Map buffer failed, can not found buffer or already mapped, dev_ptr=" << mem_buf->device_ptr_
<< ", host_ptr=" << host_ptr;
UnLock();
return nullptr;
}


+ 7
- 0
mindspore/lite/src/runtime/opencl/opencl_allocator.h View File

@@ -25,6 +25,7 @@
#include <unordered_map>
#include <unordered_set>
#include "src/runtime/allocator.h"
#include "CL/cl2.hpp"

namespace mindspore::lite::opencl {

@@ -59,6 +60,12 @@ class OpenCLAllocator : public Allocator {
int UnmapBuffer(void *host_ptr, void *command_queue = nullptr);
MEM_TYPE GetMemType(void *host_ptr);
int GetImageSize(void *host_ptr, std::vector<size_t> *img_size);
// Allocator::Prepare override: a non-null ptr is passed to
// MapBuffer(ptr, CL_MAP_WRITE, nullptr, true) and the pointer MapBuffer
// returns is handed back; a null ptr is returned untouched.
void *Prepare(void *ptr) override {
if (ptr == nullptr) {
return ptr;
}
return MapBuffer(ptr, CL_MAP_WRITE, nullptr, true);
}

private:
void Lock();


+ 4
- 20
mindspore/lite/src/scheduler.cc View File

@@ -201,26 +201,10 @@ kernel::LiteKernel *Scheduler::CreateSubKernel(const std::vector<kernel::LiteKer
kernel::LiteKernel *sub_kernel = nullptr;
#if SUPPORT_GPU
if (arch == kernel::KERNEL_ARCH::kGPU) {
auto head_kernel = kernels.front();
auto tail_kernel = kernels.back();
std::vector<kernel::LiteKernel *> input_kernels{head_kernel};
std::vector<kernel::LiteKernel *> output_kernels{tail_kernel};
std::vector<tensor::Tensor *> input_tensors;
std::vector<tensor::Tensor *> output_tensors;
for (auto tensor : head_kernel->in_tensors()) {
if (tensor->Data() == nullptr) {
input_tensors.emplace_back(tensor);
}
}
for (auto tensor : tail_kernel->out_tensors()) {
if (tensor->Data() == nullptr) {
output_tensors.emplace_back(tensor);
}
}
// std::vector<tensor::Tensor *> input_tensors = kernel::LiteKernelUtil::SubgraphInputTensors(kernels);
// std::vector<tensor::Tensor *> output_tensors = kernel::LiteKernelUtil::SubgraphOutputTensors(kernels);
// std::vector<kernel::LiteKernel *> input_kernels = kernel::LiteKernelUtil::SubgraphInputKernels(kernels);
// std::vector<kernel::LiteKernel *> output_kernels = kernel::LiteKernelUtil::SubgraphOutputKernels(kernels);
std::vector<tensor::Tensor *> input_tensors = kernel::LiteKernelUtil::SubgraphInputTensors(kernels);
std::vector<tensor::Tensor *> output_tensors = kernel::LiteKernelUtil::SubgraphOutputTensors(kernels);
std::vector<kernel::LiteKernel *> input_kernels = kernel::LiteKernelUtil::SubgraphInputKernels(kernels);
std::vector<kernel::LiteKernel *> output_kernels = kernel::LiteKernelUtil::SubgraphOutputKernels(kernels);
sub_kernel =
new kernel::SubGraphOpenCLKernel(input_tensors, output_tensors, input_kernels, output_kernels, kernels);
sub_kernel->Init();


Loading…
Cancel
Save