
minddata gpu mode add pin memory

tags/v1.1.0
xiefangqi · 5 years ago · commit 26acb9f801
7 changed files with 126 additions and 12 deletions
  1. mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_init_kernel.h (+1, -1)
  2. mindspore/ccsrc/minddata/dataset/engine/cache/CMakeLists.txt (+24, -10)
  3. mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc (+1, -1)
  4. mindspore/ccsrc/minddata/dataset/util/arena.cc (+30, -0)
  5. mindspore/ccsrc/minddata/dataset/util/arena.h (+22, -0)
  6. mindspore/ccsrc/minddata/dataset/util/circular_pool.cc (+36, -0)
  7. mindspore/ccsrc/minddata/dataset/util/circular_pool.h (+12, -0)

mindspore/ccsrc/backend/kernel_compiler/gpu/data/dataset_init_kernel.h (+1, -1)

@@ -49,7 +49,7 @@ class DatasetInitKernel : public GpuKernel {
   std::vector<size_t> workspace_size_list_;

   // The capacity of buffer Q.
-  size_t buffer_q_capacity_{1};
+  size_t buffer_q_capacity_{2};
 };

 MS_REG_GPU_KERNEL(InitDataSetQueue, DatasetInitKernel)

mindspore/ccsrc/minddata/dataset/engine/cache/CMakeLists.txt (+24, -10)

@@ -42,16 +42,30 @@ if (ENABLE_CACHE)
                         storage_container.cc)

   add_executable(cache_server cache_main.cc)
-  target_link_libraries(cache_server
-    engine-cache-server
-    _c_dataengine
-    _c_mindrecord
-    mindspore::protobuf
-    mindspore::grpc++
-    mindspore_gvar
-    ${PYTHON_LIBRARIES}
-    ${SECUREC_LIBRARY}
-    pthread)
+  if (ENABLE_GPU)
+    target_link_libraries(cache_server
+      engine-cache-server
+      _c_dataengine
+      _c_mindrecord
+      mindspore::protobuf
+      mindspore::grpc++
+      mindspore_gvar
+      ${CUDNN_LIBRARY_PATH}
+      ${PYTHON_LIBRARIES}
+      ${SECUREC_LIBRARY}
+      pthread)
+  else()
+    target_link_libraries(cache_server
+      engine-cache-server
+      _c_dataengine
+      _c_mindrecord
+      mindspore::protobuf
+      mindspore::grpc++
+      mindspore_gvar
+      ${PYTHON_LIBRARIES}
+      ${SECUREC_LIBRARY}
+      pthread)
+  endif()

   if (USE_GLOG)
     target_link_libraries(cache_server mindspore::glog)


mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc (+1, -1)

@@ -91,7 +91,7 @@ Status DeviceQueueOp::operator()() {
 #endif
   } else if (device_type_ == DeviceType::GPU) {
 #ifdef ENABLE_GPUQUE
-    RETURN_IF_NOT_OK(CircularPool::CreateCircularPool(&pool_));
+    RETURN_IF_NOT_OK(CircularPool::CreateCircularPool(&pool_, -1, 1024, false, true));
     RETURN_IF_NOT_OK(SendDataToGPU());
 #endif
   } else if (device_type_ == DeviceType::CPU) {


mindspore/ccsrc/minddata/dataset/util/arena.cc (+30, -0)

@@ -235,14 +235,43 @@ std::ostream &operator<<(std::ostream &os, const ArenaImpl &s) {
 Status Arena::Init() {
   try {
     int64_t sz = size_in_MB_ * 1048576L;
+#ifdef ENABLE_GPUQUE
+    if (is_cuda_malloc_) {
+      auto ret = cudaHostAlloc(&ptr_, sz, cudaHostAllocDefault);
+      if (ret != cudaSuccess) {
+        MS_LOG(ERROR) << "cudaHostAlloc failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
+        return Status(StatusCode::kOutOfMemory);
+      }
+      impl_ = std::make_unique<ArenaImpl>(ptr_, sz);
+    } else {
+      RETURN_IF_NOT_OK(DeMalloc(sz, &ptr_, false));
+      impl_ = std::make_unique<ArenaImpl>(ptr_, sz);
+    }
+#else
     RETURN_IF_NOT_OK(DeMalloc(sz, &ptr_, false));
     impl_ = std::make_unique<ArenaImpl>(ptr_, sz);
+#endif
   } catch (std::bad_alloc &e) {
     return Status(StatusCode::kOutOfMemory);
   }
   return Status::OK();
 }

+#ifdef ENABLE_GPUQUE
+Arena::Arena(size_t val_in_MB, bool is_cuda_malloc)
+    : ptr_(nullptr), size_in_MB_(val_in_MB), is_cuda_malloc_(is_cuda_malloc) {}
+
+Status Arena::CreateArena(std::shared_ptr<Arena> *p_ba, size_t val_in_MB, bool is_cuda_malloc) {
+  RETURN_UNEXPECTED_IF_NULL(p_ba);
+  auto ba = new (std::nothrow) Arena(val_in_MB, is_cuda_malloc);
+  if (ba == nullptr) {
+    return Status(StatusCode::kOutOfMemory);
+  }
+  (*p_ba).reset(ba);
+  RETURN_IF_NOT_OK(ba->Init());
+  return Status::OK();
+}
+#else
 Arena::Arena(size_t val_in_MB) : ptr_(nullptr), size_in_MB_(val_in_MB) {}

 Status Arena::CreateArena(std::shared_ptr<Arena> *p_ba, size_t val_in_MB) {
@@ -255,5 +284,6 @@ Status Arena::CreateArena(std::shared_ptr<Arena> *p_ba, size_t val_in_MB) {
   RETURN_IF_NOT_OK(ba->Init());
   return Status::OK();
 }
+#endif
 }  // namespace dataset
 }  // namespace mindspore
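For context, here is a minimal standalone sketch of the pinned-memory pattern Arena::Init() uses when is_cuda_malloc_ is set: page-locked host memory is obtained with cudaHostAlloc and released with cudaFreeHost. The buffer size and error handling below are illustrative assumptions, not code from the MindSpore tree.

#include <cuda_runtime_api.h>
#include <cstdio>

int main() {
  // Illustrative size only; the Arena in this commit sizes its buffer in MB.
  const size_t kSize = 4 * 1048576;  // 4 MiB
  void *ptr = nullptr;
  // cudaHostAllocDefault requests page-locked ("pinned") host memory, which the
  // GPU can DMA from directly, enabling asynchronous host-to-device copies.
  cudaError_t ret = cudaHostAlloc(&ptr, kSize, cudaHostAllocDefault);
  if (ret != cudaSuccess) {
    std::fprintf(stderr, "cudaHostAlloc failed: %s\n", cudaGetErrorString(ret));
    return 1;
  }
  // ... fill ptr with host-side tensor data, then cudaMemcpyAsync it to the device ...
  (void)cudaFreeHost(ptr);  // pinned memory must be released with cudaFreeHost, not free()
  return 0;
}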

mindspore/ccsrc/minddata/dataset/util/arena.h (+22, -0)

@@ -22,6 +22,9 @@
 #include "minddata/dataset/util/allocator.h"
 #include "minddata/dataset/util/memory_pool.h"
 #include "minddata/dataset/util/treap.h"
+#ifdef ENABLE_GPUQUE
+#include <cuda_runtime_api.h>
+#endif

 #define ARENA_LOG_BLK_SZ (6u)
 #define ARENA_BLK_SZ (static_cast<uint16_t>(1u << ARENA_LOG_BLK_SZ))
@@ -105,10 +108,18 @@ class Arena : public MemoryPool {
   Arena(const Arena &) = delete;
   Arena &operator=(const Arena &) = delete;
   ~Arena() override {
+#ifdef ENABLE_GPUQUE
+    if (is_cuda_malloc_) {
+      if (ptr_) {
+        (void)cudaFreeHost(ptr_);
+      }
+    }
+#else
     if (ptr_ != nullptr) {
       free(ptr_);
     }
     ptr_ = nullptr;
+#endif
   }

   /// As a derived class of MemoryPool, we have to implement the following.
@@ -140,16 +151,27 @@ class Arena : public MemoryPool {
     return os;
   }

+#ifdef ENABLE_GPUQUE
+  /// The only method to create an arena.
+  static Status CreateArena(std::shared_ptr<Arena> *p_ba, size_t val_in_MB = 4096, bool is_cuda_malloc = false);
+#else
   /// The only method to create an arena.
   static Status CreateArena(std::shared_ptr<Arena> *p_ba, size_t val_in_MB = 4096);
+#endif

  protected:
   mutable std::mutex mux_;
   std::unique_ptr<ArenaImpl> impl_;
   void *ptr_;
   size_t size_in_MB_;
+#ifdef ENABLE_GPUQUE
+  bool is_cuda_malloc_;
+
+  explicit Arena(size_t val_in_MB = 4096, bool is_cuda_malloc = false);
+#else

   explicit Arena(size_t val_in_MB = 4096);
+#endif

   Status Init();
 };
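Taken together, the arena.h/arena.cc changes follow one pattern: under ENABLE_GPUQUE a runtime flag chooses between ordinary heap allocation and CUDA pinned host memory, and the destructor must release the buffer with the matching call. The sketch below is a hedged, self-contained illustration of that pattern; the class name PinnedOrHeapBuffer and its shape are assumptions for illustration, not MindSpore code.

#include <cstddef>
#include <cstdlib>
#ifdef ENABLE_GPUQUE
#include <cuda_runtime_api.h>
#endif

// Illustrative RAII buffer mirroring the Arena change: the constructor picks
// heap memory or CUDA pinned memory, and the destructor frees it the same way.
class PinnedOrHeapBuffer {
 public:
  PinnedOrHeapBuffer(std::size_t bytes, bool use_pinned) : ptr_(nullptr), use_pinned_(use_pinned) {
#ifdef ENABLE_GPUQUE
    if (use_pinned_) {
      // Pinned (page-locked) host memory enables faster, asynchronous H2D copies.
      if (cudaHostAlloc(&ptr_, bytes, cudaHostAllocDefault) != cudaSuccess) {
        ptr_ = nullptr;
      }
      return;
    }
#endif
    ptr_ = std::malloc(bytes);
  }

  ~PinnedOrHeapBuffer() {
#ifdef ENABLE_GPUQUE
    if (use_pinned_) {
      if (ptr_ != nullptr) {
        (void)cudaFreeHost(ptr_);  // must not be released with free()
      }
      return;
    }
#endif
    std::free(ptr_);
  }

  void *get() const { return ptr_; }

 private:
  void *ptr_;
  bool use_pinned_;
};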


mindspore/ccsrc/minddata/dataset/util/circular_pool.cc (+36, -0)

@@ -27,7 +27,11 @@ namespace dataset {
 Status CircularPool::AddOneArena() {
   Status rc;
   std::shared_ptr<Arena> b;
+#ifdef ENABLE_GPUQUE
+  RETURN_IF_NOT_OK(Arena::CreateArena(&b, arena_size_, is_cuda_malloc_));
+#else
   RETURN_IF_NOT_OK(Arena::CreateArena(&b, arena_size_));
+#endif
   tail_ = b.get();
   cur_size_in_mb_ += arena_size_;
   mem_segments_.push_back(std::move(b));
@@ -194,12 +198,43 @@ int CircularPool::PercentFree() const {
   }
 }

+#ifdef ENABLE_GPUQUE
+CircularPool::CircularPool(int max_size_in_gb, int arena_size, bool is_cuda_malloc)
+    : unlimited_(max_size_in_gb <= 0),
+      max_size_in_mb_(unlimited_ ? std::numeric_limits<int32_t>::max() : max_size_in_gb * 1024),
+      arena_size_(arena_size),
+      is_cuda_malloc_(is_cuda_malloc),
+      cur_size_in_mb_(0) {}
+#else
 CircularPool::CircularPool(int max_size_in_gb, int arena_size)
     : unlimited_(max_size_in_gb <= 0),
       max_size_in_mb_(unlimited_ ? std::numeric_limits<int32_t>::max() : max_size_in_gb * 1024),
       arena_size_(arena_size),
       cur_size_in_mb_(0) {}
+#endif

+#ifdef ENABLE_GPUQUE
+Status CircularPool::CreateCircularPool(std::shared_ptr<MemoryPool> *out_pool, int max_size_in_gb, int arena_size,
+                                        bool createOneArena, bool is_cuda_malloc) {
+  Status rc;
+  if (out_pool == nullptr) {
+    RETURN_STATUS_UNEXPECTED("pPool is null");
+  }
+  auto pool = new (std::nothrow) CircularPool(max_size_in_gb, arena_size, is_cuda_malloc);
+  if (pool == nullptr) {
+    return Status(StatusCode::kOutOfMemory);
+  }
+  if (createOneArena) {
+    rc = pool->AddOneArena();
+  }
+  if (rc.IsOk()) {
+    (*out_pool).reset(pool);
+  } else {
+    delete pool;
+  }
+  return rc;
+}
+#else
 Status CircularPool::CreateCircularPool(std::shared_ptr<MemoryPool> *out_pool, int max_size_in_gb, int arena_size,
                                         bool createOneArena) {
   Status rc;
@@ -220,6 +255,7 @@ Status CircularPool::CreateCircularPool(std::shared_ptr<MemoryPool> *out_pool, i
   }
   return rc;
 }
+#endif

 CircularPool::~CircularPool() = default;
 }  // namespace dataset


mindspore/ccsrc/minddata/dataset/util/circular_pool.h (+12, -0)

@@ -85,8 +85,13 @@ class CircularPool : public MemoryPool {
     return os;
   }

+#ifdef ENABLE_GPUQUE
+  static Status CreateCircularPool(std::shared_ptr<MemoryPool> *out_pool, int max_size_in_gb = -1,
+                                   int arena_size = 4096, bool create_one_arena = false, bool is_cuda_malloc = false);
+#else
   static Status CreateCircularPool(std::shared_ptr<MemoryPool> *out_pool, int max_size_in_gb = -1,
                                    int arena_size = 4096, bool create_one_arena = false);
+#endif

  private:
   ListOfArenas mem_segments_;
@@ -96,9 +101,16 @@ class CircularPool : public MemoryPool {
   int arena_size_;
   int cur_size_in_mb_;
   RWLock rw_lock_;
+#ifdef ENABLE_GPU
+  bool is_cuda_malloc_;
+
+  // We can take negative or 0 as input which means unlimited.
+  CircularPool(int max_size_in_gb, int arena_size, bool is_cuda_malloc);
+#else

   // We can take negative or 0 as input which means unlimited.
   CircularPool(int max_size_in_gb, int arena_size);
+#endif

   Status AddOneArena();
 };

