Browse Source

!32 auto-enable-dynamic-mem-pool

Merge pull request !32 from JoyLvliang/master
tags/v0.2.0-alpha
mindspore-ci-bot Gitee 6 years ago
parent
commit
e4b404e8ae
4 changed files with 17 additions and 33 deletions
  1. mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc (+14, -30)
  2. mindspore/ccsrc/device/ascend/ascend_memory_allocator.cc (+1, -1)
  3. mindspore/ccsrc/device/ascend/ascend_memory_allocator.h (+1, -1)
  4. mindspore/ccsrc/session/session_basic.cc (+1, -1)

mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc (+14, -30) View File

@@ -239,22 +239,11 @@ DeviceAddressPtr AscendKernelRuntime::CreateDeviceAddress(void *device_ptr, size
return std::make_shared<AscendDeviceAddress>(device_ptr, device_size, format, type_id); return std::make_shared<AscendDeviceAddress>(device_ptr, device_size, format, type_id);
} }


void AscendKernelRuntime::MallocOpMemory(const DeviceAddressPtr address, size_t size, int flag) {
MS_EXCEPTION_IF_NULL(MsContext::GetInstance());
if (MsContext::GetInstance()->enable_dynamic_mem_pool()) {
auto device_ptr = AscendMemoryAllocator::GetInstance().AllocTensorMem(size);
MS_EXCEPTION_IF_NULL(device_ptr);
address->ptr_ = device_ptr;
address->mem_dynamic_alloc_ = true;
return;
}
if (flag == kStaticMem) {
address->ptr_ = MallocStaticMem(size, false);
} else if (flag == kDynamicMem) {
address->ptr_ = MallocDynamicMem(size, false);
} else {
MS_LOG(EXCEPTION) << "Unknown memory type!";
}
void AscendKernelRuntime::MallocOpMemory(const DeviceAddressPtr address, size_t size, int) {
auto device_ptr = AscendMemoryAllocator::GetInstance().AllocTensorMem(size);
MS_EXCEPTION_IF_NULL(device_ptr);
address->ptr_ = device_ptr;
address->mem_dynamic_alloc_ = true;
} }


bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) { bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) {
@@ -488,23 +477,18 @@ bool AscendKernelRuntime::DestroyHccl() {


bool AscendKernelRuntime::MallocDeviceMemory() { bool AscendKernelRuntime::MallocDeviceMemory() {
device_mem_size_ = ASCEND_MEM_SIZE_BYTE; device_mem_size_ = ASCEND_MEM_SIZE_BYTE;
MS_EXCEPTION_IF_NULL(MsContext::GetInstance());
if (MsContext::GetInstance()->enable_dynamic_mem_pool()) {
static_mem_offset_ = FloatToSize(device_mem_size_ * GRAPH_INIT_DAVINCI_MEM_RATIO);
device_mem_pool_size_ = FloatToSize(device_mem_size_ * (1 - GRAPH_INIT_DAVINCI_MEM_RATIO));
auto ret = rtMalloc(reinterpret_cast<void **>(&device_mem_pool_base_), device_mem_pool_size_, RT_MEMORY_HBM);
if (ret != RT_ERROR_NONE) {
MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]";
}
AscendMemoryAllocator::GetInstance().set_device_mem_pool_base(device_mem_pool_base_);
AscendMemoryAllocator::GetInstance().set_device_mem_pool_size(device_mem_pool_size_);
} else {
static_mem_offset_ = device_mem_size_;
static_mem_offset_ = FloatToSize(device_mem_size_ * GRAPH_INIT_ASCEND_MEM_RATIO);
auto ret = rtMalloc(reinterpret_cast<void **>(&device_mem_base_), static_mem_offset_, RT_MEMORY_HBM);
if (ret != RT_ERROR_NONE) {
MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << static_mem_offset_ << "] fail, ret[" << ret << "]";
} }
auto ret = rtMalloc(reinterpret_cast<void **>(&device_mem_base_), device_mem_size_, RT_MEMORY_HBM);
device_mem_pool_size_ = FloatToSize(device_mem_size_ * (1 - GRAPH_INIT_ASCEND_MEM_RATIO));
ret = rtMalloc(reinterpret_cast<void **>(&device_mem_pool_base_), device_mem_pool_size_, RT_MEMORY_HBM);
if (ret != RT_ERROR_NONE) { if (ret != RT_ERROR_NONE) {
MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << device_mem_size_ << "] fail, ret[" << ret << "]";
MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]";
} }
AscendMemoryAllocator::GetInstance().set_device_mem_pool_base(device_mem_pool_base_);
AscendMemoryAllocator::GetInstance().set_device_mem_pool_size(device_mem_pool_size_);
return true; return true;
} }




mindspore/ccsrc/device/ascend/ascend_memory_allocator.cc (+1, -1) View File

@@ -26,7 +26,7 @@ const uint64_t MEM_SIZE_BYTE = (MEM_SIZE << 30);


AscendMemoryAllocator::AscendMemoryAllocator() { AscendMemoryAllocator::AscendMemoryAllocator() {
hasMalloc_ = false; hasMalloc_ = false;
free_mem_size_ = FloatToSize(MEM_SIZE_BYTE * (1 - GRAPH_INIT_DAVINCI_MEM_RATIO));
free_mem_size_ = FloatToSize(MEM_SIZE_BYTE * (1 - GRAPH_INIT_ASCEND_MEM_RATIO));
total_mem_size_ = free_mem_size_; total_mem_size_ = free_mem_size_;
} }




mindspore/ccsrc/device/ascend/ascend_memory_allocator.h (+1, -1) View File

@@ -24,7 +24,7 @@ namespace mindspore {
namespace device { namespace device {
namespace ascend { namespace ascend {
// The fraction of total ascend memory used to compute the graph. // The fraction of total ascend memory used to compute the graph.
static const float GRAPH_INIT_DAVINCI_MEM_RATIO = 0.8;
static const float GRAPH_INIT_ASCEND_MEM_RATIO = 0.8;


class AscendMemoryAllocator : public DynamicMemPoolBestFit { class AscendMemoryAllocator : public DynamicMemPoolBestFit {
public: public:


mindspore/ccsrc/session/session_basic.cc (+1, -1) View File

@@ -497,7 +497,7 @@ void SessionBasic::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_grap
auto device_address = AnfAlgo::GetMutableOutputAddr(pk_node, 0); auto device_address = AnfAlgo::GetMutableOutputAddr(pk_node, 0);
bool need_sync = false; bool need_sync = false;
if (ms_context->enable_pynative_infer()) { if (ms_context->enable_pynative_infer()) {
if (tensor->device_address().get() == nullptr) {
if (tensor->device_address().get() == nullptr || tensor->device_address() != device_address) {
need_sync = true; need_sync = true;
} }
} else { } else {


Loading…
Cancel
Save