Merge pull request !2012 from limingqi107/master (tag v0.5.0-beta)
@@ -14,21 +14,29 @@
  * limitations under the License.
  */
 
+#include <algorithm>
 #include "device/gpu/gpu_memory_allocator.h"
 #include "device/gpu/cuda_driver.h"
 #include "utils/log_adapter.h"
+#include "utils/context/ms_context.h"
+#include "utils/convert_utils_base.h"
 
 namespace mindspore {
 namespace device {
 namespace gpu {
 bool GPUMemoryAllocator::Init() {
   size_t total_size = total_mem_size();
-  size_t free_size = free_mem_size();
-  if (total_size > 0 && free_size > 0) {
-    MS_LOG(INFO) << "GPU device total memory size " << total_size << ", current free memory size " << free_size;
+  size_t free_size = CudaDriver::free_mem_size();
+  auto context_ptr = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(context_ptr);
+  float max_device_memory = context_ptr->max_device_memory();
+  max_available_device_memory_ = FloatToSize(max_device_memory * 1024 * 1024 * 1024);
+  if (total_size > 0 && free_size > 0 && max_available_device_memory_ > 0) {
+    MS_LOG(INFO) << "GPU device total memory size " << total_size << ", current free memory size " << free_size
+                 << ", set max available memory size " << max_available_device_memory_;
   } else {
     MS_LOG(EXCEPTION) << "GPU device memory error, total memory size " << total_size << ", current free memory size "
-                      << free_size;
+                      << free_size << ", set max available memory size " << max_available_device_memory_;
   }
   return true;
 }
@@ -64,13 +72,18 @@ size_t GPUMemoryAllocator::AllocDeviceMem(size_t size, DeviceMemPtr *addr) {
   if (alloc_size == 0) {
     MS_LOG(EXCEPTION) << "Alloc device memory[" << size << "] failed.";
   }
-  MS_LOG(INFO) << "Current free memory size[" << free_size << "], current alloc size[" << alloc_size << "].";
+  total_used_device_memory_ += alloc_size;
+  max_available_device_memory_ -= alloc_size;
+  MS_LOG(INFO) << "Current free memory size[" << free_size - alloc_size << "], current alloc size[" << alloc_size
+               << "], total used size[" << total_used_device_memory_ << "].";
   return alloc_size;
 }
 
 bool GPUMemoryAllocator::FreeDeviceMem(const DeviceMemPtr &addr) { return CudaDriver::FreeDeviceMem(addr); }
 
-size_t GPUMemoryAllocator::free_mem_size() { return CudaDriver::free_mem_size(); }
+size_t GPUMemoryAllocator::free_mem_size() {
+  return std::min(CudaDriver::free_mem_size(), max_available_device_memory_);
+}
 
 size_t GPUMemoryAllocator::total_mem_size() { return CudaDriver::total_mem_size(); }
 }  // namespace gpu
@@ -48,6 +48,9 @@ class GPUMemoryAllocator : public DynamicMemPoolBestFit {
   // Used to track address of data buffer queue.
   DeviceMemPtr buffer_q_addr_{nullptr};
+  size_t total_used_device_memory_{0};
+  size_t max_available_device_memory_{0};
 };
 }  // namespace gpu
 }  // namespace device
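
Taken together, the allocator hunks cap the GPU memory pool at the user-configured limit: Init() converts the configured gigabyte value into a byte budget, free_mem_size() reports the smaller of the real CUDA free memory and the remaining budget, and each successful AllocDeviceMem() charges the allocation against that budget. Below is a minimal Python sketch of that bookkeeping, illustrative only and not the MindSpore implementation; cuda_free_bytes stands in for CudaDriver::free_mem_size().

# Illustrative model of the budget bookkeeping introduced above (not MindSpore code).
class BudgetedAllocator:
    def __init__(self, max_device_memory_gb, cuda_free_bytes):
        # Init(): FloatToSize(max_device_memory * 1024 * 1024 * 1024) turns GB into bytes.
        self.max_available = int(max_device_memory_gb * 1024 * 1024 * 1024)
        self.total_used = 0
        self.cuda_free = cuda_free_bytes  # stand-in for CudaDriver::free_mem_size()

    def free_mem_size(self):
        # free_mem_size(): never report more than the remaining budget.
        return min(self.cuda_free, self.max_available)

    def alloc_device_mem(self, size):
        # AllocDeviceMem(): a successful allocation shrinks both counters.
        if size == 0 or size > self.free_mem_size():
            raise MemoryError("Alloc device memory[%d] failed." % size)
        self.cuda_free -= size
        self.total_used += size
        self.max_available -= size
        return size

allocator = BudgetedAllocator(max_device_memory_gb=3.5, cuda_free_bytes=8 * 1024 ** 3)
allocator.alloc_device_mem(1024 ** 3)
print(allocator.free_mem_size(), allocator.total_used)
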
@@ -143,7 +143,9 @@ PYBIND11_MODULE(_c_expression, m) {
     .def("get_profiling_options", &mindspore::MsContext::profiling_options, "Get options to profiling.")
     .def("set_profiling_options", &mindspore::MsContext::set_profiling_options, "Set options to profiling.")
     .def("get_check_bprop_flag", &mindspore::MsContext::check_bprop_flag, "Get whether to check bprop.")
-    .def("set_check_bprop_flag", &mindspore::MsContext::set_check_bprop_flag, "Set whether to check bprop.");
+    .def("set_check_bprop_flag", &mindspore::MsContext::set_check_bprop_flag, "Set whether to check bprop.")
+    .def("get_max_device_memory", &mindspore::MsContext::max_device_memory, "Get device memory max size.")
+    .def("set_max_device_memory", &mindspore::MsContext::set_max_device_memory, "Set device memory max size.");
 
   (void)py::class_<ParallelContext, std::shared_ptr<ParallelContext>>(m, "AutoParallelContext")
     .def_static("get_instance", &ParallelContext::GetInstance, "Get auto parallel context instance.")
@@ -81,6 +81,7 @@ MsContext::MsContext(const std::string &policy, const std::string &target) {
   profiling_mode_ = false;
   profiling_options_ = "training_trace";
   check_bprop_flag_ = false;
+  max_device_memory_ = kDefaultMaxDeviceMemory;
 }
 
 std::shared_ptr<MsContext> MsContext::GetInstance() {
@@ -44,6 +44,8 @@ const char kAscendDevice[] = "Ascend";
 const char kDavinciDevice[] = "Davinci";
 const char KNpuLog[] = "_npu_log";
 const std::set<std::string> kTargetSet = {kCPUDevice, kGPUDevice, kAscendDevice, kDavinciDevice};
+// The default max available device memory is 1024GB.
+const float kDefaultMaxDeviceMemory = 1024;
 
 class MsContext {
  public:
@@ -143,6 +145,9 @@ class MsContext {
   bool check_bprop_flag() const { return check_bprop_flag_; }
   void set_check_bprop_flag(bool check_bprop_flag) { check_bprop_flag_ = check_bprop_flag; }
 
+  float max_device_memory() const { return max_device_memory_; }
+  void set_max_device_memory(float max_device_memory) { max_device_memory_ = max_device_memory; }
+
  private:
   MsContext(const std::string &backend_policy, const std::string &target);
   void GetGeOptions(std::map<std::string, std::string> *ge_options) const;
@@ -182,6 +187,7 @@ class MsContext {
   bool profiling_mode_;
   std::string profiling_options_;
   bool check_bprop_flag_;
+  float max_device_memory_;
 };
 }  // namespace mindspore
@@ -332,6 +332,17 @@ class _Context:
     def check_bprop(self, check_bprop_flag):
         self._context_handle.set_check_bprop_flag(check_bprop_flag)
 
+    @property
+    def max_device_memory(self):
+        return self._context_handle.get_max_device_memory()
+
+    @max_device_memory.setter
+    def max_device_memory(self, max_device_memory):
+        if not check_input_format(max_device_memory):
+            raise ValueError("Context param max_device_memory should be in correct format! Such as \"3.5GB\"")
+        max_device_memory_value = float(max_device_memory[:-2])
+        self._context_handle.set_max_device_memory(max_device_memory_value)
+
 def check_input_format(x):
     import re
     pattern = r'[1-9][0-9]*(\.)?[0-9]*GB|0\.[0-9]*GB'
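
The setter above accepts strings such as "3.5GB": check_input_format validates the text against the regex shown, and the numeric part is then taken with max_device_memory[:-2]. The following self-contained sketch reuses the same pattern to show the validation and parsing; parse_max_device_memory is an illustrative helper, not MindSpore API.

import re

# Pattern copied from the hunk above; accepts e.g. "3.5GB", "16GB", "0.5GB".
MAX_DEVICE_MEMORY_PATTERN = r'[1-9][0-9]*(\.)?[0-9]*GB|0\.[0-9]*GB'

def parse_max_device_memory(value):
    # Illustrative helper: validate the "<number>GB" string and return the size in GB as a float.
    if re.match(MAX_DEVICE_MEMORY_PATTERN, value) is None:
        raise ValueError("Context param max_device_memory should be in correct format! Such as \"3.5GB\"")
    return float(value[:-2])  # strip the trailing "GB"

print(parse_max_device_memory("3.5GB"))    # 3.5
print(parse_max_device_memory("1024GB"))   # 1024.0
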
@@ -459,7 +470,7 @@ def reset_auto_parallel_context():
                 save_graphs_path=str, save_ms_model=bool, save_ms_model_path=str, enable_dump=bool,
                 save_dump_path=str, enable_reduce_precision=bool, variable_memory_max_size=str,
                 enable_profiling=bool, profiling_options=str, enable_auto_mixed_precision=bool,
-                check_bprop=bool)
+                check_bprop=bool, max_device_memory=str)
 def set_context(**kwargs):
     """
     Sets context for running environment.
@@ -511,6 +522,7 @@ def set_context(**kwargs):
             separated by colons; single operator can choose op_trace, op_trace cannot be combined with
             training_trace and task_trace. Default: "training_trace".
         check_bprop (bool): Whether to check bprop. Default: False.
+        max_device_memory (str): Sets the maximum memory available for device. Default: "1024GB".
 
     Raises:
         ValueError: If input key is not an attribute in context.
@@ -530,6 +542,7 @@ def set_context(**kwargs):
         >>> device_target="Ascend",device_id=0, save_graphs=True,
         >>> save_graphs_path="/mindspore")
         >>> context.set_context(enable_profiling=True, profiling_options="training_trace")
+        >>> context.set_context(max_device_memory="3.5GB")
     """
     for key, value in kwargs.items():
         if not hasattr(_context(), key):
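
With the plumbing above in place, the new option is used like any other context key, and a string that does not match the expected format is rejected by the setter. A short usage sketch, assuming a MindSpore build that already contains this change:

from mindspore import context

# Cap the GPU memory pool for this process at 3.5 GB.
context.set_context(device_target="GPU", max_device_memory="3.5GB")

# A value without the "GB" suffix fails check_input_format and raises ValueError.
try:
    context.set_context(max_device_memory="3500MB")
except ValueError as err:
    print(err)
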