Merge pull request !2012 from limingqi107/master (tags/v0.5.0-beta)
| @@ -14,21 +14,29 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #include <algorithm> | |||||
| #include "device/gpu/gpu_memory_allocator.h" | #include "device/gpu/gpu_memory_allocator.h" | ||||
| #include "device/gpu/cuda_driver.h" | #include "device/gpu/cuda_driver.h" | ||||
| #include "utils/log_adapter.h" | #include "utils/log_adapter.h" | ||||
| #include "utils/context/ms_context.h" | |||||
| #include "utils/convert_utils_base.h" | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace device { | namespace device { | ||||
| namespace gpu { | namespace gpu { | ||||
| bool GPUMemoryAllocator::Init() { | bool GPUMemoryAllocator::Init() { | ||||
| size_t total_size = total_mem_size(); | size_t total_size = total_mem_size(); | ||||
| size_t free_size = free_mem_size(); | |||||
| if (total_size > 0 && free_size > 0) { | |||||
| MS_LOG(INFO) << "GPU device total memory size " << total_size << ", current free memory size " << free_size; | |||||
| size_t free_size = CudaDriver::free_mem_size(); | |||||
| auto context_ptr = MsContext::GetInstance(); | |||||
| MS_EXCEPTION_IF_NULL(context_ptr); | |||||
| float max_device_memory = context_ptr->max_device_memory(); | |||||
| max_available_device_memory_ = FloatToSize(max_device_memory * 1024 * 1024 * 1024); | |||||
| if (total_size > 0 && free_size > 0 && max_available_device_memory_ > 0) { | |||||
| MS_LOG(INFO) << "GPU device total memory size " << total_size << ", current free memory size " << free_size | |||||
| << ", set max available memory size " << max_available_device_memory_; | |||||
| } else { | } else { | ||||
| MS_LOG(EXCEPTION) << "GPU device memory error, total memory size " << total_size << ", current free memory size " | MS_LOG(EXCEPTION) << "GPU device memory error, total memory size " << total_size << ", current free memory size " | ||||
| << free_size; | |||||
| << free_size << ", set max available memory size " << max_available_device_memory_; | |||||
| } | } | ||||
| return true; | return true; | ||||
| } | } | ||||
| @@ -64,13 +72,18 @@ size_t GPUMemoryAllocator::AllocDeviceMem(size_t size, DeviceMemPtr *addr) { | |||||
| if (alloc_size == 0) { | if (alloc_size == 0) { | ||||
| MS_LOG(EXCEPTION) << "Alloc device memory[" << size << "] failed."; | MS_LOG(EXCEPTION) << "Alloc device memory[" << size << "] failed."; | ||||
| } | } | ||||
| MS_LOG(INFO) << "Current free memory size[" << free_size << "], current alloc size[" << alloc_size << "]."; | |||||
| total_used_device_memory_ += alloc_size; | |||||
| max_available_device_memory_ -= alloc_size; | |||||
| MS_LOG(INFO) << "Current free memory size[" << free_size - alloc_size << "], current alloc size[" << alloc_size | |||||
| << "], total used size[" << total_used_device_memory_ << "]."; | |||||
| return alloc_size; | return alloc_size; | ||||
| } | } | ||||
| bool GPUMemoryAllocator::FreeDeviceMem(const DeviceMemPtr &addr) { return CudaDriver::FreeDeviceMem(addr); } | bool GPUMemoryAllocator::FreeDeviceMem(const DeviceMemPtr &addr) { return CudaDriver::FreeDeviceMem(addr); } | ||||
| size_t GPUMemoryAllocator::free_mem_size() { return CudaDriver::free_mem_size(); } | |||||
| size_t GPUMemoryAllocator::free_mem_size() { | |||||
| return std::min(CudaDriver::free_mem_size(), max_available_device_memory_); | |||||
| } | |||||
| size_t GPUMemoryAllocator::total_mem_size() { return CudaDriver::total_mem_size(); } | size_t GPUMemoryAllocator::total_mem_size() { return CudaDriver::total_mem_size(); } | ||||
| } // namespace gpu | } // namespace gpu | ||||
| @@ -48,6 +48,9 @@ class GPUMemoryAllocator : public DynamicMemPoolBestFit { | |||||
| // Used to track address of data buffer queue. | // Used to track address of data buffer queue. | ||||
| DeviceMemPtr buffer_q_addr_{nullptr}; | DeviceMemPtr buffer_q_addr_{nullptr}; | ||||
| size_t total_used_device_memory_{0}; | |||||
| size_t max_available_device_memory_{0}; | |||||
| }; | }; | ||||
| } // namespace gpu | } // namespace gpu | ||||
| } // namespace device | } // namespace device | ||||
| @@ -143,7 +143,9 @@ PYBIND11_MODULE(_c_expression, m) { | |||||
| .def("get_profiling_options", &mindspore::MsContext::profiling_options, "Get options to profiling.") | .def("get_profiling_options", &mindspore::MsContext::profiling_options, "Get options to profiling.") | ||||
| .def("set_profiling_options", &mindspore::MsContext::set_profiling_options, "Set options to profiling.") | .def("set_profiling_options", &mindspore::MsContext::set_profiling_options, "Set options to profiling.") | ||||
| .def("get_check_bprop_flag", &mindspore::MsContext::check_bprop_flag, "Get whether to check bprop.") | .def("get_check_bprop_flag", &mindspore::MsContext::check_bprop_flag, "Get whether to check bprop.") | ||||
| .def("set_check_bprop_flag", &mindspore::MsContext::set_check_bprop_flag, "Set whether to check bprop."); | |||||
| .def("set_check_bprop_flag", &mindspore::MsContext::set_check_bprop_flag, "Set whether to check bprop.") | |||||
| .def("get_max_device_memory", &mindspore::MsContext::max_device_memory, "Get deivce memory max size.") | |||||
| .def("set_max_device_memory", &mindspore::MsContext::set_max_device_memory, "Set deivce memory max size."); | |||||
| (void)py::class_<ParallelContext, std::shared_ptr<ParallelContext>>(m, "AutoParallelContext") | (void)py::class_<ParallelContext, std::shared_ptr<ParallelContext>>(m, "AutoParallelContext") | ||||
| .def_static("get_instance", &ParallelContext::GetInstance, "Get auto parallel context instance.") | .def_static("get_instance", &ParallelContext::GetInstance, "Get auto parallel context instance.") | ||||
| @@ -81,6 +81,7 @@ MsContext::MsContext(const std::string &policy, const std::string &target) { | |||||
| profiling_mode_ = false; | profiling_mode_ = false; | ||||
| profiling_options_ = "training_trace"; | profiling_options_ = "training_trace"; | ||||
| check_bprop_flag_ = false; | check_bprop_flag_ = false; | ||||
| max_device_memory_ = kDefaultMaxDeviceMemory; | |||||
| } | } | ||||
| std::shared_ptr<MsContext> MsContext::GetInstance() { | std::shared_ptr<MsContext> MsContext::GetInstance() { | ||||
| @@ -44,6 +44,8 @@ const char kAscendDevice[] = "Ascend"; | |||||
| const char kDavinciDevice[] = "Davinci"; | const char kDavinciDevice[] = "Davinci"; | ||||
| const char KNpuLog[] = "_npu_log"; | const char KNpuLog[] = "_npu_log"; | ||||
| const std::set<std::string> kTargetSet = {kCPUDevice, kGPUDevice, kAscendDevice, kDavinciDevice}; | const std::set<std::string> kTargetSet = {kCPUDevice, kGPUDevice, kAscendDevice, kDavinciDevice}; | ||||
| // The default max available device memory is 1024GB. | |||||
| const float kDefaultMaxDeviceMemory = 1024; | |||||
| class MsContext { | class MsContext { | ||||
| public: | public: | ||||
| @@ -143,6 +145,9 @@ class MsContext { | |||||
| bool check_bprop_flag() const { return check_bprop_flag_; } | bool check_bprop_flag() const { return check_bprop_flag_; } | ||||
| void set_check_bprop_flag(bool check_bprop_flag) { check_bprop_flag_ = check_bprop_flag; } | void set_check_bprop_flag(bool check_bprop_flag) { check_bprop_flag_ = check_bprop_flag; } | ||||
| float max_device_memory() const { return max_device_memory_; } | |||||
| void set_max_device_memory(float max_device_memory) { max_device_memory_ = max_device_memory; } | |||||
| private: | private: | ||||
| MsContext(const std::string &backend_policy, const std::string &target); | MsContext(const std::string &backend_policy, const std::string &target); | ||||
| void GetGeOptions(std::map<std::string, std::string> *ge_options) const; | void GetGeOptions(std::map<std::string, std::string> *ge_options) const; | ||||
| @@ -182,6 +187,7 @@ class MsContext { | |||||
| bool profiling_mode_; | bool profiling_mode_; | ||||
| std::string profiling_options_; | std::string profiling_options_; | ||||
| bool check_bprop_flag_; | bool check_bprop_flag_; | ||||
| float max_device_memory_; | |||||
| }; | }; | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -332,6 +332,17 @@ class _Context: | |||||
| def check_bprop(self, check_bprop_flag): | def check_bprop(self, check_bprop_flag): | ||||
| self._context_handle.set_check_bprop_flag(check_bprop_flag) | self._context_handle.set_check_bprop_flag(check_bprop_flag) | ||||
| @property | |||||
| def max_device_memory(self): | |||||
| return self._context_handle.get_max_device_memory() | |||||
| @max_device_memory.setter | |||||
| def max_device_memory(self, max_device_memory): | |||||
| if not check_input_format(max_device_memory): | |||||
| raise ValueError("Context param max_device_memory should be in correct format! Such as \"3.5GB\"") | |||||
| max_device_memory_value = float(max_device_memory[:-2]) | |||||
| self._context_handle.set_max_device_memory(max_device_memory_value) | |||||
| def check_input_format(x): | def check_input_format(x): | ||||
| import re | import re | ||||
| pattern = r'[1-9][0-9]*(\.)?[0-9]*GB|0\.[0-9]*GB' | pattern = r'[1-9][0-9]*(\.)?[0-9]*GB|0\.[0-9]*GB' | ||||
| @@ -459,7 +470,7 @@ def reset_auto_parallel_context(): | |||||
| save_graphs_path=str, save_ms_model=bool, save_ms_model_path=str, enable_dump=bool, | save_graphs_path=str, save_ms_model=bool, save_ms_model_path=str, enable_dump=bool, | ||||
| save_dump_path=str, enable_reduce_precision=bool, variable_memory_max_size=str, | save_dump_path=str, enable_reduce_precision=bool, variable_memory_max_size=str, | ||||
| enable_profiling=bool, profiling_options=str, enable_auto_mixed_precision=bool, | enable_profiling=bool, profiling_options=str, enable_auto_mixed_precision=bool, | ||||
| check_bprop=bool) | |||||
| check_bprop=bool, max_device_memory=str) | |||||
| def set_context(**kwargs): | def set_context(**kwargs): | ||||
| """ | """ | ||||
| Sets context for running environment. | Sets context for running environment. | ||||
| @@ -511,6 +522,7 @@ def set_context(**kwargs): | |||||
| separated by colons; single operator can choose op_trace, op_trace cannot be combined with | separated by colons; single operator can choose op_trace, op_trace cannot be combined with | ||||
| training_trace and task_trace. Default: "training_trace". | training_trace and task_trace. Default: "training_trace". | ||||
| check_bprop (bool): Whether to check bprop. Default: False. | check_bprop (bool): Whether to check bprop. Default: False. | ||||
| max_device_memory (str): Sets the maximum device memory available, given as a string in "xxGB" format such as "3.5GB". Default: "1024GB". | |||||
| Raises: | Raises: | ||||
| ValueError: If input key is not an attribute in context. | ValueError: If input key is not an attribute in context. | ||||
| @@ -530,6 +542,7 @@ def set_context(**kwargs): | |||||
| >>> device_target="Ascend",device_id=0, save_graphs=True, | >>> device_target="Ascend",device_id=0, save_graphs=True, | ||||
| >>> save_graphs_path="/mindspore") | >>> save_graphs_path="/mindspore") | ||||
| >>> context.set_context(enable_profiling=True, profiling_options="training_trace") | >>> context.set_context(enable_profiling=True, profiling_options="training_trace") | ||||
| >>> context.set_context(max_device_memory="3.5GB") | |||||
| """ | """ | ||||
| for key, value in kwargs.items(): | for key, value in kwargs.items(): | ||||
| if not hasattr(_context(), key): | if not hasattr(_context(), key): | ||||