| @@ -137,7 +137,7 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t> &img_size, | |||||
| uint32_t image_alignment = ocl_runtime_->GetImagePitchAlignment(); | uint32_t image_alignment = ocl_runtime_->GetImagePitchAlignment(); | ||||
| size = UP_ROUND(img_size[0], image_alignment) * img_size[1] * dtype_size; | size = UP_ROUND(img_size[0], image_alignment) * img_size[1] * dtype_size; | ||||
| } | } | ||||
| if (size > MAX_MALLOC_SIZE) { | |||||
| if (size > ocl_runtime_->GetMaxAllocSize()) { | |||||
| MS_LOG(ERROR) << "MallocData out of max_size, size: " << size; | MS_LOG(ERROR) << "MallocData out of max_size, size: " << size; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| @@ -148,7 +148,7 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t> &img_size, | |||||
| return host_ptr; | return host_ptr; | ||||
| } | } | ||||
| total_size_ += size; | total_size_ += size; | ||||
| const uint64_t max_size = ocl_runtime_->GetGlobalMemSize(); | |||||
| const uint64_t max_size = ocl_runtime_->GetGlobalMemSize() * 0.8; | |||||
| if (total_size_ >= max_size) { | if (total_size_ >= max_size) { | ||||
| UnLock(); | UnLock(); | ||||
| MS_LOG(ERROR) << "Mem pool out of max_size, total size: " << total_size_ << ", max size: " << max_size; | MS_LOG(ERROR) << "Mem pool out of max_size, total size: " << total_size_ << ", max size: " << max_size; | ||||
| @@ -226,11 +226,11 @@ int OpenCLRuntime::Init() { | |||||
| } | } | ||||
| } | } | ||||
| global_memery_size_ = device_->getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>(); | global_memery_size_ = device_->getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>(); | ||||
| max_alloc_size_ = device_->getInfo<CL_DEVICE_MAX_MEM_ALLOC_SIZE>(); | |||||
| MS_LOG(INFO) << "Address space bits: " << device_->getInfo<CL_DEVICE_ADDRESS_BITS>(); | MS_LOG(INFO) << "Address space bits: " << device_->getInfo<CL_DEVICE_ADDRESS_BITS>(); | ||||
| MS_LOG(INFO) << "Global Mem Size: " << global_memery_size_; | MS_LOG(INFO) << "Global Mem Size: " << global_memery_size_; | ||||
| MS_LOG(INFO) << "Global Mem Cache Size: " << global_memery_cachesize_; | MS_LOG(INFO) << "Global Mem Cache Size: " << global_memery_cachesize_; | ||||
| MS_LOG(INFO) << "Max Alloc Size: " << device_->getInfo<CL_DEVICE_MAX_MEM_ALLOC_SIZE>(); | |||||
| MS_LOG(INFO) << "Max Alloc Size: " << max_alloc_size_; | |||||
| MS_LOG(INFO) << "Compute Unit: " << compute_units_; | MS_LOG(INFO) << "Compute Unit: " << compute_units_; | ||||
| MS_LOG(INFO) << "Clock Frequency: " << max_freq_ << " MHz"; | MS_LOG(INFO) << "Clock Frequency: " << max_freq_ << " MHz"; | ||||
| @@ -62,6 +62,7 @@ class OpenCLRuntime { | |||||
| uint64_t GetMaxWorkGroupSize(const cl::Kernel &kernel); | uint64_t GetMaxWorkGroupSize(const cl::Kernel &kernel); | ||||
| uint32_t GetSubGroupSize(const cl::Kernel &kernel, const cl::NDRange &range = cl::NullRange); | uint32_t GetSubGroupSize(const cl::Kernel &kernel, const cl::NDRange &range = cl::NullRange); | ||||
| uint64_t GetGlobalMemSize() { return global_memery_size_; } | uint64_t GetGlobalMemSize() { return global_memery_size_; } | ||||
| uint64_t GetMaxAllocSize() { return max_alloc_size_; } | |||||
| GpuInfo GetGpuInfo(); | GpuInfo GetGpuInfo(); | ||||
| bool GetFp16Enable() const; | bool GetFp16Enable() const; | ||||
| bool SetFp16Enable(bool enable); | bool SetFp16Enable(bool enable); | ||||
| @@ -170,6 +171,7 @@ class OpenCLRuntime { | |||||
| cl::Program binary_program_{0}; | cl::Program binary_program_{0}; | ||||
| uint64_t global_memery_cachesize_{0}; | uint64_t global_memery_cachesize_{0}; | ||||
| uint64_t global_memery_size_{0}; | uint64_t global_memery_size_{0}; | ||||
| uint64_t max_alloc_size_{0}; | |||||
| int max_work_group_size_{1}; | int max_work_group_size_{1}; | ||||
| uint32_t compute_units_{0}; | uint32_t compute_units_{0}; | ||||
| uint32_t max_freq_{0}; | uint32_t max_freq_{0}; | ||||