Browse Source

optimize opencl load/store program cache

tags/v1.2.0-rc1
wangdongxu 5 years ago
parent
commit
e153665a48
14 changed files with 179 additions and 183 deletions
  1. +4
    -2
      mindspore/lite/CMakeLists.txt
  2. +4
    -3
      mindspore/lite/schema/gpu_cache.fbs
  3. +9
    -1
      mindspore/lite/src/lite_session.cc
  4. +1
    -1
      mindspore/lite/src/lite_session.h
  5. +1
    -2
      mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc
  6. +1
    -2
      mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc
  7. +1
    -2
      mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc
  8. +0
    -2
      mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc
  9. +2
    -3
      mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.cc
  10. +1
    -2
      mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc
  11. +119
    -125
      mindspore/lite/src/runtime/opencl/opencl_runtime.cc
  12. +18
    -15
      mindspore/lite/src/runtime/opencl/opencl_runtime.h
  13. +17
    -10
      mindspore/lite/src/runtime/opencl/opencl_wrapper.cc
  14. +1
    -13
      mindspore/lite/src/runtime/opencl/opencl_wrapper.h

+ 4
- 2
mindspore/lite/CMakeLists.txt View File

@@ -156,13 +156,15 @@ if (SUPPORT_GPU)
gene_opencl(${CMAKE_CURRENT_SOURCE_DIR}) gene_opencl(${CMAKE_CURRENT_SOURCE_DIR})
add_definitions(-DUSE_OPENCL_WRAPPER) add_definitions(-DUSE_OPENCL_WRAPPER)
add_definitions(-DMS_OPENCL_PROFILE=false) add_definitions(-DMS_OPENCL_PROFILE=false)
add_definitions(-DCL_TARGET_OPENCL_VERSION=200)
add_definitions(-DCL_HPP_TARGET_OPENCL_VERSION=200) add_definitions(-DCL_HPP_TARGET_OPENCL_VERSION=200)
add_definitions(-DCL_HPP_MINIMUM_OPENCL_VERSION=110)
add_compile_definitions(SUPPORT_GPU) add_compile_definitions(SUPPORT_GPU)
if (OFFLINE_COMPILE) if (OFFLINE_COMPILE)
add_compile_definitions(PROGRAM_WITH_IL) add_compile_definitions(PROGRAM_WITH_IL)
endif () endif ()
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/build/_deps/opencl-headers-src/)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/build/_deps/opencl-clhpp-src/include)
include_directories(${CMAKE_BINARY_DIR}/_deps/opencl-headers-src/)
include_directories(${CMAKE_BINARY_DIR}/_deps/opencl-clhpp-src/include)
endif () endif ()


if (WIN32) if (WIN32)


+ 4
- 3
mindspore/lite/schema/gpu_cache.fbs View File

@@ -23,8 +23,9 @@ table TuneParam {
opPara: [int]; opPara: [int];
} }


table KernelBin {
name: string;
table ProgramBinary {
program_name: string;
build_option: string;
tune: TuneParam; tune: TuneParam;
data: [ubyte]; data: [ubyte];
} }
@@ -32,7 +33,7 @@ table KernelBin {
table GpuCache { table GpuCache {
name: string; name: string;
version: string; version: string;
allBins: [KernelBin];
allBins: [ProgramBinary];
} }


root_type GpuCache; root_type GpuCache;

+ 9
- 1
mindspore/lite/src/lite_session.cc View File

@@ -546,6 +546,9 @@ LiteSession::~LiteSession() {
#if SUPPORT_NPU #if SUPPORT_NPU
mindspore::lite::NPUPassManager::GetInstance()->Clear(); mindspore::lite::NPUPassManager::GetInstance()->Clear();
mindspore::lite::NPUManager::GetInstance()->Reset(); mindspore::lite::NPUManager::GetInstance()->Reset();
#endif
#if SUPPORT_GPU && !SUPPORT_TRAIN
delete opencl_runtime_wrapper_;
#endif #endif
delete (model_); delete (model_);
is_running_.store(false); is_running_.store(false);
@@ -676,8 +679,13 @@ int LiteSession::Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs
int LiteSession::InitGPURuntime() { int LiteSession::InitGPURuntime() {
#if SUPPORT_GPU && !SUPPORT_TRAIN #if SUPPORT_GPU && !SUPPORT_TRAIN
if (this->context_->IsGpuEnabled()) { if (this->context_->IsGpuEnabled()) {
opencl_runtime_wrapper_ = new (std::nothrow) opencl::OpenCLRuntimeWrapper();
if (opencl_runtime_wrapper_ == nullptr) {
MS_LOG(ERROR) << "create OpenCLRuntimeWrapper failed";
return RET_ERROR;
}
auto gpu_device_info = this->context_->GetGpuInfo(); auto gpu_device_info = this->context_->GetGpuInfo();
auto opencl_runtime = ocl_runtime_wrap_.GetInstance();
auto opencl_runtime = opencl_runtime_wrapper_->GetInstance();
opencl_runtime->SetFp16Enable(gpu_device_info.enable_float16_); opencl_runtime->SetFp16Enable(gpu_device_info.enable_float16_);
if (opencl_runtime->Init() != RET_OK) { if (opencl_runtime->Init() != RET_OK) {
this->context_->device_list_ = {{DT_CPU, {gpu_device_info.enable_float16_, MID_CPU}}}; this->context_->device_list_ = {{DT_CPU, {gpu_device_info.enable_float16_, MID_CPU}}};


+ 1
- 1
mindspore/lite/src/lite_session.h View File

@@ -128,7 +128,7 @@ class LiteSession : public session::LiteSession {
Model *model_ = nullptr; Model *model_ = nullptr;
std::atomic<bool> is_running_ = false; std::atomic<bool> is_running_ = false;
#if SUPPORT_GPU && !SUPPORT_TRAIN #if SUPPORT_GPU && !SUPPORT_TRAIN
opencl::OpenCLRuntimeWrapper ocl_runtime_wrap_;
opencl::OpenCLRuntimeWrapper *opencl_runtime_wrapper_{nullptr};
#endif #endif
}; };
} // namespace lite } // namespace lite


+ 1
- 2
mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc View File

@@ -139,11 +139,10 @@ int ArgMinMaxOpenCLKernel::Prepare() {
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else #else


std::set<std::string> build_options;
std::string source = argminmax_source; std::string source = argminmax_source;
std::string program_name = "argminmax"; std::string program_name = "argminmax";
ocl_runtime_->LoadSource(program_name, source); ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name);
#endif #endif


InitWeights(); InitWeights();


+ 1
- 2
mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc View File

@@ -165,11 +165,10 @@ int BatchNormOpenCLKernel::Initweight() {
int BatchNormOpenCLKernel::Prepare() { int BatchNormOpenCLKernel::Prepare() {
use_fp16_enable_ = ocl_runtime_->GetFp16Enable(); use_fp16_enable_ = ocl_runtime_->GetFp16Enable();
std::string kernel_name = "Batch_normalization_NHWC4"; std::string kernel_name = "Batch_normalization_NHWC4";
std::set<std::string> build_options;
std::string source = batchnorm_source; std::string source = batchnorm_source;
std::string program_name = "Batch_normalization"; std::string program_name = "Batch_normalization";
ocl_runtime_->LoadSource(program_name, source); ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name);
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
int ret = Initweight(); int ret = Initweight();
if (ret) { if (ret) {


+ 1
- 2
mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc View File

@@ -94,11 +94,10 @@ int CastOpenCLKernel::Prepare() {
std::string kernel_name = "Cast"; std::string kernel_name = "Cast";
GetKernelName(&kernel_name, param); GetKernelName(&kernel_name, param);
kernel_name += "_NHWC4"; kernel_name += "_NHWC4";
std::set<std::string> build_options;
std::string source = cast_source; std::string source = cast_source;
std::string program_name = "cast"; std::string program_name = "cast";
ocl_runtime_->LoadSource(program_name, source); ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name);
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
SetConstArgs(); SetConstArgs();
SetGlobalLocal(); SetGlobalLocal();


+ 0
- 2
mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc View File

@@ -145,7 +145,6 @@ int FusionEltwiseOpenCLKernel::Prepare() {
static std::set<std::string> code_map; static std::set<std::string> code_map;
std::string source = Codegen(); std::string source = Codegen();
code_map.insert(source); code_map.insert(source);
// std::cout << name() << "\n" << source;


std::string program_name = "FusionEltwise" + std::to_string(code_map.size()); std::string program_name = "FusionEltwise" + std::to_string(code_map.size());
std::string kernel_name = "FusionEltwise"; std::string kernel_name = "FusionEltwise";
@@ -182,7 +181,6 @@ int FusionEltwiseOpenCLKernel::InitWeights() {
if (IsScalar(tensor->shape())) { if (IsScalar(tensor->shape())) {
float value = (tensor->data_type() == kNumberTypeFloat16) ? *(reinterpret_cast<float16_t *>(tensor->data_c())) float value = (tensor->data_type() == kNumberTypeFloat16) ? *(reinterpret_cast<float16_t *>(tensor->data_c()))
: *(reinterpret_cast<float32_t *>(tensor->data_c())); : *(reinterpret_cast<float32_t *>(tensor->data_c()));
// std::cout << "value=" << value << std::endl;
scalar_weights_.push_back(value); scalar_weights_.push_back(value);
} else { } else {
auto tensor_info = GpuTensorInfo(tensor); auto tensor_info = GpuTensorInfo(tensor);


+ 2
- 3
mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.cc View File

@@ -212,13 +212,12 @@ int LayerNormOpenCLKernel::Prepare() {
} }
std::string kernel_name = "LayerNormalization_NHWC4"; std::string kernel_name = "LayerNormalization_NHWC4";
std::string kernel_name_mean_var = "ComputeMeanVar"; std::string kernel_name_mean_var = "ComputeMeanVar";
std::set<std::string> build_options;
std::string source = layer_norm_source; std::string source = layer_norm_source;
std::string program_name = "LayerNormalization"; std::string program_name = "LayerNormalization";
ocl_runtime_->LoadSource(program_name, source); ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name);
kernel_name_mean_var += "Dim" + std::to_string(normalized_dims_) + "NHWC4"; kernel_name_mean_var += "Dim" + std::to_string(normalized_dims_) + "NHWC4";
ocl_runtime_->BuildKernel(kernel_mean_var_, program_name, kernel_name_mean_var, build_options);
ocl_runtime_->BuildKernel(kernel_mean_var_, program_name, kernel_name_mean_var);
MS_LOG(DEBUG) << kernel_name << " Init Done!"; MS_LOG(DEBUG) << kernel_name << " Init Done!";
SetConstArgs(); SetConstArgs();
SetGlobalLocal(); SetGlobalLocal();


+ 1
- 2
mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc View File

@@ -51,11 +51,10 @@ int OneHotOpenCLKernel::Prepare() {
#ifdef PROGRAM_WITH_IL #ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else #else
std::set<std::string> build_options;
std::string source = one_hot_source; std::string source = one_hot_source;
std::string program_name = "OneHot"; std::string program_name = "OneHot";
ocl_runtime_->LoadSource(program_name, source); ocl_runtime_->LoadSource(program_name, source);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name);
#endif #endif
InitWeights(); InitWeights();
SetConstArgs(); SetConstArgs();


+ 119
- 125
mindspore/lite/src/runtime/opencl/opencl_runtime.cc View File

@@ -37,11 +37,11 @@ using mindspore::kernel::CLErrorCode;


namespace mindspore::lite::opencl { namespace mindspore::lite::opencl {


static std::map<std::string, std::string> g_opencl_program_map;
static std::map<std::string, std::string> g_source_map;
static std::mutex g_mtx; static std::mutex g_mtx;
static std::mutex g_init_mtx; static std::mutex g_init_mtx;


bool OpenCLRuntime::init_done_ = false;
InitState OpenCLRuntime::init_state_ = UnInit;
OpenCLRuntime *OpenCLRuntime::ocl_runtime_instance_ = nullptr; OpenCLRuntime *OpenCLRuntime::ocl_runtime_instance_ = nullptr;
size_t OpenCLRuntime::instance_count_ = 0; size_t OpenCLRuntime::instance_count_ = 0;


@@ -60,6 +60,7 @@ void OpenCLRuntime::DeleteInstance() {
std::unique_lock<std::mutex> lck(g_mtx); std::unique_lock<std::mutex> lck(g_mtx);
if (instance_count_ == 0) { if (instance_count_ == 0) {
MS_LOG(ERROR) << "No OpenCLRuntime instance could delete!"; MS_LOG(ERROR) << "No OpenCLRuntime instance could delete!";
return;
} }
instance_count_--; instance_count_--;
if (instance_count_ == 0) { if (instance_count_ == 0) {
@@ -67,8 +68,6 @@ void OpenCLRuntime::DeleteInstance() {
} }
} }


OpenCLRuntime::OpenCLRuntime() { default_build_opts_ = " -cl-mad-enable -cl-fast-relaxed-math -Werror"; }

void printf_callback(const char *buffer, size_t length, size_t final, void *user_data) { void printf_callback(const char *buffer, size_t length, size_t final, void *user_data) {
fwrite(buffer, 1, length, stdout); fwrite(buffer, 1, length, stdout);
} }
@@ -76,16 +75,19 @@ void printf_callback(const char *buffer, size_t length, size_t final, void *user
// Init will get platforms info, get devices info, create opencl context. // Init will get platforms info, get devices info, create opencl context.
int OpenCLRuntime::Init() { int OpenCLRuntime::Init() {
std::unique_lock<std::mutex> lck(g_init_mtx); std::unique_lock<std::mutex> lck(g_init_mtx);

if (init_done_) {
if (init_state_ == InitSuccess) {
return RET_OK; return RET_OK;
} else if (init_state_ == InitFailed) {
return RET_ERROR;
} }
init_state_ = InitFailed;

MS_LOG(INFO) << "OpenCL version: CL_TARGET_OPENCL_VERSION " << CL_TARGET_OPENCL_VERSION; MS_LOG(INFO) << "OpenCL version: CL_TARGET_OPENCL_VERSION " << CL_TARGET_OPENCL_VERSION;
MS_LOG(INFO) << "CL_HPP_TARGET_OPENCL_VERSION " << CL_HPP_TARGET_OPENCL_VERSION; MS_LOG(INFO) << "CL_HPP_TARGET_OPENCL_VERSION " << CL_HPP_TARGET_OPENCL_VERSION;
MS_LOG(INFO) << "CL_HPP_MINIMUM_OPENCL_VERSION " << CL_HPP_MINIMUM_OPENCL_VERSION; MS_LOG(INFO) << "CL_HPP_MINIMUM_OPENCL_VERSION " << CL_HPP_MINIMUM_OPENCL_VERSION;


#ifdef USE_OPENCL_WRAPPER #ifdef USE_OPENCL_WRAPPER
if (lite::opencl::LoadOpenCLLibrary(handle_) == false) {
if (!lite::opencl::LoadOpenCLLibrary(&handle_)) {
MS_LOG(ERROR) << "Load OpenCL symbols failed!"; MS_LOG(ERROR) << "Load OpenCL symbols failed!";
return RET_ERROR; return RET_ERROR;
} }
@@ -93,35 +95,35 @@ int OpenCLRuntime::Init() {


std::vector<cl::Platform> platforms; std::vector<cl::Platform> platforms;
cl_int ret = cl::Platform::get(&platforms); cl_int ret = cl::Platform::get(&platforms);
if (platforms.size() == 0) {
if (platforms.empty()) {
MS_LOG(ERROR) << "OpenCL Platform not found!" << CLErrorCode(ret); MS_LOG(ERROR) << "OpenCL Platform not found!" << CLErrorCode(ret);
return RET_ERROR; return RET_ERROR;
} }


// search GPU // search GPU
std::vector<cl::Device> devices; std::vector<cl::Device> devices;
for (auto it = platforms.begin(); it != platforms.end(); ++it) {
for (auto &platform : platforms) {
std::string platform_name; std::string platform_name;
ret = it->getInfo(CL_PLATFORM_NAME, &platform_name);
ret = platform.getInfo(CL_PLATFORM_NAME, &platform_name);
if (ret != CL_SUCCESS) { if (ret != CL_SUCCESS) {
MS_LOG(WARNING) << CLErrorCode(ret); MS_LOG(WARNING) << CLErrorCode(ret);
} }
ret = it->getDevices(CL_DEVICE_TYPE_GPU, &devices);
ret = platform.getDevices(CL_DEVICE_TYPE_GPU, &devices);
if (ret != CL_SUCCESS) { if (ret != CL_SUCCESS) {
MS_LOG(WARNING) << CLErrorCode(ret); MS_LOG(WARNING) << CLErrorCode(ret);
} }
MS_LOG(INFO) << "Platform (" << platform_name << ") has " << devices.size() << " GPUs"; MS_LOG(INFO) << "Platform (" << platform_name << ") has " << devices.size() << " GPUs";


if (devices.size() > 0) {
if (!devices.empty()) {
std::string device_name = devices[0].getInfo<CL_DEVICE_NAME>(); std::string device_name = devices[0].getInfo<CL_DEVICE_NAME>();
MS_LOG(INFO) << "Find GPU: " << device_name.c_str(); MS_LOG(INFO) << "Find GPU: " << device_name.c_str();
cl::Platform::setDefault(*it);
cl::Platform::setDefault(platform);
break; break;
} }
} }


// not found, return error code. // not found, return error code.
if (devices.size() == 0) {
if (devices.empty()) {
MS_LOG(ERROR) << "OpenCL Device not found!"; MS_LOG(ERROR) << "OpenCL Device not found!";
return RET_ERROR; return RET_ERROR;
} }
@@ -264,23 +266,18 @@ int OpenCLRuntime::Init() {
std::string flag = ""; std::string flag = "";
binary_program_ = CreateProgramFromIL(g_program_binary, flag); binary_program_ = CreateProgramFromIL(g_program_binary, flag);
#endif #endif
if (enable_cache_) {
InitGpuCache();
}
init_done_ = true;
LoadCache();
init_state_ = InitSuccess;
MS_LOG(INFO) << "OpenCLRuntime init done!"; MS_LOG(INFO) << "OpenCLRuntime init done!";

return RET_OK; return RET_OK;
} }


int OpenCLRuntime::Uninit() { int OpenCLRuntime::Uninit() {
if (!init_done_) {
std::unique_lock<std::mutex> lck(g_init_mtx);
if (init_state_ != InitSuccess) {
return RET_OK; return RET_OK;
} }
if (enable_cache_ && !binary_map_.empty()) {
StoreCache();
}
binary_map_.clear();
StoreCache();
program_map_.clear(); program_map_.clear();
delete allocator_; delete allocator_;
delete default_command_queue_; delete default_command_queue_;
@@ -296,7 +293,7 @@ int OpenCLRuntime::Uninit() {
lite::opencl::UnLoadOpenCLLibrary(handle_); lite::opencl::UnLoadOpenCLLibrary(handle_);
handle_ = nullptr; handle_ = nullptr;
#endif #endif
init_done_ = false;
init_state_ = UnInit;
return RET_OK; return RET_OK;
} }


@@ -355,54 +352,39 @@ bool OpenCLRuntime::SetFp16Enable(bool enable) {
} }


int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_name, const std::string &kernel_name, int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_name, const std::string &kernel_name,
const std::set<std::string> &build_options) {
std::string build_options_str;
// set default macro
const std::vector<std::string> &build_options_ext) {
std::string build_option = default_build_option_;
if (fp16_enable_) { if (fp16_enable_) {
// fp16 enable, kernel will use half and read_imageh and write_imageh.
build_options_str =
"-DFLT=half -DFLT4=half4 -DFLT16=half16 -DAS_FLT4=as_half4 -DAS_UINT4=as_ushort4 -DUINT4=ushort4 "
"-DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh -DTO_FLT=convert_half -DTO_FLT4=convert_half4 ";
build_option +=
" -DFLT=half -DFLT4=half4 -DFLT16=half16 -DAS_FLT4=as_half4 -DAS_UINT4=as_ushort4 -DUINT4=ushort4 "
"-DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh -DTO_FLT=convert_half -DTO_FLT4=convert_half4";
} else { } else {
// fp16 not enable, kernel will use float and read_imagef and write_imagef.
build_options_str =
"-DFLT=float -DFLT4=float4 -DFLT16=float16 -DAS_FLT4=as_float4 -DAS_UINT4=as_uint4 -DUINT4=uint4 "
"-DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef -DTO_FLT=convert_float -DTO_FLT4=convert_float4 ";
}

auto build_options_ext = std::accumulate(build_options.begin(), build_options.end(), std::string(""),
[](const std::string &options, const std::string &option) -> std::string {
auto res = options + " " + option;
return res;
});
build_options_str += default_build_opts_;
// program identifier = program_name + build_options
std::string build_program_key = program_name + build_options_str + build_options_ext;

auto build_program_it = program_map_.find(build_program_key);
build_option +=
" -DFLT=float -DFLT4=float4 -DFLT16=float16 -DAS_FLT4=as_float4 -DAS_UINT4=as_uint4 -DUINT4=uint4 "
"-DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef -DTO_FLT=convert_float -DTO_FLT4=convert_float4";
}
build_option =
std::accumulate(build_options_ext.begin(), build_options_ext.end(), build_option,
[](const std::string &options, const std::string &option) { return options + " " + option; });

cl::Program program; cl::Program program;
// if search program identifier exist, then use it.
if (build_program_it != program_map_.end()) {
program = build_program_it->second;
auto program_key = std::make_pair(program_name, build_option);
auto iter = program_map_.find(program_key);
if (iter != program_map_.end()) {
program = iter->second;
} else { } else {
// load program and build program
flush_cache_ = true;
auto status = this->LoadProgram(program_name, &program); auto status = this->LoadProgram(program_name, &program);
if (!status) { if (!status) {
MS_LOG(ERROR) << "load program (" << program_name << ") failed!"; MS_LOG(ERROR) << "load program (" << program_name << ") failed!";
return RET_ERROR; return RET_ERROR;
} }
status = this->BuildProgram(build_options_str, program);
status = this->BuildProgram(build_option, program);
if (!status) { if (!status) {
MS_LOG(ERROR) << program_name << " build failed!"; MS_LOG(ERROR) << program_name << " build failed!";
return RET_ERROR; return RET_ERROR;
} }
if (enable_cache_) {
need_write_ = true;
auto bin = GetProgramBinaries(program);
MS_ASSERT(bin.size() >= 1);
binary_map_.emplace(build_program_key, bin[0]);
}
program_map_.emplace(build_program_key, program);
program_map_.emplace(program_key, program);
} }


cl_int ret; cl_int ret;
@@ -446,6 +428,7 @@ int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const cl::NDRange &global
} }
return RET_OK; return RET_OK;
} }

// get gpu device type // get gpu device type
GpuInfo OpenCLRuntime::ParseGpuInfo(std::string device_name, std::string device_version) { GpuInfo OpenCLRuntime::ParseGpuInfo(std::string device_name, std::string device_version) {
GpuInfo info; GpuInfo info;
@@ -472,17 +455,17 @@ GpuInfo OpenCLRuntime::ParseGpuInfo(std::string device_name, std::string device_
} }


bool OpenCLRuntime::LoadSource(const std::string &program_name, const std::string &source) { bool OpenCLRuntime::LoadSource(const std::string &program_name, const std::string &source) {
auto it_source = g_opencl_program_map.find(program_name);
if (it_source == g_opencl_program_map.end()) {
g_opencl_program_map.emplace(program_name, source);
auto it_source = g_source_map.find(program_name);
if (it_source == g_source_map.end()) {
g_source_map.emplace(program_name, source);
} }
return true; return true;
} }


// load program with program name. // load program with program name.
bool OpenCLRuntime::LoadProgram(const std::string &program_name, cl::Program *program) { bool OpenCLRuntime::LoadProgram(const std::string &program_name, cl::Program *program) {
auto it_source = g_opencl_program_map.find(program_name);
if (it_source != g_opencl_program_map.end()) {
auto it_source = g_source_map.find(program_name);
if (it_source != g_source_map.end()) {
cl::Program::Sources sources; cl::Program::Sources sources;
sources.push_back(it_source->second); sources.push_back(it_source->second);
*program = cl::Program(*context_, sources); *program = cl::Program(*context_, sources);
@@ -494,8 +477,8 @@ bool OpenCLRuntime::LoadProgram(const std::string &program_name, cl::Program *pr
} }


// build program with build options // build program with build options
bool OpenCLRuntime::BuildProgram(const std::string &build_options, const cl::Program &program) {
cl_int ret = program.build({*device_}, build_options.c_str());
bool OpenCLRuntime::BuildProgram(const std::string &build_option, const cl::Program &program) {
cl_int ret = program.build({*device_}, build_option.c_str());
if (ret != CL_SUCCESS) { if (ret != CL_SUCCESS) {
if (program.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(*device_) == CL_BUILD_ERROR) { if (program.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(*device_) == CL_BUILD_ERROR) {
std::string build_log = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(*device_); std::string build_log = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(*device_);
@@ -658,92 +641,103 @@ cl::Program OpenCLRuntime::CreateProgramFromIL(const std::vector<char> &binary,
} }


// build program with binary // build program with binary
cl::Program OpenCLRuntime::CreateProgramFromBinary(const std::vector<unsigned char> &binary, const std::string &flag) {
cl::Program OpenCLRuntime::CreateProgramFromBinary(const std::vector<unsigned char> &binary,
const std::string &build_option) {
cl::Program program = cl::Program(*context_, {*device_}, {binary}); cl::Program program = cl::Program(*context_, {*device_}, {binary});
bool status = BuildProgram(default_build_opts_, program);
bool status = BuildProgram(build_option, program);
if (!status) { if (!status) {
MS_LOG(ERROR) << "Build program with binary failed!"; MS_LOG(ERROR) << "Build program with binary failed!";
} }
return program; return program;
} }


std::vector<std::vector<unsigned char>> OpenCLRuntime::GetProgramBinaries(const cl::Program &program) {
std::vector<unsigned char> OpenCLRuntime::GetProgramBinary(const cl::Program &program) {
cl_int ret = CL_SUCCESS; cl_int ret = CL_SUCCESS;
auto binary = program.getInfo<CL_PROGRAM_BINARIES>(&ret);
auto binarys = program.getInfo<CL_PROGRAM_BINARIES>(&ret);
if (ret != CL_SUCCESS) { if (ret != CL_SUCCESS) {
MS_LOG(ERROR) << "Get program binary failed: " << CLErrorCode(ret); MS_LOG(ERROR) << "Get program binary failed: " << CLErrorCode(ret);
} }
return binary;
}
void OpenCLRuntime::InitGpuCache() {
size_t len;
char *buf = lite::ReadFile(cache_path_.c_str(), &len);
if (LoadCache(buf) != RET_OK) {
MS_LOG(ERROR) << "Load opencl cache fail";
if (binarys.empty()) {
MS_LOG(ERROR) << "binarys is empty";
return {};
} }
delete buf;
MS_LOG(INFO) << "Init opencl cache success";
return binarys.front();
} }
int OpenCLRuntime::LoadCache(const void *buf) {

void OpenCLRuntime::LoadCache() {
if (!enable_cache_) {
return;
}
size_t len;
std::unique_ptr<char[]> buf(lite::ReadFile(cache_path_.c_str(), &len));
if (buf == nullptr) { if (buf == nullptr) {
return RET_ERROR;
MS_LOG(ERROR) << "Load opencl cache fail: buf == nullptr";
return;
} }
auto gpu_cache = schema::GetGpuCache(buf);
auto gpu_cache = schema::GetGpuCache(buf.get());
if (gpu_cache == nullptr) { if (gpu_cache == nullptr) {
return RET_ERROR;
MS_LOG(ERROR) << "Load opencl cache fail: gpu_cache == nullptr";
return;
} }
auto *bins = gpu_cache->allBins(); auto *bins = gpu_cache->allBins();
if (bins == nullptr) { if (bins == nullptr) {
return RET_ERROR;
MS_LOG(ERROR) << "Load opencl cache fail: bins == nullptr";
return;
} }
auto n = bins->size();
for (auto i = 0; i < n; ++i) {
auto *kernel_bin = bins->template GetAs<schema::KernelBin>(i);
if (kernel_bin == nullptr) {
for (auto i = 0; i < bins->size(); ++i) {
auto *bin = bins->template GetAs<schema::ProgramBinary>(i);
if (bin == nullptr) {
MS_LOG(ERROR) << "kernel_bin[" << i << "] null"; MS_LOG(ERROR) << "kernel_bin[" << i << "] null";
return RET_ERROR;
return;
} }
auto *pdata = kernel_bin->data();
auto *pdata = bin->data();
MS_ASSERT(pdata); MS_ASSERT(pdata);
if (pdata->size() == 0) { if (pdata->size() == 0) {
continue; continue;
} }
std::vector<unsigned char> bin(pdata->begin(), pdata->end());
auto program = CreateProgramFromBinary(bin, kernel_bin->name()->str());
program_map_.emplace(kernel_bin->name()->str(), program);
binary_map_.emplace(kernel_bin->name()->str(), bin);
MS_LOG(INFO) << "LoadCache " << kernel_bin->name()->str() << " success, size=" << pdata->size();
std::vector<unsigned char> binary(pdata->begin(), pdata->end());
auto program = CreateProgramFromBinary(binary, bin->build_option()->str());
program_map_.emplace(std::make_pair(bin->program_name()->str(), bin->build_option()->str()), program);
MS_LOG(INFO) << "LoadCache " << bin->program_name() << " success, size=" << binary.size();
} }
return RET_OK;
MS_LOG(INFO) << "Init opencl cache success";
} }
void OpenCLRuntime::StoreCache() {
if (need_write_) {
auto fbb_ = new (std::nothrow) flatbuffers::FlatBufferBuilder;
if (fbb_ == nullptr) {
MS_LOG(ERROR) << "new opencl FlatBufferBuilder fail";
return;
}
std::vector<flatbuffers::Offset<schema::KernelBin>> vec_kernel_bin;
for (auto iv : binary_map_) {
auto name = fbb_->CreateString(iv.first);
auto data = fbb_->CreateVector<uint8_t>(iv.second);
std::vector<int32_t> shape;
auto tune = schema::CreateTuneParam(*fbb_, fbb_->CreateVector<int32_t>(shape), fbb_->CreateVector<int32_t>(shape),
fbb_->CreateVector<int32_t>(shape), fbb_->CreateVector<int32_t>(shape));
auto kbin = schema::CreateKernelBin(*fbb_, name, tune, data);
vec_kernel_bin.emplace_back(kbin);
MS_LOG(INFO) << "StoreCache " << iv.first << " success, size=" << iv.second.size();
}


auto data = fbb_->CreateVector<flatbuffers::Offset<schema::KernelBin>>(vec_kernel_bin);
auto name = fbb_->CreateString("OpenCLCache");
auto version = fbb_->CreateString(version_);
auto gpu_cache = schema::CreateGpuCache(*fbb_, name, version, data);
fbb_->Finish(gpu_cache);
uint8_t *buf = fbb_->GetBufferPointer();
lite::WriteToBin(cache_path_, reinterpret_cast<void *>(buf), fbb_->GetSize());
MS_LOG(INFO) << "store opencl cache ok, size=" << fbb_->GetSize();
delete fbb_;
}
void OpenCLRuntime::StoreCache() {
if (!enable_cache_) {
return;
}
if (!flush_cache_) {
return;
}
auto fbb = std::make_unique<flatbuffers::FlatBufferBuilder>();
if (fbb == nullptr) {
MS_LOG(ERROR) << "new opencl FlatBufferBuilder fail";
return;
}
std::vector<flatbuffers::Offset<schema::ProgramBinary>> program_binarys;
for (const auto &kv : program_map_) {
auto program_name = kv.first.first;
auto build_option = kv.first.second;
cl::Program program = kv.second;
auto binary = this->GetProgramBinary(program);
std::vector<int32_t> shape;
auto tune = schema::CreateTuneParam(*fbb, fbb->CreateVector<int32_t>(shape), fbb->CreateVector<int32_t>(shape),
fbb->CreateVector<int32_t>(shape), fbb->CreateVector<int32_t>(shape));
auto program_binary = schema::CreateProgramBinary(
*fbb, fbb->CreateString(program_name), fbb->CreateString(build_option), tune, fbb->CreateVector<uint8_t>(binary));
program_binarys.emplace_back(program_binary);
MS_LOG(INFO) << "StoreCache " << program_name << " success, size=" << binary.size();
}

auto data = fbb->CreateVector<flatbuffers::Offset<schema::ProgramBinary>>(program_binarys);
auto name = fbb->CreateString("OpenCLCache");
auto version = fbb->CreateString(cache_version_);
auto gpu_cache = schema::CreateGpuCache(*fbb, name, version, data);
fbb->Finish(gpu_cache);
uint8_t *buf = fbb->GetBufferPointer();
lite::WriteToBin(cache_path_, reinterpret_cast<void *>(buf), fbb->GetSize());
MS_LOG(INFO) << "store opencl cache ok, size=" << fbb->GetSize();
} }

} // namespace mindspore::lite::opencl } // namespace mindspore::lite::opencl

+ 18
- 15
mindspore/lite/src/runtime/opencl/opencl_runtime.h View File

@@ -23,6 +23,7 @@ j* you may not use this file except in compliance with the License.
#include <memory> #include <memory>
#include <set> #include <set>
#include <string> #include <string>
#include <utility>
#include <type_traits> #include <type_traits>
#include "src/common/log_adapter.h" #include "src/common/log_adapter.h"
#include "src/runtime/opencl/opencl_wrapper.h" #include "src/runtime/opencl/opencl_wrapper.h"
@@ -33,6 +34,7 @@ namespace mindspore::lite::opencl {


enum GpuType { OTHER = 0, ADRENO = 1, MALI = 2, MALI_T = 3, MALI_G = 4 }; enum GpuType { OTHER = 0, ADRENO = 1, MALI = 2, MALI_T = 3, MALI_G = 4 };
enum TuningMode { DEFAULT = 0, FAST = 1, EXTREME = 2 }; enum TuningMode { DEFAULT = 0, FAST = 1, EXTREME = 2 };
enum InitState { UnInit = 0, InitSuccess = 1, InitFailed = 2 };


struct GpuInfo { struct GpuInfo {
GpuType type = OTHER; GpuType type = OTHER;
@@ -113,10 +115,10 @@ class OpenCLRuntime {
cl::Program CreateProgramFromIL(const std::vector<char> &binary, const std::string &flag); cl::Program CreateProgramFromIL(const std::vector<char> &binary, const std::string &flag);
cl::Program CreateProgramFromBinary(const std::vector<unsigned char> &binary, const std::string &flag); cl::Program CreateProgramFromBinary(const std::vector<unsigned char> &binary, const std::string &flag);
cl::Kernel GetKernelFromBinary(const std::string &kernel_name); cl::Kernel GetKernelFromBinary(const std::string &kernel_name);
std::vector<std::vector<unsigned char>> GetProgramBinaries(const cl::Program &program);
std::vector<unsigned char> GetProgramBinary(const cl::Program &program);
bool LoadSource(const std::string &program_name, const std::string &source); bool LoadSource(const std::string &program_name, const std::string &source);
int BuildKernel(cl::Kernel &kernel, const std::string &program_name, const std::string &kernel_name, int BuildKernel(cl::Kernel &kernel, const std::string &program_name, const std::string &kernel_name,
const std::set<std::string> &build_options = {});
const std::vector<std::string> &build_options_ext = {});
int RunKernel(const cl::Kernel &kernel, const cl::NDRange &global, const cl::NDRange &local, int RunKernel(const cl::Kernel &kernel, const cl::NDRange &global, const cl::NDRange &local,
cl::CommandQueue *command_queue = nullptr, cl::Event *event = nullptr); cl::CommandQueue *command_queue = nullptr, cl::Event *event = nullptr);
int ReadOrWriteImage(void *buffer, void *data, bool is_read); int ReadOrWriteImage(void *buffer, void *data, bool is_read);
@@ -146,23 +148,20 @@ class OpenCLRuntime {
void SetTuningMode(TuningMode mode) { tuning_mode_ = mode; } void SetTuningMode(TuningMode mode) { tuning_mode_ = mode; }
TuningMode GetTuningMode() const { return tuning_mode_; } TuningMode GetTuningMode() const { return tuning_mode_; }


void InitGpuCache();
int LoadCache(const void *buf);
void StoreCache();
bool isProfiling() const { return profiling_; } bool isProfiling() const { return profiling_; }
void SetProfiling(bool profiling) { profiling_ = profiling; } void SetProfiling(bool profiling) { profiling_ = profiling; }


private: private:
static OpenCLRuntime *GetInstance(); static OpenCLRuntime *GetInstance();
static void DeleteInstance(); static void DeleteInstance();
OpenCLRuntime();
OpenCLRuntime() = default;
GpuInfo ParseGpuInfo(std::string device_name, std::string device_version); GpuInfo ParseGpuInfo(std::string device_name, std::string device_version);


bool LoadProgram(const std::string &program_name, cl::Program *program); bool LoadProgram(const std::string &program_name, cl::Program *program);
bool BuildProgram(const std::string &build_options, const cl::Program &program); bool BuildProgram(const std::string &build_options, const cl::Program &program);


private: private:
static bool init_done_;
static InitState init_state_;
static size_t instance_count_; static size_t instance_count_;
static OpenCLRuntime *ocl_runtime_instance_; static OpenCLRuntime *ocl_runtime_instance_;
cl::CommandQueue *default_command_queue_{nullptr}; cl::CommandQueue *default_command_queue_{nullptr};
@@ -170,15 +169,15 @@ class OpenCLRuntime {
cl::Context *context_{nullptr}; cl::Context *context_{nullptr};
cl::Device *device_{nullptr}; cl::Device *device_{nullptr};
OpenCLAllocator *allocator_{nullptr}; OpenCLAllocator *allocator_{nullptr};
std::map<std::string, cl::Program> program_map_;
cl::Program binary_program_{0};
std::map<std::pair<std::string, std::string>, cl::Program> program_map_;
cl::Program binary_program_;
uint64_t global_memery_cachesize_{0}; uint64_t global_memery_cachesize_{0};
uint64_t global_memery_size_{0}; uint64_t global_memery_size_{0};
uint64_t max_alloc_size_{0}; uint64_t max_alloc_size_{0};
int max_work_group_size_{1}; int max_work_group_size_{1};
uint32_t compute_units_{0}; uint32_t compute_units_{0};
uint32_t max_freq_{0}; uint32_t max_freq_{0};
std::string default_build_opts_{""};
std::string default_build_option_{"-cl-mad-enable -cl-fast-relaxed-math -Werror"};
GpuInfo gpu_info_; GpuInfo gpu_info_;
bool support_fp16_{false}; bool support_fp16_{false};
bool fp16_enable_{false}; bool fp16_enable_{false};
@@ -187,13 +186,17 @@ class OpenCLRuntime {
cl_uint image_pitch_align_{0}; cl_uint image_pitch_align_{0};
std::vector<size_t> max_work_item_sizes_; std::vector<size_t> max_work_item_sizes_;
void *handle_{nullptr}; void *handle_{nullptr};
std::map<std::string, std::vector<unsigned char>> binary_map_;
std::string cache_path_{"/data/local/tmp/opencl_cache"};
const std::string version_{"V0.1"};
bool need_write_{false};
bool enable_cache_{false};
TuningMode tuning_mode_{TuningMode::DEFAULT}; TuningMode tuning_mode_{TuningMode::DEFAULT};
bool profiling_{false}; bool profiling_{false};

// for cache
private:
void LoadCache();
void StoreCache();
bool enable_cache_{false};
bool flush_cache_{false};
std::string cache_path_{"/data/local/tmp/.opencl_cache"};
const std::string cache_version_{"V0.1"};
}; };


class OpenCLRuntimeWrapper { class OpenCLRuntimeWrapper {


+ 17
- 10
mindspore/lite/src/runtime/opencl/opencl_wrapper.cc View File

@@ -74,18 +74,22 @@ bool UnLoadOpenCLLibrary(void *handle) {
return true; return true;
} }


bool LoadLibraryFromPath(const std::string &library_path, void *handle) {
handle = dlopen(library_path.c_str(), RTLD_NOW | RTLD_LOCAL);
if (handle == nullptr) {
bool LoadLibraryFromPath(const std::string &library_path, void **handle_ptr) {
if (handle_ptr == nullptr) {
return false;
}

*handle_ptr = dlopen(library_path.c_str(), RTLD_NOW | RTLD_LOCAL);
if (*handle_ptr == nullptr) {
return false; return false;
} }


// load function pointers using dlopen and dlsym. // load function pointers using dlopen and dlsym.
#define LOAD_OPENCL_FUNCTION_PTR(func_name) \ #define LOAD_OPENCL_FUNCTION_PTR(func_name) \
func_name = reinterpret_cast<func_name##Func>(dlsym(handle, #func_name)); \
func_name = reinterpret_cast<func_name##Func>(dlsym(*handle_ptr, #func_name)); \
if (func_name == nullptr) { \ if (func_name == nullptr) { \
MS_LOG(ERROR) << "load func (" << #func_name << ") from (" << library_path << ") failed!"; \ MS_LOG(ERROR) << "load func (" << #func_name << ") from (" << library_path << ") failed!"; \
UnLoadOpenCLLibrary(handle); \
UnLoadOpenCLLibrary(*handle_ptr); \
return false; \ return false; \
} }


@@ -160,13 +164,16 @@ bool LoadLibraryFromPath(const std::string &library_path, void *handle) {
return true; return true;
} }
// load default library path // load default library path
bool LoadOpenCLLibrary(void *handle) {
if (handle != nullptr) {
bool LoadOpenCLLibrary(void **handle_ptr) {
if (handle_ptr == nullptr) {
return false;
}
if (*handle_ptr != nullptr) {
return true; return true;
} }
auto it = std::find_if(
g_opencl_library_paths.begin(), g_opencl_library_paths.end(),
[&handle](const std::string &lib_path) { return lite::opencl::LoadLibraryFromPath(lib_path, handle); });
auto it =
std::find_if(g_opencl_library_paths.begin(), g_opencl_library_paths.end(),
[&](const std::string &lib_path) { return lite::opencl::LoadLibraryFromPath(lib_path, handle_ptr); });
if (it != g_opencl_library_paths.end()) { if (it != g_opencl_library_paths.end()) {
MS_LOG(DEBUG) << "Find a OpenCL dynamic library : " << *it; MS_LOG(DEBUG) << "Find a OpenCL dynamic library : " << *it;
return true; return true;


+ 1
- 13
mindspore/lite/src/runtime/opencl/opencl_wrapper.h View File

@@ -20,25 +20,13 @@
#include <memory> #include <memory>
#include <string> #include <string>
#include <algorithm> #include <algorithm>

// support opencl min version is 1.1
#ifndef CL_TARGET_OPENCL_VERSION
#define CL_TARGET_OPENCL_VERSION 210
#endif
#ifndef CL_HPP_TARGET_OPENCL_VERSION
#define CL_HPP_TARGET_OPENCL_VERSION 210
#endif
#ifndef CL_HPP_MINIMUM_OPENCL_VERSION
#define CL_HPP_MINIMUM_OPENCL_VERSION 110
#endif

#include "CL/cl2.hpp" #include "CL/cl2.hpp"


#ifdef USE_OPENCL_WRAPPER #ifdef USE_OPENCL_WRAPPER


namespace mindspore::lite::opencl { namespace mindspore::lite::opencl {
// This is an OpenCL function wrapper. // This is an OpenCL function wrapper.
bool LoadOpenCLLibrary(void *handle);
bool LoadOpenCLLibrary(void **handle_ptr);
bool UnLoadOpenCLLibrary(void *handle); bool UnLoadOpenCLLibrary(void *handle);


// get platform id // get platform id


Loading…
Cancel
Save