Browse Source

Fix quantization abort and power-off bugs; support calling OpenCL from Java

tags/v1.1.0
wandongdong 5 years ago
parent
commit
8ffe5a67b8
8 changed files with 67 additions and 46 deletions
  1. +5
    -3
      build.sh
  2. +2
    -2
      cmake/external_libs/opencl.cmake
  3. +5
    -2
      mindspore/lite/java/java/app/src/main/native/runtime/ms_config.cpp
  4. +43
    -35
      mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc
  5. +5
    -1
      mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h
  6. +4
    -3
      mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc
  7. +1
    -0
      mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h
  8. +2
    -0
      mindspore/lite/src/runtime/opencl/opencl_allocator.cc

+ 5
- 3
build.sh View File

@@ -508,7 +508,8 @@ build_lite()
LITE_ENABLE_NPU="on" LITE_ENABLE_NPU="on"
fi fi


if [ "${ENABLE_GPU}" == "on" ] && [ "${LITE_PLATFORM}" == "arm64" ] || [ $1 == "arm64" ]; then
if [[ "${LITE_ENABLE_GPU}" == "on" || $1 == "arm64" ]]; then
LITE_ENABLE_GPU="on"
echo "start get opencl" echo "start get opencl"
fi fi
if [ "${LITE_ENABLE_NPU}" == "on" ]; then if [ "${LITE_ENABLE_NPU}" == "on" ]; then
@@ -545,7 +546,7 @@ build_lite()
-DANDROID_STL=${ANDROID_STL} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ -DANDROID_STL=${ANDROID_STL} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DPLATFORM_ARM32=on -DENABLE_NEON=on -DSUPPORT_TRAIN=${SUPPORT_TRAIN} \ -DPLATFORM_ARM32=on -DENABLE_NEON=on -DSUPPORT_TRAIN=${SUPPORT_TRAIN} \
-DENABLE_TOOLS=${ENABLE_TOOLS} -DENABLE_CONVERTER=${ENABLE_CONVERTER} -DBUILD_TESTCASES=${RUN_TESTCASES} \ -DENABLE_TOOLS=${ENABLE_TOOLS} -DENABLE_CONVERTER=${ENABLE_CONVERTER} -DBUILD_TESTCASES=${RUN_TESTCASES} \
-DSUPPORT_GPU=${ENABLE_GPU} -DSUPPORT_NPU=${ENABLE_NPU} -DENABLE_V0=on \
-DSUPPORT_GPU=${LITE_ENABLE_GPU} -DSUPPORT_NPU=${ENABLE_NPU} -DENABLE_V0=on \
-DOFFLINE_COMPILE=${OPENCL_OFFLINE_COMPILE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \ -DOFFLINE_COMPILE=${OPENCL_OFFLINE_COMPILE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
-DCMAKE_INSTALL_PREFIX=${BASEPATH}/output/tmp -DMS_VERSION_MAJOR=${VERSION_MAJOR} \ -DCMAKE_INSTALL_PREFIX=${BASEPATH}/output/tmp -DMS_VERSION_MAJOR=${VERSION_MAJOR} \
-DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} -DENABLE_VERBOSE=${ENABLE_VERBOSE} \ -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} -DENABLE_VERBOSE=${ENABLE_VERBOSE} \
@@ -553,7 +554,7 @@ build_lite()
else else
cmake -DPLATFORM_ARM64=off -DSUPPORT_TRAIN=${SUPPORT_TRAIN} \ cmake -DPLATFORM_ARM64=off -DSUPPORT_TRAIN=${SUPPORT_TRAIN} \
-DENABLE_TOOLS=${ENABLE_TOOLS} -DENABLE_CONVERTER=${ENABLE_CONVERTER} -DBUILD_TESTCASES=${RUN_TESTCASES} \ -DENABLE_TOOLS=${ENABLE_TOOLS} -DENABLE_CONVERTER=${ENABLE_CONVERTER} -DBUILD_TESTCASES=${RUN_TESTCASES} \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DSUPPORT_GPU=${ENABLE_GPU} -DSUPPORT_NPU=${ENABLE_NPU} \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DSUPPORT_GPU=${LITE_ENABLE_GPU} -DSUPPORT_NPU=${ENABLE_NPU} \
-DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} -DENABLE_V0=on \ -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} -DENABLE_V0=on \
-DOFFLINE_COMPILE=${OPENCL_OFFLINE_COMPILE} -DCMAKE_INSTALL_PREFIX=${BASEPATH}/output/tmp \ -DOFFLINE_COMPILE=${OPENCL_OFFLINE_COMPILE} -DCMAKE_INSTALL_PREFIX=${BASEPATH}/output/tmp \
-DMS_VERSION_MAJOR=${VERSION_MAJOR} -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} \ -DMS_VERSION_MAJOR=${VERSION_MAJOR} -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} \
@@ -647,6 +648,7 @@ build_jni_arm32() {


build_java() { build_java() {
JAVA_PATH=${BASEPATH}/mindspore/lite/java JAVA_PATH=${BASEPATH}/mindspore/lite/java
LITE_ENABLE_GPU="on"
get_version get_version
build_lite_java_arm64 build_lite_java_arm64
build_lite_java_arm32 build_lite_java_arm32


+ 2
- 2
cmake/external_libs/opencl.cmake View File

@@ -1,10 +1,10 @@
if (ENABLE_GITEE) if (ENABLE_GITEE)
set(REQ_URL "https://gitee.com/mirrors/OpenCL-Headers/repository/archive/v2020.06.16.tar.gz") set(REQ_URL "https://gitee.com/mirrors/OpenCL-Headers/repository/archive/v2020.06.16.tar.gz")
set(MD5 "fc7627b5a8a95ecbe3d5df43bc88aa44")
set(MD5 "8797a525aff953ea536ebe338a9f5ef6")
set(PKG_GIT_TAG "") set(PKG_GIT_TAG "")
__download_pkg_with_git(OpenCL-Headers ${REQ_URL} ${PKG_GIT_TAG} ${MD5}) __download_pkg_with_git(OpenCL-Headers ${REQ_URL} ${PKG_GIT_TAG} ${MD5})
set(REQ_URL "https://gitee.com/mirrors/OpenCL-CLHPP/repository/archive/v2.0.12.tar.gz") set(REQ_URL "https://gitee.com/mirrors/OpenCL-CLHPP/repository/archive/v2.0.12.tar.gz")
set(MD5 "bd00fca8f861b3b65660d719f00a58dd")
set(MD5 "a07b45d676b02644482bc2c3bb90b891")
set(PKG_GIT_TAG "") set(PKG_GIT_TAG "")
__download_pkg_with_git(OpenCL-CLHPP ${REQ_URL} ${PKG_GIT_TAG} ${MD5}) __download_pkg_with_git(OpenCL-CLHPP ${REQ_URL} ${PKG_GIT_TAG} ${MD5})
else() else()


+ 5
- 2
mindspore/lite/java/java/app/src/main/native/runtime/ms_config.cpp View File

@@ -32,9 +32,12 @@ extern "C" JNIEXPORT jlong JNICALL Java_com_mindspore_lite_config_MSConfig_creat
context->device_list_[0].device_type_ = mindspore::lite::DT_CPU; context->device_list_[0].device_type_ = mindspore::lite::DT_CPU;
break; break;
case 1: // DT_GPU case 1: // DT_GPU
MS_LOGE("We only support CPU now.");
return (jlong)context;
{
mindspore::lite::DeviceContext gpu_device_ctx{mindspore::lite::DT_GPU, {false}};
gpu_device_ctx.device_info_.gpu_device_info_.enable_float16_ = enable_float16;
context->device_list_.push_back(gpu_device_ctx);
break; break;
}
case 2: // DT_NPU case 2: // DT_NPU
MS_LOGE("We only support CPU now."); MS_LOGE("We only support CPU now.");
return (jlong)context; return (jlong)context;


+ 43
- 35
mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc View File

@@ -51,6 +51,14 @@ int DepthwiseConv2dOpenCLKernel::CheckSpecs() {
MS_LOG(ERROR) << "Unsupported data type " << in_tensors_[0]->data_type(); MS_LOG(ERROR) << "Unsupported data type " << in_tensors_[0]->data_type();
return RET_ERROR; return RET_ERROR;
} }
if (!in_tensors_.at(kWeightIndex)->IsConst()) {
MS_LOG(ERROR) << "DepthwiseConv2d don't support non-constant weight yet.";
return RET_ERROR;
}
if (in_tensors_.size() == 3 && !in_tensors_.at(kBiasIndex)->IsConst()) {
MS_LOG(ERROR) << "DepthwiseConv2d don't support non-constant bias yet.";
return RET_ERROR;
}
return RET_OK; return RET_OK;
} }
int DepthwiseConv2dOpenCLKernel::Prepare() { int DepthwiseConv2dOpenCLKernel::Prepare() {
@@ -62,13 +70,10 @@ int DepthwiseConv2dOpenCLKernel::Prepare() {
} }
kernel_name += "_NHWC4"; kernel_name += "_NHWC4";
auto parameter = reinterpret_cast<ConvParameter *>(op_parameter_); auto parameter = reinterpret_cast<ConvParameter *>(op_parameter_);
if (parameter->kernel_h_ == 1) {
if (parameter->kernel_h_ == 1 && parameter->kernel_w_ == 1) {
kernel_name += "_1x1"; kernel_name += "_1x1";
} }
kernel_name += "_b";
for (auto iv : block_size_) {
kernel_name += std::to_string(iv);
}
kernel_name += "_b" + std::to_string(block_size_.H) + std::to_string(block_size_.W) + std::to_string(block_size_.C);
#ifdef PROGRAM_WITH_IL #ifdef PROGRAM_WITH_IL
kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
#else #else
@@ -100,9 +105,10 @@ int DepthwiseConv2dOpenCLKernel::InitWeights() {
auto allocator = ocl_runtime_->GetAllocator(); auto allocator = ocl_runtime_->GetAllocator();
bool is_fp16 = ocl_runtime_->GetFp16Enable(); bool is_fp16 = ocl_runtime_->GetFp16Enable();


auto out_info = GpuTensorInfo(out_tensors_[0]);
// weight: o, h, w, i; o == group, i == 1 // weight: o, h, w, i; o == group, i == 1
void *origin_weight = in_tensors_.at(kWeightIndex)->data_c(); void *origin_weight = in_tensors_.at(kWeightIndex)->data_c();
int CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
int CO4 = UP_DIV(out_info.C, C4NUM * block_size_.C);
int pack_weight_size = C4NUM * CO4 * parameter->kernel_h_ * parameter->kernel_w_; int pack_weight_size = C4NUM * CO4 * parameter->kernel_h_ * parameter->kernel_w_;


int plane = parameter->kernel_h_ * parameter->kernel_w_; int plane = parameter->kernel_h_ * parameter->kernel_w_;
@@ -111,13 +117,13 @@ int DepthwiseConv2dOpenCLKernel::InitWeights() {
packed_weight_ = allocator->MapBuffer(packed_weight_, CL_MAP_WRITE, nullptr, true); packed_weight_ = allocator->MapBuffer(packed_weight_, CL_MAP_WRITE, nullptr, true);
if (in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat16) { if (in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat16) {
std::function<int16_t(int16_t)> to_dtype = [](int16_t x) -> int16_t { return x; }; std::function<int16_t(int16_t)> to_dtype = [](int16_t x) -> int16_t { return x; };
PackNCHWToNC4HW4<int16_t, int16_t>(origin_weight, packed_weight_, 1, plane, out_tensors_[0]->Channel(), to_dtype);
PackNCHWToNC4HW4<int16_t, int16_t>(origin_weight, packed_weight_, 1, plane, out_info.C, to_dtype);
} else if (in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat32) { } else if (in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat32) {
std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); }; std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); };
PackNCHWToNC4HW4<float, float16_t>(origin_weight, packed_weight_, 1, plane, out_tensors_[0]->Channel(), to_dtype);
PackNCHWToNC4HW4<float, float16_t>(origin_weight, packed_weight_, 1, plane, out_info.C, to_dtype);
} else { // int8 or int16 } else { // int8 or int16
std::function<int16_t(int16_t)> to_dtype = [](int16_t x) -> int16_t { return x; }; std::function<int16_t(int16_t)> to_dtype = [](int16_t x) -> int16_t { return x; };
PackNCHWToNC4HW4<int16_t, int16_t>(origin_weight, packed_weight_, 1, plane, out_tensors_[0]->Channel(), to_dtype);
PackNCHWToNC4HW4<int16_t, int16_t>(origin_weight, packed_weight_, 1, plane, out_info.C, to_dtype);
FreeDequantedWeight(); FreeDequantedWeight();
} }
} else { } else {
@@ -125,51 +131,53 @@ int DepthwiseConv2dOpenCLKernel::InitWeights() {
packed_weight_ = allocator->MapBuffer(packed_weight_, CL_MAP_WRITE, nullptr, true); packed_weight_ = allocator->MapBuffer(packed_weight_, CL_MAP_WRITE, nullptr, true);
if (in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat32) { if (in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat32) {
std::function<float(float)> to_dtype = [](float x) -> float { return x; }; std::function<float(float)> to_dtype = [](float x) -> float { return x; };
PackNCHWToNC4HW4<float, float>(origin_weight, packed_weight_, 1, plane, out_tensors_[0]->Channel(), to_dtype);
PackNCHWToNC4HW4<float, float>(origin_weight, packed_weight_, 1, plane, out_info.C, to_dtype);
} else if (in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat16) { } else if (in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat16) {
std::function<float(float16_t)> to_dtype = [](float16_t x) -> float { return static_cast<float>(x); }; std::function<float(float16_t)> to_dtype = [](float16_t x) -> float { return static_cast<float>(x); };
PackNCHWToNC4HW4<float16_t, float>(origin_weight, packed_weight_, 1, plane, out_tensors_[0]->Channel(), to_dtype);
PackNCHWToNC4HW4<float16_t, float>(origin_weight, packed_weight_, 1, plane, out_info.C, to_dtype);
} else { // int8 or int16 } else { // int8 or int16
std::function<float(float)> to_dtype = [](float x) -> float { return x; }; std::function<float(float)> to_dtype = [](float x) -> float { return x; };
PackNCHWToNC4HW4<float, float>(origin_weight, packed_weight_, 1, plane, out_tensors_[0]->Channel(), to_dtype);
PackNCHWToNC4HW4<float, float>(origin_weight, packed_weight_, 1, plane, out_info.C, to_dtype);
FreeDequantedWeight(); FreeDequantedWeight();
} }
} }

allocator->UnmapBuffer(packed_weight_); allocator->UnmapBuffer(packed_weight_);


size_t dtype_size = sizeof(float);
if (is_fp16 && in_tensors_.at(kBiasIndex)->data_type() == kNumberTypeFloat16) {
dtype_size = sizeof(int16_t);
}
bias_data_ = allocator->Malloc(C4NUM * CO4 * dtype_size);
bias_data_ = allocator->MapBuffer(bias_data_, CL_MAP_WRITE, nullptr, true);
size_t up_co_size = C4NUM * CO4 * dtype_size;
memset(bias_data_, 0, up_co_size);
if (in_tensors_.size() == kInputSize2) { if (in_tensors_.size() == kInputSize2) {
if (!in_tensors_.at(2)->IsConst()) {
MS_LOG(ERROR) << "DepthwiseConv2d don't support non-constant bias yet.";
return RET_ERROR;
}
size_t dtype_size = sizeof(float);
if (is_fp16 && in_tensors_.at(kBiasIndex)->data_type() == kNumberTypeFloat16) {
dtype_size = sizeof(int16_t);
}
bias_data_ = allocator->Malloc(C4NUM * CO4 * dtype_size);
bias_data_ = allocator->MapBuffer(bias_data_, CL_MAP_WRITE, nullptr, true);
size_t up_co_size = C4NUM * CO4 * dtype_size;
memset(bias_data_, 0, up_co_size);
auto ori_bias = in_tensors_.at(kBiasIndex)->data_c(); auto ori_bias = in_tensors_.at(kBiasIndex)->data_c();
if (is_fp16 && in_tensors_.at(kBiasIndex)->data_type() == kNumberTypeFloat32) { if (is_fp16 && in_tensors_.at(kBiasIndex)->data_type() == kNumberTypeFloat32) {
float16_t *bias_ptr = static_cast<float16_t *>(bias_data_); float16_t *bias_ptr = static_cast<float16_t *>(bias_data_);
for (size_t i = 0; i < in_tensors_.at(kBiasIndex)->ElementsNum(); ++i) { for (size_t i = 0; i < in_tensors_.at(kBiasIndex)->ElementsNum(); ++i) {
bias_ptr[i] = static_cast<float16_t>(static_cast<float *>(ori_bias)[i]); bias_ptr[i] = static_cast<float16_t>(static_cast<float *>(ori_bias)[i]);
} }
} else if (!is_fp16 && in_tensors_.at(kBiasIndex)->data_type() == kNumberTypeFloat16) {
float32_t *bias_ptr = static_cast<float32_t *>(bias_data_);
for (size_t i = 0; i < in_tensors_.at(kBiasIndex)->ElementsNum(); ++i) {
bias_ptr[i] = static_cast<float32_t>(static_cast<float16_t *>(ori_bias)[i]);
}
} else { } else {
memcpy(bias_data_, ori_bias, out_tensors_[0]->Channel() * dtype_size);
memcpy(bias_data_, ori_bias, out_info.C * dtype_size);
} }
allocator->UnmapBuffer(bias_data_);
} else { } else {
MS_ASSERT(in_tensors_.size() == kInputSize1); MS_ASSERT(in_tensors_.size() == kInputSize1);
} }
allocator->UnmapBuffer(bias_data_);
return mindspore::lite::RET_OK; return mindspore::lite::RET_OK;
} }
void DepthwiseConv2dOpenCLKernel::SetConstArgs() { void DepthwiseConv2dOpenCLKernel::SetConstArgs() {
auto parameter = reinterpret_cast<ConvParameter *>(op_parameter_); auto parameter = reinterpret_cast<ConvParameter *>(op_parameter_);
size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
size_t CI4 = UP_DIV(in_tensors_[0]->Channel(), C4NUM);
auto in_info = GpuTensorInfo(in_tensors_[0]);
auto out_info = GpuTensorInfo(out_tensors_[0]);
size_t CO4 = UP_DIV(out_info.C, C4NUM);
size_t CI4 = UP_DIV(in_info.C, C4NUM);


std::map<ActType, std::pair<float, float>> relu_clips{ std::map<ActType, std::pair<float, float>> relu_clips{
{ActType_No, {-FLT_MAX, FLT_MAX}}, {ActType_Relu, {0.0, FLT_MAX}}, {ActType_Relu6, {0, 6.0}}}; {ActType_No, {-FLT_MAX, FLT_MAX}}, {ActType_Relu, {0.0, FLT_MAX}}, {ActType_Relu6, {0, 6.0}}};
@@ -177,9 +185,8 @@ void DepthwiseConv2dOpenCLKernel::SetConstArgs() {
cl_int2 stride = {parameter->stride_h_, parameter->stride_w_}; cl_int2 stride = {parameter->stride_h_, parameter->stride_w_};
cl_int2 padding = {-parameter->pad_u_, -parameter->pad_l_}; cl_int2 padding = {-parameter->pad_u_, -parameter->pad_l_};
cl_int2 dilation = {parameter->dilation_h_, parameter->dilation_w_}; cl_int2 dilation = {parameter->dilation_h_, parameter->dilation_w_};
cl_int4 src_size = {in_tensors_[0]->Width(), in_tensors_[0]->Height(), (cl_int)CI4, in_tensors_[0]->Batch()};
cl_int4 dst_size = {(cl_int)out_tensors_[0]->Width(), (cl_int)out_tensors_[0]->Height(), (cl_int)CO4,
(cl_int)out_tensors_[0]->Batch()};
cl_int4 src_size = {(cl_int)in_info.W, (cl_int)in_info.H, (cl_int)CI4, (cl_int)in_info.N};
cl_int4 dst_size = {(cl_int)out_info.W, (cl_int)out_info.H, (cl_int)CO4, (cl_int)out_info.N};


int arg_cnt = 2; int arg_cnt = 2;
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, packed_weight_, lite::opencl::MemType::BUF); ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, packed_weight_, lite::opencl::MemType::BUF);
@@ -194,10 +201,11 @@ void DepthwiseConv2dOpenCLKernel::SetConstArgs() {
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].second); ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].second);
} }
void DepthwiseConv2dOpenCLKernel::SetGlobalLocal() { void DepthwiseConv2dOpenCLKernel::SetGlobalLocal() {
auto out_info = GpuTensorInfo(out_tensors_[0]);
// set global // set global
size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM * block_size_[2]);
global_size_ = {CO4, (size_t)UP_DIV(out_tensors_[0]->Width(), block_size_[1]),
(size_t)UP_DIV(out_tensors_[0]->Height() * out_tensors_[0]->Batch(), block_size_[0])};
size_t CO4 = UP_DIV(out_info.C, C4NUM * block_size_.C);
global_size_ = {CO4, (size_t)UP_DIV(out_info.W, block_size_.W),
(size_t)UP_DIV(out_info.H * out_info.N, block_size_.H)};
// set local // set local
const int max_group_size = ocl_runtime_->DeviceMaxWorkGroupSize(); const int max_group_size = ocl_runtime_->DeviceMaxWorkGroupSize();
int z = global_size_[0]; int z = global_size_[0];


+ 5
- 1
mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h View File

@@ -42,7 +42,11 @@ class DepthwiseConv2dOpenCLKernel : public OpenCLKernel {
private: private:
void *packed_weight_{nullptr}; void *packed_weight_{nullptr};
void *bias_data_{nullptr}; void *bias_data_{nullptr};
std::vector<int> block_size_{2, 2, 1};
struct {
int H{2};
int W{2};
int C{1};
} block_size_;
}; };
} // namespace mindspore::kernel } // namespace mindspore::kernel




+ 4
- 3
mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc View File

@@ -203,9 +203,9 @@ std::set<size_t> OpenCLKernel::GenerateLocalByGlobal(size_t global_i) {
int OpenCLKernel::DequantWeight() { int OpenCLKernel::DequantWeight() {
bool is_fp16 = ocl_runtime_->GetFp16Enable(); bool is_fp16 = ocl_runtime_->GetFp16Enable();
auto *weight_tensor = in_tensors_.at(kWeightIndex); auto *weight_tensor = in_tensors_.at(kWeightIndex);
auto *restore_data = weight_tensor->data_c();
dequant_flag_ =
!weight_tensor->quant_params().empty() && weight_tensor->quant_params().front().inited && restore_data != nullptr;
restore_quant_data_ = weight_tensor->data_c();
dequant_flag_ = !weight_tensor->quant_params().empty() && weight_tensor->quant_params().front().inited &&
restore_quant_data_ != nullptr;
if (dequant_flag_) { if (dequant_flag_) {
void *dequant_weight{nullptr}; void *dequant_weight{nullptr};
bool set_flag{true}; bool set_flag{true};
@@ -242,6 +242,7 @@ void OpenCLKernel::FreeDequantedWeight() {
auto *weight_tensor = in_tensors_.at(kWeightIndex); auto *weight_tensor = in_tensors_.at(kWeightIndex);
if (dequant_flag_) { if (dequant_flag_) {
free(weight_tensor->data_c()); free(weight_tensor->data_c());
weight_tensor->set_data(restore_quant_data_);
} }
} }
} // namespace mindspore::kernel } // namespace mindspore::kernel

+ 1
- 0
mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h View File

@@ -209,6 +209,7 @@ class OpenCLKernel : public LiteKernel {
std::vector<size_t> local_size_; std::vector<size_t> local_size_;
cl::Kernel kernel_; cl::Kernel kernel_;
cl::Event event_; cl::Event event_;
void *restore_quant_data_{nullptr};
bool dequant_flag_{false}; bool dequant_flag_{false};


private: private:


+ 2
- 0
mindspore/lite/src/runtime/opencl/opencl_allocator.cc View File

@@ -150,7 +150,9 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t> &img_size,
total_size_ += size; total_size_ += size;
const uint64_t max_size = ocl_runtime_->GetGlobalMemSize(); const uint64_t max_size = ocl_runtime_->GetGlobalMemSize();
if (total_size_ >= max_size) { if (total_size_ >= max_size) {
UnLock();
MS_LOG(ERROR) << "Mem pool out of max_size, total size: " << total_size_ << ", max size: " << max_size; MS_LOG(ERROR) << "Mem pool out of max_size, total size: " << total_size_ << ", max size: " << max_size;
return nullptr;
} }
cl::Buffer *buffer = nullptr; cl::Buffer *buffer = nullptr;
cl::Image2D *image = nullptr; cl::Image2D *image = nullptr;


Loading…
Cancel
Save