Browse Source

!6764 [MS][LITE][GPU]fix bug: softmax precision, model random zero

Merge pull request !6764 from chenzupeng/master-lite-2
tags/v1.1.0
mindspore-ci-bot Gitee 5 years ago
parent
commit
12828560de
5 changed files with 106 additions and 100 deletions
  1. +98
    -96
      mindspore/lite/src/runtime/kernel/opencl/cl/softmax.cl
  2. +1
    -1
      mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc
  3. +5
    -2
      mindspore/lite/src/runtime/opencl/opencl_allocator.cc
  4. +1
    -1
      mindspore/lite/src/runtime/opencl/opencl_allocator.h
  5. +1
    -0
      mindspore/lite/src/tensor.h

+ 98
- 96
mindspore/lite/src/runtime/kernel/opencl/cl/softmax.cl View File

@@ -14,9 +14,9 @@ __kernel void SoftMax_NHWC4_BUF(__read_only image2d_t input, __global FLT4 *outp


if (X >= H || Y >= W) return; if (X >= H || Y >= W) return;


FLT sum = 0.0f;
float sum = 0.0f;
for (int d = 0; d < S; ++d) { for (int d = 0; d < S; ++d) {
FLT4 t = READ_IMAGE(input, smp_zero, (int2)(Y * S + d, X));
float4 t = convert_float4(READ_IMAGE(input, smp_zero, (int2)(Y * S + d, X)));
sum += exp(t.x); sum += exp(t.x);
if (d * 4 + 1 < C) sum += exp(t.y); if (d * 4 + 1 < C) sum += exp(t.y);
if (d * 4 + 2 < C) sum += exp(t.z); if (d * 4 + 2 < C) sum += exp(t.z);
@@ -24,14 +24,15 @@ __kernel void SoftMax_NHWC4_BUF(__read_only image2d_t input, __global FLT4 *outp
} }


for (int d = 0; d < S; ++d) { for (int d = 0; d < S; ++d) {
FLT4 t = READ_IMAGE(input, smp_zero, (int2)(Y * S + d, X));
float4 t = convert_float4(READ_IMAGE(input, smp_zero, (int2)(Y * S + d, X)));
t = divide_no_check(exp(t), sum); t = divide_no_check(exp(t), sum);
__global FLT *output_flt = (__global FLT *)output; __global FLT *output_flt = (__global FLT *)output;
output_flt += (X * W + Y) * C + 4 * d; output_flt += (X * W + Y) * C + 4 * d;
output_flt[0] = t.x;
if (d * 4 + 1 < C) output_flt[1] += t.y;
if (d * 4 + 2 < C) output_flt[2] += t.z;
if (d * 4 + 3 < C) output_flt[3] += t.w;
FLT4 result = TO_FLT4(t);
output_flt[0] = result.x;
if (d * 4 + 1 < C) output_flt[1] += result.y;
if (d * 4 + 2 < C) output_flt[2] += result.z;
if (d * 4 + 3 < C) output_flt[3] += result.w;
} }
} }


@@ -45,9 +46,9 @@ __kernel void SoftMax_NHWC4_IMG(__read_only image2d_t input, __write_only image2


if (X >= H || Y >= W) return; if (X >= H || Y >= W) return;


FLT sum = 0.0f;
float sum = 0.0f;
for (int d = 0; d < S; ++d) { for (int d = 0; d < S; ++d) {
FLT4 t = READ_IMAGE(input, smp_zero, (int2)(Y * S + d, X));
float4 t = convert_float4(READ_IMAGE(input, smp_zero, (int2)(Y * S + d, X)));
sum += exp(t.x); sum += exp(t.x);
if (d * 4 + 1 < C) sum += exp(t.y); if (d * 4 + 1 < C) sum += exp(t.y);
if (d * 4 + 2 < C) sum += exp(t.z); if (d * 4 + 2 < C) sum += exp(t.z);
@@ -55,9 +56,9 @@ __kernel void SoftMax_NHWC4_IMG(__read_only image2d_t input, __write_only image2
} }


for (int d = 0; d < S; ++d) { for (int d = 0; d < S; ++d) {
FLT4 t = READ_IMAGE(input, smp_zero, (int2)(Y * S + d, X));
float4 t = convert_float4(READ_IMAGE(input, smp_zero, (int2)(Y * S + d, X)));
t = exp(t) / sum; t = exp(t) / sum;
WRITE_IMAGE(output, (int2)(Y * S + d, X), t);
WRITE_IMAGE(output, (int2)(Y * S + d, X), TO_FLT4(t));
} }
} }


@@ -71,9 +72,9 @@ __kernel void SoftMax_NC4HW4_BUF(__read_only image2d_t input, __global FLT4 *out


if (X >= H || Y >= W) return; if (X >= H || Y >= W) return;


FLT sum = 0.0f;
float sum = 0.0f;
for (int d = 0; d < S; ++d) { for (int d = 0; d < S; ++d) {
FLT4 t = READ_IMAGE(input, smp_zero, (int2)(Y, d * H + X));
float4 t = convert_float4(READ_IMAGE(input, smp_zero, (int2)(Y, d * H + X)));
sum += exp(t.x); sum += exp(t.x);
if (d * 4 + 1 < C) sum += exp(t.y); if (d * 4 + 1 < C) sum += exp(t.y);
if (d * 4 + 2 < C) sum += exp(t.z); if (d * 4 + 2 < C) sum += exp(t.z);
@@ -81,14 +82,15 @@ __kernel void SoftMax_NC4HW4_BUF(__read_only image2d_t input, __global FLT4 *out
} }


for (int d = 0; d < S; ++d) { for (int d = 0; d < S; ++d) {
FLT4 t = READ_IMAGE(input, smp_zero, (int2)(Y, d * H + X));
float4 t = convert_float4(READ_IMAGE(input, smp_zero, (int2)(Y, d * H + X)));
t = divide_no_check(exp(t), sum); t = divide_no_check(exp(t), sum);
__global FLT *output_flt = (__global FLT *)output; __global FLT *output_flt = (__global FLT *)output;
output_flt += (X * W + Y) * C + 4 * d; output_flt += (X * W + Y) * C + 4 * d;
output_flt[0] = t.x;
if (d * 4 + 1 < C) output_flt[1] += t.y;
if (d * 4 + 2 < C) output_flt[2] += t.z;
if (d * 4 + 3 < C) output_flt[3] += t.w;
FLT4 result = TO_FLT4(t);
output_flt[0] = result.x;
if (d * 4 + 1 < C) output_flt[1] += result.y;
if (d * 4 + 2 < C) output_flt[2] += result.z;
if (d * 4 + 3 < C) output_flt[3] += result.w;
} }
} }


@@ -102,9 +104,9 @@ __kernel void SoftMax_NC4HW4_IMG(__read_only image2d_t input, __write_only image


if (X >= H || Y >= W) return; if (X >= H || Y >= W) return;


FLT sum = 0.0f;
float sum = 0.0f;
for (int d = 0; d < S; ++d) { for (int d = 0; d < S; ++d) {
FLT4 t = READ_IMAGE(input, smp_zero, (int2)(Y, d * H + X));
float4 t = convert_float4(READ_IMAGE(input, smp_zero, (int2)(Y, d * H + X)));
sum += exp(t.x); sum += exp(t.x);
if (d * 4 + 1 < C) sum += exp(t.y); if (d * 4 + 1 < C) sum += exp(t.y);
if (d * 4 + 2 < C) sum += exp(t.z); if (d * 4 + 2 < C) sum += exp(t.z);
@@ -112,51 +114,51 @@ __kernel void SoftMax_NC4HW4_IMG(__read_only image2d_t input, __write_only image
} }


for (int d = 0; d < S; ++d) { for (int d = 0; d < S; ++d) {
FLT4 t = READ_IMAGE(input, smp_zero, (int2)(Y, d * H + X));
float4 t = convert_float4(READ_IMAGE(input, smp_zero, (int2)(Y, d * H + X)));
t = exp(t) / sum; t = exp(t) / sum;
WRITE_IMAGE(output, (int2)(Y, d * H + X), t);
WRITE_IMAGE(output, (int2)(Y, d * H + X), TO_FLT4(t));
} }
} }


__kernel void SoftMax1x1_NHWC4_BUF(__read_only image2d_t input, __global FLT4 *output, const float4 mask, __kernel void SoftMax1x1_NHWC4_BUF(__read_only image2d_t input, __global FLT4 *output, const float4 mask,
const int slices, const int slices_x32) { const int slices, const int slices_x32) {
int tid = get_local_id(0); int tid = get_local_id(0);
FLT sum = 0.0f;
float sum = 0.0f;
for (size_t i = tid; i < slices - 1; i += 32) { for (size_t i = tid; i < slices - 1; i += 32) {
FLT4 src = READ_IMAGE(input, smp_zero, (int2)(i, 0));
sum += dot((FLT4)(1.0f), exp(src));
float4 src = convert_float4(READ_IMAGE(input, smp_zero, (int2)(i, 0)));
sum += dot((float4)(1.0f), exp(src));
} }
if ((slices - 1) % 32 == tid) { if ((slices - 1) % 32 == tid) {
FLT4 src = READ_IMAGE(input, smp_zero, (int2)(slices - 1, 0));

sum += dot(TO_FLT4(mask), exp(src));
float4 src = convert_float4(READ_IMAGE(input, smp_zero, (int2)(slices - 1, 0)));
sum += dot(convert_float4(mask), exp(src));
} }


__local FLT4 tmp[8];
__local FLT *tmpx1 = (__local FLT *)tmp;
__local float4 tmp[8];
__local float *tmpx1 = (__local float *)tmp;
tmpx1[tid] = sum; tmpx1[tid] = sum;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid == 0) { if (tid == 0) {
sum = dot((FLT4)(1.0f), tmp[0]);
sum += dot((FLT4)(1.0f), tmp[1]);
sum += dot((FLT4)(1.0f), tmp[2]);
sum += dot((FLT4)(1.0f), tmp[3]);
sum += dot((FLT4)(1.0f), tmp[4]);
sum += dot((FLT4)(1.0f), tmp[5]);
sum += dot((FLT4)(1.0f), tmp[6]);
sum += dot((FLT4)(1.0f), tmp[7]);
sum = dot((float4)(1.0f), tmp[0]);
sum += dot((float4)(1.0f), tmp[1]);
sum += dot((float4)(1.0f), tmp[2]);
sum += dot((float4)(1.0f), tmp[3]);
sum += dot((float4)(1.0f), tmp[4]);
sum += dot((float4)(1.0f), tmp[5]);
sum += dot((float4)(1.0f), tmp[6]);
sum += dot((float4)(1.0f), tmp[7]);
tmpx1[0] = divide_no_check(1.0f, sum); tmpx1[0] = divide_no_check(1.0f, sum);
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
sum = tmpx1[0]; sum = tmpx1[0];
for (size_t i = tid; i < slices - 1; i += 32) { for (size_t i = tid; i < slices - 1; i += 32) {
FLT4 result = READ_IMAGE(input, smp_zero, (int2)(i, 0));
float4 result = convert_float4(READ_IMAGE(input, smp_zero, (int2)(i, 0)));
result = exp(result) * sum; result = exp(result) * sum;
output[i] = result;
output[i] = TO_FLT4(result);
} }
if ((slices - 1) % 32 == tid) { if ((slices - 1) % 32 == tid) {
FLT4 result = READ_IMAGE(input, smp_zero, (int2)(slices - 1, 0));
result = exp(result) * sum;
float4 result_float = convert_float4(READ_IMAGE(input, smp_zero, (int2)(slices - 1, 0)));
result_float = exp(result_float) * sum;
FLT4 result = TO_FLT4(result_float);
__global FLT4 *remain_ptr4 = output; __global FLT4 *remain_ptr4 = output;
remain_ptr4 += slices - 1; remain_ptr4 += slices - 1;
__global FLT *remain_ptr = (__global FLT *)remain_ptr4; __global FLT *remain_ptr = (__global FLT *)remain_ptr4;
@@ -176,80 +178,81 @@ __kernel void SoftMax1x1_NHWC4_BUF(__read_only image2d_t input, __global FLT4 *o
__kernel void SoftMax1x1_NHWC4_IMG(__read_only image2d_t input, __write_only image2d_t output, const float4 mask, __kernel void SoftMax1x1_NHWC4_IMG(__read_only image2d_t input, __write_only image2d_t output, const float4 mask,
const int slices, const int slices_x32) { const int slices, const int slices_x32) {
int tid = get_local_id(0); int tid = get_local_id(0);
FLT sum = 0.0f;
float sum = 0.0f;
for (size_t i = tid; i < slices - 1; i += 32) { for (size_t i = tid; i < slices - 1; i += 32) {
FLT4 src = READ_IMAGE(input, smp_zero, (int2)(i, 0));
sum += dot((FLT4)(1.0f), exp(src));
float4 src = convert_float4(READ_IMAGE(input, smp_zero, (int2)(i, 0)));
sum += dot((float4)(1.0f), exp(src));
} }
if ((slices - 1) % 32 == tid) { if ((slices - 1) % 32 == tid) {
FLT4 src = READ_IMAGE(input, smp_zero, (int2)(slices - 1, 0));
float4 src = convert_float4(READ_IMAGE(input, smp_zero, (int2)(slices - 1, 0)));


sum += dot(TO_FLT4(mask), exp(src));
sum += dot(convert_float4(mask), exp(src));
} }


__local FLT4 tmp[8];
__local FLT *tmpx1 = (__local FLT *)tmp;
__local float4 tmp[8];
__local float *tmpx1 = (__local float *)tmp;
tmpx1[tid] = sum; tmpx1[tid] = sum;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid == 0) { if (tid == 0) {
sum = dot((FLT4)(1.0f), tmp[0]);
sum += dot((FLT4)(1.0f), tmp[1]);
sum += dot((FLT4)(1.0f), tmp[2]);
sum += dot((FLT4)(1.0f), tmp[3]);
sum += dot((FLT4)(1.0f), tmp[4]);
sum += dot((FLT4)(1.0f), tmp[5]);
sum += dot((FLT4)(1.0f), tmp[6]);
sum += dot((FLT4)(1.0f), tmp[7]);
sum = dot((float4)(1.0f), tmp[0]);
sum += dot((float4)(1.0f), tmp[1]);
sum += dot((float4)(1.0f), tmp[2]);
sum += dot((float4)(1.0f), tmp[3]);
sum += dot((float4)(1.0f), tmp[4]);
sum += dot((float4)(1.0f), tmp[5]);
sum += dot((float4)(1.0f), tmp[6]);
sum += dot((float4)(1.0f), tmp[7]);
tmpx1[0] = divide_no_check(1.0f, sum); tmpx1[0] = divide_no_check(1.0f, sum);
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
sum = tmpx1[0]; sum = tmpx1[0];
for (size_t i = tid; i < slices; i += 32) { for (size_t i = tid; i < slices; i += 32) {
FLT4 result = READ_IMAGE(input, smp_zero, (int2)(i, 0));
float4 result = convert_float4(READ_IMAGE(input, smp_zero, (int2)(i, 0)));
result = exp(result) * sum; result = exp(result) * sum;
WRITE_IMAGE(output, (int2)(i, 0), result);
WRITE_IMAGE(output, (int2)(i, 0), TO_FLT4(result));
} }
} }


__kernel void SoftMax1x1_NC4HW4_BUF(__read_only image2d_t input, __global FLT4 *output, const float4 mask, __kernel void SoftMax1x1_NC4HW4_BUF(__read_only image2d_t input, __global FLT4 *output, const float4 mask,
const int slices, const int slices_x32) { const int slices, const int slices_x32) {
int tid = get_local_id(0); int tid = get_local_id(0);
FLT sum = 0.0f;
float sum = 0.0f;
for (size_t i = tid; i < slices - 1; i += 32) { for (size_t i = tid; i < slices - 1; i += 32) {
FLT4 src = READ_IMAGE(input, smp_zero, (int2)(0, i));
sum += dot((FLT4)(1.0f), exp(src));
float4 src = convert_float4(READ_IMAGE(input, smp_zero, (int2)(0, i)));
sum += dot((float4)(1.0f), exp(src));
} }
if ((slices - 1) % 32 == tid) { if ((slices - 1) % 32 == tid) {
FLT4 src = READ_IMAGE(input, smp_zero, (int2)(0, slices - 1));
float4 src = convert_float4(READ_IMAGE(input, smp_zero, (int2)(0, slices - 1)));


sum += dot(TO_FLT4(mask), exp(src));
sum += dot(convert_float4(mask), exp(src));
} }


__local FLT4 tmp[8];
__local FLT *tmpx1 = (__local FLT *)tmp;
__local float4 tmp[8];
__local float *tmpx1 = (__local float *)tmp;
tmpx1[tid] = sum; tmpx1[tid] = sum;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid == 0) { if (tid == 0) {
sum = dot((FLT4)(1.0f), tmp[0]);
sum += dot((FLT4)(1.0f), tmp[1]);
sum += dot((FLT4)(1.0f), tmp[2]);
sum += dot((FLT4)(1.0f), tmp[3]);
sum += dot((FLT4)(1.0f), tmp[4]);
sum += dot((FLT4)(1.0f), tmp[5]);
sum += dot((FLT4)(1.0f), tmp[6]);
sum += dot((FLT4)(1.0f), tmp[7]);
sum = dot((float4)(1.0f), tmp[0]);
sum += dot((float4)(1.0f), tmp[1]);
sum += dot((float4)(1.0f), tmp[2]);
sum += dot((float4)(1.0f), tmp[3]);
sum += dot((float4)(1.0f), tmp[4]);
sum += dot((float4)(1.0f), tmp[5]);
sum += dot((float4)(1.0f), tmp[6]);
sum += dot((float4)(1.0f), tmp[7]);
tmpx1[0] = divide_no_check(1.0f, sum); tmpx1[0] = divide_no_check(1.0f, sum);
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
sum = tmpx1[0]; sum = tmpx1[0];
for (size_t i = tid; i < slices - 1; i += 32) { for (size_t i = tid; i < slices - 1; i += 32) {
FLT4 result = READ_IMAGE(input, smp_zero, (int2)(0, i));
float4 result = convert_float4(READ_IMAGE(input, smp_zero, (int2)(0, i)));
result = exp(result) * sum; result = exp(result) * sum;
output[i] = result;
output[i] = TO_FLT4(result);
} }
if ((slices - 1) % 32 == tid) { if ((slices - 1) % 32 == tid) {
FLT4 result = READ_IMAGE(input, smp_zero, (int2)(0, slices - 1));
result = exp(result) * sum;
float4 result_float = convert_float4(READ_IMAGE(input, smp_zero, (int2)(0, slices - 1)));
result_float = exp(result_float) * sum;
FLT4 result = TO_FLT4(result_float);
__global FLT4 *remain_ptr4 = output; __global FLT4 *remain_ptr4 = output;
remain_ptr4 += slices - 1; remain_ptr4 += slices - 1;
__global FLT *remain_ptr = (__global FLT *)remain_ptr4; __global FLT *remain_ptr = (__global FLT *)remain_ptr4;
@@ -269,37 +272,36 @@ __kernel void SoftMax1x1_NC4HW4_BUF(__read_only image2d_t input, __global FLT4 *
__kernel void SoftMax1x1_NC4HW4_IMG(__read_only image2d_t input, __write_only image2d_t output, const float4 mask, __kernel void SoftMax1x1_NC4HW4_IMG(__read_only image2d_t input, __write_only image2d_t output, const float4 mask,
const int slices, const int slices_x32) { const int slices, const int slices_x32) {
int tid = get_local_id(0); int tid = get_local_id(0);
FLT sum = 0.0f;
float sum = 0.0f;
for (size_t i = tid; i < slices - 1; i += 32) { for (size_t i = tid; i < slices - 1; i += 32) {
FLT4 src = READ_IMAGE(input, smp_zero, (int2)(0, i));
sum += dot((FLT4)(1.0f), exp(src));
float4 src = convert_float4(READ_IMAGE(input, smp_zero, (int2)(0, i)));
sum += dot((float4)(1.0f), exp(src));
} }
if ((slices - 1) % 32 == tid) { if ((slices - 1) % 32 == tid) {
FLT4 src = READ_IMAGE(input, smp_zero, (int2)(0, slices - 1));

sum += dot(TO_FLT4(mask), exp(src));
float4 src = convert_float4(READ_IMAGE(input, smp_zero, (int2)(0, slices - 1)));
sum += dot(convert_float4(mask), exp(src));
} }


__local FLT4 tmp[8];
__local FLT *tmpx1 = (__local FLT *)tmp;
__local float4 tmp[8];
__local float *tmpx1 = (__local float *)tmp;
tmpx1[tid] = sum; tmpx1[tid] = sum;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (tid == 0) { if (tid == 0) {
sum = dot((FLT4)(1.0f), tmp[0]);
sum += dot((FLT4)(1.0f), tmp[1]);
sum += dot((FLT4)(1.0f), tmp[2]);
sum += dot((FLT4)(1.0f), tmp[3]);
sum += dot((FLT4)(1.0f), tmp[4]);
sum += dot((FLT4)(1.0f), tmp[5]);
sum += dot((FLT4)(1.0f), tmp[6]);
sum += dot((FLT4)(1.0f), tmp[7]);
sum = dot((float4)(1.0f), tmp[0]);
sum += dot((float4)(1.0f), tmp[1]);
sum += dot((float4)(1.0f), tmp[2]);
sum += dot((float4)(1.0f), tmp[3]);
sum += dot((float4)(1.0f), tmp[4]);
sum += dot((float4)(1.0f), tmp[5]);
sum += dot((float4)(1.0f), tmp[6]);
sum += dot((float4)(1.0f), tmp[7]);
tmpx1[0] = divide_no_check(1.0f, sum); tmpx1[0] = divide_no_check(1.0f, sum);
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
sum = tmpx1[0]; sum = tmpx1[0];
for (size_t i = tid; i < slices; i += 32) { for (size_t i = tid; i < slices; i += 32) {
FLT4 result = READ_IMAGE(input, smp_zero, (int2)(0, i));
float4 result = convert_float4(READ_IMAGE(input, smp_zero, (int2)(0, i)));
result = exp(result) * sum; result = exp(result) * sum;
WRITE_IMAGE(output, (int2)(0, i), result);
WRITE_IMAGE(output, (int2)(0, i), TO_FLT4(result));
} }
} }

+ 1
- 1
mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc View File

@@ -129,7 +129,7 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_te
MS_LOG(ERROR) << "SubGraphOpenCLKernel create op failed!"; MS_LOG(ERROR) << "SubGraphOpenCLKernel create op failed!";
delete new_tensor; delete new_tensor;
new_tensor = nullptr; new_tensor = nullptr;
delete parameter;
free(parameter);
parameter = nullptr; parameter = nullptr;
return RET_ERROR; return RET_ERROR;
} }


+ 5
- 2
mindspore/lite/src/runtime/opencl/opencl_allocator.cc View File

@@ -111,8 +111,8 @@ void *OpenCLAllocator::Malloc(size_t size, const std::vector<size_t> &img_size)
ocl_runtime_->UnmapBuffer(*mem, host_ptr); ocl_runtime_->UnmapBuffer(*mem, host_ptr);
if (!img_size.empty()) { if (!img_size.empty()) {
cl::ImageFormat image_format(CL_RGBA, img_size[2]); cl::ImageFormat image_format(CL_RGBA, img_size[2]);
image = new (std::nothrow) cl::Image2D(*ocl_runtime_->Context(), image_format, *buffer, img_size[0],
img_size[1], img_pitch * dtype_size, &ret);
image = new (std::nothrow) cl::Image2D(*ocl_runtime_->Context(), image_format, *buffer, img_size[0], img_size[1],
img_pitch * dtype_size, &ret);
if (image == nullptr || ret != CL_SUCCESS) { if (image == nullptr || ret != CL_SUCCESS) {
delete buffer; delete buffer;
UnLock(); UnLock();
@@ -265,6 +265,9 @@ void OpenCLAllocator::Clear() {
Lock(); Lock();
auto svm_capabilities = ocl_runtime_->GetSVMCapabilities(); auto svm_capabilities = ocl_runtime_->GetSVMCapabilities();
for (auto it = allocated_list_.begin(); it != allocated_list_.end(); it++) { for (auto it = allocated_list_.begin(); it != allocated_list_.end(); it++) {
if (it->second->map_flags) {
UnmapBuffer(it->second->host_ptr_);
}
if (svm_capabilities) { if (svm_capabilities) {
clSVMFree((*ocl_runtime_->Context())(), it->second->host_ptr_); clSVMFree((*ocl_runtime_->Context())(), it->second->host_ptr_);
MS_LOG(DEBUG) << "OpenCL free svm buffer : " << it->second->host_ptr_; MS_LOG(DEBUG) << "OpenCL free svm buffer : " << it->second->host_ptr_;


+ 1
- 1
mindspore/lite/src/runtime/opencl/opencl_allocator.h View File

@@ -63,7 +63,7 @@ class OpenCLAllocator : public Allocator {
int GetImageSize(void *host_ptr, std::vector<size_t> *img_size); int GetImageSize(void *host_ptr, std::vector<size_t> *img_size);
void *Prepare(void *ptr) override { void *Prepare(void *ptr) override {
if (ptr != nullptr) { if (ptr != nullptr) {
ptr = MapBuffer(ptr, CL_MAP_WRITE, nullptr, true);
ptr = MapBuffer(ptr, CL_MAP_READ | CL_MAP_WRITE, nullptr, true);
} }
return ptr; return ptr;
} }


+ 1
- 0
mindspore/lite/src/tensor.h View File

@@ -184,6 +184,7 @@ class Tensor : public mindspore::tensor::MSTensor {
MS_LOG(WARNING) << "Malloc data failed"; MS_LOG(WARNING) << "Malloc data failed";
} }
} }
Prepare();
return this->data_; return this->data_;
} }




Loading…
Cancel
Save