Browse Source

fix adaptive avg pooling accumulation overflow in vulkan when using fp16 arithmetic, by accumulating in fp32 (#2698)

tags/20210322
zylo117 GitHub 5 years ago
parent
commit
41fba71fa0
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 18 additions and 15 deletions
  1. src/layer/vulkan/shader/pooling_adaptive.comp (+5, -4)
  2. src/layer/vulkan/shader/pooling_adaptive_pack4.comp (+5, -4)
  3. src/layer/vulkan/shader/pooling_adaptive_pack8.comp (+8, -7)

+ 5
- 4
src/layer/vulkan/shader/pooling_adaptive.comp View File

@@ -111,7 +111,7 @@ void main()
}
if (pooling_type == 1)
{
res = afp(0.f);
float res_fp32 = 0.f; // force accumulation in fp32
int area = 0;

#if NCNN_image_shader
@@ -119,7 +119,7 @@ void main()
{
for (int x = 0; x < kernel_w; x++)
{
res += image3d_ld1(bottom_blob, ivec3(sx + x, sy + y, gz));
res_fp32 += image3d_ld1(bottom_blob, ivec3(sx + x, sy + y, gz));
area += 1;
}
}
@@ -130,7 +130,7 @@ void main()
{
for (int x = 0; x < kernel_w; x++)
{
res += buffer_ld1(bottom_blob_data, v_offset + x);
res_fp32 += buffer_ld1(bottom_blob_data, v_offset + x);
area += 1;
}

@@ -138,7 +138,8 @@ void main()
}
#endif

res /= afp(area);
res_fp32 /= float(area);
res = afp(res_fp32); // cast to fp16 if possible
}

#if NCNN_image_shader


+ 5
- 4
src/layer/vulkan/shader/pooling_adaptive_pack4.comp View File

@@ -111,7 +111,7 @@ void main()
}
else if (pooling_type == 1)
{
res = afpvec4(0.f);
vec4 res_fp32 = vec4(0.f); // force accumulation in fp32
int area = 0;

#if NCNN_image_shader
@@ -119,7 +119,7 @@ void main()
{
for (int x = 0; x < kernel_w; x++)
{
res += image3d_ld4(bottom_blob, ivec3(sx + x, sy + y, gz));
res_fp32 += image3d_ld4(bottom_blob, ivec3(sx + x, sy + y, gz));
area += 1;
}
}
@@ -130,7 +130,7 @@ void main()
{
for (int x = 0; x < kernel_w; x++)
{
res += buffer_ld4(bottom_blob_data, v_offset + x);
res_fp32 += buffer_ld4(bottom_blob_data, v_offset + x);
area += 1;
}

@@ -138,7 +138,8 @@ void main()
}
#endif

res /= afp(area);
res_fp32 /= float(area);
res = afpvec4(res_fp32); // cast to fp16 if possible
}

#if NCNN_image_shader


+ 8
- 7
src/layer/vulkan/shader/pooling_adaptive_pack8.comp View File

@@ -114,7 +114,7 @@ void main()
}
else if (pooling_type == 1)
{
res = afpvec8(afpvec4(0.f), afpvec4(0.f));
mat2x4 res_fp32 = mat2x4(vec4(0.f), vec4(0.f)); // force accumulation in fp32
int area = 0;
#if NCNN_image_shader
@@ -123,8 +123,8 @@ void main()
for (int x = 0; x < kernel_w; x++)
{
afpvec8 v = image3d_ld8(bottom_blob, ivec3(sx + x, sy + y, gz));
res[0] += v[0];
res[1] += v[1];
res_fp32[0] += v[0];
res_fp32[1] += v[1];
area += 1;
}
}
@@ -136,8 +136,8 @@ void main()
for (int x = 0; x < kernel_w; x++)
{
afpvec8 v = buffer_ld8(bottom_blob_data, v_offset + x);
res[0] += v[0];
res[1] += v[1];
res_fp32[0] += v[0];
res_fp32[1] += v[1];
area += 1;
}

@@ -145,8 +145,9 @@ void main()
}
#endif

res[0] /= afp(area);
res[1] /= afp(area);
res_fp32[0] /= float(area);
res_fp32[1] /= float(area);
res = afpvec8(res_fp32); // cast to fp16 if possible
}

#if NCNN_image_shader


Loading…
Cancel
Save