Browse Source

fix flatten vulkan fp16p image pack1to4 and pack1to8

tags/20200413
nihuini 6 years ago
parent
commit
32a9a489bc
5 changed files with 49 additions and 21 deletions
  1. +1
    -3
      src/layer/arm/flatten_arm.cpp
  2. +1
    -1
      src/layer/vulkan/flatten_vulkan.cpp
  3. +17
    -5
      src/layer/vulkan/shader/flatten_pack1to4.comp
  4. +29
    -12
      src/layer/vulkan/shader/flatten_pack1to8.comp
  5. +1
    -0
      tests/test_flatten.cpp

+ 1
- 3
src/layer/arm/flatten_arm.cpp View File

@@ -33,10 +33,8 @@ Flatten_arm::Flatten_arm()

int Flatten_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
{
- if (bottom_blob.elemsize / bottom_blob.elempack == 2u)
- {
+ if (opt.use_bf16_storage)
return forward_bf16s(bottom_blob, top_blob, opt);
- }

int dims = bottom_blob.dims;



+ 1
- 1
src/layer/vulkan/flatten_vulkan.cpp View File

@@ -193,7 +193,7 @@ int Flatten_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute
if (out_elempack == 1) out_elemsize = 4u;
}

- if (dims == 2 && elempack == 1)
+ if (dims == 2 && elempack == 1 && !(opt.use_fp16_packed && !opt.use_fp16_storage && out_elempack != 1))
{
top_blob = bottom_blob;
top_blob.dims = 1;


+ 17
- 5
src/layer/vulkan/shader/flatten_pack1to4.comp View File

@@ -67,13 +67,25 @@ void main()

ivec4 i4 = gx * 4 + ivec4(0, 1, 2, 3);

- int size = psc(w) * psc(h);
+ ivec4 v_offset;

- ivec4 z4 = i4 / size;
- ivec4 y4 = i4 % size / psc(w);
- ivec4 x4 = i4 % size % psc(w);
+ if (psc(dims) == 2)
+ {
+ ivec4 y4 = i4 / psc(w);
+ ivec4 x4 = i4 % psc(w);

- ivec4 v_offset = z4 * psc(cstep) + y4 * psc(w) + x4;
+ v_offset = y4 * psc(w) + x4;
+ }
+ else // if (psc(dims) == 3)
+ {
+ int size = psc(w) * psc(h);
+
+ ivec4 z4 = i4 / size;
+ ivec4 y4 = i4 % size / psc(w);
+ ivec4 x4 = i4 % size % psc(w);
+
+ v_offset = z4 * psc(cstep) + y4 * psc(w) + x4;
+ }

buffer_cp1to4(top_blob_data, gx, bottom_blob_data, v_offset);
}

+ 29
- 12
src/layer/vulkan/shader/flatten_pack1to8.comp View File

@@ -69,18 +69,35 @@ void main()
ivec4 i4 = gx * 8 + ivec4(0, 1, 2, 3);
ivec4 ii4 = i4 + 4;

- int size = psc(w) * psc(h);
-
- ivec4 z4 = i4 / size;
- ivec4 y4 = i4 % size / psc(w);
- ivec4 x4 = i4 % size % psc(w);
-
- ivec4 zz4 = ii4 / size;
- ivec4 yy4 = ii4 % size / psc(w);
- ivec4 xx4 = ii4 % size % psc(w);
-
- ivec4 v_offset = z4 * psc(cstep) + y4 * psc(w) + x4;
- ivec4 vv_offset = zz4 * psc(cstep) + yy4 * psc(w) + xx4;
+ ivec4 v_offset;
+ ivec4 vv_offset;
+
+ if (psc(dims) == 2)
+ {
+ ivec4 y4 = i4 / psc(w);
+ ivec4 x4 = i4 % psc(w);
+
+ ivec4 yy4 = ii4 / psc(w);
+ ivec4 xx4 = ii4 % psc(w);
+
+ v_offset = y4 * psc(w) + x4;
+ vv_offset = yy4 * psc(w) + xx4;
+ }
+ else // if (psc(dims) == 3)
+ {
+ int size = psc(w) * psc(h);
+
+ ivec4 z4 = i4 / size;
+ ivec4 y4 = i4 % size / psc(w);
+ ivec4 x4 = i4 % size % psc(w);
+
+ ivec4 zz4 = ii4 / size;
+ ivec4 yy4 = ii4 % size / psc(w);
+ ivec4 xx4 = ii4 % size % psc(w);
+
+ v_offset = z4 * psc(cstep) + y4 * psc(w) + x4;
+ vv_offset = zz4 * psc(cstep) + yy4 * psc(w) + xx4;
+ }

buffer_cp1to8(top_blob_data, gx, bottom_blob_data, v_offset, vv_offset);
}

+ 1
- 0
tests/test_flatten.cpp View File

@@ -51,6 +51,7 @@ static int test_flatten_0()
|| test_flatten(RandomMat(6, 6, 15))
|| test_flatten(RandomMat(13, 13))
|| test_flatten(RandomMat(8, 12))
+ || test_flatten(RandomMat(8, 2))
|| test_flatten(RandomMat(32))
|| test_flatten(RandomMat(17))
;


Loading…
Cancel
Save