Browse Source

Resolve the cast-from type properly; no more fp16p to/from fp16s conversion

tags/20200616
nihuini 6 years ago
parent
commit
f87f21779f
2 changed files with 102 additions and 10 deletions
  1. +2
    -2
      src/command.cpp
  2. +100
    -8
      src/gpu.cpp

+ 2
- 2
src/command.cpp View File

@@ -230,7 +230,7 @@ void VkCompute::record_download(const VkMat& src, Mat& dst, const Option& opt)

// gpu cast to fp32 on the fly (any device whose info.type is not 0, e.g. integrated gpu)
Option opt_staging = opt;
if (vkdev->info.type == 1)
if (vkdev->info.type != 0)
{
opt_staging.use_fp16_packed = false;
opt_staging.use_fp16_storage = false;
@@ -358,7 +358,7 @@ void VkCompute::record_download(const VkImageMat& src, Mat& dst, const Option& o

// gpu cast to fp32 on the fly (any device whose info.type is not 0, e.g. integrated gpu)
Option opt_staging = opt;
if (vkdev->info.type == 1)
if (vkdev->info.type != 0)
{
opt_staging.use_fp16_packed = false;
opt_staging.use_fp16_storage = false;


+ 100
- 8
src/gpu.cpp View File

@@ -1687,10 +1687,30 @@ void VulkanDevice::convert_packing(const VkMat& src, VkMat& dst, int dst_elempac
Option opt = _opt;
opt.use_image_storage = false;

int cast_type_from_index = src.elemsize == src.elempack * 4u ? 0 : src.elemsize == src.elempack * 2u && info.support_fp16_storage ? 2 : 1;
int cast_type_to_index = opt.use_fp16_storage ? 2 : opt.use_fp16_packed && dst_elempack % 4 == 0 ? 1 : 0;
int cast_type_to_index = opt.use_fp16_storage ? 2 : opt.use_fp16_packed ? 1 : 0;
int packing_type_to_index = dst_elempack == 1 ? 0 : dst_elempack == 4 ? 1 : 2;

int cast_type_from_index;
if (src.elemsize == src.elempack * 4u)
{
cast_type_from_index = 0;
}
else // if (src.elemsize == src.elempack * 2u)
{
if (cast_type_to_index != 0)
{
cast_type_from_index = cast_type_to_index;
}
else if (info.support_fp16_storage)
{
cast_type_from_index = 2;
}
else // if (info.support_fp16_packed)
{
cast_type_from_index = 1;
}
}

// NCNN_LOGE("convert_packing b2b %d %d %d", cast_type_from_index, cast_type_to_index, packing_type_to_index);

const ncnn::Packing_vulkan* uop = uop_packing[0][0][cast_type_from_index][cast_type_to_index][packing_type_to_index];
@@ -1705,10 +1725,30 @@ void VulkanDevice::convert_packing(const VkImageMat& src, VkImageMat& dst, int d
return;
}

int cast_type_from_index = src.elemsize == src.elempack * 4u ? 0 : src.elemsize == src.elempack * 2u && info.support_fp16_storage ? 2 : 1;
int cast_type_to_index = opt.use_fp16_storage ? 2 : opt.use_fp16_packed && dst_elempack % 4 == 0 ? 1 : 0;
int cast_type_to_index = opt.use_fp16_storage ? 2 : opt.use_fp16_packed ? 1 : 0;
int packing_type_to_index = dst_elempack == 1 ? 0 : dst_elempack == 4 ? 1 : 2;

int cast_type_from_index;
if (src.elemsize == src.elempack * 4u)
{
cast_type_from_index = 0;
}
else // if (src.elemsize == src.elempack * 2u)
{
if (cast_type_to_index != 0)
{
cast_type_from_index = cast_type_to_index;
}
else if (info.support_fp16_storage)
{
cast_type_from_index = 2;
}
else // if (info.support_fp16_packed)
{
cast_type_from_index = 1;
}
}

// NCNN_LOGE("convert_packing i2i %d %d %d", cast_type_from_index, cast_type_to_index, packing_type_to_index);

const ncnn::Packing_vulkan* uop = uop_packing[1][1][cast_type_from_index][cast_type_to_index][packing_type_to_index];
@@ -1723,10 +1763,30 @@ void VulkanDevice::convert_packing(const VkMat& src, VkImageMat& dst, int dst_el
return;
}

int cast_type_from_index = src.elemsize == src.elempack * 4u ? 0 : src.elemsize == src.elempack * 2u && info.support_fp16_storage ? 2 : 1;
int cast_type_to_index = opt.use_fp16_storage ? 2 : opt.use_fp16_packed && dst_elempack % 4 == 0 ? 1 : 0;
int cast_type_to_index = opt.use_fp16_storage ? 2 : opt.use_fp16_packed ? 1 : 0;
int packing_type_to_index = dst_elempack == 1 ? 0 : dst_elempack == 4 ? 1 : 2;

int cast_type_from_index;
if (src.elemsize == src.elempack * 4u)
{
cast_type_from_index = 0;
}
else // if (src.elemsize == src.elempack * 2u)
{
if (cast_type_to_index != 0)
{
cast_type_from_index = cast_type_to_index;
}
else if (info.support_fp16_storage)
{
cast_type_from_index = 2;
}
else // if (info.support_fp16_packed)
{
cast_type_from_index = 1;
}
}

// NCNN_LOGE("convert_packing b2i %d %d %d", cast_type_from_index, cast_type_to_index, packing_type_to_index);

const ncnn::Packing_vulkan* uop = uop_packing[0][1][cast_type_from_index][cast_type_to_index][packing_type_to_index];
@@ -1741,10 +1801,30 @@ void VulkanDevice::convert_packing(const VkImageMat& src, VkMat& dst, int dst_el
return;
}

int cast_type_from_index = src.elemsize == src.elempack * 4u ? 0 : src.elemsize == src.elempack * 2u && info.support_fp16_storage ? 2 : 1;
int cast_type_to_index = opt.use_fp16_storage ? 2 : opt.use_fp16_packed && dst_elempack % 4 == 0 ? 1 : 0;
int cast_type_to_index = opt.use_fp16_storage ? 2 : opt.use_fp16_packed ? 1 : 0;
int packing_type_to_index = dst_elempack == 1 ? 0 : dst_elempack == 4 ? 1 : 2;

int cast_type_from_index;
if (src.elemsize == src.elempack * 4u)
{
cast_type_from_index = 0;
}
else // if (src.elemsize == src.elempack * 2u)
{
if (cast_type_to_index != 0)
{
cast_type_from_index = cast_type_to_index;
}
else if (info.support_fp16_storage)
{
cast_type_from_index = 2;
}
else // if (info.support_fp16_packed)
{
cast_type_from_index = 1;
}
}

// NCNN_LOGE("convert_packing i2b %d %d %d", cast_type_from_index, cast_type_to_index, packing_type_to_index);

const ncnn::Packing_vulkan* uop = uop_packing[1][0][cast_type_from_index][cast_type_to_index][packing_type_to_index];
@@ -2073,6 +2153,12 @@ int VulkanDevice::create_utility_operator()
{
for (int j1=0; j1<3; j1++)
{
if ((j0 == 1 && j1 == 2) || (j0 == 2 && j1 == 1))
{
// no fp16p to/from fp16s conversion
continue;
}

opt.use_fp16_packed = (j0 == 1 || j1 == 1);
opt.use_fp16_storage = (j0 == 2 || j1 == 2);

@@ -2132,6 +2218,12 @@ void VulkanDevice::destroy_utility_operator()
{
for (int j1=0; j1<3; j1++)
{
if ((j0 == 1 && j1 == 2) || (j0 == 2 && j1 == 1))
{
// no fp16p to/from fp16s conversion
continue;
}

opt.use_fp16_packed = (j0 == 1 || j1 == 1);
opt.use_fp16_storage = (j0 == 2 || j1 == 2);



Loading…
Cancel
Save