Browse Source

Unified elempack activation function vulkan shader (#6175)

pull/6181/head
Yexuan Wu GitHub 10 months ago
parent
commit
e2d93a482e
No known key found for this signature in database. GPG Key ID: B5690EEEBB952194
60 changed files with 285 additions and 2225 deletions
  1. +14
    -74
      src/layer/vulkan/celu_vulkan.cpp
  2. +0
    -2
      src/layer/vulkan/celu_vulkan.h
  3. +14
    -74
      src/layer/vulkan/clip_vulkan.cpp
  4. +0
    -2
      src/layer/vulkan/clip_vulkan.h
  5. +14
    -68
      src/layer/vulkan/dropout_vulkan.cpp
  6. +0
    -2
      src/layer/vulkan/dropout_vulkan.h
  7. +14
    -74
      src/layer/vulkan/elu_vulkan.cpp
  8. +0
    -2
      src/layer/vulkan/elu_vulkan.h
  9. +14
    -68
      src/layer/vulkan/erf_vulkan.cpp
  10. +0
    -2
      src/layer/vulkan/erf_vulkan.h
  11. +14
    -74
      src/layer/vulkan/gelu_vulkan.cpp
  12. +0
    -2
      src/layer/vulkan/gelu_vulkan.h
  13. +14
    -68
      src/layer/vulkan/hardsigmoid_vulkan.cpp
  14. +0
    -2
      src/layer/vulkan/hardsigmoid_vulkan.h
  15. +14
    -68
      src/layer/vulkan/hardswish_vulkan.cpp
  16. +0
    -2
      src/layer/vulkan/hardswish_vulkan.h
  17. +14
    -74
      src/layer/vulkan/mish_vulkan.cpp
  18. +0
    -2
      src/layer/vulkan/mish_vulkan.h
  19. +14
    -74
      src/layer/vulkan/relu_vulkan.cpp
  20. +0
    -2
      src/layer/vulkan/relu_vulkan.h
  21. +8
    -20
      src/layer/vulkan/shader/celu.comp
  22. +0
    -42
      src/layer/vulkan/shader/celu_pack4.comp
  23. +0
    -43
      src/layer/vulkan/shader/celu_pack8.comp
  24. +9
    -21
      src/layer/vulkan/shader/clip.comp
  25. +0
    -43
      src/layer/vulkan/shader/clip_pack4.comp
  26. +0
    -44
      src/layer/vulkan/shader/clip_pack8.comp
  27. +9
    -20
      src/layer/vulkan/shader/dropout.comp
  28. +0
    -42
      src/layer/vulkan/shader/dropout_pack4.comp
  29. +0
    -43
      src/layer/vulkan/shader/dropout_pack8.comp
  30. +8
    -20
      src/layer/vulkan/shader/elu.comp
  31. +0
    -42
      src/layer/vulkan/shader/elu_pack4.comp
  32. +0
    -43
      src/layer/vulkan/shader/elu_pack8.comp
  33. +18
    -30
      src/layer/vulkan/shader/erf.comp
  34. +0
    -55
      src/layer/vulkan/shader/erf_pack4.comp
  35. +0
    -56
      src/layer/vulkan/shader/erf_pack8.comp
  36. +9
    -20
      src/layer/vulkan/shader/gelu.comp
  37. +0
    -46
      src/layer/vulkan/shader/gelu_pack4.comp
  38. +0
    -48
      src/layer/vulkan/shader/gelu_pack8.comp
  39. +9
    -20
      src/layer/vulkan/shader/hardsigmoid.comp
  40. +0
    -43
      src/layer/vulkan/shader/hardsigmoid_pack4.comp
  41. +0
    -44
      src/layer/vulkan/shader/hardsigmoid_pack8.comp
  42. +8
    -20
      src/layer/vulkan/shader/hardswish.comp
  43. +0
    -43
      src/layer/vulkan/shader/hardswish_pack4.comp
  44. +0
    -44
      src/layer/vulkan/shader/hardswish_pack8.comp
  45. +11
    -23
      src/layer/vulkan/shader/mish.comp
  46. +0
    -44
      src/layer/vulkan/shader/mish_pack4.comp
  47. +0
    -46
      src/layer/vulkan/shader/mish_pack8.comp
  48. +10
    -21
      src/layer/vulkan/shader/relu.comp
  49. +0
    -45
      src/layer/vulkan/shader/relu_pack4.comp
  50. +0
    -51
      src/layer/vulkan/shader/relu_pack8.comp
  51. +9
    -22
      src/layer/vulkan/shader/swish.comp
  52. +0
    -40
      src/layer/vulkan/shader/swish_pack4.comp
  53. +0
    -41
      src/layer/vulkan/shader/swish_pack8.comp
  54. +9
    -21
      src/layer/vulkan/shader/tanh.comp
  55. +0
    -44
      src/layer/vulkan/shader/tanh_pack4.comp
  56. +0
    -46
      src/layer/vulkan/shader/tanh_pack8.comp
  57. +14
    -75
      src/layer/vulkan/swish_vulkan.cpp
  58. +0
    -2
      src/layer/vulkan/swish_vulkan.h
  59. +14
    -74
      src/layer/vulkan/tanh_vulkan.cpp
  60. +0
    -2
      src/layer/vulkan/tanh_vulkan.h

+ 14
- 74
src/layer/vulkan/celu_vulkan.cpp View File

@@ -12,8 +12,6 @@ CELU_vulkan::CELU_vulkan()
support_vulkan = true;

pipeline_celu = 0;
pipeline_celu_pack4 = 0;
pipeline_celu_pack8 = 0;
}

int CELU_vulkan::create_pipeline(const Option& opt)
@@ -41,63 +39,15 @@ int CELU_vulkan::create_pipeline(const Option& opt)
if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack);
if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack);

std::vector<vk_specialization_type> specializations(1 + 5);
std::vector<vk_specialization_type> specializations(1 + 1);
specializations[0].f = alpha;
specializations[1 + 0].i = shape_packed.dims;
specializations[1 + 1].i = shape_packed.w;
specializations[1 + 2].i = shape_packed.h * shape_packed.d;
specializations[1 + 3].i = shape_packed.c;
specializations[1 + 4].i = shape_packed.cstep;

Mat local_size_xyz;
if (shape_packed.dims == 1)
{
local_size_xyz.w = std::min(64, shape_packed.w);
local_size_xyz.h = 1;
local_size_xyz.c = 1;
}
if (shape_packed.dims == 2)
{
local_size_xyz.w = std::min(8, shape_packed.w);
local_size_xyz.h = std::min(8, shape_packed.h);
local_size_xyz.c = 1;
}
if (shape_packed.dims == 3)
{
local_size_xyz.w = std::min(4, shape_packed.w);
local_size_xyz.h = std::min(4, shape_packed.h);
local_size_xyz.c = std::min(4, shape_packed.c);
}
if (shape_packed.dims == 4)
{
local_size_xyz.w = std::min(4, shape_packed.w);
local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d);
local_size_xyz.c = std::min(4, shape_packed.c);
}

// pack1
if (shape.dims == 0 || elempack == 1)
{
pipeline_celu = new Pipeline(vkdev);
pipeline_celu->set_optimal_local_size_xyz(local_size_xyz);
pipeline_celu->create(LayerShaderType::celu, opt, specializations);
}
specializations[1 + 0].u32 = shape_packed.total() * elempack / 4;

// pack4
if (shape.dims == 0 || elempack == 4)
{
pipeline_celu_pack4 = new Pipeline(vkdev);
pipeline_celu_pack4->set_optimal_local_size_xyz(local_size_xyz);
pipeline_celu_pack4->create(LayerShaderType::celu_pack4, opt, specializations);
}
const int local_size_x = vkdev->info.subgroup_size();

// pack8
if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8)
{
pipeline_celu_pack8 = new Pipeline(vkdev);
pipeline_celu_pack8->set_optimal_local_size_xyz(local_size_xyz);
pipeline_celu_pack8->create(LayerShaderType::celu_pack8, opt, specializations);
}
pipeline_celu = new Pipeline(vkdev);
pipeline_celu->set_optimal_local_size_xyz(local_size_x, 1, 1);
pipeline_celu->create(LayerShaderType::celu, opt, specializations);

return 0;
}
@@ -107,34 +57,24 @@ int CELU_vulkan::destroy_pipeline(const Option& /*opt*/)
delete pipeline_celu;
pipeline_celu = 0;

delete pipeline_celu_pack4;
pipeline_celu_pack4 = 0;

delete pipeline_celu_pack8;
pipeline_celu_pack8 = 0;

return 0;
}

int CELU_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const
{
int elempack = bottom_top_blob.elempack;
const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4;

std::vector<VkMat> bindings(1);
bindings[0] = bottom_top_blob;

std::vector<vk_constant_type> constants(5);
constants[0].i = bottom_top_blob.dims;
constants[1].i = bottom_top_blob.w;
constants[2].i = bottom_top_blob.h * bottom_top_blob.d;
constants[3].i = bottom_top_blob.c;
constants[4].i = bottom_top_blob.cstep;

const Pipeline* pipeline = elempack == 8 ? pipeline_celu_pack8
: elempack == 4 ? pipeline_celu_pack4
: pipeline_celu;
std::vector<vk_constant_type> constants(1);
constants[0].u32 = n;

cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);
VkMat dispatcher;
dispatcher.w = n;
dispatcher.h = 1;
dispatcher.c = 1;
cmd.record_pipeline(pipeline_celu, bindings, constants, dispatcher);

return 0;
}


+ 0
- 2
src/layer/vulkan/celu_vulkan.h View File

@@ -21,8 +21,6 @@ public:

public:
Pipeline* pipeline_celu;
Pipeline* pipeline_celu_pack4;
Pipeline* pipeline_celu_pack8;
};

} // namespace ncnn


+ 14
- 74
src/layer/vulkan/clip_vulkan.cpp View File

@@ -12,8 +12,6 @@ Clip_vulkan::Clip_vulkan()
support_vulkan = true;

pipeline_clip = 0;
pipeline_clip_pack4 = 0;
pipeline_clip_pack8 = 0;
}

int Clip_vulkan::create_pipeline(const Option& opt)
@@ -41,64 +39,16 @@ int Clip_vulkan::create_pipeline(const Option& opt)
if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack);
if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack);

std::vector<vk_specialization_type> specializations(2 + 5);
std::vector<vk_specialization_type> specializations(2 + 1);
specializations[0].f = min;
specializations[1].f = max;
specializations[2 + 0].i = shape_packed.dims;
specializations[2 + 1].i = shape_packed.w;
specializations[2 + 2].i = shape_packed.h * shape_packed.d;
specializations[2 + 3].i = shape_packed.c;
specializations[2 + 4].i = shape_packed.cstep;

Mat local_size_xyz;
if (shape_packed.dims == 1)
{
local_size_xyz.w = std::min(64, shape_packed.w);
local_size_xyz.h = 1;
local_size_xyz.c = 1;
}
if (shape_packed.dims == 2)
{
local_size_xyz.w = std::min(8, shape_packed.w);
local_size_xyz.h = std::min(8, shape_packed.h);
local_size_xyz.c = 1;
}
if (shape_packed.dims == 3)
{
local_size_xyz.w = std::min(4, shape_packed.w);
local_size_xyz.h = std::min(4, shape_packed.h);
local_size_xyz.c = std::min(4, shape_packed.c);
}
if (shape_packed.dims == 4)
{
local_size_xyz.w = std::min(4, shape_packed.w);
local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d);
local_size_xyz.c = std::min(4, shape_packed.c);
}

// pack1
if (shape.dims == 0 || elempack == 1)
{
pipeline_clip = new Pipeline(vkdev);
pipeline_clip->set_optimal_local_size_xyz(local_size_xyz);
pipeline_clip->create(LayerShaderType::clip, opt, specializations);
}
specializations[2 + 0].u32 = shape_packed.total() * elempack / 4;

// pack4
if (shape.dims == 0 || elempack == 4)
{
pipeline_clip_pack4 = new Pipeline(vkdev);
pipeline_clip_pack4->set_optimal_local_size_xyz(local_size_xyz);
pipeline_clip_pack4->create(LayerShaderType::clip_pack4, opt, specializations);
}
const int local_size_x = vkdev->info.subgroup_size();

// pack8
if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8)
{
pipeline_clip_pack8 = new Pipeline(vkdev);
pipeline_clip_pack8->set_optimal_local_size_xyz(local_size_xyz);
pipeline_clip_pack8->create(LayerShaderType::clip_pack8, opt, specializations);
}
pipeline_clip = new Pipeline(vkdev);
pipeline_clip->set_optimal_local_size_xyz(local_size_x, 1, 1);
pipeline_clip->create(LayerShaderType::clip, opt, specializations);

return 0;
}
@@ -108,34 +58,24 @@ int Clip_vulkan::destroy_pipeline(const Option& /*opt*/)
delete pipeline_clip;
pipeline_clip = 0;

delete pipeline_clip_pack4;
pipeline_clip_pack4 = 0;

delete pipeline_clip_pack8;
pipeline_clip_pack8 = 0;

return 0;
}

int Clip_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const
{
int elempack = bottom_top_blob.elempack;
const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4;

std::vector<VkMat> bindings(1);
bindings[0] = bottom_top_blob;

std::vector<vk_constant_type> constants(5);
constants[0].i = bottom_top_blob.dims;
constants[1].i = bottom_top_blob.w;
constants[2].i = bottom_top_blob.h * bottom_top_blob.d;
constants[3].i = bottom_top_blob.c;
constants[4].i = bottom_top_blob.cstep;

const Pipeline* pipeline = elempack == 8 ? pipeline_clip_pack8
: elempack == 4 ? pipeline_clip_pack4
: pipeline_clip;
std::vector<vk_constant_type> constants(1);
constants[0].u32 = n;

cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);
VkMat dispatcher;
dispatcher.w = n;
dispatcher.h = 1;
dispatcher.c = 1;
cmd.record_pipeline(pipeline_clip, bindings, constants, dispatcher);

return 0;
}


+ 0
- 2
src/layer/vulkan/clip_vulkan.h View File

@@ -21,8 +21,6 @@ public:

public:
Pipeline* pipeline_clip;
Pipeline* pipeline_clip_pack4;
Pipeline* pipeline_clip_pack8;
};

} // namespace ncnn


+ 14
- 68
src/layer/vulkan/dropout_vulkan.cpp View File

@@ -12,8 +12,6 @@ Dropout_vulkan::Dropout_vulkan()
support_vulkan = true;

pipeline_dropout = 0;
pipeline_dropout_pack4 = 0;
pipeline_dropout_pack8 = 0;
}

int Dropout_vulkan::create_pipeline(const Option& opt)
@@ -40,57 +38,15 @@ int Dropout_vulkan::create_pipeline(const Option& opt)
if (shape.dims == 2) shape_packed = Mat(shape.w, shape.h / elempack, (void*)0, elemsize, elempack);
if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack);

std::vector<vk_specialization_type> specializations(1 + 5);
std::vector<vk_specialization_type> specializations(1 + 1);
specializations[0].f = scale;
specializations[1 + 0].i = shape_packed.dims;
specializations[1 + 1].i = shape_packed.w;
specializations[1 + 2].i = shape_packed.h;
specializations[1 + 3].i = shape_packed.c;
specializations[1 + 4].i = shape_packed.cstep;

Mat local_size_xyz;
if (shape_packed.dims == 1)
{
local_size_xyz.w = std::min(64, shape_packed.w);
local_size_xyz.h = 1;
local_size_xyz.c = 1;
}
if (shape_packed.dims == 2)
{
local_size_xyz.w = std::min(8, shape_packed.w);
local_size_xyz.h = std::min(8, shape_packed.h);
local_size_xyz.c = 1;
}
if (shape_packed.dims == 3)
{
local_size_xyz.w = std::min(4, shape_packed.w);
local_size_xyz.h = std::min(4, shape_packed.h);
local_size_xyz.c = std::min(4, shape_packed.c);
}

// pack1
if (shape.dims == 0 || elempack == 1)
{
pipeline_dropout = new Pipeline(vkdev);
pipeline_dropout->set_optimal_local_size_xyz(local_size_xyz);
pipeline_dropout->create(LayerShaderType::dropout, opt, specializations);
}
specializations[1 + 0].u32 = shape_packed.total() * elempack / 4;

// pack4
if (shape.dims == 0 || elempack == 4)
{
pipeline_dropout_pack4 = new Pipeline(vkdev);
pipeline_dropout_pack4->set_optimal_local_size_xyz(local_size_xyz);
pipeline_dropout_pack4->create(LayerShaderType::dropout_pack4, opt, specializations);
}
const int local_size_x = vkdev->info.subgroup_size();

// pack8
if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8)
{
pipeline_dropout_pack8 = new Pipeline(vkdev);
pipeline_dropout_pack8->set_optimal_local_size_xyz(local_size_xyz);
pipeline_dropout_pack8->create(LayerShaderType::dropout_pack8, opt, specializations);
}
pipeline_dropout = new Pipeline(vkdev);
pipeline_dropout->set_optimal_local_size_xyz(local_size_x, 1, 1);
pipeline_dropout->create(LayerShaderType::dropout, opt, specializations);

return 0;
}
@@ -100,12 +56,6 @@ int Dropout_vulkan::destroy_pipeline(const Option& /*opt*/)
delete pipeline_dropout;
pipeline_dropout = 0;

delete pipeline_dropout_pack4;
pipeline_dropout_pack4 = 0;

delete pipeline_dropout_pack8;
pipeline_dropout_pack8 = 0;

return 0;
}

@@ -116,23 +66,19 @@ int Dropout_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, cons
return 0;
}

int elempack = bottom_top_blob.elempack;
const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4;

std::vector<VkMat> bindings(1);
bindings[0] = bottom_top_blob;

std::vector<vk_constant_type> constants(5);
constants[0].i = bottom_top_blob.dims;
constants[1].i = bottom_top_blob.w;
constants[2].i = bottom_top_blob.h;
constants[3].i = bottom_top_blob.c;
constants[4].i = bottom_top_blob.cstep;

const Pipeline* pipeline = elempack == 8 ? pipeline_dropout_pack8
: elempack == 4 ? pipeline_dropout_pack4
: pipeline_dropout;
std::vector<vk_constant_type> constants(1);
constants[0].u32 = n;

cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);
VkMat dispatcher;
dispatcher.w = n;
dispatcher.h = 1;
dispatcher.c = 1;
cmd.record_pipeline(pipeline_dropout, bindings, constants, dispatcher);

return 0;
}


+ 0
- 2
src/layer/vulkan/dropout_vulkan.h View File

@@ -21,8 +21,6 @@ public:

public:
Pipeline* pipeline_dropout;
Pipeline* pipeline_dropout_pack4;
Pipeline* pipeline_dropout_pack8;
};

} // namespace ncnn


+ 14
- 74
src/layer/vulkan/elu_vulkan.cpp View File

@@ -12,8 +12,6 @@ ELU_vulkan::ELU_vulkan()
support_vulkan = true;

pipeline_elu = 0;
pipeline_elu_pack4 = 0;
pipeline_elu_pack8 = 0;
}

int ELU_vulkan::create_pipeline(const Option& opt)
@@ -41,63 +39,15 @@ int ELU_vulkan::create_pipeline(const Option& opt)
if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack);
if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack);

std::vector<vk_specialization_type> specializations(1 + 5);
std::vector<vk_specialization_type> specializations(1 + 1);
specializations[0].f = alpha;
specializations[1 + 0].i = shape_packed.dims;
specializations[1 + 1].i = shape_packed.w;
specializations[1 + 2].i = shape_packed.h * shape_packed.d;
specializations[1 + 3].i = shape_packed.c;
specializations[1 + 4].i = shape_packed.cstep;

Mat local_size_xyz;
if (shape_packed.dims == 1)
{
local_size_xyz.w = std::min(64, shape_packed.w);
local_size_xyz.h = 1;
local_size_xyz.c = 1;
}
if (shape_packed.dims == 2)
{
local_size_xyz.w = std::min(8, shape_packed.w);
local_size_xyz.h = std::min(8, shape_packed.h);
local_size_xyz.c = 1;
}
if (shape_packed.dims == 3)
{
local_size_xyz.w = std::min(4, shape_packed.w);
local_size_xyz.h = std::min(4, shape_packed.h);
local_size_xyz.c = std::min(4, shape_packed.c);
}
if (shape_packed.dims == 4)
{
local_size_xyz.w = std::min(4, shape_packed.w);
local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d);
local_size_xyz.c = std::min(4, shape_packed.c);
}

// pack1
if (shape.dims == 0 || elempack == 1)
{
pipeline_elu = new Pipeline(vkdev);
pipeline_elu->set_optimal_local_size_xyz(local_size_xyz);
pipeline_elu->create(LayerShaderType::elu, opt, specializations);
}
specializations[1 + 0].u32 = shape_packed.total() * elempack / 4;

// pack4
if (shape.dims == 0 || elempack == 4)
{
pipeline_elu_pack4 = new Pipeline(vkdev);
pipeline_elu_pack4->set_optimal_local_size_xyz(local_size_xyz);
pipeline_elu_pack4->create(LayerShaderType::elu_pack4, opt, specializations);
}
const int local_size_x = vkdev->info.subgroup_size();

// pack8
if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8)
{
pipeline_elu_pack8 = new Pipeline(vkdev);
pipeline_elu_pack8->set_optimal_local_size_xyz(local_size_xyz);
pipeline_elu_pack8->create(LayerShaderType::elu_pack8, opt, specializations);
}
pipeline_elu = new Pipeline(vkdev);
pipeline_elu->set_optimal_local_size_xyz(local_size_x, 1, 1);
pipeline_elu->create(LayerShaderType::elu, opt, specializations);

return 0;
}
@@ -107,34 +57,24 @@ int ELU_vulkan::destroy_pipeline(const Option& /*opt*/)
delete pipeline_elu;
pipeline_elu = 0;

delete pipeline_elu_pack4;
pipeline_elu_pack4 = 0;

delete pipeline_elu_pack8;
pipeline_elu_pack8 = 0;

return 0;
}

int ELU_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const
{
int elempack = bottom_top_blob.elempack;
const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4;

std::vector<VkMat> bindings(1);
bindings[0] = bottom_top_blob;

std::vector<vk_constant_type> constants(5);
constants[0].i = bottom_top_blob.dims;
constants[1].i = bottom_top_blob.w;
constants[2].i = bottom_top_blob.h * bottom_top_blob.d;
constants[3].i = bottom_top_blob.c;
constants[4].i = bottom_top_blob.cstep;

const Pipeline* pipeline = elempack == 8 ? pipeline_elu_pack8
: elempack == 4 ? pipeline_elu_pack4
: pipeline_elu;
std::vector<vk_constant_type> constants(1);
constants[0].u32 = n;

cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);
VkMat dispatcher;
dispatcher.w = n;
dispatcher.h = 1;
dispatcher.c = 1;
cmd.record_pipeline(pipeline_elu, bindings, constants, dispatcher);

return 0;
}


+ 0
- 2
src/layer/vulkan/elu_vulkan.h View File

@@ -21,8 +21,6 @@ public:

public:
Pipeline* pipeline_elu;
Pipeline* pipeline_elu_pack4;
Pipeline* pipeline_elu_pack8;
};

} // namespace ncnn


+ 14
- 68
src/layer/vulkan/erf_vulkan.cpp View File

@@ -12,8 +12,6 @@ Erf_vulkan::Erf_vulkan()
support_vulkan = true;

pipeline_erf = 0;
pipeline_erf_pack4 = 0;
pipeline_erf_pack8 = 0;
}

int Erf_vulkan::create_pipeline(const Option& opt)
@@ -40,56 +38,14 @@ int Erf_vulkan::create_pipeline(const Option& opt)
if (shape.dims == 2) shape_packed = Mat(shape.w, shape.h / elempack, (void*)0, elemsize, elempack);
if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack);

std::vector<vk_specialization_type> specializations(0 + 5);
specializations[0 + 0].i = shape_packed.dims;
specializations[0 + 1].i = shape_packed.w;
specializations[0 + 2].i = shape_packed.h;
specializations[0 + 3].i = shape_packed.c;
specializations[0 + 4].i = shape_packed.cstep;
std::vector<vk_specialization_type> specializations(1);
specializations[0].u32 = shape_packed.total() * elempack / 4;

Mat local_size_xyz;
if (shape_packed.dims == 1)
{
local_size_xyz.w = std::min(64, shape_packed.w);
local_size_xyz.h = 1;
local_size_xyz.c = 1;
}
if (shape_packed.dims == 2)
{
local_size_xyz.w = std::min(8, shape_packed.w);
local_size_xyz.h = std::min(8, shape_packed.h);
local_size_xyz.c = 1;
}
if (shape_packed.dims == 3)
{
local_size_xyz.w = std::min(4, shape_packed.w);
local_size_xyz.h = std::min(4, shape_packed.h);
local_size_xyz.c = std::min(4, shape_packed.c);
}
const int local_size_x = vkdev->info.subgroup_size();

// pack1
if (shape.dims == 0 || elempack == 1)
{
pipeline_erf = new Pipeline(vkdev);
pipeline_erf->set_optimal_local_size_xyz(local_size_xyz);
pipeline_erf->create(LayerShaderType::erf, opt, specializations);
}

// pack4
if (shape.dims == 0 || elempack == 4)
{
pipeline_erf_pack4 = new Pipeline(vkdev);
pipeline_erf_pack4->set_optimal_local_size_xyz(local_size_xyz);
pipeline_erf_pack4->create(LayerShaderType::erf_pack4, opt, specializations);
}

// pack8
if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8)
{
pipeline_erf_pack8 = new Pipeline(vkdev);
pipeline_erf_pack8->set_optimal_local_size_xyz(local_size_xyz);
pipeline_erf_pack8->create(LayerShaderType::erf_pack8, opt, specializations);
}
pipeline_erf = new Pipeline(vkdev);
pipeline_erf->set_optimal_local_size_xyz(local_size_x, 1, 1);
pipeline_erf->create(LayerShaderType::erf, opt, specializations);

return 0;
}
@@ -99,34 +55,24 @@ int Erf_vulkan::destroy_pipeline(const Option& /*opt*/)
delete pipeline_erf;
pipeline_erf = 0;

delete pipeline_erf_pack4;
pipeline_erf_pack4 = 0;

delete pipeline_erf_pack8;
pipeline_erf_pack8 = 0;

return 0;
}

int Erf_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const
{
int elempack = bottom_top_blob.elempack;
const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4;

std::vector<VkMat> bindings(1);
bindings[0] = bottom_top_blob;

std::vector<vk_constant_type> constants(5);
constants[0].i = bottom_top_blob.dims;
constants[1].i = bottom_top_blob.w;
constants[2].i = bottom_top_blob.h;
constants[3].i = bottom_top_blob.c;
constants[4].i = bottom_top_blob.cstep;

const Pipeline* pipeline = elempack == 8 ? pipeline_erf_pack8
: elempack == 4 ? pipeline_erf_pack4
: pipeline_erf;
std::vector<vk_constant_type> constants(1);
constants[0].u32 = n;

cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);
VkMat dispatcher;
dispatcher.w = n;
dispatcher.h = 1;
dispatcher.c = 1;
cmd.record_pipeline(pipeline_erf, bindings, constants, dispatcher);

return 0;
}


+ 0
- 2
src/layer/vulkan/erf_vulkan.h View File

@@ -21,8 +21,6 @@ public:

public:
Pipeline* pipeline_erf;
Pipeline* pipeline_erf_pack4;
Pipeline* pipeline_erf_pack8;
};

} // namespace ncnn


+ 14
- 74
src/layer/vulkan/gelu_vulkan.cpp View File

@@ -12,8 +12,6 @@ GELU_vulkan::GELU_vulkan()
support_vulkan = true;

pipeline_gelu = 0;
pipeline_gelu_pack4 = 0;
pipeline_gelu_pack8 = 0;
}

int GELU_vulkan::create_pipeline(const Option& opt)
@@ -41,62 +39,14 @@ int GELU_vulkan::create_pipeline(const Option& opt)
if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack);
if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack);

std::vector<vk_specialization_type> specializations(0 + 5);
specializations[0 + 0].i = shape_packed.dims;
specializations[0 + 1].i = shape_packed.w;
specializations[0 + 2].i = shape_packed.h * shape_packed.d;
specializations[0 + 3].i = shape_packed.c;
specializations[0 + 4].i = shape_packed.cstep;
std::vector<vk_specialization_type> specializations(1);
specializations[0].u32 = shape_packed.total() * elempack / 4;

Mat local_size_xyz;
if (shape_packed.dims == 1)
{
local_size_xyz.w = std::min(64, shape_packed.w);
local_size_xyz.h = 1;
local_size_xyz.c = 1;
}
if (shape_packed.dims == 2)
{
local_size_xyz.w = std::min(8, shape_packed.w);
local_size_xyz.h = std::min(8, shape_packed.h);
local_size_xyz.c = 1;
}
if (shape_packed.dims == 3)
{
local_size_xyz.w = std::min(4, shape_packed.w);
local_size_xyz.h = std::min(4, shape_packed.h);
local_size_xyz.c = std::min(4, shape_packed.c);
}
if (shape_packed.dims == 4)
{
local_size_xyz.w = std::min(4, shape_packed.w);
local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d);
local_size_xyz.c = std::min(4, shape_packed.c);
}
const int local_size_x = vkdev->info.subgroup_size();

// pack1
if (shape.dims == 0 || elempack == 1)
{
pipeline_gelu = new Pipeline(vkdev);
pipeline_gelu->set_optimal_local_size_xyz(local_size_xyz);
pipeline_gelu->create(LayerShaderType::gelu, opt, specializations);
}

// pack4
if (shape.dims == 0 || elempack == 4)
{
pipeline_gelu_pack4 = new Pipeline(vkdev);
pipeline_gelu_pack4->set_optimal_local_size_xyz(local_size_xyz);
pipeline_gelu_pack4->create(LayerShaderType::gelu_pack4, opt, specializations);
}

// pack8
if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8)
{
pipeline_gelu_pack8 = new Pipeline(vkdev);
pipeline_gelu_pack8->set_optimal_local_size_xyz(local_size_xyz);
pipeline_gelu_pack8->create(LayerShaderType::gelu_pack8, opt, specializations);
}
pipeline_gelu = new Pipeline(vkdev);
pipeline_gelu->set_optimal_local_size_xyz(local_size_x, 1, 1);
pipeline_gelu->create(LayerShaderType::gelu, opt, specializations);

return 0;
}
@@ -106,34 +56,24 @@ int GELU_vulkan::destroy_pipeline(const Option& /*opt*/)
delete pipeline_gelu;
pipeline_gelu = 0;

delete pipeline_gelu_pack4;
pipeline_gelu_pack4 = 0;

delete pipeline_gelu_pack8;
pipeline_gelu_pack8 = 0;

return 0;
}

int GELU_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const
{
int elempack = bottom_top_blob.elempack;
const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4;

std::vector<VkMat> bindings(1);
bindings[0] = bottom_top_blob;

std::vector<vk_constant_type> constants(5);
constants[0].i = bottom_top_blob.dims;
constants[1].i = bottom_top_blob.w;
constants[2].i = bottom_top_blob.h * bottom_top_blob.d;
constants[3].i = bottom_top_blob.c;
constants[4].i = bottom_top_blob.cstep;

const Pipeline* pipeline = elempack == 8 ? pipeline_gelu_pack8
: elempack == 4 ? pipeline_gelu_pack4
: pipeline_gelu;
std::vector<vk_constant_type> constants(1);
constants[0].u32 = n;

cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);
VkMat dispatcher;
dispatcher.w = n;
dispatcher.h = 1;
dispatcher.c = 1;
cmd.record_pipeline(pipeline_gelu, bindings, constants, dispatcher);

return 0;
}


+ 0
- 2
src/layer/vulkan/gelu_vulkan.h View File

@@ -21,8 +21,6 @@ public:

public:
Pipeline* pipeline_gelu;
Pipeline* pipeline_gelu_pack4;
Pipeline* pipeline_gelu_pack8;
};

} // namespace ncnn


+ 14
- 68
src/layer/vulkan/hardsigmoid_vulkan.cpp View File

@@ -12,8 +12,6 @@ HardSigmoid_vulkan::HardSigmoid_vulkan()
support_vulkan = true;

pipeline_hardsigmoid = 0;
pipeline_hardsigmoid_pack4 = 0;
pipeline_hardsigmoid_pack8 = 0;
}

int HardSigmoid_vulkan::create_pipeline(const Option& opt)
@@ -40,58 +38,16 @@ int HardSigmoid_vulkan::create_pipeline(const Option& opt)
if (shape.dims == 2) shape_packed = Mat(shape.w, shape.h / elempack, (void*)0, elemsize, elempack);
if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack);

std::vector<vk_specialization_type> specializations(2 + 5);
std::vector<vk_specialization_type> specializations(2 + 1);
specializations[0].f = alpha;
specializations[1].f = beta;
specializations[2 + 0].i = shape_packed.dims;
specializations[2 + 1].i = shape_packed.w;
specializations[2 + 2].i = shape_packed.h;
specializations[2 + 3].i = shape_packed.c;
specializations[2 + 4].i = shape_packed.cstep;

Mat local_size_xyz;
if (shape_packed.dims == 1)
{
local_size_xyz.w = std::min(64, shape_packed.w);
local_size_xyz.h = 1;
local_size_xyz.c = 1;
}
if (shape_packed.dims == 2)
{
local_size_xyz.w = std::min(8, shape_packed.w);
local_size_xyz.h = std::min(8, shape_packed.h);
local_size_xyz.c = 1;
}
if (shape_packed.dims == 3)
{
local_size_xyz.w = std::min(4, shape_packed.w);
local_size_xyz.h = std::min(4, shape_packed.h);
local_size_xyz.c = std::min(4, shape_packed.c);
}

// pack1
if (shape.dims == 0 || elempack == 1)
{
pipeline_hardsigmoid = new Pipeline(vkdev);
pipeline_hardsigmoid->set_optimal_local_size_xyz(local_size_xyz);
pipeline_hardsigmoid->create(LayerShaderType::hardsigmoid, opt, specializations);
}
specializations[2 + 0].u32 = shape_packed.total() * elempack / 4;

// pack4
if (shape.dims == 0 || elempack == 4)
{
pipeline_hardsigmoid_pack4 = new Pipeline(vkdev);
pipeline_hardsigmoid_pack4->set_optimal_local_size_xyz(local_size_xyz);
pipeline_hardsigmoid_pack4->create(LayerShaderType::hardsigmoid_pack4, opt, specializations);
}
const int local_size_x = vkdev->info.subgroup_size();

// pack8
if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8)
{
pipeline_hardsigmoid_pack8 = new Pipeline(vkdev);
pipeline_hardsigmoid_pack8->set_optimal_local_size_xyz(local_size_xyz);
pipeline_hardsigmoid_pack8->create(LayerShaderType::hardsigmoid_pack8, opt, specializations);
}
pipeline_hardsigmoid = new Pipeline(vkdev);
pipeline_hardsigmoid->set_optimal_local_size_xyz(local_size_x, 1, 1);
pipeline_hardsigmoid->create(LayerShaderType::hardsigmoid, opt, specializations);

return 0;
}
@@ -101,34 +57,24 @@ int HardSigmoid_vulkan::destroy_pipeline(const Option& /*opt*/)
delete pipeline_hardsigmoid;
pipeline_hardsigmoid = 0;

delete pipeline_hardsigmoid_pack4;
pipeline_hardsigmoid_pack4 = 0;

delete pipeline_hardsigmoid_pack8;
pipeline_hardsigmoid_pack8 = 0;

return 0;
}

int HardSigmoid_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const
{
int elempack = bottom_top_blob.elempack;
const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4;

std::vector<VkMat> bindings(1);
bindings[0] = bottom_top_blob;

std::vector<vk_constant_type> constants(5);
constants[0].i = bottom_top_blob.dims;
constants[1].i = bottom_top_blob.w;
constants[2].i = bottom_top_blob.h;
constants[3].i = bottom_top_blob.c;
constants[4].i = bottom_top_blob.cstep;

const Pipeline* pipeline = elempack == 8 ? pipeline_hardsigmoid_pack8
: elempack == 4 ? pipeline_hardsigmoid_pack4
: pipeline_hardsigmoid;
std::vector<vk_constant_type> constants(1);
constants[0].u32 = n;

cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);
VkMat dispatcher;
dispatcher.w = n;
dispatcher.h = 1;
dispatcher.c = 1;
cmd.record_pipeline(pipeline_hardsigmoid, bindings, constants, dispatcher);

return 0;
}


+ 0
- 2
src/layer/vulkan/hardsigmoid_vulkan.h View File

@@ -21,8 +21,6 @@ public:

public:
Pipeline* pipeline_hardsigmoid;
Pipeline* pipeline_hardsigmoid_pack4;
Pipeline* pipeline_hardsigmoid_pack8;
};

} // namespace ncnn


+ 14
- 68
src/layer/vulkan/hardswish_vulkan.cpp View File

@@ -12,8 +12,6 @@ HardSwish_vulkan::HardSwish_vulkan()
support_vulkan = true;

pipeline_hardswish = 0;
pipeline_hardswish_pack4 = 0;
pipeline_hardswish_pack8 = 0;
}

int HardSwish_vulkan::create_pipeline(const Option& opt)
@@ -40,58 +38,16 @@ int HardSwish_vulkan::create_pipeline(const Option& opt)
if (shape.dims == 2) shape_packed = Mat(shape.w, shape.h / elempack, (void*)0, elemsize, elempack);
if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack);

std::vector<vk_specialization_type> specializations(2 + 5);
std::vector<vk_specialization_type> specializations(2 + 1);
specializations[0].f = alpha;
specializations[1].f = beta;
specializations[2 + 0].i = shape_packed.dims;
specializations[2 + 1].i = shape_packed.w;
specializations[2 + 2].i = shape_packed.h;
specializations[2 + 3].i = shape_packed.c;
specializations[2 + 4].i = shape_packed.cstep;

Mat local_size_xyz;
if (shape_packed.dims == 1)
{
local_size_xyz.w = std::min(64, shape_packed.w);
local_size_xyz.h = 1;
local_size_xyz.c = 1;
}
if (shape_packed.dims == 2)
{
local_size_xyz.w = std::min(8, shape_packed.w);
local_size_xyz.h = std::min(8, shape_packed.h);
local_size_xyz.c = 1;
}
if (shape_packed.dims == 3)
{
local_size_xyz.w = std::min(4, shape_packed.w);
local_size_xyz.h = std::min(4, shape_packed.h);
local_size_xyz.c = std::min(4, shape_packed.c);
}

// pack1
if (shape.dims == 0 || elempack == 1)
{
pipeline_hardswish = new Pipeline(vkdev);
pipeline_hardswish->set_optimal_local_size_xyz(local_size_xyz);
pipeline_hardswish->create(LayerShaderType::hardswish, opt, specializations);
}
specializations[2 + 0].u32 = shape_packed.total() * elempack / 4;

// pack4
if (shape.dims == 0 || elempack == 4)
{
pipeline_hardswish_pack4 = new Pipeline(vkdev);
pipeline_hardswish_pack4->set_optimal_local_size_xyz(local_size_xyz);
pipeline_hardswish_pack4->create(LayerShaderType::hardswish_pack4, opt, specializations);
}
const int local_size_x = vkdev->info.subgroup_size();

// pack8
if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8)
{
pipeline_hardswish_pack8 = new Pipeline(vkdev);
pipeline_hardswish_pack8->set_optimal_local_size_xyz(local_size_xyz);
pipeline_hardswish_pack8->create(LayerShaderType::hardswish_pack8, opt, specializations);
}
pipeline_hardswish = new Pipeline(vkdev);
pipeline_hardswish->set_optimal_local_size_xyz(local_size_x, 1, 1);
pipeline_hardswish->create(LayerShaderType::hardswish, opt, specializations);

return 0;
}
@@ -101,34 +57,24 @@ int HardSwish_vulkan::destroy_pipeline(const Option& /*opt*/)
delete pipeline_hardswish;
pipeline_hardswish = 0;

delete pipeline_hardswish_pack4;
pipeline_hardswish_pack4 = 0;

delete pipeline_hardswish_pack8;
pipeline_hardswish_pack8 = 0;

return 0;
}

int HardSwish_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const
{
int elempack = bottom_top_blob.elempack;
const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4;

std::vector<VkMat> bindings(1);
bindings[0] = bottom_top_blob;

std::vector<vk_constant_type> constants(5);
constants[0].i = bottom_top_blob.dims;
constants[1].i = bottom_top_blob.w;
constants[2].i = bottom_top_blob.h;
constants[3].i = bottom_top_blob.c;
constants[4].i = bottom_top_blob.cstep;

const Pipeline* pipeline = elempack == 8 ? pipeline_hardswish_pack8
: elempack == 4 ? pipeline_hardswish_pack4
: pipeline_hardswish;
std::vector<vk_constant_type> constants(1);
constants[0].u32 = n;

cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);
VkMat dispatcher;
dispatcher.w = n;
dispatcher.h = 1;
dispatcher.c = 1;
cmd.record_pipeline(pipeline_hardswish, bindings, constants, dispatcher);

return 0;
}


+ 0
- 2
src/layer/vulkan/hardswish_vulkan.h View File

@@ -21,8 +21,6 @@ public:

public:
Pipeline* pipeline_hardswish;
Pipeline* pipeline_hardswish_pack4;
Pipeline* pipeline_hardswish_pack8;
};

} // namespace ncnn


+ 14
- 74
src/layer/vulkan/mish_vulkan.cpp View File

@@ -12,8 +12,6 @@ Mish_vulkan::Mish_vulkan()
support_vulkan = true;

pipeline_mish = 0;
pipeline_mish_pack4 = 0;
pipeline_mish_pack8 = 0;
}

int Mish_vulkan::create_pipeline(const Option& opt)
@@ -41,62 +39,14 @@ int Mish_vulkan::create_pipeline(const Option& opt)
if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack);
if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack);

std::vector<vk_specialization_type> specializations(0 + 5);
specializations[0 + 0].i = shape_packed.dims;
specializations[0 + 1].i = shape_packed.w;
specializations[0 + 2].i = shape_packed.h * shape_packed.d;
specializations[0 + 3].i = shape_packed.c;
specializations[0 + 4].i = shape_packed.cstep;
std::vector<vk_specialization_type> specializations(1);
specializations[0].u32 = shape_packed.total() * elempack / 4;

Mat local_size_xyz;
if (shape_packed.dims == 1)
{
local_size_xyz.w = std::min(64, shape_packed.w);
local_size_xyz.h = 1;
local_size_xyz.c = 1;
}
if (shape_packed.dims == 2)
{
local_size_xyz.w = std::min(8, shape_packed.w);
local_size_xyz.h = std::min(8, shape_packed.h);
local_size_xyz.c = 1;
}
if (shape_packed.dims == 3)
{
local_size_xyz.w = std::min(4, shape_packed.w);
local_size_xyz.h = std::min(4, shape_packed.h);
local_size_xyz.c = std::min(4, shape_packed.c);
}
if (shape_packed.dims == 4)
{
local_size_xyz.w = std::min(4, shape_packed.w);
local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d);
local_size_xyz.c = std::min(4, shape_packed.c);
}
const int local_size_x = vkdev->info.subgroup_size();

// pack1
if (shape.dims == 0 || elempack == 1)
{
pipeline_mish = new Pipeline(vkdev);
pipeline_mish->set_optimal_local_size_xyz(local_size_xyz);
pipeline_mish->create(LayerShaderType::mish, opt, specializations);
}

// pack4
if (shape.dims == 0 || elempack == 4)
{
pipeline_mish_pack4 = new Pipeline(vkdev);
pipeline_mish_pack4->set_optimal_local_size_xyz(local_size_xyz);
pipeline_mish_pack4->create(LayerShaderType::mish_pack4, opt, specializations);
}

// pack8
if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8)
{
pipeline_mish_pack8 = new Pipeline(vkdev);
pipeline_mish_pack8->set_optimal_local_size_xyz(local_size_xyz);
pipeline_mish_pack8->create(LayerShaderType::mish_pack8, opt, specializations);
}
pipeline_mish = new Pipeline(vkdev);
pipeline_mish->set_optimal_local_size_xyz(local_size_x, 1, 1);
pipeline_mish->create(LayerShaderType::mish, opt, specializations);

return 0;
}
@@ -106,34 +56,24 @@ int Mish_vulkan::destroy_pipeline(const Option& /*opt*/)
delete pipeline_mish;
pipeline_mish = 0;

delete pipeline_mish_pack4;
pipeline_mish_pack4 = 0;

delete pipeline_mish_pack8;
pipeline_mish_pack8 = 0;

return 0;
}

int Mish_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const
{
int elempack = bottom_top_blob.elempack;
const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4;

std::vector<VkMat> bindings(1);
bindings[0] = bottom_top_blob;

std::vector<vk_constant_type> constants(5);
constants[0].i = bottom_top_blob.dims;
constants[1].i = bottom_top_blob.w;
constants[2].i = bottom_top_blob.h * bottom_top_blob.d;
constants[3].i = bottom_top_blob.c;
constants[4].i = bottom_top_blob.cstep;

const Pipeline* pipeline = elempack == 8 ? pipeline_mish_pack8
: elempack == 4 ? pipeline_mish_pack4
: pipeline_mish;
std::vector<vk_constant_type> constants(1);
constants[0].u32 = n;

cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);
VkMat dispatcher;
dispatcher.w = n;
dispatcher.h = 1;
dispatcher.c = 1;
cmd.record_pipeline(pipeline_mish, bindings, constants, dispatcher);

return 0;
}


+ 0
- 2
src/layer/vulkan/mish_vulkan.h View File

@@ -21,8 +21,6 @@ public:

public:
Pipeline* pipeline_mish;
Pipeline* pipeline_mish_pack4;
Pipeline* pipeline_mish_pack8;
};

} // namespace ncnn


+ 14
- 74
src/layer/vulkan/relu_vulkan.cpp View File

@@ -12,8 +12,6 @@ ReLU_vulkan::ReLU_vulkan()
support_vulkan = true;

pipeline_relu = 0;
pipeline_relu_pack4 = 0;
pipeline_relu_pack8 = 0;
}

int ReLU_vulkan::create_pipeline(const Option& opt)
@@ -41,63 +39,15 @@ int ReLU_vulkan::create_pipeline(const Option& opt)
if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack);
if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack);

std::vector<vk_specialization_type> specializations(1 + 5);
std::vector<vk_specialization_type> specializations(1 + 1);
specializations[0].f = slope;
specializations[1 + 0].i = shape_packed.dims;
specializations[1 + 1].i = shape_packed.w;
specializations[1 + 2].i = shape_packed.h * shape_packed.d;
specializations[1 + 3].i = shape_packed.c;
specializations[1 + 4].i = shape_packed.cstep;

Mat local_size_xyz;
if (shape_packed.dims == 1)
{
local_size_xyz.w = std::min(64, shape_packed.w);
local_size_xyz.h = 1;
local_size_xyz.c = 1;
}
if (shape_packed.dims == 2)
{
local_size_xyz.w = std::min(8, shape_packed.w);
local_size_xyz.h = std::min(8, shape_packed.h);
local_size_xyz.c = 1;
}
if (shape_packed.dims == 3)
{
local_size_xyz.w = std::min(4, shape_packed.w);
local_size_xyz.h = std::min(4, shape_packed.h);
local_size_xyz.c = std::min(4, shape_packed.c);
}
if (shape_packed.dims == 4)
{
local_size_xyz.w = std::min(4, shape_packed.w);
local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d);
local_size_xyz.c = std::min(4, shape_packed.c);
}

// pack1
if (shape.dims == 0 || elempack == 1)
{
pipeline_relu = new Pipeline(vkdev);
pipeline_relu->set_optimal_local_size_xyz(local_size_xyz);
pipeline_relu->create(LayerShaderType::relu, opt, specializations);
}
specializations[1 + 0].u32 = shape_packed.total() * elempack / 4;

// pack4
if (shape.dims == 0 || elempack == 4)
{
pipeline_relu_pack4 = new Pipeline(vkdev);
pipeline_relu_pack4->set_optimal_local_size_xyz(local_size_xyz);
pipeline_relu_pack4->create(LayerShaderType::relu_pack4, opt, specializations);
}
const int local_size_x = vkdev->info.subgroup_size();

// pack8
if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8)
{
pipeline_relu_pack8 = new Pipeline(vkdev);
pipeline_relu_pack8->set_optimal_local_size_xyz(local_size_xyz);
pipeline_relu_pack8->create(LayerShaderType::relu_pack8, opt, specializations);
}
pipeline_relu = new Pipeline(vkdev);
pipeline_relu->set_optimal_local_size_xyz(local_size_x, 1, 1);
pipeline_relu->create(LayerShaderType::relu, opt, specializations);

return 0;
}
@@ -107,34 +57,24 @@ int ReLU_vulkan::destroy_pipeline(const Option& /*opt*/)
delete pipeline_relu;
pipeline_relu = 0;

delete pipeline_relu_pack4;
pipeline_relu_pack4 = 0;

delete pipeline_relu_pack8;
pipeline_relu_pack8 = 0;

return 0;
}

int ReLU_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const
{
int elempack = bottom_top_blob.elempack;
const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4;

std::vector<VkMat> bindings(1);
bindings[0] = bottom_top_blob;

std::vector<vk_constant_type> constants(5);
constants[0].i = bottom_top_blob.dims;
constants[1].i = bottom_top_blob.w;
constants[2].i = bottom_top_blob.h * bottom_top_blob.d;
constants[3].i = bottom_top_blob.c;
constants[4].i = bottom_top_blob.cstep;

const Pipeline* pipeline = elempack == 8 ? pipeline_relu_pack8
: elempack == 4 ? pipeline_relu_pack4
: pipeline_relu;
std::vector<vk_constant_type> constants(1);
constants[0].u32 = n;

cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);
VkMat dispatcher;
dispatcher.w = n;
dispatcher.h = 1;
dispatcher.c = 1;
cmd.record_pipeline(pipeline_relu, bindings, constants, dispatcher);

return 0;
}


+ 0
- 2
src/layer/vulkan/relu_vulkan.h View File

@@ -21,8 +21,6 @@ public:

public:
Pipeline* pipeline_relu;
Pipeline* pipeline_relu_pack4;
Pipeline* pipeline_relu_pack8;
};

} // namespace ncnn


+ 8
- 20
src/layer/vulkan/shader/celu.comp View File

@@ -6,37 +6,25 @@
layout (constant_id = 0) const float alpha = 0;

#define shape_constant_id_offset 1
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;
layout (constant_id = shape_constant_id_offset + 0) const uint n = 0;

layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; };
layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
uint n;
} p;

void main()
{
int gx = int(gl_GlobalInvocationID.x);
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);
const uint gi = gl_GlobalInvocationID.x;

if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
if (gi >= psc(n))
return;

const int gi = gz * psc(cstep) + gy * psc(w) + gx;
afpvec4 v = buffer_ld4(bottom_top_blob_data, gi);

afp v = buffer_ld1(bottom_top_blob_data, gi);
v = max(v, afpvec4(0.0f)) + min(alpha * (exp(v / afpvec4(alpha)) - afpvec4(1.0f)), afpvec4(0.0f));

v = max(v, afp(0.0f)) + min(alpha * (exp(v / alpha) - 1.0f), afp(0.0f));

buffer_st1(bottom_top_blob_data, gi, v);
buffer_st4(bottom_top_blob_data, gi, v);
}

+ 0
- 42
src/layer/vulkan/shader/celu_pack4.comp View File

@@ -1,42 +0,0 @@
// Copyright 2019 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

// CELU activation compute shader for buffers whose elements are 4-wide
// packed vectors (sfpvec4). The blob is modified in place.

// CELU alpha parameter, supplied as specialization constant 0.
layout (constant_id = 0) const float alpha = 0;

// Shape specialization constants start right after the layer parameter.
#define shape_constant_id_offset 1
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

// Single storage buffer used as both input and output.
layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };

// Shape values passed as push constants; field names mirror the
// specialization constants above. dims is declared but not read in main().
layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
} p;

// One invocation processes the packed element at (gx, gy, gz).
void main()
{
int gx = int(gl_GlobalInvocationID.x);
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);

// psc() is defined outside this listing; presumably it resolves a shape value
// from the specialization constant or the push constant -- TODO confirm.
// Invocations outside the w x h x c extent do nothing.
if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
return;

// Linear element index: cstep is the stride between z slices, w between rows.
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

afpvec4 v = buffer_ld4(bottom_top_blob_data, gi);

// celu(v) = max(v, 0) + min(alpha * (exp(v / alpha) - 1), 0)
v = max(v, afp(0.0f)) + min(alpha * (exp(v / alpha) - 1.0f), afp(0.0f));

// Write the result back to the same slot.
buffer_st4(bottom_top_blob_data, gi, v);
}

+ 0
- 43
src/layer/vulkan/shader/celu_pack8.comp View File

@@ -1,43 +0,0 @@
// Copyright 2020 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

// CELU activation compute shader for buffers whose elements are 8-wide
// packed vectors (sfpvec8). The blob is modified in place.

// CELU alpha parameter, supplied as specialization constant 0.
layout (constant_id = 0) const float alpha = 0;

// Shape specialization constants start right after the layer parameter.
#define shape_constant_id_offset 1
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

// Single storage buffer used as both input and output.
layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; };

// Shape values passed as push constants; field names mirror the
// specialization constants above. dims is declared but not read in main().
layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
} p;

// One invocation processes the packed element at (gx, gy, gz).
void main()
{
int gx = int(gl_GlobalInvocationID.x);
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);

// psc() is defined outside this listing; presumably it resolves a shape value
// from the specialization constant or the push constant -- TODO confirm.
// Invocations outside the w x h x c extent do nothing.
if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
return;

// Linear element index: cstep is the stride between z slices, w between rows.
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

afpvec8 v = buffer_ld8(bottom_top_blob_data, gi);

// celu(x) = max(x, 0) + min(alpha * (exp(x / alpha) - 1), 0), applied to
// v[0] and v[1] separately (presumably the two 4-wide halves of the
// 8-wide element -- confirm against the afpvec8 definition).
v[0] = max(v[0], afp(0.0f)) + min(alpha * (exp(v[0] / alpha) - 1.0f), afp(0.0f));
// NOTE(review): same formula as the v[0] line, but written with the integer
// literal 1 and without spacing around the operators.
v[1] = max(v[1], afp(0.0f)) + min(alpha*(exp(v[1]/alpha)-1), afp(0.0f));

// Write the result back to the same slot.
buffer_st8(bottom_top_blob_data, gi, v);
}

+ 9
- 21
src/layer/vulkan/shader/clip.comp View File

@@ -7,37 +7,25 @@ layout (constant_id = 0) const float const_min = 0;
layout (constant_id = 1) const float const_max = 0;

#define shape_constant_id_offset 2
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;
layout (constant_id = shape_constant_id_offset + 0) const uint n = 0;

layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; };
layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
uint n;
} p;

void main()
{
int gx = int(gl_GlobalInvocationID.x);
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);
const uint gi = gl_GlobalInvocationID.x;

if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
return;
if (gi >= psc(n))
return;

const int gi = gz * psc(cstep) + gy * psc(w) + gx;
afpvec4 v = buffer_ld4(bottom_top_blob_data, gi);

afp v = buffer_ld1(bottom_top_blob_data, gi);
v = clamp(v, afpvec4(const_min), afpvec4(const_max));

v = clamp(v, afp(const_min), afp(const_max));

buffer_st1(bottom_top_blob_data, gi, v);
buffer_st4(bottom_top_blob_data, gi, v);
}

+ 0
- 43
src/layer/vulkan/shader/clip_pack4.comp View File

@@ -1,43 +0,0 @@
// Copyright 2019 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

// Clip (clamp) compute shader for buffers whose elements are 4-wide packed
// vectors (sfpvec4). Each value is clamped to [const_min, const_max] in place.

// Lower and upper clamp bounds, supplied as specialization constants 0 and 1.
layout (constant_id = 0) const float const_min = 0;
layout (constant_id = 1) const float const_max = 0;

// Shape specialization constants start right after the two layer parameters.
#define shape_constant_id_offset 2
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

// Single storage buffer used as both input and output.
layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };

// Shape values passed as push constants; field names mirror the
// specialization constants above. dims is declared but not read in main().
layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
} p;

// One invocation processes the packed element at (gx, gy, gz).
void main()
{
int gx = int(gl_GlobalInvocationID.x);
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);

// psc() is defined outside this listing; presumably it resolves a shape value
// from the specialization constant or the push constant -- TODO confirm.
// Invocations outside the w x h x c extent do nothing.
if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
return;

// Linear element index: cstep is the stride between z slices, w between rows.
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

afpvec4 v = buffer_ld4(bottom_top_blob_data, gi);

// Clamp every lane to the configured range.
v = clamp(v, afp(const_min), afp(const_max));

// Write the result back to the same slot.
buffer_st4(bottom_top_blob_data, gi, v);
}

+ 0
- 44
src/layer/vulkan/shader/clip_pack8.comp View File

@@ -1,44 +0,0 @@
// Copyright 2020 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

// Clip (clamp) compute shader for buffers whose elements are 8-wide packed
// vectors (sfpvec8). Each value is clamped to [const_min, const_max] in place.

// Lower and upper clamp bounds, supplied as specialization constants 0 and 1.
layout (constant_id = 0) const float const_min = 0;
layout (constant_id = 1) const float const_max = 0;

// Shape specialization constants start right after the two layer parameters.
#define shape_constant_id_offset 2
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

// Single storage buffer used as both input and output.
layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; };

// Shape values passed as push constants; field names mirror the
// specialization constants above. dims is declared but not read in main().
layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
} p;

// One invocation processes the packed element at (gx, gy, gz).
void main()
{
int gx = int(gl_GlobalInvocationID.x);
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);

// psc() is defined outside this listing; presumably it resolves a shape value
// from the specialization constant or the push constant -- TODO confirm.
// Invocations outside the w x h x c extent do nothing.
if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
return;

// Linear element index: cstep is the stride between z slices, w between rows.
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

afpvec8 v = buffer_ld8(bottom_top_blob_data, gi);

// Clamp v[0] and v[1] separately (presumably the two 4-wide halves of the
// 8-wide element -- confirm against the afpvec8 definition).
v[0] = clamp(v[0], afp(const_min), afp(const_max));
v[1] = clamp(v[1], afp(const_min), afp(const_max));

// Write the result back to the same slot.
buffer_st8(bottom_top_blob_data, gi, v);
}

+ 9
- 20
src/layer/vulkan/shader/dropout.comp View File

@@ -1,4 +1,4 @@
// Copyright 2018 Tencent
// Copyright 2019 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450
@@ -6,37 +6,26 @@
layout (constant_id = 0) const float scale = 1;

#define shape_constant_id_offset 1
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;
layout (constant_id = shape_constant_id_offset + 0) const uint n = 0;

layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; };

layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
uint n;
} p;

void main()
{
int gx = int(gl_GlobalInvocationID.x);
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);
uint gi = gl_GlobalInvocationID.x;

if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
if (gi >= psc(n))
return;

const int gi = gz * psc(cstep) + gy * psc(w) + gx;

afp v = buffer_ld1(bottom_top_blob_data, gi);
afpvec4 v = buffer_ld4(bottom_top_blob_data, gi);

v *= afp(scale);

buffer_st1(bottom_top_blob_data, gi, v);
buffer_st4(bottom_top_blob_data, gi, v);
}

+ 0
- 42
src/layer/vulkan/shader/dropout_pack4.comp View File

@@ -1,42 +0,0 @@
// Copyright 2019 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

// Dropout compute shader for buffers whose elements are 4-wide packed
// vectors (sfpvec4). The only operation performed is an in-place multiply
// of every value by the constant scale.

// Multiplier applied to every value; specialization constant 0, default 1.
layout (constant_id = 0) const float scale = 1;

// Shape specialization constants start right after the layer parameter.
#define shape_constant_id_offset 1
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

// Single storage buffer used as both input and output.
layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };

// Shape values passed as push constants; field names mirror the
// specialization constants above. dims is declared but not read in main().
layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
} p;

// One invocation processes the packed element at (gx, gy, gz).
void main()
{
int gx = int(gl_GlobalInvocationID.x);
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);

// psc() is defined outside this listing; presumably it resolves a shape value
// from the specialization constant or the push constant -- TODO confirm.
// Invocations outside the w x h x c extent do nothing.
if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
return;

// Linear element index: cstep is the stride between z slices, w between rows.
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

afpvec4 v = buffer_ld4(bottom_top_blob_data, gi);

// Scale all four lanes by the same factor.
v *= afp(scale);

// Write the result back to the same slot.
buffer_st4(bottom_top_blob_data, gi, v);
}

+ 0
- 43
src/layer/vulkan/shader/dropout_pack8.comp View File

@@ -1,43 +0,0 @@
// Copyright 2020 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

// Dropout compute shader for buffers whose elements are 8-wide packed
// vectors (sfpvec8). The only operation performed is an in-place multiply
// of every value by the constant scale.

// Multiplier applied to every value; specialization constant 0, default 1.
layout (constant_id = 0) const float scale = 1;

// Shape specialization constants start right after the layer parameter.
#define shape_constant_id_offset 1
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

// Single storage buffer used as both input and output.
layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; };

// Shape values passed as push constants; field names mirror the
// specialization constants above. dims is declared but not read in main().
layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
} p;

// One invocation processes the packed element at (gx, gy, gz).
void main()
{
int gx = int(gl_GlobalInvocationID.x);
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);

// psc() is defined outside this listing; presumably it resolves a shape value
// from the specialization constant or the push constant -- TODO confirm.
// Invocations outside the w x h x c extent do nothing.
if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
return;

// Linear element index: cstep is the stride between z slices, w between rows.
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

afpvec8 v = buffer_ld8(bottom_top_blob_data, gi);

// Scale v[0] and v[1] separately (presumably the two 4-wide halves of the
// 8-wide element -- confirm against the afpvec8 definition).
v[0] = v[0] * afp(scale);
v[1] = v[1] * afp(scale);

// Write the result back to the same slot.
buffer_st8(bottom_top_blob_data, gi, v);
}

+ 8
- 20
src/layer/vulkan/shader/elu.comp View File

@@ -6,37 +6,25 @@
layout (constant_id = 0) const float alpha = 0;

#define shape_constant_id_offset 1
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;
layout (constant_id = shape_constant_id_offset + 0) const uint n = 0;

layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; };
layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
uint n;
} p;

void main()
{
int gx = int(gl_GlobalInvocationID.x);
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);
const uint gi = gl_GlobalInvocationID.x;

if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
if (gi >= psc(n))
return;

const int gi = gz * psc(cstep) + gy * psc(w) + gx;
afpvec4 v = buffer_ld4(bottom_top_blob_data, gi);

afp v = buffer_ld1(bottom_top_blob_data, gi);
v = mix(afpvec4(alpha) * afpvec4(exp(v) - afpvec4(1.0f)), v, greaterThan(v, afpvec4(0.0f)));

v = v > afp(0.0f) ? v : afp(alpha * (exp(v) - 1.0f));

buffer_st1(bottom_top_blob_data, gi, v);
buffer_st4(bottom_top_blob_data, gi, v);
}

+ 0
- 42
src/layer/vulkan/shader/elu_pack4.comp View File

@@ -1,42 +0,0 @@
// Copyright 2022 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

// ELU activation compute shader for buffers whose elements are 4-wide
// packed vectors (sfpvec4). The blob is modified in place.

// ELU alpha parameter, supplied as specialization constant 0.
layout (constant_id = 0) const float alpha = 0;

// Shape specialization constants start right after the layer parameter.
#define shape_constant_id_offset 1
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

// Single storage buffer used as both input and output.
layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };

// Shape values passed as push constants; field names mirror the
// specialization constants above. dims is declared but not read in main().
layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
} p;

// One invocation processes the packed element at (gx, gy, gz).
void main()
{
int gx = int(gl_GlobalInvocationID.x);
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);

// psc() is defined outside this listing; presumably it resolves a shape value
// from the specialization constant or the push constant -- TODO confirm.
// Invocations outside the w x h x c extent do nothing.
if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
return;

// Linear element index: cstep is the stride between z slices, w between rows.
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

afpvec4 v = buffer_ld4(bottom_top_blob_data, gi);

// Per lane: keep v where v > 0, otherwise use alpha * (exp(v) - 1).
// mix() with a boolean vector selects its second argument where the
// condition is true and its first argument elsewhere.
v = mix(afpvec4(alpha) * afpvec4(exp(v) - afpvec4(1.0f)), v, greaterThan(v, afpvec4(0.0f)));

// Write the result back to the same slot.
buffer_st4(bottom_top_blob_data, gi, v);
}

+ 0
- 43
src/layer/vulkan/shader/elu_pack8.comp View File

@@ -1,43 +0,0 @@
// Copyright 2022 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

// ELU activation compute shader for buffers whose elements are 8-wide
// packed vectors (sfpvec8). The blob is modified in place.

// ELU alpha parameter, supplied as specialization constant 0.
layout (constant_id = 0) const float alpha = 0;

// Shape specialization constants start right after the layer parameter.
#define shape_constant_id_offset 1
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

// Single storage buffer used as both input and output.
layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; };

// Shape values passed as push constants; field names mirror the
// specialization constants above. dims is declared but not read in main().
layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
} p;

// One invocation processes the packed element at (gx, gy, gz).
void main()
{
int gx = int(gl_GlobalInvocationID.x);
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);

// psc() is defined outside this listing; presumably it resolves a shape value
// from the specialization constant or the push constant -- TODO confirm.
// Invocations outside the w x h x c extent do nothing.
if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
return;

// Linear element index: cstep is the stride between z slices, w between rows.
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

afpvec8 v = buffer_ld8(bottom_top_blob_data, gi);

// v[0] and v[1] are each handled with 4-wide (afpvec4) operations.
// Per lane: keep x where x > 0, otherwise use alpha * (exp(x) - 1);
// mix() with a boolean vector selects its second argument where true.
v[0] = mix(afpvec4(alpha) * afpvec4(exp(v[0]) - afpvec4(1.0f)), v[0], greaterThan(v[0], afpvec4(0.0f)));
v[1] = mix(afpvec4(alpha) * afpvec4(exp(v[1]) - afpvec4(1.0f)), v[1], greaterThan(v[1], afpvec4(0.0f)));

// Write the result back to the same slot.
buffer_st8(bottom_top_blob_data, gi, v);
}

+ 18
- 30
src/layer/vulkan/shader/erf.comp View File

@@ -3,53 +3,41 @@

#version 450

#define shape_constant_id_offset 0
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;
layout (constant_id = 0) const uint n = 0;

layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; };
layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
uint n;
} p;

float erf(float x)
afpvec4 erf(afpvec4 x)
{
float a1 = 0.254829592f;
float a2 = -0.284496736f;
float a3 = 1.421413741f;
float a4 = -1.453152027f;
float a5 = 1.061405429f;
float p = 0.3275911f;
float s = sign(x);
float x_abs = abs(x);
float t = 1.0f/(1.0f + p*x_abs);
float y = 1.0f - (((((a5*t + a4)*t) + a3)*t + a2)*t + a1)*t*exp(-x_abs*x_abs);
afpvec4 a1 = afpvec4(0.254829592f);
afpvec4 a2 = afpvec4(-0.284496736f);
afpvec4 a3 = afpvec4(1.421413741f);
afpvec4 a4 = afpvec4(-1.453152027f);
afpvec4 a5 = afpvec4(1.061405429f);
afpvec4 p = afpvec4(0.3275911f);
afpvec4 s = sign(x);
afpvec4 x_abs = abs(x);
afpvec4 t = 1.0f / (1.0f + p * x_abs);
afpvec4 y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x_abs * x_abs);
return s * y;
}

void main()
{
int gx = int(gl_GlobalInvocationID.x);
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);
const uint gi = gl_GlobalInvocationID.x;

if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
if (gi >= psc(n))
return;

const int gi = gz * psc(cstep) + gy * psc(w) + gx;

afp v = buffer_ld1(bottom_top_blob_data, gi);
afpvec4 v = buffer_ld4(bottom_top_blob_data, gi);

v = erf(v);

buffer_st1(bottom_top_blob_data, gi, v);
buffer_st4(bottom_top_blob_data, gi, v);
}

+ 0
- 55
src/layer/vulkan/shader/erf_pack4.comp View File

@@ -1,55 +0,0 @@
// Copyright 2019 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

// Error-function (erf) compute shader for buffers whose elements are 4-wide
// packed vectors (sfpvec4). The blob is modified in place.

// This layer has no parameters of its own, so shape constants start at id 0.
#define shape_constant_id_offset 0
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

// Single storage buffer used as both input and output.
layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };

// Shape values passed as push constants; field names mirror the
// specialization constants above. dims is declared but not read in main().
layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
} p;

// Lane-wise approximation of erf(x):
//   t = 1 / (1 + p * |x|)
//   erf(x) ~= sign(x) * (1 - (a1*t + a2*t^2 + a3*t^3 + a4*t^4 + a5*t^5) * exp(-x^2))
// NOTE(review): the coefficients look like the Abramowitz & Stegun 7.1.26
// set -- confirm before relying on its published error bound.
afpvec4 erf(afpvec4 x)
{
afpvec4 a1 = afpvec4(0.254829592f);
afpvec4 a2 = afpvec4(-0.284496736f);
afpvec4 a3 = afpvec4(1.421413741f);
afpvec4 a4 = afpvec4(-1.453152027f);
afpvec4 a5 = afpvec4(1.061405429f);
// This local p has the same name as the push-constant block instance declared above.
afpvec4 p = afpvec4(0.3275911f);
// The polynomial is evaluated on |x|; the sign is re-applied at the end.
afpvec4 s = sign(x);
afpvec4 x_abs = abs(x);
afpvec4 t = 1.0f / (1.0f + p * x_abs);
// Polynomial in t evaluated in nested (Horner) form.
afpvec4 y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x_abs * x_abs);
return s * y;
}

// One invocation processes the packed element at (gx, gy, gz).
void main()
{
int gx = int(gl_GlobalInvocationID.x);
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);

// psc() is defined outside this listing; presumably it resolves a shape value
// from the specialization constant or the push constant -- TODO confirm.
// Invocations outside the w x h x c extent do nothing.
if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
return;

// Linear element index: cstep is the stride between z slices, w between rows.
const int gi = gz * psc(cstep) + gy * psc(w) + gx;

afpvec4 v = buffer_ld4(bottom_top_blob_data, gi);

v = erf(v);

// Write the result back to the same slot.
buffer_st4(bottom_top_blob_data, gi, v);
}

+ 0
- 56
src/layer/vulkan/shader/erf_pack8.comp View File

@@ -1,56 +0,0 @@
// Copyright 2020 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

#define shape_constant_id_offset 0
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
} p;

afpvec4 erf(afpvec4 x)
{
afpvec4 a1 = afpvec4(0.254829592f);
afpvec4 a2 = afpvec4(-0.284496736f);
afpvec4 a3 = afpvec4(1.421413741f);
afpvec4 a4 = afpvec4(-1.453152027f);
afpvec4 a5 = afpvec4(1.061405429f);
afpvec4 p = afpvec4(0.3275911f);
afpvec4 s = sign(x);
afpvec4 x_abs = abs(x);
afpvec4 t = 1.0f / (1.0f + p * x_abs);
afpvec4 y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x_abs * x_abs);
return s * y;
}

// Entry point: each invocation applies erf() in place to one 8-wide element
// of bottom_top_blob_data.
void main()
{
    const ivec3 pos = ivec3(gl_GlobalInvocationID);

    // invocations outside the w x h x c extent do nothing
    if (pos.x >= psc(w) || pos.y >= psc(h) || pos.z >= psc(c))
        return;

    const int idx = pos.z * psc(cstep) + pos.y * psc(w) + pos.x;

    afpvec8 packed = buffer_ld8(bottom_top_blob_data, idx);

    // erf() is 4-wide, so the two halves are converted one after the other
    packed[0] = erf(packed[0]);
    packed[1] = erf(packed[1]);

    buffer_st8(bottom_top_blob_data, idx, packed);
}

+ 9
- 20
src/layer/vulkan/shader/gelu.comp View File

@@ -4,43 +4,32 @@
#version 450

#define shape_constant_id_offset 0
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;
layout (constant_id = 0) const uint n = 0;

layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; };
layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
uint n;
} p;

// Entry point of the GELU shader. This listing interleaves two variants of
// each statement: a scalar one (gx/gy/gz indexing, afp, buffer_ld1/buffer_st1)
// and a 4-wide one (linear gi, afpvec4, buffer_ld4/buffer_st4).
void main()
{
int gx = int(gl_GlobalInvocationID.x);
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);
uint gi = gl_GlobalInvocationID.x;

// bounds check: 3-D extent in the scalar variant, element count n in the 4-wide one
if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
if (gi >= psc(n))
return;

const int gi = gz * psc(cstep) + gy * psc(w) + gx;

afp v = buffer_ld1(bottom_top_blob_data, gi);
afpvec4 v = buffer_ld4(bottom_top_blob_data, gi);

// y = 0.5x * (1 + tanh(sqrt(2/Pi) * (x + 0.044715x^3)))

// the NCNN_moltenvk branch routes tanh through float/vec4 and converts back
#if NCNN_moltenvk
v = afp(0.5f) * v * (afp(1.0f) + afp(tanh(float(afp(0.79788452f) * (v + afp(0.044715f) * v * v * v)))));
v = afpvec4(0.5f) * v * (afpvec4(1.0f) + afpvec4(tanh(vec4(afpvec4(0.79788452f) * (v + afpvec4(0.044715f) * v * v * v)))));
#else
v = afp(0.5f) * v * (afp(1.0f) + tanh(afp(0.79788452f) * (v + afp(0.044715f) * v * v * v)));
v = afpvec4(0.5f) * v * (afpvec4(1.0f) + tanh(afpvec4(0.79788452f) * (v + afpvec4(0.044715f) * v * v * v)));
#endif

// result is written back to the index it was read from
buffer_st1(bottom_top_blob_data, gi, v);
buffer_st4(bottom_top_blob_data, gi, v);
}

+ 0
- 46
src/layer/vulkan/shader/gelu_pack4.comp View File

@@ -1,46 +0,0 @@
// Copyright 2023 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

#define shape_constant_id_offset 0
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
} p;

// Entry point: each invocation applies the tanh-based GELU approximation in
// place to one 4-wide element of bottom_top_blob_data.
void main()
{
    const ivec3 pos = ivec3(gl_GlobalInvocationID);

    // invocations outside the w x h x c extent do nothing
    if (pos.x >= psc(w) || pos.y >= psc(h) || pos.z >= psc(c))
        return;

    const int idx = pos.z * psc(cstep) + pos.y * psc(w) + pos.x;

    const afpvec4 x = buffer_ld4(bottom_top_blob_data, idx);

    // gelu(x) = 0.5 * x * (1 + tanh(0.79788452 * (x + 0.044715 * x^3)))
    const afpvec4 arg = afpvec4(0.79788452f) * (x + afpvec4(0.044715f) * x * x * x);

#if NCNN_moltenvk
    // this configuration evaluates tanh on a vec4 and converts back
    const afpvec4 th = afpvec4(tanh(vec4(arg)));
#else
    const afpvec4 th = tanh(arg);
#endif

    const afpvec4 y = afpvec4(0.5f) * x * (afpvec4(1.0f) + th);

    buffer_st4(bottom_top_blob_data, idx, y);
}

+ 0
- 48
src/layer/vulkan/shader/gelu_pack8.comp View File

@@ -1,48 +0,0 @@
// Copyright 2023 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

#define shape_constant_id_offset 0
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
} p;

// 4-wide GELU, tanh approximation:
// 0.5 * x * (1 + tanh(0.79788452 * (x + 0.044715 * x^3)))
afpvec4 gelu4(afpvec4 x)
{
    const afpvec4 arg = afpvec4(0.79788452f) * (x + afpvec4(0.044715f) * x * x * x);

#if NCNN_moltenvk
    // this configuration evaluates tanh on a vec4 and converts back
    const afpvec4 th = afpvec4(tanh(vec4(arg)));
#else
    const afpvec4 th = tanh(arg);
#endif

    return afpvec4(0.5f) * x * (afpvec4(1.0f) + th);
}

// Entry point: each invocation applies GELU in place to one 8-wide element,
// handled as two 4-wide halves.
void main()
{
    const ivec3 pos = ivec3(gl_GlobalInvocationID);

    if (pos.x >= psc(w) || pos.y >= psc(h) || pos.z >= psc(c))
        return;

    const int idx = pos.z * psc(cstep) + pos.y * psc(w) + pos.x;

    afpvec8 packed = buffer_ld8(bottom_top_blob_data, idx);

    packed[0] = gelu4(packed[0]);
    packed[1] = gelu4(packed[1]);

    buffer_st8(bottom_top_blob_data, idx, packed);
}

+ 9
- 20
src/layer/vulkan/shader/hardsigmoid.comp View File

@@ -1,4 +1,4 @@
// Copyright 2018 Tencent
// Copyright 2019 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450
@@ -7,37 +7,26 @@ layout (constant_id = 0) const float alpha = 0;
layout (constant_id = 1) const float beta = 0;

#define shape_constant_id_offset 2
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;
layout (constant_id = shape_constant_id_offset + 0) const uint n = 0;

layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; };

layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
uint n;
} p;

// Entry point of the hard-sigmoid shader. This listing interleaves two
// variants of each statement: a scalar one (gx/gy/gz indexing, afp,
// buffer_ld1/buffer_st1) and a 4-wide one (linear gi, afpvec4,
// buffer_ld4/buffer_st4).
void main()
{
int gx = int(gl_GlobalInvocationID.x);
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);
uint gi = gl_GlobalInvocationID.x;

// bounds check: 3-D extent in the scalar variant, element count n in the 4-wide one
if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
if (gi >= psc(n))
return;

const int gi = gz * psc(cstep) + gy * psc(w) + gx;

afp v = buffer_ld1(bottom_top_blob_data, gi);
afpvec4 v = buffer_ld4(bottom_top_blob_data, gi);

// y = clamp(alpha * x + beta, 0, 1); the same statement serves both variants
v = clamp(v * afp(alpha) + afp(beta), afp(0.f), afp(1.f));

// result is written back to the index it was read from
buffer_st1(bottom_top_blob_data, gi, v);
buffer_st4(bottom_top_blob_data, gi, v);
}

+ 0
- 43
src/layer/vulkan/shader/hardsigmoid_pack4.comp View File

@@ -1,43 +0,0 @@
// Copyright 2019 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

layout (constant_id = 0) const float alpha = 0;
layout (constant_id = 1) const float beta = 0;

#define shape_constant_id_offset 2
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
} p;

// Entry point: each invocation applies hard-sigmoid,
// clamp(alpha * x + beta, 0, 1), in place to one 4-wide element.
void main()
{
    const ivec3 pos = ivec3(gl_GlobalInvocationID);

    // invocations outside the w x h x c extent do nothing
    if (pos.x >= psc(w) || pos.y >= psc(h) || pos.z >= psc(c))
        return;

    const int idx = pos.z * psc(cstep) + pos.y * psc(w) + pos.x;

    const afpvec4 x = buffer_ld4(bottom_top_blob_data, idx);
    const afpvec4 y = clamp(x * afp(alpha) + afp(beta), afp(0.f), afp(1.f));

    buffer_st4(bottom_top_blob_data, idx, y);
}

+ 0
- 44
src/layer/vulkan/shader/hardsigmoid_pack8.comp View File

@@ -1,44 +0,0 @@
// Copyright 2020 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

layout (constant_id = 0) const float alpha = 0;
layout (constant_id = 1) const float beta = 0;

#define shape_constant_id_offset 2
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
} p;

// 4-wide hard-sigmoid: clamp(alpha * x + beta, 0, 1)
afpvec4 hardsigmoid4(afpvec4 x)
{
    return clamp(x * afp(alpha) + afp(beta), afp(0.f), afp(1.f));
}

// Entry point: each invocation applies hard-sigmoid in place to one 8-wide
// element, handled as two 4-wide halves.
void main()
{
    const ivec3 pos = ivec3(gl_GlobalInvocationID);

    if (pos.x >= psc(w) || pos.y >= psc(h) || pos.z >= psc(c))
        return;

    const int idx = pos.z * psc(cstep) + pos.y * psc(w) + pos.x;

    afpvec8 packed = buffer_ld8(bottom_top_blob_data, idx);

    packed[0] = hardsigmoid4(packed[0]);
    packed[1] = hardsigmoid4(packed[1]);

    buffer_st8(bottom_top_blob_data, idx, packed);
}

+ 8
- 20
src/layer/vulkan/shader/hardswish.comp View File

@@ -1,4 +1,4 @@
// Copyright 2018 Tencent
// Copyright 2019 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450
@@ -7,37 +7,25 @@ layout (constant_id = 0) const float alpha = 0;
layout (constant_id = 1) const float beta = 0;

#define shape_constant_id_offset 2
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;
layout (constant_id = shape_constant_id_offset + 0) const uint n = 0;

layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; };
layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
uint n;
} p;

// Entry point of the hard-swish shader. This listing interleaves two
// variants of each statement: a scalar one (gx/gy/gz indexing, afp,
// buffer_ld1/buffer_st1) and a 4-wide one (linear gi, afpvec4,
// buffer_ld4/buffer_st4).
void main()
{
int gx = int(gl_GlobalInvocationID.x);
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);
uint gi = gl_GlobalInvocationID.x;

// bounds check: 3-D extent in the scalar variant, element count n in the 4-wide one
if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
if (gi >= psc(n))
return;

const int gi = gz * psc(cstep) + gy * psc(w) + gx;

afp v = buffer_ld1(bottom_top_blob_data, gi);
afpvec4 v = buffer_ld4(bottom_top_blob_data, gi);

// y = x * clamp(alpha * x + beta, 0, 1); the same statement serves both variants
v = v * clamp(v * afp(alpha) + afp(beta), afp(0.f), afp(1.f));

// result is written back to the index it was read from
buffer_st1(bottom_top_blob_data, gi, v);
buffer_st4(bottom_top_blob_data, gi, v);
}

+ 0
- 43
src/layer/vulkan/shader/hardswish_pack4.comp View File

@@ -1,43 +0,0 @@
// Copyright 2019 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

layout (constant_id = 0) const float alpha = 0;
layout (constant_id = 1) const float beta = 0;

#define shape_constant_id_offset 2
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
} p;

// Entry point: each invocation applies hard-swish,
// x * clamp(alpha * x + beta, 0, 1), in place to one 4-wide element.
void main()
{
    const ivec3 pos = ivec3(gl_GlobalInvocationID);

    // invocations outside the w x h x c extent do nothing
    if (pos.x >= psc(w) || pos.y >= psc(h) || pos.z >= psc(c))
        return;

    const int idx = pos.z * psc(cstep) + pos.y * psc(w) + pos.x;

    const afpvec4 x = buffer_ld4(bottom_top_blob_data, idx);

    // the clamped linear term acts as a gate on the input
    const afpvec4 gate = clamp(x * afp(alpha) + afp(beta), afp(0.f), afp(1.f));
    const afpvec4 y = x * gate;

    buffer_st4(bottom_top_blob_data, idx, y);
}

+ 0
- 44
src/layer/vulkan/shader/hardswish_pack8.comp View File

@@ -1,44 +0,0 @@
// Copyright 2020 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

layout (constant_id = 0) const float alpha = 0;
layout (constant_id = 1) const float beta = 0;

#define shape_constant_id_offset 2
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
} p;

// 4-wide hard-swish: x * clamp(alpha * x + beta, 0, 1)
afpvec4 hardswish4(afpvec4 x)
{
    return x * clamp(x * afp(alpha) + afp(beta), afp(0.f), afp(1.f));
}

// Entry point: each invocation applies hard-swish in place to one 8-wide
// element, handled as two 4-wide halves.
void main()
{
    const ivec3 pos = ivec3(gl_GlobalInvocationID);

    if (pos.x >= psc(w) || pos.y >= psc(h) || pos.z >= psc(c))
        return;

    const int idx = pos.z * psc(cstep) + pos.y * psc(w) + pos.x;

    afpvec8 packed = buffer_ld8(bottom_top_blob_data, idx);

    packed[0] = hardswish4(packed[0]);
    packed[1] = hardswish4(packed[1]);

    buffer_st8(bottom_top_blob_data, idx, packed);
}

+ 11
- 23
src/layer/vulkan/shader/mish.comp View File

@@ -1,44 +1,32 @@
// Copyright 2020 Tencent
// Copyright 2019 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

#define shape_constant_id_offset 0
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;
layout (constant_id = 0) const uint n = 0;

layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; };

layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
uint n;
} p;

// Entry point of the mish shader. This listing interleaves two variants of
// each statement: a scalar one (gx/gy/gz indexing, afp, buffer_ld1/buffer_st1)
// and a 4-wide one (linear gi, afpvec4, buffer_ld4/buffer_st4).
void main()
{
int gx = int(gl_GlobalInvocationID.x);
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);
uint gi = gl_GlobalInvocationID.x;

// bounds check: 3-D extent in the scalar variant, element count n in the 4-wide one
if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
if (gi >= psc(n))
return;

const int gi = gz * psc(cstep) + gy * psc(w) + gx;

afp v = buffer_ld1(bottom_top_blob_data, gi);
afpvec4 v = buffer_ld4(bottom_top_blob_data, gi);

// y = x * tanh(log(exp(x) + 1)); the NCNN_moltenvk branch routes tanh
// through float/vec4 and converts back
#if NCNN_moltenvk
v = v * afp(tanh(float(log(exp(v) + afp(1.f)))));
v = v * afpvec4(tanh(vec4(log(exp(v) + afpvec4(1.f)))));
#else
v = v * tanh(log(exp(v) + afp(1.f)));
v = v * tanh(log(exp(v) + afpvec4(1.f)));
#endif

// result is written back to the index it was read from
buffer_st1(bottom_top_blob_data, gi, v);
buffer_st4(bottom_top_blob_data, gi, v);
}

+ 0
- 44
src/layer/vulkan/shader/mish_pack4.comp View File

@@ -1,44 +0,0 @@
// Copyright 2019 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

#define shape_constant_id_offset 0
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
} p;

// Entry point: each invocation applies mish, x * tanh(log(exp(x) + 1)),
// in place to one 4-wide element.
void main()
{
    const ivec3 pos = ivec3(gl_GlobalInvocationID);

    // invocations outside the w x h x c extent do nothing
    if (pos.x >= psc(w) || pos.y >= psc(h) || pos.z >= psc(c))
        return;

    const int idx = pos.z * psc(cstep) + pos.y * psc(w) + pos.x;

    const afpvec4 x = buffer_ld4(bottom_top_blob_data, idx);

    // log(exp(x) + 1), the argument handed to tanh
    const afpvec4 sp = log(exp(x) + afpvec4(1.f));

#if NCNN_moltenvk
    // this configuration evaluates tanh on a vec4 and converts back
    const afpvec4 y = x * afpvec4(tanh(vec4(sp)));
#else
    const afpvec4 y = x * tanh(sp);
#endif

    buffer_st4(bottom_top_blob_data, idx, y);
}

+ 0
- 46
src/layer/vulkan/shader/mish_pack8.comp View File

@@ -1,46 +0,0 @@
// Copyright 2020 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

#define shape_constant_id_offset 0
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
} p;

// 4-wide mish: x * tanh(log(exp(x) + 1))
afpvec4 mish4(afpvec4 x)
{
    const afpvec4 sp = log(exp(x) + afpvec4(1.f));

#if NCNN_moltenvk
    // this configuration evaluates tanh on a vec4 and converts back
    return x * afpvec4(tanh(vec4(sp)));
#else
    return x * tanh(sp);
#endif
}

// Entry point: each invocation applies mish in place to one 8-wide element,
// handled as two 4-wide halves.
void main()
{
    const ivec3 pos = ivec3(gl_GlobalInvocationID);

    if (pos.x >= psc(w) || pos.y >= psc(h) || pos.z >= psc(c))
        return;

    const int idx = pos.z * psc(cstep) + pos.y * psc(w) + pos.x;

    afpvec8 packed = buffer_ld8(bottom_top_blob_data, idx);

    packed[0] = mish4(packed[0]);
    packed[1] = mish4(packed[1]);

    buffer_st8(bottom_top_blob_data, idx, packed);
}

+ 10
- 21
src/layer/vulkan/shader/relu.comp View File

@@ -1,4 +1,4 @@
// Copyright 2018 Tencent
// Copyright 2019 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450
@@ -6,40 +6,29 @@
layout (constant_id = 0) const float slope = 0;

#define shape_constant_id_offset 1
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;
layout (constant_id = shape_constant_id_offset + 0) const uint n = 0;

layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; };

layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
uint n;
} p;

// Entry point of the (leaky) ReLU shader. This listing interleaves two
// variants of each statement: a scalar one (gx/gy/gz indexing, afp,
// buffer_ld1/buffer_st1) and a 4-wide one (linear gi, afpvec4,
// buffer_ld4/buffer_st4).
void main()
{
int gx = int(gl_GlobalInvocationID.x);
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);
uint gi = gl_GlobalInvocationID.x;

// bounds check: 3-D extent in the scalar variant, element count n in the 4-wide one
if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
if (gi >= psc(n))
return;

const int gi = gz * psc(cstep) + gy * psc(w) + gx;

afp v = buffer_ld1(bottom_top_blob_data, gi);
afpvec4 v = buffer_ld4(bottom_top_blob_data, gi);

// slope == 0: plain ReLU, max(x, 0)
if (slope == 0)
v = max(v, afp(0.f));
else
// otherwise negative inputs are scaled by slope: a ternary in the scalar
// variant, mix() driven by a lessThan() mask in the 4-wide variant
v = v < afp(0.f) ? v * afp(slope) : v;
v = mix(v, v * afp(slope), lessThan(v, afpvec4(0.f)));

// result is written back to the index it was read from
buffer_st1(bottom_top_blob_data, gi, v);
buffer_st4(bottom_top_blob_data, gi, v);
}

+ 0
- 45
src/layer/vulkan/shader/relu_pack4.comp View File

@@ -1,45 +0,0 @@
// Copyright 2019 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

layout (constant_id = 0) const float slope = 0;

#define shape_constant_id_offset 1
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
} p;

// Entry point: each invocation applies ReLU (slope == 0) or leaky ReLU
// (negative inputs scaled by slope) in place to one 4-wide element.
void main()
{
    const ivec3 pos = ivec3(gl_GlobalInvocationID);

    // invocations outside the w x h x c extent do nothing
    if (pos.x >= psc(w) || pos.y >= psc(h) || pos.z >= psc(c))
        return;

    const int idx = pos.z * psc(cstep) + pos.y * psc(w) + pos.x;

    afpvec4 x = buffer_ld4(bottom_top_blob_data, idx);

    if (slope != 0)
    {
        // pick x * slope for the lanes where x < 0, x elsewhere
        x = mix(x, x * afp(slope), lessThan(x, afpvec4(0.f)));
    }
    else
    {
        x = max(x, afp(0.f));
    }

    buffer_st4(bottom_top_blob_data, idx, x);
}

+ 0
- 51
src/layer/vulkan/shader/relu_pack8.comp View File

@@ -1,51 +0,0 @@
// Copyright 2020 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

layout (constant_id = 0) const float slope = 0;

#define shape_constant_id_offset 1
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
} p;

// 4-wide ReLU (slope == 0) or leaky ReLU (negative lanes scaled by slope)
afpvec4 relu4(afpvec4 x)
{
    if (slope != 0)
        return mix(x, x * afp(slope), lessThan(x, afpvec4(0.f)));

    return max(x, afp(0.f));
}

// Entry point: each invocation applies (leaky) ReLU in place to one 8-wide
// element, handled as two 4-wide halves.
void main()
{
    const ivec3 pos = ivec3(gl_GlobalInvocationID);

    if (pos.x >= psc(w) || pos.y >= psc(h) || pos.z >= psc(c))
        return;

    const int idx = pos.z * psc(cstep) + pos.y * psc(w) + pos.x;

    afpvec8 packed = buffer_ld8(bottom_top_blob_data, idx);

    packed[0] = relu4(packed[0]);
    packed[1] = relu4(packed[1]);

    buffer_st8(bottom_top_blob_data, idx, packed);
}

+ 9
- 22
src/layer/vulkan/shader/swish.comp View File

@@ -1,40 +1,27 @@
// Copyright 2020 Tencent
// Copyright 2019 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

#define shape_constant_id_offset 0
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;
layout (constant_id = 0) const uint n = 0;

layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; };
layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
uint n;
} p;

// Entry point of the swish shader. This listing interleaves two variants of
// each statement: a scalar one (gx/gy/gz indexing, afp, buffer_ld1/buffer_st1)
// and a 4-wide one (linear gi, afpvec4, buffer_ld4/buffer_st4).
void main()
{
int gx = int(gl_GlobalInvocationID.x);
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);
uint gi = gl_GlobalInvocationID.x;

// bounds check: 3-D extent in the scalar variant, element count n in the 4-wide one
if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
if (gi >= psc(n))
return;

const int gi = gz * psc(cstep) + gy * psc(w) + gx;
afpvec4 v = buffer_ld4(bottom_top_blob_data, gi);

afp v = buffer_ld1(bottom_top_blob_data, gi);
// y = x / (1 + exp(-x)), 4-wide form
v = v / (afpvec4(1.f) + exp(-v));

// same formula, scalar form
v = v / (afp(1.f) + exp(-v));

// result is written back to the index it was read from
buffer_st1(bottom_top_blob_data, gi, v);
buffer_st4(bottom_top_blob_data, gi, v);
}

+ 0
- 40
src/layer/vulkan/shader/swish_pack4.comp View File

@@ -1,40 +0,0 @@
// Copyright 2019 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

#define shape_constant_id_offset 0
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
} p;

// Entry point: each invocation applies swish, x / (1 + exp(-x)), in place
// to one 4-wide element.
void main()
{
    const ivec3 pos = ivec3(gl_GlobalInvocationID);

    // invocations outside the w x h x c extent do nothing
    if (pos.x >= psc(w) || pos.y >= psc(h) || pos.z >= psc(c))
        return;

    const int idx = pos.z * psc(cstep) + pos.y * psc(w) + pos.x;

    const afpvec4 x = buffer_ld4(bottom_top_blob_data, idx);
    const afpvec4 y = x / (afpvec4(1.f) + exp(-x));

    buffer_st4(bottom_top_blob_data, idx, y);
}

+ 0
- 41
src/layer/vulkan/shader/swish_pack8.comp View File

@@ -1,41 +0,0 @@
// Copyright 2020 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

#define shape_constant_id_offset 0
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
} p;

// 4-wide swish: x / (1 + exp(-x))
afpvec4 swish4(afpvec4 x)
{
    return x / (afpvec4(1.f) + exp(-x));
}

// Entry point: each invocation applies swish in place to one 8-wide element,
// handled as two 4-wide halves.
void main()
{
    const ivec3 pos = ivec3(gl_GlobalInvocationID);

    if (pos.x >= psc(w) || pos.y >= psc(h) || pos.z >= psc(c))
        return;

    const int idx = pos.z * psc(cstep) + pos.y * psc(w) + pos.x;

    afpvec8 packed = buffer_ld8(bottom_top_blob_data, idx);

    packed[0] = swish4(packed[0]);
    packed[1] = swish4(packed[1]);

    buffer_st8(bottom_top_blob_data, idx, packed);
}

+ 9
- 21
src/layer/vulkan/shader/tanh.comp View File

@@ -3,42 +3,30 @@

#version 450

#define shape_constant_id_offset 0
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;
layout (constant_id = 0) const uint n = 0;

layout (binding = 0) buffer bottom_top_blob { sfp bottom_top_blob_data[]; };

layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
uint n;
} p;

// Entry point of the tanh shader. This listing interleaves two variants of
// each statement: a scalar one (gx/gy/gz indexing, afp, buffer_ld1/buffer_st1)
// and a 4-wide one (linear gi, afpvec4, buffer_ld4/buffer_st4).
void main()
{
int gx = int(gl_GlobalInvocationID.x);
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);
uint gi = gl_GlobalInvocationID.x;

// bounds check: 3-D extent in the scalar variant, element count n in the 4-wide one
if (gx >= psc(w) || gy >= psc(h) || gz >= psc(c))
if (gi >= psc(n))
return;

const int gi = gz * psc(cstep) + gy * psc(w) + gx;

afp v = buffer_ld1(bottom_top_blob_data, gi);
afpvec4 v = buffer_ld4(bottom_top_blob_data, gi);

// the NCNN_moltenvk branch routes tanh through float/vec4 and converts back;
// the #else statement is shared by both variants
#if NCNN_moltenvk
v = afp(tanh(float(v)));
v = afpvec4(tanh(vec4(v)));
#else
v = tanh(v);
#endif

// result is written back to the index it was read from
buffer_st1(bottom_top_blob_data, gi, v);
buffer_st4(bottom_top_blob_data, gi, v);
}

+ 0
- 44
src/layer/vulkan/shader/tanh_pack4.comp View File

@@ -1,44 +0,0 @@
// Copyright 2019 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

#define shape_constant_id_offset 0
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

layout (binding = 0) buffer bottom_top_blob { sfpvec4 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
} p;

// Entry point: each invocation applies tanh in place to one 4-wide element
// of bottom_top_blob_data.
void main()
{
    const ivec3 pos = ivec3(gl_GlobalInvocationID);

    // invocations outside the w x h x c extent do nothing
    if (pos.x >= psc(w) || pos.y >= psc(h) || pos.z >= psc(c))
        return;

    const int idx = pos.z * psc(cstep) + pos.y * psc(w) + pos.x;

    const afpvec4 x = buffer_ld4(bottom_top_blob_data, idx);

#if NCNN_moltenvk
    // this configuration evaluates tanh on a vec4 and converts back
    const afpvec4 y = afpvec4(tanh(vec4(x)));
#else
    const afpvec4 y = tanh(x);
#endif

    buffer_st4(bottom_top_blob_data, idx, y);
}

+ 0
- 46
src/layer/vulkan/shader/tanh_pack8.comp View File

@@ -1,46 +0,0 @@
// Copyright 2020 Tencent
// SPDX-License-Identifier: BSD-3-Clause

#version 450

#define shape_constant_id_offset 0
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

layout (binding = 0) buffer bottom_top_blob { sfpvec8 bottom_top_blob_data[]; };

layout (push_constant) uniform parameter
{
int dims;
int w;
int h;
int c;
int cstep;
} p;

// 4-wide tanh with the NCNN_moltenvk detour through vec4
afpvec4 tanh4(afpvec4 x)
{
#if NCNN_moltenvk
    return afpvec4(tanh(vec4(x)));
#else
    return tanh(x);
#endif
}

// Entry point: each invocation applies tanh in place to one 8-wide element,
// handled as two 4-wide halves.
void main()
{
    const ivec3 pos = ivec3(gl_GlobalInvocationID);

    if (pos.x >= psc(w) || pos.y >= psc(h) || pos.z >= psc(c))
        return;

    const int idx = pos.z * psc(cstep) + pos.y * psc(w) + pos.x;

    afpvec8 packed = buffer_ld8(bottom_top_blob_data, idx);

    packed[0] = tanh4(packed[0]);
    packed[1] = tanh4(packed[1]);

    buffer_st8(bottom_top_blob_data, idx, packed);
}

+ 14
- 75
src/layer/vulkan/swish_vulkan.cpp View File

@@ -12,8 +12,6 @@ Swish_vulkan::Swish_vulkan()
support_vulkan = true;

pipeline_swish = 0;
pipeline_swish_pack4 = 0;
pipeline_swish_pack8 = 0;
}

int Swish_vulkan::create_pipeline(const Option& opt)
@@ -41,62 +39,14 @@ int Swish_vulkan::create_pipeline(const Option& opt)
if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack);
if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack);

std::vector<vk_specialization_type> specializations(0 + 5);
specializations[0 + 0].i = shape_packed.dims;
specializations[0 + 1].i = shape_packed.w;
specializations[0 + 2].i = shape_packed.h * shape_packed.d;
specializations[0 + 3].i = shape_packed.c;
specializations[0 + 4].i = shape_packed.cstep;
std::vector<vk_specialization_type> specializations(1);
specializations[0 + 0].u32 = shape_packed.total() * elempack / 4;

Mat local_size_xyz;
if (shape_packed.dims == 1)
{
local_size_xyz.w = std::min(64, shape_packed.w);
local_size_xyz.h = 1;
local_size_xyz.c = 1;
}
if (shape_packed.dims == 2)
{
local_size_xyz.w = std::min(8, shape_packed.w);
local_size_xyz.h = std::min(8, shape_packed.h);
local_size_xyz.c = 1;
}
if (shape_packed.dims == 3)
{
local_size_xyz.w = std::min(4, shape_packed.w);
local_size_xyz.h = std::min(4, shape_packed.h);
local_size_xyz.c = std::min(4, shape_packed.c);
}
if (shape_packed.dims == 4)
{
local_size_xyz.w = std::min(4, shape_packed.w);
local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d);
local_size_xyz.c = std::min(4, shape_packed.c);
}

// pack1
if (shape.dims == 0 || elempack == 1)
{
pipeline_swish = new Pipeline(vkdev);
pipeline_swish->set_optimal_local_size_xyz(local_size_xyz);
pipeline_swish->create(LayerShaderType::swish, opt, specializations);
}
const int local_size_x = vkdev->info.subgroup_size();

// pack4
if (shape.dims == 0 || elempack == 4)
{
pipeline_swish_pack4 = new Pipeline(vkdev);
pipeline_swish_pack4->set_optimal_local_size_xyz(local_size_xyz);
pipeline_swish_pack4->create(LayerShaderType::swish_pack4, opt, specializations);
}

// pack8
if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8)
{
pipeline_swish_pack8 = new Pipeline(vkdev);
pipeline_swish_pack8->set_optimal_local_size_xyz(local_size_xyz);
pipeline_swish_pack8->create(LayerShaderType::swish_pack8, opt, specializations);
}
pipeline_swish = new Pipeline(vkdev);
pipeline_swish->set_optimal_local_size_xyz(local_size_x, 1, 1);
pipeline_swish->create(LayerShaderType::swish, opt, specializations);

return 0;
}
@@ -106,36 +56,25 @@ int Swish_vulkan::destroy_pipeline(const Option& /*opt*/)
delete pipeline_swish;
pipeline_swish = 0;

delete pipeline_swish_pack4;
pipeline_swish_pack4 = 0;

delete pipeline_swish_pack8;
pipeline_swish_pack8 = 0;

return 0;
}

// Records the swish pipeline for bottom_top_blob on cmd and returns 0.
// This listing interleaves two variants of the body: one that passes five
// shape constants and picks a pipeline by elempack, and one that passes a
// single count n and dispatches n x 1 x 1 on pipeline_swish.
int Swish_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const
{
int elempack = bottom_top_blob.elempack;
// n = number of 4-element groups in the blob.
// NOTE(review): the integer division drops a remainder when
// total() * elempack is not a multiple of 4 - confirm the tail elements are
// still covered (e.g. by padding in total()).
const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4;

std::vector<VkMat> bindings(1);
bindings[0] = bottom_top_blob;

// shape-constant variant: dims, w, h*d, c, cstep
std::vector<vk_constant_type> constants(5);
constants[0].i = bottom_top_blob.dims;
constants[1].i = bottom_top_blob.w;
constants[2].i = bottom_top_blob.h * bottom_top_blob.d;
constants[3].i = bottom_top_blob.c;
constants[4].i = bottom_top_blob.cstep;
// count variant: only n is passed.
// NOTE(review): n is a size_t stored into a u32 field here and into
// dispatcher.w below - presumably always in range; verify for very large blobs.
std::vector<vk_constant_type> constants(1);
constants[0].u32 = n;

const Pipeline* pipeline = elempack == 8 ? pipeline_swish_pack8
: elempack == 4 ? pipeline_swish_pack4
: pipeline_swish;
cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);
// count variant: a one-dimensional dispatch of n invocations
VkMat dispatcher;
dispatcher.w = n;
dispatcher.h = 1;
dispatcher.c = 1;
cmd.record_pipeline(pipeline_swish, bindings, constants, dispatcher);

return 0;
}

} // namespace ncnn

+ 0
- 2
src/layer/vulkan/swish_vulkan.h View File

@@ -21,8 +21,6 @@ public:

public:
Pipeline* pipeline_swish;
Pipeline* pipeline_swish_pack4;
Pipeline* pipeline_swish_pack8;
};

} // namespace ncnn


+ 14
- 74
src/layer/vulkan/tanh_vulkan.cpp View File

@@ -12,8 +12,6 @@ TanH_vulkan::TanH_vulkan()
support_vulkan = true;

pipeline_tanh = 0;
pipeline_tanh_pack4 = 0;
pipeline_tanh_pack8 = 0;
}

int TanH_vulkan::create_pipeline(const Option& opt)
@@ -41,62 +39,14 @@ int TanH_vulkan::create_pipeline(const Option& opt)
if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack);
if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack);

std::vector<vk_specialization_type> specializations(0 + 5);
specializations[0 + 0].i = shape_packed.dims;
specializations[0 + 1].i = shape_packed.w;
specializations[0 + 2].i = shape_packed.h * shape_packed.d;
specializations[0 + 3].i = shape_packed.c;
specializations[0 + 4].i = shape_packed.cstep;
std::vector<vk_specialization_type> specializations(1);
specializations[0 + 0].u32 = shape_packed.total() * elempack / 4;

Mat local_size_xyz;
if (shape_packed.dims == 1)
{
local_size_xyz.w = std::min(64, shape_packed.w);
local_size_xyz.h = 1;
local_size_xyz.c = 1;
}
if (shape_packed.dims == 2)
{
local_size_xyz.w = std::min(8, shape_packed.w);
local_size_xyz.h = std::min(8, shape_packed.h);
local_size_xyz.c = 1;
}
if (shape_packed.dims == 3)
{
local_size_xyz.w = std::min(4, shape_packed.w);
local_size_xyz.h = std::min(4, shape_packed.h);
local_size_xyz.c = std::min(4, shape_packed.c);
}
if (shape_packed.dims == 4)
{
local_size_xyz.w = std::min(4, shape_packed.w);
local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d);
local_size_xyz.c = std::min(4, shape_packed.c);
}
const int local_size_x = vkdev->info.subgroup_size();

// pack1
if (shape.dims == 0 || elempack == 1)
{
pipeline_tanh = new Pipeline(vkdev);
pipeline_tanh->set_optimal_local_size_xyz(local_size_xyz);
pipeline_tanh->create(LayerShaderType::tanh, opt, specializations);
}

// pack4
if (shape.dims == 0 || elempack == 4)
{
pipeline_tanh_pack4 = new Pipeline(vkdev);
pipeline_tanh_pack4->set_optimal_local_size_xyz(local_size_xyz);
pipeline_tanh_pack4->create(LayerShaderType::tanh_pack4, opt, specializations);
}

// pack8
if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8)
{
pipeline_tanh_pack8 = new Pipeline(vkdev);
pipeline_tanh_pack8->set_optimal_local_size_xyz(local_size_xyz);
pipeline_tanh_pack8->create(LayerShaderType::tanh_pack8, opt, specializations);
}
pipeline_tanh = new Pipeline(vkdev);
pipeline_tanh->set_optimal_local_size_xyz(local_size_x, 1, 1);
pipeline_tanh->create(LayerShaderType::tanh, opt, specializations);

return 0;
}
@@ -106,34 +56,24 @@ int TanH_vulkan::destroy_pipeline(const Option& /*opt*/)
delete pipeline_tanh;
pipeline_tanh = 0;

delete pipeline_tanh_pack4;
pipeline_tanh_pack4 = 0;

delete pipeline_tanh_pack8;
pipeline_tanh_pack8 = 0;

return 0;
}

// Records a compute dispatch on `cmd` that operates in place on `bottom_top_blob`; always returns 0.
//
// NOTE(review): this listing looks like a rendered diff whose +/- markers were stripped, so
// statements from two versions of the function are interleaved below (`constants` is declared
// twice and record_pipeline is called twice). It is not compilable as shown. Which statements
// belong to which side of the diff is inferred, not shown - confirm against the actual file.
int TanH_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const
{
// Only read by the per-elempack pipeline selection further down.
int elempack = bottom_top_blob.elempack;
// Value used both as the single shader constant and as the dispatcher width below.
// NOTE(review): presumably the blob size counted in groups of 4 scalars - confirm what total() returns.
const size_t n = bottom_top_blob.total() * bottom_top_blob.elempack / 4;

// The blob is the only binding.
std::vector<VkMat> bindings(1);
bindings[0] = bottom_top_blob;

// Five-constant form: dims, w, h * d, c, cstep of the blob.
// NOTE(review): presumably the removed side of the diff - confirm.
std::vector<vk_constant_type> constants(5);
constants[0].i = bottom_top_blob.dims;
constants[1].i = bottom_top_blob.w;
constants[2].i = bottom_top_blob.h * bottom_top_blob.d;
constants[3].i = bottom_top_blob.c;
constants[4].i = bottom_top_blob.cstep;

// Picks the pack8 / pack4 / default pipeline from elempack.
const Pipeline* pipeline = elempack == 8 ? pipeline_tanh_pack8
: elempack == 4 ? pipeline_tanh_pack4
: pipeline_tanh;
// One-constant form: just n.
// NOTE(review): presumably the added side of the diff - confirm.
std::vector<vk_constant_type> constants(1);
constants[0].u32 = n;

// Dispatches the elempack-selected pipeline over the blob itself.
cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);
// Dispatches pipeline_tanh over an n x 1 x 1 dispatcher instead of the blob's own shape.
VkMat dispatcher;
dispatcher.w = n;
dispatcher.h = 1;
dispatcher.c = 1;
cmd.record_pipeline(pipeline_tanh, bindings, constants, dispatcher);

return 0;
}


+ 0
- 2
src/layer/vulkan/tanh_vulkan.h View File

@@ -21,8 +21,6 @@ public:

public:
Pipeline* pipeline_tanh;
Pipeline* pipeline_tanh_pack4;
Pipeline* pipeline_tanh_pack8;
};

} // namespace ncnn


Loading…
Cancel
Save