Browse Source

enable pack8 shaders for layers without shape changes

tags/20200226
nihui 6 years ago
parent
commit
a9d2cb8d6b
32 changed files with 442 additions and 86 deletions
  1. +14
    -1
      src/layer/vulkan/absval_vulkan.cpp
  2. +1
    -0
      src/layer/vulkan/absval_vulkan.h
  3. +28
    -2
      src/layer/vulkan/cast_vulkan.cpp
  4. +2
    -0
      src/layer/vulkan/cast_vulkan.h
  5. +14
    -1
      src/layer/vulkan/clip_vulkan.cpp
  6. +1
    -0
      src/layer/vulkan/clip_vulkan.h
  7. +14
    -1
      src/layer/vulkan/dropout_vulkan.cpp
  8. +1
    -0
      src/layer/vulkan/dropout_vulkan.h
  9. +23
    -2
      src/layer/vulkan/eltwise_vulkan.cpp
  10. +1
    -0
      src/layer/vulkan/eltwise_vulkan.h
  11. +14
    -1
      src/layer/vulkan/hardsigmoid_vulkan.cpp
  12. +1
    -0
      src/layer/vulkan/hardsigmoid_vulkan.h
  13. +14
    -1
      src/layer/vulkan/hardswish_vulkan.cpp
  14. +1
    -0
      src/layer/vulkan/hardswish_vulkan.h
  15. +56
    -2
      src/layer/vulkan/lrn_vulkan.cpp
  16. +4
    -0
      src/layer/vulkan/lrn_vulkan.h
  17. +53
    -15
      src/layer/vulkan/normalize_vulkan.cpp
  18. +5
    -1
      src/layer/vulkan/normalize_vulkan.h
  19. +20
    -17
      src/layer/vulkan/padding_vulkan.cpp
  20. +1
    -2
      src/layer/vulkan/padding_vulkan.h
  21. +59
    -31
      src/layer/vulkan/pooling_vulkan.cpp
  22. +3
    -1
      src/layer/vulkan/pooling_vulkan.h
  23. +14
    -1
      src/layer/vulkan/relu_vulkan.cpp
  24. +1
    -0
      src/layer/vulkan/relu_vulkan.h
  25. +14
    -1
      src/layer/vulkan/sigmoid_vulkan.cpp
  26. +1
    -0
      src/layer/vulkan/sigmoid_vulkan.h
  27. +47
    -4
      src/layer/vulkan/softmax_vulkan.cpp
  28. +5
    -0
      src/layer/vulkan/softmax_vulkan.h
  29. +14
    -1
      src/layer/vulkan/tanh_vulkan.cpp
  30. +1
    -0
      src/layer/vulkan/tanh_vulkan.h
  31. +14
    -1
      src/layer/vulkan/unaryop_vulkan.cpp
  32. +1
    -0
      src/layer/vulkan/unaryop_vulkan.h

+ 14
- 1
src/layer/vulkan/absval_vulkan.cpp View File

@@ -24,6 +24,7 @@ AbsVal_vulkan::AbsVal_vulkan()

pipeline_absval = 0;
pipeline_absval_pack4 = 0;
pipeline_absval_pack8 = 0;
}

int AbsVal_vulkan::create_pipeline(const Option& opt)
@@ -44,6 +45,13 @@ int AbsVal_vulkan::create_pipeline(const Option& opt)
pipeline_absval_pack4->create("absval_pack4", opt, specializations, 1, 5);
}

// pack8
{
pipeline_absval_pack8 = new Pipeline(vkdev);
pipeline_absval_pack8->set_optimal_local_size_xyz();
pipeline_absval_pack8->create("absval_pack8", opt, specializations, 1, 5);
}

return 0;
}

@@ -55,6 +63,9 @@ int AbsVal_vulkan::destroy_pipeline(const Option& /*opt*/)
delete pipeline_absval_pack4;
pipeline_absval_pack4 = 0;

delete pipeline_absval_pack8;
pipeline_absval_pack8 = 0;

return 0;
}

@@ -72,7 +83,9 @@ int AbsVal_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const
constants[3].i = bottom_top_blob.c;
constants[4].i = bottom_top_blob.cstep;

const Pipeline* pipeline = elempack == 4 ? pipeline_absval_pack4 : pipeline_absval;
const Pipeline* pipeline = elempack == 8 ? pipeline_absval_pack8
: elempack == 4 ? pipeline_absval_pack4
: pipeline_absval;

cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);



+ 1
- 0
src/layer/vulkan/absval_vulkan.h View File

@@ -33,6 +33,7 @@ public:
public:
Pipeline* pipeline_absval;
Pipeline* pipeline_absval_pack4;
Pipeline* pipeline_absval_pack8;
};

} // namespace ncnn


+ 28
- 2
src/layer/vulkan/cast_vulkan.cpp View File

@@ -24,8 +24,10 @@ Cast_vulkan::Cast_vulkan()

pipeline_cast_fp32_to_fp16 = 0;
pipeline_cast_fp32_to_fp16_pack4 = 0;
pipeline_cast_fp32_to_fp16_pack8 = 0;
pipeline_cast_fp16_to_fp32 = 0;
pipeline_cast_fp16_to_fp32_pack4 = 0;
pipeline_cast_fp16_to_fp32_pack8 = 0;
}

int Cast_vulkan::create_pipeline(const Option& opt)
@@ -47,6 +49,13 @@ int Cast_vulkan::create_pipeline(const Option& opt)
pipeline_cast_fp32_to_fp16_pack4->set_optimal_local_size_xyz();
pipeline_cast_fp32_to_fp16_pack4->create("cast_fp32_to_fp16_pack4", opt, specializations, 2, 10);
}

// pack8
{
pipeline_cast_fp32_to_fp16_pack8 = new Pipeline(vkdev);
pipeline_cast_fp32_to_fp16_pack8->set_optimal_local_size_xyz();
pipeline_cast_fp32_to_fp16_pack8->create("cast_fp32_to_fp16_pack8", opt, specializations, 2, 10);
}
}

if (type_from == 2 && type_to == 1)
@@ -64,6 +73,13 @@ int Cast_vulkan::create_pipeline(const Option& opt)
pipeline_cast_fp16_to_fp32_pack4->set_optimal_local_size_xyz();
pipeline_cast_fp16_to_fp32_pack4->create("cast_fp16_to_fp32_pack4", opt, specializations, 2, 10);
}

// pack8
{
pipeline_cast_fp16_to_fp32_pack8 = new Pipeline(vkdev);
pipeline_cast_fp16_to_fp32_pack8->set_optimal_local_size_xyz();
pipeline_cast_fp16_to_fp32_pack8->create("cast_fp16_to_fp32_pack8", opt, specializations, 2, 10);
}
}

return 0;
@@ -77,12 +93,18 @@ int Cast_vulkan::destroy_pipeline(const Option& /*opt*/)
delete pipeline_cast_fp32_to_fp16_pack4;
pipeline_cast_fp32_to_fp16_pack4 = 0;

delete pipeline_cast_fp32_to_fp16_pack8;
pipeline_cast_fp32_to_fp16_pack8 = 0;

delete pipeline_cast_fp16_to_fp32;
pipeline_cast_fp16_to_fp32 = 0;

delete pipeline_cast_fp16_to_fp32_pack4;
pipeline_cast_fp16_to_fp32_pack4 = 0;

delete pipeline_cast_fp16_to_fp32_pack8;
pipeline_cast_fp16_to_fp32_pack8 = 0;

return 0;
}

@@ -159,11 +181,15 @@ int Cast_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& c

if (type_from == 1 && type_to == 2)
{
pipeline = elempack == 4 ? pipeline_cast_fp32_to_fp16_pack4 : pipeline_cast_fp32_to_fp16;
pipeline = elempack == 8 ? pipeline_cast_fp32_to_fp16_pack8
: elempack == 4 ? pipeline_cast_fp32_to_fp16_pack4
: pipeline_cast_fp32_to_fp16;
}
if (type_from == 2 && type_to == 1)
{
pipeline = elempack == 4 ? pipeline_cast_fp16_to_fp32_pack4 : pipeline_cast_fp16_to_fp32;
pipeline = elempack == 8 ? pipeline_cast_fp16_to_fp32_pack8
: elempack == 4 ? pipeline_cast_fp16_to_fp32_pack4
: pipeline_cast_fp16_to_fp32;
}

// TODO more cast type


+ 2
- 0
src/layer/vulkan/cast_vulkan.h View File

@@ -33,8 +33,10 @@ public:
public:
Pipeline* pipeline_cast_fp32_to_fp16;
Pipeline* pipeline_cast_fp32_to_fp16_pack4;
Pipeline* pipeline_cast_fp32_to_fp16_pack8;
Pipeline* pipeline_cast_fp16_to_fp32;
Pipeline* pipeline_cast_fp16_to_fp32_pack4;
Pipeline* pipeline_cast_fp16_to_fp32_pack8;
};

} // namespace ncnn


+ 14
- 1
src/layer/vulkan/clip_vulkan.cpp View File

@@ -24,6 +24,7 @@ Clip_vulkan::Clip_vulkan()

pipeline_clip = 0;
pipeline_clip_pack4 = 0;
pipeline_clip_pack8 = 0;
}

int Clip_vulkan::create_pipeline(const Option& opt)
@@ -46,6 +47,13 @@ int Clip_vulkan::create_pipeline(const Option& opt)
pipeline_clip_pack4->create("clip_pack4", opt, specializations, 1, 5);
}

// pack8
{
pipeline_clip_pack8 = new Pipeline(vkdev);
pipeline_clip_pack8->set_optimal_local_size_xyz();
pipeline_clip_pack8->create("clip_pack8", opt, specializations, 1, 5);
}

return 0;
}

@@ -57,6 +65,9 @@ int Clip_vulkan::destroy_pipeline(const Option& /*opt*/)
delete pipeline_clip_pack4;
pipeline_clip_pack4 = 0;

delete pipeline_clip_pack8;
pipeline_clip_pack8 = 0;

return 0;
}

@@ -74,7 +85,9 @@ int Clip_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const O
constants[3].i = bottom_top_blob.c;
constants[4].i = bottom_top_blob.cstep;

const Pipeline* pipeline = elempack == 4 ? pipeline_clip_pack4 : pipeline_clip;
const Pipeline* pipeline = elempack == 8 ? pipeline_clip_pack8
: elempack == 4 ? pipeline_clip_pack4
: pipeline_clip;

cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);



+ 1
- 0
src/layer/vulkan/clip_vulkan.h View File

@@ -33,6 +33,7 @@ public:
public:
Pipeline* pipeline_clip;
Pipeline* pipeline_clip_pack4;
Pipeline* pipeline_clip_pack8;
};

} // namespace ncnn


+ 14
- 1
src/layer/vulkan/dropout_vulkan.cpp View File

@@ -25,6 +25,7 @@ Dropout_vulkan::Dropout_vulkan()

pipeline_dropout = 0;
pipeline_dropout_pack4 = 0;
pipeline_dropout_pack8 = 0;
}

int Dropout_vulkan::create_pipeline(const Option& opt)
@@ -46,6 +47,13 @@ int Dropout_vulkan::create_pipeline(const Option& opt)
pipeline_dropout_pack4->create("dropout_pack4", opt, specializations, 1, 5);
}

// pack8
{
pipeline_dropout_pack8 = new Pipeline(vkdev);
pipeline_dropout_pack8->set_optimal_local_size_xyz();
pipeline_dropout_pack8->create("dropout_pack8", opt, specializations, 1, 5);
}

return 0;
}

@@ -57,6 +65,9 @@ int Dropout_vulkan::destroy_pipeline(const Option& /*opt*/)
delete pipeline_dropout_pack4;
pipeline_dropout_pack4 = 0;

delete pipeline_dropout_pack8;
pipeline_dropout_pack8 = 0;

return 0;
}

@@ -79,7 +90,9 @@ int Dropout_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, cons
constants[3].i = bottom_top_blob.c;
constants[4].i = bottom_top_blob.cstep;

const Pipeline* pipeline = elempack == 4 ? pipeline_dropout_pack4 : pipeline_dropout;
const Pipeline* pipeline = elempack == 8 ? pipeline_dropout_pack8
: elempack == 4 ? pipeline_dropout_pack4
: pipeline_dropout;

cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);



+ 1
- 0
src/layer/vulkan/dropout_vulkan.h View File

@@ -33,6 +33,7 @@ public:
public:
Pipeline* pipeline_dropout;
Pipeline* pipeline_dropout_pack4;
Pipeline* pipeline_dropout_pack8;
};

} // namespace ncnn


+ 23
- 2
src/layer/vulkan/eltwise_vulkan.cpp View File

@@ -27,6 +27,8 @@ Eltwise_vulkan::Eltwise_vulkan()
pipeline_eltwise[1] = 0;
pipeline_eltwise_pack4[0] = 0;
pipeline_eltwise_pack4[1] = 0;
pipeline_eltwise_pack8[0] = 0;
pipeline_eltwise_pack8[1] = 0;
}

int Eltwise_vulkan::create_pipeline(const Option& opt)
@@ -55,6 +57,16 @@ int Eltwise_vulkan::create_pipeline(const Option& opt)
pipeline_eltwise_pack4[1]->create("eltwise_pack4", opt, specializations, 3, 5+2);
}

// pack8
{
pipeline_eltwise_pack8[0] = new Pipeline(vkdev);
pipeline_eltwise_pack8[0]->set_optimal_local_size_xyz();
pipeline_eltwise_pack8[0]->create("eltwise_pack8", opt, specializations, 3, 5+2);
pipeline_eltwise_pack8[1] = new Pipeline(vkdev);
pipeline_eltwise_pack8[1]->set_optimal_local_size_xyz();
pipeline_eltwise_pack8[1]->create("eltwise_pack8", opt, specializations, 3, 5+2);
}

return 0;
}

@@ -70,6 +82,11 @@ int Eltwise_vulkan::destroy_pipeline(const Option& /*opt*/)
pipeline_eltwise_pack4[0] = 0;
pipeline_eltwise_pack4[1] = 0;

delete pipeline_eltwise_pack8[0];
delete pipeline_eltwise_pack8[1];
pipeline_eltwise_pack8[0] = 0;
pipeline_eltwise_pack8[1] = 0;

return 0;
}

@@ -103,7 +120,9 @@ int Eltwise_vulkan::forward(const std::vector<VkMat>& bottom_blobs, std::vector<
constants[5].f = coeffs.w == 0 ? 1.f : coeffs[0];
constants[6].f = coeffs.w == 0 ? 1.f : coeffs[1];

const Pipeline* pipeline = elempack == 4 ? pipeline_eltwise_pack4[1] : pipeline_eltwise[1];
const Pipeline* pipeline = elempack == 8 ? pipeline_eltwise_pack8[1]
: elempack == 4 ? pipeline_eltwise_pack4[1]
: pipeline_eltwise[1];

cmd.record_pipeline(pipeline, bindings, constants, top_blob);

@@ -123,7 +142,9 @@ int Eltwise_vulkan::forward(const std::vector<VkMat>& bottom_blobs, std::vector<
constants[5].f = 1.f;
constants[6].f = coeffs.w == 0 ? 1 : coeffs[b];

const Pipeline* pipeline = elempack == 4 ? pipeline_eltwise_pack4[b%2] : pipeline_eltwise[b%2];
const Pipeline* pipeline = elempack == 8 ? pipeline_eltwise_pack8[b%2]
: elempack == 4 ? pipeline_eltwise_pack4[b%2]
: pipeline_eltwise[b%2];

cmd.record_pipeline(pipeline, bindings, constants, top_blob);
}


+ 1
- 0
src/layer/vulkan/eltwise_vulkan.h View File

@@ -33,6 +33,7 @@ public:
public:
Pipeline* pipeline_eltwise[2];
Pipeline* pipeline_eltwise_pack4[2];
Pipeline* pipeline_eltwise_pack8[2];
};

} // namespace ncnn


+ 14
- 1
src/layer/vulkan/hardsigmoid_vulkan.cpp View File

@@ -24,6 +24,7 @@ HardSigmoid_vulkan::HardSigmoid_vulkan()

pipeline_hardsigmoid = 0;
pipeline_hardsigmoid_pack4 = 0;
pipeline_hardsigmoid_pack8 = 0;
}

int HardSigmoid_vulkan::create_pipeline(const Option& opt)
@@ -46,6 +47,13 @@ int HardSigmoid_vulkan::create_pipeline(const Option& opt)
pipeline_hardsigmoid_pack4->create("hardsigmoid_pack4", opt, specializations, 1, 5);
}

// pack8
{
pipeline_hardsigmoid_pack8 = new Pipeline(vkdev);
pipeline_hardsigmoid_pack8->set_optimal_local_size_xyz();
pipeline_hardsigmoid_pack8->create("hardsigmoid_pack8", opt, specializations, 1, 5);
}

return 0;
}

@@ -57,6 +65,9 @@ int HardSigmoid_vulkan::destroy_pipeline(const Option& /*opt*/)
delete pipeline_hardsigmoid_pack4;
pipeline_hardsigmoid_pack4 = 0;

delete pipeline_hardsigmoid_pack8;
pipeline_hardsigmoid_pack8 = 0;

return 0;
}

@@ -74,7 +85,9 @@ int HardSigmoid_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd,
constants[3].i = bottom_top_blob.c;
constants[4].i = bottom_top_blob.cstep;

const Pipeline* pipeline = elempack == 4 ? pipeline_hardsigmoid_pack4 : pipeline_hardsigmoid;
const Pipeline* pipeline = elempack == 8 ? pipeline_hardsigmoid_pack8
: elempack == 4 ? pipeline_hardsigmoid_pack4
: pipeline_hardsigmoid;

cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);



+ 1
- 0
src/layer/vulkan/hardsigmoid_vulkan.h View File

@@ -33,6 +33,7 @@ public:
public:
Pipeline* pipeline_hardsigmoid;
Pipeline* pipeline_hardsigmoid_pack4;
Pipeline* pipeline_hardsigmoid_pack8;
};

} // namespace ncnn


+ 14
- 1
src/layer/vulkan/hardswish_vulkan.cpp View File

@@ -24,6 +24,7 @@ HardSwish_vulkan::HardSwish_vulkan()

pipeline_hardswish = 0;
pipeline_hardswish_pack4 = 0;
pipeline_hardswish_pack8 = 0;
}

int HardSwish_vulkan::create_pipeline(const Option& opt)
@@ -46,6 +47,13 @@ int HardSwish_vulkan::create_pipeline(const Option& opt)
pipeline_hardswish_pack4->create("hardswish_pack4", opt, specializations, 1, 5);
}

// pack8
{
pipeline_hardswish_pack8 = new Pipeline(vkdev);
pipeline_hardswish_pack8->set_optimal_local_size_xyz();
pipeline_hardswish_pack8->create("hardswish_pack8", opt, specializations, 1, 5);
}

return 0;
}

@@ -57,6 +65,9 @@ int HardSwish_vulkan::destroy_pipeline(const Option& /*opt*/)
delete pipeline_hardswish_pack4;
pipeline_hardswish_pack4 = 0;

delete pipeline_hardswish_pack8;
pipeline_hardswish_pack8 = 0;

return 0;
}

@@ -74,7 +85,9 @@ int HardSwish_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
constants[3].i = bottom_top_blob.c;
constants[4].i = bottom_top_blob.cstep;

const Pipeline* pipeline = elempack == 4 ? pipeline_hardswish_pack4 : pipeline_hardswish;
const Pipeline* pipeline = elempack == 8 ? pipeline_hardswish_pack8
: elempack == 4 ? pipeline_hardswish_pack4
: pipeline_hardswish;

cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);



+ 1
- 0
src/layer/vulkan/hardswish_vulkan.h View File

@@ -33,6 +33,7 @@ public:
public:
Pipeline* pipeline_hardswish;
Pipeline* pipeline_hardswish_pack4;
Pipeline* pipeline_hardswish_pack8;
};

} // namespace ncnn


+ 56
- 2
src/layer/vulkan/lrn_vulkan.cpp View File

@@ -29,6 +29,10 @@ LRN_vulkan::LRN_vulkan()
pipeline_lrn_norm_across_channel_pack4 = 0;
pipeline_lrn_square_pad_within_channel_pack4 = 0;
pipeline_lrn_norm_within_channel_pack4 = 0;
pipeline_lrn_square_pad_across_channel_pack8 = 0;
pipeline_lrn_norm_across_channel_pack8 = 0;
pipeline_lrn_square_pad_within_channel_pack8 = 0;
pipeline_lrn_norm_within_channel_pack8 = 0;
}

int LRN_vulkan::create_pipeline(const Option& opt)
@@ -67,6 +71,20 @@ int LRN_vulkan::create_pipeline(const Option& opt)
pipeline_lrn_square_pad_within_channel_pack4->set_optimal_local_size_xyz();
pipeline_lrn_square_pad_within_channel_pack4->create("lrn_square_pad_within_channel_pack4", opt, specializations, 2, 10);
}

// pack8
if (region_type == 0)
{
pipeline_lrn_square_pad_across_channel_pack8 = new Pipeline(vkdev);
pipeline_lrn_square_pad_across_channel_pack8->set_optimal_local_size_xyz();
pipeline_lrn_square_pad_across_channel_pack8->create("lrn_square_pad_across_channel_pack8", opt, specializations, 2, 10);
}
if (region_type == 1)
{
pipeline_lrn_square_pad_within_channel_pack8 = new Pipeline(vkdev);
pipeline_lrn_square_pad_within_channel_pack8->set_optimal_local_size_xyz();
pipeline_lrn_square_pad_within_channel_pack8->create("lrn_square_pad_within_channel_pack8", opt, specializations, 2, 10);
}
}

{
@@ -95,6 +113,20 @@ int LRN_vulkan::create_pipeline(const Option& opt)
pipeline_lrn_norm_within_channel_pack4->set_optimal_local_size_xyz();
pipeline_lrn_norm_within_channel_pack4->create("lrn_norm_within_channel_pack4", opt, specializations, 2, 10);
}

// pack8
if (region_type == 0)
{
pipeline_lrn_norm_across_channel_pack8 = new Pipeline(vkdev);
pipeline_lrn_norm_across_channel_pack8->set_optimal_local_size_xyz();
pipeline_lrn_norm_across_channel_pack8->create("lrn_norm_across_channel_pack8", opt, specializations, 2, 10);
}
if (region_type == 1)
{
pipeline_lrn_norm_within_channel_pack8 = new Pipeline(vkdev);
pipeline_lrn_norm_within_channel_pack8->set_optimal_local_size_xyz();
pipeline_lrn_norm_within_channel_pack8->create("lrn_norm_within_channel_pack8", opt, specializations, 2, 10);
}
}

return 0;
@@ -120,6 +152,18 @@ int LRN_vulkan::destroy_pipeline(const Option& /*opt*/)
delete pipeline_lrn_norm_within_channel_pack4;
pipeline_lrn_norm_within_channel_pack4 = 0;

delete pipeline_lrn_square_pad_across_channel_pack8;
pipeline_lrn_square_pad_across_channel_pack8 = 0;

delete pipeline_lrn_norm_across_channel_pack8;
pipeline_lrn_norm_across_channel_pack8 = 0;

delete pipeline_lrn_square_pad_within_channel_pack8;
pipeline_lrn_square_pad_within_channel_pack8 = 0;

delete pipeline_lrn_norm_within_channel_pack8;
pipeline_lrn_norm_within_channel_pack8 = 0;

return 0;
}

@@ -167,7 +211,12 @@ int LRN_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Op
constants[9].i = square_workspace.cstep;

const Pipeline* pipeline = 0;
if (elempack == 4)
if (elempack == 8)
{
if (region_type == 0) pipeline = pipeline_lrn_square_pad_across_channel_pack8;
if (region_type == 1) pipeline = pipeline_lrn_square_pad_within_channel_pack8;
}
else if (elempack == 4)
{
if (region_type == 0) pipeline = pipeline_lrn_square_pad_across_channel_pack4;
if (region_type == 1) pipeline = pipeline_lrn_square_pad_within_channel_pack4;
@@ -199,7 +248,12 @@ int LRN_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Op
constants[9].i = bottom_top_blob.cstep;

const Pipeline* pipeline = 0;
if (elempack == 4)
if (elempack == 8)
{
if (region_type == 0) pipeline = pipeline_lrn_norm_across_channel_pack8;
if (region_type == 1) pipeline = pipeline_lrn_norm_within_channel_pack8;
}
else if (elempack == 4)
{
if (region_type == 0) pipeline = pipeline_lrn_norm_across_channel_pack4;
if (region_type == 1) pipeline = pipeline_lrn_norm_within_channel_pack4;


+ 4
- 0
src/layer/vulkan/lrn_vulkan.h View File

@@ -37,6 +37,10 @@ public:
Pipeline* pipeline_lrn_norm_across_channel_pack4;
Pipeline* pipeline_lrn_square_pad_within_channel_pack4;
Pipeline* pipeline_lrn_norm_within_channel_pack4;
Pipeline* pipeline_lrn_square_pad_across_channel_pack8;
Pipeline* pipeline_lrn_norm_across_channel_pack8;
Pipeline* pipeline_lrn_square_pad_within_channel_pack8;
Pipeline* pipeline_lrn_norm_within_channel_pack8;
};

} // namespace ncnn


+ 53
- 15
src/layer/vulkan/normalize_vulkan.cpp View File

@@ -33,6 +33,12 @@ Normalize_vulkan::Normalize_vulkan()
pipeline_normalize_reduce_sum4_fp32_pack4[1] = 0;
pipeline_normalize_coeffs_pack4 = 0;
pipeline_normalize_norm_pack4 = 0;

pipeline_normalize_reduce_sum4_fp16_to_fp32_pack8 = 0;
pipeline_normalize_reduce_sum4_fp32_pack8[0] = 0;
pipeline_normalize_reduce_sum4_fp32_pack8[1] = 0;
pipeline_normalize_coeffs_pack8 = 0;
pipeline_normalize_norm_pack8 = 0;
}

int Normalize_vulkan::create_pipeline(const Option& opt)
@@ -65,6 +71,18 @@ int Normalize_vulkan::create_pipeline(const Option& opt)
pipeline_normalize_reduce_sum4_fp32_pack4[1] = new Pipeline(vkdev);
pipeline_normalize_reduce_sum4_fp32_pack4[1]->set_optimal_local_size_xyz();
pipeline_normalize_reduce_sum4_fp32_pack4[1]->create("normalize_reduce_sum4_fp32_pack4", opt, specializations, 2, 6);

// pack8
pipeline_normalize_reduce_sum4_fp16_to_fp32_pack8 = new Pipeline(vkdev);
pipeline_normalize_reduce_sum4_fp16_to_fp32_pack8->set_optimal_local_size_xyz();
pipeline_normalize_reduce_sum4_fp16_to_fp32_pack8->create("normalize_reduce_sum4_fp16_to_fp32_pack8", opt, specializations, 2, 6);

pipeline_normalize_reduce_sum4_fp32_pack8[0] = new Pipeline(vkdev);
pipeline_normalize_reduce_sum4_fp32_pack8[0]->set_optimal_local_size_xyz();
pipeline_normalize_reduce_sum4_fp32_pack8[0]->create("normalize_reduce_sum4_fp32_pack8", opt, specializations, 2, 6);
pipeline_normalize_reduce_sum4_fp32_pack8[1] = new Pipeline(vkdev);
pipeline_normalize_reduce_sum4_fp32_pack8[1]->set_optimal_local_size_xyz();
pipeline_normalize_reduce_sum4_fp32_pack8[1]->create("normalize_reduce_sum4_fp32_pack8", opt, specializations, 2, 6);
}

{
@@ -79,6 +97,10 @@ int Normalize_vulkan::create_pipeline(const Option& opt)
pipeline_normalize_coeffs_pack4 = new Pipeline(vkdev);
pipeline_normalize_coeffs_pack4->set_optimal_local_size_xyz();
pipeline_normalize_coeffs_pack4->create("normalize_coeffs_pack4", opt, specializations, 2, 3);

pipeline_normalize_coeffs_pack8 = new Pipeline(vkdev);
pipeline_normalize_coeffs_pack8->set_optimal_local_size_xyz();
pipeline_normalize_coeffs_pack8->create("normalize_coeffs_pack8", opt, specializations, 2, 3);
}

{
@@ -95,6 +117,10 @@ int Normalize_vulkan::create_pipeline(const Option& opt)
pipeline_normalize_norm_pack4 = new Pipeline(vkdev);
pipeline_normalize_norm_pack4->set_optimal_local_size_xyz();
pipeline_normalize_norm_pack4->create("normalize_norm_pack4", opt, specializations, 3, 5);

pipeline_normalize_norm_pack8 = new Pipeline(vkdev);
pipeline_normalize_norm_pack8->set_optimal_local_size_xyz();
pipeline_normalize_norm_pack8->create("normalize_norm_pack8", opt, specializations, 3, 5);
}

return 0;
@@ -118,18 +144,32 @@ int Normalize_vulkan::destroy_pipeline(const Option& /*opt*/)
pipeline_normalize_reduce_sum4_fp32_pack4[0] = 0;
pipeline_normalize_reduce_sum4_fp32_pack4[1] = 0;

delete pipeline_normalize_reduce_sum4_fp16_to_fp32_pack8;
pipeline_normalize_reduce_sum4_fp16_to_fp32_pack8 = 0;

delete pipeline_normalize_reduce_sum4_fp32_pack8[0];
delete pipeline_normalize_reduce_sum4_fp32_pack8[1];
pipeline_normalize_reduce_sum4_fp32_pack8[0] = 0;
pipeline_normalize_reduce_sum4_fp32_pack8[1] = 0;

delete pipeline_normalize_coeffs;
pipeline_normalize_coeffs = 0;

delete pipeline_normalize_coeffs_pack4;
pipeline_normalize_coeffs_pack4 = 0;

delete pipeline_normalize_coeffs_pack8;
pipeline_normalize_coeffs_pack8 = 0;

delete pipeline_normalize_norm;
pipeline_normalize_norm = 0;

delete pipeline_normalize_norm_pack4;
pipeline_normalize_norm_pack4 = 0;

delete pipeline_normalize_norm_pack8;
pipeline_normalize_norm_pack8 = 0;

return 0;
}

@@ -141,16 +181,6 @@ int Normalize_vulkan::upload_model(VkTransfer& cmd, const Option& opt)
Mat scale_data4(4);
scale_data4.fill(scale_data[0]);
cmd.record_upload(scale_data4, scale_data_gpu, opt);

Mat scale_data_pack4;
convert_packing(scale_data4, scale_data_pack4, 4);
cmd.record_upload(scale_data_pack4, scale_data_gpu_pack4, opt);
}
else if (scale_data_size % 4 == 0)
{
Mat scale_data_pack4;
convert_packing(scale_data, scale_data_pack4, 4);
cmd.record_upload(scale_data_pack4, scale_data_gpu_pack4, opt);
}
else
{
@@ -209,7 +239,9 @@ int Normalize_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
constants[4].i = sqsum_workspace.c;
constants[5].i = sqsum_workspace.cstep;

const Pipeline* pipeline = elempack == 4 ? pipeline_normalize_reduce_sum4_fp16_to_fp32_pack4 : pipeline_normalize_reduce_sum4_fp16_to_fp32;
const Pipeline* pipeline = elempack == 8 ? pipeline_normalize_reduce_sum4_fp16_to_fp32_pack8
: elempack == 4 ? pipeline_normalize_reduce_sum4_fp16_to_fp32_pack4
: pipeline_normalize_reduce_sum4_fp16_to_fp32;

cmd.record_pipeline(pipeline, bindings, constants, sqsum_workspace);
}
@@ -257,7 +289,9 @@ int Normalize_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
constants[4].i = sqsum_workspace_reduced.c;
constants[5].i = sqsum_workspace_reduced.cstep;

const Pipeline* pipeline = elempack == 4 ? pipeline_normalize_reduce_sum4_fp32_pack4[pb%2] : pipeline_normalize_reduce_sum4_fp32[pb%2];
const Pipeline* pipeline = elempack == 8 ? pipeline_normalize_reduce_sum4_fp32_pack8[pb%2]
: elempack == 4 ? pipeline_normalize_reduce_sum4_fp32_pack4[pb%2]
: pipeline_normalize_reduce_sum4_fp32[pb%2];

cmd.record_pipeline(pipeline, bindings, constants, sqsum_workspace_reduced);

@@ -281,7 +315,9 @@ int Normalize_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
constants[1].i = sqsum_workspace.c;
constants[2].i = sqsum_workspace.cstep;

const Pipeline* pipeline = elempack == 4 ? pipeline_normalize_coeffs_pack4 : pipeline_normalize_coeffs;
const Pipeline* pipeline = elempack == 8 ? pipeline_normalize_coeffs_pack8
: elempack == 4 ? pipeline_normalize_coeffs_pack4
: pipeline_normalize_coeffs;

cmd.record_pipeline(pipeline, bindings, constants, coeffs_workspace);
}
@@ -291,7 +327,7 @@ int Normalize_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
std::vector<VkMat> bindings(3);
bindings[0] = bottom_top_blob;
bindings[1] = coeffs_workspace;
bindings[2] = (scale_data_size == 1 && scale_data[0] == 1.f) ? coeffs_workspace : elempack == 4 ? scale_data_gpu_pack4 : scale_data_gpu;
bindings[2] = (scale_data_size == 1 && scale_data[0] == 1.f) ? coeffs_workspace : scale_data_gpu;

std::vector<vk_constant_type> constants(5);
constants[0].i = bottom_top_blob.dims;
@@ -300,7 +336,9 @@ int Normalize_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, co
constants[3].i = bottom_top_blob.c;
constants[4].i = bottom_top_blob.cstep;

const Pipeline* pipeline = elempack == 4 ? pipeline_normalize_norm_pack4 : pipeline_normalize_norm;
const Pipeline* pipeline = elempack == 8 ? pipeline_normalize_norm_pack8
: elempack == 4 ? pipeline_normalize_norm_pack4
: pipeline_normalize_norm;

cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);
}


+ 5
- 1
src/layer/vulkan/normalize_vulkan.h View File

@@ -39,11 +39,15 @@ public:
Pipeline* pipeline_normalize_coeffs;
Pipeline* pipeline_normalize_norm;

VkMat scale_data_gpu_pack4;
Pipeline* pipeline_normalize_reduce_sum4_fp16_to_fp32_pack4;
Pipeline* pipeline_normalize_reduce_sum4_fp32_pack4[2];
Pipeline* pipeline_normalize_coeffs_pack4;
Pipeline* pipeline_normalize_norm_pack4;

Pipeline* pipeline_normalize_reduce_sum4_fp16_to_fp32_pack8;
Pipeline* pipeline_normalize_reduce_sum4_fp32_pack8[2];
Pipeline* pipeline_normalize_coeffs_pack8;
Pipeline* pipeline_normalize_norm_pack8;
};

} // namespace ncnn


+ 20
- 17
src/layer/vulkan/padding_vulkan.cpp View File

@@ -24,6 +24,7 @@ Padding_vulkan::Padding_vulkan()

pipeline_padding = 0;
pipeline_padding_pack4 = 0;
pipeline_padding_pack8 = 0;
}

int Padding_vulkan::create_pipeline(const Option& opt)
@@ -47,6 +48,13 @@ int Padding_vulkan::create_pipeline(const Option& opt)
pipeline_padding_pack4->create("padding_pack4", opt, specializations, 3, 12);
}

// pack8
{
pipeline_padding_pack8 = new Pipeline(vkdev);
pipeline_padding_pack8->set_optimal_local_size_xyz();
pipeline_padding_pack8->create("padding_pack8", opt, specializations, 3, 12);
}

return 0;
}

@@ -58,6 +66,9 @@ int Padding_vulkan::destroy_pipeline(const Option& /*opt*/)
delete pipeline_padding_pack4;
pipeline_padding_pack4 = 0;

delete pipeline_padding_pack8;
pipeline_padding_pack8 = 0;

return 0;
}

@@ -66,19 +77,7 @@ int Padding_vulkan::upload_model(VkTransfer& cmd, const Option& opt)
if (per_channel_pad_data_size == 0)
return 0;

// pack1
if (per_channel_pad_data_size % 4 != 0)
{
cmd.record_upload(per_channel_pad_data, per_channel_pad_data_gpu, opt);
}

// pack4
if (per_channel_pad_data_size % 4 == 0)
{
Mat per_channel_pad_data_pack4;
convert_packing(per_channel_pad_data, per_channel_pad_data_pack4, 4);
cmd.record_upload(per_channel_pad_data_pack4, per_channel_pad_data_gpu_pack4, opt);
}
cmd.record_upload(per_channel_pad_data, per_channel_pad_data_gpu, opt);

return 0;
}
@@ -109,7 +108,7 @@ int Padding_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute
std::vector<VkMat> bindings(3);
bindings[0] = bottom_blob;
bindings[1] = top_blob;
bindings[2] = per_channel_pad_data_size ? (elempack == 4 ? per_channel_pad_data_gpu_pack4 : per_channel_pad_data_gpu) : top_blob;// TODO use dummy buffer
bindings[2] = per_channel_pad_data_size ? per_channel_pad_data_gpu : top_blob;// TODO use dummy buffer

std::vector<vk_constant_type> constants(12);
constants[0].i = bottom_blob.dims;
@@ -125,7 +124,9 @@ int Padding_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute
constants[10].i = left;
constants[11].i = top;

const Pipeline* pipeline = elempack == 4 ? pipeline_padding_pack4 : pipeline_padding;
const Pipeline* pipeline = elempack == 8 ? pipeline_padding_pack8
: elempack == 4 ? pipeline_padding_pack4
: pipeline_padding;

cmd.record_pipeline(pipeline, bindings, constants, top_blob);

@@ -176,7 +177,7 @@ int Padding_vulkan::forward(const std::vector<VkMat>& bottom_blobs, std::vector<
std::vector<VkMat> bindings(3);
bindings[0] = bottom_blob;
bindings[1] = top_blob;
bindings[2] = per_channel_pad_data_size ? (elempack == 4 ? per_channel_pad_data_gpu_pack4 : per_channel_pad_data_gpu) : top_blob;// TODO use dummy buffer
bindings[2] = per_channel_pad_data_size ? per_channel_pad_data_gpu : top_blob;// TODO use dummy buffer

std::vector<vk_constant_type> constants(12);
constants[0].i = bottom_blob.dims;
@@ -192,7 +193,9 @@ int Padding_vulkan::forward(const std::vector<VkMat>& bottom_blobs, std::vector<
constants[10].i = _left;
constants[11].i = _top;

const Pipeline* pipeline = elempack == 4 ? pipeline_padding_pack4 : pipeline_padding;
const Pipeline* pipeline = elempack == 8 ? pipeline_padding_pack8
: elempack == 4 ? pipeline_padding_pack4
: pipeline_padding;

cmd.record_pipeline(pipeline, bindings, constants, top_blob);



+ 1
- 2
src/layer/vulkan/padding_vulkan.h View File

@@ -37,9 +37,8 @@ public:
public:
VkMat per_channel_pad_data_gpu;
Pipeline* pipeline_padding;

VkMat per_channel_pad_data_gpu_pack4;
Pipeline* pipeline_padding_pack4;
Pipeline* pipeline_padding_pack8;
};

} // namespace ncnn


+ 59
- 31
src/layer/vulkan/pooling_vulkan.cpp View File

@@ -27,9 +27,11 @@ Pooling_vulkan::Pooling_vulkan()

padding = 0;
pipeline_pooling = 0;
pipeline_pooling_global = 0;
pipeline_pooling_pack4 = 0;
pipeline_pooling_pack8 = 0;
pipeline_pooling_global = 0;
pipeline_pooling_global_pack4 = 0;
pipeline_pooling_global_pack8 = 0;
}

int Pooling_vulkan::create_pipeline(const Option& opt)
@@ -59,34 +61,6 @@ int Pooling_vulkan::create_pipeline(const Option& opt)
padding->create_pipeline(opt);
}

std::vector<vk_specialization_type> specializations(12);
specializations[0].i = pooling_type;
specializations[1].i = kernel_w;
specializations[2].i = kernel_h;
specializations[3].i = stride_w;
specializations[4].i = stride_h;
specializations[5].i = pad_left;
specializations[6].i = pad_right;
specializations[7].i = pad_top;
specializations[8].i = pad_bottom;
specializations[9].i = global_pooling;
specializations[10].i = pad_mode;
specializations[11].i = avgpool_count_include_pad;

// pack1
{
pipeline_pooling = new Pipeline(vkdev);
pipeline_pooling->set_optimal_local_size_xyz();
pipeline_pooling->create("pooling", opt, specializations, 2, 12);
}

// pack4
{
pipeline_pooling_pack4 = new Pipeline(vkdev);
pipeline_pooling_pack4->set_optimal_local_size_xyz();
pipeline_pooling_pack4->create("pooling_pack4", opt, specializations, 2, 12);
}

if (global_pooling)
{
std::vector<vk_specialization_type> specializations(1);
@@ -105,6 +79,50 @@ int Pooling_vulkan::create_pipeline(const Option& opt)
pipeline_pooling_global_pack4->set_optimal_local_size_xyz(256, 1, 1);
pipeline_pooling_global_pack4->create("pooling_global_pack4", opt, specializations, 2, 12);
}

// pack8: pipeline for the "pooling_global_pack8" shader; forward() picks it on the
// global-pooling path when elempack == 8.
{
pipeline_pooling_global_pack8 = new Pipeline(vkdev);
// Same (256, 1, 1) local size as the pooling_global_pack4 pipeline above.
pipeline_pooling_global_pack8->set_optimal_local_size_xyz(256, 1, 1);
// NOTE(review): trailing 2 / 12 presumably are the binding count and push-constant count
// (forward() fills constants[0..11]) - confirm against Pipeline::create.
pipeline_pooling_global_pack8->create("pooling_global_pack8", opt, specializations, 2, 12);
}
}
// Non-global pooling: build the windowed pooling pipelines (pack1 / pack4 / pack8).
// All three are created from the same 12-entry specialization vector filled in below.
else
{
std::vector<vk_specialization_type> specializations(12);
specializations[0].i = pooling_type;
specializations[1].i = kernel_w;
specializations[2].i = kernel_h;
specializations[3].i = stride_w;
specializations[4].i = stride_h;
specializations[5].i = pad_left;
specializations[6].i = pad_right;
specializations[7].i = pad_top;
specializations[8].i = pad_bottom;
// NOTE(review): this is the else-branch of `if (global_pooling)`, so this value is
// presumably always 0 here - confirm the shaders still expect the slot.
specializations[9].i = global_pooling;
specializations[10].i = pad_mode;
specializations[11].i = avgpool_count_include_pad;

// pack1: "pooling" shader, the fallback when elempack is neither 8 nor 4
{
pipeline_pooling = new Pipeline(vkdev);
pipeline_pooling->set_optimal_local_size_xyz();
pipeline_pooling->create("pooling", opt, specializations, 2, 12);
}

// pack4: "pooling_pack4" shader, selected in forward() when elempack == 4
{
pipeline_pooling_pack4 = new Pipeline(vkdev);
pipeline_pooling_pack4->set_optimal_local_size_xyz();
pipeline_pooling_pack4->create("pooling_pack4", opt, specializations, 2, 12);
}

// pack8: "pooling_pack8" shader, selected in forward() when elempack == 8
{
pipeline_pooling_pack8 = new Pipeline(vkdev);
pipeline_pooling_pack8->set_optimal_local_size_xyz();
pipeline_pooling_pack8->create("pooling_pack8", opt, specializations, 2, 12);
}
}

return 0;
@@ -125,12 +143,18 @@ int Pooling_vulkan::destroy_pipeline(const Option& opt)
delete pipeline_pooling_pack4;
pipeline_pooling_pack4 = 0;

delete pipeline_pooling_pack8;
pipeline_pooling_pack8 = 0;

delete pipeline_pooling_global;
pipeline_pooling_global = 0;

delete pipeline_pooling_global_pack4;
pipeline_pooling_global_pack4 = 0;

delete pipeline_pooling_global_pack8;
pipeline_pooling_global_pack8 = 0;

return 0;
}

@@ -166,7 +190,9 @@ int Pooling_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute
constants[10].i = 0;
constants[11].i = 0;

const Pipeline* pipeline = elempack == 4 ? pipeline_pooling_global_pack4 : pipeline_pooling_global;
const Pipeline* pipeline = elempack == 8 ? pipeline_pooling_global_pack8
: elempack == 4 ? pipeline_pooling_global_pack4
: pipeline_pooling_global;

cmd.record_pipeline(pipeline, bindings, constants, top_blob);

@@ -298,7 +324,9 @@ int Pooling_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute
constants[10].i = wtailpad;
constants[11].i = htailpad;

const Pipeline* pipeline = elempack == 4 ? pipeline_pooling_pack4 : pipeline_pooling;
const Pipeline* pipeline = elempack == 8 ? pipeline_pooling_pack8
: elempack == 4 ? pipeline_pooling_pack4
: pipeline_pooling;

cmd.record_pipeline(pipeline, bindings, constants, top_blob);



+ 3
- 1
src/layer/vulkan/pooling_vulkan.h View File

@@ -34,9 +34,11 @@ public:
ncnn::Layer* padding;

Pipeline* pipeline_pooling;
Pipeline* pipeline_pooling_global;
Pipeline* pipeline_pooling_pack4;
Pipeline* pipeline_pooling_pack8;
Pipeline* pipeline_pooling_global;
Pipeline* pipeline_pooling_global_pack4;
Pipeline* pipeline_pooling_global_pack8;
};

} // namespace ncnn


+ 14
- 1
src/layer/vulkan/relu_vulkan.cpp View File

@@ -24,6 +24,7 @@ ReLU_vulkan::ReLU_vulkan()

pipeline_relu = 0;
pipeline_relu_pack4 = 0;
pipeline_relu_pack8 = 0;
}

int ReLU_vulkan::create_pipeline(const Option& opt)
@@ -45,6 +46,13 @@ int ReLU_vulkan::create_pipeline(const Option& opt)
pipeline_relu_pack4->create("relu_pack4", opt, specializations, 1, 5);
}

// pack8: pipeline for the "relu_pack8" shader; forward_inplace() picks it when elempack == 8.
// Released again in destroy_pipeline().
{
pipeline_relu_pack8 = new Pipeline(vkdev);
pipeline_relu_pack8->set_optimal_local_size_xyz();
// Same specializations and trailing (1, 5) arguments as the relu_pack4 create call above.
pipeline_relu_pack8->create("relu_pack8", opt, specializations, 1, 5);
}

return 0;
}

@@ -56,6 +64,9 @@ int ReLU_vulkan::destroy_pipeline(const Option& /*opt*/)
delete pipeline_relu_pack4;
pipeline_relu_pack4 = 0;

delete pipeline_relu_pack8;
pipeline_relu_pack8 = 0;

return 0;
}

@@ -73,7 +84,9 @@ int ReLU_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const O
constants[3].i = bottom_top_blob.c;
constants[4].i = bottom_top_blob.cstep;

const Pipeline* pipeline = elempack == 4 ? pipeline_relu_pack4 : pipeline_relu;
const Pipeline* pipeline = elempack == 8 ? pipeline_relu_pack8
: elempack == 4 ? pipeline_relu_pack4
: pipeline_relu;

cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);



+ 1
- 0
src/layer/vulkan/relu_vulkan.h View File

@@ -33,6 +33,7 @@ public:
public:
Pipeline* pipeline_relu;
Pipeline* pipeline_relu_pack4;
Pipeline* pipeline_relu_pack8;
};

} // namespace ncnn


+ 14
- 1
src/layer/vulkan/sigmoid_vulkan.cpp View File

@@ -25,6 +25,7 @@ Sigmoid_vulkan::Sigmoid_vulkan()

pipeline_sigmoid = 0;
pipeline_sigmoid_pack4 = 0;
pipeline_sigmoid_pack8 = 0;
}

int Sigmoid_vulkan::create_pipeline(const Option& opt)
@@ -45,6 +46,13 @@ int Sigmoid_vulkan::create_pipeline(const Option& opt)
pipeline_sigmoid_pack4->create("sigmoid_pack4", opt, specializations, 1, 5);
}

// pack8: pipeline for the "sigmoid_pack8" shader; forward_inplace() picks it when elempack == 8.
// Released again in destroy_pipeline().
{
pipeline_sigmoid_pack8 = new Pipeline(vkdev);
pipeline_sigmoid_pack8->set_optimal_local_size_xyz();
// Same specializations and trailing (1, 5) arguments as the sigmoid_pack4 create call above.
pipeline_sigmoid_pack8->create("sigmoid_pack8", opt, specializations, 1, 5);
}

return 0;
}

@@ -56,6 +64,9 @@ int Sigmoid_vulkan::destroy_pipeline(const Option& /*opt*/)
delete pipeline_sigmoid_pack4;
pipeline_sigmoid_pack4 = 0;

delete pipeline_sigmoid_pack8;
pipeline_sigmoid_pack8 = 0;

return 0;
}

@@ -73,7 +84,9 @@ int Sigmoid_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, cons
constants[3].i = bottom_top_blob.c;
constants[4].i = bottom_top_blob.cstep;

const Pipeline* pipeline = elempack == 4 ? pipeline_sigmoid_pack4 : pipeline_sigmoid;
const Pipeline* pipeline = elempack == 8 ? pipeline_sigmoid_pack8
: elempack == 4 ? pipeline_sigmoid_pack4
: pipeline_sigmoid;

cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);



+ 1
- 0
src/layer/vulkan/sigmoid_vulkan.h View File

@@ -33,6 +33,7 @@ public:
public:
Pipeline* pipeline_sigmoid;
Pipeline* pipeline_sigmoid_pack4;
Pipeline* pipeline_sigmoid_pack8;
};

} // namespace ncnn


+ 47
- 4
src/layer/vulkan/softmax_vulkan.cpp View File

@@ -34,6 +34,11 @@ Softmax_vulkan::Softmax_vulkan()
pipeline_softmax_exp_sub_max_pack4 = 0;
pipeline_softmax_reduce_sum_pack4 = 0;
pipeline_softmax_div_sum_pack4 = 0;

pipeline_softmax_reduce_max_pack8 = 0;
pipeline_softmax_exp_sub_max_pack8 = 0;
pipeline_softmax_reduce_sum_pack8 = 0;
pipeline_softmax_div_sum_pack8 = 0;
}

int Softmax_vulkan::create_pipeline(const Option& opt)
@@ -77,6 +82,24 @@ int Softmax_vulkan::create_pipeline(const Option& opt)
pipeline_softmax_div_sum_pack4->create("softmax_div_sum_pack4", opt, specializations, 2, 10);
}

// pack8: the four softmax stage pipelines (reduce_max, exp_sub_max, reduce_sum, div_sum)
// in their *_pack8 shader variants; forward_inplace() picks each one when elempack == 8.
{
// Allocate all four stage pipelines; they are deleted in destroy_pipeline().
pipeline_softmax_reduce_max_pack8 = new Pipeline(vkdev);
pipeline_softmax_exp_sub_max_pack8 = new Pipeline(vkdev);
pipeline_softmax_reduce_sum_pack8 = new Pipeline(vkdev);
pipeline_softmax_div_sum_pack8 = new Pipeline(vkdev);

pipeline_softmax_reduce_max_pack8->set_optimal_local_size_xyz();
pipeline_softmax_exp_sub_max_pack8->set_optimal_local_size_xyz();
pipeline_softmax_reduce_sum_pack8->set_optimal_local_size_xyz();
pipeline_softmax_div_sum_pack8->set_optimal_local_size_xyz();

// NOTE(review): trailing 2 / 10 presumably are the binding count and push-constant count
// (forward_inplace() fills constants up to index 9) - confirm against Pipeline::create.
pipeline_softmax_reduce_max_pack8->create("softmax_reduce_max_pack8", opt, specializations, 2, 10);
pipeline_softmax_exp_sub_max_pack8->create("softmax_exp_sub_max_pack8", opt, specializations, 2, 10);
pipeline_softmax_reduce_sum_pack8->create("softmax_reduce_sum_pack8", opt, specializations, 2, 10);
pipeline_softmax_div_sum_pack8->create("softmax_div_sum_pack8", opt, specializations, 2, 10);
}

return 0;
}

@@ -106,6 +129,18 @@ int Softmax_vulkan::destroy_pipeline(const Option& /*opt*/)
delete pipeline_softmax_div_sum_pack4;
pipeline_softmax_div_sum_pack4 = 0;

delete pipeline_softmax_reduce_max_pack8;
pipeline_softmax_reduce_max_pack8 = 0;

delete pipeline_softmax_exp_sub_max_pack8;
pipeline_softmax_exp_sub_max_pack8 = 0;

delete pipeline_softmax_reduce_sum_pack8;
pipeline_softmax_reduce_sum_pack8 = 0;

delete pipeline_softmax_div_sum_pack8;
pipeline_softmax_div_sum_pack8 = 0;

return 0;
}

@@ -170,7 +205,9 @@ int Softmax_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, cons
constants[8].i = max_workspace.c;
constants[9].i = max_workspace.cstep;

const Pipeline* pipeline = elempack == 4 ? pipeline_softmax_reduce_max_pack4 : pipeline_softmax_reduce_max;
const Pipeline* pipeline = elempack == 8 ? pipeline_softmax_reduce_max_pack8
: elempack == 4 ? pipeline_softmax_reduce_max_pack4
: pipeline_softmax_reduce_max;

cmd.record_pipeline(pipeline, bindings, constants, max_workspace);
}
@@ -193,7 +230,9 @@ int Softmax_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, cons
constants[8].i = max_workspace.c;
constants[9].i = max_workspace.cstep;

const Pipeline* pipeline = elempack == 4 ? pipeline_softmax_exp_sub_max_pack4 : pipeline_softmax_exp_sub_max;
const Pipeline* pipeline = elempack == 8 ? pipeline_softmax_exp_sub_max_pack8
: elempack == 4 ? pipeline_softmax_exp_sub_max_pack4
: pipeline_softmax_exp_sub_max;

cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);
}
@@ -216,7 +255,9 @@ int Softmax_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, cons
constants[8].i = sum_workspace.c;
constants[9].i = sum_workspace.cstep;

const Pipeline* pipeline = elempack == 4 ? pipeline_softmax_reduce_sum_pack4 : pipeline_softmax_reduce_sum;
const Pipeline* pipeline = elempack == 8 ? pipeline_softmax_reduce_sum_pack8
: elempack == 4 ? pipeline_softmax_reduce_sum_pack4
: pipeline_softmax_reduce_sum;

cmd.record_pipeline(pipeline, bindings, constants, sum_workspace);
}
@@ -239,7 +280,9 @@ int Softmax_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, cons
constants[8].i = sum_workspace.c;
constants[9].i = sum_workspace.cstep;

const Pipeline* pipeline = elempack == 4 ? pipeline_softmax_div_sum_pack4 : pipeline_softmax_div_sum;
const Pipeline* pipeline = elempack == 8 ? pipeline_softmax_div_sum_pack8
: elempack == 4 ? pipeline_softmax_div_sum_pack4
: pipeline_softmax_div_sum;

cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);
}


+ 5
- 0
src/layer/vulkan/softmax_vulkan.h View File

@@ -40,6 +40,11 @@ public:
Pipeline* pipeline_softmax_exp_sub_max_pack4;
Pipeline* pipeline_softmax_reduce_sum_pack4;
Pipeline* pipeline_softmax_div_sum_pack4;

Pipeline* pipeline_softmax_reduce_max_pack8;
Pipeline* pipeline_softmax_exp_sub_max_pack8;
Pipeline* pipeline_softmax_reduce_sum_pack8;
Pipeline* pipeline_softmax_div_sum_pack8;
};

} // namespace ncnn


+ 14
- 1
src/layer/vulkan/tanh_vulkan.cpp View File

@@ -25,6 +25,7 @@ TanH_vulkan::TanH_vulkan()

pipeline_tanh = 0;
pipeline_tanh_pack4 = 0;
pipeline_tanh_pack8 = 0;
}

int TanH_vulkan::create_pipeline(const Option& opt)
@@ -45,6 +46,13 @@ int TanH_vulkan::create_pipeline(const Option& opt)
pipeline_tanh_pack4->create("tanh_pack4", opt, specializations, 1, 5);
}

// pack8: pipeline for the "tanh_pack8" shader; forward_inplace() picks it when elempack == 8.
// Released again in destroy_pipeline().
{
pipeline_tanh_pack8 = new Pipeline(vkdev);
pipeline_tanh_pack8->set_optimal_local_size_xyz();
// Same specializations and trailing (1, 5) arguments as the tanh_pack4 create call above.
pipeline_tanh_pack8->create("tanh_pack8", opt, specializations, 1, 5);
}

return 0;
}

@@ -56,6 +64,9 @@ int TanH_vulkan::destroy_pipeline(const Option& /*opt*/)
delete pipeline_tanh_pack4;
pipeline_tanh_pack4 = 0;

delete pipeline_tanh_pack8;
pipeline_tanh_pack8 = 0;

return 0;
}

@@ -73,7 +84,9 @@ int TanH_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const O
constants[3].i = bottom_top_blob.c;
constants[4].i = bottom_top_blob.cstep;

const Pipeline* pipeline = elempack == 4 ? pipeline_tanh_pack4 : pipeline_tanh;
const Pipeline* pipeline = elempack == 8 ? pipeline_tanh_pack8
: elempack == 4 ? pipeline_tanh_pack4
: pipeline_tanh;

cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);



+ 1
- 0
src/layer/vulkan/tanh_vulkan.h View File

@@ -33,6 +33,7 @@ public:
public:
Pipeline* pipeline_tanh;
Pipeline* pipeline_tanh_pack4;
Pipeline* pipeline_tanh_pack8;
};

} // namespace ncnn


+ 14
- 1
src/layer/vulkan/unaryop_vulkan.cpp View File

@@ -24,6 +24,7 @@ UnaryOp_vulkan::UnaryOp_vulkan()

pipeline_unaryop = 0;
pipeline_unaryop_pack4 = 0;
pipeline_unaryop_pack8 = 0;
}

int UnaryOp_vulkan::create_pipeline(const Option& opt)
@@ -45,6 +46,13 @@ int UnaryOp_vulkan::create_pipeline(const Option& opt)
pipeline_unaryop_pack4->create("unaryop_pack4", opt, specializations, 1, 5);
}

// pack8: pipeline for the "unaryop_pack8" shader; forward_inplace() picks it when elempack == 8.
// Released again in destroy_pipeline().
{
pipeline_unaryop_pack8 = new Pipeline(vkdev);
pipeline_unaryop_pack8->set_optimal_local_size_xyz();
// Same specializations and trailing (1, 5) arguments as the unaryop_pack4 create call above.
pipeline_unaryop_pack8->create("unaryop_pack8", opt, specializations, 1, 5);
}

return 0;
}

@@ -56,6 +64,9 @@ int UnaryOp_vulkan::destroy_pipeline(const Option& /*opt*/)
delete pipeline_unaryop_pack4;
pipeline_unaryop_pack4 = 0;

delete pipeline_unaryop_pack8;
pipeline_unaryop_pack8 = 0;

return 0;
}

@@ -73,7 +84,9 @@ int UnaryOp_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, cons
constants[3].i = bottom_top_blob.c;
constants[4].i = bottom_top_blob.cstep;

const Pipeline* pipeline = elempack == 4 ? pipeline_unaryop_pack4 : pipeline_unaryop;
const Pipeline* pipeline = elempack == 8 ? pipeline_unaryop_pack8
: elempack == 4 ? pipeline_unaryop_pack4
: pipeline_unaryop;

cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);



+ 1
- 0
src/layer/vulkan/unaryop_vulkan.h View File

@@ -33,6 +33,7 @@ public:
public:
Pipeline* pipeline_unaryop;
Pipeline* pipeline_unaryop_pack4;
Pipeline* pipeline_unaryop_pack8;
};

} // namespace ncnn


Loading…
Cancel
Save