From d85775fbcd92f46acd5a64c9edeba7d83d49ecac Mon Sep 17 00:00:00 2001 From: nihui Date: Sun, 3 Mar 2019 14:20:05 +0800 Subject: [PATCH] fix softmax axis order on 3-dim, fix caffe reshape conversion, regenerate ssd param --- benchmark/mobilenet_ssd.param | 2 +- benchmark/squeezenet_ssd.param | 2 +- src/layer/concat.cpp | 18 +++---- src/layer/flatten.cpp | 11 +++- src/layer/shader/softmax_div_sum.comp | 14 +++-- src/layer/shader/softmax_div_sum_pack4.comp | 14 +++-- src/layer/shader/softmax_exp_sub_max.comp | 14 +++-- .../shader/softmax_exp_sub_max_pack4.comp | 14 +++-- src/layer/shader/softmax_reduce_max.comp | 46 ++++++++++++---- .../shader/softmax_reduce_max_pack4.comp | 50 ++++++++++++++---- src/layer/shader/softmax_reduce_sum.comp | 48 +++++++++++++---- .../shader/softmax_reduce_sum_pack4.comp | 52 +++++++++++++++---- src/layer/softmax.cpp | 44 ++++++++-------- tools/caffe/caffe2ncnn.cpp | 6 +-- 14 files changed, 240 insertions(+), 95 deletions(-) diff --git a/benchmark/mobilenet_ssd.param b/benchmark/mobilenet_ssd.param index c1362b7dc..5bda98c28 100644 --- a/benchmark/mobilenet_ssd.param +++ b/benchmark/mobilenet_ssd.param @@ -123,7 +123,7 @@ PriorBox conv17_2_mbox_priorbox 2 1 conv17_2_conv17_2/relu_splitncnn_0 d Concat mbox_loc 6 1 conv11_mbox_loc_flat conv13_mbox_loc_flat conv14_2_mbox_loc_flat conv15_2_mbox_loc_flat conv16_2_mbox_loc_flat conv17_2_mbox_loc_flat mbox_loc 0=0 Concat mbox_conf 6 1 conv11_mbox_conf_flat conv13_mbox_conf_flat conv14_2_mbox_conf_flat conv15_2_mbox_conf_flat conv16_2_mbox_conf_flat conv17_2_mbox_conf_flat mbox_conf 0=0 Concat mbox_priorbox 6 1 conv11_mbox_priorbox conv13_mbox_priorbox conv14_2_mbox_priorbox conv15_2_mbox_priorbox conv16_2_mbox_priorbox conv17_2_mbox_priorbox mbox_priorbox 0=1 -Reshape mbox_conf_reshape 1 1 mbox_conf mbox_conf_reshape 0=21 1=-1 2=0 3=0 +Reshape mbox_conf_reshape 1 1 mbox_conf mbox_conf_reshape 0=21 1=-1 2=-233 3=0 Softmax mbox_conf_softmax 1 1 mbox_conf_reshape mbox_conf_softmax 0=1 Flatten mbox_conf_flatten 1 1 mbox_conf_softmax mbox_conf_flatten DetectionOutput detection_out 3 1 mbox_loc mbox_conf_flatten mbox_priorbox detection_out 0=21 1=0.450000 2=100 3=100 4=0.250000 diff --git a/benchmark/squeezenet_ssd.param b/benchmark/squeezenet_ssd.param index f4c4c6657..cb54f96ca 100644 --- a/benchmark/squeezenet_ssd.param +++ b/benchmark/squeezenet_ssd.param @@ -175,7 +175,7 @@ PriorBox conv13_2_mbox_priorbox 2 1 conv13_2_conv13_2/relu_splitncnn_0 d Concat mbox_loc 6 1 fire5_mbox_loc_flat fire9_mbox_loc_flat fire10_mbox_loc_flat fire11_mbox_loc_flat conv12_2_mbox_loc_flat conv13_2_mbox_loc_flat mbox_loc 0=0 Concat mbox_conf 6 1 fire5_mbox_conf_flat fire9_mbox_conf_flat fire10_mbox_conf_flat fire11_mbox_conf_flat conv12_2_mbox_conf_flat conv13_2_mbox_conf_flat mbox_conf 0=0 Concat mbox_priorbox 6 1 fire5_mbox_priorbox fire9_mbox_priorbox fire10_mbox_priorbox fire11_mbox_priorbox conv12_2_mbox_priorbox conv13_2_mbox_priorbox mbox_priorbox 0=1 -Reshape mbox_conf_reshape 1 1 mbox_conf mbox_conf_reshape 0=21 1=-1 2=0 3=0 +Reshape mbox_conf_reshape 1 1 mbox_conf mbox_conf_reshape 0=21 1=-1 2=-233 3=0 Softmax mbox_conf_softmax 1 1 mbox_conf_reshape mbox_conf_softmax 0=1 Flatten mbox_conf_flatten 1 1 mbox_conf_softmax mbox_conf_flatten DetectionOutput detection_out 3 1 mbox_loc mbox_conf_flatten mbox_priorbox detection_out 0=21 1=0.450000 2=100 3=100 4=0.250000 diff --git a/src/layer/concat.cpp b/src/layer/concat.cpp index b6f74f796..dc6c0f06d 100644 --- a/src/layer/concat.cpp +++ b/src/layer/concat.cpp @@ -370,15 +370,15 @@ int Concat::forward(const std::vector& bottom_blobs, std::vector& constants[10].i = woffset; const Pipeline* pipeline = 0; - if (packing == 1 && out_packing == 1) + if (bottom_blob.packing == 1 && out_packing == 1) { pipeline = pipeline_concat; } - else if (packing == 4 && out_packing == 4) + else if (bottom_blob.packing == 4 && out_packing == 4) { pipeline = pipeline_concat_pack4; } - else if (packing == 4 && out_packing == 1) + else if (bottom_blob.packing == 4 && out_packing == 1) { pipeline = pipeline_concat_pack4to1; } @@ -450,15 +450,15 @@ int Concat::forward(const std::vector& bottom_blobs, std::vector& constants[10].i = hoffset; const Pipeline* pipeline = 0; - if (packing == 1 && out_packing == 1) + if (bottom_blob.packing == 1 && out_packing == 1) { pipeline = pipeline_concat; } - else if (packing == 4 && out_packing == 4) + else if (bottom_blob.packing == 4 && out_packing == 4) { pipeline = pipeline_concat_pack4; } - else if (packing == 4 && out_packing == 1) + else if (bottom_blob.packing == 4 && out_packing == 1) { pipeline = pipeline_concat_pack4to1; } @@ -587,15 +587,15 @@ int Concat::forward(const std::vector& bottom_blobs, std::vector& constants[10].i = coffset; const Pipeline* pipeline = 0; - if (packing == 1 && out_packing == 1) + if (bottom_blob.packing == 1 && out_packing == 1) { pipeline = pipeline_concat; } - else if (packing == 4 && out_packing == 4) + else if (bottom_blob.packing == 4 && out_packing == 4) { pipeline = pipeline_concat_pack4; } - else if (packing == 4 && out_packing == 1) + else if (bottom_blob.packing == 4 && out_packing == 1) { pipeline = pipeline_concat_pack4to1; } diff --git a/src/layer/flatten.cpp b/src/layer/flatten.cpp index b0e30cc4e..17baaad3b 100644 --- a/src/layer/flatten.cpp +++ b/src/layer/flatten.cpp @@ -83,12 +83,21 @@ int Flatten::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, { int dims = bottom_blob.dims; - if (dims == 1 || dims == 2) + if (dims == 1) { top_blob = bottom_blob; return 0; } + if (dims == 2) + { + top_blob = bottom_blob; + top_blob.dims = 1; + top_blob.w = bottom_blob.w * bottom_blob.h; + top_blob.h = 1; + return 0; + } + int w = bottom_blob.w; int h = bottom_blob.h; int channels = bottom_blob.c; diff --git a/src/layer/shader/softmax_div_sum.comp b/src/layer/shader/softmax_div_sum.comp index b1ffbbd03..48949a786 100644 --- a/src/layer/shader/softmax_div_sum.comp +++ b/src/layer/shader/softmax_div_sum.comp @@ -55,32 +55,36 @@ void main() if (p.dims == 2 && axis == 0) { - // FIXME TODO + int gi = gy * p.w + gx; + bottom_top_blob_data[gi] /= sum_workspace_data[gx]; return; } if (p.dims == 2 && axis == 1) { - // FIXME TODO + int gi = gy * p.w + gx; + bottom_top_blob_data[gi] /= sum_workspace_data[gy]; return; } if (p.dims == 3 && axis == 0) { int gi = gz * p.cstep + gy * p.w + gx; - bottom_top_blob_data[gi] /= sum_workspace_data[ gy * p.w + gx ]; + bottom_top_blob_data[gi] /= sum_workspace_data[gy * p.w + gx]; return; } if (p.dims == 3 && axis == 1) { - // FIXME TODO + int gi = gz * p.cstep + gy * p.w + gx; + bottom_top_blob_data[gi] /= sum_workspace_data[gz * p.w + gx]; return; } if (p.dims == 3 && axis == 2) { - // FIXME TODO + int gi = gz * p.cstep + gy * p.w + gx; + bottom_top_blob_data[gi] /= sum_workspace_data[gz * p.h + gy]; return; } } diff --git a/src/layer/shader/softmax_div_sum_pack4.comp b/src/layer/shader/softmax_div_sum_pack4.comp index 1c1e10f57..702927808 100644 --- a/src/layer/shader/softmax_div_sum_pack4.comp +++ b/src/layer/shader/softmax_div_sum_pack4.comp @@ -55,32 +55,36 @@ void main() if (p.dims == 2 && axis == 0) { - // FIXME TODO + int gi = gy * p.w + gx; + bottom_top_blob_data[gi] /= sum_workspace_data[gx]; return; } if (p.dims == 2 && axis == 1) { - // FIXME TODO + int gi = gy * p.w + gx; + bottom_top_blob_data[gi] /= sum_workspace_data[gy]; return; } if (p.dims == 3 && axis == 0) { int gi = gz * p.cstep + gy * p.w + gx; - bottom_top_blob_data[gi] /= sum_workspace_data[ gy * p.w + gx ]; + bottom_top_blob_data[gi] /= sum_workspace_data[gy * p.w + gx]; return; } if (p.dims == 3 && axis == 1) { - // FIXME TODO + int gi = gz * p.cstep + gy * p.w + gx; + bottom_top_blob_data[gi] /= sum_workspace_data[gz * p.w + gx]; return; } if (p.dims == 3 && axis == 2) { - // FIXME TODO + int gi = gz * p.cstep + gy * p.w + gx; + bottom_top_blob_data[gi] /= sum_workspace_data[gz * p.h + gy]; return; } } diff --git a/src/layer/shader/softmax_exp_sub_max.comp b/src/layer/shader/softmax_exp_sub_max.comp index 0c8ff8bb4..838c4e84d 100644 --- a/src/layer/shader/softmax_exp_sub_max.comp +++ b/src/layer/shader/softmax_exp_sub_max.comp @@ -55,32 +55,36 @@ void main() if (p.dims == 2 && axis == 0) { - // FIXME TODO + int gi = gy * p.w + gx; + bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gx]); return; } if (p.dims == 2 && axis == 1) { - // FIXME TODO + int gi = gy * p.w + gx; + bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gy]); return; } if (p.dims == 3 && axis == 0) { int gi = gz * p.cstep + gy * p.w + gx; - bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[ gy * p.w + gx ]); + bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gy * p.w + gx]); return; } if (p.dims == 3 && axis == 1) { - // FIXME TODO + int gi = gz * p.cstep + gy * p.w + gx; + bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gz * p.w + gx]); return; } if (p.dims == 3 && axis == 2) { - // FIXME TODO + int gi = gz * p.cstep + gy * p.w + gx; + bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gz * p.h + gy]); return; } } diff --git a/src/layer/shader/softmax_exp_sub_max_pack4.comp b/src/layer/shader/softmax_exp_sub_max_pack4.comp index b16e8bedb..99af1498a 100644 --- a/src/layer/shader/softmax_exp_sub_max_pack4.comp +++ b/src/layer/shader/softmax_exp_sub_max_pack4.comp @@ -55,32 +55,36 @@ void main() if (p.dims == 2 && axis == 0) { - // FIXME TODO + int gi = gy * p.w + gx; + bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gx]); return; } if (p.dims == 2 && axis == 1) { - // FIXME TODO + int gi = gy * p.w + gx; + bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gy]); return; } if (p.dims == 3 && axis == 0) { int gi = gz * p.cstep + gy * p.w + gx; - bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[ gy * p.w + gx ]); + bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gy * p.w + gx]); return; } if (p.dims == 3 && axis == 1) { - // FIXME TODO + int gi = gz * p.cstep + gy * p.w + gx; + bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gz * p.w + gx]); return; } if (p.dims == 3 && axis == 2) { - // FIXME TODO + int gi = gz * p.cstep + gy * p.w + gx; + bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gz * p.h + gy]); return; } } diff --git a/src/layer/shader/softmax_reduce_max.comp b/src/layer/shader/softmax_reduce_max.comp index b32a547a2..0e9e7563c 100644 --- a/src/layer/shader/softmax_reduce_max.comp +++ b/src/layer/shader/softmax_reduce_max.comp @@ -44,13 +44,13 @@ void main() int gy = int(gl_GlobalInvocationID.y); int gz = int(gl_GlobalInvocationID.z); - if (gx >= p.w || gy >= p.h || gz >= p.c) + if (gx >= p.outw || gy >= p.outh || gz >= p.outc) return; if (p.dims == 1) // axis == 0 { float max_value = -99999999; - for (int i=0; i= p.w || gy >= p.h || gz >= p.c) + if (gx >= p.outw || gy >= p.outh || gz >= p.outc) return; if (p.dims == 1) // axis == 0 { vec4 max_value = vec4(-99999999); - for (int i=0; i= p.w || gy >= p.h || gz >= p.c) + if (gx >= p.outw || gy >= p.outh || gz >= p.outc) return; if (p.dims == 1) // axis == 0 { float sum_value = 0.f; - for (int i=0; i= p.w || gy >= p.h || gz >= p.c) + if (gx >= p.outw || gy >= p.outh || gz >= p.outc) return; if (p.dims == 1) // axis == 0 { - vec4 sum_value = vec4(0.0); - for (int i=0; i