| @@ -123,7 +123,7 @@ PriorBox conv17_2_mbox_priorbox 2 1 conv17_2_conv17_2/relu_splitncnn_0 d | |||
| Concat mbox_loc 6 1 conv11_mbox_loc_flat conv13_mbox_loc_flat conv14_2_mbox_loc_flat conv15_2_mbox_loc_flat conv16_2_mbox_loc_flat conv17_2_mbox_loc_flat mbox_loc 0=0 | |||
| Concat mbox_conf 6 1 conv11_mbox_conf_flat conv13_mbox_conf_flat conv14_2_mbox_conf_flat conv15_2_mbox_conf_flat conv16_2_mbox_conf_flat conv17_2_mbox_conf_flat mbox_conf 0=0 | |||
| Concat mbox_priorbox 6 1 conv11_mbox_priorbox conv13_mbox_priorbox conv14_2_mbox_priorbox conv15_2_mbox_priorbox conv16_2_mbox_priorbox conv17_2_mbox_priorbox mbox_priorbox 0=1 | |||
| Reshape mbox_conf_reshape 1 1 mbox_conf mbox_conf_reshape 0=21 1=-1 2=0 3=0 | |||
| Reshape mbox_conf_reshape 1 1 mbox_conf mbox_conf_reshape 0=21 1=-1 2=-233 3=0 | |||
| Softmax mbox_conf_softmax 1 1 mbox_conf_reshape mbox_conf_softmax 0=1 | |||
| Flatten mbox_conf_flatten 1 1 mbox_conf_softmax mbox_conf_flatten | |||
| DetectionOutput detection_out 3 1 mbox_loc mbox_conf_flatten mbox_priorbox detection_out 0=21 1=0.450000 2=100 3=100 4=0.250000 | |||
| @@ -175,7 +175,7 @@ PriorBox conv13_2_mbox_priorbox 2 1 conv13_2_conv13_2/relu_splitncnn_0 d | |||
| Concat mbox_loc 6 1 fire5_mbox_loc_flat fire9_mbox_loc_flat fire10_mbox_loc_flat fire11_mbox_loc_flat conv12_2_mbox_loc_flat conv13_2_mbox_loc_flat mbox_loc 0=0 | |||
| Concat mbox_conf 6 1 fire5_mbox_conf_flat fire9_mbox_conf_flat fire10_mbox_conf_flat fire11_mbox_conf_flat conv12_2_mbox_conf_flat conv13_2_mbox_conf_flat mbox_conf 0=0 | |||
| Concat mbox_priorbox 6 1 fire5_mbox_priorbox fire9_mbox_priorbox fire10_mbox_priorbox fire11_mbox_priorbox conv12_2_mbox_priorbox conv13_2_mbox_priorbox mbox_priorbox 0=1 | |||
| Reshape mbox_conf_reshape 1 1 mbox_conf mbox_conf_reshape 0=21 1=-1 2=0 3=0 | |||
| Reshape mbox_conf_reshape 1 1 mbox_conf mbox_conf_reshape 0=21 1=-1 2=-233 3=0 | |||
| Softmax mbox_conf_softmax 1 1 mbox_conf_reshape mbox_conf_softmax 0=1 | |||
| Flatten mbox_conf_flatten 1 1 mbox_conf_softmax mbox_conf_flatten | |||
| DetectionOutput detection_out 3 1 mbox_loc mbox_conf_flatten mbox_priorbox detection_out 0=21 1=0.450000 2=100 3=100 4=0.250000 | |||
| @@ -370,15 +370,15 @@ int Concat::forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& | |||
| constants[10].i = woffset; | |||
| const Pipeline* pipeline = 0; | |||
| if (packing == 1 && out_packing == 1) | |||
| if (bottom_blob.packing == 1 && out_packing == 1) | |||
| { | |||
| pipeline = pipeline_concat; | |||
| } | |||
| else if (packing == 4 && out_packing == 4) | |||
| else if (bottom_blob.packing == 4 && out_packing == 4) | |||
| { | |||
| pipeline = pipeline_concat_pack4; | |||
| } | |||
| else if (packing == 4 && out_packing == 1) | |||
| else if (bottom_blob.packing == 4 && out_packing == 1) | |||
| { | |||
| pipeline = pipeline_concat_pack4to1; | |||
| } | |||
| @@ -450,15 +450,15 @@ int Concat::forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& | |||
| constants[10].i = hoffset; | |||
| const Pipeline* pipeline = 0; | |||
| if (packing == 1 && out_packing == 1) | |||
| if (bottom_blob.packing == 1 && out_packing == 1) | |||
| { | |||
| pipeline = pipeline_concat; | |||
| } | |||
| else if (packing == 4 && out_packing == 4) | |||
| else if (bottom_blob.packing == 4 && out_packing == 4) | |||
| { | |||
| pipeline = pipeline_concat_pack4; | |||
| } | |||
| else if (packing == 4 && out_packing == 1) | |||
| else if (bottom_blob.packing == 4 && out_packing == 1) | |||
| { | |||
| pipeline = pipeline_concat_pack4to1; | |||
| } | |||
| @@ -587,15 +587,15 @@ int Concat::forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& | |||
| constants[10].i = coffset; | |||
| const Pipeline* pipeline = 0; | |||
| if (packing == 1 && out_packing == 1) | |||
| if (bottom_blob.packing == 1 && out_packing == 1) | |||
| { | |||
| pipeline = pipeline_concat; | |||
| } | |||
| else if (packing == 4 && out_packing == 4) | |||
| else if (bottom_blob.packing == 4 && out_packing == 4) | |||
| { | |||
| pipeline = pipeline_concat_pack4; | |||
| } | |||
| else if (packing == 4 && out_packing == 1) | |||
| else if (bottom_blob.packing == 4 && out_packing == 1) | |||
| { | |||
| pipeline = pipeline_concat_pack4to1; | |||
| } | |||
| @@ -83,12 +83,21 @@ int Flatten::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, | |||
| { | |||
| int dims = bottom_blob.dims; | |||
| if (dims == 1 || dims == 2) | |||
| if (dims == 1) | |||
| { | |||
| top_blob = bottom_blob; | |||
| return 0; | |||
| } | |||
| if (dims == 2) | |||
| { | |||
| top_blob = bottom_blob; | |||
| top_blob.dims = 1; | |||
| top_blob.w = bottom_blob.w * bottom_blob.h; | |||
| top_blob.h = 1; | |||
| return 0; | |||
| } | |||
| int w = bottom_blob.w; | |||
| int h = bottom_blob.h; | |||
| int channels = bottom_blob.c; | |||
| @@ -55,32 +55,36 @@ void main() | |||
| if (p.dims == 2 && axis == 0) | |||
| { | |||
| // FIXME TODO | |||
| int gi = gy * p.w + gx; | |||
| bottom_top_blob_data[gi] /= sum_workspace_data[gx]; | |||
| return; | |||
| } | |||
| if (p.dims == 2 && axis == 1) | |||
| { | |||
| // FIXME TODO | |||
| int gi = gy * p.w + gx; | |||
| bottom_top_blob_data[gi] /= sum_workspace_data[gy]; | |||
| return; | |||
| } | |||
| if (p.dims == 3 && axis == 0) | |||
| { | |||
| int gi = gz * p.cstep + gy * p.w + gx; | |||
| bottom_top_blob_data[gi] /= sum_workspace_data[ gy * p.w + gx ]; | |||
| bottom_top_blob_data[gi] /= sum_workspace_data[gy * p.w + gx]; | |||
| return; | |||
| } | |||
| if (p.dims == 3 && axis == 1) | |||
| { | |||
| // FIXME TODO | |||
| int gi = gz * p.cstep + gy * p.w + gx; | |||
| bottom_top_blob_data[gi] /= sum_workspace_data[gz * p.w + gx]; | |||
| return; | |||
| } | |||
| if (p.dims == 3 && axis == 2) | |||
| { | |||
| // FIXME TODO | |||
| int gi = gz * p.cstep + gy * p.w + gx; | |||
| bottom_top_blob_data[gi] /= sum_workspace_data[gz * p.h + gy]; | |||
| return; | |||
| } | |||
| } | |||
| @@ -55,32 +55,36 @@ void main() | |||
| if (p.dims == 2 && axis == 0) | |||
| { | |||
| // FIXME TODO | |||
| int gi = gy * p.w + gx; | |||
| bottom_top_blob_data[gi] /= sum_workspace_data[gx]; | |||
| return; | |||
| } | |||
| if (p.dims == 2 && axis == 1) | |||
| { | |||
| // FIXME TODO | |||
| int gi = gy * p.w + gx; | |||
| bottom_top_blob_data[gi] /= sum_workspace_data[gy]; | |||
| return; | |||
| } | |||
| if (p.dims == 3 && axis == 0) | |||
| { | |||
| int gi = gz * p.cstep + gy * p.w + gx; | |||
| bottom_top_blob_data[gi] /= sum_workspace_data[ gy * p.w + gx ]; | |||
| bottom_top_blob_data[gi] /= sum_workspace_data[gy * p.w + gx]; | |||
| return; | |||
| } | |||
| if (p.dims == 3 && axis == 1) | |||
| { | |||
| // FIXME TODO | |||
| int gi = gz * p.cstep + gy * p.w + gx; | |||
| bottom_top_blob_data[gi] /= sum_workspace_data[gz * p.w + gx]; | |||
| return; | |||
| } | |||
| if (p.dims == 3 && axis == 2) | |||
| { | |||
| // FIXME TODO | |||
| int gi = gz * p.cstep + gy * p.w + gx; | |||
| bottom_top_blob_data[gi] /= sum_workspace_data[gz * p.h + gy]; | |||
| return; | |||
| } | |||
| } | |||
| @@ -55,32 +55,36 @@ void main() | |||
| if (p.dims == 2 && axis == 0) | |||
| { | |||
| // FIXME TODO | |||
| int gi = gy * p.w + gx; | |||
| bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gx]); | |||
| return; | |||
| } | |||
| if (p.dims == 2 && axis == 1) | |||
| { | |||
| // FIXME TODO | |||
| int gi = gy * p.w + gx; | |||
| bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gy]); | |||
| return; | |||
| } | |||
| if (p.dims == 3 && axis == 0) | |||
| { | |||
| int gi = gz * p.cstep + gy * p.w + gx; | |||
| bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[ gy * p.w + gx ]); | |||
| bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gy * p.w + gx]); | |||
| return; | |||
| } | |||
| if (p.dims == 3 && axis == 1) | |||
| { | |||
| // FIXME TODO | |||
| int gi = gz * p.cstep + gy * p.w + gx; | |||
| bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gz * p.w + gx]); | |||
| return; | |||
| } | |||
| if (p.dims == 3 && axis == 2) | |||
| { | |||
| // FIXME TODO | |||
| int gi = gz * p.cstep + gy * p.w + gx; | |||
| bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gz * p.h + gy]); | |||
| return; | |||
| } | |||
| } | |||
| @@ -55,32 +55,36 @@ void main() | |||
| if (p.dims == 2 && axis == 0) | |||
| { | |||
| // FIXME TODO | |||
| int gi = gy * p.w + gx; | |||
| bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gx]); | |||
| return; | |||
| } | |||
| if (p.dims == 2 && axis == 1) | |||
| { | |||
| // FIXME TODO | |||
| int gi = gy * p.w + gx; | |||
| bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gy]); | |||
| return; | |||
| } | |||
| if (p.dims == 3 && axis == 0) | |||
| { | |||
| int gi = gz * p.cstep + gy * p.w + gx; | |||
| bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[ gy * p.w + gx ]); | |||
| bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gy * p.w + gx]); | |||
| return; | |||
| } | |||
| if (p.dims == 3 && axis == 1) | |||
| { | |||
| // FIXME TODO | |||
| int gi = gz * p.cstep + gy * p.w + gx; | |||
| bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gz * p.w + gx]); | |||
| return; | |||
| } | |||
| if (p.dims == 3 && axis == 2) | |||
| { | |||
| // FIXME TODO | |||
| int gi = gz * p.cstep + gy * p.w + gx; | |||
| bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gz * p.h + gy]); | |||
| return; | |||
| } | |||
| } | |||
| @@ -44,13 +44,13 @@ void main() | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| if (gx >= p.w || gy >= p.h || gz >= p.c) | |||
| if (gx >= p.outw || gy >= p.outh || gz >= p.outc) | |||
| return; | |||
| if (p.dims == 1) // axis == 0 | |||
| { | |||
| float max_value = -99999999; | |||
| for (int i=0; i<p.w; i++) | |||
| for (int i = 0; i < p.w; i++) | |||
| { | |||
| max_value = max(max_value, bottom_top_blob_data[i]); | |||
| } | |||
| @@ -61,38 +61,66 @@ void main() | |||
| if (p.dims == 2 && axis == 0) | |||
| { | |||
| // FIXME TODO | |||
| float max_value = -99999999; | |||
| for (int i = 0; i < p.h; i++) | |||
| { | |||
| int v_offset = i * p.w + gx; | |||
| max_value = max(max_value, bottom_top_blob_data[v_offset]); | |||
| } | |||
| max_workspace_data[gx] = max_value; | |||
| return; | |||
| } | |||
| if (p.dims == 2 && axis == 1) | |||
| { | |||
| // FIXME TODO | |||
| float max_value = -99999999; | |||
| for (int i = 0; i < p.w; i++) | |||
| { | |||
| int v_offset = gx * p.w + i; | |||
| max_value = max(max_value, bottom_top_blob_data[v_offset]); | |||
| } | |||
| max_workspace_data[gx] = max_value; | |||
| return; | |||
| } | |||
| if (p.dims == 3 && axis == 0) | |||
| { | |||
| float max_value = -99999999; | |||
| for (int z = 0; z < p.c; z++) | |||
| for (int i = 0; i < p.c; i++) | |||
| { | |||
| int v_offset = z * p.cstep + gy * p.w + gx; | |||
| int v_offset = i * p.cstep + gy * p.w + gx; | |||
| max_value = max(max_value, bottom_top_blob_data[v_offset]); | |||
| } | |||
| max_workspace_data[ gy * p.w + gx ] = max_value; | |||
| max_workspace_data[gy * p.w + gx] = max_value; | |||
| return; | |||
| } | |||
| if (p.dims == 3 && axis == 1) | |||
| { | |||
| // FIXME TODO | |||
| float max_value = -99999999; | |||
| for (int i = 0; i < p.h; i++) | |||
| { | |||
| int v_offset = gy * p.cstep + i * p.w + gx; | |||
| max_value = max(max_value, bottom_top_blob_data[v_offset]); | |||
| } | |||
| max_workspace_data[gy * p.w + gx] = max_value; | |||
| return; | |||
| } | |||
| if (p.dims == 3 && axis == 2) | |||
| { | |||
| // FIXME TODO | |||
| float max_value = -99999999; | |||
| for (int i = 0; i < p.w; i++) | |||
| { | |||
| int v_offset = gy * p.cstep + gx * p.w + i; | |||
| max_value = max(max_value, bottom_top_blob_data[v_offset]); | |||
| } | |||
| max_workspace_data[gy * p.h + gx] = max_value; | |||
| return; | |||
| } | |||
| } | |||
| @@ -44,13 +44,13 @@ void main() | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| if (gx >= p.w || gy >= p.h || gz >= p.c) | |||
| if (gx >= p.outw || gy >= p.outh || gz >= p.outc) | |||
| return; | |||
| if (p.dims == 1) // axis == 0 | |||
| { | |||
| vec4 max_value = vec4(-99999999); | |||
| for (int i=0; i<p.w; i++) | |||
| for (int i = 0; i < p.w; i++) | |||
| { | |||
| max_value = max(max_value, bottom_top_blob_data[i]); | |||
| } | |||
| @@ -62,39 +62,71 @@ void main() | |||
| if (p.dims == 2 && axis == 0) | |||
| { | |||
| // FIXME TODO | |||
| vec4 max_value = vec4(-99999999); | |||
| for (int i = 0; i < p.h; i++) | |||
| { | |||
| int v_offset = i * p.w + gx; | |||
| max_value = max(max_value, bottom_top_blob_data[v_offset]); | |||
| } | |||
| vec2 max2 = max(max_value.rg, max_value.ba); | |||
| max_workspace_data[gx] = max(max2.r, max2.g); | |||
| return; | |||
| } | |||
| if (p.dims == 2 && axis == 1) | |||
| { | |||
| // FIXME TODO | |||
| vec4 max_value = vec4(-99999999); | |||
| for (int i = 0; i < p.w; i++) | |||
| { | |||
| int v_offset = gx * p.w + i; | |||
| max_value = max(max_value, bottom_top_blob_data[v_offset]); | |||
| } | |||
| vec2 max2 = max(max_value.rg, max_value.ba); | |||
| max_workspace_data[gx] = max(max2.r, max2.g); | |||
| return; | |||
| } | |||
| if (p.dims == 3 && axis == 0) | |||
| { | |||
| vec4 max_value = vec4(-99999999); | |||
| for (int z = 0; z < p.c; z++) | |||
| for (int i = 0; i < p.c; i++) | |||
| { | |||
| int v_offset = z * p.cstep + gy * p.w + gx; | |||
| int v_offset = i * p.cstep + gy * p.w + gx; | |||
| max_value = max(max_value, bottom_top_blob_data[v_offset]); | |||
| } | |||
| vec2 max2 = max(max_value.rg, max_value.ba); | |||
| max_workspace_data[ gy * p.w + gx ] = max(max2.r, max2.g); | |||
| max_workspace_data[gy * p.w + gx] = max(max2.r, max2.g); | |||
| return; | |||
| } | |||
| if (p.dims == 3 && axis == 1) | |||
| { | |||
| // FIXME TODO | |||
| vec4 max_value = vec4(-99999999); | |||
| for (int i = 0; i < p.h; i++) | |||
| { | |||
| int v_offset = gy * p.cstep + i * p.w + gx; | |||
| max_value = max(max_value, bottom_top_blob_data[v_offset]); | |||
| } | |||
| vec2 max2 = max(max_value.rg, max_value.ba); | |||
| max_workspace_data[gy * p.w + gx] = max(max2.r, max2.g); | |||
| return; | |||
| } | |||
| if (p.dims == 3 && axis == 2) | |||
| { | |||
| // FIXME TODO | |||
| vec4 max_value = vec4(-99999999); | |||
| for (int i = 0; i < p.w; i++) | |||
| { | |||
| int v_offset = gy * p.cstep + gx * p.w + i; | |||
| max_value = max(max_value, bottom_top_blob_data[v_offset]); | |||
| } | |||
| vec2 max2 = max(max_value.rg, max_value.ba); | |||
| max_workspace_data[gy * p.h + gx] = max(max2.r, max2.g); | |||
| return; | |||
| } | |||
| } | |||
| @@ -44,13 +44,13 @@ void main() | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| if (gx >= p.w || gy >= p.h || gz >= p.c) | |||
| if (gx >= p.outw || gy >= p.outh || gz >= p.outc) | |||
| return; | |||
| if (p.dims == 1) // axis == 0 | |||
| { | |||
| float sum_value = 0.f; | |||
| for (int i=0; i<p.w; i++) | |||
| for (int i = 0; i < p.w; i++) | |||
| { | |||
| sum_value += bottom_top_blob_data[i]; | |||
| } | |||
| @@ -61,38 +61,66 @@ void main() | |||
| if (p.dims == 2 && axis == 0) | |||
| { | |||
| // FIXME TODO | |||
| float sum_value = 0.f; | |||
| for (int i = 0; i < p.h; i++) | |||
| { | |||
| int v_offset = i * p.w + gx; | |||
| sum_value += bottom_top_blob_data[v_offset]; | |||
| } | |||
| sum_workspace_data[gx] = sum_value; | |||
| return; | |||
| } | |||
| if (p.dims == 2 && axis == 1) | |||
| { | |||
| // FIXME TODO | |||
| float sum_value = 0.f; | |||
| for (int i = 0; i < p.w; i++) | |||
| { | |||
| int v_offset = gx * p.w + i; | |||
| sum_value += bottom_top_blob_data[v_offset]; | |||
| } | |||
| sum_workspace_data[gx] = sum_value; | |||
| return; | |||
| } | |||
| if (p.dims == 3 && axis == 0) | |||
| { | |||
| float sum_value = 0; | |||
| for (int z = 0; z < p.c; z++) | |||
| float sum_value = 0.f; | |||
| for (int i = 0; i < p.c; i++) | |||
| { | |||
| int v_offset = z * p.cstep + gy * p.w + gx; | |||
| int v_offset = i * p.cstep + gy * p.w + gx; | |||
| sum_value += bottom_top_blob_data[v_offset]; | |||
| } | |||
| sum_workspace_data[ gy * p.w + gx ] = sum_value; | |||
| sum_workspace_data[gy * p.w + gx] = sum_value; | |||
| return; | |||
| } | |||
| if (p.dims == 3 && axis == 1) | |||
| { | |||
| // FIXME TODO | |||
| float sum_value = 0.f; | |||
| for (int i = 0; i < p.h; i++) | |||
| { | |||
| int v_offset = gy * p.cstep + i * p.w + gx; | |||
| sum_value += bottom_top_blob_data[v_offset]; | |||
| } | |||
| sum_workspace_data[gy * p.w + gx] = sum_value; | |||
| return; | |||
| } | |||
| if (p.dims == 3 && axis == 2) | |||
| { | |||
| // FIXME TODO | |||
| float sum_value = 0.f; | |||
| for (int i = 0; i < p.w; i++) | |||
| { | |||
| int v_offset = gy * p.cstep + gx * p.w + i; | |||
| sum_value += bottom_top_blob_data[v_offset]; | |||
| } | |||
| sum_workspace_data[gy * p.h + gx] = sum_value; | |||
| return; | |||
| } | |||
| } | |||
| @@ -44,13 +44,13 @@ void main() | |||
| int gy = int(gl_GlobalInvocationID.y); | |||
| int gz = int(gl_GlobalInvocationID.z); | |||
| if (gx >= p.w || gy >= p.h || gz >= p.c) | |||
| if (gx >= p.outw || gy >= p.outh || gz >= p.outc) | |||
| return; | |||
| if (p.dims == 1) // axis == 0 | |||
| { | |||
| vec4 sum_value = vec4(0.0); | |||
| for (int i=0; i<p.w; i++) | |||
| vec4 sum_value = vec4(0.f); | |||
| for (int i = 0; i < p.w; i++) | |||
| { | |||
| sum_value += bottom_top_blob_data[i]; | |||
| } | |||
| @@ -62,22 +62,38 @@ void main() | |||
| if (p.dims == 2 && axis == 0) | |||
| { | |||
| // FIXME TODO | |||
| vec4 sum_value = vec4(0.f); | |||
| for (int i = 0; i < p.h; i++) | |||
| { | |||
| int v_offset = i * p.w + gx; | |||
| sum_value += bottom_top_blob_data[v_offset]; | |||
| } | |||
| vec2 sum2 = sum_value.rg + sum_value.ba; | |||
| sum_workspace_data[gx] = sum2.r + sum2.g; | |||
| return; | |||
| } | |||
| if (p.dims == 2 && axis == 1) | |||
| { | |||
| // FIXME TODO | |||
| vec4 sum_value = vec4(0.f); | |||
| for (int i = 0; i < p.w; i++) | |||
| { | |||
| int v_offset = gx * p.w + i; | |||
| sum_value += bottom_top_blob_data[v_offset]; | |||
| } | |||
| vec2 sum2 = sum_value.rg + sum_value.ba; | |||
| sum_workspace_data[gx] = sum2.r + sum2.g; | |||
| return; | |||
| } | |||
| if (p.dims == 3 && axis == 0) | |||
| { | |||
| vec4 sum_value = vec4(0.0); | |||
| for (int z = 0; z < p.c; z++) | |||
| vec4 sum_value = vec4(0.f); | |||
| for (int i = 0; i < p.c; i++) | |||
| { | |||
| int v_offset = z * p.cstep + gy * p.w + gx; | |||
| int v_offset = i * p.cstep + gy * p.w + gx; | |||
| sum_value += bottom_top_blob_data[v_offset]; | |||
| } | |||
| vec2 sum2 = sum_value.rg + sum_value.ba; | |||
| @@ -88,13 +104,29 @@ void main() | |||
| if (p.dims == 3 && axis == 1) | |||
| { | |||
| // FIXME TODO | |||
| vec4 sum_value = vec4(0.f); | |||
| for (int i = 0; i < p.h; i++) | |||
| { | |||
| int v_offset = gy * p.cstep + i * p.w + gx; | |||
| sum_value += bottom_top_blob_data[v_offset]; | |||
| } | |||
| vec2 sum2 = sum_value.rg + sum_value.ba; | |||
| sum_workspace_data[gy * p.w + gx] = sum2.r + sum2.g; | |||
| return; | |||
| } | |||
| if (p.dims == 3 && axis == 2) | |||
| { | |||
| // FIXME TODO | |||
| vec4 sum_value = vec4(0.f); | |||
| for (int i = 0; i < p.w; i++) | |||
| { | |||
| int v_offset = gy * p.cstep + gx * p.w + i; | |||
| sum_value += bottom_top_blob_data[v_offset]; | |||
| } | |||
| vec2 sum2 = sum_value.rg + sum_value.ba; | |||
| sum_workspace_data[gy * p.h + gx] = sum2.r + sum2.g; | |||
| return; | |||
| } | |||
| } | |||
| @@ -278,7 +278,7 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | |||
| int channels = bottom_top_blob.c; | |||
| Mat max; | |||
| max.create(h, channels, elemsize, opt.workspace_allocator); | |||
| max.create(w, channels, elemsize, opt.workspace_allocator); | |||
| if (max.empty()) | |||
| return -100; | |||
| max.fill(-FLT_MAX); | |||
| @@ -290,13 +290,11 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | |||
| for (int i=0; i<h; i++) | |||
| { | |||
| float max = -FLT_MAX; | |||
| for (int j=0; j<w; j++) | |||
| { | |||
| max = std::max(max, ptr[j]); | |||
| maxptr[j] = std::max(maxptr[j], ptr[j]); | |||
| } | |||
| maxptr[i] = max; | |||
| ptr += w; | |||
| } | |||
| } | |||
| @@ -309,10 +307,9 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | |||
| for (int i=0; i<h; i++) | |||
| { | |||
| float max = maxptr[i]; | |||
| for (int j=0; j<w; j++) | |||
| { | |||
| ptr[j] = exp(ptr[j] - max); | |||
| ptr[j] = exp(ptr[j] - maxptr[j]); | |||
| } | |||
| ptr += w; | |||
| @@ -320,7 +317,7 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | |||
| } | |||
| Mat sum; | |||
| sum.create(h, channels, elemsize, opt.workspace_allocator); | |||
| sum.create(w, channels, elemsize, opt.workspace_allocator); | |||
| if (sum.empty()) | |||
| return -100; | |||
| sum.fill(0.f); | |||
| @@ -332,13 +329,11 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | |||
| for (int i=0; i<h; i++) | |||
| { | |||
| float sum = 0.f; | |||
| for (int j=0; j<w; j++) | |||
| { | |||
| sum += ptr[j]; | |||
| sumptr[j] += ptr[j]; | |||
| } | |||
| sumptr[i] = sum; | |||
| ptr += w; | |||
| } | |||
| } | |||
| @@ -351,10 +346,9 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | |||
| for (int i=0; i<h; i++) | |||
| { | |||
| float sum = sumptr[i]; | |||
| for (int j=0; j<w; j++) | |||
| { | |||
| ptr[j] /= sum; | |||
| ptr[j] /= sumptr[j]; | |||
| } | |||
| ptr += w; | |||
| @@ -371,7 +365,7 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | |||
| int channels = bottom_top_blob.c; | |||
| Mat max; | |||
| max.create(w, channels, elemsize, opt.workspace_allocator); | |||
| max.create(h, channels, elemsize, opt.workspace_allocator); | |||
| if (max.empty()) | |||
| return -100; | |||
| max.fill(-FLT_MAX); | |||
| @@ -383,11 +377,13 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | |||
| for (int i=0; i<h; i++) | |||
| { | |||
| float max = -FLT_MAX; | |||
| for (int j=0; j<w; j++) | |||
| { | |||
| maxptr[j] = std::max(maxptr[j], ptr[j]); | |||
| max = std::max(max, ptr[j]); | |||
| } | |||
| maxptr[i] = max; | |||
| ptr += w; | |||
| } | |||
| } | |||
| @@ -400,9 +396,10 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | |||
| for (int i=0; i<h; i++) | |||
| { | |||
| float max = maxptr[i]; | |||
| for (int j=0; j<w; j++) | |||
| { | |||
| ptr[j] = exp(ptr[j] - maxptr[j]); | |||
| ptr[j] = exp(ptr[j] - max); | |||
| } | |||
| ptr += w; | |||
| @@ -410,7 +407,7 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | |||
| } | |||
| Mat sum; | |||
| sum.create(w, channels, elemsize, opt.workspace_allocator); | |||
| sum.create(h, channels, elemsize, opt.workspace_allocator); | |||
| if (sum.empty()) | |||
| return -100; | |||
| sum.fill(0.f); | |||
| @@ -422,11 +419,13 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | |||
| for (int i=0; i<h; i++) | |||
| { | |||
| float sum = 0.f; | |||
| for (int j=0; j<w; j++) | |||
| { | |||
| sumptr[j] += ptr[j]; | |||
| sum += ptr[j]; | |||
| } | |||
| sumptr[i] = sum; | |||
| ptr += w; | |||
| } | |||
| } | |||
| @@ -439,9 +438,10 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const | |||
| for (int i=0; i<h; i++) | |||
| { | |||
| float sum = sumptr[i]; | |||
| for (int j=0; j<w; j++) | |||
| { | |||
| ptr[j] /= sumptr[j]; | |||
| ptr[j] /= sum; | |||
| } | |||
| ptr += w; | |||
| @@ -558,13 +558,13 @@ int Softmax::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Optio | |||
| } | |||
| else if (dims == 3 && axis == 1) | |||
| { | |||
| max_workspace.create(h, channels, 4u, opt.workspace_vkallocator, opt.staging_vkallocator); | |||
| sum_workspace.create(h, channels, 4u, opt.workspace_vkallocator, opt.staging_vkallocator); | |||
| max_workspace.create(w, channels, 4u, opt.workspace_vkallocator, opt.staging_vkallocator); | |||
| sum_workspace.create(w, channels, 4u, opt.workspace_vkallocator, opt.staging_vkallocator); | |||
| } | |||
| else if (dims == 3 && axis == 2) | |||
| { | |||
| max_workspace.create(w, channels, 4u, opt.workspace_vkallocator, opt.staging_vkallocator); | |||
| sum_workspace.create(w, channels, 4u, opt.workspace_vkallocator, opt.staging_vkallocator); | |||
| max_workspace.create(h, channels, 4u, opt.workspace_vkallocator, opt.staging_vkallocator); | |||
| sum_workspace.create(h, channels, 4u, opt.workspace_vkallocator, opt.staging_vkallocator); | |||
| } | |||
| // fprintf(stderr, "Softmax::forward_inplace %p\n", bottom_top_blob.buffer()); | |||
| @@ -1482,7 +1482,7 @@ int main(int argc, char** argv) | |||
| const caffe::ReorgParameter& reorg_param = layer.reorg_param(); | |||
| fprintf(pp, " 0=%d", reorg_param.stride()); | |||
| } | |||
| else if (layer.type() == "Reshape")// -1 1 512 | |||
| else if (layer.type() == "Reshape") | |||
| { | |||
| const caffe::ReshapeParameter& reshape_param = layer.reshape_param(); | |||
| const caffe::BlobShape& bs = reshape_param.shape(); | |||
| @@ -1492,11 +1492,11 @@ int main(int argc, char** argv) | |||
| } | |||
| else if (bs.dim_size() == 2) | |||
| { | |||
| fprintf(pp, " 0=%ld 1=%ld 2=-233", bs.dim(1), bs.dim(0)); | |||
| fprintf(pp, " 0=%ld 1=-233 2=-233", bs.dim(1)); | |||
| } | |||
| else if (bs.dim_size() == 3) | |||
| { | |||
| fprintf(pp, " 0=%ld 1=%ld 2=%ld", bs.dim(2), bs.dim(1), bs.dim(0)); | |||
| fprintf(pp, " 0=%ld 1=%ld 2=-233", bs.dim(2), bs.dim(1)); | |||
| } | |||
| else // bs.dim_size() == 4 | |||
| { | |||