Browse Source

Fix softmax axis order for 3-dim blobs, fix Caffe Reshape conversion, and regenerate the SSD param files

tags/20190320
nihui 7 years ago
parent
commit
d85775fbcd
14 changed files with 240 additions and 95 deletions
  1. +1
    -1
      benchmark/mobilenet_ssd.param
  2. +1
    -1
      benchmark/squeezenet_ssd.param
  3. +9
    -9
      src/layer/concat.cpp
  4. +10
    -1
      src/layer/flatten.cpp
  5. +9
    -5
      src/layer/shader/softmax_div_sum.comp
  6. +9
    -5
      src/layer/shader/softmax_div_sum_pack4.comp
  7. +9
    -5
      src/layer/shader/softmax_exp_sub_max.comp
  8. +9
    -5
      src/layer/shader/softmax_exp_sub_max_pack4.comp
  9. +37
    -9
      src/layer/shader/softmax_reduce_max.comp
  10. +41
    -9
      src/layer/shader/softmax_reduce_max_pack4.comp
  11. +38
    -10
      src/layer/shader/softmax_reduce_sum.comp
  12. +42
    -10
      src/layer/shader/softmax_reduce_sum_pack4.comp
  13. +22
    -22
      src/layer/softmax.cpp
  14. +3
    -3
      tools/caffe/caffe2ncnn.cpp

+ 1
- 1
benchmark/mobilenet_ssd.param View File

@@ -123,7 +123,7 @@ PriorBox conv17_2_mbox_priorbox 2 1 conv17_2_conv17_2/relu_splitncnn_0 d
Concat mbox_loc 6 1 conv11_mbox_loc_flat conv13_mbox_loc_flat conv14_2_mbox_loc_flat conv15_2_mbox_loc_flat conv16_2_mbox_loc_flat conv17_2_mbox_loc_flat mbox_loc 0=0
Concat mbox_conf 6 1 conv11_mbox_conf_flat conv13_mbox_conf_flat conv14_2_mbox_conf_flat conv15_2_mbox_conf_flat conv16_2_mbox_conf_flat conv17_2_mbox_conf_flat mbox_conf 0=0
Concat mbox_priorbox 6 1 conv11_mbox_priorbox conv13_mbox_priorbox conv14_2_mbox_priorbox conv15_2_mbox_priorbox conv16_2_mbox_priorbox conv17_2_mbox_priorbox mbox_priorbox 0=1
Reshape mbox_conf_reshape 1 1 mbox_conf mbox_conf_reshape 0=21 1=-1 2=0 3=0
Reshape mbox_conf_reshape 1 1 mbox_conf mbox_conf_reshape 0=21 1=-1 2=-233 3=0
Softmax mbox_conf_softmax 1 1 mbox_conf_reshape mbox_conf_softmax 0=1
Flatten mbox_conf_flatten 1 1 mbox_conf_softmax mbox_conf_flatten
DetectionOutput detection_out 3 1 mbox_loc mbox_conf_flatten mbox_priorbox detection_out 0=21 1=0.450000 2=100 3=100 4=0.250000

+ 1
- 1
benchmark/squeezenet_ssd.param View File

@@ -175,7 +175,7 @@ PriorBox conv13_2_mbox_priorbox 2 1 conv13_2_conv13_2/relu_splitncnn_0 d
Concat mbox_loc 6 1 fire5_mbox_loc_flat fire9_mbox_loc_flat fire10_mbox_loc_flat fire11_mbox_loc_flat conv12_2_mbox_loc_flat conv13_2_mbox_loc_flat mbox_loc 0=0
Concat mbox_conf 6 1 fire5_mbox_conf_flat fire9_mbox_conf_flat fire10_mbox_conf_flat fire11_mbox_conf_flat conv12_2_mbox_conf_flat conv13_2_mbox_conf_flat mbox_conf 0=0
Concat mbox_priorbox 6 1 fire5_mbox_priorbox fire9_mbox_priorbox fire10_mbox_priorbox fire11_mbox_priorbox conv12_2_mbox_priorbox conv13_2_mbox_priorbox mbox_priorbox 0=1
Reshape mbox_conf_reshape 1 1 mbox_conf mbox_conf_reshape 0=21 1=-1 2=0 3=0
Reshape mbox_conf_reshape 1 1 mbox_conf mbox_conf_reshape 0=21 1=-1 2=-233 3=0
Softmax mbox_conf_softmax 1 1 mbox_conf_reshape mbox_conf_softmax 0=1
Flatten mbox_conf_flatten 1 1 mbox_conf_softmax mbox_conf_flatten
DetectionOutput detection_out 3 1 mbox_loc mbox_conf_flatten mbox_priorbox detection_out 0=21 1=0.450000 2=100 3=100 4=0.250000

+ 9
- 9
src/layer/concat.cpp View File

@@ -370,15 +370,15 @@ int Concat::forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>&
constants[10].i = woffset;

const Pipeline* pipeline = 0;
if (packing == 1 && out_packing == 1)
if (bottom_blob.packing == 1 && out_packing == 1)
{
pipeline = pipeline_concat;
}
else if (packing == 4 && out_packing == 4)
else if (bottom_blob.packing == 4 && out_packing == 4)
{
pipeline = pipeline_concat_pack4;
}
else if (packing == 4 && out_packing == 1)
else if (bottom_blob.packing == 4 && out_packing == 1)
{
pipeline = pipeline_concat_pack4to1;
}
@@ -450,15 +450,15 @@ int Concat::forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>&
constants[10].i = hoffset;

const Pipeline* pipeline = 0;
if (packing == 1 && out_packing == 1)
if (bottom_blob.packing == 1 && out_packing == 1)
{
pipeline = pipeline_concat;
}
else if (packing == 4 && out_packing == 4)
else if (bottom_blob.packing == 4 && out_packing == 4)
{
pipeline = pipeline_concat_pack4;
}
else if (packing == 4 && out_packing == 1)
else if (bottom_blob.packing == 4 && out_packing == 1)
{
pipeline = pipeline_concat_pack4to1;
}
@@ -587,15 +587,15 @@ int Concat::forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>&
constants[10].i = coffset;

const Pipeline* pipeline = 0;
if (packing == 1 && out_packing == 1)
if (bottom_blob.packing == 1 && out_packing == 1)
{
pipeline = pipeline_concat;
}
else if (packing == 4 && out_packing == 4)
else if (bottom_blob.packing == 4 && out_packing == 4)
{
pipeline = pipeline_concat_pack4;
}
else if (packing == 4 && out_packing == 1)
else if (bottom_blob.packing == 4 && out_packing == 1)
{
pipeline = pipeline_concat_pack4to1;
}


+ 10
- 1
src/layer/flatten.cpp View File

@@ -83,12 +83,21 @@ int Flatten::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd,
{
int dims = bottom_blob.dims;

if (dims == 1 || dims == 2)
if (dims == 1)
{
top_blob = bottom_blob;
return 0;
}

if (dims == 2)
{
top_blob = bottom_blob;
top_blob.dims = 1;
top_blob.w = bottom_blob.w * bottom_blob.h;
top_blob.h = 1;
return 0;
}

int w = bottom_blob.w;
int h = bottom_blob.h;
int channels = bottom_blob.c;


+ 9
- 5
src/layer/shader/softmax_div_sum.comp View File

@@ -55,32 +55,36 @@ void main()

if (p.dims == 2 && axis == 0)
{
// FIXME TODO
int gi = gy * p.w + gx;
bottom_top_blob_data[gi] /= sum_workspace_data[gx];
return;
}

if (p.dims == 2 && axis == 1)
{
// FIXME TODO
int gi = gy * p.w + gx;
bottom_top_blob_data[gi] /= sum_workspace_data[gy];
return;
}

if (p.dims == 3 && axis == 0)
{
int gi = gz * p.cstep + gy * p.w + gx;
bottom_top_blob_data[gi] /= sum_workspace_data[ gy * p.w + gx ];
bottom_top_blob_data[gi] /= sum_workspace_data[gy * p.w + gx];
return;
}

if (p.dims == 3 && axis == 1)
{
// FIXME TODO
int gi = gz * p.cstep + gy * p.w + gx;
bottom_top_blob_data[gi] /= sum_workspace_data[gz * p.w + gx];
return;
}

if (p.dims == 3 && axis == 2)
{
// FIXME TODO
int gi = gz * p.cstep + gy * p.w + gx;
bottom_top_blob_data[gi] /= sum_workspace_data[gz * p.h + gy];
return;
}
}

+ 9
- 5
src/layer/shader/softmax_div_sum_pack4.comp View File

@@ -55,32 +55,36 @@ void main()

if (p.dims == 2 && axis == 0)
{
// FIXME TODO
int gi = gy * p.w + gx;
bottom_top_blob_data[gi] /= sum_workspace_data[gx];
return;
}

if (p.dims == 2 && axis == 1)
{
// FIXME TODO
int gi = gy * p.w + gx;
bottom_top_blob_data[gi] /= sum_workspace_data[gy];
return;
}

if (p.dims == 3 && axis == 0)
{
int gi = gz * p.cstep + gy * p.w + gx;
bottom_top_blob_data[gi] /= sum_workspace_data[ gy * p.w + gx ];
bottom_top_blob_data[gi] /= sum_workspace_data[gy * p.w + gx];
return;
}

if (p.dims == 3 && axis == 1)
{
// FIXME TODO
int gi = gz * p.cstep + gy * p.w + gx;
bottom_top_blob_data[gi] /= sum_workspace_data[gz * p.w + gx];
return;
}

if (p.dims == 3 && axis == 2)
{
// FIXME TODO
int gi = gz * p.cstep + gy * p.w + gx;
bottom_top_blob_data[gi] /= sum_workspace_data[gz * p.h + gy];
return;
}
}

+ 9
- 5
src/layer/shader/softmax_exp_sub_max.comp View File

@@ -55,32 +55,36 @@ void main()

if (p.dims == 2 && axis == 0)
{
// FIXME TODO
int gi = gy * p.w + gx;
bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gx]);
return;
}

if (p.dims == 2 && axis == 1)
{
// FIXME TODO
int gi = gy * p.w + gx;
bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gy]);
return;
}

if (p.dims == 3 && axis == 0)
{
int gi = gz * p.cstep + gy * p.w + gx;
bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[ gy * p.w + gx ]);
bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gy * p.w + gx]);
return;
}

if (p.dims == 3 && axis == 1)
{
// FIXME TODO
int gi = gz * p.cstep + gy * p.w + gx;
bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gz * p.w + gx]);
return;
}

if (p.dims == 3 && axis == 2)
{
// FIXME TODO
int gi = gz * p.cstep + gy * p.w + gx;
bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gz * p.h + gy]);
return;
}
}

+ 9
- 5
src/layer/shader/softmax_exp_sub_max_pack4.comp View File

@@ -55,32 +55,36 @@ void main()

if (p.dims == 2 && axis == 0)
{
// FIXME TODO
int gi = gy * p.w + gx;
bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gx]);
return;
}

if (p.dims == 2 && axis == 1)
{
// FIXME TODO
int gi = gy * p.w + gx;
bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gy]);
return;
}

if (p.dims == 3 && axis == 0)
{
int gi = gz * p.cstep + gy * p.w + gx;
bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[ gy * p.w + gx ]);
bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gy * p.w + gx]);
return;
}

if (p.dims == 3 && axis == 1)
{
// FIXME TODO
int gi = gz * p.cstep + gy * p.w + gx;
bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gz * p.w + gx]);
return;
}

if (p.dims == 3 && axis == 2)
{
// FIXME TODO
int gi = gz * p.cstep + gy * p.w + gx;
bottom_top_blob_data[gi] = exp(bottom_top_blob_data[gi] - max_workspace_data[gz * p.h + gy]);
return;
}
}

+ 37
- 9
src/layer/shader/softmax_reduce_max.comp View File

@@ -44,13 +44,13 @@ void main()
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);

if (gx >= p.w || gy >= p.h || gz >= p.c)
if (gx >= p.outw || gy >= p.outh || gz >= p.outc)
return;

if (p.dims == 1) // axis == 0
{
float max_value = -99999999;
for (int i=0; i<p.w; i++)
for (int i = 0; i < p.w; i++)
{
max_value = max(max_value, bottom_top_blob_data[i]);
}
@@ -61,38 +61,66 @@ void main()

if (p.dims == 2 && axis == 0)
{
// FIXME TODO
float max_value = -99999999;
for (int i = 0; i < p.h; i++)
{
int v_offset = i * p.w + gx;
max_value = max(max_value, bottom_top_blob_data[v_offset]);
}
max_workspace_data[gx] = max_value;

return;
}

if (p.dims == 2 && axis == 1)
{
// FIXME TODO
float max_value = -99999999;
for (int i = 0; i < p.w; i++)
{
int v_offset = gx * p.w + i;
max_value = max(max_value, bottom_top_blob_data[v_offset]);
}
max_workspace_data[gx] = max_value;

return;
}

if (p.dims == 3 && axis == 0)
{
float max_value = -99999999;
for (int z = 0; z < p.c; z++)
for (int i = 0; i < p.c; i++)
{
int v_offset = z * p.cstep + gy * p.w + gx;
int v_offset = i * p.cstep + gy * p.w + gx;
max_value = max(max_value, bottom_top_blob_data[v_offset]);
}
max_workspace_data[ gy * p.w + gx ] = max_value;
max_workspace_data[gy * p.w + gx] = max_value;

return;
}

if (p.dims == 3 && axis == 1)
{
// FIXME TODO
float max_value = -99999999;
for (int i = 0; i < p.h; i++)
{
int v_offset = gy * p.cstep + i * p.w + gx;
max_value = max(max_value, bottom_top_blob_data[v_offset]);
}
max_workspace_data[gy * p.w + gx] = max_value;

return;
}

if (p.dims == 3 && axis == 2)
{
// FIXME TODO
float max_value = -99999999;
for (int i = 0; i < p.w; i++)
{
int v_offset = gy * p.cstep + gx * p.w + i;
max_value = max(max_value, bottom_top_blob_data[v_offset]);
}
max_workspace_data[gy * p.h + gx] = max_value;

return;
}
}

+ 41
- 9
src/layer/shader/softmax_reduce_max_pack4.comp View File

@@ -44,13 +44,13 @@ void main()
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);

if (gx >= p.w || gy >= p.h || gz >= p.c)
if (gx >= p.outw || gy >= p.outh || gz >= p.outc)
return;

if (p.dims == 1) // axis == 0
{
vec4 max_value = vec4(-99999999);
for (int i=0; i<p.w; i++)
for (int i = 0; i < p.w; i++)
{
max_value = max(max_value, bottom_top_blob_data[i]);
}
@@ -62,39 +62,71 @@ void main()

if (p.dims == 2 && axis == 0)
{
// FIXME TODO
vec4 max_value = vec4(-99999999);
for (int i = 0; i < p.h; i++)
{
int v_offset = i * p.w + gx;
max_value = max(max_value, bottom_top_blob_data[v_offset]);
}
vec2 max2 = max(max_value.rg, max_value.ba);
max_workspace_data[gx] = max(max2.r, max2.g);

return;
}

if (p.dims == 2 && axis == 1)
{
// FIXME TODO
vec4 max_value = vec4(-99999999);
for (int i = 0; i < p.w; i++)
{
int v_offset = gx * p.w + i;
max_value = max(max_value, bottom_top_blob_data[v_offset]);
}
vec2 max2 = max(max_value.rg, max_value.ba);
max_workspace_data[gx] = max(max2.r, max2.g);

return;
}

if (p.dims == 3 && axis == 0)
{
vec4 max_value = vec4(-99999999);
for (int z = 0; z < p.c; z++)
for (int i = 0; i < p.c; i++)
{
int v_offset = z * p.cstep + gy * p.w + gx;
int v_offset = i * p.cstep + gy * p.w + gx;
max_value = max(max_value, bottom_top_blob_data[v_offset]);
}
vec2 max2 = max(max_value.rg, max_value.ba);
max_workspace_data[ gy * p.w + gx ] = max(max2.r, max2.g);
max_workspace_data[gy * p.w + gx] = max(max2.r, max2.g);

return;
}

if (p.dims == 3 && axis == 1)
{
// FIXME TODO
vec4 max_value = vec4(-99999999);
for (int i = 0; i < p.h; i++)
{
int v_offset = gy * p.cstep + i * p.w + gx;
max_value = max(max_value, bottom_top_blob_data[v_offset]);
}
vec2 max2 = max(max_value.rg, max_value.ba);
max_workspace_data[gy * p.w + gx] = max(max2.r, max2.g);

return;
}

if (p.dims == 3 && axis == 2)
{
// FIXME TODO
vec4 max_value = vec4(-99999999);
for (int i = 0; i < p.w; i++)
{
int v_offset = gy * p.cstep + gx * p.w + i;
max_value = max(max_value, bottom_top_blob_data[v_offset]);
}
vec2 max2 = max(max_value.rg, max_value.ba);
max_workspace_data[gy * p.h + gx] = max(max2.r, max2.g);

return;
}
}

+ 38
- 10
src/layer/shader/softmax_reduce_sum.comp View File

@@ -44,13 +44,13 @@ void main()
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);

if (gx >= p.w || gy >= p.h || gz >= p.c)
if (gx >= p.outw || gy >= p.outh || gz >= p.outc)
return;

if (p.dims == 1) // axis == 0
{
float sum_value = 0.f;
for (int i=0; i<p.w; i++)
for (int i = 0; i < p.w; i++)
{
sum_value += bottom_top_blob_data[i];
}
@@ -61,38 +61,66 @@ void main()

if (p.dims == 2 && axis == 0)
{
// FIXME TODO
float sum_value = 0.f;
for (int i = 0; i < p.h; i++)
{
int v_offset = i * p.w + gx;
sum_value += bottom_top_blob_data[v_offset];
}
sum_workspace_data[gx] = sum_value;

return;
}

if (p.dims == 2 && axis == 1)
{
// FIXME TODO
float sum_value = 0.f;
for (int i = 0; i < p.w; i++)
{
int v_offset = gx * p.w + i;
sum_value += bottom_top_blob_data[v_offset];
}
sum_workspace_data[gx] = sum_value;

return;
}

if (p.dims == 3 && axis == 0)
{
float sum_value = 0;
for (int z = 0; z < p.c; z++)
float sum_value = 0.f;
for (int i = 0; i < p.c; i++)
{
int v_offset = z * p.cstep + gy * p.w + gx;
int v_offset = i * p.cstep + gy * p.w + gx;
sum_value += bottom_top_blob_data[v_offset];
}
sum_workspace_data[ gy * p.w + gx ] = sum_value;
sum_workspace_data[gy * p.w + gx] = sum_value;

return;
}

if (p.dims == 3 && axis == 1)
{
// FIXME TODO
float sum_value = 0.f;
for (int i = 0; i < p.h; i++)
{
int v_offset = gy * p.cstep + i * p.w + gx;
sum_value += bottom_top_blob_data[v_offset];
}
sum_workspace_data[gy * p.w + gx] = sum_value;

return;
}

if (p.dims == 3 && axis == 2)
{
// FIXME TODO
float sum_value = 0.f;
for (int i = 0; i < p.w; i++)
{
int v_offset = gy * p.cstep + gx * p.w + i;
sum_value += bottom_top_blob_data[v_offset];
}
sum_workspace_data[gy * p.h + gx] = sum_value;

return;
}
}

+ 42
- 10
src/layer/shader/softmax_reduce_sum_pack4.comp View File

@@ -44,13 +44,13 @@ void main()
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);

if (gx >= p.w || gy >= p.h || gz >= p.c)
if (gx >= p.outw || gy >= p.outh || gz >= p.outc)
return;

if (p.dims == 1) // axis == 0
{
vec4 sum_value = vec4(0.0);
for (int i=0; i<p.w; i++)
vec4 sum_value = vec4(0.f);
for (int i = 0; i < p.w; i++)
{
sum_value += bottom_top_blob_data[i];
}
@@ -62,22 +62,38 @@ void main()

if (p.dims == 2 && axis == 0)
{
// FIXME TODO
vec4 sum_value = vec4(0.f);
for (int i = 0; i < p.h; i++)
{
int v_offset = i * p.w + gx;
sum_value += bottom_top_blob_data[v_offset];
}
vec2 sum2 = sum_value.rg + sum_value.ba;
sum_workspace_data[gx] = sum2.r + sum2.g;

return;
}

if (p.dims == 2 && axis == 1)
{
// FIXME TODO
vec4 sum_value = vec4(0.f);
for (int i = 0; i < p.w; i++)
{
int v_offset = gx * p.w + i;
sum_value += bottom_top_blob_data[v_offset];
}
vec2 sum2 = sum_value.rg + sum_value.ba;
sum_workspace_data[gx] = sum2.r + sum2.g;

return;
}

if (p.dims == 3 && axis == 0)
{
vec4 sum_value = vec4(0.0);
for (int z = 0; z < p.c; z++)
vec4 sum_value = vec4(0.f);
for (int i = 0; i < p.c; i++)
{
int v_offset = z * p.cstep + gy * p.w + gx;
int v_offset = i * p.cstep + gy * p.w + gx;
sum_value += bottom_top_blob_data[v_offset];
}
vec2 sum2 = sum_value.rg + sum_value.ba;
@@ -88,13 +104,29 @@ void main()

if (p.dims == 3 && axis == 1)
{
// FIXME TODO
vec4 sum_value = vec4(0.f);
for (int i = 0; i < p.h; i++)
{
int v_offset = gy * p.cstep + i * p.w + gx;
sum_value += bottom_top_blob_data[v_offset];
}
vec2 sum2 = sum_value.rg + sum_value.ba;
sum_workspace_data[gy * p.w + gx] = sum2.r + sum2.g;

return;
}

if (p.dims == 3 && axis == 2)
{
// FIXME TODO
vec4 sum_value = vec4(0.f);
for (int i = 0; i < p.w; i++)
{
int v_offset = gy * p.cstep + gx * p.w + i;
sum_value += bottom_top_blob_data[v_offset];
}
vec2 sum2 = sum_value.rg + sum_value.ba;
sum_workspace_data[gy * p.h + gx] = sum2.r + sum2.g;

return;
}
}

+ 22
- 22
src/layer/softmax.cpp View File

@@ -278,7 +278,7 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
int channels = bottom_top_blob.c;

Mat max;
max.create(h, channels, elemsize, opt.workspace_allocator);
max.create(w, channels, elemsize, opt.workspace_allocator);
if (max.empty())
return -100;
max.fill(-FLT_MAX);
@@ -290,13 +290,11 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const

for (int i=0; i<h; i++)
{
float max = -FLT_MAX;
for (int j=0; j<w; j++)
{
max = std::max(max, ptr[j]);
maxptr[j] = std::max(maxptr[j], ptr[j]);
}

maxptr[i] = max;
ptr += w;
}
}
@@ -309,10 +307,9 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const

for (int i=0; i<h; i++)
{
float max = maxptr[i];
for (int j=0; j<w; j++)
{
ptr[j] = exp(ptr[j] - max);
ptr[j] = exp(ptr[j] - maxptr[j]);
}

ptr += w;
@@ -320,7 +317,7 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
}

Mat sum;
sum.create(h, channels, elemsize, opt.workspace_allocator);
sum.create(w, channels, elemsize, opt.workspace_allocator);
if (sum.empty())
return -100;
sum.fill(0.f);
@@ -332,13 +329,11 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const

for (int i=0; i<h; i++)
{
float sum = 0.f;
for (int j=0; j<w; j++)
{
sum += ptr[j];
sumptr[j] += ptr[j];
}

sumptr[i] = sum;
ptr += w;
}
}
@@ -351,10 +346,9 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const

for (int i=0; i<h; i++)
{
float sum = sumptr[i];
for (int j=0; j<w; j++)
{
ptr[j] /= sum;
ptr[j] /= sumptr[j];
}

ptr += w;
@@ -371,7 +365,7 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
int channels = bottom_top_blob.c;

Mat max;
max.create(w, channels, elemsize, opt.workspace_allocator);
max.create(h, channels, elemsize, opt.workspace_allocator);
if (max.empty())
return -100;
max.fill(-FLT_MAX);
@@ -383,11 +377,13 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const

for (int i=0; i<h; i++)
{
float max = -FLT_MAX;
for (int j=0; j<w; j++)
{
maxptr[j] = std::max(maxptr[j], ptr[j]);
max = std::max(max, ptr[j]);
}

maxptr[i] = max;
ptr += w;
}
}
@@ -400,9 +396,10 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const

for (int i=0; i<h; i++)
{
float max = maxptr[i];
for (int j=0; j<w; j++)
{
ptr[j] = exp(ptr[j] - maxptr[j]);
ptr[j] = exp(ptr[j] - max);
}

ptr += w;
@@ -410,7 +407,7 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
}

Mat sum;
sum.create(w, channels, elemsize, opt.workspace_allocator);
sum.create(h, channels, elemsize, opt.workspace_allocator);
if (sum.empty())
return -100;
sum.fill(0.f);
@@ -422,11 +419,13 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const

for (int i=0; i<h; i++)
{
float sum = 0.f;
for (int j=0; j<w; j++)
{
sumptr[j] += ptr[j];
sum += ptr[j];
}

sumptr[i] = sum;
ptr += w;
}
}
@@ -439,9 +438,10 @@ int Softmax::forward_inplace(Mat& bottom_top_blob, const Option& opt) const

for (int i=0; i<h; i++)
{
float sum = sumptr[i];
for (int j=0; j<w; j++)
{
ptr[j] /= sumptr[j];
ptr[j] /= sum;
}

ptr += w;
@@ -558,13 +558,13 @@ int Softmax::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Optio
}
else if (dims == 3 && axis == 1)
{
max_workspace.create(h, channels, 4u, opt.workspace_vkallocator, opt.staging_vkallocator);
sum_workspace.create(h, channels, 4u, opt.workspace_vkallocator, opt.staging_vkallocator);
max_workspace.create(w, channels, 4u, opt.workspace_vkallocator, opt.staging_vkallocator);
sum_workspace.create(w, channels, 4u, opt.workspace_vkallocator, opt.staging_vkallocator);
}
else if (dims == 3 && axis == 2)
{
max_workspace.create(w, channels, 4u, opt.workspace_vkallocator, opt.staging_vkallocator);
sum_workspace.create(w, channels, 4u, opt.workspace_vkallocator, opt.staging_vkallocator);
max_workspace.create(h, channels, 4u, opt.workspace_vkallocator, opt.staging_vkallocator);
sum_workspace.create(h, channels, 4u, opt.workspace_vkallocator, opt.staging_vkallocator);
}

// fprintf(stderr, "Softmax::forward_inplace %p\n", bottom_top_blob.buffer());


+ 3
- 3
tools/caffe/caffe2ncnn.cpp View File

@@ -1482,7 +1482,7 @@ int main(int argc, char** argv)
const caffe::ReorgParameter& reorg_param = layer.reorg_param();
fprintf(pp, " 0=%d", reorg_param.stride());
}
else if (layer.type() == "Reshape")// -1 1 512
else if (layer.type() == "Reshape")
{
const caffe::ReshapeParameter& reshape_param = layer.reshape_param();
const caffe::BlobShape& bs = reshape_param.shape();
@@ -1492,11 +1492,11 @@ int main(int argc, char** argv)
}
else if (bs.dim_size() == 2)
{
fprintf(pp, " 0=%ld 1=%ld 2=-233", bs.dim(1), bs.dim(0));
fprintf(pp, " 0=%ld 1=-233 2=-233", bs.dim(1));
}
else if (bs.dim_size() == 3)
{
fprintf(pp, " 0=%ld 1=%ld 2=%ld", bs.dim(2), bs.dim(1), bs.dim(0));
fprintf(pp, " 0=%ld 1=%ld 2=-233", bs.dim(2), bs.dim(1));
}
else // bs.dim_size() == 4
{


Loading…
Cancel
Save