| @@ -245,6 +245,10 @@ static int convolutiondepthwise(const Mat& bottom_blob, Mat& top_blob, const Mat | |||
| float* outptr = top_blob.channel(g * outch_g + p); | |||
| const float* weight_data_ptr = (const float*)weight_data + maxk * inch_g * outch_g * g; | |||
| // shadowed variables to reduce the number of OpenMP task args | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| for (int i = 0; i < outh; i++) | |||
| { | |||
| for (int j = 0; j < outw; j++) | |||
| @@ -181,6 +181,11 @@ int ConvolutionDepthWise3D::forward(const Mat& bottom_blob, Mat& top_blob, const | |||
| float* outptr = top_blob.channel(g * num_output_g + p); | |||
| const float* weight_data_ptr = (const float*)weight_data + maxk * channels_g * num_output_g * g; | |||
| // shadowed variables to reduce the number of OpenMP task args | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| const int outd = top_blob.d; | |||
| for (int z = 0; z < outd; z++) | |||
| { | |||
| for (int i = 0; i < outh; i++) | |||
| @@ -67,16 +67,9 @@ int Deconvolution::load_model(const ModelBin& mb) | |||
| static int deconvolution(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data, const Mat& bias_data, int kernel_w, int kernel_h, int stride_w, int stride_h, int dilation_w, int dilation_h, int activation_type, const Mat& activation_params, const Option& opt) | |||
| { | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int inch = bottom_blob.c; | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| const int outch = top_blob.c; | |||
| const int bias_term = bias_data.empty() ? 0 : 1; | |||
| const int maxk = kernel_w * kernel_h; | |||
| // kernel offsets | |||
| @@ -103,10 +96,17 @@ static int deconvolution(const Mat& bottom_blob, Mat& top_blob, const Mat& weigh | |||
| { | |||
| Mat out = top_blob.channel(p); | |||
| const float bias = bias_term ? bias_data[p] : 0.f; | |||
| const float bias = bias_data.empty() ? 0.f : bias_data[p]; | |||
| out.fill(bias); | |||
| // shadowed variables to reduce the number of OpenMP task args | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int inch = bottom_blob.c; | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| for (int i = 0; i < h; i++) | |||
| { | |||
| for (int j = 0; j < w; j++) | |||
| @@ -74,18 +74,10 @@ int Deconvolution3D::load_model(const ModelBin& mb) | |||
| static int deconvolution3d(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data, const Mat& bias_data, int kernel_w, int kernel_h, int kernel_d, int stride_w, int stride_h, int stride_d, int dilation_w, int dilation_h, int dilation_d, int activation_type, const Mat& activation_params, const Option& opt) | |||
| { | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int d = bottom_blob.d; | |||
| const int inch = bottom_blob.c; | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| const int outd = top_blob.d; | |||
| const int outch = top_blob.c; | |||
| const int bias_term = bias_data.empty() ? 0 : 1; | |||
| const int maxk = kernel_w * kernel_h * kernel_d; | |||
| // kernel offsets | |||
| @@ -117,10 +109,19 @@ static int deconvolution3d(const Mat& bottom_blob, Mat& top_blob, const Mat& wei | |||
| { | |||
| Mat out = top_blob.channel(p); | |||
| const float bias = bias_term ? bias_data[p] : 0.f; | |||
| const float bias = bias_data.empty() ? 0.f : bias_data[p]; | |||
| out.fill(bias); | |||
| // shadowed variables to reduce the number of OpenMP task args | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int d = bottom_blob.d; | |||
| const int inch = bottom_blob.c; | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| const int outd = top_blob.d; | |||
| for (int z = 0; z < d; z++) | |||
| { | |||
| for (int i = 0; i < h; i++) | |||
| @@ -68,16 +68,11 @@ int DeconvolutionDepthWise::load_model(const ModelBin& mb) | |||
| static int deconvolutiondepthwise(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data, const Mat& bias_data, int kernel_w, int kernel_h, int stride_w, int stride_h, int dilation_w, int dilation_h, int group, int activation_type, const Mat& activation_params, const Option& opt) | |||
| { | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int inch = bottom_blob.c; | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| const int outch = top_blob.c; | |||
| const int bias_term = bias_data.empty() ? 0 : 1; | |||
| const int maxk = kernel_w * kernel_h; | |||
| // kernel offsets | |||
| @@ -109,10 +104,16 @@ static int deconvolutiondepthwise(const Mat& bottom_blob, Mat& top_blob, const M | |||
| const float* kptr = (const float*)weight_data + maxk * g; | |||
| Mat out = top_blob.channel(g); | |||
| const float bias = bias_term ? bias_data[g] : 0.f; | |||
| const float bias = bias_data.empty() ? 0.f : bias_data[g]; | |||
| out.fill(bias); | |||
| // shadowed variables to reduce the number of OpenMP task args | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| for (int i = 0; i < h; i++) | |||
| { | |||
| for (int j = 0; j < w; j++) | |||
| @@ -157,10 +158,17 @@ static int deconvolutiondepthwise(const Mat& bottom_blob, Mat& top_blob, const M | |||
| Mat out = top_blob.channel(g * outch_g + p); | |||
| const float* weight_data_ptr = (const float*)weight_data + maxk * inch_g * outch_g * g; | |||
| const float bias = bias_term ? bias_data[g * outch_g + p] : 0.f; | |||
| const float bias = bias_data.empty() ? 0.f : bias_data[g * outch_g + p]; | |||
| out.fill(bias); | |||
| // shadowed variables to reduce the number of OpenMP task args | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| for (int i = 0; i < h; i++) | |||
| { | |||
| for (int j = 0; j < w; j++) | |||
| @@ -75,18 +75,12 @@ int DeconvolutionDepthWise3D::load_model(const ModelBin& mb) | |||
| static int deconvolutiondepthwise3d(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data, const Mat& bias_data, int kernel_w, int kernel_h, int kernel_d, int stride_w, int stride_h, int stride_d, int dilation_w, int dilation_h, int dilation_d, int group, int activation_type, const Mat& activation_params, const Option& opt) | |||
| { | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int d = bottom_blob.d; | |||
| const int inch = bottom_blob.c; | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| const int outd = top_blob.d; | |||
| const int outch = top_blob.c; | |||
| const int bias_term = bias_data.empty() ? 0 : 1; | |||
| const int maxk = kernel_w * kernel_h * kernel_d; | |||
| // kernel offsets | |||
| @@ -123,10 +117,18 @@ static int deconvolutiondepthwise3d(const Mat& bottom_blob, Mat& top_blob, const | |||
| const float* kptr = (const float*)weight_data + maxk * g; | |||
| Mat out = top_blob.channel(g); | |||
| const float bias = bias_term ? bias_data[g] : 0.f; | |||
| const float bias = bias_data.empty() ? 0.f : bias_data[g]; | |||
| out.fill(bias); | |||
| // shadowed variables to reduce the number of OpenMP task args | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int d = bottom_blob.d; | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| const int outd = top_blob.d; | |||
| for (int z = 0; z < d; z++) | |||
| { | |||
| for (int i = 0; i < h; i++) | |||
| @@ -174,10 +176,19 @@ static int deconvolutiondepthwise3d(const Mat& bottom_blob, Mat& top_blob, const | |||
| Mat out = top_blob.channel(g * outch_g + p); | |||
| const float* weight_data_ptr = (const float*)weight_data + maxk * inch_g * outch_g * g; | |||
| const float bias = bias_term ? bias_data[g * outch_g + p] : 0.f; | |||
| const float bias = bias_data.empty() ? 0.f : bias_data[g * outch_g + p]; | |||
| out.fill(bias); | |||
| // shadowed variables to reduce the number of OpenMP task args | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int d = bottom_blob.d; | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| const int outd = top_blob.d; | |||
| for (int z = 0; z < d; z++) | |||
| { | |||
| for (int i = 0; i < h; i++) | |||
| @@ -14,19 +14,11 @@ | |||
| static void deconvolution_pack16_avx512(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data_packed, const Mat& bias_data, int kernel_w, int kernel_h, int dilation_w, int dilation_h, int stride_w, int stride_h, int activation_type, const Mat& activation_params, const Option& opt) | |||
| { | |||
| int w = bottom_blob.w; | |||
| int h = bottom_blob.h; | |||
| int channels = bottom_blob.c; | |||
| int outw = top_blob.w; | |||
| int outh = top_blob.h; | |||
| int outch = top_blob.c; | |||
| const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1; | |||
| const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1; | |||
| const int maxk = kernel_w * kernel_h; | |||
| const float* bias_data_ptr = bias_data; | |||
| // num_output | |||
| @@ -35,6 +27,15 @@ static void deconvolution_pack16_avx512(const Mat& bottom_blob, Mat& top_blob, c | |||
| { | |||
| float* outptr = top_blob.channel(p); | |||
| const int maxk = kernel_w * kernel_h; | |||
| // shadowed variables to reduce the number of OpenMP task args | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int channels = bottom_blob.c; | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| for (int i = 0; i < outh; i++) | |||
| { | |||
| for (int j = 0; j < outw; j++) | |||
| @@ -14,19 +14,11 @@ | |||
| static void deconvolution_pack16to1_avx512(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data_packed, const Mat& bias_data, int kernel_w, int kernel_h, int dilation_w, int dilation_h, int stride_w, int stride_h, int activation_type, const Mat& activation_params, const Option& opt) | |||
| { | |||
| int w = bottom_blob.w; | |||
| int h = bottom_blob.h; | |||
| int channels = bottom_blob.c; | |||
| int outw = top_blob.w; | |||
| int outh = top_blob.h; | |||
| int outch = top_blob.c; | |||
| const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1; | |||
| const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1; | |||
| const int maxk = kernel_w * kernel_h; | |||
| const float* bias_data_ptr = bias_data; | |||
| // num_output | |||
| @@ -35,6 +27,15 @@ static void deconvolution_pack16to1_avx512(const Mat& bottom_blob, Mat& top_blob | |||
| { | |||
| float* outptr = top_blob.channel(p); | |||
| const int maxk = kernel_w * kernel_h; | |||
| // shadowed variables to reduce the number of OpenMP task args | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int channels = bottom_blob.c; | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| for (int i = 0; i < outh; i++) | |||
| { | |||
| for (int j = 0; j < outw; j++) | |||
| @@ -14,19 +14,11 @@ | |||
| static void deconvolution_pack16to4_avx512(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data_packed, const Mat& bias_data, int kernel_w, int kernel_h, int dilation_w, int dilation_h, int stride_w, int stride_h, int activation_type, const Mat& activation_params, const Option& opt) | |||
| { | |||
| int w = bottom_blob.w; | |||
| int h = bottom_blob.h; | |||
| int channels = bottom_blob.c; | |||
| int outw = top_blob.w; | |||
| int outh = top_blob.h; | |||
| int outch = top_blob.c; | |||
| const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1; | |||
| const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1; | |||
| const int maxk = kernel_w * kernel_h; | |||
| const float* bias_data_ptr = bias_data; | |||
| // num_output | |||
| @@ -35,6 +27,15 @@ static void deconvolution_pack16to4_avx512(const Mat& bottom_blob, Mat& top_blob | |||
| { | |||
| float* outptr = top_blob.channel(p); | |||
| const int maxk = kernel_w * kernel_h; | |||
| // shadowed variables to reduce the number of OpenMP task args | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int channels = bottom_blob.c; | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| for (int i = 0; i < outh; i++) | |||
| { | |||
| for (int j = 0; j < outw; j++) | |||
| @@ -14,19 +14,11 @@ | |||
| static void deconvolution_pack16to8_avx512(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data_packed, const Mat& bias_data, int kernel_w, int kernel_h, int dilation_w, int dilation_h, int stride_w, int stride_h, int activation_type, const Mat& activation_params, const Option& opt) | |||
| { | |||
| int w = bottom_blob.w; | |||
| int h = bottom_blob.h; | |||
| int channels = bottom_blob.c; | |||
| int outw = top_blob.w; | |||
| int outh = top_blob.h; | |||
| int outch = top_blob.c; | |||
| const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1; | |||
| const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1; | |||
| const int maxk = kernel_w * kernel_h; | |||
| const float* bias_data_ptr = bias_data; | |||
| // num_output | |||
| @@ -35,6 +27,15 @@ static void deconvolution_pack16to8_avx512(const Mat& bottom_blob, Mat& top_blob | |||
| { | |||
| float* outptr = top_blob.channel(p); | |||
| const int maxk = kernel_w * kernel_h; | |||
| // shadowed variables to reduce the number of OpenMP task args | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int channels = bottom_blob.c; | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| for (int i = 0; i < outh; i++) | |||
| { | |||
| for (int j = 0; j < outw; j++) | |||
| @@ -14,19 +14,11 @@ | |||
| static void deconvolution_pack1to16_avx512(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data_packed, const Mat& bias_data, int kernel_w, int kernel_h, int dilation_w, int dilation_h, int stride_w, int stride_h, int activation_type, const Mat& activation_params, const Option& opt) | |||
| { | |||
| int w = bottom_blob.w; | |||
| int h = bottom_blob.h; | |||
| int channels = bottom_blob.c; | |||
| int outw = top_blob.w; | |||
| int outh = top_blob.h; | |||
| int outch = top_blob.c; | |||
| const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1; | |||
| const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1; | |||
| const int maxk = kernel_w * kernel_h; | |||
| const float* bias_data_ptr = bias_data; | |||
| // num_output | |||
| @@ -35,6 +27,15 @@ static void deconvolution_pack1to16_avx512(const Mat& bottom_blob, Mat& top_blob | |||
| { | |||
| float* outptr = top_blob.channel(p); | |||
| const int maxk = kernel_w * kernel_h; | |||
| // shadowed variables to reduce the number of OpenMP task args | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int channels = bottom_blob.c; | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| for (int i = 0; i < outh; i++) | |||
| { | |||
| for (int j = 0; j < outw; j++) | |||
| @@ -14,19 +14,11 @@ | |||
| static void deconvolution_pack1to4_sse(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data_packed, const Mat& bias_data, int kernel_w, int kernel_h, int dilation_w, int dilation_h, int stride_w, int stride_h, int activation_type, const Mat& activation_params, const Option& opt) | |||
| { | |||
| int w = bottom_blob.w; | |||
| int h = bottom_blob.h; | |||
| int channels = bottom_blob.c; | |||
| int outw = top_blob.w; | |||
| int outh = top_blob.h; | |||
| int outch = top_blob.c; | |||
| const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1; | |||
| const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1; | |||
| const int maxk = kernel_w * kernel_h; | |||
| const float* bias_data_ptr = bias_data; | |||
| // num_output | |||
| @@ -35,6 +27,15 @@ static void deconvolution_pack1to4_sse(const Mat& bottom_blob, Mat& top_blob, co | |||
| { | |||
| float* outptr = top_blob.channel(p); | |||
| const int maxk = kernel_w * kernel_h; | |||
| // shadowed variables to reduce the number of OpenMP task args | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int channels = bottom_blob.c; | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| for (int i = 0; i < outh; i++) | |||
| { | |||
| for (int j = 0; j < outw; j++) | |||
| @@ -14,19 +14,11 @@ | |||
| static void deconvolution_pack1to8_avx(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data_packed, const Mat& bias_data, int kernel_w, int kernel_h, int dilation_w, int dilation_h, int stride_w, int stride_h, int activation_type, const Mat& activation_params, const Option& opt) | |||
| { | |||
| int w = bottom_blob.w; | |||
| int h = bottom_blob.h; | |||
| int channels = bottom_blob.c; | |||
| int outw = top_blob.w; | |||
| int outh = top_blob.h; | |||
| int outch = top_blob.c; | |||
| const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1; | |||
| const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1; | |||
| const int maxk = kernel_w * kernel_h; | |||
| const float* bias_data_ptr = bias_data; | |||
| // num_output | |||
| @@ -35,6 +27,15 @@ static void deconvolution_pack1to8_avx(const Mat& bottom_blob, Mat& top_blob, co | |||
| { | |||
| float* outptr = top_blob.channel(p); | |||
| const int maxk = kernel_w * kernel_h; | |||
| // shadowed variables to reduce the number of OpenMP task args | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int channels = bottom_blob.c; | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| for (int i = 0; i < outh; i++) | |||
| { | |||
| for (int j = 0; j < outw; j++) | |||
| @@ -14,19 +14,11 @@ | |||
| static void deconvolution_pack4_sse(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data_packed, const Mat& bias_data, int kernel_w, int kernel_h, int dilation_w, int dilation_h, int stride_w, int stride_h, int activation_type, const Mat& activation_params, const Option& opt) | |||
| { | |||
| int w = bottom_blob.w; | |||
| int h = bottom_blob.h; | |||
| int channels = bottom_blob.c; | |||
| int outw = top_blob.w; | |||
| int outh = top_blob.h; | |||
| int outch = top_blob.c; | |||
| const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1; | |||
| const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1; | |||
| const int maxk = kernel_w * kernel_h; | |||
| const float* bias_data_ptr = bias_data; | |||
| // num_output | |||
| @@ -35,6 +27,15 @@ static void deconvolution_pack4_sse(const Mat& bottom_blob, Mat& top_blob, const | |||
| { | |||
| float* outptr = top_blob.channel(p); | |||
| const int maxk = kernel_w * kernel_h; | |||
| // shadowed variables to reduce the number of OpenMP task args | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int channels = bottom_blob.c; | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| for (int i = 0; i < outh; i++) | |||
| { | |||
| for (int j = 0; j < outw; j++) | |||
| @@ -14,19 +14,11 @@ | |||
| static void deconvolution_pack4to1_sse(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data_packed, const Mat& bias_data, int kernel_w, int kernel_h, int dilation_w, int dilation_h, int stride_w, int stride_h, int activation_type, const Mat& activation_params, const Option& opt) | |||
| { | |||
| int w = bottom_blob.w; | |||
| int h = bottom_blob.h; | |||
| int channels = bottom_blob.c; | |||
| int outw = top_blob.w; | |||
| int outh = top_blob.h; | |||
| int outch = top_blob.c; | |||
| const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1; | |||
| const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1; | |||
| const int maxk = kernel_w * kernel_h; | |||
| const float* bias_data_ptr = bias_data; | |||
| // num_output | |||
| @@ -35,6 +27,15 @@ static void deconvolution_pack4to1_sse(const Mat& bottom_blob, Mat& top_blob, co | |||
| { | |||
| float* outptr = top_blob.channel(p); | |||
| const int maxk = kernel_w * kernel_h; | |||
| // shadowed variables to reduce the number of OpenMP task args | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int channels = bottom_blob.c; | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| for (int i = 0; i < outh; i++) | |||
| { | |||
| for (int j = 0; j < outw; j++) | |||
| @@ -14,19 +14,11 @@ | |||
| static void deconvolution_pack4to16_avx512(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data_packed, const Mat& bias_data, int kernel_w, int kernel_h, int dilation_w, int dilation_h, int stride_w, int stride_h, int activation_type, const Mat& activation_params, const Option& opt) | |||
| { | |||
| int w = bottom_blob.w; | |||
| int h = bottom_blob.h; | |||
| int channels = bottom_blob.c; | |||
| int outw = top_blob.w; | |||
| int outh = top_blob.h; | |||
| int outch = top_blob.c; | |||
| const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1; | |||
| const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1; | |||
| const int maxk = kernel_w * kernel_h; | |||
| const float* bias_data_ptr = bias_data; | |||
| // num_output | |||
| @@ -35,6 +27,15 @@ static void deconvolution_pack4to16_avx512(const Mat& bottom_blob, Mat& top_blob | |||
| { | |||
| float* outptr = top_blob.channel(p); | |||
| const int maxk = kernel_w * kernel_h; | |||
| // shadowed variables to reduce the number of OpenMP task args | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int channels = bottom_blob.c; | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| for (int i = 0; i < outh; i++) | |||
| { | |||
| for (int j = 0; j < outw; j++) | |||
| @@ -14,19 +14,11 @@ | |||
| static void deconvolution_pack4to8_avx(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data_packed, const Mat& bias_data, int kernel_w, int kernel_h, int dilation_w, int dilation_h, int stride_w, int stride_h, int activation_type, const Mat& activation_params, const Option& opt) | |||
| { | |||
| int w = bottom_blob.w; | |||
| int h = bottom_blob.h; | |||
| int channels = bottom_blob.c; | |||
| int outw = top_blob.w; | |||
| int outh = top_blob.h; | |||
| int outch = top_blob.c; | |||
| const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1; | |||
| const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1; | |||
| const int maxk = kernel_w * kernel_h; | |||
| const float* bias_data_ptr = bias_data; | |||
| // num_output | |||
| @@ -35,6 +27,15 @@ static void deconvolution_pack4to8_avx(const Mat& bottom_blob, Mat& top_blob, co | |||
| { | |||
| float* outptr = top_blob.channel(p); | |||
| const int maxk = kernel_w * kernel_h; | |||
| // shadowed variables to reduce the number of OpenMP task args | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int channels = bottom_blob.c; | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| for (int i = 0; i < outh; i++) | |||
| { | |||
| for (int j = 0; j < outw; j++) | |||
| @@ -14,19 +14,11 @@ | |||
| static void deconvolution_pack8_avx(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data_packed, const Mat& bias_data, int kernel_w, int kernel_h, int dilation_w, int dilation_h, int stride_w, int stride_h, int activation_type, const Mat& activation_params, const Option& opt) | |||
| { | |||
| int w = bottom_blob.w; | |||
| int h = bottom_blob.h; | |||
| int channels = bottom_blob.c; | |||
| int outw = top_blob.w; | |||
| int outh = top_blob.h; | |||
| int outch = top_blob.c; | |||
| const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1; | |||
| const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1; | |||
| const int maxk = kernel_w * kernel_h; | |||
| const float* bias_data_ptr = bias_data; | |||
| // num_output | |||
| @@ -35,6 +27,15 @@ static void deconvolution_pack8_avx(const Mat& bottom_blob, Mat& top_blob, const | |||
| { | |||
| float* outptr = top_blob.channel(p); | |||
| const int maxk = kernel_w * kernel_h; | |||
| // shadowed variables to reduce the number of OpenMP task args | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int channels = bottom_blob.c; | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| for (int i = 0; i < outh; i++) | |||
| { | |||
| for (int j = 0; j < outw; j++) | |||
| @@ -14,19 +14,11 @@ | |||
| static void deconvolution_pack8to1_avx(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data_packed, const Mat& bias_data, int kernel_w, int kernel_h, int dilation_w, int dilation_h, int stride_w, int stride_h, int activation_type, const Mat& activation_params, const Option& opt) | |||
| { | |||
| int w = bottom_blob.w; | |||
| int h = bottom_blob.h; | |||
| int channels = bottom_blob.c; | |||
| int outw = top_blob.w; | |||
| int outh = top_blob.h; | |||
| int outch = top_blob.c; | |||
| const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1; | |||
| const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1; | |||
| const int maxk = kernel_w * kernel_h; | |||
| const float* bias_data_ptr = bias_data; | |||
| // num_output | |||
| @@ -35,6 +27,15 @@ static void deconvolution_pack8to1_avx(const Mat& bottom_blob, Mat& top_blob, co | |||
| { | |||
| float* outptr = top_blob.channel(p); | |||
| const int maxk = kernel_w * kernel_h; | |||
| // shadowed variables to reduce the number of OpenMP task args | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int channels = bottom_blob.c; | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| for (int i = 0; i < outh; i++) | |||
| { | |||
| for (int j = 0; j < outw; j++) | |||
| @@ -14,19 +14,11 @@ | |||
| static void deconvolution_pack8to16_avx512(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data_packed, const Mat& bias_data, int kernel_w, int kernel_h, int dilation_w, int dilation_h, int stride_w, int stride_h, int activation_type, const Mat& activation_params, const Option& opt) | |||
| { | |||
| int w = bottom_blob.w; | |||
| int h = bottom_blob.h; | |||
| int channels = bottom_blob.c; | |||
| int outw = top_blob.w; | |||
| int outh = top_blob.h; | |||
| int outch = top_blob.c; | |||
| const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1; | |||
| const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1; | |||
| const int maxk = kernel_w * kernel_h; | |||
| const float* bias_data_ptr = bias_data; | |||
| // num_output | |||
| @@ -35,6 +27,15 @@ static void deconvolution_pack8to16_avx512(const Mat& bottom_blob, Mat& top_blob | |||
| { | |||
| float* outptr = top_blob.channel(p); | |||
| const int maxk = kernel_w * kernel_h; | |||
| // shadowed variables to reduce the number of OpenMP task args | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int channels = bottom_blob.c; | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| for (int i = 0; i < outh; i++) | |||
| { | |||
| for (int j = 0; j < outw; j++) | |||
| @@ -14,19 +14,11 @@ | |||
| static void deconvolution_pack8to4_avx(const Mat& bottom_blob, Mat& top_blob, const Mat& weight_data_packed, const Mat& bias_data, int kernel_w, int kernel_h, int dilation_w, int dilation_h, int stride_w, int stride_h, int activation_type, const Mat& activation_params, const Option& opt) | |||
| { | |||
| int w = bottom_blob.w; | |||
| int h = bottom_blob.h; | |||
| int channels = bottom_blob.c; | |||
| int outw = top_blob.w; | |||
| int outh = top_blob.h; | |||
| int outch = top_blob.c; | |||
| const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1; | |||
| const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1; | |||
| const int maxk = kernel_w * kernel_h; | |||
| const float* bias_data_ptr = bias_data; | |||
| // num_output | |||
| @@ -35,6 +27,15 @@ static void deconvolution_pack8to4_avx(const Mat& bottom_blob, Mat& top_blob, co | |||
| { | |||
| float* outptr = top_blob.channel(p); | |||
| const int maxk = kernel_w * kernel_h; | |||
| // shadowed variables to reduce the number of OpenMP task args | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int channels = bottom_blob.c; | |||
| const int outw = top_blob.w; | |||
| const int outh = top_blob.h; | |||
| for (int i = 0; i < outh; i++) | |||
| { | |||
| for (int j = 0; j < outw; j++) | |||
| @@ -311,6 +311,13 @@ int Deconvolution_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opti | |||
| { | |||
| float* outptr = top_blob_bordered.channel(p); | |||
| // shadowed variables to reduce the number of OpenMP task args | |||
| const int w = bottom_blob.w; | |||
| const int h = bottom_blob.h; | |||
| const int channels = bottom_blob.c; | |||
| const int outw = top_blob_bordered.w; | |||
| const int outh = top_blob_bordered.h; | |||
| for (int i = 0; i < outh; i++) | |||
| { | |||
| for (int j = 0; j < outw; j++) | |||