diff --git a/src/layer/arm/convolution_3x3_pack1to4.h b/src/layer/arm/convolution_3x3_pack1to4.h index 333b66abd..f515cc1d3 100644 --- a/src/layer/arm/convolution_3x3_pack1to4.h +++ b/src/layer/arm/convolution_3x3_pack1to4.h @@ -12,6 +12,545 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +static void conv3x3s1_pack1to4_neon(const Mat& bottom_blob, Mat& top_blob, const Mat& kernel, const Mat& _bias, const Option& opt) +{ + int inch = bottom_blob.c; + int outw = top_blob.w; + int outh = top_blob.h; + int outch = top_blob.c; + + const float* bias = _bias; + + #pragma omp parallel for num_threads(opt.num_threads) + for (int p=0; pforward_inplace(top_blob, opt); + } + + return 0; + } + if (kernel_w == 3 && kernel_h == 3 && stride_w == 2 && stride_h == 2 && dilation_w == 1 && dilation_h == 1) { conv3x3s2_pack1to4_neon(bottom_blob_bordered, top_blob, weight_data_pack1to4, bias_data, opt);