| @@ -30,8 +30,10 @@ | |||
| * [Dropout](#dropout) | |||
| * [Eltwise](#eltwise) | |||
| * [ELU](#elu) | |||
| * [Embed](#embed) | |||
| * [Exp](#exp) | |||
| * [Flatten](#flatten) | |||
| * [Fold](#fold) | |||
| * [GELU](#gelu) | |||
| * [GLU](#glu) | |||
| * [Gemm](#gemm) | |||
| @@ -84,6 +86,7 @@ | |||
| * [Threshold](#threshold) | |||
| * [Tile](#tile) | |||
| * [UnaryOp](#unaryop) | |||
| * [Unfold](#unfold) | |||
| # AbsVal | |||
| ``` | |||
| @@ -474,12 +477,15 @@ y = crop(x) | |||
| | --------- | ------------- | ----- | --------- | ----------------- | | |||
| | 0 | woffset | int | 0 | | | |||
| | 1 | hoffset | int | 0 | | | |||
| | 2 | coffset | int | 1 | | | |||
| | 3 | outw | int | 1 | | | |||
| | 13 | doffset | int | 0 | | | |||
| | 2 | coffset | int | 0 | | | |||
| | 3 | outw | int | 0 | | | |||
| | 4 | outh | int | 0 | | | |||
| | 14 | outd | int | 0 | | | |||
| | 5 | outc | int | 0 | | | |||
| | 6 | woffset2 | int | 0 | | | |||
| | 7 | hoffset2 | int | 1 | | | |||
| | 7 | hoffset2 | int | 0 | | | |||
| | 15 | doffset2 | int | 0 | | | |||
| | 8 | coffset2 | int | 0 | | | |||
| | 9 | starts | array | [ ] | | | |||
| | 10 | ends | array | [ ] | | | |||
| @@ -819,6 +825,23 @@ else y = x | |||
| | --------- | ------------- | ----- | --------- | ----------------- | | |||
| | 0 | alpha | float | 0.1f | | | |||
| # Embed | |||
| ``` | |||
| y = embedding(x) | |||
| ``` | |||
| | param id | name | type | default | description | | |||
| | --------- | ------------- | ----- | --------- | ----------------- | | |||
| | 0 | num_output | int | 0 | | | |||
| | 1 | input_dim | int | 0 | | | |||
| | 2 | bias_term | int | 0 | | | |||
| | 3 | weight_data_size | int | 0 | | | |||
| | weight | type | shape | | |||
| | ------------- | ----- | --------------------- | | |||
| | weight_data | float | [weight_data_size] | | |||
| bias_data | float | [num_output] | |||
| # Exp | |||
| ``` | |||
| if base == -1 y = exp(shift + x * scale) | |||
| @@ -839,6 +862,29 @@ Reshape blob to 1 dimension | |||
| * one_blob_only | |||
| # Fold | |||
| ``` | |||
| y = fold(x) | |||
| ``` | |||
| * one_blob_only | |||
| | param id | name | type | default | description | | |||
| | --------- | ------------- | ----- | --------- | ----------------- | | |||
| | 0 | num_output | int | 0 | | | |||
| | 1 | kernel_w | int | 0 | | | |||
| | 2 | dilation_w | int | 1 | | | |||
| | 3 | stride_w | int | 1 | | | |||
| | 4 | pad_left | int | 0 | | | |||
| | 11 | kernel_h | int | kernel_w | | | |||
| | 12 | dilation_h | int | dilation_w | | | |||
| | 13 | stride_h | int | stride_w | | | |||
| | 14 | pad_top | int | pad_left | | | |||
| | 15 | pad_right | int | pad_left | | | |||
| | 16 | pad_bottom | int | pad_top | | | |||
| | 20 | output_w | int | 0 | | | |||
| | 21 | output_h | int | output_w | | | |||
| # GELU | |||
| ``` | |||
| if fast_gelu == 1 y = 0.5 * x * (1 + tanh(0.79788452 * (x + 0.044715 * x * x * x))); | |||
| @@ -1187,6 +1233,7 @@ y = data | |||
| | 1 | h | int | 0 | | | |||
| | 11 | d | int | 0 | | | |||
| | 2 | c | int | 0 | | | |||
| | 21 | load_type | int | 1 | 1=fp32 | | |||
| | weight | type | shape | | |||
| | ------------- | ----- | --------------------- | | |||
| @@ -1537,6 +1584,7 @@ y = reduce_op(x * coeff) | |||
| | 2 | coeff | float | 1.f | | | |||
| | 3 | axes | array | [ ] | | | |||
| | 4 | keepdims | int | 0 | | | |||
| | 5 | fixbug0 | int | 0 | hack for bug fix, should be 1 | | |||
| Operation type: | |||
| - 0 = SUM | |||
| @@ -1829,3 +1877,24 @@ Operation type: | |||
| - 17 = LOG10 | |||
| - 18 = ROUND | |||
| - 19 = TRUNC | |||
| # Unfold | |||
| ``` | |||
| y = unfold(x) | |||
| ``` | |||
| * one_blob_only | |||
| | param id | name | type | default | description | | |||
| | --------- | ------------- | ----- | --------- | ----------------- | | |||
| | 0 | num_output | int | 0 | | | |||
| | 1 | kernel_w | int | 0 | | | |||
| | 2 | dilation_w | int | 1 | | | |||
| | 3 | stride_w | int | 1 | | | |||
| | 4 | pad_left | int | 0 | | | |||
| | 11 | kernel_h | int | kernel_w | | | |||
| | 12 | dilation_h | int | dilation_w | | | |||
| | 13 | stride_h | int | stride_w | | | |||
| | 14 | pad_top | int | pad_left | | | |||
| | 15 | pad_right | int | pad_left | | | |||
| | 16 | pad_bottom | int | pad_top | | | |||
| @@ -68,7 +68,8 @@ int Convolution1D_arm::create_pipeline(const Option& opt) | |||
| convolution1d_transform_kernel_packed(weight_data, weight_data_tm, num_input, num_output, kernel_w); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -233,13 +234,14 @@ int Convolution1D_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector | |||
| } | |||
| #if NCNN_BF16 | |||
| int Convolution1D_arm::create_pipeline_bf16s(const Option& /*opt*/) | |||
| int Convolution1D_arm::create_pipeline_bf16s(const Option& opt) | |||
| { | |||
| const int num_input = weight_data_size / kernel_w / num_output; | |||
| convolution1d_transform_kernel_packed_bf16s(weight_data, weight_data_tm, num_input, num_output, kernel_w); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -36,7 +36,8 @@ int Convolution1D_arm::create_pipeline_fp16s(const Option& opt) | |||
| ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -194,7 +194,8 @@ int Convolution_arm::create_pipeline(const Option& opt) | |||
| convolution_dilation1->create_pipeline(opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -224,7 +225,8 @@ int Convolution_arm::create_pipeline(const Option& opt) | |||
| else | |||
| conv3x3s1_winograd23_transform_kernel(weight_data, weight_winograd23_data, num_input, num_output, opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -270,7 +272,8 @@ int Convolution_arm::create_pipeline(const Option& opt) | |||
| { | |||
| convolution_im2col_gemm_transform_kernel(weight_data, weight_sgemm_data, num_input, num_output, kernel_w, kernel_h, opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -305,7 +308,8 @@ int Convolution_arm::create_pipeline(const Option& opt) | |||
| convolution_transform_kernel_packed(weight_data, weight_data_tm, num_input, num_output, kernel_w, kernel_h); | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -904,7 +908,8 @@ int Convolution_arm::create_pipeline_bf16s(const Option& opt) | |||
| else | |||
| conv3x3s1_winograd23_transform_kernel(weight_data, weight_winograd23_data, num_input, num_output, opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -950,7 +955,8 @@ int Convolution_arm::create_pipeline_bf16s(const Option& opt) | |||
| { | |||
| convolution_im2col_gemm_transform_kernel_bf16s(weight_data, weight_sgemm_data, num_input, num_output, kernel_w, kernel_h, opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -971,7 +977,8 @@ int Convolution_arm::create_pipeline_bf16s(const Option& opt) | |||
| convolution_transform_kernel_packed_bf16s(weight_data, weight_data_tm, num_input, num_output, kernel_w, kernel_h); | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -1284,7 +1291,8 @@ int Convolution_arm::create_pipeline_int8_arm(const Option& opt) | |||
| scale_in_data[p] = scale_in; | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -108,7 +108,8 @@ int Convolution_arm::create_pipeline_fp16s(const Option& opt) | |||
| else | |||
| conv3x3s1_winograd23_transform_kernel_fp16sa(weight_data, weight_winograd23_data, num_input, num_output, opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| if (opt.use_fp16_arithmetic) | |||
| { | |||
| @@ -189,7 +190,8 @@ int Convolution_arm::create_pipeline_fp16s(const Option& opt) | |||
| ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -219,7 +221,8 @@ int Convolution_arm::create_pipeline_fp16s(const Option& opt) | |||
| ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -119,7 +119,8 @@ int ConvolutionDepthWise_arm::create_pipeline(const Option& opt) | |||
| ncnn::cast_float32_to_bfloat16(weight_data, weight_data_tm, opt); | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -161,7 +162,8 @@ int ConvolutionDepthWise_arm::create_pipeline(const Option& opt) | |||
| } | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -169,7 +171,8 @@ int ConvolutionDepthWise_arm::create_pipeline(const Option& opt) | |||
| // group convolution | |||
| create_group_ops(opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -1022,7 +1025,8 @@ int ConvolutionDepthWise_arm::create_pipeline_int8_arm(const Option& opt) | |||
| weight_data_tm = weight_data; | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -1030,7 +1034,8 @@ int ConvolutionDepthWise_arm::create_pipeline_int8_arm(const Option& opt) | |||
| // group convolution | |||
| create_group_ops(opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -76,7 +76,8 @@ int ConvolutionDepthWise_arm::create_pipeline_fp16s(const Option& opt) | |||
| ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -84,7 +85,8 @@ int ConvolutionDepthWise_arm::create_pipeline_fp16s(const Option& opt) | |||
| // group convolution | |||
| create_group_ops(opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -211,7 +211,8 @@ int Deconvolution_arm::create_pipeline(const Option& opt) | |||
| } | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -954,7 +955,8 @@ int Deconvolution_arm::create_pipeline_bf16s(const Option& opt) | |||
| } | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -154,7 +154,8 @@ int Deconvolution_arm::create_pipeline_fp16s(const Option& opt) | |||
| ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -104,7 +104,8 @@ int DeconvolutionDepthWise_arm::create_pipeline(const Option& opt) | |||
| ncnn::cast_float32_to_bfloat16(weight_data_transposed, weight_data_tm, opt); | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -190,7 +191,8 @@ int DeconvolutionDepthWise_arm::create_pipeline(const Option& opt) | |||
| } | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -145,7 +145,8 @@ int DeconvolutionDepthWise_arm::create_pipeline_fp16s(const Option& opt) | |||
| } | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -4201,7 +4201,8 @@ int Gemm_arm::create_pipeline(const Option& opt) | |||
| } | |||
| } | |||
| A_data.release(); | |||
| if (opt.lightmode) | |||
| A_data.release(); | |||
| } | |||
| if (constantB) | |||
| @@ -4241,7 +4242,8 @@ int Gemm_arm::create_pipeline(const Option& opt) | |||
| } | |||
| } | |||
| B_data.release(); | |||
| if (opt.lightmode) | |||
| B_data.release(); | |||
| } | |||
| if (constantC && constant_broadcast_type_C != -1) | |||
| @@ -4271,7 +4273,8 @@ int Gemm_arm::create_pipeline(const Option& opt) | |||
| CT_data = C2; | |||
| } | |||
| C_data.release(); | |||
| if (opt.lightmode) | |||
| C_data.release(); | |||
| } | |||
| if (constantA || constantB || constantC) | |||
| @@ -4889,7 +4892,8 @@ int Gemm_arm::create_pipeline_bf16s(const Option& opt) | |||
| } | |||
| } | |||
| A_data.release(); | |||
| if (opt.lightmode) | |||
| A_data.release(); | |||
| } | |||
| if (constantB) | |||
| @@ -4929,7 +4933,8 @@ int Gemm_arm::create_pipeline_bf16s(const Option& opt) | |||
| } | |||
| } | |||
| B_data.release(); | |||
| if (opt.lightmode) | |||
| B_data.release(); | |||
| } | |||
| if (constantC && constant_broadcast_type_C != -1) | |||
| @@ -4959,7 +4964,8 @@ int Gemm_arm::create_pipeline_bf16s(const Option& opt) | |||
| CT_data = C2; | |||
| } | |||
| C_data.release(); | |||
| if (opt.lightmode) | |||
| C_data.release(); | |||
| } | |||
| if (constantA || constantB || constantC) | |||
| @@ -2736,7 +2736,8 @@ int Gemm_arm::create_pipeline_fp16sa(const Option& opt) | |||
| } | |||
| } | |||
| A_data.release(); | |||
| if (opt.lightmode) | |||
| A_data.release(); | |||
| } | |||
| if (constantB) | |||
| @@ -2776,7 +2777,8 @@ int Gemm_arm::create_pipeline_fp16sa(const Option& opt) | |||
| } | |||
| } | |||
| B_data.release(); | |||
| if (opt.lightmode) | |||
| B_data.release(); | |||
| } | |||
| if (constantC && constant_broadcast_type_C != -1) | |||
| @@ -2802,7 +2804,8 @@ int Gemm_arm::create_pipeline_fp16sa(const Option& opt) | |||
| } | |||
| } | |||
| C_data.release(); | |||
| if (opt.lightmode) | |||
| C_data.release(); | |||
| } | |||
| if (constantA || constantB || constantC) | |||
| @@ -427,7 +427,8 @@ int Gemm_arm::create_pipeline_fp16s(const Option& opt) | |||
| } | |||
| } | |||
| A_data.release(); | |||
| if (opt.lightmode) | |||
| A_data.release(); | |||
| } | |||
| if (constantB) | |||
| @@ -467,7 +468,8 @@ int Gemm_arm::create_pipeline_fp16s(const Option& opt) | |||
| } | |||
| } | |||
| B_data.release(); | |||
| if (opt.lightmode) | |||
| B_data.release(); | |||
| } | |||
| if (constantC && constant_broadcast_type_C != -1) | |||
| @@ -497,7 +499,8 @@ int Gemm_arm::create_pipeline_fp16s(const Option& opt) | |||
| CT_data = C2; | |||
| } | |||
| C_data.release(); | |||
| if (opt.lightmode) | |||
| C_data.release(); | |||
| } | |||
| if (constantA || constantB || constantC) | |||
| @@ -250,9 +250,12 @@ int GRU_arm::create_pipeline(const Option& opt) | |||
| } | |||
| } | |||
| weight_xc_data.release(); | |||
| bias_c_data.release(); | |||
| weight_hc_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| weight_xc_data.release(); | |||
| bias_c_data.release(); | |||
| weight_hc_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -1372,9 +1375,12 @@ int GRU_arm::create_pipeline_bf16s(const Option& opt) | |||
| } | |||
| } | |||
| weight_xc_data.release(); | |||
| bias_c_data.release(); | |||
| weight_hc_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| weight_xc_data.release(); | |||
| bias_c_data.release(); | |||
| weight_hc_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -914,9 +914,12 @@ int GRU_arm::create_pipeline_fp16s(const Option& opt) | |||
| } | |||
| } | |||
| weight_xc_data.release(); | |||
| bias_c_data.release(); | |||
| weight_hc_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| weight_xc_data.release(); | |||
| bias_c_data.release(); | |||
| weight_hc_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -122,7 +122,8 @@ int InnerProduct_arm::create_pipeline(const Option& opt) | |||
| weight_data_tm = weight_data; | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -865,7 +866,8 @@ int InnerProduct_arm::create_pipeline_bf16s(const Option& opt) | |||
| } | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -1258,7 +1260,8 @@ int InnerProduct_arm::create_pipeline_int8_arm(const Option& opt) | |||
| scale_in_data[p] = scale_in; | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -41,7 +41,8 @@ int InnerProduct_arm::create_pipeline_fp16s(const Option& opt) | |||
| } | |||
| #endif | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -124,9 +124,12 @@ int LSTM_arm::create_pipeline(const Option& opt) | |||
| } | |||
| } | |||
| weight_xc_data.release(); | |||
| bias_c_data.release(); | |||
| weight_hc_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| weight_xc_data.release(); | |||
| bias_c_data.release(); | |||
| weight_hc_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -928,9 +931,12 @@ int LSTM_arm::create_pipeline_bf16s(const Option& opt) | |||
| } | |||
| } | |||
| weight_xc_data.release(); | |||
| bias_c_data.release(); | |||
| weight_hc_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| weight_xc_data.release(); | |||
| bias_c_data.release(); | |||
| weight_hc_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -835,9 +835,12 @@ int LSTM_arm::create_pipeline_fp16s(const Option& opt) | |||
| } | |||
| } | |||
| weight_xc_data.release(); | |||
| bias_c_data.release(); | |||
| weight_hc_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| weight_xc_data.release(); | |||
| bias_c_data.release(); | |||
| weight_hc_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -84,8 +84,11 @@ int MultiHeadAttention_arm::create_pipeline(const Option& _opt) | |||
| q_gemm->load_model(ModelBinFromMatArray(weights)); | |||
| q_gemm->create_pipeline(opt); | |||
| q_weight_data.release(); | |||
| q_bias_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| q_weight_data.release(); | |||
| q_bias_data.release(); | |||
| } | |||
| } | |||
| { | |||
| @@ -110,8 +113,11 @@ int MultiHeadAttention_arm::create_pipeline(const Option& _opt) | |||
| k_gemm->load_model(ModelBinFromMatArray(weights)); | |||
| k_gemm->create_pipeline(opt); | |||
| k_weight_data.release(); | |||
| k_bias_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| k_weight_data.release(); | |||
| k_bias_data.release(); | |||
| } | |||
| } | |||
| { | |||
| @@ -136,8 +142,11 @@ int MultiHeadAttention_arm::create_pipeline(const Option& _opt) | |||
| v_gemm->load_model(ModelBinFromMatArray(weights)); | |||
| v_gemm->create_pipeline(opt); | |||
| v_weight_data.release(); | |||
| v_bias_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| v_weight_data.release(); | |||
| v_bias_data.release(); | |||
| } | |||
| } | |||
| { | |||
| @@ -160,8 +169,11 @@ int MultiHeadAttention_arm::create_pipeline(const Option& _opt) | |||
| o_gemm->load_model(ModelBinFromMatArray(weights)); | |||
| o_gemm->create_pipeline(opt); | |||
| out_weight_data.release(); | |||
| out_bias_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| out_weight_data.release(); | |||
| out_bias_data.release(); | |||
| } | |||
| } | |||
| { | |||
| @@ -139,9 +139,12 @@ int RNN_arm::create_pipeline(const Option& opt) | |||
| bias_c_data_packed = bias_c_data; | |||
| weight_xc_data.release(); | |||
| bias_c_data.release(); | |||
| weight_hc_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| weight_xc_data.release(); | |||
| bias_c_data.release(); | |||
| weight_hc_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -736,9 +739,12 @@ int RNN_arm::create_pipeline_bf16s(const Option& opt) | |||
| cast_float32_to_bfloat16(bias_c_data, bias_c_data_packed, opt); | |||
| weight_xc_data.release(); | |||
| bias_c_data.release(); | |||
| weight_hc_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| weight_xc_data.release(); | |||
| bias_c_data.release(); | |||
| weight_hc_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -517,9 +517,12 @@ int RNN_arm::create_pipeline_fp16s(const Option& opt) | |||
| cast_float32_to_float16(bias_c_data, bias_c_data_packed, opt); | |||
| weight_xc_data.release(); | |||
| bias_c_data.release(); | |||
| weight_hc_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| weight_xc_data.release(); | |||
| bias_c_data.release(); | |||
| weight_hc_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -78,6 +78,9 @@ int Convolution1D_loongarch::create_pipeline(const Option& opt) | |||
| } | |||
| } | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -281,7 +284,7 @@ int Convolution1D_loongarch::forward(const Mat& bottom_blob, Mat& top_blob, cons | |||
| sum = bias_data[p]; | |||
| } | |||
| const float* kptr = (const float*)weight_data + kernel_w * h * p; | |||
| const float* kptr = weight_data_packed.channel(p); | |||
| for (int q = 0; q < h; q++) | |||
| { | |||
| @@ -225,7 +225,8 @@ int Convolution_loongarch::create_pipeline(const Option& opt) | |||
| } | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -789,7 +790,8 @@ int Convolution_loongarch::create_pipeline_int8_loongarch(const Option& opt) | |||
| scale_in_data[p] = scale_in; | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -83,7 +83,8 @@ int ConvolutionDepthWise_loongarch::create_pipeline(const Option& opt) | |||
| weight_data_tm = weight_data; | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -91,7 +92,8 @@ int ConvolutionDepthWise_loongarch::create_pipeline(const Option& opt) | |||
| // group convolution | |||
| create_group_ops(opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -600,7 +602,8 @@ int ConvolutionDepthWise_loongarch::create_pipeline_int8_loongarch(const Option& | |||
| weight_data_tm = weight_data; | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -608,7 +611,8 @@ int ConvolutionDepthWise_loongarch::create_pipeline_int8_loongarch(const Option& | |||
| // group convolution | |||
| create_group_ops(opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -126,7 +126,8 @@ int Deconvolution_loongarch::create_pipeline(const Option& opt) | |||
| { | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -82,7 +82,8 @@ int DeconvolutionDepthWise_loongarch::create_pipeline(const Option& opt) | |||
| weight_data_tm = weight_data_transposed; | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -90,7 +91,8 @@ int DeconvolutionDepthWise_loongarch::create_pipeline(const Option& opt) | |||
| // group convolution | |||
| create_group_ops(opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -99,7 +99,8 @@ int InnerProduct_loongarch::create_pipeline(const Option& opt) | |||
| weight_data_tm = weight_data; | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -652,7 +653,8 @@ int InnerProduct_loongarch::create_pipeline_fp16s(const Option& opt) | |||
| ncnn::cast_float32_to_float16(weight_data_r2, weight_data_tm, opt); | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -1140,7 +1142,8 @@ int InnerProduct_loongarch::create_pipeline_int8_loongarch(const Option& opt) | |||
| scale_in_data[p] = scale_in; | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -78,6 +78,9 @@ int Convolution1D_mips::create_pipeline(const Option& opt) | |||
| } | |||
| } | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -281,7 +284,7 @@ int Convolution1D_mips::forward(const Mat& bottom_blob, Mat& top_blob, const Opt | |||
| sum = bias_data[p]; | |||
| } | |||
| const float* kptr = (const float*)weight_data + kernel_w * h * p; | |||
| const float* kptr = weight_data_packed.channel(p); | |||
| for (int q = 0; q < h; q++) | |||
| { | |||
| @@ -225,7 +225,8 @@ int Convolution_mips::create_pipeline(const Option& opt) | |||
| } | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -789,7 +790,8 @@ int Convolution_mips::create_pipeline_int8_mips(const Option& opt) | |||
| scale_in_data[p] = scale_in; | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -83,7 +83,8 @@ int ConvolutionDepthWise_mips::create_pipeline(const Option& opt) | |||
| weight_data_tm = weight_data; | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -91,7 +92,8 @@ int ConvolutionDepthWise_mips::create_pipeline(const Option& opt) | |||
| // group convolution | |||
| create_group_ops(opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -600,7 +602,8 @@ int ConvolutionDepthWise_mips::create_pipeline_int8_mips(const Option& opt) | |||
| weight_data_tm = weight_data; | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -608,7 +611,8 @@ int ConvolutionDepthWise_mips::create_pipeline_int8_mips(const Option& opt) | |||
| // group convolution | |||
| create_group_ops(opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -126,7 +126,8 @@ int Deconvolution_mips::create_pipeline(const Option& opt) | |||
| { | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -82,7 +82,8 @@ int DeconvolutionDepthWise_mips::create_pipeline(const Option& opt) | |||
| weight_data_tm = weight_data_transposed; | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -90,7 +91,8 @@ int DeconvolutionDepthWise_mips::create_pipeline(const Option& opt) | |||
| // group convolution | |||
| create_group_ops(opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -99,7 +99,8 @@ int InnerProduct_mips::create_pipeline(const Option& opt) | |||
| weight_data_tm = weight_data; | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -652,7 +653,8 @@ int InnerProduct_mips::create_pipeline_fp16s(const Option& opt) | |||
| ncnn::cast_float32_to_float16(weight_data_r2, weight_data_tm, opt); | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -1140,7 +1142,8 @@ int InnerProduct_mips::create_pipeline_int8_mips(const Option& opt) | |||
| scale_in_data[p] = scale_in; | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -95,6 +95,9 @@ int Convolution1D_riscv::create_pipeline(const Option& opt) | |||
| } | |||
| } | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -308,7 +311,7 @@ int Convolution1D_riscv::forward(const Mat& bottom_blob, Mat& top_blob, const Op | |||
| sum = bias_data[p]; | |||
| } | |||
| const float* kptr = (const float*)weight_data + kernel_w * h * p; | |||
| const float* kptr = weight_data_packed.channel(p); | |||
| for (int q = 0; q < h; q++) | |||
| { | |||
| @@ -470,7 +473,8 @@ int Convolution1D_riscv::create_pipeline_fp16s(const Option& opt) | |||
| ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -237,7 +237,8 @@ int Convolution_riscv::create_pipeline(const Option& opt) | |||
| } | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -834,7 +835,8 @@ int Convolution_riscv::create_pipeline_fp16s(const Option& opt) | |||
| ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -104,7 +104,8 @@ int ConvolutionDepthWise_riscv::create_pipeline(const Option& opt) | |||
| weight_data_tm = weight_data; | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -112,7 +113,8 @@ int ConvolutionDepthWise_riscv::create_pipeline(const Option& opt) | |||
| // group convolution | |||
| create_group_ops(opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -682,7 +684,8 @@ int ConvolutionDepthWise_riscv::create_pipeline_fp16s(const Option& opt) | |||
| ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -690,7 +693,8 @@ int ConvolutionDepthWise_riscv::create_pipeline_fp16s(const Option& opt) | |||
| // group convolution | |||
| create_group_ops(opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -148,7 +148,8 @@ int Deconvolution_riscv::create_pipeline(const Option& opt) | |||
| { | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -530,7 +531,8 @@ int Deconvolution_riscv::create_pipeline_fp16s(const Option& opt) | |||
| ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -97,7 +97,8 @@ int DeconvolutionDepthWise_riscv::create_pipeline(const Option& opt) | |||
| weight_data_tm = weight_data_transposed; | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -105,7 +106,8 @@ int DeconvolutionDepthWise_riscv::create_pipeline(const Option& opt) | |||
| // group convolution | |||
| create_group_ops(opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -619,7 +621,8 @@ int DeconvolutionDepthWise_riscv::create_pipeline_fp16s(const Option& opt) | |||
| ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -627,7 +630,8 @@ int DeconvolutionDepthWise_riscv::create_pipeline_fp16s(const Option& opt) | |||
| // group convolution | |||
| create_group_ops(opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -3984,7 +3984,8 @@ int Gemm_riscv::create_pipeline(const Option& opt) | |||
| } | |||
| } | |||
| A_data.release(); | |||
| if (opt.lightmode) | |||
| A_data.release(); | |||
| } | |||
| if (constantB) | |||
| @@ -4024,7 +4025,8 @@ int Gemm_riscv::create_pipeline(const Option& opt) | |||
| } | |||
| } | |||
| B_data.release(); | |||
| if (opt.lightmode) | |||
| B_data.release(); | |||
| } | |||
| if (constantC && constant_broadcast_type_C != -1) | |||
| @@ -4054,7 +4056,8 @@ int Gemm_riscv::create_pipeline(const Option& opt) | |||
| CT_data = C2; | |||
| } | |||
| C_data.release(); | |||
| if (opt.lightmode) | |||
| C_data.release(); | |||
| } | |||
| if (constantA || constantB || constantC) | |||
| @@ -714,9 +714,12 @@ int GRU_riscv::create_pipeline_fp16sa(const Option& opt) | |||
| cast_float32_to_float16(weight_hc_data, weight_hc_data_fp16sa, opt); | |||
| cast_float32_to_float16(bias_c_data, bias_c_data_fp16sa, opt); | |||
| weight_xc_data.release(); | |||
| bias_c_data.release(); | |||
| weight_hc_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| weight_xc_data.release(); | |||
| bias_c_data.release(); | |||
| weight_hc_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -106,7 +106,8 @@ int InnerProduct_riscv::create_pipeline(const Option& opt) | |||
| weight_data_tm = weight_data; | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -560,7 +561,8 @@ int InnerProduct_riscv::create_pipeline_fp16s(const Option& opt) | |||
| ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -156,6 +156,12 @@ int BatchNorm_vulkan::upload_model(VkTransfer& cmd, const Option& opt) | |||
| cmd.record_upload(b_data_packed, b_data_gpu, opt); | |||
| } | |||
| if (opt.lightmode) | |||
| { | |||
| a_data.release(); | |||
| b_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -133,8 +133,11 @@ int Convolution1D_vulkan::create_pipeline(const Option& _opt) | |||
| pipeline_convolution1d->create(shader_type_index, opt, specializations); | |||
| } | |||
| weight_data.release(); | |||
| bias_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| weight_data.release(); | |||
| bias_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -1148,8 +1148,11 @@ int Convolution_vulkan::create_pipeline(const Option& _opt) | |||
| pipeline_convolution->create(shader_type_index, opt, specializations); | |||
| } | |||
| weight_data.release(); | |||
| bias_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| weight_data.release(); | |||
| bias_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -271,8 +271,11 @@ int ConvolutionDepthWise_vulkan::create_pipeline(const Option& _opt) | |||
| pipeline_convolutiondepthwise_pack8->create(LayerShaderType::convolutiondepthwise_pack8, opt, specializations); | |||
| } | |||
| weight_data.release(); | |||
| bias_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| weight_data.release(); | |||
| bias_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -413,8 +416,11 @@ int ConvolutionDepthWise_vulkan::create_pipeline(const Option& _opt) | |||
| pipeline_convolutiondepthwise_group_pack8to1->create(LayerShaderType::convolutiondepthwise_group_pack8to1, opt, specializations); | |||
| } | |||
| weight_data.release(); | |||
| bias_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| weight_data.release(); | |||
| bias_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -366,6 +366,12 @@ int Deconvolution_vulkan::create_pipeline(const Option& _opt) | |||
| pipeline_deconvolution_col2im->create(shader_type_index, opt, specializations); | |||
| } | |||
| if (opt.lightmode) | |||
| { | |||
| weight_data.release(); | |||
| bias_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -462,8 +468,11 @@ int Deconvolution_vulkan::create_pipeline(const Option& _opt) | |||
| pipeline_deconvolution->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_deconvolution->create(shader_type_index, opt, specializations); | |||
| weight_data.release(); | |||
| bias_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| weight_data.release(); | |||
| bias_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -295,8 +295,11 @@ int DeconvolutionDepthWise_vulkan::create_pipeline(const Option& _opt) | |||
| pipeline_deconvolutiondepthwise_pack8->create(LayerShaderType::deconvolutiondepthwise_pack8, opt, specializations); | |||
| } | |||
| weight_data.release(); | |||
| bias_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| weight_data.release(); | |||
| bias_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -437,8 +440,11 @@ int DeconvolutionDepthWise_vulkan::create_pipeline(const Option& _opt) | |||
| pipeline_deconvolutiondepthwise_group_pack8to1->create(LayerShaderType::deconvolutiondepthwise_group_pack8to1, opt, specializations); | |||
| } | |||
| weight_data.release(); | |||
| bias_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| weight_data.release(); | |||
| bias_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -100,9 +100,12 @@ int Gemm_vulkan::create_pipeline(const Option& opt) | |||
| pipeline_gemm->create(LayerShaderType::gemm, opt, specializations); | |||
| } | |||
| A_data.release(); | |||
| B_data.release(); | |||
| C_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| A_data.release(); | |||
| B_data.release(); | |||
| C_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -154,8 +154,11 @@ int InnerProduct_vulkan::create_pipeline(const Option& _opt) | |||
| pipeline_innerproduct_gemm->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_innerproduct_gemm->create(shader_type_index, opt, specializations); | |||
| weight_data.release(); | |||
| bias_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| weight_data.release(); | |||
| bias_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -364,14 +367,20 @@ int InnerProduct_vulkan::create_pipeline(const Option& _opt) | |||
| pipeline_innerproduct_gemm->set_optimal_local_size_xyz(local_size_xyz); | |||
| pipeline_innerproduct_gemm->create(shader_type_index, opt, specializations); | |||
| weight_data.release(); | |||
| bias_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| weight_data.release(); | |||
| bias_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| weight_data.release(); | |||
| bias_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| weight_data.release(); | |||
| bias_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -82,6 +82,11 @@ int MemoryData_vulkan::upload_model(VkTransfer& cmd, const Option& opt) | |||
| cmd.record_upload(data_packed, data_gpu, opt, /*bool flatten*/ false); | |||
| } | |||
| if (opt.lightmode) | |||
| { | |||
| data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -73,8 +73,11 @@ int MultiHeadAttention_vulkan::create_pipeline(const Option& opt) | |||
| q_gemm->load_model(ModelBinFromMatArray(weights)); | |||
| q_gemm->create_pipeline(opt); | |||
| q_weight_data.release(); | |||
| q_bias_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| q_weight_data.release(); | |||
| q_bias_data.release(); | |||
| } | |||
| } | |||
| { | |||
| @@ -100,8 +103,11 @@ int MultiHeadAttention_vulkan::create_pipeline(const Option& opt) | |||
| k_gemm->load_model(ModelBinFromMatArray(weights)); | |||
| k_gemm->create_pipeline(opt); | |||
| k_weight_data.release(); | |||
| k_bias_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| k_weight_data.release(); | |||
| k_bias_data.release(); | |||
| } | |||
| } | |||
| { | |||
| @@ -127,8 +133,11 @@ int MultiHeadAttention_vulkan::create_pipeline(const Option& opt) | |||
| v_gemm->load_model(ModelBinFromMatArray(weights)); | |||
| v_gemm->create_pipeline(opt); | |||
| v_weight_data.release(); | |||
| v_bias_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| v_weight_data.release(); | |||
| v_bias_data.release(); | |||
| } | |||
| } | |||
| { | |||
| @@ -222,8 +231,11 @@ int MultiHeadAttention_vulkan::create_pipeline(const Option& opt) | |||
| o_gemm->load_model(ModelBinFromMatArray(weights)); | |||
| o_gemm->create_pipeline(opt); | |||
| out_weight_data.release(); | |||
| out_bias_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| out_weight_data.release(); | |||
| out_bias_data.release(); | |||
| } | |||
| } | |||
| return 0; | |||
| @@ -264,6 +264,9 @@ int Normalize_vulkan::upload_model(VkTransfer& cmd, const Option& opt) | |||
| { | |||
| cmd.record_upload(scale_data_packed, scale_data_gpu, opt); | |||
| } | |||
| if (opt.lightmode) | |||
| scale_data.release(); | |||
| } | |||
| return 0; | |||
| @@ -348,6 +348,11 @@ int Padding_vulkan::upload_model(VkTransfer& cmd, const Option& opt) | |||
| cmd.record_upload(per_channel_pad_data_packed, per_channel_pad_data_gpu, opt); | |||
| } | |||
| if (opt.lightmode) | |||
| { | |||
| per_channel_pad_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -144,6 +144,11 @@ int PReLU_vulkan::upload_model(VkTransfer& cmd, const Option& opt) | |||
| { | |||
| cmd.record_upload(slope_data_packed, slope_data_gpu, opt); | |||
| } | |||
| if (opt.lightmode) | |||
| { | |||
| slope_data.release(); | |||
| } | |||
| } | |||
| return 0; | |||
| @@ -129,6 +129,13 @@ int PriorBox_vulkan::upload_model(VkTransfer& cmd, const Option& opt) | |||
| cmd.record_upload(aspect_ratios, aspect_ratios_gpu, opt); | |||
| if (opt.lightmode) | |||
| { | |||
| min_sizes.release(); | |||
| max_sizes.release(); | |||
| aspect_ratios.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -137,7 +144,7 @@ int PriorBox_vulkan::forward(const std::vector<VkMat>& bottom_blobs, std::vector | |||
| int w = bottom_blobs[0].w; | |||
| int h = bottom_blobs[0].h; | |||
| if (bottom_blobs.size() == 1 && image_width == -233 && image_height == -233 && max_sizes.empty()) | |||
| if (bottom_blobs.size() == 1 && image_width == -233 && image_height == -233 && max_sizes_gpu.empty()) | |||
| { | |||
| // mxnet style _contrib_MultiBoxPrior | |||
| float step_w = step_width; | |||
| @@ -147,8 +154,8 @@ int PriorBox_vulkan::forward(const std::vector<VkMat>& bottom_blobs, std::vector | |||
| if (step_h == -233) | |||
| step_h = 1.f / (float)h; | |||
| int num_sizes = min_sizes.w; | |||
| int num_ratios = aspect_ratios.w; | |||
| int num_sizes = min_sizes_gpu.w; | |||
| int num_ratios = aspect_ratios_gpu.w; | |||
| int num_prior = num_sizes - 1 + num_ratios; | |||
| @@ -200,9 +207,9 @@ int PriorBox_vulkan::forward(const std::vector<VkMat>& bottom_blobs, std::vector | |||
| if (step_h == -233) | |||
| step_h = (float)image_h / h; | |||
| int num_min_size = min_sizes.w; | |||
| int num_max_size = max_sizes.w; | |||
| int num_aspect_ratio = aspect_ratios.w; | |||
| int num_min_size = min_sizes_gpu.w; | |||
| int num_max_size = max_sizes_gpu.w; | |||
| int num_aspect_ratio = aspect_ratios_gpu.w; | |||
| int num_prior = num_min_size * num_aspect_ratio + num_min_size + num_max_size; | |||
| if (flip) | |||
| @@ -218,6 +218,12 @@ int Scale_vulkan::upload_model(VkTransfer& cmd, const Option& opt) | |||
| } | |||
| } | |||
| if (opt.lightmode) | |||
| { | |||
| scale_data.release(); | |||
| bias_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -34,7 +34,7 @@ Convolution1D_x86::Convolution1D_x86() | |||
| #endif // __SSE2__ | |||
| } | |||
| int Convolution1D_x86::create_pipeline(const Option& /*opt*/) | |||
| int Convolution1D_x86::create_pipeline(const Option& opt) | |||
| { | |||
| if (dynamic_weight) | |||
| return 0; | |||
| @@ -43,7 +43,8 @@ int Convolution1D_x86::create_pipeline(const Option& /*opt*/) | |||
| convolution1d_transform_kernel_packed(weight_data, weight_data_tm, num_input, num_output, kernel_w); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -334,7 +334,8 @@ int Convolution_x86::create_pipeline(const Option& opt) | |||
| convolution_dilation1->create_pipeline(opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -451,7 +452,8 @@ int Convolution_x86::create_pipeline(const Option& opt) | |||
| } | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -542,7 +544,8 @@ int Convolution_x86::create_pipeline(const Option& opt) | |||
| } | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -1250,7 +1253,8 @@ int Convolution_x86::create_pipeline_int8_x86(const Option& opt) | |||
| scale_in_data[p] = scale_in; | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -132,7 +132,8 @@ int ConvolutionDepthWise_x86::create_pipeline(const Option& opt) | |||
| } | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -140,7 +141,8 @@ int ConvolutionDepthWise_x86::create_pipeline(const Option& opt) | |||
| // group convolution | |||
| create_group_ops(opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -843,7 +845,8 @@ int ConvolutionDepthWise_x86::create_pipeline_int8_x86(const Option& opt) | |||
| weight_data_tm = weight_data; | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -851,7 +854,8 @@ int ConvolutionDepthWise_x86::create_pipeline_int8_x86(const Option& opt) | |||
| // group convolution | |||
| create_group_ops(opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -193,7 +193,8 @@ int Deconvolution_x86::create_pipeline(const Option& opt) | |||
| } | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -109,7 +109,8 @@ int DeconvolutionDepthWise_x86::create_pipeline(const Option& opt) | |||
| weight_data_tm = weight_data_transposed; | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -117,7 +118,8 @@ int DeconvolutionDepthWise_x86::create_pipeline(const Option& opt) | |||
| // group convolution | |||
| create_group_ops(opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -203,7 +203,8 @@ int DeformableConv2D_x86::create_pipeline(const Option& opt) | |||
| deformableconv2d_transform_kernel_packed_sse(weight_data, weight_data_tm, num_input, num_output, kernel_w, kernel_h, elempack, out_elempack); | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -7235,7 +7235,8 @@ int Gemm_x86::create_pipeline(const Option& opt) | |||
| } | |||
| } | |||
| A_data.release(); | |||
| if (opt.lightmode) | |||
| A_data.release(); | |||
| } | |||
| if (constantB) | |||
| @@ -7279,7 +7280,8 @@ int Gemm_x86::create_pipeline(const Option& opt) | |||
| } | |||
| } | |||
| B_data.release(); | |||
| if (opt.lightmode) | |||
| B_data.release(); | |||
| } | |||
| if (constantC && constant_broadcast_type_C != -1) | |||
| @@ -7315,7 +7317,8 @@ int Gemm_x86::create_pipeline(const Option& opt) | |||
| CT_data = C2; | |||
| } | |||
| C_data.release(); | |||
| if (opt.lightmode) | |||
| C_data.release(); | |||
| } | |||
| if (constantA || constantB || constantC) | |||
| @@ -80,7 +80,8 @@ int InnerProduct_x86::create_pipeline(const Option& opt) | |||
| innerproduct_transform_kernel_sse(weight_data, weight_data_tm, num_input, num_output, opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -175,7 +176,8 @@ int InnerProduct_x86::create_pipeline_fp16s(const Option& opt) | |||
| innerproduct_transform_kernel_fp16s_sse(weight_data, weight_data_tm, num_input, num_output, opt); | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -281,7 +283,8 @@ int InnerProduct_x86::create_pipeline_int8_x86(const Option& opt) | |||
| scale_in_data[p] = scale_in; | |||
| } | |||
| weight_data.release(); | |||
| if (opt.lightmode) | |||
| weight_data.release(); | |||
| return 0; | |||
| } | |||
| @@ -182,9 +182,12 @@ int LSTM_x86::create_pipeline(const Option& opt) | |||
| } | |||
| } | |||
| weight_xc_data.release(); | |||
| bias_c_data.release(); | |||
| weight_hc_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| weight_xc_data.release(); | |||
| bias_c_data.release(); | |||
| weight_hc_data.release(); | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -65,8 +65,11 @@ int MultiHeadAttention_x86::create_pipeline(const Option& opt) | |||
| q_gemm->load_model(ModelBinFromMatArray(weights)); | |||
| q_gemm->create_pipeline(opt); | |||
| q_weight_data.release(); | |||
| q_bias_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| q_weight_data.release(); | |||
| q_bias_data.release(); | |||
| } | |||
| } | |||
| { | |||
| @@ -91,8 +94,11 @@ int MultiHeadAttention_x86::create_pipeline(const Option& opt) | |||
| k_gemm->load_model(ModelBinFromMatArray(weights)); | |||
| k_gemm->create_pipeline(opt); | |||
| k_weight_data.release(); | |||
| k_bias_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| k_weight_data.release(); | |||
| k_bias_data.release(); | |||
| } | |||
| } | |||
| { | |||
| @@ -117,8 +123,11 @@ int MultiHeadAttention_x86::create_pipeline(const Option& opt) | |||
| v_gemm->load_model(ModelBinFromMatArray(weights)); | |||
| v_gemm->create_pipeline(opt); | |||
| v_weight_data.release(); | |||
| v_bias_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| v_weight_data.release(); | |||
| v_bias_data.release(); | |||
| } | |||
| } | |||
| { | |||
| @@ -193,8 +202,11 @@ int MultiHeadAttention_x86::create_pipeline(const Option& opt) | |||
| o_gemm->load_model(ModelBinFromMatArray(weights)); | |||
| o_gemm->create_pipeline(opt); | |||
| out_weight_data.release(); | |||
| out_bias_data.release(); | |||
| if (opt.lightmode) | |||
| { | |||
| out_weight_data.release(); | |||
| out_bias_data.release(); | |||
| } | |||
| } | |||
| return 0; | |||
| @@ -32,6 +32,7 @@ | |||
| #include "layer/batchnorm.h" | |||
| #include "layer/bias.h" | |||
| #include "layer/binaryop.h" | |||
| #include "layer/celu.h" | |||
| #include "layer/clip.h" | |||
| #include "layer/concat.h" | |||
| #include "layer/convolution.h" | |||
| @@ -51,6 +52,7 @@ | |||
| #include "layer/deconvolutiondepthwise3d.h" | |||
| #include "layer/deformableconv2d.h" | |||
| #include "layer/detectionoutput.h" | |||
| #include "layer/diag.h" | |||
| #include "layer/dropout.h" | |||
| #include "layer/eltwise.h" | |||
| #include "layer/elu.h" | |||
| @@ -835,6 +837,13 @@ int ModelWriter::save(const char* parampath, const char* binpath) | |||
| fprintf_param_value(" 1=%d", with_scalar) | |||
| fprintf_param_value(" 2=%e", b) | |||
| } | |||
| else if (layer->type == "CELU") | |||
| { | |||
| ncnn::CELU* op = (ncnn::CELU*)layer; | |||
| ncnn::CELU* op_default = (ncnn::CELU*)layer_default; | |||
| fprintf_param_value(" 0=%e", alpha) | |||
| } | |||
| else if (layer->type == "Clip") | |||
| { | |||
| ncnn::Clip* op = (ncnn::Clip*)layer; | |||
| @@ -888,18 +897,21 @@ int ModelWriter::save(const char* parampath, const char* binpath) | |||
| } | |||
| fprintf_param_value(" 19=%d", dynamic_weight) | |||
| fwrite_weight_tag_data(op->weight_data, bp); | |||
| fwrite_weight_data(op->bias_data, bp); | |||
| if (op->dynamic_weight == 0) | |||
| { | |||
| fwrite_weight_tag_data(op->weight_data, bp); | |||
| fwrite_weight_data(op->bias_data, bp); | |||
| #if NCNN_INT8 | |||
| // write int8_scale data | |||
| if (op->int8_scale_term) | |||
| { | |||
| fwrite_weight_data(op->weight_data_int8_scales, bp, 90, 100); | |||
| fwrite_weight_data(op->bottom_blob_int8_scales, bp, 0.001, 1); | |||
| fwrite_weight_data(op->top_blob_int8_scales, bp, 0.001, 1); | |||
| } | |||
| // write int8_scale data | |||
| if (op->int8_scale_term) | |||
| { | |||
| fwrite_weight_data(op->weight_data_int8_scales, bp, 90, 100); | |||
| fwrite_weight_data(op->bottom_blob_int8_scales, bp, 0.001, 1); | |||
| fwrite_weight_data(op->top_blob_int8_scales, bp, 0.001, 1); | |||
| } | |||
| #endif // NCNN_INT8 | |||
| } | |||
| if (shape_ready) | |||
| { | |||
| @@ -931,9 +943,13 @@ int ModelWriter::save(const char* parampath, const char* binpath) | |||
| { | |||
| if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp); | |||
| } | |||
| fprintf_param_value(" 19=%d", dynamic_weight) | |||
| fwrite_weight_tag_data(op->weight_data, bp); | |||
| fwrite_weight_data(op->bias_data, bp); | |||
| if (op->dynamic_weight == 0) | |||
| { | |||
| fwrite_weight_tag_data(op->weight_data, bp); | |||
| fwrite_weight_data(op->bias_data, bp); | |||
| } | |||
| if (shape_ready) | |||
| { | |||
| @@ -1040,32 +1056,35 @@ int ModelWriter::save(const char* parampath, const char* binpath) | |||
| } | |||
| fprintf_param_value(" 19=%d", dynamic_weight) | |||
| fwrite_weight_tag_data(op->weight_data, bp); | |||
| fwrite_weight_data(op->bias_data, bp); | |||
| if (op->dynamic_weight == 0) | |||
| { | |||
| fwrite_weight_tag_data(op->weight_data, bp); | |||
| fwrite_weight_data(op->bias_data, bp); | |||
| #if NCNN_INT8 | |||
| // write int8_scale data | |||
| if (op->int8_scale_term == 1 || op->int8_scale_term == 101) | |||
| { | |||
| op->bottom_blob_int8_scales.w = 1; | |||
| } | |||
| if (op->int8_scale_term == 2 || op->int8_scale_term == 102) | |||
| { | |||
| op->weight_data_int8_scales.w = 1; | |||
| op->bottom_blob_int8_scales.w = 1; | |||
| } | |||
| if (op->int8_scale_term > 100) | |||
| { | |||
| op->top_blob_int8_scales.w = 1; | |||
| } | |||
| // write int8_scale data | |||
| if (op->int8_scale_term == 1 || op->int8_scale_term == 101) | |||
| { | |||
| op->bottom_blob_int8_scales.w = 1; | |||
| } | |||
| if (op->int8_scale_term == 2 || op->int8_scale_term == 102) | |||
| { | |||
| op->weight_data_int8_scales.w = 1; | |||
| op->bottom_blob_int8_scales.w = 1; | |||
| } | |||
| if (op->int8_scale_term > 100) | |||
| { | |||
| op->top_blob_int8_scales.w = 1; | |||
| } | |||
| if (op->int8_scale_term) | |||
| { | |||
| fwrite_weight_data(op->weight_data_int8_scales, bp, 90, 100); | |||
| fwrite_weight_data(op->bottom_blob_int8_scales, bp, 0.001, 1); | |||
| fwrite_weight_data(op->top_blob_int8_scales, bp, 0.001, 1); | |||
| } | |||
| if (op->int8_scale_term) | |||
| { | |||
| fwrite_weight_data(op->weight_data_int8_scales, bp, 90, 100); | |||
| fwrite_weight_data(op->bottom_blob_int8_scales, bp, 0.001, 1); | |||
| fwrite_weight_data(op->top_blob_int8_scales, bp, 0.001, 1); | |||
| } | |||
| #endif // NCNN_INT8 | |||
| } | |||
| if (shape_ready) | |||
| { | |||
| @@ -1098,9 +1117,13 @@ int ModelWriter::save(const char* parampath, const char* binpath) | |||
| { | |||
| if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp); | |||
| } | |||
| fprintf_param_value(" 19=%d", dynamic_weight) | |||
| fwrite_weight_tag_data(op->weight_data, bp); | |||
| fwrite_weight_data(op->bias_data, bp); | |||
| if (op->dynamic_weight == 0) | |||
| { | |||
| fwrite_weight_tag_data(op->weight_data, bp); | |||
| fwrite_weight_data(op->bias_data, bp); | |||
| } | |||
| if (shape_ready) | |||
| { | |||
| @@ -1261,9 +1284,13 @@ int ModelWriter::save(const char* parampath, const char* binpath) | |||
| { | |||
| if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp); | |||
| } | |||
| fprintf_param_value(" 28=%d", dynamic_weight) | |||
| fwrite_weight_tag_data(op->weight_data, bp); | |||
| fwrite_weight_data(op->bias_data, bp); | |||
| if (op->dynamic_weight == 0) | |||
| { | |||
| fwrite_weight_tag_data(op->weight_data, bp); | |||
| fwrite_weight_data(op->bias_data, bp); | |||
| } | |||
| if (shape_ready) | |||
| { | |||
| @@ -1296,9 +1323,13 @@ int ModelWriter::save(const char* parampath, const char* binpath) | |||
| { | |||
| if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp); | |||
| } | |||
| fprintf_param_value(" 28=%d", dynamic_weight) | |||
| fwrite_weight_tag_data(op->weight_data, bp); | |||
| fwrite_weight_data(op->bias_data, bp); | |||
| if (op->dynamic_weight == 0) | |||
| { | |||
| fwrite_weight_tag_data(op->weight_data, bp); | |||
| fwrite_weight_data(op->bias_data, bp); | |||
| } | |||
| if (shape_ready) | |||
| { | |||
| @@ -1418,9 +1449,13 @@ int ModelWriter::save(const char* parampath, const char* binpath) | |||
| { | |||
| if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp); | |||
| } | |||
| fprintf_param_value(" 28=%d", dynamic_weight) | |||
| fwrite_weight_tag_data(op->weight_data, bp); | |||
| fwrite_weight_data(op->bias_data, bp); | |||
| if (op->dynamic_weight == 0) | |||
| { | |||
| fwrite_weight_tag_data(op->weight_data, bp); | |||
| fwrite_weight_data(op->bias_data, bp); | |||
| } | |||
| if (shape_ready) | |||
| { | |||
| @@ -1454,9 +1489,13 @@ int ModelWriter::save(const char* parampath, const char* binpath) | |||
| { | |||
| if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp); | |||
| } | |||
| fprintf_param_value(" 28=%d", dynamic_weight) | |||
| fwrite_weight_tag_data(op->weight_data, bp); | |||
| fwrite_weight_data(op->bias_data, bp); | |||
| if (op->dynamic_weight == 0) | |||
| { | |||
| fwrite_weight_tag_data(op->weight_data, bp); | |||
| fwrite_weight_data(op->bias_data, bp); | |||
| } | |||
| if (shape_ready) | |||
| { | |||
| @@ -1597,6 +1636,13 @@ int ModelWriter::save(const char* parampath, const char* binpath) | |||
| fprintf_param_value(" 7=%e", variances[2]) | |||
| fprintf_param_value(" 8=%e", variances[3]) | |||
| } | |||
| else if (layer->type == "Diag") | |||
| { | |||
| ncnn::Diag* op = (ncnn::Diag*)layer; | |||
| ncnn::Diag* op_default = (ncnn::Diag*)layer_default; | |||
| fprintf_param_value(" 0=%d", diagonal) | |||
| } | |||
| else if (layer->type == "Dropout") | |||
| { | |||
| ncnn::Dropout* op = (ncnn::Dropout*)layer; | |||