Browse Source

update ncnnoptimize layers, lightmode=false keeps original weight (#5414)

tags/20240410
nihui GitHub 2 years ago
parent
commit
db035d602d
No known key found for this signature in database. GPG Key ID: B5690EEEBB952194
69 changed files with 614 additions and 241 deletions
  1. +72
    -3
      docs/developer-guide/operators.md
  2. +5
    -3
      src/layer/arm/convolution1d_arm.cpp
  3. +2
    -1
      src/layer/arm/convolution1d_arm_asimdhp.cpp
  4. +16
    -8
      src/layer/arm/convolution_arm.cpp
  5. +6
    -3
      src/layer/arm/convolution_arm_asimdhp.cpp
  6. +10
    -5
      src/layer/arm/convolutiondepthwise_arm.cpp
  7. +4
    -2
      src/layer/arm/convolutiondepthwise_arm_asimdhp.cpp
  8. +4
    -2
      src/layer/arm/deconvolution_arm.cpp
  9. +2
    -1
      src/layer/arm/deconvolution_arm_asimdhp.cpp
  10. +4
    -2
      src/layer/arm/deconvolutiondepthwise_arm.cpp
  11. +2
    -1
      src/layer/arm/deconvolutiondepthwise_arm_asimdhp.cpp
  12. +12
    -6
      src/layer/arm/gemm_arm.cpp
  13. +6
    -3
      src/layer/arm/gemm_arm_asimdhp.cpp
  14. +6
    -3
      src/layer/arm/gemm_arm_vfpv4.cpp
  15. +12
    -6
      src/layer/arm/gru_arm.cpp
  16. +6
    -3
      src/layer/arm/gru_arm_asimdhp.cpp
  17. +6
    -3
      src/layer/arm/innerproduct_arm.cpp
  18. +2
    -1
      src/layer/arm/innerproduct_arm_vfpv4.cpp
  19. +12
    -6
      src/layer/arm/lstm_arm.cpp
  20. +6
    -3
      src/layer/arm/lstm_arm_asimdhp.cpp
  21. +20
    -8
      src/layer/arm/multiheadattention_arm.cpp
  22. +12
    -6
      src/layer/arm/rnn_arm.cpp
  23. +6
    -3
      src/layer/arm/rnn_arm_asimdhp.cpp
  24. +4
    -1
      src/layer/loongarch/convolution1d_loongarch.cpp
  25. +4
    -2
      src/layer/loongarch/convolution_loongarch.cpp
  26. +8
    -4
      src/layer/loongarch/convolutiondepthwise_loongarch.cpp
  27. +2
    -1
      src/layer/loongarch/deconvolution_loongarch.cpp
  28. +4
    -2
      src/layer/loongarch/deconvolutiondepthwise_loongarch.cpp
  29. +6
    -3
      src/layer/loongarch/innerproduct_loongarch.cpp
  30. +4
    -1
      src/layer/mips/convolution1d_mips.cpp
  31. +4
    -2
      src/layer/mips/convolution_mips.cpp
  32. +8
    -4
      src/layer/mips/convolutiondepthwise_mips.cpp
  33. +2
    -1
      src/layer/mips/deconvolution_mips.cpp
  34. +4
    -2
      src/layer/mips/deconvolutiondepthwise_mips.cpp
  35. +6
    -3
      src/layer/mips/innerproduct_mips.cpp
  36. +6
    -2
      src/layer/riscv/convolution1d_riscv.cpp
  37. +4
    -2
      src/layer/riscv/convolution_riscv.cpp
  38. +8
    -4
      src/layer/riscv/convolutiondepthwise_riscv.cpp
  39. +4
    -2
      src/layer/riscv/deconvolution_riscv.cpp
  40. +8
    -4
      src/layer/riscv/deconvolutiondepthwise_riscv.cpp
  41. +6
    -3
      src/layer/riscv/gemm_riscv.cpp
  42. +6
    -3
      src/layer/riscv/gru_riscv.cpp
  43. +4
    -2
      src/layer/riscv/innerproduct_riscv.cpp
  44. +6
    -0
      src/layer/vulkan/batchnorm_vulkan.cpp
  45. +5
    -2
      src/layer/vulkan/convolution1d_vulkan.cpp
  46. +5
    -2
      src/layer/vulkan/convolution_vulkan.cpp
  47. +10
    -4
      src/layer/vulkan/convolutiondepthwise_vulkan.cpp
  48. +11
    -2
      src/layer/vulkan/deconvolution_vulkan.cpp
  49. +10
    -4
      src/layer/vulkan/deconvolutiondepthwise_vulkan.cpp
  50. +6
    -3
      src/layer/vulkan/gemm_vulkan.cpp
  51. +15
    -6
      src/layer/vulkan/innerproduct_vulkan.cpp
  52. +5
    -0
      src/layer/vulkan/memorydata_vulkan.cpp
  53. +20
    -8
      src/layer/vulkan/multiheadattention_vulkan.cpp
  54. +3
    -0
      src/layer/vulkan/normalize_vulkan.cpp
  55. +5
    -0
      src/layer/vulkan/padding_vulkan.cpp
  56. +5
    -0
      src/layer/vulkan/prelu_vulkan.cpp
  57. +13
    -6
      src/layer/vulkan/priorbox_vulkan.cpp
  58. +6
    -0
      src/layer/vulkan/scale_vulkan.cpp
  59. +3
    -2
      src/layer/x86/convolution1d_x86.cpp
  60. +8
    -4
      src/layer/x86/convolution_x86.cpp
  61. +8
    -4
      src/layer/x86/convolutiondepthwise_x86.cpp
  62. +2
    -1
      src/layer/x86/deconvolution_x86.cpp
  63. +4
    -2
      src/layer/x86/deconvolutiondepthwise_x86.cpp
  64. +2
    -1
      src/layer/x86/deformableconv2d_x86.cpp
  65. +6
    -3
      src/layer/x86/gemm_x86.cpp
  66. +6
    -3
      src/layer/x86/innerproduct_x86.cpp
  67. +6
    -3
      src/layer/x86/lstm_x86.cpp
  68. +20
    -8
      src/layer/x86/multiheadattention_x86.cpp
  69. +89
    -43
      tools/modelwriter.h

+ 72
- 3
docs/developer-guide/operators.md View File

@@ -30,8 +30,10 @@
* [Dropout](#dropout)
* [Eltwise](#eltwise)
* [ELU](#elu)
* [Embed](#embed)
* [Exp](#exp)
* [Flatten](#flatten)
* [Fold](#fold)
* [GELU](#gelu)
* [GLU](#glu)
* [Gemm](#gemm)
@@ -84,6 +86,7 @@
* [Threshold](#threshold)
* [Tile](#tile)
* [UnaryOp](#unaryop)
* [Unfold](#unfold)

# AbsVal
```
@@ -474,12 +477,15 @@ y = crop(x)
| --------- | ------------- | ----- | --------- | ----------------- |
| 0 | woffset | int | 0 | |
| 1 | hoffset | int | 0 | |
| 2 | coffset | int | 1 | |
| 3 | outw | int | 1 | |
| 13 | doffset | int | 0 | |
| 2 | coffset | int | 0 | |
| 3 | outw | int | 0 | |
| 4 | outh | int | 0 | |
| 14 | outd | int | 0 | |
| 5 | outc | int | 0 | |
| 6 | woffset2 | int | 0 | |
| 7 | hoffset2 | int | 1 | |
| 7 | hoffset2 | int | 0 | |
| 15 | doffset2 | int | 0 | |
| 8 | coffset2 | int | 0 | |
| 9 | starts | array | [ ] | |
| 10 | ends | array | [ ] | |
@@ -819,6 +825,23 @@ else y = x
| --------- | ------------- | ----- | --------- | ----------------- |
| 0 | alpha | float | 0.1f | |

# Embed
```
y = embedding(x)
```

| param id | name | type | default | description |
| --------- | ------------- | ----- | --------- | ----------------- |
| 0 | num_output | int | 0 | |
| 1 | input_dim | int | 0 | |
| 2 | bias_term | int | 0 | |
| 3 | weight_data_size | int | 0 | |

| weight | type | shape |
| ------------- | ----- | --------------------- |
| weight_data | float | [weight_data_size] |
| bias_data | float | [num_output] |

# Exp
```
if base == -1 y = exp(shift + x * scale)
@@ -839,6 +862,29 @@ Reshape blob to 1 dimension

* one_blob_only

# Fold
```
y = fold(x)
```

* one_blob_only

| param id | name | type | default | description |
| --------- | ------------- | ----- | --------- | ----------------- |
| 0 | num_output | int | 0 | |
| 1 | kernel_w | int | 0 | |
| 2 | dilation_w | int | 1 | |
| 3 | stride_w | int | 1 | |
| 4 | pad_left | int | 0 | |
| 11 | kernel_h | int | kernel_w | |
| 12 | dilation_h | int | dilation_w | |
| 13 | stride_h | int | stride_w | |
| 14 | pad_top | int | pad_left | |
| 15 | pad_right | int | pad_left | |
| 16 | pad_bottom | int | pad_top | |
| 20 | output_w | int | 0 | |
| 21 | output_h | int | output_w | |

# GELU
```
if fast_gelu == 1 y = 0.5 * x * (1 + tanh(0.79788452 * (x + 0.044715 * x * x * x)));
@@ -1187,6 +1233,7 @@ y = data
| 1 | h | int | 0 | |
| 11 | d | int | 0 | |
| 2 | c | int | 0 | |
| 21 | load_type | int | 1 | 1=fp32 |

| weight | type | shape |
| ------------- | ----- | --------------------- |
@@ -1537,6 +1584,7 @@ y = reduce_op(x * coeff)
| 2 | coeff | float | 1.f | |
| 3 | axes | array | [ ] | |
| 4 | keepdims | int | 0 | |
| 5 | fixbug0 | int | 0 | hack for bug fix, should be 1 |

Operation type:
- 0 = SUM
@@ -1829,3 +1877,24 @@ Operation type:
- 17 = LOG10
- 18 = ROUND
- 19 = TRUNC

# Unfold
```
y = unfold(x)
```

* one_blob_only

| param id | name | type | default | description |
| --------- | ------------- | ----- | --------- | ----------------- |
| 0 | num_output | int | 0 | |
| 1 | kernel_w | int | 0 | |
| 2 | dilation_w | int | 1 | |
| 3 | stride_w | int | 1 | |
| 4 | pad_left | int | 0 | |
| 11 | kernel_h | int | kernel_w | |
| 12 | dilation_h | int | dilation_w | |
| 13 | stride_h | int | stride_w | |
| 14 | pad_top | int | pad_left | |
| 15 | pad_right | int | pad_left | |
| 16 | pad_bottom | int | pad_top | |

+ 5
- 3
src/layer/arm/convolution1d_arm.cpp View File

@@ -68,7 +68,8 @@ int Convolution1D_arm::create_pipeline(const Option& opt)

convolution1d_transform_kernel_packed(weight_data, weight_data_tm, num_input, num_output, kernel_w);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -233,13 +234,14 @@ int Convolution1D_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector
}

#if NCNN_BF16
int Convolution1D_arm::create_pipeline_bf16s(const Option& /*opt*/)
int Convolution1D_arm::create_pipeline_bf16s(const Option& opt)
{
const int num_input = weight_data_size / kernel_w / num_output;

convolution1d_transform_kernel_packed_bf16s(weight_data, weight_data_tm, num_input, num_output, kernel_w);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 2
- 1
src/layer/arm/convolution1d_arm_asimdhp.cpp View File

@@ -36,7 +36,8 @@ int Convolution1D_arm::create_pipeline_fp16s(const Option& opt)

ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 16
- 8
src/layer/arm/convolution_arm.cpp View File

@@ -194,7 +194,8 @@ int Convolution_arm::create_pipeline(const Option& opt)

convolution_dilation1->create_pipeline(opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -224,7 +225,8 @@ int Convolution_arm::create_pipeline(const Option& opt)
else
conv3x3s1_winograd23_transform_kernel(weight_data, weight_winograd23_data, num_input, num_output, opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -270,7 +272,8 @@ int Convolution_arm::create_pipeline(const Option& opt)
{
convolution_im2col_gemm_transform_kernel(weight_data, weight_sgemm_data, num_input, num_output, kernel_w, kernel_h, opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -305,7 +308,8 @@ int Convolution_arm::create_pipeline(const Option& opt)
convolution_transform_kernel_packed(weight_data, weight_data_tm, num_input, num_output, kernel_w, kernel_h);
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -904,7 +908,8 @@ int Convolution_arm::create_pipeline_bf16s(const Option& opt)
else
conv3x3s1_winograd23_transform_kernel(weight_data, weight_winograd23_data, num_input, num_output, opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -950,7 +955,8 @@ int Convolution_arm::create_pipeline_bf16s(const Option& opt)
{
convolution_im2col_gemm_transform_kernel_bf16s(weight_data, weight_sgemm_data, num_input, num_output, kernel_w, kernel_h, opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -971,7 +977,8 @@ int Convolution_arm::create_pipeline_bf16s(const Option& opt)
convolution_transform_kernel_packed_bf16s(weight_data, weight_data_tm, num_input, num_output, kernel_w, kernel_h);
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -1284,7 +1291,8 @@ int Convolution_arm::create_pipeline_int8_arm(const Option& opt)
scale_in_data[p] = scale_in;
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 6
- 3
src/layer/arm/convolution_arm_asimdhp.cpp View File

@@ -108,7 +108,8 @@ int Convolution_arm::create_pipeline_fp16s(const Option& opt)
else
conv3x3s1_winograd23_transform_kernel_fp16sa(weight_data, weight_winograd23_data, num_input, num_output, opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

if (opt.use_fp16_arithmetic)
{
@@ -189,7 +190,8 @@ int Convolution_arm::create_pipeline_fp16s(const Option& opt)

ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -219,7 +221,8 @@ int Convolution_arm::create_pipeline_fp16s(const Option& opt)
ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt);
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 10
- 5
src/layer/arm/convolutiondepthwise_arm.cpp View File

@@ -119,7 +119,8 @@ int ConvolutionDepthWise_arm::create_pipeline(const Option& opt)
ncnn::cast_float32_to_bfloat16(weight_data, weight_data_tm, opt);
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -161,7 +162,8 @@ int ConvolutionDepthWise_arm::create_pipeline(const Option& opt)
}
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -169,7 +171,8 @@ int ConvolutionDepthWise_arm::create_pipeline(const Option& opt)
// group convolution
create_group_ops(opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -1022,7 +1025,8 @@ int ConvolutionDepthWise_arm::create_pipeline_int8_arm(const Option& opt)
weight_data_tm = weight_data;
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -1030,7 +1034,8 @@ int ConvolutionDepthWise_arm::create_pipeline_int8_arm(const Option& opt)
// group convolution
create_group_ops(opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 4
- 2
src/layer/arm/convolutiondepthwise_arm_asimdhp.cpp View File

@@ -76,7 +76,8 @@ int ConvolutionDepthWise_arm::create_pipeline_fp16s(const Option& opt)

ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -84,7 +85,8 @@ int ConvolutionDepthWise_arm::create_pipeline_fp16s(const Option& opt)
// group convolution
create_group_ops(opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 4
- 2
src/layer/arm/deconvolution_arm.cpp View File

@@ -211,7 +211,8 @@ int Deconvolution_arm::create_pipeline(const Option& opt)
}
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -954,7 +955,8 @@ int Deconvolution_arm::create_pipeline_bf16s(const Option& opt)
}
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 2
- 1
src/layer/arm/deconvolution_arm_asimdhp.cpp View File

@@ -154,7 +154,8 @@ int Deconvolution_arm::create_pipeline_fp16s(const Option& opt)

ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 4
- 2
src/layer/arm/deconvolutiondepthwise_arm.cpp View File

@@ -104,7 +104,8 @@ int DeconvolutionDepthWise_arm::create_pipeline(const Option& opt)
ncnn::cast_float32_to_bfloat16(weight_data_transposed, weight_data_tm, opt);
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -190,7 +191,8 @@ int DeconvolutionDepthWise_arm::create_pipeline(const Option& opt)
}
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 2
- 1
src/layer/arm/deconvolutiondepthwise_arm_asimdhp.cpp View File

@@ -145,7 +145,8 @@ int DeconvolutionDepthWise_arm::create_pipeline_fp16s(const Option& opt)
}
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 12
- 6
src/layer/arm/gemm_arm.cpp View File

@@ -4201,7 +4201,8 @@ int Gemm_arm::create_pipeline(const Option& opt)
}
}

A_data.release();
if (opt.lightmode)
A_data.release();
}

if (constantB)
@@ -4241,7 +4242,8 @@ int Gemm_arm::create_pipeline(const Option& opt)
}
}

B_data.release();
if (opt.lightmode)
B_data.release();
}

if (constantC && constant_broadcast_type_C != -1)
@@ -4271,7 +4273,8 @@ int Gemm_arm::create_pipeline(const Option& opt)
CT_data = C2;
}

C_data.release();
if (opt.lightmode)
C_data.release();
}

if (constantA || constantB || constantC)
@@ -4889,7 +4892,8 @@ int Gemm_arm::create_pipeline_bf16s(const Option& opt)
}
}

A_data.release();
if (opt.lightmode)
A_data.release();
}

if (constantB)
@@ -4929,7 +4933,8 @@ int Gemm_arm::create_pipeline_bf16s(const Option& opt)
}
}

B_data.release();
if (opt.lightmode)
B_data.release();
}

if (constantC && constant_broadcast_type_C != -1)
@@ -4959,7 +4964,8 @@ int Gemm_arm::create_pipeline_bf16s(const Option& opt)
CT_data = C2;
}

C_data.release();
if (opt.lightmode)
C_data.release();
}

if (constantA || constantB || constantC)


+ 6
- 3
src/layer/arm/gemm_arm_asimdhp.cpp View File

@@ -2736,7 +2736,8 @@ int Gemm_arm::create_pipeline_fp16sa(const Option& opt)
}
}

A_data.release();
if (opt.lightmode)
A_data.release();
}

if (constantB)
@@ -2776,7 +2777,8 @@ int Gemm_arm::create_pipeline_fp16sa(const Option& opt)
}
}

B_data.release();
if (opt.lightmode)
B_data.release();
}

if (constantC && constant_broadcast_type_C != -1)
@@ -2802,7 +2804,8 @@ int Gemm_arm::create_pipeline_fp16sa(const Option& opt)
}
}

C_data.release();
if (opt.lightmode)
C_data.release();
}

if (constantA || constantB || constantC)


+ 6
- 3
src/layer/arm/gemm_arm_vfpv4.cpp View File

@@ -427,7 +427,8 @@ int Gemm_arm::create_pipeline_fp16s(const Option& opt)
}
}

A_data.release();
if (opt.lightmode)
A_data.release();
}

if (constantB)
@@ -467,7 +468,8 @@ int Gemm_arm::create_pipeline_fp16s(const Option& opt)
}
}

B_data.release();
if (opt.lightmode)
B_data.release();
}

if (constantC && constant_broadcast_type_C != -1)
@@ -497,7 +499,8 @@ int Gemm_arm::create_pipeline_fp16s(const Option& opt)
CT_data = C2;
}

C_data.release();
if (opt.lightmode)
C_data.release();
}

if (constantA || constantB || constantC)


+ 12
- 6
src/layer/arm/gru_arm.cpp View File

@@ -250,9 +250,12 @@ int GRU_arm::create_pipeline(const Option& opt)
}
}

weight_xc_data.release();
bias_c_data.release();
weight_hc_data.release();
if (opt.lightmode)
{
weight_xc_data.release();
bias_c_data.release();
weight_hc_data.release();
}

return 0;
}
@@ -1372,9 +1375,12 @@ int GRU_arm::create_pipeline_bf16s(const Option& opt)
}
}

weight_xc_data.release();
bias_c_data.release();
weight_hc_data.release();
if (opt.lightmode)
{
weight_xc_data.release();
bias_c_data.release();
weight_hc_data.release();
}

return 0;
}


+ 6
- 3
src/layer/arm/gru_arm_asimdhp.cpp View File

@@ -914,9 +914,12 @@ int GRU_arm::create_pipeline_fp16s(const Option& opt)
}
}

weight_xc_data.release();
bias_c_data.release();
weight_hc_data.release();
if (opt.lightmode)
{
weight_xc_data.release();
bias_c_data.release();
weight_hc_data.release();
}

return 0;
}


+ 6
- 3
src/layer/arm/innerproduct_arm.cpp View File

@@ -122,7 +122,8 @@ int InnerProduct_arm::create_pipeline(const Option& opt)
weight_data_tm = weight_data;
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -865,7 +866,8 @@ int InnerProduct_arm::create_pipeline_bf16s(const Option& opt)
}
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -1258,7 +1260,8 @@ int InnerProduct_arm::create_pipeline_int8_arm(const Option& opt)
scale_in_data[p] = scale_in;
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 2
- 1
src/layer/arm/innerproduct_arm_vfpv4.cpp View File

@@ -41,7 +41,8 @@ int InnerProduct_arm::create_pipeline_fp16s(const Option& opt)
}
#endif

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 12
- 6
src/layer/arm/lstm_arm.cpp View File

@@ -124,9 +124,12 @@ int LSTM_arm::create_pipeline(const Option& opt)
}
}

weight_xc_data.release();
bias_c_data.release();
weight_hc_data.release();
if (opt.lightmode)
{
weight_xc_data.release();
bias_c_data.release();
weight_hc_data.release();
}

return 0;
}
@@ -928,9 +931,12 @@ int LSTM_arm::create_pipeline_bf16s(const Option& opt)
}
}

weight_xc_data.release();
bias_c_data.release();
weight_hc_data.release();
if (opt.lightmode)
{
weight_xc_data.release();
bias_c_data.release();
weight_hc_data.release();
}

return 0;
}


+ 6
- 3
src/layer/arm/lstm_arm_asimdhp.cpp View File

@@ -835,9 +835,12 @@ int LSTM_arm::create_pipeline_fp16s(const Option& opt)
}
}

weight_xc_data.release();
bias_c_data.release();
weight_hc_data.release();
if (opt.lightmode)
{
weight_xc_data.release();
bias_c_data.release();
weight_hc_data.release();
}

return 0;
}


+ 20
- 8
src/layer/arm/multiheadattention_arm.cpp View File

@@ -84,8 +84,11 @@ int MultiHeadAttention_arm::create_pipeline(const Option& _opt)
q_gemm->load_model(ModelBinFromMatArray(weights));
q_gemm->create_pipeline(opt);

q_weight_data.release();
q_bias_data.release();
if (opt.lightmode)
{
q_weight_data.release();
q_bias_data.release();
}
}

{
@@ -110,8 +113,11 @@ int MultiHeadAttention_arm::create_pipeline(const Option& _opt)
k_gemm->load_model(ModelBinFromMatArray(weights));
k_gemm->create_pipeline(opt);

k_weight_data.release();
k_bias_data.release();
if (opt.lightmode)
{
k_weight_data.release();
k_bias_data.release();
}
}

{
@@ -136,8 +142,11 @@ int MultiHeadAttention_arm::create_pipeline(const Option& _opt)
v_gemm->load_model(ModelBinFromMatArray(weights));
v_gemm->create_pipeline(opt);

v_weight_data.release();
v_bias_data.release();
if (opt.lightmode)
{
v_weight_data.release();
v_bias_data.release();
}
}

{
@@ -160,8 +169,11 @@ int MultiHeadAttention_arm::create_pipeline(const Option& _opt)
o_gemm->load_model(ModelBinFromMatArray(weights));
o_gemm->create_pipeline(opt);

out_weight_data.release();
out_bias_data.release();
if (opt.lightmode)
{
out_weight_data.release();
out_bias_data.release();
}
}

{


+ 12
- 6
src/layer/arm/rnn_arm.cpp View File

@@ -139,9 +139,12 @@ int RNN_arm::create_pipeline(const Option& opt)

bias_c_data_packed = bias_c_data;

weight_xc_data.release();
bias_c_data.release();
weight_hc_data.release();
if (opt.lightmode)
{
weight_xc_data.release();
bias_c_data.release();
weight_hc_data.release();
}

return 0;
}
@@ -736,9 +739,12 @@ int RNN_arm::create_pipeline_bf16s(const Option& opt)

cast_float32_to_bfloat16(bias_c_data, bias_c_data_packed, opt);

weight_xc_data.release();
bias_c_data.release();
weight_hc_data.release();
if (opt.lightmode)
{
weight_xc_data.release();
bias_c_data.release();
weight_hc_data.release();
}

return 0;
}


+ 6
- 3
src/layer/arm/rnn_arm_asimdhp.cpp View File

@@ -517,9 +517,12 @@ int RNN_arm::create_pipeline_fp16s(const Option& opt)

cast_float32_to_float16(bias_c_data, bias_c_data_packed, opt);

weight_xc_data.release();
bias_c_data.release();
weight_hc_data.release();
if (opt.lightmode)
{
weight_xc_data.release();
bias_c_data.release();
weight_hc_data.release();
}

return 0;
}


+ 4
- 1
src/layer/loongarch/convolution1d_loongarch.cpp View File

@@ -78,6 +78,9 @@ int Convolution1D_loongarch::create_pipeline(const Option& opt)
}
}

if (opt.lightmode)
weight_data.release();

return 0;
}

@@ -281,7 +284,7 @@ int Convolution1D_loongarch::forward(const Mat& bottom_blob, Mat& top_blob, cons
sum = bias_data[p];
}

const float* kptr = (const float*)weight_data + kernel_w * h * p;
const float* kptr = weight_data_packed.channel(p);

for (int q = 0; q < h; q++)
{


+ 4
- 2
src/layer/loongarch/convolution_loongarch.cpp View File

@@ -225,7 +225,8 @@ int Convolution_loongarch::create_pipeline(const Option& opt)
}
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -789,7 +790,8 @@ int Convolution_loongarch::create_pipeline_int8_loongarch(const Option& opt)
scale_in_data[p] = scale_in;
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 8
- 4
src/layer/loongarch/convolutiondepthwise_loongarch.cpp View File

@@ -83,7 +83,8 @@ int ConvolutionDepthWise_loongarch::create_pipeline(const Option& opt)
weight_data_tm = weight_data;
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -91,7 +92,8 @@ int ConvolutionDepthWise_loongarch::create_pipeline(const Option& opt)
// group convolution
create_group_ops(opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -600,7 +602,8 @@ int ConvolutionDepthWise_loongarch::create_pipeline_int8_loongarch(const Option&
weight_data_tm = weight_data;
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -608,7 +611,8 @@ int ConvolutionDepthWise_loongarch::create_pipeline_int8_loongarch(const Option&
// group convolution
create_group_ops(opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 2
- 1
src/layer/loongarch/deconvolution_loongarch.cpp View File

@@ -126,7 +126,8 @@ int Deconvolution_loongarch::create_pipeline(const Option& opt)
{
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 4
- 2
src/layer/loongarch/deconvolutiondepthwise_loongarch.cpp View File

@@ -82,7 +82,8 @@ int DeconvolutionDepthWise_loongarch::create_pipeline(const Option& opt)
weight_data_tm = weight_data_transposed;
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -90,7 +91,8 @@ int DeconvolutionDepthWise_loongarch::create_pipeline(const Option& opt)
// group convolution
create_group_ops(opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 6
- 3
src/layer/loongarch/innerproduct_loongarch.cpp View File

@@ -99,7 +99,8 @@ int InnerProduct_loongarch::create_pipeline(const Option& opt)
weight_data_tm = weight_data;
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -652,7 +653,8 @@ int InnerProduct_loongarch::create_pipeline_fp16s(const Option& opt)
ncnn::cast_float32_to_float16(weight_data_r2, weight_data_tm, opt);
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -1140,7 +1142,8 @@ int InnerProduct_loongarch::create_pipeline_int8_loongarch(const Option& opt)
scale_in_data[p] = scale_in;
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 4
- 1
src/layer/mips/convolution1d_mips.cpp View File

@@ -78,6 +78,9 @@ int Convolution1D_mips::create_pipeline(const Option& opt)
}
}

if (opt.lightmode)
weight_data.release();

return 0;
}

@@ -281,7 +284,7 @@ int Convolution1D_mips::forward(const Mat& bottom_blob, Mat& top_blob, const Opt
sum = bias_data[p];
}

const float* kptr = (const float*)weight_data + kernel_w * h * p;
const float* kptr = weight_data_packed.channel(p);

for (int q = 0; q < h; q++)
{


+ 4
- 2
src/layer/mips/convolution_mips.cpp View File

@@ -225,7 +225,8 @@ int Convolution_mips::create_pipeline(const Option& opt)
}
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -789,7 +790,8 @@ int Convolution_mips::create_pipeline_int8_mips(const Option& opt)
scale_in_data[p] = scale_in;
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 8
- 4
src/layer/mips/convolutiondepthwise_mips.cpp View File

@@ -83,7 +83,8 @@ int ConvolutionDepthWise_mips::create_pipeline(const Option& opt)
weight_data_tm = weight_data;
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -91,7 +92,8 @@ int ConvolutionDepthWise_mips::create_pipeline(const Option& opt)
// group convolution
create_group_ops(opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -600,7 +602,8 @@ int ConvolutionDepthWise_mips::create_pipeline_int8_mips(const Option& opt)
weight_data_tm = weight_data;
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -608,7 +611,8 @@ int ConvolutionDepthWise_mips::create_pipeline_int8_mips(const Option& opt)
// group convolution
create_group_ops(opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 2
- 1
src/layer/mips/deconvolution_mips.cpp View File

@@ -126,7 +126,8 @@ int Deconvolution_mips::create_pipeline(const Option& opt)
{
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 4
- 2
src/layer/mips/deconvolutiondepthwise_mips.cpp View File

@@ -82,7 +82,8 @@ int DeconvolutionDepthWise_mips::create_pipeline(const Option& opt)
weight_data_tm = weight_data_transposed;
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -90,7 +91,8 @@ int DeconvolutionDepthWise_mips::create_pipeline(const Option& opt)
// group convolution
create_group_ops(opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 6
- 3
src/layer/mips/innerproduct_mips.cpp View File

@@ -99,7 +99,8 @@ int InnerProduct_mips::create_pipeline(const Option& opt)
weight_data_tm = weight_data;
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -652,7 +653,8 @@ int InnerProduct_mips::create_pipeline_fp16s(const Option& opt)
ncnn::cast_float32_to_float16(weight_data_r2, weight_data_tm, opt);
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -1140,7 +1142,8 @@ int InnerProduct_mips::create_pipeline_int8_mips(const Option& opt)
scale_in_data[p] = scale_in;
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 6
- 2
src/layer/riscv/convolution1d_riscv.cpp View File

@@ -95,6 +95,9 @@ int Convolution1D_riscv::create_pipeline(const Option& opt)
}
}

if (opt.lightmode)
weight_data.release();

return 0;
}

@@ -308,7 +311,7 @@ int Convolution1D_riscv::forward(const Mat& bottom_blob, Mat& top_blob, const Op
sum = bias_data[p];
}

const float* kptr = (const float*)weight_data + kernel_w * h * p;
const float* kptr = weight_data_packed.channel(p);

for (int q = 0; q < h; q++)
{
@@ -470,7 +473,8 @@ int Convolution1D_riscv::create_pipeline_fp16s(const Option& opt)

ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 4
- 2
src/layer/riscv/convolution_riscv.cpp View File

@@ -237,7 +237,8 @@ int Convolution_riscv::create_pipeline(const Option& opt)
}
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -834,7 +835,8 @@ int Convolution_riscv::create_pipeline_fp16s(const Option& opt)
ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt);
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 8
- 4
src/layer/riscv/convolutiondepthwise_riscv.cpp View File

@@ -104,7 +104,8 @@ int ConvolutionDepthWise_riscv::create_pipeline(const Option& opt)
weight_data_tm = weight_data;
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -112,7 +113,8 @@ int ConvolutionDepthWise_riscv::create_pipeline(const Option& opt)
// group convolution
create_group_ops(opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -682,7 +684,8 @@ int ConvolutionDepthWise_riscv::create_pipeline_fp16s(const Option& opt)

ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -690,7 +693,8 @@ int ConvolutionDepthWise_riscv::create_pipeline_fp16s(const Option& opt)
// group convolution
create_group_ops(opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 4
- 2
src/layer/riscv/deconvolution_riscv.cpp View File

@@ -148,7 +148,8 @@ int Deconvolution_riscv::create_pipeline(const Option& opt)
{
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -530,7 +531,8 @@ int Deconvolution_riscv::create_pipeline_fp16s(const Option& opt)

ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 8
- 4
src/layer/riscv/deconvolutiondepthwise_riscv.cpp View File

@@ -97,7 +97,8 @@ int DeconvolutionDepthWise_riscv::create_pipeline(const Option& opt)
weight_data_tm = weight_data_transposed;
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -105,7 +106,8 @@ int DeconvolutionDepthWise_riscv::create_pipeline(const Option& opt)
// group convolution
create_group_ops(opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -619,7 +621,8 @@ int DeconvolutionDepthWise_riscv::create_pipeline_fp16s(const Option& opt)

ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -627,7 +630,8 @@ int DeconvolutionDepthWise_riscv::create_pipeline_fp16s(const Option& opt)
// group convolution
create_group_ops(opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 6
- 3
src/layer/riscv/gemm_riscv.cpp View File

@@ -3984,7 +3984,8 @@ int Gemm_riscv::create_pipeline(const Option& opt)
}
}

A_data.release();
if (opt.lightmode)
A_data.release();
}

if (constantB)
@@ -4024,7 +4025,8 @@ int Gemm_riscv::create_pipeline(const Option& opt)
}
}

B_data.release();
if (opt.lightmode)
B_data.release();
}

if (constantC && constant_broadcast_type_C != -1)
@@ -4054,7 +4056,8 @@ int Gemm_riscv::create_pipeline(const Option& opt)
CT_data = C2;
}

C_data.release();
if (opt.lightmode)
C_data.release();
}

if (constantA || constantB || constantC)


+ 6
- 3
src/layer/riscv/gru_riscv.cpp View File

@@ -714,9 +714,12 @@ int GRU_riscv::create_pipeline_fp16sa(const Option& opt)
cast_float32_to_float16(weight_hc_data, weight_hc_data_fp16sa, opt);
cast_float32_to_float16(bias_c_data, bias_c_data_fp16sa, opt);

weight_xc_data.release();
bias_c_data.release();
weight_hc_data.release();
if (opt.lightmode)
{
weight_xc_data.release();
bias_c_data.release();
weight_hc_data.release();
}

return 0;
}


+ 4
- 2
src/layer/riscv/innerproduct_riscv.cpp View File

@@ -106,7 +106,8 @@ int InnerProduct_riscv::create_pipeline(const Option& opt)
weight_data_tm = weight_data;
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -560,7 +561,8 @@ int InnerProduct_riscv::create_pipeline_fp16s(const Option& opt)

ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 6
- 0
src/layer/vulkan/batchnorm_vulkan.cpp View File

@@ -156,6 +156,12 @@ int BatchNorm_vulkan::upload_model(VkTransfer& cmd, const Option& opt)
cmd.record_upload(b_data_packed, b_data_gpu, opt);
}

if (opt.lightmode)
{
a_data.release();
b_data.release();
}

return 0;
}



+ 5
- 2
src/layer/vulkan/convolution1d_vulkan.cpp View File

@@ -133,8 +133,11 @@ int Convolution1D_vulkan::create_pipeline(const Option& _opt)
pipeline_convolution1d->create(shader_type_index, opt, specializations);
}

weight_data.release();
bias_data.release();
if (opt.lightmode)
{
weight_data.release();
bias_data.release();
}

return 0;
}


+ 5
- 2
src/layer/vulkan/convolution_vulkan.cpp View File

@@ -1148,8 +1148,11 @@ int Convolution_vulkan::create_pipeline(const Option& _opt)
pipeline_convolution->create(shader_type_index, opt, specializations);
}

weight_data.release();
bias_data.release();
if (opt.lightmode)
{
weight_data.release();
bias_data.release();
}

return 0;
}


+ 10
- 4
src/layer/vulkan/convolutiondepthwise_vulkan.cpp View File

@@ -271,8 +271,11 @@ int ConvolutionDepthWise_vulkan::create_pipeline(const Option& _opt)
pipeline_convolutiondepthwise_pack8->create(LayerShaderType::convolutiondepthwise_pack8, opt, specializations);
}

weight_data.release();
bias_data.release();
if (opt.lightmode)
{
weight_data.release();
bias_data.release();
}

return 0;
}
@@ -413,8 +416,11 @@ int ConvolutionDepthWise_vulkan::create_pipeline(const Option& _opt)
pipeline_convolutiondepthwise_group_pack8to1->create(LayerShaderType::convolutiondepthwise_group_pack8to1, opt, specializations);
}

weight_data.release();
bias_data.release();
if (opt.lightmode)
{
weight_data.release();
bias_data.release();
}

return 0;
}


+ 11
- 2
src/layer/vulkan/deconvolution_vulkan.cpp View File

@@ -366,6 +366,12 @@ int Deconvolution_vulkan::create_pipeline(const Option& _opt)
pipeline_deconvolution_col2im->create(shader_type_index, opt, specializations);
}

if (opt.lightmode)
{
weight_data.release();
bias_data.release();
}

return 0;
}

@@ -462,8 +468,11 @@ int Deconvolution_vulkan::create_pipeline(const Option& _opt)
pipeline_deconvolution->set_optimal_local_size_xyz(local_size_xyz);
pipeline_deconvolution->create(shader_type_index, opt, specializations);

weight_data.release();
bias_data.release();
if (opt.lightmode)
{
weight_data.release();
bias_data.release();
}

return 0;
}


+ 10
- 4
src/layer/vulkan/deconvolutiondepthwise_vulkan.cpp View File

@@ -295,8 +295,11 @@ int DeconvolutionDepthWise_vulkan::create_pipeline(const Option& _opt)
pipeline_deconvolutiondepthwise_pack8->create(LayerShaderType::deconvolutiondepthwise_pack8, opt, specializations);
}

weight_data.release();
bias_data.release();
if (opt.lightmode)
{
weight_data.release();
bias_data.release();
}

return 0;
}
@@ -437,8 +440,11 @@ int DeconvolutionDepthWise_vulkan::create_pipeline(const Option& _opt)
pipeline_deconvolutiondepthwise_group_pack8to1->create(LayerShaderType::deconvolutiondepthwise_group_pack8to1, opt, specializations);
}

weight_data.release();
bias_data.release();
if (opt.lightmode)
{
weight_data.release();
bias_data.release();
}

return 0;
}


+ 6
- 3
src/layer/vulkan/gemm_vulkan.cpp View File

@@ -100,9 +100,12 @@ int Gemm_vulkan::create_pipeline(const Option& opt)
pipeline_gemm->create(LayerShaderType::gemm, opt, specializations);
}

A_data.release();
B_data.release();
C_data.release();
if (opt.lightmode)
{
A_data.release();
B_data.release();
C_data.release();
}

return 0;
}


+ 15
- 6
src/layer/vulkan/innerproduct_vulkan.cpp View File

@@ -154,8 +154,11 @@ int InnerProduct_vulkan::create_pipeline(const Option& _opt)
pipeline_innerproduct_gemm->set_optimal_local_size_xyz(local_size_xyz);
pipeline_innerproduct_gemm->create(shader_type_index, opt, specializations);

weight_data.release();
bias_data.release();
if (opt.lightmode)
{
weight_data.release();
bias_data.release();
}

return 0;
}
@@ -364,14 +367,20 @@ int InnerProduct_vulkan::create_pipeline(const Option& _opt)
pipeline_innerproduct_gemm->set_optimal_local_size_xyz(local_size_xyz);
pipeline_innerproduct_gemm->create(shader_type_index, opt, specializations);

weight_data.release();
bias_data.release();
if (opt.lightmode)
{
weight_data.release();
bias_data.release();
}

return 0;
}

weight_data.release();
bias_data.release();
if (opt.lightmode)
{
weight_data.release();
bias_data.release();
}

return 0;
}


+ 5
- 0
src/layer/vulkan/memorydata_vulkan.cpp View File

@@ -82,6 +82,11 @@ int MemoryData_vulkan::upload_model(VkTransfer& cmd, const Option& opt)
cmd.record_upload(data_packed, data_gpu, opt, /*bool flatten*/ false);
}

if (opt.lightmode)
{
data.release();
}

return 0;
}



+ 20
- 8
src/layer/vulkan/multiheadattention_vulkan.cpp View File

@@ -73,8 +73,11 @@ int MultiHeadAttention_vulkan::create_pipeline(const Option& opt)
q_gemm->load_model(ModelBinFromMatArray(weights));
q_gemm->create_pipeline(opt);

q_weight_data.release();
q_bias_data.release();
if (opt.lightmode)
{
q_weight_data.release();
q_bias_data.release();
}
}

{
@@ -100,8 +103,11 @@ int MultiHeadAttention_vulkan::create_pipeline(const Option& opt)
k_gemm->load_model(ModelBinFromMatArray(weights));
k_gemm->create_pipeline(opt);

k_weight_data.release();
k_bias_data.release();
if (opt.lightmode)
{
k_weight_data.release();
k_bias_data.release();
}
}

{
@@ -127,8 +133,11 @@ int MultiHeadAttention_vulkan::create_pipeline(const Option& opt)
v_gemm->load_model(ModelBinFromMatArray(weights));
v_gemm->create_pipeline(opt);

v_weight_data.release();
v_bias_data.release();
if (opt.lightmode)
{
v_weight_data.release();
v_bias_data.release();
}
}

{
@@ -222,8 +231,11 @@ int MultiHeadAttention_vulkan::create_pipeline(const Option& opt)
o_gemm->load_model(ModelBinFromMatArray(weights));
o_gemm->create_pipeline(opt);

out_weight_data.release();
out_bias_data.release();
if (opt.lightmode)
{
out_weight_data.release();
out_bias_data.release();
}
}

return 0;


+ 3
- 0
src/layer/vulkan/normalize_vulkan.cpp View File

@@ -264,6 +264,9 @@ int Normalize_vulkan::upload_model(VkTransfer& cmd, const Option& opt)
{
cmd.record_upload(scale_data_packed, scale_data_gpu, opt);
}

if (opt.lightmode)
scale_data.release();
}

return 0;


+ 5
- 0
src/layer/vulkan/padding_vulkan.cpp View File

@@ -348,6 +348,11 @@ int Padding_vulkan::upload_model(VkTransfer& cmd, const Option& opt)
cmd.record_upload(per_channel_pad_data_packed, per_channel_pad_data_gpu, opt);
}

if (opt.lightmode)
{
per_channel_pad_data.release();
}

return 0;
}



+ 5
- 0
src/layer/vulkan/prelu_vulkan.cpp View File

@@ -144,6 +144,11 @@ int PReLU_vulkan::upload_model(VkTransfer& cmd, const Option& opt)
{
cmd.record_upload(slope_data_packed, slope_data_gpu, opt);
}

if (opt.lightmode)
{
slope_data.release();
}
}

return 0;


+ 13
- 6
src/layer/vulkan/priorbox_vulkan.cpp View File

@@ -129,6 +129,13 @@ int PriorBox_vulkan::upload_model(VkTransfer& cmd, const Option& opt)

cmd.record_upload(aspect_ratios, aspect_ratios_gpu, opt);

if (opt.lightmode)
{
min_sizes.release();
max_sizes.release();
aspect_ratios.release();
}

return 0;
}

@@ -137,7 +144,7 @@ int PriorBox_vulkan::forward(const std::vector<VkMat>& bottom_blobs, std::vector
int w = bottom_blobs[0].w;
int h = bottom_blobs[0].h;

if (bottom_blobs.size() == 1 && image_width == -233 && image_height == -233 && max_sizes.empty())
if (bottom_blobs.size() == 1 && image_width == -233 && image_height == -233 && max_sizes_gpu.empty())
{
// mxnet style _contrib_MultiBoxPrior
float step_w = step_width;
@@ -147,8 +154,8 @@ int PriorBox_vulkan::forward(const std::vector<VkMat>& bottom_blobs, std::vector
if (step_h == -233)
step_h = 1.f / (float)h;

int num_sizes = min_sizes.w;
int num_ratios = aspect_ratios.w;
int num_sizes = min_sizes_gpu.w;
int num_ratios = aspect_ratios_gpu.w;

int num_prior = num_sizes - 1 + num_ratios;

@@ -200,9 +207,9 @@ int PriorBox_vulkan::forward(const std::vector<VkMat>& bottom_blobs, std::vector
if (step_h == -233)
step_h = (float)image_h / h;

int num_min_size = min_sizes.w;
int num_max_size = max_sizes.w;
int num_aspect_ratio = aspect_ratios.w;
int num_min_size = min_sizes_gpu.w;
int num_max_size = max_sizes_gpu.w;
int num_aspect_ratio = aspect_ratios_gpu.w;

int num_prior = num_min_size * num_aspect_ratio + num_min_size + num_max_size;
if (flip)


+ 6
- 0
src/layer/vulkan/scale_vulkan.cpp View File

@@ -218,6 +218,12 @@ int Scale_vulkan::upload_model(VkTransfer& cmd, const Option& opt)
}
}

if (opt.lightmode)
{
scale_data.release();
bias_data.release();
}

return 0;
}



+ 3
- 2
src/layer/x86/convolution1d_x86.cpp View File

@@ -34,7 +34,7 @@ Convolution1D_x86::Convolution1D_x86()
#endif // __SSE2__
}

int Convolution1D_x86::create_pipeline(const Option& /*opt*/)
int Convolution1D_x86::create_pipeline(const Option& opt)
{
if (dynamic_weight)
return 0;
@@ -43,7 +43,8 @@ int Convolution1D_x86::create_pipeline(const Option& /*opt*/)

convolution1d_transform_kernel_packed(weight_data, weight_data_tm, num_input, num_output, kernel_w);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 8
- 4
src/layer/x86/convolution_x86.cpp View File

@@ -334,7 +334,8 @@ int Convolution_x86::create_pipeline(const Option& opt)

convolution_dilation1->create_pipeline(opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -451,7 +452,8 @@ int Convolution_x86::create_pipeline(const Option& opt)
}
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -542,7 +544,8 @@ int Convolution_x86::create_pipeline(const Option& opt)
}
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -1250,7 +1253,8 @@ int Convolution_x86::create_pipeline_int8_x86(const Option& opt)
scale_in_data[p] = scale_in;
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 8
- 4
src/layer/x86/convolutiondepthwise_x86.cpp View File

@@ -132,7 +132,8 @@ int ConvolutionDepthWise_x86::create_pipeline(const Option& opt)
}
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -140,7 +141,8 @@ int ConvolutionDepthWise_x86::create_pipeline(const Option& opt)
// group convolution
create_group_ops(opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -843,7 +845,8 @@ int ConvolutionDepthWise_x86::create_pipeline_int8_x86(const Option& opt)
weight_data_tm = weight_data;
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -851,7 +854,8 @@ int ConvolutionDepthWise_x86::create_pipeline_int8_x86(const Option& opt)
// group convolution
create_group_ops(opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 2
- 1
src/layer/x86/deconvolution_x86.cpp View File

@@ -193,7 +193,8 @@ int Deconvolution_x86::create_pipeline(const Option& opt)
}
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 4
- 2
src/layer/x86/deconvolutiondepthwise_x86.cpp View File

@@ -109,7 +109,8 @@ int DeconvolutionDepthWise_x86::create_pipeline(const Option& opt)
weight_data_tm = weight_data_transposed;
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -117,7 +118,8 @@ int DeconvolutionDepthWise_x86::create_pipeline(const Option& opt)
// group convolution
create_group_ops(opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 2
- 1
src/layer/x86/deformableconv2d_x86.cpp View File

@@ -203,7 +203,8 @@ int DeformableConv2D_x86::create_pipeline(const Option& opt)
deformableconv2d_transform_kernel_packed_sse(weight_data, weight_data_tm, num_input, num_output, kernel_w, kernel_h, elempack, out_elempack);
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 6
- 3
src/layer/x86/gemm_x86.cpp View File

@@ -7235,7 +7235,8 @@ int Gemm_x86::create_pipeline(const Option& opt)
}
}

A_data.release();
if (opt.lightmode)
A_data.release();
}

if (constantB)
@@ -7279,7 +7280,8 @@ int Gemm_x86::create_pipeline(const Option& opt)
}
}

B_data.release();
if (opt.lightmode)
B_data.release();
}

if (constantC && constant_broadcast_type_C != -1)
@@ -7315,7 +7317,8 @@ int Gemm_x86::create_pipeline(const Option& opt)
CT_data = C2;
}

C_data.release();
if (opt.lightmode)
C_data.release();
}

if (constantA || constantB || constantC)


+ 6
- 3
src/layer/x86/innerproduct_x86.cpp View File

@@ -80,7 +80,8 @@ int InnerProduct_x86::create_pipeline(const Option& opt)

innerproduct_transform_kernel_sse(weight_data, weight_data_tm, num_input, num_output, opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -175,7 +176,8 @@ int InnerProduct_x86::create_pipeline_fp16s(const Option& opt)

innerproduct_transform_kernel_fp16s_sse(weight_data, weight_data_tm, num_input, num_output, opt);

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}
@@ -281,7 +283,8 @@ int InnerProduct_x86::create_pipeline_int8_x86(const Option& opt)
scale_in_data[p] = scale_in;
}

weight_data.release();
if (opt.lightmode)
weight_data.release();

return 0;
}


+ 6
- 3
src/layer/x86/lstm_x86.cpp View File

@@ -182,9 +182,12 @@ int LSTM_x86::create_pipeline(const Option& opt)
}
}

weight_xc_data.release();
bias_c_data.release();
weight_hc_data.release();
if (opt.lightmode)
{
weight_xc_data.release();
bias_c_data.release();
weight_hc_data.release();
}

return 0;
}


+ 20
- 8
src/layer/x86/multiheadattention_x86.cpp View File

@@ -65,8 +65,11 @@ int MultiHeadAttention_x86::create_pipeline(const Option& opt)
q_gemm->load_model(ModelBinFromMatArray(weights));
q_gemm->create_pipeline(opt);

q_weight_data.release();
q_bias_data.release();
if (opt.lightmode)
{
q_weight_data.release();
q_bias_data.release();
}
}

{
@@ -91,8 +94,11 @@ int MultiHeadAttention_x86::create_pipeline(const Option& opt)
k_gemm->load_model(ModelBinFromMatArray(weights));
k_gemm->create_pipeline(opt);

k_weight_data.release();
k_bias_data.release();
if (opt.lightmode)
{
k_weight_data.release();
k_bias_data.release();
}
}

{
@@ -117,8 +123,11 @@ int MultiHeadAttention_x86::create_pipeline(const Option& opt)
v_gemm->load_model(ModelBinFromMatArray(weights));
v_gemm->create_pipeline(opt);

v_weight_data.release();
v_bias_data.release();
if (opt.lightmode)
{
v_weight_data.release();
v_bias_data.release();
}
}

{
@@ -193,8 +202,11 @@ int MultiHeadAttention_x86::create_pipeline(const Option& opt)
o_gemm->load_model(ModelBinFromMatArray(weights));
o_gemm->create_pipeline(opt);

out_weight_data.release();
out_bias_data.release();
if (opt.lightmode)
{
out_weight_data.release();
out_bias_data.release();
}
}

return 0;


+ 89
- 43
tools/modelwriter.h View File

@@ -32,6 +32,7 @@
#include "layer/batchnorm.h"
#include "layer/bias.h"
#include "layer/binaryop.h"
#include "layer/celu.h"
#include "layer/clip.h"
#include "layer/concat.h"
#include "layer/convolution.h"
@@ -51,6 +52,7 @@
#include "layer/deconvolutiondepthwise3d.h"
#include "layer/deformableconv2d.h"
#include "layer/detectionoutput.h"
#include "layer/diag.h"
#include "layer/dropout.h"
#include "layer/eltwise.h"
#include "layer/elu.h"
@@ -835,6 +837,13 @@ int ModelWriter::save(const char* parampath, const char* binpath)
fprintf_param_value(" 1=%d", with_scalar)
fprintf_param_value(" 2=%e", b)
}
else if (layer->type == "CELU")
{
ncnn::CELU* op = (ncnn::CELU*)layer;
ncnn::CELU* op_default = (ncnn::CELU*)layer_default;

fprintf_param_value(" 0=%e", alpha)
}
else if (layer->type == "Clip")
{
ncnn::Clip* op = (ncnn::Clip*)layer;
@@ -888,18 +897,21 @@ int ModelWriter::save(const char* parampath, const char* binpath)
}
fprintf_param_value(" 19=%d", dynamic_weight)

fwrite_weight_tag_data(op->weight_data, bp);
fwrite_weight_data(op->bias_data, bp);
if (op->dynamic_weight == 0)
{
fwrite_weight_tag_data(op->weight_data, bp);
fwrite_weight_data(op->bias_data, bp);

#if NCNN_INT8
// write int8_scale data
if (op->int8_scale_term)
{
fwrite_weight_data(op->weight_data_int8_scales, bp, 90, 100);
fwrite_weight_data(op->bottom_blob_int8_scales, bp, 0.001, 1);
fwrite_weight_data(op->top_blob_int8_scales, bp, 0.001, 1);
}
// write int8_scale data
if (op->int8_scale_term)
{
fwrite_weight_data(op->weight_data_int8_scales, bp, 90, 100);
fwrite_weight_data(op->bottom_blob_int8_scales, bp, 0.001, 1);
fwrite_weight_data(op->top_blob_int8_scales, bp, 0.001, 1);
}
#endif // NCNN_INT8
}

if (shape_ready)
{
@@ -931,9 +943,13 @@ int ModelWriter::save(const char* parampath, const char* binpath)
{
if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp);
}
fprintf_param_value(" 19=%d", dynamic_weight)

fwrite_weight_tag_data(op->weight_data, bp);
fwrite_weight_data(op->bias_data, bp);
if (op->dynamic_weight == 0)
{
fwrite_weight_tag_data(op->weight_data, bp);
fwrite_weight_data(op->bias_data, bp);
}

if (shape_ready)
{
@@ -1040,32 +1056,35 @@ int ModelWriter::save(const char* parampath, const char* binpath)
}
fprintf_param_value(" 19=%d", dynamic_weight)

fwrite_weight_tag_data(op->weight_data, bp);
fwrite_weight_data(op->bias_data, bp);
if (op->dynamic_weight == 0)
{
fwrite_weight_tag_data(op->weight_data, bp);
fwrite_weight_data(op->bias_data, bp);

#if NCNN_INT8
// write int8_scale data
if (op->int8_scale_term == 1 || op->int8_scale_term == 101)
{
op->bottom_blob_int8_scales.w = 1;
}
if (op->int8_scale_term == 2 || op->int8_scale_term == 102)
{
op->weight_data_int8_scales.w = 1;
op->bottom_blob_int8_scales.w = 1;
}
if (op->int8_scale_term > 100)
{
op->top_blob_int8_scales.w = 1;
}
// write int8_scale data
if (op->int8_scale_term == 1 || op->int8_scale_term == 101)
{
op->bottom_blob_int8_scales.w = 1;
}
if (op->int8_scale_term == 2 || op->int8_scale_term == 102)
{
op->weight_data_int8_scales.w = 1;
op->bottom_blob_int8_scales.w = 1;
}
if (op->int8_scale_term > 100)
{
op->top_blob_int8_scales.w = 1;
}

if (op->int8_scale_term)
{
fwrite_weight_data(op->weight_data_int8_scales, bp, 90, 100);
fwrite_weight_data(op->bottom_blob_int8_scales, bp, 0.001, 1);
fwrite_weight_data(op->top_blob_int8_scales, bp, 0.001, 1);
}
if (op->int8_scale_term)
{
fwrite_weight_data(op->weight_data_int8_scales, bp, 90, 100);
fwrite_weight_data(op->bottom_blob_int8_scales, bp, 0.001, 1);
fwrite_weight_data(op->top_blob_int8_scales, bp, 0.001, 1);
}
#endif // NCNN_INT8
}

if (shape_ready)
{
@@ -1098,9 +1117,13 @@ int ModelWriter::save(const char* parampath, const char* binpath)
{
if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp);
}
fprintf_param_value(" 19=%d", dynamic_weight)

fwrite_weight_tag_data(op->weight_data, bp);
fwrite_weight_data(op->bias_data, bp);
if (op->dynamic_weight == 0)
{
fwrite_weight_tag_data(op->weight_data, bp);
fwrite_weight_data(op->bias_data, bp);
}

if (shape_ready)
{
@@ -1261,9 +1284,13 @@ int ModelWriter::save(const char* parampath, const char* binpath)
{
if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp);
}
fprintf_param_value(" 28=%d", dynamic_weight)

fwrite_weight_tag_data(op->weight_data, bp);
fwrite_weight_data(op->bias_data, bp);
if (op->dynamic_weight == 0)
{
fwrite_weight_tag_data(op->weight_data, bp);
fwrite_weight_data(op->bias_data, bp);
}

if (shape_ready)
{
@@ -1296,9 +1323,13 @@ int ModelWriter::save(const char* parampath, const char* binpath)
{
if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp);
}
fprintf_param_value(" 28=%d", dynamic_weight)

fwrite_weight_tag_data(op->weight_data, bp);
fwrite_weight_data(op->bias_data, bp);
if (op->dynamic_weight == 0)
{
fwrite_weight_tag_data(op->weight_data, bp);
fwrite_weight_data(op->bias_data, bp);
}

if (shape_ready)
{
@@ -1418,9 +1449,13 @@ int ModelWriter::save(const char* parampath, const char* binpath)
{
if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp);
}
fprintf_param_value(" 28=%d", dynamic_weight)

fwrite_weight_tag_data(op->weight_data, bp);
fwrite_weight_data(op->bias_data, bp);
if (op->dynamic_weight == 0)
{
fwrite_weight_tag_data(op->weight_data, bp);
fwrite_weight_data(op->bias_data, bp);
}

if (shape_ready)
{
@@ -1454,9 +1489,13 @@ int ModelWriter::save(const char* parampath, const char* binpath)
{
if (!op->activation_params.empty()) fprintf_param_float_array(10, op->activation_params, pp);
}
fprintf_param_value(" 28=%d", dynamic_weight)

fwrite_weight_tag_data(op->weight_data, bp);
fwrite_weight_data(op->bias_data, bp);
if (op->dynamic_weight == 0)
{
fwrite_weight_tag_data(op->weight_data, bp);
fwrite_weight_data(op->bias_data, bp);
}

if (shape_ready)
{
@@ -1597,6 +1636,13 @@ int ModelWriter::save(const char* parampath, const char* binpath)
fprintf_param_value(" 7=%e", variances[2])
fprintf_param_value(" 8=%e", variances[3])
}
else if (layer->type == "Diag")
{
ncnn::Diag* op = (ncnn::Diag*)layer;
ncnn::Diag* op_default = (ncnn::Diag*)layer_default;

fprintf_param_value(" 0=%d", diagonal)
}
else if (layer->type == "Dropout")
{
ncnn::Dropout* op = (ncnn::Dropout*)layer;


Loading…
Cancel
Save