Browse Source

reduce memory usage of weight packing

tags/20190320
nihuini 7 years ago
parent
commit
e213605cd4
10 changed files with 20 additions and 22 deletions
  1. src/layer/batchnorm.cpp (+2, -0)
  2. src/layer/batchnorm.h (+0, -3)
  3. src/layer/convolution.cpp (+7, -3)
  4. src/layer/convolution.h (+0, -4)
  5. src/layer/convolutiondepthwise.cpp (+2, -0)
  6. src/layer/convolutiondepthwise.h (+0, -2)
  7. src/layer/innerproduct.cpp (+7, -3)
  8. src/layer/innerproduct.h (+0, -4)
  9. src/layer/scale.cpp (+2, -0)
  10. src/layer/scale.h (+0, -3)

+ 2
- 0
src/layer/batchnorm.cpp View File

@@ -147,9 +147,11 @@ int BatchNorm::upload_model(VkTransfer& cmd)
// pack4
if (channels % 4 == 0)
{
+ Mat a_data_pack4;
convert_packing(a_data, a_data_pack4, 4);
cmd.record_upload(a_data_pack4, a_data_gpu_pack4);

+ Mat b_data_pack4;
convert_packing(b_data, b_data_pack4, 4);
cmd.record_upload(b_data_pack4, b_data_gpu_pack4);
}


+ 0
- 3
src/layer/batchnorm.h View File

@@ -56,11 +56,8 @@ public:
#if NCNN_VULKAN
VkMat a_data_gpu;
VkMat b_data_gpu;

Pipeline* pipeline_batchnorm;

- Mat a_data_pack4;
- Mat b_data_pack4;
VkMat a_data_gpu_pack4;
VkMat b_data_gpu_pack4;
Pipeline* pipeline_batchnorm_pack4;


+ 7
- 3
src/layer/convolution.cpp View File

@@ -446,10 +446,11 @@ int Convolution::upload_model(VkTransfer& cmd)
{
// src = kw-kh-inch-outch
// dst = 4a-4b-kw-kh-inch/4a-outch/4b
+ Mat weight_data_pack4;
{
Mat weight_data_r2 = weight_data.reshape(maxk, num_input, num_output);

- weight_data_pack4 = Mat(16*maxk, num_input/4, num_output/4);
+ weight_data_pack4.create(16*maxk, num_input/4, num_output/4);

for (int q=0; q+3<num_output; q+=4)
{
@@ -521,10 +522,11 @@ int Convolution::upload_model(VkTransfer& cmd)
{
// src = kw-kh-inch-outch
// dst = 4b-kw-kh-inch-outch/4b
+ Mat weight_data_pack1to4;
{
Mat weight_data_r2 = weight_data.reshape(maxk, num_input, num_output);

- weight_data_pack1to4 = Mat(4*maxk, num_input, num_output/4);
+ weight_data_pack1to4.create(4*maxk, num_input, num_output/4);

for (int q=0; q+3<num_output; q+=4)
{
@@ -566,10 +568,11 @@ int Convolution::upload_model(VkTransfer& cmd)
{
// src = kw-kh-inch-outch
// dst = 4a-kw-kh-inch/4a-outch
+ Mat weight_data_pack4to1;
{
Mat weight_data_r2 = weight_data.reshape(maxk, num_input, num_output);

- weight_data_pack4to1 = Mat(4*maxk, num_input/4, num_output);
+ weight_data_pack4to1.create(4*maxk, num_input/4, num_output);

for (int q=0; q<num_output; q++)
{
@@ -606,6 +609,7 @@ int Convolution::upload_model(VkTransfer& cmd)
{
if (bias_term)
{
+ Mat bias_data_pack4;
convert_packing(bias_data, bias_data_pack4, 4);
cmd.record_upload(bias_data_pack4, bias_data_gpu_pack4);
}


+ 0
- 4
src/layer/convolution.h View File

@@ -71,21 +71,17 @@ public:
Pipeline* pipeline_convolution;
Pipeline* pipeline_convolution_1x1s1d1;

- Mat bias_data_pack4;
VkMat bias_data_gpu_pack4;

// pack4
- Mat weight_data_pack4;
VkMat weight_data_gpu_pack4;
Pipeline* pipeline_convolution_pack4;

// pack1to4
- Mat weight_data_pack1to4;
VkMat weight_data_gpu_pack1to4;
Pipeline* pipeline_convolution_pack1to4;

// pack4to1
- Mat weight_data_pack4to1;
VkMat weight_data_gpu_pack4to1;
Pipeline* pipeline_convolution_pack4to1;
#endif // NCNN_VULKAN


+ 2
- 0
src/layer/convolutiondepthwise.cpp View File

@@ -606,6 +606,7 @@ int ConvolutionDepthWise::upload_model(VkTransfer& cmd)
{
const int maxk = kernel_w * kernel_h;

+ Mat weight_data_pack4;
Mat weight_data_r2 = weight_data.reshape(maxk, group);
convert_packing(weight_data_r2, weight_data_pack4, 4);

@@ -614,6 +615,7 @@ int ConvolutionDepthWise::upload_model(VkTransfer& cmd)

if (bias_term)
{
+ Mat bias_data_pack4;
convert_packing(bias_data, bias_data_pack4, 4);
cmd.record_upload(bias_data_pack4, bias_data_gpu_pack4);
}


+ 0
- 2
src/layer/convolutiondepthwise.h View File

@@ -72,8 +72,6 @@ public:

Pipeline* pipeline_convolutiondepthwise;

- Mat weight_data_pack4;
- Mat bias_data_pack4;
VkMat weight_data_gpu_pack4;
VkMat bias_data_gpu_pack4;
Pipeline* pipeline_convolutiondepthwise_pack4;


+ 7
- 3
src/layer/innerproduct.cpp View File

@@ -248,10 +248,11 @@ int InnerProduct::upload_model(VkTransfer& cmd)
{
// src = inch-outch
// dst = 4a-4b-inch/4a-outch/4b
+ Mat weight_data_pack4;
{
Mat weight_data_r2 = weight_data.reshape(num_input, num_output);

- weight_data_pack4 = Mat(16, num_input/4, num_output/4);
+ weight_data_pack4.create(16, num_input/4, num_output/4);

for (int q=0; q+3<num_output; q+=4)
{
@@ -302,10 +303,11 @@ int InnerProduct::upload_model(VkTransfer& cmd)
{
// src = inch-outch
// dst = 4b-inch-outch/4b
+ Mat weight_data_pack1to4;
{
Mat weight_data_r2 = weight_data.reshape(num_input, num_output);

- weight_data_pack1to4 = Mat(4, num_input, num_output/4);
+ weight_data_pack1to4.create(4, num_input, num_output/4);

for (int q=0; q+3<num_output; q+=4)
{
@@ -337,10 +339,11 @@ int InnerProduct::upload_model(VkTransfer& cmd)
{
// src = inch-outch
// dst = 4a-inch/4a-outch
+ Mat weight_data_pack4to1;
{
Mat weight_data_r2 = weight_data.reshape(num_input, num_output);

- weight_data_pack4to1 = Mat(4, num_input/4, num_output);
+ weight_data_pack4to1.create(4, num_input/4, num_output);

for (int q=0; q<num_output; q++)
{
@@ -369,6 +372,7 @@ int InnerProduct::upload_model(VkTransfer& cmd)
{
if (bias_term)
{
+ Mat bias_data_pack4;
convert_packing(bias_data, bias_data_pack4, 4);
cmd.record_upload(bias_data_pack4, bias_data_gpu_pack4);
}


+ 0
- 4
src/layer/innerproduct.h View File

@@ -61,21 +61,17 @@ public:

Pipeline* pipeline_innerproduct;

- Mat bias_data_pack4;
VkMat bias_data_gpu_pack4;

// pack4
- Mat weight_data_pack4;
VkMat weight_data_gpu_pack4;
Pipeline* pipeline_innerproduct_pack4;

// pack1to4
- Mat weight_data_pack1to4;
VkMat weight_data_gpu_pack1to4;
Pipeline* pipeline_innerproduct_pack1to4;

// pack4to1
- Mat weight_data_pack4to1;
VkMat weight_data_gpu_pack4to1;
Pipeline* pipeline_innerproduct_pack4to1;
#endif // NCNN_VULKAN


+ 2
- 0
src/layer/scale.cpp View File

@@ -188,6 +188,7 @@ int Scale::upload_model(VkTransfer& cmd)

// pack4
{
+ Mat scale_data_pack4;
convert_packing(scale_data, scale_data_pack4, 4);
cmd.record_upload(scale_data_pack4, scale_data_gpu_pack4);
}
@@ -199,6 +200,7 @@ int Scale::upload_model(VkTransfer& cmd)

// pack4
{
+ Mat bias_data_pack4;
convert_packing(bias_data, bias_data_pack4, 4);
cmd.record_upload(bias_data_pack4, bias_data_gpu_pack4);
}


+ 0
- 3
src/layer/scale.h View File

@@ -53,11 +53,8 @@ public:
#if NCNN_VULKAN
VkMat scale_data_gpu;
VkMat bias_data_gpu;

Pipeline* pipeline_scale;

- Mat scale_data_pack4;
- Mat bias_data_pack4;
VkMat scale_data_gpu_pack4;
VkMat bias_data_gpu_pack4;
Pipeline* pipeline_scale_pack4;


Loading…
Cancel
Save