|
|
|
@@ -446,10 +446,11 @@ int Convolution::upload_model(VkTransfer& cmd) |
|
|
|
{ |
|
|
|
// src = kw-kh-inch-outch |
|
|
|
// dst = 4a-4b-kw-kh-inch/4a-outch/4b |
|
|
|
Mat weight_data_pack4; |
|
|
|
{ |
|
|
|
Mat weight_data_r2 = weight_data.reshape(maxk, num_input, num_output); |
|
|
|
|
|
|
|
weight_data_pack4 = Mat(16*maxk, num_input/4, num_output/4); |
|
|
|
weight_data_pack4.create(16*maxk, num_input/4, num_output/4); |
|
|
|
|
|
|
|
for (int q=0; q+3<num_output; q+=4) |
|
|
|
{ |
|
|
|
@@ -521,10 +522,11 @@ int Convolution::upload_model(VkTransfer& cmd) |
|
|
|
{ |
|
|
|
// src = kw-kh-inch-outch |
|
|
|
// dst = 4b-kw-kh-inch-outch/4b |
|
|
|
Mat weight_data_pack1to4; |
|
|
|
{ |
|
|
|
Mat weight_data_r2 = weight_data.reshape(maxk, num_input, num_output); |
|
|
|
|
|
|
|
weight_data_pack1to4 = Mat(4*maxk, num_input, num_output/4); |
|
|
|
weight_data_pack1to4.create(4*maxk, num_input, num_output/4); |
|
|
|
|
|
|
|
for (int q=0; q+3<num_output; q+=4) |
|
|
|
{ |
|
|
|
@@ -566,10 +568,11 @@ int Convolution::upload_model(VkTransfer& cmd) |
|
|
|
{ |
|
|
|
// src = kw-kh-inch-outch |
|
|
|
// dst = 4a-kw-kh-inch/4a-outch |
|
|
|
Mat weight_data_pack4to1; |
|
|
|
{ |
|
|
|
Mat weight_data_r2 = weight_data.reshape(maxk, num_input, num_output); |
|
|
|
|
|
|
|
weight_data_pack4to1 = Mat(4*maxk, num_input/4, num_output); |
|
|
|
weight_data_pack4to1.create(4*maxk, num_input/4, num_output); |
|
|
|
|
|
|
|
for (int q=0; q<num_output; q++) |
|
|
|
{ |
|
|
|
@@ -606,6 +609,7 @@ int Convolution::upload_model(VkTransfer& cmd) |
|
|
|
{ |
|
|
|
if (bias_term) |
|
|
|
{ |
|
|
|
Mat bias_data_pack4; |
|
|
|
convert_packing(bias_data, bias_data_pack4, 4); |
|
|
|
cmd.record_upload(bias_data_pack4, bias_data_gpu_pack4); |
|
|
|
} |
|
|
|
|