|
|
|
@@ -1145,201 +1145,425 @@ int Net::forward_layer(int layer_index, std::vector<Mat>& blob_mats, Option& opt |
|
|
|
} |
|
|
|
|
|
|
|
#if NCNN_VULKAN |
|
|
|
int Net::forward_layer(int layer_index, std::vector<VkMat>& blob_mats, std::vector<int>& wait_barrier_counts, VkCompute& cmd, Option& opt) const |
|
|
|
int Net::forward_layer(int layer_index, std::vector<Mat>& blob_mats, std::vector<VkMat>& blob_mats_gpu, std::vector<int>& wait_barrier_counts, VkCompute& cmd, Option& opt) const |
|
|
|
{ |
|
|
|
const Layer* layer = layers[layer_index]; |
|
|
|
|
|
|
|
// fprintf(stderr, "forward_layer %d %s\n", layer_index, layer->name.c_str()); |
|
|
|
// fprintf(stderr, "forward_layer %d %d %s\n", layer->support_vulkan, layer_index, layer->name.c_str()); |
|
|
|
|
|
|
|
if (layer->one_blob_only) |
|
|
|
if (layer->support_vulkan) |
|
|
|
{ |
|
|
|
// load bottom blob |
|
|
|
int bottom_blob_index = layer->bottoms[0]; |
|
|
|
int top_blob_index = layer->tops[0]; |
|
|
|
|
|
|
|
if (blob_mats[bottom_blob_index].dims == 0) |
|
|
|
{ |
|
|
|
int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, wait_barrier_counts, cmd, opt); |
|
|
|
if (ret != 0) |
|
|
|
return ret; |
|
|
|
} |
|
|
|
else if (blob_mats[bottom_blob_index].staging_data) |
|
|
|
if (layer->one_blob_only) |
|
|
|
{ |
|
|
|
// upload |
|
|
|
const VkMat& bottom_blob = blob_mats[bottom_blob_index]; |
|
|
|
cmd.record_prepare_transfer_barrier(bottom_blob); |
|
|
|
cmd.record_upload(bottom_blob); |
|
|
|
} |
|
|
|
// load bottom blob |
|
|
|
int bottom_blob_index = layer->bottoms[0]; |
|
|
|
int top_blob_index = layer->tops[0]; |
|
|
|
|
|
|
|
VkMat bottom_blob = blob_mats[bottom_blob_index]; |
|
|
|
if (blob_mats_gpu[bottom_blob_index].dims == 0) |
|
|
|
{ |
|
|
|
if (blob_mats[bottom_blob_index].dims == 0) |
|
|
|
{ |
|
|
|
int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, blob_mats_gpu, wait_barrier_counts, cmd, opt); |
|
|
|
if (ret != 0) |
|
|
|
return ret; |
|
|
|
} |
|
|
|
|
|
|
|
if (opt.lightmode) |
|
|
|
{ |
|
|
|
if (blob_mats_gpu[bottom_blob_index].dims == 0) |
|
|
|
{ |
|
|
|
// upload |
|
|
|
VkMat& bottom_blob = blob_mats_gpu[bottom_blob_index]; |
|
|
|
bottom_blob.create_like(blob_mats[bottom_blob_index], opt.blob_vkallocator, opt.staging_vkallocator); |
|
|
|
|
|
|
|
if (!bottom_blob.allocator->mappable) |
|
|
|
{ |
|
|
|
bottom_blob.prepare_staging_buffer(); |
|
|
|
} |
|
|
|
|
|
|
|
wait_barrier_counts[bottom_blob_index] += layer->tops.size(); |
|
|
|
bottom_blob.upload(blob_mats[bottom_blob_index]); |
|
|
|
|
|
|
|
// deep copy for inplace forward if data is shared |
|
|
|
if (layer->support_inplace && wait_barrier_counts[bottom_blob_index] != 1) |
|
|
|
cmd.record_prepare_transfer_barrier(bottom_blob); |
|
|
|
cmd.record_upload(bottom_blob); |
|
|
|
|
|
|
|
// TODO convert packing |
|
|
|
// fprintf(stderr, "upload %d %d %d %lu %d\n", bottom_blob.w, bottom_blob.h, bottom_blob.c, bottom_blob.elemsize, bottom_blob.packing); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
VkMat bottom_blob = blob_mats_gpu[bottom_blob_index]; |
|
|
|
|
|
|
|
if (opt.lightmode) |
|
|
|
{ |
|
|
|
VkMat bottom_blob_copy; |
|
|
|
bottom_blob_copy.create_like(bottom_blob, bottom_blob.allocator, bottom_blob.staging_allocator); |
|
|
|
|
|
|
|
// fprintf(stderr, "clone %p %p\n", bottom_blob.buffer(), bottom_blob_copy.buffer()); |
|
|
|
wait_barrier_counts[bottom_blob_index] += layer->tops.size(); |
|
|
|
|
|
|
|
// deep copy for inplace forward if data is shared |
|
|
|
if (layer->support_inplace && wait_barrier_counts[bottom_blob_index] != 1) |
|
|
|
{ |
|
|
|
VkMat bottom_blob_copy; |
|
|
|
bottom_blob_copy.create_like(bottom_blob, bottom_blob.allocator, bottom_blob.staging_allocator); |
|
|
|
|
|
|
|
// fprintf(stderr, "clone %p %p\n", bottom_blob.buffer(), bottom_blob_copy.buffer()); |
|
|
|
|
|
|
|
cmd.record_prepare_transfer_barrier(bottom_blob); |
|
|
|
cmd.record_clone(bottom_blob, bottom_blob_copy); |
|
|
|
bottom_blob = bottom_blob_copy; |
|
|
|
cmd.record_prepare_transfer_barrier(bottom_blob); |
|
|
|
cmd.record_clone(bottom_blob, bottom_blob_copy); |
|
|
|
bottom_blob = bottom_blob_copy; |
|
|
|
|
|
|
|
wait_barrier_counts[bottom_blob_index]--; |
|
|
|
wait_barrier_counts[bottom_blob_index]--; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
// forward |
|
|
|
if (opt.lightmode && layer->support_inplace) |
|
|
|
{ |
|
|
|
VkMat& bottom_top_blob = bottom_blob; |
|
|
|
int ret = layer->forward_inplace(bottom_top_blob, cmd, opt); |
|
|
|
if (ret != 0) |
|
|
|
return ret; |
|
|
|
// forward |
|
|
|
if (opt.lightmode && layer->support_inplace) |
|
|
|
{ |
|
|
|
VkMat& bottom_top_blob = bottom_blob; |
|
|
|
int ret = layer->forward_inplace(bottom_top_blob, cmd, opt); |
|
|
|
if (ret != 0) |
|
|
|
return ret; |
|
|
|
|
|
|
|
// store top blob |
|
|
|
blob_mats[top_blob_index] = bottom_top_blob; |
|
|
|
// store top blob |
|
|
|
blob_mats_gpu[top_blob_index] = bottom_top_blob; |
|
|
|
} |
|
|
|
else |
|
|
|
{ |
|
|
|
VkMat top_blob; |
|
|
|
int ret = layer->forward(bottom_blob, top_blob, cmd, opt); |
|
|
|
if (ret != 0) |
|
|
|
return ret; |
|
|
|
|
|
|
|
// store top blob |
|
|
|
blob_mats_gpu[top_blob_index] = top_blob; |
|
|
|
} |
|
|
|
|
|
|
|
if (opt.lightmode) |
|
|
|
{ |
|
|
|
// reclaim producer bottom_blob as free when consuming bottom_blob |
|
|
|
const Layer* producer = layers[ blobs[bottom_blob_index].producer ]; |
|
|
|
for (size_t i=0; i<producer->bottoms.size(); i++) |
|
|
|
{ |
|
|
|
int producer_bottom_blob_index = producer->bottoms[i]; |
|
|
|
|
|
|
|
wait_barrier_counts[producer_bottom_blob_index]--; |
|
|
|
if (wait_barrier_counts[producer_bottom_blob_index] == 0) |
|
|
|
{ |
|
|
|
// fprintf(stderr, "reclaim free %p\n", blob_mats_gpu[producer_bottom_blob_index].buffer()); |
|
|
|
|
|
|
|
blob_mats_gpu[producer_bottom_blob_index].release(); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
else |
|
|
|
{ |
|
|
|
VkMat top_blob; |
|
|
|
int ret = layer->forward(bottom_blob, top_blob, cmd, opt); |
|
|
|
if (ret != 0) |
|
|
|
return ret; |
|
|
|
// load bottom blobs |
|
|
|
std::vector<VkMat> bottom_blobs; |
|
|
|
bottom_blobs.resize(layer->bottoms.size()); |
|
|
|
for (size_t i=0; i<layer->bottoms.size(); i++) |
|
|
|
{ |
|
|
|
int bottom_blob_index = layer->bottoms[i]; |
|
|
|
|
|
|
|
// store top blob |
|
|
|
blob_mats[top_blob_index] = top_blob; |
|
|
|
} |
|
|
|
if (blob_mats_gpu[bottom_blob_index].dims == 0) |
|
|
|
{ |
|
|
|
if (blob_mats[bottom_blob_index].dims == 0) |
|
|
|
{ |
|
|
|
int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, blob_mats_gpu, wait_barrier_counts, cmd, opt); |
|
|
|
if (ret != 0) |
|
|
|
return ret; |
|
|
|
} |
|
|
|
|
|
|
|
if (opt.lightmode) |
|
|
|
{ |
|
|
|
// reclaim producer bottom_blob as free when consuming bottom_blob |
|
|
|
const Layer* producer = layers[ blobs[bottom_blob_index].producer ]; |
|
|
|
for (size_t i=0; i<producer->bottoms.size(); i++) |
|
|
|
{ |
|
|
|
int producer_bottom_blob_index = producer->bottoms[i]; |
|
|
|
if (blob_mats_gpu[bottom_blob_index].dims == 0) |
|
|
|
{ |
|
|
|
// upload |
|
|
|
VkMat& bottom_blob = blob_mats_gpu[bottom_blob_index]; |
|
|
|
bottom_blob.create_like(blob_mats[bottom_blob_index], opt.blob_vkallocator, opt.staging_vkallocator); |
|
|
|
|
|
|
|
if (!bottom_blob.allocator->mappable) |
|
|
|
{ |
|
|
|
bottom_blob.prepare_staging_buffer(); |
|
|
|
} |
|
|
|
|
|
|
|
bottom_blob.upload(blob_mats[bottom_blob_index]); |
|
|
|
|
|
|
|
cmd.record_prepare_transfer_barrier(bottom_blob); |
|
|
|
cmd.record_upload(bottom_blob); |
|
|
|
|
|
|
|
// TODO convert packing |
|
|
|
// fprintf(stderr, "upload %d %d %d %lu %d\n", bottom_blob.w, bottom_blob.h, bottom_blob.c, bottom_blob.elemsize, bottom_blob.packing); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
wait_barrier_counts[producer_bottom_blob_index]--; |
|
|
|
if (wait_barrier_counts[producer_bottom_blob_index] == 0) |
|
|
|
bottom_blobs[i] = blob_mats_gpu[bottom_blob_index]; |
|
|
|
|
|
|
|
if (opt.lightmode) |
|
|
|
{ |
|
|
|
// fprintf(stderr, "reclaim free %p\n", blob_mats[producer_bottom_blob_index].buffer()); |
|
|
|
|
|
|
|
blob_mats[producer_bottom_blob_index].release(); |
|
|
|
wait_barrier_counts[bottom_blob_index] = layer->tops.size(); |
|
|
|
|
|
|
|
// deep copy for inplace forward if data is shared |
|
|
|
if (layer->support_inplace && wait_barrier_counts[bottom_blob_index] != 1) |
|
|
|
{ |
|
|
|
VkMat bottom_blob_copy; |
|
|
|
bottom_blob_copy.create_like(bottom_blobs[i], bottom_blobs[i].allocator, bottom_blobs[i].staging_allocator); |
|
|
|
|
|
|
|
// fprintf(stderr, "clone %p %p\n", bottom_blobs[i].buffer(), bottom_blob_copy.buffer()); |
|
|
|
|
|
|
|
cmd.record_prepare_transfer_barrier(bottom_blobs[i]); |
|
|
|
cmd.record_clone(bottom_blobs[i], bottom_blob_copy); |
|
|
|
bottom_blobs[i] = bottom_blob_copy; |
|
|
|
|
|
|
|
wait_barrier_counts[bottom_blob_index]--; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
else |
|
|
|
{ |
|
|
|
// load bottom blobs |
|
|
|
std::vector<VkMat> bottom_blobs; |
|
|
|
bottom_blobs.resize(layer->bottoms.size()); |
|
|
|
for (size_t i=0; i<layer->bottoms.size(); i++) |
|
|
|
{ |
|
|
|
int bottom_blob_index = layer->bottoms[i]; |
|
|
|
|
|
|
|
if (blob_mats[bottom_blob_index].dims == 0) |
|
|
|
// forward |
|
|
|
if (opt.lightmode && layer->support_inplace) |
|
|
|
{ |
|
|
|
int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, wait_barrier_counts, cmd, opt); |
|
|
|
std::vector<VkMat>& bottom_top_blobs = bottom_blobs; |
|
|
|
int ret = layer->forward_inplace(bottom_top_blobs, cmd, opt); |
|
|
|
if (ret != 0) |
|
|
|
return ret; |
|
|
|
|
|
|
|
// store top blobs |
|
|
|
for (size_t i=0; i<layer->tops.size(); i++) |
|
|
|
{ |
|
|
|
int top_blob_index = layer->tops[i]; |
|
|
|
|
|
|
|
blob_mats_gpu[top_blob_index] = bottom_top_blobs[i]; |
|
|
|
} |
|
|
|
} |
|
|
|
else if (blob_mats[bottom_blob_index].staging_data) |
|
|
|
else |
|
|
|
{ |
|
|
|
// upload |
|
|
|
const VkMat& bottom_blob = blob_mats[bottom_blob_index]; |
|
|
|
cmd.record_prepare_transfer_barrier(bottom_blob); |
|
|
|
cmd.record_upload(bottom_blob); |
|
|
|
} |
|
|
|
std::vector<VkMat> top_blobs; |
|
|
|
top_blobs.resize(layer->tops.size()); |
|
|
|
int ret = layer->forward(bottom_blobs, top_blobs, cmd, opt); |
|
|
|
if (ret != 0) |
|
|
|
return ret; |
|
|
|
|
|
|
|
bottom_blobs[i] = blob_mats[bottom_blob_index]; |
|
|
|
// store top blobs |
|
|
|
for (size_t i=0; i<layer->tops.size(); i++) |
|
|
|
{ |
|
|
|
int top_blob_index = layer->tops[i]; |
|
|
|
|
|
|
|
blob_mats_gpu[top_blob_index] = top_blobs[i]; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if (opt.lightmode) |
|
|
|
{ |
|
|
|
|
|
|
|
wait_barrier_counts[bottom_blob_index] = layer->tops.size(); |
|
|
|
|
|
|
|
// deep copy for inplace forward if data is shared |
|
|
|
if (layer->support_inplace && wait_barrier_counts[bottom_blob_index] != 1) |
|
|
|
for (size_t i=0; i<layer->bottoms.size(); i++) |
|
|
|
{ |
|
|
|
VkMat bottom_blob_copy; |
|
|
|
bottom_blob_copy.create_like(bottom_blobs[i], bottom_blobs[i].allocator, bottom_blobs[i].staging_allocator); |
|
|
|
int bottom_blob_index = layer->bottoms[i]; |
|
|
|
|
|
|
|
// fprintf(stderr, "clone %p %p\n", bottom_blobs[i].buffer(), bottom_blob_copy.buffer()); |
|
|
|
// reclaim producer bottom_blob as free when consuming bottom_blob |
|
|
|
const Layer* producer = layers[ blobs[bottom_blob_index].producer ]; |
|
|
|
for (size_t i=0; i<producer->bottoms.size(); i++) |
|
|
|
{ |
|
|
|
int producer_bottom_blob_index = producer->bottoms[i]; |
|
|
|
|
|
|
|
cmd.record_prepare_transfer_barrier(bottom_blobs[i]); |
|
|
|
cmd.record_clone(bottom_blobs[i], bottom_blob_copy); |
|
|
|
bottom_blobs[i] = bottom_blob_copy; |
|
|
|
wait_barrier_counts[producer_bottom_blob_index]--; |
|
|
|
if (wait_barrier_counts[producer_bottom_blob_index] == 0) |
|
|
|
{ |
|
|
|
// fprintf(stderr, "reclaim free %p\n", blob_mats_gpu[producer_bottom_blob_index].buffer()); |
|
|
|
|
|
|
|
wait_barrier_counts[bottom_blob_index]--; |
|
|
|
blob_mats_gpu[producer_bottom_blob_index].release(); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
// forward |
|
|
|
if (opt.lightmode && layer->support_inplace) |
|
|
|
} |
|
|
|
else |
|
|
|
{ |
|
|
|
if (layer->one_blob_only) |
|
|
|
{ |
|
|
|
std::vector<VkMat>& bottom_top_blobs = bottom_blobs; |
|
|
|
int ret = layer->forward_inplace(bottom_top_blobs, cmd, opt); |
|
|
|
if (ret != 0) |
|
|
|
return ret; |
|
|
|
// load bottom blob |
|
|
|
int bottom_blob_index = layer->bottoms[0]; |
|
|
|
int top_blob_index = layer->tops[0]; |
|
|
|
|
|
|
|
// store top blobs |
|
|
|
for (size_t i=0; i<layer->tops.size(); i++) |
|
|
|
if (blob_mats[bottom_blob_index].dims == 0) |
|
|
|
{ |
|
|
|
int top_blob_index = layer->tops[i]; |
|
|
|
if (blob_mats_gpu[bottom_blob_index].dims == 0) |
|
|
|
{ |
|
|
|
int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, blob_mats_gpu, wait_barrier_counts, cmd, opt); |
|
|
|
if (ret != 0) |
|
|
|
return ret; |
|
|
|
} |
|
|
|
|
|
|
|
blob_mats[top_blob_index] = bottom_top_blobs[i]; |
|
|
|
if (blob_mats[bottom_blob_index].dims == 0) |
|
|
|
{ |
|
|
|
// download |
|
|
|
VkMat& bottom_blob = blob_mats_gpu[bottom_blob_index]; |
|
|
|
|
|
|
|
if (!bottom_blob.allocator->mappable) |
|
|
|
{ |
|
|
|
cmd.record_prepare_transfer_barrier(bottom_blob); |
|
|
|
bottom_blob.prepare_staging_buffer(); |
|
|
|
cmd.record_download(bottom_blob); |
|
|
|
} |
|
|
|
|
|
|
|
cmd.submit(); |
|
|
|
|
|
|
|
cmd.wait(); |
|
|
|
|
|
|
|
cmd.reset(); |
|
|
|
|
|
|
|
blob_mats[bottom_blob_index].create_like(bottom_blob, opt.blob_allocator); |
|
|
|
bottom_blob.download(blob_mats[bottom_blob_index]); |
|
|
|
|
|
|
|
if (!bottom_blob.allocator->mappable) |
|
|
|
{ |
|
|
|
bottom_blob.discard_staging_buffer(); |
|
|
|
} |
|
|
|
|
|
|
|
// TODO convert packing |
|
|
|
// fprintf(stderr, "download %d %d %d %lu %d\n", bottom_blob.w, bottom_blob.h, bottom_blob.c, bottom_blob.elemsize, bottom_blob.packing); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
Mat bottom_blob = blob_mats[bottom_blob_index]; |
|
|
|
|
|
|
|
if (opt.lightmode) |
|
|
|
{ |
|
|
|
// delete after taken in light mode |
|
|
|
blob_mats[bottom_blob_index].release(); |
|
|
|
// deep copy for inplace forward if data is shared |
|
|
|
if (layer->support_inplace && *bottom_blob.refcount != 1) |
|
|
|
{ |
|
|
|
bottom_blob = bottom_blob.clone(); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
// forward |
|
|
|
if (opt.lightmode && layer->support_inplace) |
|
|
|
{ |
|
|
|
Mat& bottom_top_blob = bottom_blob; |
|
|
|
|
|
|
|
int ret = layer->forward_inplace(bottom_top_blob, opt); |
|
|
|
if (ret != 0) |
|
|
|
return ret; |
|
|
|
|
|
|
|
// store top blob |
|
|
|
blob_mats[top_blob_index] = bottom_top_blob; |
|
|
|
} |
|
|
|
else |
|
|
|
{ |
|
|
|
Mat top_blob; |
|
|
|
|
|
|
|
int ret = layer->forward(bottom_blob, top_blob, opt); |
|
|
|
if (ret != 0) |
|
|
|
return ret; |
|
|
|
|
|
|
|
// store top blob |
|
|
|
blob_mats[top_blob_index] = top_blob; |
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
else |
|
|
|
{ |
|
|
|
std::vector<VkMat> top_blobs; |
|
|
|
top_blobs.resize(layer->tops.size()); |
|
|
|
int ret = layer->forward(bottom_blobs, top_blobs, cmd, opt); |
|
|
|
if (ret != 0) |
|
|
|
return ret; |
|
|
|
// load bottom blobs |
|
|
|
std::vector<Mat> bottom_blobs; |
|
|
|
bottom_blobs.resize(layer->bottoms.size()); |
|
|
|
for (size_t i=0; i<layer->bottoms.size(); i++) |
|
|
|
{ |
|
|
|
int bottom_blob_index = layer->bottoms[i]; |
|
|
|
|
|
|
|
if (blob_mats[bottom_blob_index].dims == 0) |
|
|
|
{ |
|
|
|
if (blob_mats_gpu[bottom_blob_index].dims == 0) |
|
|
|
{ |
|
|
|
int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, blob_mats_gpu, wait_barrier_counts, cmd, opt); |
|
|
|
if (ret != 0) |
|
|
|
return ret; |
|
|
|
} |
|
|
|
|
|
|
|
if (blob_mats[bottom_blob_index].dims == 0) |
|
|
|
{ |
|
|
|
// download |
|
|
|
VkMat& bottom_blob = blob_mats_gpu[bottom_blob_index]; |
|
|
|
|
|
|
|
if (!bottom_blob.allocator->mappable) |
|
|
|
{ |
|
|
|
cmd.record_prepare_transfer_barrier(bottom_blob); |
|
|
|
bottom_blob.prepare_staging_buffer(); |
|
|
|
cmd.record_download(bottom_blob); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
// store top blobs |
|
|
|
for (size_t i=0; i<layer->tops.size(); i++) |
|
|
|
{ |
|
|
|
int top_blob_index = layer->tops[i]; |
|
|
|
cmd.submit(); |
|
|
|
|
|
|
|
blob_mats[top_blob_index] = top_blobs[i]; |
|
|
|
cmd.wait(); |
|
|
|
|
|
|
|
cmd.reset(); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if (opt.lightmode) |
|
|
|
{ |
|
|
|
for (size_t i=0; i<layer->bottoms.size(); i++) |
|
|
|
{ |
|
|
|
int bottom_blob_index = layer->bottoms[i]; |
|
|
|
|
|
|
|
// reclaim producer bottom_blob as free when consuming bottom_blob |
|
|
|
const Layer* producer = layers[ blobs[bottom_blob_index].producer ]; |
|
|
|
for (size_t i=0; i<producer->bottoms.size(); i++) |
|
|
|
if (blob_mats[bottom_blob_index].dims == 0) |
|
|
|
{ |
|
|
|
int producer_bottom_blob_index = producer->bottoms[i]; |
|
|
|
// download |
|
|
|
VkMat& bottom_blob = blob_mats_gpu[bottom_blob_index]; |
|
|
|
|
|
|
|
wait_barrier_counts[producer_bottom_blob_index]--; |
|
|
|
if (wait_barrier_counts[producer_bottom_blob_index] == 0) |
|
|
|
{ |
|
|
|
// fprintf(stderr, "reclaim free %p\n", blob_mats[producer_bottom_blob_index].buffer()); |
|
|
|
blob_mats[bottom_blob_index].create_like(bottom_blob, opt.blob_allocator); |
|
|
|
bottom_blob.download(blob_mats[bottom_blob_index]); |
|
|
|
|
|
|
|
blob_mats[producer_bottom_blob_index].release(); |
|
|
|
if (!bottom_blob.allocator->mappable) |
|
|
|
{ |
|
|
|
bottom_blob.discard_staging_buffer(); |
|
|
|
} |
|
|
|
|
|
|
|
// TODO convert packing |
|
|
|
// fprintf(stderr, "download %d %d %d %lu %d\n", bottom_blob.w, bottom_blob.h, bottom_blob.c, bottom_blob.elemsize, bottom_blob.packing); |
|
|
|
} |
|
|
|
|
|
|
|
bottom_blobs[i] = blob_mats[bottom_blob_index]; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
// forward |
|
|
|
if (opt.lightmode && layer->support_inplace) |
|
|
|
{ |
|
|
|
std::vector<Mat>& bottom_top_blobs = bottom_blobs; |
|
|
|
|
|
|
|
int ret = layer->forward_inplace(bottom_top_blobs, opt); |
|
|
|
if (ret != 0) |
|
|
|
return ret; |
|
|
|
|
|
|
|
// store top blobs |
|
|
|
for (size_t i=0; i<layer->tops.size(); i++) |
|
|
|
{ |
|
|
|
int top_blob_index = layer->tops[i]; |
|
|
|
|
|
|
|
blob_mats[top_blob_index] = bottom_top_blobs[i]; |
|
|
|
} |
|
|
|
} |
|
|
|
else |
|
|
|
{ |
|
|
|
std::vector<Mat> top_blobs; |
|
|
|
top_blobs.resize(layer->tops.size()); |
|
|
|
|
|
|
|
int ret = layer->forward(bottom_blobs, top_blobs, opt); |
|
|
|
if (ret != 0) |
|
|
|
return ret; |
|
|
|
|
|
|
|
// store top blobs |
|
|
|
for (size_t i=0; i<layer->tops.size(); i++) |
|
|
|
{ |
|
|
|
int top_blob_index = layer->tops[i]; |
|
|
|
|
|
|
|
blob_mats[top_blob_index] = top_blobs[i]; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
// fprintf(stderr, "forward_layer %d %s done\n", layer_index, layer->name.c_str()); |
|
|
|
// fprintf(stderr, "forward_layer %d %d %s done\n", layer->support_vulkan, layer_index, layer->name.c_str()); |
|
|
|
|
|
|
|
return 0; |
|
|
|
} |
|
|
|
@@ -1440,22 +1664,6 @@ int Extractor::input(int blob_index, const Mat& in) |
|
|
|
|
|
|
|
blob_mats[blob_index] = in; |
|
|
|
|
|
|
|
#if NCNN_VULKAN |
|
|
|
if (opt.vulkan_compute) |
|
|
|
{ |
|
|
|
VkMat& in_gpu = blob_mats_gpu[blob_index]; |
|
|
|
|
|
|
|
in_gpu.create_like(in, opt.blob_vkallocator, opt.staging_vkallocator); |
|
|
|
|
|
|
|
if (!in_gpu.allocator->mappable) |
|
|
|
{ |
|
|
|
in_gpu.prepare_staging_buffer(); |
|
|
|
} |
|
|
|
|
|
|
|
in_gpu.upload(in); |
|
|
|
} |
|
|
|
#endif // NCNN_VULKAN |
|
|
|
|
|
|
|
return 0; |
|
|
|
} |
|
|
|
|
|
|
|
@@ -1473,33 +1681,35 @@ int Extractor::extract(int blob_index, Mat& feat) |
|
|
|
#if NCNN_VULKAN |
|
|
|
if (opt.vulkan_compute) |
|
|
|
{ |
|
|
|
VkMat feat_gpu; |
|
|
|
|
|
|
|
ncnn::VkCompute cmd(net->vkdev); |
|
|
|
|
|
|
|
VkMat feat_gpu; |
|
|
|
ret = extract(blob_index, feat_gpu, cmd); |
|
|
|
|
|
|
|
if (!feat_gpu.allocator->mappable) |
|
|
|
if (blob_mats[blob_index].dims == 0 && feat_gpu.dims != 0) |
|
|
|
{ |
|
|
|
// download |
|
|
|
cmd.record_prepare_transfer_barrier(feat_gpu); |
|
|
|
if (!feat_gpu.allocator->mappable) |
|
|
|
{ |
|
|
|
cmd.record_prepare_transfer_barrier(feat_gpu); |
|
|
|
|
|
|
|
feat_gpu.prepare_staging_buffer(); |
|
|
|
feat_gpu.prepare_staging_buffer(); |
|
|
|
|
|
|
|
cmd.record_download(feat_gpu); |
|
|
|
} |
|
|
|
cmd.record_download(feat_gpu); |
|
|
|
} |
|
|
|
|
|
|
|
cmd.submit(); |
|
|
|
cmd.submit(); |
|
|
|
|
|
|
|
cmd.wait(); |
|
|
|
cmd.wait(); |
|
|
|
|
|
|
|
blob_mats[blob_index].create_like(feat_gpu, opt.blob_allocator); |
|
|
|
blob_mats[blob_index].create_like(feat_gpu, opt.blob_allocator); |
|
|
|
|
|
|
|
feat_gpu.download(blob_mats[blob_index]); |
|
|
|
feat_gpu.download(blob_mats[blob_index]); |
|
|
|
|
|
|
|
if (!feat_gpu.allocator->mappable) |
|
|
|
{ |
|
|
|
feat_gpu.discard_staging_buffer(); |
|
|
|
if (!feat_gpu.allocator->mappable) |
|
|
|
{ |
|
|
|
feat_gpu.discard_staging_buffer(); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
else |
|
|
|
@@ -1558,7 +1768,7 @@ int Extractor::extract(int blob_index, VkMat& feat, VkCompute& cmd) |
|
|
|
if (blob_mats_gpu[blob_index].dims == 0) |
|
|
|
{ |
|
|
|
int layer_index = net->blobs[blob_index].producer; |
|
|
|
ret = net->forward_layer(layer_index, blob_mats_gpu, wait_barrier_counts, cmd, opt); |
|
|
|
ret = net->forward_layer(layer_index, blob_mats, blob_mats_gpu, wait_barrier_counts, cmd, opt); |
|
|
|
} |
|
|
|
|
|
|
|
feat = blob_mats_gpu[blob_index]; |
|
|
|
|