Browse Source

fallback to cpu forward if layer not support vulkan, automatically!

tags/20190320
nihuini 7 years ago
parent
commit
83efa73cf6
4 changed files with 394 additions and 186 deletions
  1. +28
    -28
      benchmark/benchncnn.cpp
  2. +0
    -2
      src/layer/permute.cpp
  3. +365
    -155
      src/net.cpp
  4. +1
    -1
      src/net.h

+ 28
- 28
benchmark/benchncnn.cpp View File

@@ -409,20 +409,20 @@ void mobilenet_yolo_run(const ncnn::Net& net)
ncnn::Mat in(416, 416, 3);
ex.input("data", in);

#if NCNN_VULKAN
// TODO chain conv22 conv23
if (g_use_vulkan_compute)
{
ncnn::Mat conv22;
ex.extract("conv22", conv22);
ncnn::Mat conv23;
ex.extract("conv23", conv23);
// use cpu for detection_out
ex.set_vulkan_compute(false);
}
#endif // NCNN_VULKAN
// #if NCNN_VULKAN
// // TODO chain conv22 conv23
// if (g_use_vulkan_compute)
// {
// ncnn::Mat conv22;
// ex.extract("conv22", conv22);
//
// ncnn::Mat conv23;
// ex.extract("conv23", conv23);
//
// // use cpu for detection_out
// ex.set_vulkan_compute(false);
// }
// #endif // NCNN_VULKAN

ncnn::Mat out;
ex.extract("detection_out", out);
@@ -440,20 +440,20 @@ void mobilenet_yolov3_run(const ncnn::Net& net)
ncnn::Mat in(416, 416, 3);
ex.input("data", in);

#if NCNN_VULKAN
// TODO chain conv19 conv20
if (g_use_vulkan_compute)
{
ncnn::Mat conv19;
ex.extract("conv19", conv19);
ncnn::Mat conv20;
ex.extract("conv20", conv20);
// use cpu for detection_out
ex.set_vulkan_compute(false);
}
#endif // NCNN_VULKAN
// #if NCNN_VULKAN
// // TODO chain conv19 conv20
// if (g_use_vulkan_compute)
// {
// ncnn::Mat conv19;
// ex.extract("conv19", conv19);
//
// ncnn::Mat conv20;
// ex.extract("conv20", conv20);
//
// // use cpu for detection_out
// ex.set_vulkan_compute(false);
// }
// #endif // NCNN_VULKAN

ncnn::Mat out;
ex.extract("detection_out", out);


+ 0
- 2
src/layer/permute.cpp View File

@@ -317,8 +317,6 @@ int Permute::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd,
}
}

fprintf(stderr, "%d %d %d\n", top_blob.w, top_blob.h, top_blob.c);

// fprintf(stderr, "Permute::forward %p %p\n", bottom_blob.buffer(), top_blob.buffer());

std::vector<VkMat> bindings(2);


+ 365
- 155
src/net.cpp View File

@@ -1145,201 +1145,425 @@ int Net::forward_layer(int layer_index, std::vector<Mat>& blob_mats, Option& opt
}

#if NCNN_VULKAN
int Net::forward_layer(int layer_index, std::vector<VkMat>& blob_mats, std::vector<int>& wait_barrier_counts, VkCompute& cmd, Option& opt) const
int Net::forward_layer(int layer_index, std::vector<Mat>& blob_mats, std::vector<VkMat>& blob_mats_gpu, std::vector<int>& wait_barrier_counts, VkCompute& cmd, Option& opt) const
{
const Layer* layer = layers[layer_index];

// fprintf(stderr, "forward_layer %d %s\n", layer_index, layer->name.c_str());
// fprintf(stderr, "forward_layer %d %d %s\n", layer->support_vulkan, layer_index, layer->name.c_str());

if (layer->one_blob_only)
if (layer->support_vulkan)
{
// load bottom blob
int bottom_blob_index = layer->bottoms[0];
int top_blob_index = layer->tops[0];

if (blob_mats[bottom_blob_index].dims == 0)
{
int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, wait_barrier_counts, cmd, opt);
if (ret != 0)
return ret;
}
else if (blob_mats[bottom_blob_index].staging_data)
if (layer->one_blob_only)
{
// upload
const VkMat& bottom_blob = blob_mats[bottom_blob_index];
cmd.record_prepare_transfer_barrier(bottom_blob);
cmd.record_upload(bottom_blob);
}
// load bottom blob
int bottom_blob_index = layer->bottoms[0];
int top_blob_index = layer->tops[0];

VkMat bottom_blob = blob_mats[bottom_blob_index];
if (blob_mats_gpu[bottom_blob_index].dims == 0)
{
if (blob_mats[bottom_blob_index].dims == 0)
{
int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, blob_mats_gpu, wait_barrier_counts, cmd, opt);
if (ret != 0)
return ret;
}

if (opt.lightmode)
{
if (blob_mats_gpu[bottom_blob_index].dims == 0)
{
// upload
VkMat& bottom_blob = blob_mats_gpu[bottom_blob_index];
bottom_blob.create_like(blob_mats[bottom_blob_index], opt.blob_vkallocator, opt.staging_vkallocator);

if (!bottom_blob.allocator->mappable)
{
bottom_blob.prepare_staging_buffer();
}

wait_barrier_counts[bottom_blob_index] += layer->tops.size();
bottom_blob.upload(blob_mats[bottom_blob_index]);

// deep copy for inplace forward if data is shared
if (layer->support_inplace && wait_barrier_counts[bottom_blob_index] != 1)
cmd.record_prepare_transfer_barrier(bottom_blob);
cmd.record_upload(bottom_blob);

// TODO convert packing
// fprintf(stderr, "upload %d %d %d %lu %d\n", bottom_blob.w, bottom_blob.h, bottom_blob.c, bottom_blob.elemsize, bottom_blob.packing);
}
}

VkMat bottom_blob = blob_mats_gpu[bottom_blob_index];

if (opt.lightmode)
{
VkMat bottom_blob_copy;
bottom_blob_copy.create_like(bottom_blob, bottom_blob.allocator, bottom_blob.staging_allocator);

// fprintf(stderr, "clone %p %p\n", bottom_blob.buffer(), bottom_blob_copy.buffer());
wait_barrier_counts[bottom_blob_index] += layer->tops.size();

// deep copy for inplace forward if data is shared
if (layer->support_inplace && wait_barrier_counts[bottom_blob_index] != 1)
{
VkMat bottom_blob_copy;
bottom_blob_copy.create_like(bottom_blob, bottom_blob.allocator, bottom_blob.staging_allocator);

// fprintf(stderr, "clone %p %p\n", bottom_blob.buffer(), bottom_blob_copy.buffer());

cmd.record_prepare_transfer_barrier(bottom_blob);
cmd.record_clone(bottom_blob, bottom_blob_copy);
bottom_blob = bottom_blob_copy;
cmd.record_prepare_transfer_barrier(bottom_blob);
cmd.record_clone(bottom_blob, bottom_blob_copy);
bottom_blob = bottom_blob_copy;

wait_barrier_counts[bottom_blob_index]--;
wait_barrier_counts[bottom_blob_index]--;
}
}
}

// forward
if (opt.lightmode && layer->support_inplace)
{
VkMat& bottom_top_blob = bottom_blob;
int ret = layer->forward_inplace(bottom_top_blob, cmd, opt);
if (ret != 0)
return ret;
// forward
if (opt.lightmode && layer->support_inplace)
{
VkMat& bottom_top_blob = bottom_blob;
int ret = layer->forward_inplace(bottom_top_blob, cmd, opt);
if (ret != 0)
return ret;

// store top blob
blob_mats[top_blob_index] = bottom_top_blob;
// store top blob
blob_mats_gpu[top_blob_index] = bottom_top_blob;
}
else
{
VkMat top_blob;
int ret = layer->forward(bottom_blob, top_blob, cmd, opt);
if (ret != 0)
return ret;

// store top blob
blob_mats_gpu[top_blob_index] = top_blob;
}

if (opt.lightmode)
{
// reclaim producer bottom_blob as free when consuming bottom_blob
const Layer* producer = layers[ blobs[bottom_blob_index].producer ];
for (size_t i=0; i<producer->bottoms.size(); i++)
{
int producer_bottom_blob_index = producer->bottoms[i];

wait_barrier_counts[producer_bottom_blob_index]--;
if (wait_barrier_counts[producer_bottom_blob_index] == 0)
{
// fprintf(stderr, "reclaim free %p\n", blob_mats_gpu[producer_bottom_blob_index].buffer());

blob_mats_gpu[producer_bottom_blob_index].release();
}
}
}
}
else
{
VkMat top_blob;
int ret = layer->forward(bottom_blob, top_blob, cmd, opt);
if (ret != 0)
return ret;
// load bottom blobs
std::vector<VkMat> bottom_blobs;
bottom_blobs.resize(layer->bottoms.size());
for (size_t i=0; i<layer->bottoms.size(); i++)
{
int bottom_blob_index = layer->bottoms[i];

// store top blob
blob_mats[top_blob_index] = top_blob;
}
if (blob_mats_gpu[bottom_blob_index].dims == 0)
{
if (blob_mats[bottom_blob_index].dims == 0)
{
int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, blob_mats_gpu, wait_barrier_counts, cmd, opt);
if (ret != 0)
return ret;
}

if (opt.lightmode)
{
// reclaim producer bottom_blob as free when consuming bottom_blob
const Layer* producer = layers[ blobs[bottom_blob_index].producer ];
for (size_t i=0; i<producer->bottoms.size(); i++)
{
int producer_bottom_blob_index = producer->bottoms[i];
if (blob_mats_gpu[bottom_blob_index].dims == 0)
{
// upload
VkMat& bottom_blob = blob_mats_gpu[bottom_blob_index];
bottom_blob.create_like(blob_mats[bottom_blob_index], opt.blob_vkallocator, opt.staging_vkallocator);

if (!bottom_blob.allocator->mappable)
{
bottom_blob.prepare_staging_buffer();
}

bottom_blob.upload(blob_mats[bottom_blob_index]);

cmd.record_prepare_transfer_barrier(bottom_blob);
cmd.record_upload(bottom_blob);

// TODO convert packing
// fprintf(stderr, "upload %d %d %d %lu %d\n", bottom_blob.w, bottom_blob.h, bottom_blob.c, bottom_blob.elemsize, bottom_blob.packing);
}
}

wait_barrier_counts[producer_bottom_blob_index]--;
if (wait_barrier_counts[producer_bottom_blob_index] == 0)
bottom_blobs[i] = blob_mats_gpu[bottom_blob_index];

if (opt.lightmode)
{
// fprintf(stderr, "reclaim free %p\n", blob_mats[producer_bottom_blob_index].buffer());

blob_mats[producer_bottom_blob_index].release();
wait_barrier_counts[bottom_blob_index] = layer->tops.size();

// deep copy for inplace forward if data is shared
if (layer->support_inplace && wait_barrier_counts[bottom_blob_index] != 1)
{
VkMat bottom_blob_copy;
bottom_blob_copy.create_like(bottom_blobs[i], bottom_blobs[i].allocator, bottom_blobs[i].staging_allocator);

// fprintf(stderr, "clone %p %p\n", bottom_blobs[i].buffer(), bottom_blob_copy.buffer());

cmd.record_prepare_transfer_barrier(bottom_blobs[i]);
cmd.record_clone(bottom_blobs[i], bottom_blob_copy);
bottom_blobs[i] = bottom_blob_copy;

wait_barrier_counts[bottom_blob_index]--;
}
}
}
}
}
else
{
// load bottom blobs
std::vector<VkMat> bottom_blobs;
bottom_blobs.resize(layer->bottoms.size());
for (size_t i=0; i<layer->bottoms.size(); i++)
{
int bottom_blob_index = layer->bottoms[i];

if (blob_mats[bottom_blob_index].dims == 0)
// forward
if (opt.lightmode && layer->support_inplace)
{
int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, wait_barrier_counts, cmd, opt);
std::vector<VkMat>& bottom_top_blobs = bottom_blobs;
int ret = layer->forward_inplace(bottom_top_blobs, cmd, opt);
if (ret != 0)
return ret;

// store top blobs
for (size_t i=0; i<layer->tops.size(); i++)
{
int top_blob_index = layer->tops[i];

blob_mats_gpu[top_blob_index] = bottom_top_blobs[i];
}
}
else if (blob_mats[bottom_blob_index].staging_data)
else
{
// upload
const VkMat& bottom_blob = blob_mats[bottom_blob_index];
cmd.record_prepare_transfer_barrier(bottom_blob);
cmd.record_upload(bottom_blob);
}
std::vector<VkMat> top_blobs;
top_blobs.resize(layer->tops.size());
int ret = layer->forward(bottom_blobs, top_blobs, cmd, opt);
if (ret != 0)
return ret;

bottom_blobs[i] = blob_mats[bottom_blob_index];
// store top blobs
for (size_t i=0; i<layer->tops.size(); i++)
{
int top_blob_index = layer->tops[i];

blob_mats_gpu[top_blob_index] = top_blobs[i];
}
}

if (opt.lightmode)
{

wait_barrier_counts[bottom_blob_index] = layer->tops.size();

// deep copy for inplace forward if data is shared
if (layer->support_inplace && wait_barrier_counts[bottom_blob_index] != 1)
for (size_t i=0; i<layer->bottoms.size(); i++)
{
VkMat bottom_blob_copy;
bottom_blob_copy.create_like(bottom_blobs[i], bottom_blobs[i].allocator, bottom_blobs[i].staging_allocator);
int bottom_blob_index = layer->bottoms[i];

// fprintf(stderr, "clone %p %p\n", bottom_blobs[i].buffer(), bottom_blob_copy.buffer());
// reclaim producer bottom_blob as free when consuming bottom_blob
const Layer* producer = layers[ blobs[bottom_blob_index].producer ];
for (size_t i=0; i<producer->bottoms.size(); i++)
{
int producer_bottom_blob_index = producer->bottoms[i];

cmd.record_prepare_transfer_barrier(bottom_blobs[i]);
cmd.record_clone(bottom_blobs[i], bottom_blob_copy);
bottom_blobs[i] = bottom_blob_copy;
wait_barrier_counts[producer_bottom_blob_index]--;
if (wait_barrier_counts[producer_bottom_blob_index] == 0)
{
// fprintf(stderr, "reclaim free %p\n", blob_mats_gpu[producer_bottom_blob_index].buffer());

wait_barrier_counts[bottom_blob_index]--;
blob_mats_gpu[producer_bottom_blob_index].release();
}
}
}
}

}

// forward
if (opt.lightmode && layer->support_inplace)
}
else
{
if (layer->one_blob_only)
{
std::vector<VkMat>& bottom_top_blobs = bottom_blobs;
int ret = layer->forward_inplace(bottom_top_blobs, cmd, opt);
if (ret != 0)
return ret;
// load bottom blob
int bottom_blob_index = layer->bottoms[0];
int top_blob_index = layer->tops[0];

// store top blobs
for (size_t i=0; i<layer->tops.size(); i++)
if (blob_mats[bottom_blob_index].dims == 0)
{
int top_blob_index = layer->tops[i];
if (blob_mats_gpu[bottom_blob_index].dims == 0)
{
int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, blob_mats_gpu, wait_barrier_counts, cmd, opt);
if (ret != 0)
return ret;
}

blob_mats[top_blob_index] = bottom_top_blobs[i];
if (blob_mats[bottom_blob_index].dims == 0)
{
// download
VkMat& bottom_blob = blob_mats_gpu[bottom_blob_index];

if (!bottom_blob.allocator->mappable)
{
cmd.record_prepare_transfer_barrier(bottom_blob);
bottom_blob.prepare_staging_buffer();
cmd.record_download(bottom_blob);
}

cmd.submit();

cmd.wait();

cmd.reset();

blob_mats[bottom_blob_index].create_like(bottom_blob, opt.blob_allocator);
bottom_blob.download(blob_mats[bottom_blob_index]);

if (!bottom_blob.allocator->mappable)
{
bottom_blob.discard_staging_buffer();
}

// TODO convert packing
// fprintf(stderr, "download %d %d %d %lu %d\n", bottom_blob.w, bottom_blob.h, bottom_blob.c, bottom_blob.elemsize, bottom_blob.packing);
}
}

Mat bottom_blob = blob_mats[bottom_blob_index];

if (opt.lightmode)
{
// delete after taken in light mode
blob_mats[bottom_blob_index].release();
// deep copy for inplace forward if data is shared
if (layer->support_inplace && *bottom_blob.refcount != 1)
{
bottom_blob = bottom_blob.clone();
}
}

// forward
if (opt.lightmode && layer->support_inplace)
{
Mat& bottom_top_blob = bottom_blob;

int ret = layer->forward_inplace(bottom_top_blob, opt);
if (ret != 0)
return ret;

// store top blob
blob_mats[top_blob_index] = bottom_top_blob;
}
else
{
Mat top_blob;

int ret = layer->forward(bottom_blob, top_blob, opt);
if (ret != 0)
return ret;

// store top blob
blob_mats[top_blob_index] = top_blob;
}

}
else
{
std::vector<VkMat> top_blobs;
top_blobs.resize(layer->tops.size());
int ret = layer->forward(bottom_blobs, top_blobs, cmd, opt);
if (ret != 0)
return ret;
// load bottom blobs
std::vector<Mat> bottom_blobs;
bottom_blobs.resize(layer->bottoms.size());
for (size_t i=0; i<layer->bottoms.size(); i++)
{
int bottom_blob_index = layer->bottoms[i];

if (blob_mats[bottom_blob_index].dims == 0)
{
if (blob_mats_gpu[bottom_blob_index].dims == 0)
{
int ret = forward_layer(blobs[bottom_blob_index].producer, blob_mats, blob_mats_gpu, wait_barrier_counts, cmd, opt);
if (ret != 0)
return ret;
}

if (blob_mats[bottom_blob_index].dims == 0)
{
// download
VkMat& bottom_blob = blob_mats_gpu[bottom_blob_index];

if (!bottom_blob.allocator->mappable)
{
cmd.record_prepare_transfer_barrier(bottom_blob);
bottom_blob.prepare_staging_buffer();
cmd.record_download(bottom_blob);
}
}
}
}

// store top blobs
for (size_t i=0; i<layer->tops.size(); i++)
{
int top_blob_index = layer->tops[i];
cmd.submit();

blob_mats[top_blob_index] = top_blobs[i];
cmd.wait();

cmd.reset();
}
}

if (opt.lightmode)
{
for (size_t i=0; i<layer->bottoms.size(); i++)
{
int bottom_blob_index = layer->bottoms[i];

// reclaim producer bottom_blob as free when consuming bottom_blob
const Layer* producer = layers[ blobs[bottom_blob_index].producer ];
for (size_t i=0; i<producer->bottoms.size(); i++)
if (blob_mats[bottom_blob_index].dims == 0)
{
int producer_bottom_blob_index = producer->bottoms[i];
// download
VkMat& bottom_blob = blob_mats_gpu[bottom_blob_index];

wait_barrier_counts[producer_bottom_blob_index]--;
if (wait_barrier_counts[producer_bottom_blob_index] == 0)
{
// fprintf(stderr, "reclaim free %p\n", blob_mats[producer_bottom_blob_index].buffer());
blob_mats[bottom_blob_index].create_like(bottom_blob, opt.blob_allocator);
bottom_blob.download(blob_mats[bottom_blob_index]);

blob_mats[producer_bottom_blob_index].release();
if (!bottom_blob.allocator->mappable)
{
bottom_blob.discard_staging_buffer();
}

// TODO convert packing
// fprintf(stderr, "download %d %d %d %lu %d\n", bottom_blob.w, bottom_blob.h, bottom_blob.c, bottom_blob.elemsize, bottom_blob.packing);
}

bottom_blobs[i] = blob_mats[bottom_blob_index];
}
}

// forward
if (opt.lightmode && layer->support_inplace)
{
std::vector<Mat>& bottom_top_blobs = bottom_blobs;

int ret = layer->forward_inplace(bottom_top_blobs, opt);
if (ret != 0)
return ret;

// store top blobs
for (size_t i=0; i<layer->tops.size(); i++)
{
int top_blob_index = layer->tops[i];

blob_mats[top_blob_index] = bottom_top_blobs[i];
}
}
else
{
std::vector<Mat> top_blobs;
top_blobs.resize(layer->tops.size());

int ret = layer->forward(bottom_blobs, top_blobs, opt);
if (ret != 0)
return ret;

// store top blobs
for (size_t i=0; i<layer->tops.size(); i++)
{
int top_blob_index = layer->tops[i];

blob_mats[top_blob_index] = top_blobs[i];
}
}

}
}

// fprintf(stderr, "forward_layer %d %s done\n", layer_index, layer->name.c_str());
// fprintf(stderr, "forward_layer %d %d %s done\n", layer->support_vulkan, layer_index, layer->name.c_str());

return 0;
}
@@ -1440,22 +1664,6 @@ int Extractor::input(int blob_index, const Mat& in)

blob_mats[blob_index] = in;

#if NCNN_VULKAN
if (opt.vulkan_compute)
{
VkMat& in_gpu = blob_mats_gpu[blob_index];

in_gpu.create_like(in, opt.blob_vkallocator, opt.staging_vkallocator);

if (!in_gpu.allocator->mappable)
{
in_gpu.prepare_staging_buffer();
}

in_gpu.upload(in);
}
#endif // NCNN_VULKAN

return 0;
}

@@ -1473,33 +1681,35 @@ int Extractor::extract(int blob_index, Mat& feat)
#if NCNN_VULKAN
if (opt.vulkan_compute)
{
VkMat feat_gpu;

ncnn::VkCompute cmd(net->vkdev);

VkMat feat_gpu;
ret = extract(blob_index, feat_gpu, cmd);

if (!feat_gpu.allocator->mappable)
if (blob_mats[blob_index].dims == 0 && feat_gpu.dims != 0)
{
// download
cmd.record_prepare_transfer_barrier(feat_gpu);
if (!feat_gpu.allocator->mappable)
{
cmd.record_prepare_transfer_barrier(feat_gpu);

feat_gpu.prepare_staging_buffer();
feat_gpu.prepare_staging_buffer();

cmd.record_download(feat_gpu);
}
cmd.record_download(feat_gpu);
}

cmd.submit();
cmd.submit();

cmd.wait();
cmd.wait();

blob_mats[blob_index].create_like(feat_gpu, opt.blob_allocator);
blob_mats[blob_index].create_like(feat_gpu, opt.blob_allocator);

feat_gpu.download(blob_mats[blob_index]);
feat_gpu.download(blob_mats[blob_index]);

if (!feat_gpu.allocator->mappable)
{
feat_gpu.discard_staging_buffer();
if (!feat_gpu.allocator->mappable)
{
feat_gpu.discard_staging_buffer();
}
}
}
else
@@ -1558,7 +1768,7 @@ int Extractor::extract(int blob_index, VkMat& feat, VkCompute& cmd)
if (blob_mats_gpu[blob_index].dims == 0)
{
int layer_index = net->blobs[blob_index].producer;
ret = net->forward_layer(layer_index, blob_mats_gpu, wait_barrier_counts, cmd, opt);
ret = net->forward_layer(layer_index, blob_mats, blob_mats_gpu, wait_barrier_counts, cmd, opt);
}

feat = blob_mats_gpu[blob_index];


+ 1
- 1
src/net.h View File

@@ -122,7 +122,7 @@ protected:
int forward_layer(int layer_index, std::vector<Mat>& blob_mats, Option& opt) const;

#if NCNN_VULKAN
int forward_layer(int layer_index, std::vector<VkMat>& blob_mats, std::vector<int>& wait_barrier_counts, VkCompute& cmd, Option& opt) const;
int forward_layer(int layer_index, std::vector<Mat>& blob_mats, std::vector<VkMat>& blob_mats_gpu, std::vector<int>& wait_barrier_counts, VkCompute& cmd, Option& opt) const;
#endif // NCNN_VULKAN

protected:


Loading…
Cancel
Save