Browse Source

add transfer-transfer barrier, concat pack4

tags/20190320
nihui 7 years ago
parent
commit
c60773bde4
3 changed files with 60 additions and 5 deletions
  1. +43
    -1
      src/command.cpp
  2. +6
    -1
      src/command.h
  3. +11
    -3
      src/layer/concat.cpp

+ 43
- 1
src/command.cpp View File

@@ -445,8 +445,26 @@ void VkCompute::record_compute_compute_barrier(const VkMat& m)
delayed_records.push_back(r);
}

void VkCompute::record_transfer_transfer_barrier(const VkMat& m)
{
m.state = 2;

if (vkdev->info.support_VK_KHR_push_descriptor)
return transfer_transfer_barrier(m.buffer(), m.buffer_offset(), m.total() * m.elemsize);

record_type r;
r.type = 10;
r.transfer_transfer_barrier.buffer = m.buffer();
r.transfer_transfer_barrier.offset = m.buffer_offset();
r.transfer_transfer_barrier.size = m.total() * m.elemsize;
delayed_records.push_back(r);
}

void VkCompute::record_prepare_transfer_barrier(const VkMat& m)
{
if (m.state == 2)
return record_transfer_transfer_barrier(m);

if (m.state == 3)
return record_compute_transfer_barrier(m);

@@ -470,7 +488,7 @@ int VkCompute::end()
return end_command_buffer();

record_type r;
r.type = 10;
r.type = 11;
delayed_records.push_back(r);

return 0;
@@ -519,6 +537,9 @@ int VkCompute::submit()
compute_compute_barrier(r.compute_compute_barrier.buffer, r.compute_compute_barrier.offset, r.compute_compute_barrier.size);
break;
case 10:
transfer_transfer_barrier(r.compute_compute_barrier.buffer, r.compute_compute_barrier.offset, r.compute_compute_barrier.size);
break;
case 11:
end_command_buffer();
break;
}
@@ -649,6 +670,27 @@ void VkCompute::compute_compute_barrier(VkBuffer buffer, size_t offset, size_t s
vkCmdPipelineBarrier(command_buffer, srcStageMask, dstStageMask, 0, 0, 0, 1, &bufferBarrier, 0, 0);
}

// Issue a single buffer memory barrier on command_buffer for the byte range
// [offset, offset + size) of buffer.
//
// The barrier goes from transfer-stage writes (VK_ACCESS_TRANSFER_WRITE_BIT)
// to transfer-stage reads (VK_ACCESS_TRANSFER_READ_BIT). No queue family
// ownership transfer is requested.
void VkCompute::transfer_transfer_barrier(VkBuffer buffer, size_t offset, size_t size)
{
    // fprintf(stderr, "cmd transfer_transfer_barrier %p\n", buffer);

    VkBufferMemoryBarrier barrier;

    // structure header
    barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
    barrier.pNext = 0;

    // access scopes: earlier transfer writes -> later transfer reads
    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;

    // same queue family on both sides
    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;

    // affected buffer range
    barrier.buffer = buffer;
    barrier.offset = offset;
    barrier.size = size;

    // source and destination stage are both the transfer stage
    const VkPipelineStageFlags transferStage = VK_PIPELINE_STAGE_TRANSFER_BIT;

    // no dependency flags, no global memory barriers, one buffer barrier, no image barriers
    vkCmdPipelineBarrier(command_buffer, transferStage, transferStage, 0, 0, 0, 1, &barrier, 0, 0);
}

VkTransfer::VkTransfer(VulkanDevice* _vkdev) : Command(_vkdev, _vkdev->info.transfer_queue_index)
{
staging_data = 0;


+ 6
- 1
src/command.h View File

@@ -78,6 +78,8 @@ public:

void record_compute_compute_barrier(const VkMat& m);

void record_transfer_transfer_barrier(const VkMat& m);

void record_prepare_transfer_barrier(const VkMat& m);

void record_prepare_compute_barrier(const VkMat& m);
@@ -109,6 +111,7 @@ protected:
void transfer_compute_barrier(VkBuffer buffer, size_t offset, size_t size);
void compute_transfer_barrier(VkBuffer buffer, size_t offset, size_t size);
void compute_compute_barrier(VkBuffer buffer, size_t offset, size_t size);
void transfer_transfer_barrier(VkBuffer buffer, size_t offset, size_t size);

protected:
// delayed record
@@ -127,7 +130,8 @@ protected:
// 7=transfer-compute barrier
// 8=compute-transfer barrier
// 9=compute-compute barrier
// 10=end
// 10=transfer-transfer barrier
// 11=end
int type;

union
@@ -141,6 +145,7 @@ protected:
struct { VkBuffer buffer; size_t offset; size_t size; } transfer_compute_barrier;
struct { VkBuffer buffer; size_t offset; size_t size; } compute_transfer_barrier;
struct { VkBuffer buffer; size_t offset; size_t size; } compute_compute_barrier;
struct { VkBuffer buffer; size_t offset; size_t size; } transfer_transfer_barrier;
};

std::vector<VkBufferCopy> regions;


+ 11
- 3
src/layer/concat.cpp View File

@@ -264,6 +264,8 @@ int Concat::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_
int Concat::forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt) const
{
int dims = bottom_blobs[0].dims;
size_t elemsize = bottom_blobs[0].elemsize;
int packing = bottom_blobs[0].packing;

if (dims == 1) // axis == 0
{
@@ -277,10 +279,12 @@ int Concat::forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>&
}

VkMat& top_blob = top_blobs[0];
top_blob.create(top_w, 4u, opt.blob_vkallocator, opt.staging_vkallocator);
top_blob.create(top_w, elemsize, packing, opt.blob_vkallocator, opt.staging_vkallocator);
if (top_blob.empty())
return -100;

cmd.record_prepare_transfer_barrier(top_blob);

int dstOffset = 0;
for (size_t b=0; b<bottom_blobs.size(); b++)
{
@@ -316,10 +320,12 @@ int Concat::forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>&
}

VkMat& top_blob = top_blobs[0];
top_blob.create(w, top_h, 4u, opt.blob_vkallocator, opt.staging_vkallocator);
top_blob.create(w, top_h, elemsize, packing, opt.blob_vkallocator, opt.staging_vkallocator);
if (top_blob.empty())
return -100;

cmd.record_prepare_transfer_barrier(top_blob);

int dstOffset = 0;
for (size_t b=0; b<bottom_blobs.size(); b++)
{
@@ -361,10 +367,12 @@ int Concat::forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>&
}

VkMat& top_blob = top_blobs[0];
top_blob.create(w, h, top_channels, 4u, opt.blob_vkallocator, opt.staging_vkallocator);
top_blob.create(w, h, top_channels, elemsize, packing, opt.blob_vkallocator, opt.staging_vkallocator);
if (top_blob.empty())
return -100;

cmd.record_prepare_transfer_barrier(top_blob);

int dstOffset = 0;
for (size_t b=0; b<bottom_blobs.size(); b++)
{


Loading…
Cancel
Save