Browse Source

uint32_t typed workgroup size

tags/20190320
nihuini 7 years ago
parent
commit
1f4bdd91b5
7 changed files with 51 additions and 44 deletions
  1. +8
    -8
      benchmark/benchncnn.cpp
  2. +4
    -4
      src/gpu.cpp
  3. +4
    -4
      src/gpu.h
  4. +1
    -1
      src/mat.h
  5. +15
    -8
      src/net.cpp
  6. +16
    -16
      src/pipeline.cpp
  7. +3
    -3
      src/pipeline.h

+ 8
- 8
benchmark/benchncnn.cpp View File

@@ -91,11 +91,12 @@ public:

for (size_t i=0; i<layers.size(); i++)
{
Layer* layer = layers[i];

if (layer->support_vulkan)
int uret = layers[i]->upload_model(cmd);
if (uret != 0)
{
layer->upload_model(cmd);
fprintf(stderr, "layer upload_model %d failed\n", (int)i);
ret = -1;
break;
}
}

@@ -106,11 +107,10 @@ public:
#pragma omp parallel for
for (int i=0; i<layers.size(); i++)
{
Layer* layer = layers[i];

if (layer->support_vulkan)
int cret = layers[i]->create_pipeline();
if (cret != 0)
{
layer->create_pipeline();
fprintf(stderr, "layer create_pipeline %d failed\n", (int)i);
}
}
}


+ 4
- 4
src/gpu.cpp View File

@@ -456,10 +456,10 @@ int create_gpu_instance()
gpu_info.memory_map_alignment = physicalDeviceProperties.limits.minMemoryMapAlignment;
gpu_info.buffer_offset_alignment = physicalDeviceProperties.limits.minStorageBufferOffsetAlignment;

// fprintf(stderr, "[%u] max_shared_memory_size = %d\n", i, gpu_info.max_shared_memory_size);
// fprintf(stderr, "[%u] max_workgroup_count = %d %d %d\n", i, gpu_info.max_workgroup_count[0], gpu_info.max_workgroup_count[1], gpu_info.max_workgroup_count[2]);
// fprintf(stderr, "[%u] max_workgroup_invocations = %d\n", i, gpu_info.max_workgroup_invocations);
// fprintf(stderr, "[%u] max_workgroup_size = %d %d %d\n", i, gpu_info.max_workgroup_size[0], gpu_info.max_workgroup_size[1], gpu_info.max_workgroup_size[2]);
// fprintf(stderr, "[%u] max_shared_memory_size = %u\n", i, gpu_info.max_shared_memory_size);
// fprintf(stderr, "[%u] max_workgroup_count = %u %u %u\n", i, gpu_info.max_workgroup_count[0], gpu_info.max_workgroup_count[1], gpu_info.max_workgroup_count[2]);
// fprintf(stderr, "[%u] max_workgroup_invocations = %u\n", i, gpu_info.max_workgroup_invocations);
// fprintf(stderr, "[%u] max_workgroup_size = %u %u %u\n", i, gpu_info.max_workgroup_size[0], gpu_info.max_workgroup_size[1], gpu_info.max_workgroup_size[2]);
// fprintf(stderr, "[%u] memory_map_alignment = %lu\n", i, gpu_info.memory_map_alignment);
// fprintf(stderr, "[%u] buffer_offset_alignment = %lu\n", i, gpu_info.buffer_offset_alignment);



+ 4
- 4
src/gpu.h View File

@@ -52,10 +52,10 @@ public:
int type;

// hardware capability
int max_shared_memory_size;
int max_workgroup_count[3];
int max_workgroup_invocations;
int max_workgroup_size[3];
uint32_t max_shared_memory_size;
uint32_t max_workgroup_count[3];
uint32_t max_workgroup_invocations;
uint32_t max_workgroup_size[3];
size_t memory_map_alignment;
size_t buffer_offset_alignment;



+ 1
- 1
src/mat.h View File

@@ -342,7 +342,7 @@ public:
};

// type for vulkan specialization constant and push constant
union vk_specialization_type { int i; float f; };
union vk_specialization_type { int i; float f; uint32_t u32; };
union vk_constant_type { int i; float f; };
#endif // NCNN_VULKAN



+ 15
- 8
src/net.cpp View File

@@ -659,9 +659,12 @@ int Net::load_model(FILE* fp)
{
Layer* layer = layers[i];

if (layer->support_vulkan)
int uret = layer->upload_model(cmd);
if (uret != 0)
{
layer->upload_model(cmd);
fprintf(stderr, "layer upload_model %d failed\n", (int)i);
ret = -1;
break;
}
}

@@ -674,9 +677,10 @@ int Net::load_model(FILE* fp)
{
Layer* layer = layers[i];

if (layer->support_vulkan)
int cret = layer->create_pipeline();
if (cret != 0)
{
layer->create_pipeline();
fprintf(stderr, "layer create_pipeline %d failed\n", (int)i);
}
}
}
@@ -877,9 +881,11 @@ int Net::load_model(const unsigned char* _mem)
{
Layer* layer = layers[i];

if (layer->support_vulkan)
int uret = layer->upload_model(cmd);
if (uret != 0)
{
layer->upload_model(cmd);
fprintf(stderr, "layer upload_model %d failed\n", (int)i);
return -1;
}
}

@@ -892,9 +898,10 @@ int Net::load_model(const unsigned char* _mem)
{
Layer* layer = layers[i];

if (layer->support_vulkan)
int cret = layer->create_pipeline();
if (cret != 0)
{
layer->create_pipeline();
fprintf(stderr, "layer create_pipeline %d failed\n", (int)i);
}
}
}


+ 16
- 16
src/pipeline.cpp View File

@@ -89,22 +89,22 @@ void Pipeline::set_optimal_local_size_xyz(int w, int h, int c)
if (c > 0)
{
local_size_z = vkdev->info.max_workgroup_size[2];
while (c < local_size_z)
while ((uint32_t)c < local_size_z)
{
local_size_z /= 2;
}
}
else
{
local_size_z = std::min(128, vkdev->info.max_workgroup_size[2]);
local_size_z = std::min((uint32_t)128, vkdev->info.max_workgroup_size[2]);
}

int max_local_size_xy = vkdev->info.max_workgroup_invocations / local_size_z;
uint32_t max_local_size_xy = vkdev->info.max_workgroup_invocations / local_size_z;

if (h == w || (h < 0 && w < 0))
{
int local_size_xy = sqrt(max_local_size_xy);
int local_size_xy_prefer = 128;
uint32_t local_size_xy = sqrt(max_local_size_xy);
uint32_t local_size_xy_prefer = 128;
while (local_size_xy < local_size_xy_prefer)
{
local_size_xy_prefer /= 2;
@@ -119,23 +119,23 @@ void Pipeline::set_optimal_local_size_xyz(int w, int h, int c)
float ps = h / (float)w;
float local_size_xy = sqrt(max_local_size_xy / ps);
local_size_y = local_size_xy * ps;
local_size_x = std::max((int)local_size_xy, 1);
local_size_x = std::max((uint32_t)local_size_xy, (uint32_t)1);
}
else
{
float ps = w / (float)h;
float local_size_xy = sqrt(max_local_size_xy / ps);
local_size_y = std::max((int)local_size_xy, 1);
local_size_y = std::max((uint32_t)local_size_xy, (uint32_t)1);
local_size_x = local_size_xy * ps;
}

int local_size_y_prefer = std::min(128, vkdev->info.max_workgroup_size[1]);
uint32_t local_size_y_prefer = std::min((uint32_t)128, vkdev->info.max_workgroup_size[1]);
while (local_size_y < local_size_y_prefer)
{
local_size_y_prefer /= 2;
}

int local_size_x_prefer = std::min(128, vkdev->info.max_workgroup_size[0]);
uint32_t local_size_x_prefer = std::min((uint32_t)128, vkdev->info.max_workgroup_size[0]);
while (local_size_x < local_size_x_prefer)
{
local_size_x_prefer /= 2;
@@ -147,23 +147,23 @@ void Pipeline::set_optimal_local_size_xyz(int w, int h, int c)
else if (h > 0)
{
local_size_y = std::min(max_local_size_xy, vkdev->info.max_workgroup_size[1]);
while (h < local_size_y)
while ((uint32_t)h < local_size_y)
{
local_size_y /= 2;
}

int max_local_size_x = max_local_size_xy / local_size_y;
uint32_t max_local_size_x = max_local_size_xy / local_size_y;
local_size_x = std::min(max_local_size_x, vkdev->info.max_workgroup_size[0]);
}
else if (w > 0)
{
local_size_x = std::min(max_local_size_xy, vkdev->info.max_workgroup_size[0]);
while (w < local_size_x)
while ((uint32_t)w < local_size_x)
{
local_size_x /= 2;
}

int max_local_size_y = max_local_size_xy / local_size_x;
uint32_t max_local_size_y = max_local_size_xy / local_size_x;
local_size_y = std::min(max_local_size_y, vkdev->info.max_workgroup_size[1]);
}

@@ -290,9 +290,9 @@ int Pipeline::create_pipeline(const char* name, const std::vector<vk_specializat
local_size_xyz_entries[2].size = sizeof(vk_specialization_type);

specialization_data.resize(specialization_count + 3);
specialization_data[ specialization_count+0 ].i = local_size_x;
specialization_data[ specialization_count+1 ].i = local_size_y;
specialization_data[ specialization_count+2 ].i = local_size_z;
specialization_data[ specialization_count+0 ].u32 = local_size_x;
specialization_data[ specialization_count+1 ].u32 = local_size_y;
specialization_data[ specialization_count+2 ].u32 = local_size_z;
}

VkSpecializationInfo specializationInfo;


+ 3
- 3
src/pipeline.h View File

@@ -58,9 +58,9 @@ public:

VkDescriptorUpdateTemplateKHR descriptor_update_template;

int local_size_x;
int local_size_y;
int local_size_z;
uint32_t local_size_x;
uint32_t local_size_y;
uint32_t local_size_z;
};
#endif // NCNN_VULKAN



Loading…
Cancel
Save