From 1f4bdd91b52dc49fb71cec4b4e35fc62ca51a61c Mon Sep 17 00:00:00 2001 From: nihuini Date: Mon, 11 Mar 2019 18:28:24 +0800 Subject: [PATCH] uint32_t typed workgroup size --- benchmark/benchncnn.cpp | 16 ++++++++-------- src/gpu.cpp | 8 ++++---- src/gpu.h | 8 ++++---- src/mat.h | 2 +- src/net.cpp | 23 +++++++++++++++-------- src/pipeline.cpp | 32 ++++++++++++++++---------------- src/pipeline.h | 6 +++--- 7 files changed, 51 insertions(+), 44 deletions(-) diff --git a/benchmark/benchncnn.cpp b/benchmark/benchncnn.cpp index 454e086f9..745e7d412 100644 --- a/benchmark/benchncnn.cpp +++ b/benchmark/benchncnn.cpp @@ -91,11 +91,12 @@ public: for (size_t i=0; isupport_vulkan) + int uret = layers[i]->upload_model(cmd); + if (uret != 0) { - layer->upload_model(cmd); + fprintf(stderr, "layer upload_model %d failed\n", (int)i); + ret = -1; + break; } } @@ -106,11 +107,10 @@ public: #pragma omp parallel for for (int i=0; isupport_vulkan) + int cret = layers[i]->create_pipeline(); + if (cret != 0) { - layer->create_pipeline(); + fprintf(stderr, "layer create_pipeline %d failed\n", (int)i); } } } diff --git a/src/gpu.cpp b/src/gpu.cpp index 4188c2496..6dbed566c 100644 --- a/src/gpu.cpp +++ b/src/gpu.cpp @@ -456,10 +456,10 @@ int create_gpu_instance() gpu_info.memory_map_alignment = physicalDeviceProperties.limits.minMemoryMapAlignment; gpu_info.buffer_offset_alignment = physicalDeviceProperties.limits.minStorageBufferOffsetAlignment; -// fprintf(stderr, "[%u] max_shared_memory_size = %d\n", i, gpu_info.max_shared_memory_size); -// fprintf(stderr, "[%u] max_workgroup_count = %d %d %d\n", i, gpu_info.max_workgroup_count[0], gpu_info.max_workgroup_count[1], gpu_info.max_workgroup_count[2]); -// fprintf(stderr, "[%u] max_workgroup_invocations = %d\n", i, gpu_info.max_workgroup_invocations); -// fprintf(stderr, "[%u] max_workgroup_size = %d %d %d\n", i, gpu_info.max_workgroup_size[0], gpu_info.max_workgroup_size[1], gpu_info.max_workgroup_size[2]); +// fprintf(stderr, "[%u] max_shared_memory_size = %u\n", i, gpu_info.max_shared_memory_size); +// fprintf(stderr, "[%u] max_workgroup_count = %u %u %u\n", i, gpu_info.max_workgroup_count[0], gpu_info.max_workgroup_count[1], gpu_info.max_workgroup_count[2]); +// fprintf(stderr, "[%u] max_workgroup_invocations = %u\n", i, gpu_info.max_workgroup_invocations); +// fprintf(stderr, "[%u] max_workgroup_size = %u %u %u\n", i, gpu_info.max_workgroup_size[0], gpu_info.max_workgroup_size[1], gpu_info.max_workgroup_size[2]); // fprintf(stderr, "[%u] memory_map_alignment = %lu\n", i, gpu_info.memory_map_alignment); // fprintf(stderr, "[%u] buffer_offset_alignment = %lu\n", i, gpu_info.buffer_offset_alignment); diff --git a/src/gpu.h b/src/gpu.h index 514682559..3535a94b7 100644 --- a/src/gpu.h +++ b/src/gpu.h @@ -52,10 +52,10 @@ public: int type; // hardware capability - int max_shared_memory_size; - int max_workgroup_count[3]; - int max_workgroup_invocations; - int max_workgroup_size[3]; + uint32_t max_shared_memory_size; + uint32_t max_workgroup_count[3]; + uint32_t max_workgroup_invocations; + uint32_t max_workgroup_size[3]; size_t memory_map_alignment; size_t buffer_offset_alignment; diff --git a/src/mat.h b/src/mat.h index dd80f74a8..f9b2e2188 100644 --- a/src/mat.h +++ b/src/mat.h @@ -342,7 +342,7 @@ public: }; // type for vulkan specialization constant and push constant -union vk_specialization_type { int i; float f; }; +union vk_specialization_type { int i; float f; uint32_t u32; }; union vk_constant_type { int i; float f; }; #endif // NCNN_VULKAN diff --git a/src/net.cpp b/src/net.cpp index 0c129d12e..278638fcf 100644 --- a/src/net.cpp +++ b/src/net.cpp @@ -659,9 +659,12 @@ int Net::load_model(FILE* fp) { Layer* layer = layers[i]; - if (layer->support_vulkan) + int uret = layer->upload_model(cmd); + if (uret != 0) { - layer->upload_model(cmd); + fprintf(stderr, "layer upload_model %d failed\n", (int)i); + ret = -1; + break; } } @@ -674,9 +677,10 @@ int Net::load_model(FILE* fp) { Layer* layer = layers[i]; - if (layer->support_vulkan) + int cret = layer->create_pipeline(); + if (cret != 0) { - layer->create_pipeline(); + fprintf(stderr, "layer create_pipeline %d failed\n", (int)i); } } } @@ -877,9 +881,11 @@ int Net::load_model(const unsigned char* _mem) { Layer* layer = layers[i]; - if (layer->support_vulkan) + int uret = layer->upload_model(cmd); + if (uret != 0) { - layer->upload_model(cmd); + fprintf(stderr, "layer upload_model %d failed\n", (int)i); + return -1; } } @@ -892,9 +898,10 @@ int Net::load_model(const unsigned char* _mem) { Layer* layer = layers[i]; - if (layer->support_vulkan) + int cret = layer->create_pipeline(); + if (cret != 0) { - layer->create_pipeline(); + fprintf(stderr, "layer create_pipeline %d failed\n", (int)i); } } } diff --git a/src/pipeline.cpp b/src/pipeline.cpp index af073e1ba..7fdc548ee 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -89,22 +89,22 @@ void Pipeline::set_optimal_local_size_xyz(int w, int h, int c) if (c > 0) { local_size_z = vkdev->info.max_workgroup_size[2]; - while (c < local_size_z) + while ((uint32_t)c < local_size_z) { local_size_z /= 2; } } else { - local_size_z = std::min(128, vkdev->info.max_workgroup_size[2]); + local_size_z = std::min((uint32_t)128, vkdev->info.max_workgroup_size[2]); } - int max_local_size_xy = vkdev->info.max_workgroup_invocations / local_size_z; + uint32_t max_local_size_xy = vkdev->info.max_workgroup_invocations / local_size_z; if (h == w || (h < 0 && w < 0)) { - int local_size_xy = sqrt(max_local_size_xy); - int local_size_xy_prefer = 128; + uint32_t local_size_xy = sqrt(max_local_size_xy); + uint32_t local_size_xy_prefer = 128; while (local_size_xy < local_size_xy_prefer) { local_size_xy_prefer /= 2; @@ -119,23 +119,23 @@ void Pipeline::set_optimal_local_size_xyz(int w, int h, int c) float ps = h / (float)w; float local_size_xy = sqrt(max_local_size_xy / ps); local_size_y = local_size_xy * ps; - local_size_x = std::max((int)local_size_xy, 1); + local_size_x = std::max((uint32_t)local_size_xy, (uint32_t)1); } else { float ps = w / (float)h; float local_size_xy = sqrt(max_local_size_xy / ps); - local_size_y = std::max((int)local_size_xy, 1); + local_size_y = std::max((uint32_t)local_size_xy, (uint32_t)1); local_size_x = local_size_xy * ps; } - int local_size_y_prefer = std::min(128, vkdev->info.max_workgroup_size[1]); + uint32_t local_size_y_prefer = std::min((uint32_t)128, vkdev->info.max_workgroup_size[1]); while (local_size_y < local_size_y_prefer) { local_size_y_prefer /= 2; } - int local_size_x_prefer = std::min(128, vkdev->info.max_workgroup_size[0]); + uint32_t local_size_x_prefer = std::min((uint32_t)128, vkdev->info.max_workgroup_size[0]); while (local_size_x < local_size_x_prefer) { local_size_x_prefer /= 2; @@ -147,23 +147,23 @@ void Pipeline::set_optimal_local_size_xyz(int w, int h, int c) else if (h > 0) { local_size_y = std::min(max_local_size_xy, vkdev->info.max_workgroup_size[1]); - while (h < local_size_y) + while ((uint32_t)h < local_size_y) { local_size_y /= 2; } - int max_local_size_x = max_local_size_xy / local_size_y; + uint32_t max_local_size_x = max_local_size_xy / local_size_y; local_size_x = std::min(max_local_size_x, vkdev->info.max_workgroup_size[0]); } else if (w > 0) { local_size_x = std::min(max_local_size_xy, vkdev->info.max_workgroup_size[0]); - while (w < local_size_x) + while ((uint32_t)w < local_size_x) { local_size_x /= 2; } - int max_local_size_y = max_local_size_xy / local_size_x; + uint32_t max_local_size_y = max_local_size_xy / local_size_x; local_size_y = std::min(max_local_size_y, vkdev->info.max_workgroup_size[1]); } @@ -290,9 +290,9 @@ int Pipeline::create_pipeline(const char* name, const std::vector