| @@ -91,11 +91,12 @@ public: | |||
| for (size_t i=0; i<layers.size(); i++) | |||
| { | |||
| Layer* layer = layers[i]; | |||
| if (layer->support_vulkan) | |||
| int uret = layers[i]->upload_model(cmd); | |||
| if (uret != 0) | |||
| { | |||
| layer->upload_model(cmd); | |||
| fprintf(stderr, "layer upload_model %d failed\n", (int)i); | |||
| ret = -1; | |||
| break; | |||
| } | |||
| } | |||
| @@ -106,11 +107,10 @@ public: | |||
| #pragma omp parallel for | |||
| for (int i=0; i<layers.size(); i++) | |||
| { | |||
| Layer* layer = layers[i]; | |||
| if (layer->support_vulkan) | |||
| int cret = layers[i]->create_pipeline(); | |||
| if (cret != 0) | |||
| { | |||
| layer->create_pipeline(); | |||
| fprintf(stderr, "layer create_pipeline %d failed\n", (int)i); | |||
| } | |||
| } | |||
| } | |||
| @@ -456,10 +456,10 @@ int create_gpu_instance() | |||
| gpu_info.memory_map_alignment = physicalDeviceProperties.limits.minMemoryMapAlignment; | |||
| gpu_info.buffer_offset_alignment = physicalDeviceProperties.limits.minStorageBufferOffsetAlignment; | |||
| // fprintf(stderr, "[%u] max_shared_memory_size = %d\n", i, gpu_info.max_shared_memory_size); | |||
| // fprintf(stderr, "[%u] max_workgroup_count = %d %d %d\n", i, gpu_info.max_workgroup_count[0], gpu_info.max_workgroup_count[1], gpu_info.max_workgroup_count[2]); | |||
| // fprintf(stderr, "[%u] max_workgroup_invocations = %d\n", i, gpu_info.max_workgroup_invocations); | |||
| // fprintf(stderr, "[%u] max_workgroup_size = %d %d %d\n", i, gpu_info.max_workgroup_size[0], gpu_info.max_workgroup_size[1], gpu_info.max_workgroup_size[2]); | |||
| // fprintf(stderr, "[%u] max_shared_memory_size = %u\n", i, gpu_info.max_shared_memory_size); | |||
| // fprintf(stderr, "[%u] max_workgroup_count = %u %u %u\n", i, gpu_info.max_workgroup_count[0], gpu_info.max_workgroup_count[1], gpu_info.max_workgroup_count[2]); | |||
| // fprintf(stderr, "[%u] max_workgroup_invocations = %u\n", i, gpu_info.max_workgroup_invocations); | |||
| // fprintf(stderr, "[%u] max_workgroup_size = %u %u %u\n", i, gpu_info.max_workgroup_size[0], gpu_info.max_workgroup_size[1], gpu_info.max_workgroup_size[2]); | |||
| // fprintf(stderr, "[%u] memory_map_alignment = %lu\n", i, gpu_info.memory_map_alignment); | |||
| // fprintf(stderr, "[%u] buffer_offset_alignment = %lu\n", i, gpu_info.buffer_offset_alignment); | |||
| @@ -52,10 +52,10 @@ public: | |||
| int type; | |||
| // hardware capability | |||
| int max_shared_memory_size; | |||
| int max_workgroup_count[3]; | |||
| int max_workgroup_invocations; | |||
| int max_workgroup_size[3]; | |||
| uint32_t max_shared_memory_size; | |||
| uint32_t max_workgroup_count[3]; | |||
| uint32_t max_workgroup_invocations; | |||
| uint32_t max_workgroup_size[3]; | |||
| size_t memory_map_alignment; | |||
| size_t buffer_offset_alignment; | |||
| @@ -342,7 +342,7 @@ public: | |||
| }; | |||
| // type for vulkan specialization constant and push constant | |||
| union vk_specialization_type { int i; float f; }; | |||
| union vk_specialization_type { int i; float f; uint32_t u32; }; | |||
| union vk_constant_type { int i; float f; }; | |||
| #endif // NCNN_VULKAN | |||
| @@ -659,9 +659,12 @@ int Net::load_model(FILE* fp) | |||
| { | |||
| Layer* layer = layers[i]; | |||
| if (layer->support_vulkan) | |||
| int uret = layer->upload_model(cmd); | |||
| if (uret != 0) | |||
| { | |||
| layer->upload_model(cmd); | |||
| fprintf(stderr, "layer upload_model %d failed\n", (int)i); | |||
| ret = -1; | |||
| break; | |||
| } | |||
| } | |||
| @@ -674,9 +677,10 @@ int Net::load_model(FILE* fp) | |||
| { | |||
| Layer* layer = layers[i]; | |||
| if (layer->support_vulkan) | |||
| int cret = layer->create_pipeline(); | |||
| if (cret != 0) | |||
| { | |||
| layer->create_pipeline(); | |||
| fprintf(stderr, "layer create_pipeline %d failed\n", (int)i); | |||
| } | |||
| } | |||
| } | |||
| @@ -877,9 +881,11 @@ int Net::load_model(const unsigned char* _mem) | |||
| { | |||
| Layer* layer = layers[i]; | |||
| if (layer->support_vulkan) | |||
| int uret = layer->upload_model(cmd); | |||
| if (uret != 0) | |||
| { | |||
| layer->upload_model(cmd); | |||
| fprintf(stderr, "layer upload_model %d failed\n", (int)i); | |||
| return -1; | |||
| } | |||
| } | |||
| @@ -892,9 +898,10 @@ int Net::load_model(const unsigned char* _mem) | |||
| { | |||
| Layer* layer = layers[i]; | |||
| if (layer->support_vulkan) | |||
| int cret = layer->create_pipeline(); | |||
| if (cret != 0) | |||
| { | |||
| layer->create_pipeline(); | |||
| fprintf(stderr, "layer create_pipeline %d failed\n", (int)i); | |||
| } | |||
| } | |||
| } | |||
| @@ -89,22 +89,22 @@ void Pipeline::set_optimal_local_size_xyz(int w, int h, int c) | |||
| if (c > 0) | |||
| { | |||
| local_size_z = vkdev->info.max_workgroup_size[2]; | |||
| while (c < local_size_z) | |||
| while ((uint32_t)c < local_size_z) | |||
| { | |||
| local_size_z /= 2; | |||
| } | |||
| } | |||
| else | |||
| { | |||
| local_size_z = std::min(128, vkdev->info.max_workgroup_size[2]); | |||
| local_size_z = std::min((uint32_t)128, vkdev->info.max_workgroup_size[2]); | |||
| } | |||
| int max_local_size_xy = vkdev->info.max_workgroup_invocations / local_size_z; | |||
| uint32_t max_local_size_xy = vkdev->info.max_workgroup_invocations / local_size_z; | |||
| if (h == w || (h < 0 && w < 0)) | |||
| { | |||
| int local_size_xy = sqrt(max_local_size_xy); | |||
| int local_size_xy_prefer = 128; | |||
| uint32_t local_size_xy = sqrt(max_local_size_xy); | |||
| uint32_t local_size_xy_prefer = 128; | |||
| while (local_size_xy < local_size_xy_prefer) | |||
| { | |||
| local_size_xy_prefer /= 2; | |||
| @@ -119,23 +119,23 @@ void Pipeline::set_optimal_local_size_xyz(int w, int h, int c) | |||
| float ps = h / (float)w; | |||
| float local_size_xy = sqrt(max_local_size_xy / ps); | |||
| local_size_y = local_size_xy * ps; | |||
| local_size_x = std::max((int)local_size_xy, 1); | |||
| local_size_x = std::max((uint32_t)local_size_xy, (uint32_t)1); | |||
| } | |||
| else | |||
| { | |||
| float ps = w / (float)h; | |||
| float local_size_xy = sqrt(max_local_size_xy / ps); | |||
| local_size_y = std::max((int)local_size_xy, 1); | |||
| local_size_y = std::max((uint32_t)local_size_xy, (uint32_t)1); | |||
| local_size_x = local_size_xy * ps; | |||
| } | |||
| int local_size_y_prefer = std::min(128, vkdev->info.max_workgroup_size[1]); | |||
| uint32_t local_size_y_prefer = std::min((uint32_t)128, vkdev->info.max_workgroup_size[1]); | |||
| while (local_size_y < local_size_y_prefer) | |||
| { | |||
| local_size_y_prefer /= 2; | |||
| } | |||
| int local_size_x_prefer = std::min(128, vkdev->info.max_workgroup_size[0]); | |||
| uint32_t local_size_x_prefer = std::min((uint32_t)128, vkdev->info.max_workgroup_size[0]); | |||
| while (local_size_x < local_size_x_prefer) | |||
| { | |||
| local_size_x_prefer /= 2; | |||
| @@ -147,23 +147,23 @@ void Pipeline::set_optimal_local_size_xyz(int w, int h, int c) | |||
| else if (h > 0) | |||
| { | |||
| local_size_y = std::min(max_local_size_xy, vkdev->info.max_workgroup_size[1]); | |||
| while (h < local_size_y) | |||
| while ((uint32_t)h < local_size_y) | |||
| { | |||
| local_size_y /= 2; | |||
| } | |||
| int max_local_size_x = max_local_size_xy / local_size_y; | |||
| uint32_t max_local_size_x = max_local_size_xy / local_size_y; | |||
| local_size_x = std::min(max_local_size_x, vkdev->info.max_workgroup_size[0]); | |||
| } | |||
| else if (w > 0) | |||
| { | |||
| local_size_x = std::min(max_local_size_xy, vkdev->info.max_workgroup_size[0]); | |||
| while (w < local_size_x) | |||
| while ((uint32_t)w < local_size_x) | |||
| { | |||
| local_size_x /= 2; | |||
| } | |||
| int max_local_size_y = max_local_size_xy / local_size_x; | |||
| uint32_t max_local_size_y = max_local_size_xy / local_size_x; | |||
| local_size_y = std::min(max_local_size_y, vkdev->info.max_workgroup_size[1]); | |||
| } | |||
| @@ -290,9 +290,9 @@ int Pipeline::create_pipeline(const char* name, const std::vector<vk_specializat | |||
| local_size_xyz_entries[2].size = sizeof(vk_specialization_type); | |||
| specialization_data.resize(specialization_count + 3); | |||
| specialization_data[ specialization_count+0 ].i = local_size_x; | |||
| specialization_data[ specialization_count+1 ].i = local_size_y; | |||
| specialization_data[ specialization_count+2 ].i = local_size_z; | |||
| specialization_data[ specialization_count+0 ].u32 = local_size_x; | |||
| specialization_data[ specialization_count+1 ].u32 = local_size_y; | |||
| specialization_data[ specialization_count+2 ].u32 = local_size_z; | |||
| } | |||
| VkSpecializationInfo specializationInfo; | |||
| @@ -58,9 +58,9 @@ public: | |||
| VkDescriptorUpdateTemplateKHR descriptor_update_template; | |||
| int local_size_x; | |||
| int local_size_y; | |||
| int local_size_z; | |||
| uint32_t local_size_x; | |||
| uint32_t local_size_y; | |||
| uint32_t local_size_z; | |||
| }; | |||
| #endif // NCNN_VULKAN | |||