From 8a7b4b035e3345e254b4fc40b3953ce5d2aaa1e8 Mon Sep 17 00:00:00 2001 From: nihuini Date: Thu, 12 Sep 2019 19:49:17 +0800 Subject: [PATCH] radv crash with large local group size, workaround --- src/layer/vulkan/instancenorm_vulkan.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/layer/vulkan/instancenorm_vulkan.cpp b/src/layer/vulkan/instancenorm_vulkan.cpp index 6e88d2345..134b2786e 100644 --- a/src/layer/vulkan/instancenorm_vulkan.cpp +++ b/src/layer/vulkan/instancenorm_vulkan.cpp @@ -49,14 +49,14 @@ int InstanceNorm_vulkan::create_pipeline(const Option& opt) if (channels % 4 != 0) { pipeline_instancenorm_reduce_sum4_fp16_to_fp32 = new Pipeline(vkdev); - pipeline_instancenorm_reduce_sum4_fp16_to_fp32->set_optimal_local_size_xyz(64, 1, channels); + pipeline_instancenorm_reduce_sum4_fp16_to_fp32->set_optimal_local_size_xyz(16, 1, channels); pipeline_instancenorm_reduce_sum4_fp16_to_fp32->create("instancenorm_reduce_sum4_fp16_to_fp32", opt, std::vector(), 2, 6); pipeline_instancenorm_reduce_sum4_fp32[0] = new Pipeline(vkdev); - pipeline_instancenorm_reduce_sum4_fp32[0]->set_optimal_local_size_xyz(64, 1, channels); + pipeline_instancenorm_reduce_sum4_fp32[0]->set_optimal_local_size_xyz(16, 1, channels); pipeline_instancenorm_reduce_sum4_fp32[0]->create("instancenorm_reduce_sum4_fp32", opt, std::vector(), 2, 6); pipeline_instancenorm_reduce_sum4_fp32[1] = new Pipeline(vkdev); - pipeline_instancenorm_reduce_sum4_fp32[1]->set_optimal_local_size_xyz(64, 1, channels); + pipeline_instancenorm_reduce_sum4_fp32[1]->set_optimal_local_size_xyz(16, 1, channels); pipeline_instancenorm_reduce_sum4_fp32[1]->create("instancenorm_reduce_sum4_fp32", opt, std::vector(), 2, 6); pipeline_instancenorm_reduce_mean = new Pipeline(vkdev); @@ -80,14 +80,14 @@ int InstanceNorm_vulkan::create_pipeline(const Option& opt) if (channels % 4 == 0) { pipeline_instancenorm_reduce_sum4_fp16_to_fp32_pack4 = new Pipeline(vkdev); - pipeline_instancenorm_reduce_sum4_fp16_to_fp32_pack4->set_optimal_local_size_xyz(64, 1, std::max(1, channels / 4)); + pipeline_instancenorm_reduce_sum4_fp16_to_fp32_pack4->set_optimal_local_size_xyz(16, 1, std::max(1, channels / 4)); pipeline_instancenorm_reduce_sum4_fp16_to_fp32_pack4->create("instancenorm_reduce_sum4_fp16_to_fp32_pack4", opt, std::vector(), 2, 6); pipeline_instancenorm_reduce_sum4_fp32_pack4[0] = new Pipeline(vkdev); - pipeline_instancenorm_reduce_sum4_fp32_pack4[0]->set_optimal_local_size_xyz(64, 1, std::max(1, channels / 4)); + pipeline_instancenorm_reduce_sum4_fp32_pack4[0]->set_optimal_local_size_xyz(16, 1, std::max(1, channels / 4)); pipeline_instancenorm_reduce_sum4_fp32_pack4[0]->create("instancenorm_reduce_sum4_fp32_pack4", opt, std::vector(), 2, 6); pipeline_instancenorm_reduce_sum4_fp32_pack4[1] = new Pipeline(vkdev); - pipeline_instancenorm_reduce_sum4_fp32_pack4[1]->set_optimal_local_size_xyz(64, 1, std::max(1, channels / 4)); + pipeline_instancenorm_reduce_sum4_fp32_pack4[1]->set_optimal_local_size_xyz(16, 1, std::max(1, channels / 4)); pipeline_instancenorm_reduce_sum4_fp32_pack4[1]->create("instancenorm_reduce_sum4_fp32_pack4", opt, std::vector(), 2, 6); pipeline_instancenorm_reduce_mean_pack4 = new Pipeline(vkdev);