// Tencent is pleased to support the open source community by making ncnn available. // // Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved. // // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except // in compliance with the License. You may obtain a copy of the License at // // https://opensource.org/licenses/BSD-3-Clause // // Unless required by applicable law or agreed to in writing, software distributed // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. #include "gpu.h" #if NCNN_VULKAN #include #include #include #include #include #include #include "mat.h" #if __ANDROID__ #define ENABLE_VALIDATION_LAYER 0 #else #define ENABLE_VALIDATION_LAYER 0 #endif namespace ncnn { // global static VkInstance g_instance = 0; static int g_gpu_count = 0; static int g_default_gpu_index = -1; // NOTE 8 is large enough i think ... static GpuInfo g_gpu_infos[8]; #if ENABLE_VALIDATION_LAYER static VkDebugUtilsMessengerEXT callback; static VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback( VkDebugUtilsMessageSeverityFlagBitsEXT /*messageSeverity*/, VkDebugUtilsMessageTypeFlagsEXT /*messageType*/, const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData, void* /*pUserData*/) { fprintf(stderr, "validation layer: %s\n", pCallbackData->pMessage); return VK_FALSE; } VkResult CreateDebugUtilsMessengerEXT(VkInstance instance, const VkDebugUtilsMessengerCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDebugUtilsMessengerEXT* pCallback) { PFN_vkCreateDebugUtilsMessengerEXT func = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr(instance, "vkCreateDebugUtilsMessengerEXT"); if (func) return func(instance, pCreateInfo, pAllocator, pCallback); return VK_ERROR_EXTENSION_NOT_PRESENT; } void DestroyDebugUtilsMessengerEXT(VkInstance instance, VkDebugUtilsMessengerEXT callback, const VkAllocationCallbacks* pAllocator) { PFN_vkDestroyDebugUtilsMessengerEXT func = (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr(instance, "vkDestroyDebugUtilsMessengerEXT"); if (func) func(instance, callback, pAllocator); } #endif // ENABLE_VALIDATION_LAYER static uint32_t find_device_compute_queue(const std::vector& queueFamilyProperties) { // first try, compute only queue for (uint32_t i=0; i& queueFamilyProperties) { // first try, transfer only queue for (uint32_t i=0; i 0) return 0; fprintf(stderr, "no vulkan device\n"); return -1; } int create_gpu_instance() { VkResult ret; std::vector enabledLayers; #if ENABLE_VALIDATION_LAYER uint32_t instanceLayerPropertyCount; ret = vkEnumerateInstanceLayerProperties(&instanceLayerPropertyCount, NULL); if (ret != VK_SUCCESS) { fprintf(stderr, "vkEnumerateInstanceLayerProperties failed %d\n", ret); return -1; } std::vector instanceLayerProperties(instanceLayerPropertyCount); ret = vkEnumerateInstanceLayerProperties(&instanceLayerPropertyCount, instanceLayerProperties.data()); if (ret != VK_SUCCESS) { fprintf(stderr, "vkEnumerateInstanceLayerProperties failed %d\n", ret); return -1; } for (uint32_t i=0; i enabledExtensions; uint32_t instanceExtensionPropertyCount; ret = vkEnumerateInstanceExtensionProperties(NULL, &instanceExtensionPropertyCount, NULL); if (ret != VK_SUCCESS) { fprintf(stderr, "vkEnumerateInstanceExtensionProperties failed %d\n", ret); return -1; } std::vector instanceExtensionProperties(instanceExtensionPropertyCount); ret = vkEnumerateInstanceExtensionProperties(NULL, &instanceExtensionPropertyCount, instanceExtensionProperties.data()); if (ret != VK_SUCCESS) { fprintf(stderr, "vkEnumerateInstanceExtensionProperties failed %d\n", ret); return -1; } for (uint32_t j=0; j 8) physicalDeviceCount = 8; std::vector physicalDevices(physicalDeviceCount); ret = vkEnumeratePhysicalDevices(g_instance, &physicalDeviceCount, physicalDevices.data()); if (ret != VK_SUCCESS) { fprintf(stderr, "vkEnumeratePhysicalDevices failed %d\n", ret); return -1; } g_gpu_count = physicalDeviceCount; // find proper device and queue for (uint32_t i=0; i queueFamilyProperties(queueFamilyPropertiesCount); vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyPropertiesCount, queueFamilyProperties.data()); gpu_info.compute_queue_index = find_device_compute_queue(queueFamilyProperties); gpu_info.transfer_queue_index = find_device_transfer_queue(queueFamilyProperties); // find memory type index VkPhysicalDeviceMemoryProperties physicalDeviceMemoryProperties; vkGetPhysicalDeviceMemoryProperties(physicalDevice, &physicalDeviceMemoryProperties); // // print memory info // for (uint32_t j=0; j deviceExtensionProperties(deviceExtensionPropertyCount); ret = vkEnumerateDeviceExtensionProperties(physicalDevice, NULL, &deviceExtensionPropertyCount, deviceExtensionProperties.data()); if (ret != VK_SUCCESS) { fprintf(stderr, "vkEnumerateDeviceExtensionProperties failed %d\n", ret); return -1; } // extension capability gpu_info.support_VK_KHR_8bit_storage = 0; gpu_info.support_VK_KHR_16bit_storage = 0; gpu_info.support_VK_KHR_bind_memory2 = 0; gpu_info.support_VK_KHR_dedicated_allocation = 0; gpu_info.support_VK_KHR_descriptor_update_template = 0; gpu_info.support_VK_KHR_get_memory_requirements2 = 0; gpu_info.support_VK_KHR_get_physical_device_properties2 = 0; gpu_info.support_VK_KHR_push_descriptor = 0; gpu_info.support_VK_KHR_shader_float16_int8 = 0; gpu_info.support_VK_KHR_shader_float_controls = 0; gpu_info.support_VK_KHR_storage_buffer_storage_class = 0; for (uint32_t j=0; j enabledExtensions; if (info.support_VK_KHR_8bit_storage) enabledExtensions.push_back("VK_KHR_8bit_storage"); if (info.support_VK_KHR_16bit_storage) enabledExtensions.push_back("VK_KHR_16bit_storage"); if (info.support_VK_KHR_bind_memory2) enabledExtensions.push_back("VK_KHR_bind_memory2"); if (info.support_VK_KHR_dedicated_allocation) enabledExtensions.push_back("VK_KHR_dedicated_allocation"); if (info.support_VK_KHR_descriptor_update_template) enabledExtensions.push_back("VK_KHR_descriptor_update_template"); if (info.support_VK_KHR_get_memory_requirements2) enabledExtensions.push_back("VK_KHR_get_memory_requirements2"); if (info.support_VK_KHR_get_physical_device_properties2) enabledExtensions.push_back("VK_KHR_get_physical_device_properties2"); if (info.support_VK_KHR_push_descriptor) enabledExtensions.push_back("VK_KHR_push_descriptor"); if (info.support_VK_KHR_shader_float16_int8) enabledExtensions.push_back("VK_KHR_shader_float16_int8"); if (info.support_VK_KHR_shader_float_controls) enabledExtensions.push_back("VK_KHR_shader_float_controls"); if (info.support_VK_KHR_storage_buffer_storage_class) enabledExtensions.push_back("VK_KHR_storage_buffer_storage_class"); VkDeviceQueueCreateInfo deviceQueueCreateInfos[2]; deviceQueueCreateInfos[0].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; deviceQueueCreateInfos[0].pNext = 0; deviceQueueCreateInfos[0].flags = 0; deviceQueueCreateInfos[0].queueFamilyIndex = info.compute_queue_index; deviceQueueCreateInfos[0].queueCount = 1; deviceQueueCreateInfos[0].pQueuePriorities = queuePriorities; deviceQueueCreateInfos[1].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; deviceQueueCreateInfos[1].pNext = 0; deviceQueueCreateInfos[1].flags = 0; deviceQueueCreateInfos[1].queueFamilyIndex = info.transfer_queue_index; deviceQueueCreateInfos[1].queueCount = 1; deviceQueueCreateInfos[1].pQueuePriorities = queuePriorities; VkDeviceCreateInfo deviceCreateInfo; deviceCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; deviceCreateInfo.pNext = 0; deviceCreateInfo.flags = 0; if (info.compute_queue_index == info.transfer_queue_index) { deviceCreateInfo.queueCreateInfoCount = 1; } else { deviceCreateInfo.queueCreateInfoCount = 2; } deviceCreateInfo.pQueueCreateInfos = deviceQueueCreateInfos; deviceCreateInfo.enabledLayerCount = 0; deviceCreateInfo.ppEnabledLayerNames = 0; deviceCreateInfo.enabledExtensionCount = enabledExtensions.size(); deviceCreateInfo.ppEnabledExtensionNames = enabledExtensions.data(); deviceCreateInfo.pEnabledFeatures = 0;// VkPhysicalDeviceFeatures pointer VkResult ret = vkCreateDevice(info.physical_device, &deviceCreateInfo, 0, &device); if (ret != VK_SUCCESS) { fprintf(stderr, "vkCreateDevice failed %d\n", ret); } init_device_extension(); create_shader_module(); blob_buffer_allocator = new VkBlobBufferAllocator(this); staging_buffer_allocator = new VkStagingBufferAllocator(this); } VulkanDevice::~VulkanDevice() { delete blob_buffer_allocator; delete staging_buffer_allocator; destroy_shader_module(); vkDestroyDevice(device, 0); } VkShaderModule VulkanDevice::get_shader_module(const char* name) const { for (int i=0; i