From 5e0513d9bd39b0ae94a13f9b849a8f6a99cf3af9 Mon Sep 17 00:00:00 2001 From: Thomas Guillem Date: Fri, 2 Jan 2026 17:33:08 +0100 Subject: [PATCH 1/8] vulkan: use VkPhysicalDeviceSubgroupProperties Instead of VkPhysicalDeviceVulkan11Properties, that was added in Vulkan 1.2. "The members of VkPhysicalDeviceVulkan11Properties have the same values as the corresponding members of ... VkPhysicalDeviceSubgroupProperties ..." --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index deed5055..ec6611c2 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -4681,26 +4681,26 @@ static vk_device ggml_vk_get_device(size_t idx) { } device->float_controls_rte_fp16 = vk12_props.shaderRoundingModeRTEFloat16; - device->subgroup_basic = (vk11_props.subgroupSupportedStages & vk::ShaderStageFlagBits::eCompute) && - (vk11_props.subgroupSupportedOperations & vk::SubgroupFeatureFlagBits::eBasic); - device->subgroup_arithmetic = (vk11_props.subgroupSupportedStages & vk::ShaderStageFlagBits::eCompute) && - (vk11_props.subgroupSupportedOperations & vk::SubgroupFeatureFlagBits::eArithmetic); + device->subgroup_basic = (subgroup_props.supportedStages & vk::ShaderStageFlagBits::eCompute) && + (subgroup_props.supportedOperations & vk::SubgroupFeatureFlagBits::eBasic); + device->subgroup_arithmetic = (subgroup_props.supportedStages & vk::ShaderStageFlagBits::eCompute) && + (subgroup_props.supportedOperations & vk::SubgroupFeatureFlagBits::eArithmetic); #ifdef __APPLE__ // Workaround for subgroup arithmetic failing on MoltenVK with AMD GPUs (issue 15846) if (device->vendor_id == VK_VENDOR_ID_AMD) { device->subgroup_arithmetic = false; } #endif - device->subgroup_shuffle = (vk11_props.subgroupSupportedStages & vk::ShaderStageFlagBits::eCompute) && - (vk11_props.subgroupSupportedOperations & 
vk::SubgroupFeatureFlagBits::eShuffle); - device->subgroup_clustered = (vk11_props.subgroupSupportedStages & vk::ShaderStageFlagBits::eCompute) && - (vk11_props.subgroupSupportedOperations & vk::SubgroupFeatureFlagBits::eClustered); + device->subgroup_shuffle = (subgroup_props.supportedStages & vk::ShaderStageFlagBits::eCompute) && + (subgroup_props.supportedOperations & vk::SubgroupFeatureFlagBits::eShuffle); + device->subgroup_clustered = (subgroup_props.supportedStages & vk::ShaderStageFlagBits::eCompute) && + (subgroup_props.supportedOperations & vk::SubgroupFeatureFlagBits::eClustered); - device->subgroup_ballot = (vk11_props.subgroupSupportedStages & vk::ShaderStageFlagBits::eCompute) && - (vk11_props.subgroupSupportedOperations & vk::SubgroupFeatureFlagBits::eBallot); + device->subgroup_ballot = (subgroup_props.supportedStages & vk::ShaderStageFlagBits::eCompute) && + (subgroup_props.supportedOperations & vk::SubgroupFeatureFlagBits::eBallot); - device->subgroup_vote = (vk11_props.subgroupSupportedStages & vk::ShaderStageFlagBits::eCompute) && - (vk11_props.subgroupSupportedOperations & vk::SubgroupFeatureFlagBits::eVote); + device->subgroup_vote = (subgroup_props.supportedStages & vk::ShaderStageFlagBits::eCompute) && + (subgroup_props.supportedOperations & vk::SubgroupFeatureFlagBits::eVote); const bool force_disable_f16 = getenv("GGML_VK_DISABLE_F16") != nullptr; From 76f8a0201a0e14cbb93cefe582bae90308d4e955 Mon Sep 17 00:00:00 2001 From: Thomas Guillem Date: Wed, 21 Jan 2026 06:02:38 +0100 Subject: [PATCH 2/8] vulkan: use VkPhysicalDeviceFloatControlsProperties on Vulkan 1.1 "The members of VkPhysicalDeviceVulkan12Properties must have the same values as the corresponding members of ... VkPhysicalDeviceFloatControlsProperties ..." 
--- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 30 ++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index ec6611c2..49b268ea 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -4483,6 +4483,9 @@ static vk_device ggml_vk_get_device(size_t idx) { device->physical_device = physical_devices[dev_num]; const std::vector ext_props = device->physical_device.enumerateDeviceExtensionProperties(); + vk::PhysicalDeviceProperties device_props = device->physical_device.getProperties(); + const bool device_is_vulkan_12 = device_props.apiVersion >= VK_API_VERSION_1_2; + device->architecture = get_device_architecture(device->physical_device); const char* GGML_VK_PREFER_HOST_MEMORY = getenv("GGML_VK_PREFER_HOST_MEMORY"); @@ -4509,6 +4512,7 @@ static vk_device ggml_vk_get_device(size_t idx) { device->integer_dot_product = false; device->shader_64b_indexing = false; bool bfloat16_support = false; + bool shader_float_controls_khr = false; for (const auto& properties : ext_props) { if (strcmp("VK_KHR_maintenance4", properties.extensionName) == 0) { @@ -4559,6 +4563,8 @@ static vk_device ggml_vk_get_device(size_t idx) { } else if (strcmp("VK_EXT_shader_64bit_indexing", properties.extensionName) == 0) { device->shader_64b_indexing = true; #endif + } else if (strcmp("VK_KHR_shader_float_controls", properties.extensionName) == 0) { + shader_float_controls_khr = true; } } @@ -4571,6 +4577,7 @@ static vk_device ggml_vk_get_device(size_t idx) { vk::PhysicalDeviceShaderCoreProperties2AMD amd_shader_core_properties2_props; vk::PhysicalDeviceVulkan11Properties vk11_props; vk::PhysicalDeviceVulkan12Properties vk12_props; + vk::PhysicalDeviceFloatControlsProperties float_controls_props; vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT subgroup_size_control_props; vk::PhysicalDeviceShaderIntegerDotProductPropertiesKHR 
shader_integer_dot_product_props; vk::PhysicalDeviceExternalMemoryHostPropertiesEXT external_memory_host_props; @@ -4578,10 +4585,21 @@ static vk_device ggml_vk_get_device(size_t idx) { props2.pNext = &props3; props3.pNext = &subgroup_props; subgroup_props.pNext = &driver_props; - driver_props.pNext = &vk11_props; - vk11_props.pNext = &vk12_props; - VkBaseOutStructure * last_struct = (VkBaseOutStructure *)&vk12_props; + VkBaseOutStructure * last_struct; + + if (device_is_vulkan_12) { + driver_props.pNext = &vk11_props; + vk11_props.pNext = &vk12_props; + last_struct = (VkBaseOutStructure *)&vk12_props; + } else { + if (shader_float_controls_khr) { + driver_props.pNext = &float_controls_props; + last_struct = (VkBaseOutStructure *)&float_controls_props; + } else { + last_struct = (VkBaseOutStructure *)&driver_props; + } + } if (maintenance4_support) { last_struct->pNext = (VkBaseOutStructure *)&props4; @@ -4679,7 +4697,11 @@ static vk_device ggml_vk_get_device(size_t idx) { } else { device->shader_core_count = 0; } - device->float_controls_rte_fp16 = vk12_props.shaderRoundingModeRTEFloat16; + if (device_is_vulkan_12) { + device->float_controls_rte_fp16 = vk12_props.shaderRoundingModeRTEFloat16; + } else { + device->float_controls_rte_fp16 = shader_float_controls_khr ? 
float_controls_props.shaderRoundingModeRTEFloat16 : false; + } device->subgroup_basic = (subgroup_props.supportedStages & vk::ShaderStageFlagBits::eCompute) && (subgroup_props.supportedOperations & vk::SubgroupFeatureFlagBits::eBasic); From e18bd369b92e4a405914c8e70043f3a3db233a8d Mon Sep 17 00:00:00 2001 From: Thomas Guillem Date: Wed, 21 Jan 2026 06:25:42 +0100 Subject: [PATCH 3/8] vulkan: use individual features structures on Vulkan 1.1 --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 167 ++++++++++++++++++++++++--- 1 file changed, 150 insertions(+), 17 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 49b268ea..a7a3f208 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -4502,6 +4502,7 @@ static vk_device ggml_vk_get_device(size_t idx) { bool fp16_storage = false; bool fp16_compute = false; + bool int8_storage_khr = false; bool maintenance4_support = false; bool sm_builtins = false; bool amd_shader_core_properties2 = false; @@ -4512,13 +4513,19 @@ static vk_device ggml_vk_get_device(size_t idx) { device->integer_dot_product = false; device->shader_64b_indexing = false; bool bfloat16_support = false; + bool buffer_device_address_khr = false; + bool timeline_semaphore_khr = false; + bool vulkan_memory_model_khr = false; bool shader_float_controls_khr = false; + bool descriptor_indexing_ext = false; for (const auto& properties : ext_props) { if (strcmp("VK_KHR_maintenance4", properties.extensionName) == 0) { maintenance4_support = true; } else if (strcmp("VK_KHR_16bit_storage", properties.extensionName) == 0) { fp16_storage = true; + } else if (strcmp("VK_KHR_8bit_storage", properties.extensionName) == 0) { + int8_storage_khr = true; } else if (strcmp("VK_KHR_shader_float16_int8", properties.extensionName) == 0) { fp16_compute = true; } else if (strcmp("VK_NV_shader_sm_builtins", properties.extensionName) == 0) { @@ -4563,11 +4570,27 @@ static vk_device 
ggml_vk_get_device(size_t idx) { } else if (strcmp("VK_EXT_shader_64bit_indexing", properties.extensionName) == 0) { device->shader_64b_indexing = true; #endif + } else if (strcmp("VK_KHR_buffer_device_address", properties.extensionName) == 0) { + buffer_device_address_khr = true; + } else if (strcmp("VK_KHR_timeline_semaphore", properties.extensionName) == 0) { + timeline_semaphore_khr = true; + } else if (strcmp("VK_KHR_vulkan_memory_model", properties.extensionName) == 0) { + vulkan_memory_model_khr = true; } else if (strcmp("VK_KHR_shader_float_controls", properties.extensionName) == 0) { shader_float_controls_khr = true; + } else if (strcmp("VK_EXT_descriptor_indexing", properties.extensionName) == 0) { + descriptor_indexing_ext = true; } } + if (!device_is_vulkan_12 && !timeline_semaphore_khr) { + throw std::runtime_error("Unsupported device: timeline semaphores required"); + } + + if (!device_is_vulkan_12 && !int8_storage_khr) { + throw std::runtime_error("Unsupported device: 8-bit storage required"); + } + vk::PhysicalDeviceProperties2 props2; vk::PhysicalDeviceMaintenance3Properties props3; vk::PhysicalDeviceMaintenance4Properties props4; @@ -4765,17 +4788,78 @@ static vk_device ggml_vk_get_device(size_t idx) { device_features2.pNext = nullptr; device_features2.features = (VkPhysicalDeviceFeatures)device_features; - VkPhysicalDeviceVulkan11Features vk11_features; - vk11_features.pNext = nullptr; - vk11_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; - device_features2.pNext = &vk11_features; + VkPhysicalDeviceVulkan11Features vk11_features {}; + VkPhysicalDeviceVulkan12Features vk12_features {}; - VkPhysicalDeviceVulkan12Features vk12_features; - vk12_features.pNext = nullptr; - vk12_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; - vk11_features.pNext = &vk12_features; + // Used when Vulkan 1.2 API not available + VkPhysicalDevice16BitStorageFeatures storage_16bit_features {}; + 
VkPhysicalDevice8BitStorageFeatures storage_8bit_features {}; + VkPhysicalDeviceShaderFloat16Int8Features float16_int8_features {}; + VkPhysicalDeviceBufferDeviceAddressFeaturesKHR buffer_device_address_features {}; + VkPhysicalDeviceDescriptorIndexingFeaturesEXT descriptor_indexing_features {}; + VkPhysicalDeviceVulkanMemoryModelFeatures vulkan_memory_model_features {}; + VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore_features {}; - last_struct = (VkBaseOutStructure *)&vk12_features; + if (device_is_vulkan_12) { + // Use vk11 and vk12 features structures + vk11_features.pNext = nullptr; + vk11_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; + device_features2.pNext = &vk11_features; + + vk12_features.pNext = nullptr; + vk12_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; + vk11_features.pNext = &vk12_features; + + last_struct = (VkBaseOutStructure *)&vk12_features; + } else { + // Use individual features structures + storage_16bit_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES; + storage_16bit_features.pNext = nullptr; + device_features2.pNext = &storage_16bit_features; + last_struct = (VkBaseOutStructure *)&storage_16bit_features; + + if (int8_storage_khr) { + storage_8bit_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES; + storage_8bit_features.pNext = nullptr; + last_struct->pNext = (VkBaseOutStructure *)&storage_8bit_features; + last_struct = (VkBaseOutStructure *)&storage_8bit_features; + } + + if (fp16_compute) { + float16_int8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES; + float16_int8_features.pNext = nullptr; + last_struct->pNext = (VkBaseOutStructure *)&float16_int8_features; + last_struct = (VkBaseOutStructure *)&float16_int8_features; + } + + if (buffer_device_address_khr) { + buffer_device_address_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_KHR; + 
buffer_device_address_features.pNext = nullptr; + last_struct->pNext = (VkBaseOutStructure *)&buffer_device_address_features; + last_struct = (VkBaseOutStructure *)&buffer_device_address_features; + } + + if (descriptor_indexing_ext) { + descriptor_indexing_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT; + descriptor_indexing_features.pNext = nullptr; + last_struct->pNext = (VkBaseOutStructure *)&descriptor_indexing_features; + last_struct = (VkBaseOutStructure *)&descriptor_indexing_features; + } + + if (vulkan_memory_model_khr) { + vulkan_memory_model_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES; + vulkan_memory_model_features.pNext = nullptr; + last_struct->pNext = (VkBaseOutStructure *)&vulkan_memory_model_features; + last_struct = (VkBaseOutStructure *)&vulkan_memory_model_features; + } + + if (timeline_semaphore_khr) { + timeline_semaphore_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES; + timeline_semaphore_features.pNext = nullptr; + last_struct->pNext = (VkBaseOutStructure *)&timeline_semaphore_features; + last_struct = (VkBaseOutStructure *)&timeline_semaphore_features; + } + } VkPhysicalDevicePipelineRobustnessFeaturesEXT pl_robustness_features; pl_robustness_features.pNext = nullptr; @@ -4883,9 +4967,29 @@ static vk_device ggml_vk_get_device(size_t idx) { vkGetPhysicalDeviceFeatures2(device->physical_device, &device_features2); + bool shader_float16_supported; + bool buffer_device_address_supported; + bool vulkan_memory_model_supported; + bool storage_buffer_16bit_access_supported; + bool shader_rounding_mode_rte_fp16; + + if (device_is_vulkan_12) { + shader_float16_supported = vk12_features.shaderFloat16; + buffer_device_address_supported = vk12_features.bufferDeviceAddress; + vulkan_memory_model_supported = vk12_features.vulkanMemoryModel; + storage_buffer_16bit_access_supported = vk11_features.storageBuffer16BitAccess; + 
shader_rounding_mode_rte_fp16 = vk12_props.shaderRoundingModeRTEFloat16; + } else { + shader_float16_supported = float16_int8_features.shaderFloat16; + buffer_device_address_supported = buffer_device_address_features.bufferDeviceAddress && buffer_device_address_khr; + vulkan_memory_model_supported = vulkan_memory_model_features.vulkanMemoryModel && vulkan_memory_model_khr; + storage_buffer_16bit_access_supported = storage_16bit_features.storageBuffer16BitAccess; + shader_rounding_mode_rte_fp16 = shader_float_controls_khr ? float_controls_props.shaderRoundingModeRTEFloat16 : false; + } + device->pipeline_executable_properties_support = pipeline_executable_properties_support; - device->fp16 = device->fp16 && vk12_features.shaderFloat16; + device->fp16 = device->fp16 && shader_float16_supported; #if defined(VK_KHR_shader_bfloat16) device->bf16 = bfloat16_support && bfloat16_features.shaderBFloat16Type; @@ -4895,13 +4999,13 @@ static vk_device ggml_vk_get_device(size_t idx) { device->pipeline_robustness = pl_robustness_features.pipelineRobustness; - device->multi_add = vk12_props.shaderRoundingModeRTEFloat16 && + device->multi_add = shader_rounding_mode_rte_fp16 && device->properties.limits.maxPushConstantsSize >= sizeof(vk_op_multi_add_push_constants) && getenv("GGML_VK_DISABLE_MULTI_ADD") == nullptr; device->shader_int64 = device_features2.features.shaderInt64; - device->buffer_device_address = vk12_features.bufferDeviceAddress; - device->vulkan_memory_model = vk12_features.vulkanMemoryModel; + device->buffer_device_address = buffer_device_address_supported; + device->vulkan_memory_model = vulkan_memory_model_supported; if (device->subgroup_size_control) { device->subgroup_min_size = subgroup_size_control_props.minSubgroupSize; @@ -4933,7 +5037,7 @@ static vk_device ggml_vk_get_device(size_t idx) { coopmat2_features.cooperativeMatrixPerElementOperations && coopmat2_features.cooperativeMatrixTensorAddressing && coopmat2_features.cooperativeMatrixBlockLoads && - 
vk12_features.bufferDeviceAddress) { + buffer_device_address_supported) { std::vector flexible_dimensions; uint32_t count = 0; @@ -5000,12 +5104,22 @@ static vk_device ggml_vk_get_device(size_t idx) { #endif } - if (!vk11_features.storageBuffer16BitAccess) { + if (!storage_buffer_16bit_access_supported) { std::cerr << "ggml_vulkan: device " << GGML_VK_NAME << idx << " does not support 16-bit storage." << std::endl; throw std::runtime_error("Unsupported device"); } - device_extensions.push_back("VK_KHR_16bit_storage"); + if (fp16_storage) { + device_extensions.push_back("VK_KHR_16bit_storage"); + } + + if (!device_is_vulkan_12 && timeline_semaphore_khr) { + device_extensions.push_back("VK_KHR_timeline_semaphore"); + } + + if (!device_is_vulkan_12 && int8_storage_khr) { + device_extensions.push_back("VK_KHR_8bit_storage"); + } #ifdef GGML_VULKAN_VALIDATE device_extensions.push_back("VK_KHR_shader_non_semantic_info"); @@ -5015,6 +5129,22 @@ static vk_device ggml_vk_get_device(size_t idx) { device_extensions.push_back("VK_KHR_shader_float16_int8"); } + if (!device_is_vulkan_12 && device->buffer_device_address) { + device_extensions.push_back("VK_KHR_buffer_device_address"); + } + + if (!device_is_vulkan_12 && device->vulkan_memory_model) { + device_extensions.push_back("VK_KHR_vulkan_memory_model"); + } + + if (!device_is_vulkan_12 && shader_float_controls_khr) { + device_extensions.push_back("VK_KHR_shader_float_controls"); + } + + if (!device_is_vulkan_12 && descriptor_indexing_ext) { + device_extensions.push_back("VK_EXT_descriptor_indexing"); + } + #if defined(VK_KHR_cooperative_matrix) if (device->coopmat_support) { // Query supported shapes @@ -5200,7 +5330,10 @@ static vk_device ggml_vk_get_device(size_t idx) { vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_create_info( {}, dsl_binding); - descriptor_set_layout_create_info.setPNext(&dslbfci); + if (device_is_vulkan_12 || descriptor_indexing_ext) { + descriptor_set_layout_create_info.setPNext(&dslbfci); 
+ } + device->dsl = device->device.createDescriptorSetLayout(descriptor_set_layout_create_info); ggml_vk_load_shaders(device); From c613b87ad4f73454143b88693de63c674b56848d Mon Sep 17 00:00:00 2001 From: Thomas Guillem Date: Wed, 21 Jan 2026 06:29:31 +0100 Subject: [PATCH 4/8] vulkan: use pfn_vkGetBufferDeviceAddress on Vulkan 1.1 --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index a7a3f208..38b4c3d9 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -585,6 +585,8 @@ struct vk_device_struct { bool multi_add; bool shader_int64; bool buffer_device_address; + // Not needed for Vulkan 1.2+ where it's a core function + PFN_vkGetBufferDeviceAddressKHR pfn_vkGetBufferDeviceAddress = nullptr; bool vulkan_memory_model; bool add_rms_fusion; @@ -2585,8 +2587,13 @@ static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, const std buf->size = size; if (device->buffer_device_address) { - const vk::BufferDeviceAddressInfo addressInfo(buf->buffer); - buf->bda_addr = device->device.getBufferAddress(addressInfo); + if (device->pfn_vkGetBufferDeviceAddress){ + vk::BufferDeviceAddressInfo addressInfo(buf->buffer); + buf->bda_addr = device->pfn_vkGetBufferDeviceAddress(device->device, &static_cast(addressInfo)); + } else { + const vk::BufferDeviceAddressInfo addressInfo(buf->buffer); + buf->bda_addr = device->device.getBufferAddress(addressInfo); + } } device->memory_logger->log_allocation(buf, size); @@ -5268,6 +5275,15 @@ static vk_device ggml_vk_get_device(size_t idx) { device_create_info.setPNext(&device_features2); device->device = device->physical_device.createDevice(device_create_info); + if (!device_is_vulkan_12 && device->buffer_device_address) { + device->pfn_vkGetBufferDeviceAddress = (PFN_vkGetBufferDeviceAddressKHR) + vkGetDeviceProcAddr(device->device, 
"vkGetBufferDeviceAddressKHR"); + + if (!device->pfn_vkGetBufferDeviceAddress) { + throw std::runtime_error("Failed to load vkGetBufferDeviceAddressKHR"); + } + } + // Queues ggml_vk_create_queue(device, device->compute_queue, compute_queue_family_index, 0, { vk::PipelineStageFlagBits::eComputeShader | vk::PipelineStageFlagBits::eTransfer }, false); From 7bb0b9713e91a6b5feda39fc7bcadabc2f5268df Mon Sep 17 00:00:00 2001 From: Thomas Guillem Date: Wed, 21 Jan 2026 06:05:09 +0100 Subject: [PATCH 5/8] vulkan: handle Vulkan 1.1 in ggml_vk_print_gpu_info() Use VkPhysicalDeviceShaderFloat16Int8Features on Vulkan 1.1 --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 34 +++++++++++++++++++--------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 38b4c3d9..7d48d6b1 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -5447,6 +5447,9 @@ static void ggml_vk_print_gpu_info(size_t idx) { const vk_device_architecture device_architecture = get_device_architecture(physical_device); + vk::PhysicalDeviceProperties device_props = physical_device.getProperties(); + const bool device_is_vulkan_12 = device_props.apiVersion >= VK_API_VERSION_1_2; + const char* GGML_VK_DISABLE_F16 = getenv("GGML_VK_DISABLE_F16"); bool force_disable_f16 = GGML_VK_DISABLE_F16 != nullptr; @@ -5475,18 +5478,26 @@ static void ggml_vk_print_gpu_info(size_t idx) { device_features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; device_features2.pNext = nullptr; - VkPhysicalDeviceVulkan11Features vk11_features; - vk11_features.pNext = nullptr; - vk11_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; - device_features2.pNext = &vk11_features; + VkPhysicalDeviceVulkan11Features vk11_features {}; + VkPhysicalDeviceVulkan12Features vk12_features {}; + VkPhysicalDeviceShaderFloat16Int8Features float16_int8_features {}; - VkPhysicalDeviceVulkan12Features 
vk12_features; - vk12_features.pNext = nullptr; - vk12_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; - vk11_features.pNext = &vk12_features; + if (device_is_vulkan_12) { + vk11_features.pNext = nullptr; + vk11_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; + device_features2.pNext = &vk11_features; - // Pointer to the last chain element - last_struct = (VkBaseOutStructure *)&vk12_features; + vk12_features.pNext = nullptr; + vk12_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; + vk11_features.pNext = &vk12_features; + + last_struct = (VkBaseOutStructure *)&vk12_features; + } else { + float16_int8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES; + float16_int8_features.pNext = nullptr; + device_features2.pNext = &float16_int8_features; + last_struct = (VkBaseOutStructure *)&float16_int8_features; + } #if defined(GGML_VULKAN_COOPMAT_GLSLC_SUPPORT) VkPhysicalDeviceCooperativeMatrixFeaturesKHR coopmat_features; @@ -5518,7 +5529,8 @@ static void ggml_vk_print_gpu_info(size_t idx) { vkGetPhysicalDeviceFeatures2(physical_device, &device_features2); - fp16 = fp16 && vk12_features.shaderFloat16; + bool shader_float16_supported = device_is_vulkan_12 ? 
vk12_features.shaderFloat16 : float16_int8_features.shaderFloat16; + fp16 = fp16 && shader_float16_supported; #if defined(VK_KHR_shader_bfloat16) bool bf16 = bfloat16_support && bfloat16_features.shaderBFloat16Type; From 3cac3937571667d54cdf7a50a38b43621a0d4975 Mon Sep 17 00:00:00 2001 From: Thomas Guillem Date: Wed, 21 Jan 2026 06:21:06 +0100 Subject: [PATCH 6/8] vulkan: handle Vulkan 1.1 in ggml_vk_device_is_supported() Check required extensions when using Vulkan 1.1 and use VkPhysicalDevice16BitStorageFeatures --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 40 ++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 7d48d6b1..c1fd458f 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -15343,17 +15343,47 @@ static bool ggml_vk_instance_debug_utils_ext_available( } static bool ggml_vk_device_is_supported(const vk::PhysicalDevice & vkdev) { + vk::PhysicalDeviceProperties device_props = vkdev.getProperties(); + const bool device_is_vulkan_12 = device_props.apiVersion >= VK_API_VERSION_1_2; + + if (!device_is_vulkan_12) { + // Check for required extensions on Vulkan 1.1 + std::vector ext_props = vkdev.enumerateDeviceExtensionProperties(); + bool timeline_semaphore_khr = false; + bool int8_storage_khr = false; + + for (const auto& properties : ext_props) { + if (strcmp("VK_KHR_timeline_semaphore", properties.extensionName) == 0) { + timeline_semaphore_khr = true; + } else if (strcmp("VK_KHR_8bit_storage", properties.extensionName) == 0) { + int8_storage_khr = true; + } + } + + if (!timeline_semaphore_khr || !int8_storage_khr) { + return false; + } + } + VkPhysicalDeviceFeatures2 device_features2; device_features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; - VkPhysicalDeviceVulkan11Features vk11_features; - vk11_features.pNext = nullptr; - vk11_features.sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; - device_features2.pNext = &vk11_features; + VkPhysicalDeviceVulkan11Features vk11_features {}; + VkPhysicalDevice16BitStorageFeatures storage_16bit_features {}; + + if (device_is_vulkan_12) { + vk11_features.pNext = nullptr; + vk11_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; + device_features2.pNext = &vk11_features; + } else { + storage_16bit_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES; + storage_16bit_features.pNext = nullptr; + device_features2.pNext = &storage_16bit_features; + } vkGetPhysicalDeviceFeatures2(vkdev, &device_features2); - return vk11_features.storageBuffer16BitAccess; + return device_is_vulkan_12 ? vk11_features.storageBuffer16BitAccess : storage_16bit_features.storageBuffer16BitAccess; } static bool ggml_vk_khr_cooperative_matrix_support(const vk::PhysicalDeviceProperties& props, const vk::PhysicalDeviceDriverProperties& driver_props, vk_device_architecture arch) { From 95fa09250dc01797b12df2293c0699dc6195edb0 Mon Sep 17 00:00:00 2001 From: Thomas Guillem Date: Wed, 21 Jan 2026 06:29:50 +0100 Subject: [PATCH 7/8] vulkan: lower API requirement to 1.1 --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index c1fd458f..bedeca89 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -5585,9 +5585,9 @@ static void ggml_vk_instance_init() { uint32_t api_version = vk::enumerateInstanceVersion(); - if (api_version < VK_API_VERSION_1_2) { - std::cerr << "ggml_vulkan: Error: Vulkan 1.2 required." << std::endl; - throw vk::SystemError(vk::Result::eErrorFeatureNotPresent, "Vulkan 1.2 required"); + if (api_version < VK_API_VERSION_1_1) { + std::cerr << "ggml_vulkan: Error: Vulkan 1.1 required." 
<< std::endl; + throw vk::SystemError(vk::Result::eErrorFeatureNotPresent, "Vulkan 1.1 required"); } vk::ApplicationInfo app_info{ "ggml-vulkan", 1, nullptr, 0, api_version }; From b95296c4e082de5712abba1fa1393f20fe2fbefa Mon Sep 17 00:00:00 2001 From: Thomas Guillem Date: Mon, 19 Jan 2026 13:25:52 +0100 Subject: [PATCH 8/8] vulkan: add 1.1 shader compatibility mode if GGML_VULKAN_MIN_1_1 is defined: - rte shaders will be built with 1.1 API and "SPV_KHR_float_controls" spirv extension. - all non-_cm2 shaders will be built with 1.1 API No changes if GGML_VULKAN_MIN_1_1 is not defined (default). --- ggml/CMakeLists.txt | 1 + ggml/src/ggml-vulkan/CMakeLists.txt | 5 ++++ .../ggml-vulkan/vulkan-shaders/CMakeLists.txt | 3 ++ ggml/src/ggml-vulkan/vulkan-shaders/rte.glsl | 6 +++- .../vulkan-shaders/vulkan-shaders-gen.cpp | 29 +++++++++++++++++-- 5 files changed, 41 insertions(+), 3 deletions(-) diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index 0176ca1c..4c8b45f1 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -222,6 +222,7 @@ option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug ou option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug info" OFF) option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF) option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF) +option(GGML_VULKAN_MIN_1_1 "ggml: target Vulkan 1.1 minimum (SPIR-V 1.3)" OFF) option(GGML_WEBGPU "ggml: use WebGPU" OFF) option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF) option(GGML_WEBGPU_CPU_PROFILE "ggml: enable WebGPU profiling (CPU)" OFF) diff --git a/ggml/src/ggml-vulkan/CMakeLists.txt b/ggml/src/ggml-vulkan/CMakeLists.txt index de01336c..b5a5d816 100644 --- a/ggml/src/ggml-vulkan/CMakeLists.txt +++ b/ggml/src/ggml-vulkan/CMakeLists.txt @@ -112,6 +112,11 @@ if (Vulkan_FOUND) list(APPEND VULKAN_SHADER_GEN_CMAKE_ARGS -DGGML_VULKAN_SHADER_DEBUG_INFO=ON) endif() + if (GGML_VULKAN_MIN_1_1) + add_compile_definitions(GGML_VULKAN_MIN_1_1) + 
list(APPEND VULKAN_SHADER_GEN_CMAKE_ARGS -DGGML_VULKAN_MIN_1_1=ON) + endif() + if (GGML_VULKAN_VALIDATE) add_compile_definitions(GGML_VULKAN_VALIDATE) endif() diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt b/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt index e1f613fb..393d21b3 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +++ b/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt @@ -23,6 +23,9 @@ if (GGML_VULKAN_SHADER_DEBUG_INFO) add_compile_definitions(GGML_VULKAN_SHADER_DEBUG_INFO) message(STATUS "Enabling shader debug info") endif() +if (GGML_VULKAN_MIN_1_1) + add_compile_definitions(GGML_VULKAN_MIN_1_1) +endif() set(TARGET vulkan-shaders-gen) add_executable(${TARGET} vulkan-shaders-gen.cpp) diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rte.glsl b/ggml/src/ggml-vulkan/vulkan-shaders/rte.glsl index ad51c1e8..ce7ac8d8 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/rte.glsl +++ b/ggml/src/ggml-vulkan/vulkan-shaders/rte.glsl @@ -1,5 +1,9 @@ #if RTE16 #extension GL_EXT_spirv_intrinsics : enable -spirv_execution_mode(capabilities = [4467], 4462, 16); // RoundingModeRTE, 16 bits +spirv_execution_mode( +#ifdef VULKAN11_RTE + extensions = ["SPV_KHR_float_controls"], +#endif + capabilities = [4467], 4462, 16); // RoundingModeRTE, 16 bits #endif // RTE16 diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp index bbdbf9dc..f809658b 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp @@ -322,7 +322,21 @@ compile_count_guard acquire_compile_slot() { } void string_to_spv_func(std::string name, std::string in_path, std::string out_path, std::map defines, bool coopmat, bool dep_file, compile_count_guard slot) { - std::string target_env = (name.find("_cm2") != std::string::npos) ? 
"--target-env=vulkan1.3" : "--target-env=vulkan1.2"; + std::string target_env; + bool is_vulkan11; +#ifdef GGML_VULKAN_MIN_1_1 + // Vulkan 1.1 compatibility mode + if (name.find("_cm2") != std::string::npos) { + target_env = "--target-env=vulkan1.3"; + is_vulkan11 = false; + } else { + target_env = "--target-env=vulkan1.1"; + is_vulkan11 = true; + } +#else + target_env = (name.find("_cm2") != std::string::npos) ? "--target-env=vulkan1.3" : "--target-env=vulkan1.2"; + is_vulkan11 = false; +#endif #ifdef _WIN32 std::vector cmd = {GLSLC, "-fshader-stage=compute", target_env, "\"" + in_path + "\"", "-o", "\"" + out_path + "\""}; @@ -333,7 +347,13 @@ void string_to_spv_func(std::string name, std::string in_path, std::string out_p // disable spirv-opt for coopmat shaders for https://github.com/ggerganov/llama.cpp/issues/10734 // disable spirv-opt for bf16 shaders for https://github.com/ggml-org/llama.cpp/issues/15344 // disable spirv-opt for rope shaders for https://github.com/ggml-org/llama.cpp/issues/16860 - if (!coopmat && name.find("bf16") == std::string::npos && name.find("rope") == std::string::npos) { + // disable spirv-opt for RTE shaders with vulkan1.1: spirv-opt rejects RoundingModeRTE with vulkan1.1 target + bool has_rte = (name.find("_rte") != std::string::npos) || + (defines.find("RTE16") != defines.end() && defines.at("RTE16") == "1"); + if (!coopmat && + name.find("bf16") == std::string::npos && + name.find("rope") == std::string::npos && + !(has_rte && is_vulkan11)) { cmd.push_back("-O"); } @@ -351,6 +371,11 @@ void string_to_spv_func(std::string name, std::string in_path, std::string out_p cmd.push_back("-g"); #endif + // Need SPV_KHR_float_controls extension for Vulkan 1.1 RTE shaders + if (is_vulkan11 && has_rte) { + cmd.push_back("-DVULKAN11_RTE=1"); + } + for (const auto& define : defines) { cmd.push_back("-D" + define.first + "=" + define.second); }