diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index c780077a..86d76dc9 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -222,6 +222,7 @@ option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug ou option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug info" OFF) option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF) option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF) +option(GGML_VULKAN_MIN_1_1 "ggml: target Vulkan 1.1 minimum (SPIR-V 1.3)" OFF) option(GGML_WEBGPU "ggml: use WebGPU" OFF) option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF) option(GGML_WEBGPU_CPU_PROFILE "ggml: enable WebGPU profiling (CPU)" OFF) diff --git a/ggml/src/ggml-vulkan/CMakeLists.txt b/ggml/src/ggml-vulkan/CMakeLists.txt index 715a263a..783a692e 100644 --- a/ggml/src/ggml-vulkan/CMakeLists.txt +++ b/ggml/src/ggml-vulkan/CMakeLists.txt @@ -112,6 +112,11 @@ if (Vulkan_FOUND) list(APPEND VULKAN_SHADER_GEN_CMAKE_ARGS -DGGML_VULKAN_SHADER_DEBUG_INFO=ON) endif() + if (GGML_VULKAN_MIN_1_1) + add_compile_definitions(GGML_VULKAN_MIN_1_1) + list(APPEND VULKAN_SHADER_GEN_CMAKE_ARGS -DGGML_VULKAN_MIN_1_1=ON) + endif() + if (GGML_VULKAN_VALIDATE) add_compile_definitions(GGML_VULKAN_VALIDATE) endif() diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 15ed5b2a..e3534d76 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -619,6 +619,8 @@ struct vk_device_struct { bool multi_add; bool shader_int64; bool buffer_device_address; + // Not needed for Vulkan 1.2+ where it's a core function + PFN_vkGetBufferDeviceAddressKHR pfn_vkGetBufferDeviceAddress = nullptr; bool vulkan_memory_model; bool add_rms_fusion; @@ -2710,8 +2712,13 @@ static vk_buffer ggml_vk_create_buffer(vk_device& device, size_t size, const std buf->size = size; if (device->buffer_device_address) { - const vk::BufferDeviceAddressInfo addressInfo(buf->buffer); - buf->bda_addr = device->device.getBufferAddress(addressInfo); + if (device->pfn_vkGetBufferDeviceAddress){ + vk::BufferDeviceAddressInfo addressInfo(buf->buffer); + buf->bda_addr = device->pfn_vkGetBufferDeviceAddress(device->device, &static_cast(addressInfo)); + } else { + const vk::BufferDeviceAddressInfo addressInfo(buf->buffer); + buf->bda_addr = device->device.getBufferAddress(addressInfo); + } } device->memory_logger->log_allocation(buf, size); @@ -4798,6 +4805,9 @@ static vk_device ggml_vk_get_device(size_t idx) { device->physical_device = physical_devices[dev_num]; const std::vector ext_props = device->physical_device.enumerateDeviceExtensionProperties(); + vk::PhysicalDeviceProperties device_props = device->physical_device.getProperties(); + const bool device_is_vulkan_12 = device_props.apiVersion >= VK_API_VERSION_1_2; + device->architecture = get_device_architecture(device->physical_device); const char* GGML_VK_PREFER_HOST_MEMORY = getenv("GGML_VK_PREFER_HOST_MEMORY"); @@ -4814,6 +4824,7 @@ static vk_device ggml_vk_get_device(size_t idx) { bool fp16_storage = false; bool fp16_compute = false; + bool int8_storage_khr = false; bool maintenance4_support = false; bool sm_builtins = false; bool amd_shader_core_properties2 = false; @@ -4824,12 +4835,19 @@ static vk_device ggml_vk_get_device(size_t idx) { device->integer_dot_product = false; device->shader_64b_indexing = false; bool bfloat16_support = false; + bool buffer_device_address_khr = false; + bool timeline_semaphore_khr = false; + bool vulkan_memory_model_khr = false; + bool shader_float_controls_khr = false; + bool descriptor_indexing_ext = false; for (const auto& properties : ext_props) { if (strcmp("VK_KHR_maintenance4", properties.extensionName) == 0) { maintenance4_support = true; } else if (strcmp("VK_KHR_16bit_storage", properties.extensionName) == 0) { fp16_storage = true; + } else if (strcmp("VK_KHR_8bit_storage", properties.extensionName) == 0) { + int8_storage_khr = true; } else if (strcmp("VK_KHR_shader_float16_int8", properties.extensionName) == 0) { fp16_compute = true; } else if (strcmp("VK_NV_shader_sm_builtins", properties.extensionName) == 0) { @@ -4874,9 +4892,27 @@ static vk_device ggml_vk_get_device(size_t idx) { } else if (strcmp("VK_EXT_shader_64bit_indexing", properties.extensionName) == 0) { device->shader_64b_indexing = true; #endif + } else if (strcmp("VK_KHR_buffer_device_address", properties.extensionName) == 0) { + buffer_device_address_khr = true; + } else if (strcmp("VK_KHR_timeline_semaphore", properties.extensionName) == 0) { + timeline_semaphore_khr = true; + } else if (strcmp("VK_KHR_vulkan_memory_model", properties.extensionName) == 0) { + vulkan_memory_model_khr = true; + } else if (strcmp("VK_KHR_shader_float_controls", properties.extensionName) == 0) { + shader_float_controls_khr = true; + } else if (strcmp("VK_EXT_descriptor_indexing", properties.extensionName) == 0) { + descriptor_indexing_ext = true; } } + if (!device_is_vulkan_12 && !timeline_semaphore_khr) { + throw std::runtime_error("Unsupported device: timeline semaphores required"); + } + + if (!device_is_vulkan_12 && !int8_storage_khr) { + throw std::runtime_error("Unsupported device: 8-bit storage required"); + } + vk::PhysicalDeviceProperties2 props2; vk::PhysicalDeviceMaintenance3Properties props3; vk::PhysicalDeviceMaintenance4Properties props4; @@ -4886,6 +4922,7 @@ static vk_device ggml_vk_get_device(size_t idx) { vk::PhysicalDeviceShaderCoreProperties2AMD amd_shader_core_properties2_props; vk::PhysicalDeviceVulkan11Properties vk11_props; vk::PhysicalDeviceVulkan12Properties vk12_props; + vk::PhysicalDeviceFloatControlsProperties float_controls_props; vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT subgroup_size_control_props; vk::PhysicalDeviceShaderIntegerDotProductPropertiesKHR shader_integer_dot_product_props; vk::PhysicalDeviceExternalMemoryHostPropertiesEXT external_memory_host_props; @@ -4893,10 +4930,21 @@ static vk_device ggml_vk_get_device(size_t idx) { props2.pNext = &props3; props3.pNext = &subgroup_props; subgroup_props.pNext = &driver_props; - driver_props.pNext = &vk11_props; - vk11_props.pNext = &vk12_props; - VkBaseOutStructure * last_struct = (VkBaseOutStructure *)&vk12_props; + VkBaseOutStructure * last_struct; + + if (device_is_vulkan_12) { + driver_props.pNext = &vk11_props; + vk11_props.pNext = &vk12_props; + last_struct = (VkBaseOutStructure *)&vk12_props; + } else { + if (shader_float_controls_khr) { + driver_props.pNext = &float_controls_props; + last_struct = (VkBaseOutStructure *)&float_controls_props; + } else { + last_struct = (VkBaseOutStructure *)&driver_props; + } + } if (maintenance4_support) { last_struct->pNext = (VkBaseOutStructure *)&props4; @@ -4996,28 +5044,32 @@ static vk_device ggml_vk_get_device(size_t idx) { } else { device->shader_core_count = 0; } - device->float_controls_rte_fp16 = vk12_props.shaderRoundingModeRTEFloat16; + if (device_is_vulkan_12) { + device->float_controls_rte_fp16 = vk12_props.shaderRoundingModeRTEFloat16; + } else { + device->float_controls_rte_fp16 = shader_float_controls_khr ? float_controls_props.shaderRoundingModeRTEFloat16 : false; + } - device->subgroup_basic = (vk11_props.subgroupSupportedStages & vk::ShaderStageFlagBits::eCompute) && - (vk11_props.subgroupSupportedOperations & vk::SubgroupFeatureFlagBits::eBasic); - device->subgroup_arithmetic = (vk11_props.subgroupSupportedStages & vk::ShaderStageFlagBits::eCompute) && - (vk11_props.subgroupSupportedOperations & vk::SubgroupFeatureFlagBits::eArithmetic); + device->subgroup_basic = (subgroup_props.supportedStages & vk::ShaderStageFlagBits::eCompute) && + (subgroup_props.supportedOperations & vk::SubgroupFeatureFlagBits::eBasic); + device->subgroup_arithmetic = (subgroup_props.supportedStages & vk::ShaderStageFlagBits::eCompute) && + (subgroup_props.supportedOperations & vk::SubgroupFeatureFlagBits::eArithmetic); #ifdef __APPLE__ // Workaround for subgroup arithmetic failing on MoltenVK with AMD GPUs (issue 15846) if (device->vendor_id == VK_VENDOR_ID_AMD) { device->subgroup_arithmetic = false; } #endif - device->subgroup_shuffle = (vk11_props.subgroupSupportedStages & vk::ShaderStageFlagBits::eCompute) && - (vk11_props.subgroupSupportedOperations & vk::SubgroupFeatureFlagBits::eShuffle); - device->subgroup_clustered = (vk11_props.subgroupSupportedStages & vk::ShaderStageFlagBits::eCompute) && - (vk11_props.subgroupSupportedOperations & vk::SubgroupFeatureFlagBits::eClustered); + device->subgroup_shuffle = (subgroup_props.supportedStages & vk::ShaderStageFlagBits::eCompute) && + (subgroup_props.supportedOperations & vk::SubgroupFeatureFlagBits::eShuffle); + device->subgroup_clustered = (subgroup_props.supportedStages & vk::ShaderStageFlagBits::eCompute) && + (subgroup_props.supportedOperations & vk::SubgroupFeatureFlagBits::eClustered); - device->subgroup_ballot = (vk11_props.subgroupSupportedStages & vk::ShaderStageFlagBits::eCompute) && - (vk11_props.subgroupSupportedOperations & vk::SubgroupFeatureFlagBits::eBallot); + device->subgroup_ballot = (subgroup_props.supportedStages & vk::ShaderStageFlagBits::eCompute) && + (subgroup_props.supportedOperations & vk::SubgroupFeatureFlagBits::eBallot); - device->subgroup_vote = (vk11_props.subgroupSupportedStages & vk::ShaderStageFlagBits::eCompute) && - (vk11_props.subgroupSupportedOperations & vk::SubgroupFeatureFlagBits::eVote); + device->subgroup_vote = (subgroup_props.supportedStages & vk::ShaderStageFlagBits::eCompute) && + (subgroup_props.supportedOperations & vk::SubgroupFeatureFlagBits::eVote); const bool force_disable_f16 = getenv("GGML_VK_DISABLE_F16") != nullptr; @@ -5063,17 +5115,78 @@ static vk_device ggml_vk_get_device(size_t idx) { device_features2.pNext = nullptr; device_features2.features = (VkPhysicalDeviceFeatures)device_features; - VkPhysicalDeviceVulkan11Features vk11_features; - vk11_features.pNext = nullptr; - vk11_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; - device_features2.pNext = &vk11_features; + VkPhysicalDeviceVulkan11Features vk11_features {}; + VkPhysicalDeviceVulkan12Features vk12_features {}; - VkPhysicalDeviceVulkan12Features vk12_features; - vk12_features.pNext = nullptr; - vk12_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; - vk11_features.pNext = &vk12_features; + // Used when Vulkan 1.2 API not available + VkPhysicalDevice16BitStorageFeatures storage_16bit_features {}; + VkPhysicalDevice8BitStorageFeatures storage_8bit_features {}; + VkPhysicalDeviceShaderFloat16Int8Features float16_int8_features {}; + VkPhysicalDeviceBufferDeviceAddressFeaturesKHR buffer_device_address_features {}; + VkPhysicalDeviceDescriptorIndexingFeaturesEXT descriptor_indexing_features {}; + VkPhysicalDeviceVulkanMemoryModelFeatures vulkan_memory_model_features {}; + VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore_features {}; - last_struct = (VkBaseOutStructure *)&vk12_features; + if (device_is_vulkan_12) { + // Use vk11 and vk12 features structures + vk11_features.pNext = nullptr; + vk11_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; + device_features2.pNext = &vk11_features; + + vk12_features.pNext = nullptr; + vk12_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; + vk11_features.pNext = &vk12_features; + + last_struct = (VkBaseOutStructure *)&vk12_features; + } else { + // Use individual features structures + storage_16bit_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES; + storage_16bit_features.pNext = nullptr; + device_features2.pNext = &storage_16bit_features; + last_struct = (VkBaseOutStructure *)&storage_16bit_features; + + if (int8_storage_khr) { + storage_8bit_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES; + storage_8bit_features.pNext = nullptr; + last_struct->pNext = (VkBaseOutStructure *)&storage_8bit_features; + last_struct = (VkBaseOutStructure *)&storage_8bit_features; + } + + if (fp16_compute) { + float16_int8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES; + float16_int8_features.pNext = nullptr; + last_struct->pNext = (VkBaseOutStructure *)&float16_int8_features; + last_struct = (VkBaseOutStructure *)&float16_int8_features; + } + + if (buffer_device_address_khr) { + buffer_device_address_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_KHR; + buffer_device_address_features.pNext = nullptr; + last_struct->pNext = (VkBaseOutStructure *)&buffer_device_address_features; + last_struct = (VkBaseOutStructure *)&buffer_device_address_features; + } + + if (descriptor_indexing_ext) { + descriptor_indexing_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT; + descriptor_indexing_features.pNext = nullptr; + last_struct->pNext = (VkBaseOutStructure *)&descriptor_indexing_features; + last_struct = (VkBaseOutStructure *)&descriptor_indexing_features; + } + + if (vulkan_memory_model_khr) { + vulkan_memory_model_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES; + vulkan_memory_model_features.pNext = nullptr; + last_struct->pNext = (VkBaseOutStructure *)&vulkan_memory_model_features; + last_struct = (VkBaseOutStructure *)&vulkan_memory_model_features; + } + + if (timeline_semaphore_khr) { + timeline_semaphore_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES; + timeline_semaphore_features.pNext = nullptr; + last_struct->pNext = (VkBaseOutStructure *)&timeline_semaphore_features; + last_struct = (VkBaseOutStructure *)&timeline_semaphore_features; + } + } VkPhysicalDevicePipelineRobustnessFeaturesEXT pl_robustness_features; pl_robustness_features.pNext = nullptr; @@ -5181,9 +5294,29 @@ static vk_device ggml_vk_get_device(size_t idx) { vkGetPhysicalDeviceFeatures2(device->physical_device, &device_features2); + bool shader_float16_supported; + bool buffer_device_address_supported; + bool vulkan_memory_model_supported; + bool storage_buffer_16bit_access_supported; + bool shader_rounding_mode_rte_fp16; + + if (device_is_vulkan_12) { + shader_float16_supported = vk12_features.shaderFloat16; + buffer_device_address_supported = vk12_features.bufferDeviceAddress; + vulkan_memory_model_supported = vk12_features.vulkanMemoryModel; + storage_buffer_16bit_access_supported = vk11_features.storageBuffer16BitAccess; + shader_rounding_mode_rte_fp16 = vk12_props.shaderRoundingModeRTEFloat16; + } else { + shader_float16_supported = float16_int8_features.shaderFloat16; + buffer_device_address_supported = buffer_device_address_features.bufferDeviceAddress && buffer_device_address_khr; + vulkan_memory_model_supported = vulkan_memory_model_features.vulkanMemoryModel && vulkan_memory_model_khr; + storage_buffer_16bit_access_supported = storage_16bit_features.storageBuffer16BitAccess; + shader_rounding_mode_rte_fp16 = shader_float_controls_khr ? float_controls_props.shaderRoundingModeRTEFloat16 : false; + } + device->pipeline_executable_properties_support = pipeline_executable_properties_support; - device->fp16 = device->fp16 && vk12_features.shaderFloat16; + device->fp16 = device->fp16 && shader_float16_supported; #if defined(VK_KHR_shader_bfloat16) device->bf16 = bfloat16_support && bfloat16_features.shaderBFloat16Type; @@ -5193,13 +5326,13 @@ static vk_device ggml_vk_get_device(size_t idx) { device->pipeline_robustness = pl_robustness_features.pipelineRobustness; - device->multi_add = vk12_props.shaderRoundingModeRTEFloat16 && + device->multi_add = shader_rounding_mode_rte_fp16 && device->properties.limits.maxPushConstantsSize >= sizeof(vk_op_multi_add_push_constants) && getenv("GGML_VK_DISABLE_MULTI_ADD") == nullptr; device->shader_int64 = device_features2.features.shaderInt64; - device->buffer_device_address = vk12_features.bufferDeviceAddress; - device->vulkan_memory_model = vk12_features.vulkanMemoryModel; + device->buffer_device_address = buffer_device_address_supported; + device->vulkan_memory_model = vulkan_memory_model_supported; if (device->subgroup_size_control) { device->subgroup_min_size = subgroup_size_control_props.minSubgroupSize; @@ -5227,7 +5360,7 @@ static vk_device ggml_vk_get_device(size_t idx) { coopmat2_features.cooperativeMatrixPerElementOperations && coopmat2_features.cooperativeMatrixTensorAddressing && coopmat2_features.cooperativeMatrixBlockLoads && - vk12_features.bufferDeviceAddress) { + buffer_device_address_supported) { std::vector flexible_dimensions; uint32_t count = 0; @@ -5294,12 +5427,22 @@ static vk_device ggml_vk_get_device(size_t idx) { #endif } - if (!vk11_features.storageBuffer16BitAccess) { + if (!storage_buffer_16bit_access_supported) { std::cerr << "ggml_vulkan: device " << GGML_VK_NAME << idx << " does not support 16-bit storage." << std::endl; throw std::runtime_error("Unsupported device"); } - device_extensions.push_back("VK_KHR_16bit_storage"); + if (fp16_storage) { + device_extensions.push_back("VK_KHR_16bit_storage"); + } + + if (!device_is_vulkan_12 && timeline_semaphore_khr) { + device_extensions.push_back("VK_KHR_timeline_semaphore"); + } + + if (!device_is_vulkan_12 && int8_storage_khr) { + device_extensions.push_back("VK_KHR_8bit_storage"); + } #ifdef GGML_VULKAN_VALIDATE device_extensions.push_back("VK_KHR_shader_non_semantic_info"); @@ -5309,6 +5452,22 @@ static vk_device ggml_vk_get_device(size_t idx) { device_extensions.push_back("VK_KHR_shader_float16_int8"); } + if (!device_is_vulkan_12 && device->buffer_device_address) { + device_extensions.push_back("VK_KHR_buffer_device_address"); + } + + if (!device_is_vulkan_12 && device->vulkan_memory_model) { + device_extensions.push_back("VK_KHR_vulkan_memory_model"); + } + + if (!device_is_vulkan_12 && shader_float_controls_khr) { + device_extensions.push_back("VK_KHR_shader_float_controls"); + } + + if (!device_is_vulkan_12 && descriptor_indexing_ext) { + device_extensions.push_back("VK_EXT_descriptor_indexing"); + } + #if defined(VK_KHR_cooperative_matrix) if (device->coopmat_support) { // Query supported shapes @@ -5430,6 +5589,15 @@ static vk_device ggml_vk_get_device(size_t idx) { device_create_info.setPNext(&device_features2); device->device = device->physical_device.createDevice(device_create_info); + if (!device_is_vulkan_12 && device->buffer_device_address) { + device->pfn_vkGetBufferDeviceAddress = (PFN_vkGetBufferDeviceAddressKHR) + vkGetDeviceProcAddr(device->device, "vkGetBufferDeviceAddressKHR"); + + if (!device->pfn_vkGetBufferDeviceAddress) { + throw std::runtime_error("Failed to load vkGetBufferDeviceAddressKHR"); + } + } + // Queues ggml_vk_create_queue(device, device->compute_queue, compute_queue_family_index, 0, { vk::PipelineStageFlagBits::eComputeShader | vk::PipelineStageFlagBits::eTransfer }, false); @@ -5492,7 +5660,10 @@ static vk_device ggml_vk_get_device(size_t idx) { vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_create_info( {}, dsl_binding); - descriptor_set_layout_create_info.setPNext(&dslbfci); + if (device_is_vulkan_12 || descriptor_indexing_ext) { + descriptor_set_layout_create_info.setPNext(&dslbfci); + } + device->dsl = device->device.createDescriptorSetLayout(descriptor_set_layout_create_info); ggml_vk_load_shaders(device); @@ -5597,6 +5768,9 @@ static void ggml_vk_print_gpu_info(size_t idx) { const vk_device_architecture device_architecture = get_device_architecture(physical_device); + vk::PhysicalDeviceProperties device_props = physical_device.getProperties(); + const bool device_is_vulkan_12 = device_props.apiVersion >= VK_API_VERSION_1_2; + const char* GGML_VK_DISABLE_F16 = getenv("GGML_VK_DISABLE_F16"); bool force_disable_f16 = GGML_VK_DISABLE_F16 != nullptr; @@ -5625,18 +5799,26 @@ static void ggml_vk_print_gpu_info(size_t idx) { device_features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; device_features2.pNext = nullptr; - VkPhysicalDeviceVulkan11Features vk11_features; - vk11_features.pNext = nullptr; - vk11_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; - device_features2.pNext = &vk11_features; + VkPhysicalDeviceVulkan11Features vk11_features {}; + VkPhysicalDeviceVulkan12Features vk12_features {}; + VkPhysicalDeviceShaderFloat16Int8Features float16_int8_features {}; - VkPhysicalDeviceVulkan12Features vk12_features; - vk12_features.pNext = nullptr; - vk12_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; - vk11_features.pNext = &vk12_features; + if (device_is_vulkan_12) { + vk11_features.pNext = nullptr; + vk11_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; + device_features2.pNext = &vk11_features; - // Pointer to the last chain element - last_struct = (VkBaseOutStructure *)&vk12_features; + vk12_features.pNext = nullptr; + vk12_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; + vk11_features.pNext = &vk12_features; + + last_struct = (VkBaseOutStructure *)&vk12_features; + } else { + float16_int8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES; + float16_int8_features.pNext = nullptr; + device_features2.pNext = &float16_int8_features; + last_struct = (VkBaseOutStructure *)&float16_int8_features; + } #if defined(GGML_VULKAN_COOPMAT_GLSLC_SUPPORT) VkPhysicalDeviceCooperativeMatrixFeaturesKHR coopmat_features; @@ -5668,7 +5850,8 @@ static void ggml_vk_print_gpu_info(size_t idx) { vkGetPhysicalDeviceFeatures2(physical_device, &device_features2); - fp16 = fp16 && vk12_features.shaderFloat16; + bool shader_float16_supported = device_is_vulkan_12 ? vk12_features.shaderFloat16 : float16_int8_features.shaderFloat16; + fp16 = fp16 && shader_float16_supported; #if defined(VK_KHR_shader_bfloat16) bool bf16 = bfloat16_support && bfloat16_features.shaderBFloat16Type; @@ -5723,9 +5906,9 @@ static void ggml_vk_instance_init() { uint32_t api_version = vk::enumerateInstanceVersion(); - if (api_version < VK_API_VERSION_1_2) { - std::cerr << "ggml_vulkan: Error: Vulkan 1.2 required." << std::endl; - throw vk::SystemError(vk::Result::eErrorFeatureNotPresent, "Vulkan 1.2 required"); + if (api_version < VK_API_VERSION_1_1) { + std::cerr << "ggml_vulkan: Error: Vulkan 1.1 required." << std::endl; + throw vk::SystemError(vk::Result::eErrorFeatureNotPresent, "Vulkan 1.1 required"); } vk::ApplicationInfo app_info{ "ggml-vulkan", 1, nullptr, 0, api_version }; @@ -15999,17 +16182,47 @@ static bool ggml_vk_instance_debug_utils_ext_available( } static bool ggml_vk_device_is_supported(const vk::PhysicalDevice & vkdev) { + vk::PhysicalDeviceProperties device_props = vkdev.getProperties(); + const bool device_is_vulkan_12 = device_props.apiVersion >= VK_API_VERSION_1_2; + + if (!device_is_vulkan_12) { + // Check for required extensions on Vulkan 1.1 + std::vector ext_props = vkdev.enumerateDeviceExtensionProperties(); + bool timeline_semaphore_khr = false; + bool int8_storage_khr = false; + + for (const auto& properties : ext_props) { + if (strcmp("VK_KHR_timeline_semaphore", properties.extensionName) == 0) { + timeline_semaphore_khr = true; + } else if (strcmp("VK_KHR_8bit_storage", properties.extensionName) == 0) { + int8_storage_khr = true; + } + } + + if (!timeline_semaphore_khr || !int8_storage_khr) { + return false; + } + } + VkPhysicalDeviceFeatures2 device_features2; device_features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; - VkPhysicalDeviceVulkan11Features vk11_features; - vk11_features.pNext = nullptr; - vk11_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; - device_features2.pNext = &vk11_features; + VkPhysicalDeviceVulkan11Features vk11_features {}; + VkPhysicalDevice16BitStorageFeatures storage_16bit_features {}; + + if (device_is_vulkan_12) { + vk11_features.pNext = nullptr; + vk11_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; + device_features2.pNext = &vk11_features; + } else { + storage_16bit_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES; + storage_16bit_features.pNext = nullptr; + device_features2.pNext = &storage_16bit_features; + } vkGetPhysicalDeviceFeatures2(vkdev, &device_features2); - return vk11_features.storageBuffer16BitAccess; + return device_is_vulkan_12 ? vk11_features.storageBuffer16BitAccess : storage_16bit_features.storageBuffer16BitAccess; } static bool ggml_vk_khr_cooperative_matrix_support(const vk::PhysicalDeviceProperties& props, const vk::PhysicalDeviceDriverProperties& driver_props, vk_device_architecture arch) { diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt b/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt index e1f613fb..393d21b3 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +++ b/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt @@ -23,6 +23,9 @@ if (GGML_VULKAN_SHADER_DEBUG_INFO) add_compile_definitions(GGML_VULKAN_SHADER_DEBUG_INFO) message(STATUS "Enabling shader debug info") endif() +if (GGML_VULKAN_MIN_1_1) + add_compile_definitions(GGML_VULKAN_MIN_1_1) +endif() set(TARGET vulkan-shaders-gen) add_executable(${TARGET} vulkan-shaders-gen.cpp) diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rte.glsl b/ggml/src/ggml-vulkan/vulkan-shaders/rte.glsl index ad51c1e8..ce7ac8d8 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/rte.glsl +++ b/ggml/src/ggml-vulkan/vulkan-shaders/rte.glsl @@ -1,5 +1,9 @@ #if RTE16 #extension GL_EXT_spirv_intrinsics : enable -spirv_execution_mode(capabilities = [4467], 4462, 16); // RoundingModeRTE, 16 bits +spirv_execution_mode( +#ifdef VULKAN11_RTE + extensions = ["SPV_KHR_float_controls"], +#endif + capabilities = [4467], 4462, 16); // RoundingModeRTE, 16 bits #endif // RTE16 diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp index 8186dba3..5a6f79bf 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp @@ -322,7 +322,21 @@ compile_count_guard acquire_compile_slot() { } void string_to_spv_func(std::string name, std::string in_path, std::string out_path, std::map defines, bool coopmat, bool dep_file, compile_count_guard slot) { - std::string target_env = (name.find("_cm2") != std::string::npos) ? "--target-env=vulkan1.3" : "--target-env=vulkan1.2"; + std::string target_env; + bool is_vulkan11; +#ifdef GGML_VULKAN_MIN_1_1 + // Vulkan 1.1 compatibility mode + if (name.find("_cm2") != std::string::npos) { + target_env = "--target-env=vulkan1.3"; + is_vulkan11 = false; + } else { + target_env = "--target-env=vulkan1.1"; + is_vulkan11 = true; + } +#else + target_env = (name.find("_cm2") != std::string::npos) ? "--target-env=vulkan1.3" : "--target-env=vulkan1.2"; + is_vulkan11 = false; +#endif #ifdef _WIN32 std::vector cmd = {GLSLC, "-fshader-stage=compute", target_env, "\"" + in_path + "\"", "-o", "\"" + out_path + "\""}; @@ -333,7 +347,13 @@ void string_to_spv_func(std::string name, std::string in_path, std::string out_p // disable spirv-opt for coopmat shaders for https://github.com/ggml-org/llama.cpp/issues/10734 // disable spirv-opt for bf16 shaders for https://github.com/ggml-org/llama.cpp/issues/15344 // disable spirv-opt for rope shaders for https://github.com/ggml-org/llama.cpp/issues/16860 - if (!coopmat && name.find("bf16") == std::string::npos && name.find("rope") == std::string::npos) { + // disable spirv-opt for RTE shaders with vulkan1.1: spirv-opt rejects RoundingModeRTE with vulkan1.1 target + bool has_rte = (name.find("_rte") != std::string::npos) || + (defines.find("RTE16") != defines.end() && defines.at("RTE16") == "1"); + if (!coopmat && + name.find("bf16") == std::string::npos && + name.find("rope") == std::string::npos && + !(has_rte && is_vulkan11)) { cmd.push_back("-O"); } @@ -351,6 +371,11 @@ void string_to_spv_func(std::string name, std::string in_path, std::string out_p cmd.push_back("-g"); #endif + // Need SPV_KHR_float_controls extension for Vulkan 1.1 RTE shaders + if (is_vulkan11 && has_rte) { + cmd.push_back("-DVULKAN11_RTE=1"); + } + for (const auto& define : defines) { cmd.push_back("-D" + define.first + "=" + define.second); }