vulkan: use individual features structures on Vulkan 1.1
This commit is contained in:
parent
76f8a0201a
commit
e18bd369b9
|
|
@ -4502,6 +4502,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
|
|||
|
||||
bool fp16_storage = false;
|
||||
bool fp16_compute = false;
|
||||
bool int8_storage_khr = false;
|
||||
bool maintenance4_support = false;
|
||||
bool sm_builtins = false;
|
||||
bool amd_shader_core_properties2 = false;
|
||||
|
|
@ -4512,13 +4513,19 @@ static vk_device ggml_vk_get_device(size_t idx) {
|
|||
device->integer_dot_product = false;
|
||||
device->shader_64b_indexing = false;
|
||||
bool bfloat16_support = false;
|
||||
bool buffer_device_address_khr = false;
|
||||
bool timeline_semaphore_khr = false;
|
||||
bool vulkan_memory_model_khr = false;
|
||||
bool shader_float_controls_khr = false;
|
||||
bool descriptor_indexing_ext = false;
|
||||
|
||||
for (const auto& properties : ext_props) {
|
||||
if (strcmp("VK_KHR_maintenance4", properties.extensionName) == 0) {
|
||||
maintenance4_support = true;
|
||||
} else if (strcmp("VK_KHR_16bit_storage", properties.extensionName) == 0) {
|
||||
fp16_storage = true;
|
||||
} else if (strcmp("VK_KHR_8bit_storage", properties.extensionName) == 0) {
|
||||
int8_storage_khr = true;
|
||||
} else if (strcmp("VK_KHR_shader_float16_int8", properties.extensionName) == 0) {
|
||||
fp16_compute = true;
|
||||
} else if (strcmp("VK_NV_shader_sm_builtins", properties.extensionName) == 0) {
|
||||
|
|
@ -4563,11 +4570,27 @@ static vk_device ggml_vk_get_device(size_t idx) {
|
|||
} else if (strcmp("VK_EXT_shader_64bit_indexing", properties.extensionName) == 0) {
|
||||
device->shader_64b_indexing = true;
|
||||
#endif
|
||||
} else if (strcmp("VK_KHR_buffer_device_address", properties.extensionName) == 0) {
|
||||
buffer_device_address_khr = true;
|
||||
} else if (strcmp("VK_KHR_timeline_semaphore", properties.extensionName) == 0) {
|
||||
timeline_semaphore_khr = true;
|
||||
} else if (strcmp("VK_KHR_vulkan_memory_model", properties.extensionName) == 0) {
|
||||
vulkan_memory_model_khr = true;
|
||||
} else if (strcmp("VK_KHR_shader_float_controls", properties.extensionName) == 0) {
|
||||
shader_float_controls_khr = true;
|
||||
} else if (strcmp("VK_EXT_descriptor_indexing", properties.extensionName) == 0) {
|
||||
descriptor_indexing_ext = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!device_is_vulkan_12 && !timeline_semaphore_khr) {
|
||||
throw std::runtime_error("Unsupported device: timeline semaphores required");
|
||||
}
|
||||
|
||||
if (!device_is_vulkan_12 && !int8_storage_khr) {
|
||||
throw std::runtime_error("Unsupported device: 8-bit storage required");
|
||||
}
|
||||
|
||||
vk::PhysicalDeviceProperties2 props2;
|
||||
vk::PhysicalDeviceMaintenance3Properties props3;
|
||||
vk::PhysicalDeviceMaintenance4Properties props4;
|
||||
|
|
@ -4765,17 +4788,78 @@ static vk_device ggml_vk_get_device(size_t idx) {
|
|||
device_features2.pNext = nullptr;
|
||||
device_features2.features = (VkPhysicalDeviceFeatures)device_features;
|
||||
|
||||
VkPhysicalDeviceVulkan11Features vk11_features;
|
||||
vk11_features.pNext = nullptr;
|
||||
vk11_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES;
|
||||
device_features2.pNext = &vk11_features;
|
||||
VkPhysicalDeviceVulkan11Features vk11_features {};
|
||||
VkPhysicalDeviceVulkan12Features vk12_features {};
|
||||
|
||||
VkPhysicalDeviceVulkan12Features vk12_features;
|
||||
vk12_features.pNext = nullptr;
|
||||
vk12_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES;
|
||||
vk11_features.pNext = &vk12_features;
|
||||
// Used when Vulkan 1.2 API not available
|
||||
VkPhysicalDevice16BitStorageFeatures storage_16bit_features {};
|
||||
VkPhysicalDevice8BitStorageFeatures storage_8bit_features {};
|
||||
VkPhysicalDeviceShaderFloat16Int8Features float16_int8_features {};
|
||||
VkPhysicalDeviceBufferDeviceAddressFeaturesKHR buffer_device_address_features {};
|
||||
VkPhysicalDeviceDescriptorIndexingFeaturesEXT descriptor_indexing_features {};
|
||||
VkPhysicalDeviceVulkanMemoryModelFeatures vulkan_memory_model_features {};
|
||||
VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore_features {};
|
||||
|
||||
last_struct = (VkBaseOutStructure *)&vk12_features;
|
||||
if (device_is_vulkan_12) {
|
||||
// Use vk11 and vk12 features structures
|
||||
vk11_features.pNext = nullptr;
|
||||
vk11_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES;
|
||||
device_features2.pNext = &vk11_features;
|
||||
|
||||
vk12_features.pNext = nullptr;
|
||||
vk12_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES;
|
||||
vk11_features.pNext = &vk12_features;
|
||||
|
||||
last_struct = (VkBaseOutStructure *)&vk12_features;
|
||||
} else {
|
||||
// Use individual features structures
|
||||
storage_16bit_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES;
|
||||
storage_16bit_features.pNext = nullptr;
|
||||
device_features2.pNext = &storage_16bit_features;
|
||||
last_struct = (VkBaseOutStructure *)&storage_16bit_features;
|
||||
|
||||
if (int8_storage_khr) {
|
||||
storage_8bit_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES;
|
||||
storage_8bit_features.pNext = nullptr;
|
||||
last_struct->pNext = (VkBaseOutStructure *)&storage_8bit_features;
|
||||
last_struct = (VkBaseOutStructure *)&storage_8bit_features;
|
||||
}
|
||||
|
||||
if (fp16_compute) {
|
||||
float16_int8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
|
||||
float16_int8_features.pNext = nullptr;
|
||||
last_struct->pNext = (VkBaseOutStructure *)&float16_int8_features;
|
||||
last_struct = (VkBaseOutStructure *)&float16_int8_features;
|
||||
}
|
||||
|
||||
if (buffer_device_address_khr) {
|
||||
buffer_device_address_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_KHR;
|
||||
buffer_device_address_features.pNext = nullptr;
|
||||
last_struct->pNext = (VkBaseOutStructure *)&buffer_device_address_features;
|
||||
last_struct = (VkBaseOutStructure *)&buffer_device_address_features;
|
||||
}
|
||||
|
||||
if (descriptor_indexing_ext) {
|
||||
descriptor_indexing_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT;
|
||||
descriptor_indexing_features.pNext = nullptr;
|
||||
last_struct->pNext = (VkBaseOutStructure *)&descriptor_indexing_features;
|
||||
last_struct = (VkBaseOutStructure *)&descriptor_indexing_features;
|
||||
}
|
||||
|
||||
if (vulkan_memory_model_khr) {
|
||||
vulkan_memory_model_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES;
|
||||
vulkan_memory_model_features.pNext = nullptr;
|
||||
last_struct->pNext = (VkBaseOutStructure *)&vulkan_memory_model_features;
|
||||
last_struct = (VkBaseOutStructure *)&vulkan_memory_model_features;
|
||||
}
|
||||
|
||||
if (timeline_semaphore_khr) {
|
||||
timeline_semaphore_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES;
|
||||
timeline_semaphore_features.pNext = nullptr;
|
||||
last_struct->pNext = (VkBaseOutStructure *)&timeline_semaphore_features;
|
||||
last_struct = (VkBaseOutStructure *)&timeline_semaphore_features;
|
||||
}
|
||||
}
|
||||
|
||||
VkPhysicalDevicePipelineRobustnessFeaturesEXT pl_robustness_features;
|
||||
pl_robustness_features.pNext = nullptr;
|
||||
|
|
@ -4883,9 +4967,29 @@ static vk_device ggml_vk_get_device(size_t idx) {
|
|||
|
||||
vkGetPhysicalDeviceFeatures2(device->physical_device, &device_features2);
|
||||
|
||||
bool shader_float16_supported;
|
||||
bool buffer_device_address_supported;
|
||||
bool vulkan_memory_model_supported;
|
||||
bool storage_buffer_16bit_access_supported;
|
||||
bool shader_rounding_mode_rte_fp16;
|
||||
|
||||
if (device_is_vulkan_12) {
|
||||
shader_float16_supported = vk12_features.shaderFloat16;
|
||||
buffer_device_address_supported = vk12_features.bufferDeviceAddress;
|
||||
vulkan_memory_model_supported = vk12_features.vulkanMemoryModel;
|
||||
storage_buffer_16bit_access_supported = vk11_features.storageBuffer16BitAccess;
|
||||
shader_rounding_mode_rte_fp16 = vk12_props.shaderRoundingModeRTEFloat16;
|
||||
} else {
|
||||
shader_float16_supported = float16_int8_features.shaderFloat16;
|
||||
buffer_device_address_supported = buffer_device_address_features.bufferDeviceAddress && buffer_device_address_khr;
|
||||
vulkan_memory_model_supported = vulkan_memory_model_features.vulkanMemoryModel && vulkan_memory_model_khr;
|
||||
storage_buffer_16bit_access_supported = storage_16bit_features.storageBuffer16BitAccess;
|
||||
shader_rounding_mode_rte_fp16 = shader_float_controls_khr ? float_controls_props.shaderRoundingModeRTEFloat16 : false;
|
||||
}
|
||||
|
||||
device->pipeline_executable_properties_support = pipeline_executable_properties_support;
|
||||
|
||||
device->fp16 = device->fp16 && vk12_features.shaderFloat16;
|
||||
device->fp16 = device->fp16 && shader_float16_supported;
|
||||
|
||||
#if defined(VK_KHR_shader_bfloat16)
|
||||
device->bf16 = bfloat16_support && bfloat16_features.shaderBFloat16Type;
|
||||
|
|
@ -4895,13 +4999,13 @@ static vk_device ggml_vk_get_device(size_t idx) {
|
|||
|
||||
device->pipeline_robustness = pl_robustness_features.pipelineRobustness;
|
||||
|
||||
device->multi_add = vk12_props.shaderRoundingModeRTEFloat16 &&
|
||||
device->multi_add = shader_rounding_mode_rte_fp16 &&
|
||||
device->properties.limits.maxPushConstantsSize >= sizeof(vk_op_multi_add_push_constants) &&
|
||||
getenv("GGML_VK_DISABLE_MULTI_ADD") == nullptr;
|
||||
|
||||
device->shader_int64 = device_features2.features.shaderInt64;
|
||||
device->buffer_device_address = vk12_features.bufferDeviceAddress;
|
||||
device->vulkan_memory_model = vk12_features.vulkanMemoryModel;
|
||||
device->buffer_device_address = buffer_device_address_supported;
|
||||
device->vulkan_memory_model = vulkan_memory_model_supported;
|
||||
|
||||
if (device->subgroup_size_control) {
|
||||
device->subgroup_min_size = subgroup_size_control_props.minSubgroupSize;
|
||||
|
|
@ -4933,7 +5037,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
|
|||
coopmat2_features.cooperativeMatrixPerElementOperations &&
|
||||
coopmat2_features.cooperativeMatrixTensorAddressing &&
|
||||
coopmat2_features.cooperativeMatrixBlockLoads &&
|
||||
vk12_features.bufferDeviceAddress) {
|
||||
buffer_device_address_supported) {
|
||||
|
||||
std::vector<VkCooperativeMatrixFlexibleDimensionsPropertiesNV> flexible_dimensions;
|
||||
uint32_t count = 0;
|
||||
|
|
@ -5000,12 +5104,22 @@ static vk_device ggml_vk_get_device(size_t idx) {
|
|||
#endif
|
||||
}
|
||||
|
||||
if (!vk11_features.storageBuffer16BitAccess) {
|
||||
if (!storage_buffer_16bit_access_supported) {
|
||||
std::cerr << "ggml_vulkan: device " << GGML_VK_NAME << idx << " does not support 16-bit storage." << std::endl;
|
||||
throw std::runtime_error("Unsupported device");
|
||||
}
|
||||
|
||||
device_extensions.push_back("VK_KHR_16bit_storage");
|
||||
if (fp16_storage) {
|
||||
device_extensions.push_back("VK_KHR_16bit_storage");
|
||||
}
|
||||
|
||||
if (!device_is_vulkan_12 && timeline_semaphore_khr) {
|
||||
device_extensions.push_back("VK_KHR_timeline_semaphore");
|
||||
}
|
||||
|
||||
if (!device_is_vulkan_12 && int8_storage_khr) {
|
||||
device_extensions.push_back("VK_KHR_8bit_storage");
|
||||
}
|
||||
|
||||
#ifdef GGML_VULKAN_VALIDATE
|
||||
device_extensions.push_back("VK_KHR_shader_non_semantic_info");
|
||||
|
|
@ -5015,6 +5129,22 @@ static vk_device ggml_vk_get_device(size_t idx) {
|
|||
device_extensions.push_back("VK_KHR_shader_float16_int8");
|
||||
}
|
||||
|
||||
if (!device_is_vulkan_12 && device->buffer_device_address) {
|
||||
device_extensions.push_back("VK_KHR_buffer_device_address");
|
||||
}
|
||||
|
||||
if (!device_is_vulkan_12 && device->vulkan_memory_model) {
|
||||
device_extensions.push_back("VK_KHR_vulkan_memory_model");
|
||||
}
|
||||
|
||||
if (!device_is_vulkan_12 && shader_float_controls_khr) {
|
||||
device_extensions.push_back("VK_KHR_shader_float_controls");
|
||||
}
|
||||
|
||||
if (!device_is_vulkan_12 && descriptor_indexing_ext) {
|
||||
device_extensions.push_back("VK_EXT_descriptor_indexing");
|
||||
}
|
||||
|
||||
#if defined(VK_KHR_cooperative_matrix)
|
||||
if (device->coopmat_support) {
|
||||
// Query supported shapes
|
||||
|
|
@ -5200,7 +5330,10 @@ static vk_device ggml_vk_get_device(size_t idx) {
|
|||
vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_create_info(
|
||||
{},
|
||||
dsl_binding);
|
||||
descriptor_set_layout_create_info.setPNext(&dslbfci);
|
||||
if (device_is_vulkan_12 || descriptor_indexing_ext) {
|
||||
descriptor_set_layout_create_info.setPNext(&dslbfci);
|
||||
}
|
||||
|
||||
device->dsl = device->device.createDescriptorSetLayout(descriptor_set_layout_create_info);
|
||||
|
||||
ggml_vk_load_shaders(device);
|
||||
|
|
|
|||
Loading…
Reference in New Issue