@@ -1974,6 +1974,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
1974
1974
bool fp16_compute = false ;
1975
1975
bool maintenance4_support = false ;
1976
1976
bool sm_builtins = false ;
1977
+ bool amd_shader_core_properties2 = false ;
1977
1978
bool pipeline_robustness = false ;
1978
1979
bool coopmat2_support = false ;
1979
1980
device->coopmat_support = false ;
@@ -1988,6 +1989,8 @@ static vk_device ggml_vk_get_device(size_t idx) {
1988
1989
fp16_compute = true ;
1989
1990
} else if (strcmp (" VK_NV_shader_sm_builtins" , properties.extensionName ) == 0 ) {
1990
1991
sm_builtins = true ;
1992
+ } else if (strcmp (" VK_AMD_shader_core_properties2" , properties.extensionName ) == 0 ) {
1993
+ amd_shader_core_properties2 = true ;
1991
1994
} else if (strcmp (" VK_EXT_pipeline_robustness" , properties.extensionName ) == 0 ) {
1992
1995
pipeline_robustness = true ;
1993
1996
} else if (strcmp (" VK_KHR_cooperative_matrix" , properties.extensionName ) == 0 ) {
@@ -2006,6 +2009,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
2006
2009
vk::PhysicalDeviceMaintenance4Properties props4;
2007
2010
vk::PhysicalDeviceSubgroupProperties subgroup_props;
2008
2011
vk::PhysicalDeviceShaderSMBuiltinsPropertiesNV sm_props;
2012
+ vk::PhysicalDeviceShaderCoreProperties2AMD amd_shader_core_properties2_props;
2009
2013
props2.pNext = &props3;
2010
2014
props3.pNext = &subgroup_props;
2011
2015
@@ -2019,6 +2023,10 @@ static vk_device ggml_vk_get_device(size_t idx) {
2019
2023
last_struct->pNext = (VkBaseOutStructure *)&sm_props;
2020
2024
last_struct = (VkBaseOutStructure *)&sm_props;
2021
2025
}
2026
+ if (amd_shader_core_properties2) {
2027
+ last_struct->pNext = (VkBaseOutStructure *)&amd_shader_core_properties2_props;
2028
+ last_struct = (VkBaseOutStructure *)&amd_shader_core_properties2_props;
2029
+ }
2022
2030
2023
2031
#if defined(VK_NV_cooperative_matrix2)
2024
2032
vk::PhysicalDeviceCooperativeMatrix2PropertiesNV coopmat2_props;
@@ -2046,6 +2054,8 @@ static vk_device ggml_vk_get_device(size_t idx) {
2046
2054
device->uma = device->properties .deviceType == vk::PhysicalDeviceType::eIntegratedGpu;
2047
2055
if (sm_builtins) {
2048
2056
device->shader_core_count = sm_props.shaderSMCount ;
2057
+ } else if (amd_shader_core_properties2) {
2058
+ device->shader_core_count = amd_shader_core_properties2_props.activeComputeUnitCount ;
2049
2059
} else {
2050
2060
device->shader_core_count = 0 ;
2051
2061
}
@@ -2314,6 +2324,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
2314
2324
// Shaders
2315
2325
// Disable matmul tile sizes early if their performance is low or they are not supported
2316
2326
switch (device->vendor_id ) {
2327
+ #ifndef GGML_VULKAN_RUN_TESTS
2317
2328
case VK_VENDOR_ID_AMD:
2318
2329
case VK_VENDOR_ID_INTEL:
2319
2330
device->mul_mat_l = false ;
@@ -2331,6 +2342,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
2331
2342
device->mul_mat_id_m = true ;
2332
2343
device->mul_mat_id_s = false ;
2333
2344
break ;
2345
+ #endif
2334
2346
default :
2335
2347
device->mul_mat_l = true ;
2336
2348
device->mul_mat_m = true ;
0 commit comments