@@ -1701,13 +1701,16 @@ static vk_device ggml_vk_get_device(size_t idx) {
1701
1701
1702
1702
bool maintenance4_support = false ;
1703
1703
bool sm_builtins = false ;
1704
+ bool amd_shader_core_properties2 = false ;
1704
1705
1705
1706
// Check which optional device extensions are supported (maintenance4, NV SM builtins, AMD shader core properties2)
1706
1707
for (const auto & properties : ext_props) {
1707
1708
if (strcmp (" VK_KHR_maintenance4" , properties.extensionName ) == 0 ) {
1708
1709
maintenance4_support = true ;
1709
1710
} else if (strcmp (" VK_NV_shader_sm_builtins" , properties.extensionName ) == 0 ) {
1710
1711
sm_builtins = true ;
1712
+ } else if (strcmp (" VK_AMD_shader_core_properties2" , properties.extensionName ) == 0 ) {
1713
+ amd_shader_core_properties2 = true ;
1711
1714
}
1712
1715
}
1713
1716
@@ -1716,6 +1719,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
1716
1719
vk::PhysicalDeviceMaintenance4Properties props4;
1717
1720
vk::PhysicalDeviceSubgroupProperties subgroup_props;
1718
1721
vk::PhysicalDeviceShaderSMBuiltinsPropertiesNV sm_props;
1722
+ vk::PhysicalDeviceShaderCoreProperties2AMD amd_shader_core_properties2_props;
1719
1723
props2.pNext = &props3;
1720
1724
props3.pNext = &subgroup_props;
1721
1725
@@ -1729,6 +1733,10 @@ static vk_device ggml_vk_get_device(size_t idx) {
1729
1733
last_struct->pNext = (VkBaseOutStructure *)&sm_props;
1730
1734
last_struct = (VkBaseOutStructure *)&sm_props;
1731
1735
}
1736
+ if (amd_shader_core_properties2) {
1737
+ last_struct->pNext = (VkBaseOutStructure *)&amd_shader_core_properties2_props;
1738
+ last_struct = (VkBaseOutStructure *)&amd_shader_core_properties2_props;
1739
+ }
1732
1740
1733
1741
device->physical_device .getProperties2 (&props2);
1734
1742
device->properties = props2.properties ;
@@ -1748,6 +1756,8 @@ static vk_device ggml_vk_get_device(size_t idx) {
1748
1756
device->uma = device->properties .deviceType == vk::PhysicalDeviceType::eIntegratedGpu;
1749
1757
if (sm_builtins) {
1750
1758
device->shader_core_count = sm_props.shaderSMCount ;
1759
+ } else if (amd_shader_core_properties2) {
1760
+ device->shader_core_count = amd_shader_core_properties2_props.activeComputeUnitCount ;
1751
1761
} else {
1752
1762
device->shader_core_count = 0 ;
1753
1763
}
@@ -1822,7 +1832,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
1822
1832
vk11_features.pNext = &vk12_features;
1823
1833
1824
1834
// Pointer to the last chain element
1825
- VkBaseOutStructure * last_struct = (VkBaseOutStructure *)&vk12_features;
1835
+ last_struct = (VkBaseOutStructure *)&vk12_features;
1826
1836
1827
1837
VkPhysicalDevicePipelineRobustnessFeaturesEXT pl_robustness_features;
1828
1838
pl_robustness_features.pNext = nullptr ;
@@ -1952,6 +1962,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
1952
1962
// Shaders
1953
1963
// Disable matmul tile sizes early if performance low or not supported
1954
1964
switch (device->vendor_id ) {
1965
+ #ifndef GGML_VULKAN_RUN_TESTS
1955
1966
case VK_VENDOR_ID_AMD:
1956
1967
case VK_VENDOR_ID_INTEL:
1957
1968
device->mul_mat_l = false ;
@@ -1969,6 +1980,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
1969
1980
device->mul_mat_id_m = true ;
1970
1981
device->mul_mat_id_s = false ;
1971
1982
break ;
1983
+ #endif
1972
1984
default :
1973
1985
device->mul_mat_l = true ;
1974
1986
device->mul_mat_m = true ;
0 commit comments