Skip to content

Commit 7002d6c

Browse files
committed
Vulkan: Add VK_AMD_shader_core_properties2 support to read Compute Unit count for split_k logic
1 parent 9622fbe commit 7002d6c

File tree

1 file changed

+13
-1
lines changed

1 file changed

+13
-1
lines changed

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1701,13 +1701,16 @@ static vk_device ggml_vk_get_device(size_t idx) {
17011701

17021702
bool maintenance4_support = false;
17031703
bool sm_builtins = false;
1704+
bool amd_shader_core_properties2 = false;
17041705

17051706
// Check if maintenance4 is supported
17061707
for (const auto& properties : ext_props) {
17071708
if (strcmp("VK_KHR_maintenance4", properties.extensionName) == 0) {
17081709
maintenance4_support = true;
17091710
} else if (strcmp("VK_NV_shader_sm_builtins", properties.extensionName) == 0) {
17101711
sm_builtins = true;
1712+
} else if (strcmp("VK_AMD_shader_core_properties2", properties.extensionName) == 0) {
1713+
amd_shader_core_properties2 = true;
17111714
}
17121715
}
17131716

@@ -1716,6 +1719,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
17161719
vk::PhysicalDeviceMaintenance4Properties props4;
17171720
vk::PhysicalDeviceSubgroupProperties subgroup_props;
17181721
vk::PhysicalDeviceShaderSMBuiltinsPropertiesNV sm_props;
1722+
vk::PhysicalDeviceShaderCoreProperties2AMD amd_shader_core_properties2_props;
17191723
props2.pNext = &props3;
17201724
props3.pNext = &subgroup_props;
17211725

@@ -1729,6 +1733,10 @@ static vk_device ggml_vk_get_device(size_t idx) {
17291733
last_struct->pNext = (VkBaseOutStructure *)&sm_props;
17301734
last_struct = (VkBaseOutStructure *)&sm_props;
17311735
}
1736+
if (amd_shader_core_properties2) {
1737+
last_struct->pNext = (VkBaseOutStructure *)&amd_shader_core_properties2_props;
1738+
last_struct = (VkBaseOutStructure *)&amd_shader_core_properties2_props;
1739+
}
17321740

17331741
device->physical_device.getProperties2(&props2);
17341742
device->properties = props2.properties;
@@ -1748,6 +1756,8 @@ static vk_device ggml_vk_get_device(size_t idx) {
17481756
device->uma = device->properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu;
17491757
if (sm_builtins) {
17501758
device->shader_core_count = sm_props.shaderSMCount;
1759+
} else if (amd_shader_core_properties2) {
1760+
device->shader_core_count = amd_shader_core_properties2_props.activeComputeUnitCount;
17511761
} else {
17521762
device->shader_core_count = 0;
17531763
}
@@ -1822,7 +1832,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
18221832
vk11_features.pNext = &vk12_features;
18231833

18241834
// Pointer to the last chain element
1825-
VkBaseOutStructure * last_struct = (VkBaseOutStructure *)&vk12_features;
1835+
last_struct = (VkBaseOutStructure *)&vk12_features;
18261836

18271837
VkPhysicalDevicePipelineRobustnessFeaturesEXT pl_robustness_features;
18281838
pl_robustness_features.pNext = nullptr;
@@ -1952,6 +1962,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
19521962
// Shaders
19531963
// Disable matmul tile sizes early if performance low or not supported
19541964
switch (device->vendor_id) {
1965+
#ifndef GGML_VULKAN_RUN_TESTS
19551966
case VK_VENDOR_ID_AMD:
19561967
case VK_VENDOR_ID_INTEL:
19571968
device->mul_mat_l = false;
@@ -1969,6 +1980,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
19691980
device->mul_mat_id_m = true;
19701981
device->mul_mat_id_s = false;
19711982
break;
1983+
#endif
19721984
default:
19731985
device->mul_mat_l = true;
19741986
device->mul_mat_m = true;

0 commit comments

Comments
 (0)