@@ -1485,12 +1485,12 @@ static bool ggml_vk_matmul_shmem_support(const vk_device& device, const std::vec
1485
1485
}
1486
1486
1487
1487
struct GpuPipelineConfig {
1488
- // List of all aliases for a given GPU.
1489
- // For example, this can include names like "NAVI10", "RX 5700", etc.
1490
- std::vector<std::string> device_names ;
1488
+ // GPU architecture identifier.
1489
+ // Example: vk_device_architecture::AMD_GCN
1490
+ vk_device_architecture arch ;
1491
1491
1492
1492
// Mapping of pipeline names to their specific subgroup sizes.
1493
- // Example: {"soft_max_f32", 64}.
1493
+ // Example: {"soft_max_f32", 64}
1494
1494
std::unordered_map<std::string, uint32_t > pipelines;
1495
1495
1496
1496
// Default subgroup size for this GPU.
@@ -1501,7 +1501,7 @@ struct GpuPipelineConfig {
1501
1501
// Define configurations for different GPUs.
1502
1502
static std::vector<GpuPipelineConfig> gpu_pipeline_configs = {
1503
1503
{
1504
- { " NAVI10 " , " NAVI14 " , " RX 5700 " , " RX 5600 " , " RX 5500 " } ,
1504
+ vk_device_architecture::AMD_RDNA1 ,
1505
1505
{
1506
1506
{" soft_max_f32" , 64 }, {" soft_max_f32_wg512" , 64 },
1507
1507
{" soft_max_f32_f16" , 64 }, {" soft_max_f32_f16_wg512" , 64 },
@@ -1511,16 +1511,14 @@ static std::vector<GpuPipelineConfig> gpu_pipeline_configs = {
1511
1511
},
1512
1512
};
1513
1513
1514
- static uint32_t get_subgroup_size (const std::string &pipeline_name, const std::string &device_name ) {
1514
+ static uint32_t get_subgroup_size (const std::string &pipeline_name, const vk_device_architecture &arch ) {
1515
1515
for (const auto &config : gpu_pipeline_configs) {
1516
- for (const auto &alias : config.device_names ) {
1517
- if (device_name.find (alias) != std::string::npos) {
1518
- auto pipIt = config.pipelines .find (pipeline_name);
1519
- if (pipIt != config.pipelines .end () && pipIt->second != 0 ) {
1520
- return pipIt->second ;
1521
- }
1522
- return config.default_subgroup_size ;
1516
+ if (config.arch == arch) {
1517
+ auto pipIt = config.pipelines .find (pipeline_name);
1518
+ if (pipIt != config.pipelines .end () && pipIt->second != 0 ) {
1519
+ return pipIt->second ;
1523
1520
}
1521
+ return config.default_subgroup_size ;
1524
1522
}
1525
1523
}
1526
1524
// If no matching configuration is found, return 0.
@@ -1647,16 +1645,12 @@ static void ggml_vk_load_shaders(vk_device& device) {
1647
1645
device->pipeline_matmul_id_f32 = std::make_shared<vk_matmul_pipeline_struct>();
1648
1646
}
1649
1647
1650
- vk::PhysicalDeviceProperties2 props2;
1651
- device->physical_device .getProperties2 (&props2);
1652
- std::string device_name = props2.properties .deviceName .data ();
1653
-
1654
1648
std::vector<std::future<void >> compiles;
1655
1649
auto const &ggml_vk_create_pipeline = [&](vk_device& device, vk_pipeline& pipeline, const std::string &name, size_t spv_size, const void * spv_data, const std::string &entrypoint,
1656
1650
uint32_t parameter_count, uint32_t push_constant_size, std::array<uint32_t , 3 > wg_denoms, const std::vector<uint32_t >& specialization_constants,
1657
1651
uint32_t align, bool disable_robustness = false , bool require_full_subgroups = false , uint32_t required_subgroup_size = 0 ) {
1658
1652
1659
- required_subgroup_size = get_subgroup_size (name, device_name );
1653
+ required_subgroup_size = get_subgroup_size (name, device-> architecture );
1660
1654
1661
1655
if (!pipeline) {
1662
1656
pipeline = std::make_shared<vk_pipeline_struct>();
@@ -2810,7 +2804,8 @@ static void ggml_vk_print_gpu_info(size_t idx) {
2810
2804
subgroup_props.pNext = &driver_props;
2811
2805
physical_device.getProperties2 (&props2);
2812
2806
2813
- uint32_t default_subgroup_size = get_subgroup_size (" " , props2.properties .deviceName .data ());
2807
+ vk_device_architecture arch = get_device_architecture (physical_device);
2808
+ uint32_t default_subgroup_size = get_subgroup_size (" " , arch);
2814
2809
const size_t subgroup_size = (default_subgroup_size != 0 ) ? default_subgroup_size : subgroup_props.subgroupSize ;
2815
2810
2816
2811
const bool uma = props2.properties .deviceType == vk::PhysicalDeviceType::eIntegratedGpu;
0 commit comments