@@ -1423,33 +1423,46 @@ static bool ggml_vk_matmul_shmem_support(const vk_device& device, const std::vec
1423
1423
return supported;
1424
1424
}
1425
1425
1426
// Per-GPU subgroup-size tuning entry.
//
// One entry describes a family of GPUs (identified by any of several name
// aliases) together with the subgroup sizes that should be requested for
// specific pipelines on that family.
struct GpuPipelineConfig {
    // List of all aliases for a given GPU.
    // For example, this can include names like "NAVI10", "RX 5700", etc.
    // get_subgroup_size() treats each alias as a substring to search for in
    // the device name, so an alias must not be empty.
    std::vector<std::string> device_names;

    // Mapping of pipeline names to their specific subgroup sizes.
    // Example: {"soft_max_f32", 64}.
    // get_subgroup_size() ignores an entry whose value is 0 and falls back to
    // default_subgroup_size instead.
    std::unordered_map<std::string, uint32_t> pipelines;

    // Default subgroup size for this GPU, used for every pipeline that has no
    // (non-zero) entry in `pipelines`.
    // Defaults to 0 if not explicitly provided.
    // NOTE(review): 0 presumably means "no required subgroup size" to the
    // pipeline-creation code - confirm against the caller.
    uint32_t default_subgroup_size = 0;
};

// Define configurations for different GPUs.
// The table is read-only lookup data; entries are scanned in order and the
// first entry with a matching alias wins.
static const std::vector<GpuPipelineConfig> gpu_pipeline_configs = {
    {
        // Aliases for this GPU family.
        {"NAVI10", "NAVI14", "RX 5700", "RX 5600", "RX 5500"},
        // Pipelines that use a subgroup size of 64 on this family.
        {
            {"soft_max_f32", 64}, {"soft_max_f32_wg512", 64},
            {"soft_max_f32_f16", 64}, {"soft_max_f32_f16_wg512", 64},
            {"im2col_f32", 64}, {"im2col_f32_f16", 64},
        },
        // Every other pipeline on this family uses 32.
        32
    },
};
1436
1452
1437
1453
static uint32_t get_subgroup_size (const std::string &pipeline_name, const std::string &device_name) {
1438
- std::string foundKey;
1439
- for (const auto &entry : gpu_pipeline_config) {
1440
- if (device_name.find (entry.first ) != std::string::npos) {
1441
- foundKey = entry.first ;
1442
- break ;
1443
- }
1444
- }
1445
- if (!foundKey.empty ()) {
1446
- auto &pipelineMap = gpu_pipeline_config[foundKey];
1447
- auto pipIt = pipelineMap.find (pipeline_name);
1448
- if (pipIt != pipelineMap.end () && pipIt->second != 0 ) {
1449
- return pipIt->second ;
1454
+ for (const auto &config : gpu_pipeline_configs) {
1455
+ for (const auto &alias : config.device_names ) {
1456
+ if (device_name.find (alias) != std::string::npos) {
1457
+ auto pipIt = config.pipelines .find (pipeline_name);
1458
+ if (pipIt != config.pipelines .end () && pipIt->second != 0 ) {
1459
+ return pipIt->second ;
1460
+ }
1461
+ return config.default_subgroup_size ;
1462
+ }
1450
1463
}
1451
1464
}
1452
- // If not defined , return 0.
1465
+ // If no matching configuration is found , return 0.
1453
1466
return 0 ;
1454
1467
}
1455
1468
@@ -1583,9 +1596,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
1583
1596
uint32_t align, bool disable_robustness = false , bool require_full_subgroups = false , uint32_t required_subgroup_size = 0 ) {
1584
1597
1585
1598
required_subgroup_size = get_subgroup_size (name, device_name);
1586
- if (required_subgroup_size == 0 ) {
1587
- required_subgroup_size = (device_name.find (" NAVI1" ) != std::string::npos) ? 32 : required_subgroup_size;
1588
- }
1589
1599
1590
1600
if (!pipeline) {
1591
1601
pipeline = std::make_shared<vk_pipeline_struct>();
0 commit comments