@@ -1423,6 +1423,49 @@ static bool ggml_vk_matmul_shmem_support(const vk_device& device, const std::vec
     return supported;
 }
 
+struct GpuPipelineConfig {
+    // List of all aliases for a given GPU.
+    // For example, this can include names like "NAVI10", "RX 5700", etc.
+    std::vector<std::string> device_names;
+
+    // Mapping of pipeline names to their specific subgroup sizes.
+    // Example: {"soft_max_f32", 64}.
+    std::unordered_map<std::string, uint32_t> pipelines;
+
+    // Default subgroup size for this GPU.
+    // Defaults to 0 if not explicitly provided.
+    uint32_t default_subgroup_size = 0;
+};
+
+// Define configurations for different GPUs.
+static std::vector<GpuPipelineConfig> gpu_pipeline_configs = {
+    {
+        {"NAVI10", "NAVI14", "RX 5700", "RX 5600", "RX 5500"},
+        {
+            {"soft_max_f32", 64}, {"soft_max_f32_wg512", 64},
+            {"soft_max_f32_f16", 64}, {"soft_max_f32_f16_wg512", 64},
+            {"im2col_f32", 64}, {"im2col_f32_f16", 64},
+        },
+        32
+    },
+};
+
+static uint32_t get_subgroup_size(const std::string &pipeline_name, const std::string &device_name) {
+    for (const auto &config : gpu_pipeline_configs) {
+        for (const auto &alias : config.device_names) {
+            if (device_name.find(alias) != std::string::npos) {
+                auto pipIt = config.pipelines.find(pipeline_name);
+                if (pipIt != config.pipelines.end() && pipIt->second != 0) {
+                    return pipIt->second;
+                }
+                return config.default_subgroup_size;
+            }
+        }
+    }
+    // If no matching configuration is found, return 0.
+    return 0;
+}
+
 static void ggml_vk_load_shaders(vk_device& device) {
     VK_LOG_DEBUG("ggml_vk_load_shaders(" << device->name << ")");
 
@@ -1543,11 +1586,17 @@ static void ggml_vk_load_shaders(vk_device& device) {
         device->pipeline_matmul_id_f32 = std::make_shared<vk_matmul_pipeline_struct>();
     }
 
+    vk::PhysicalDeviceProperties2 props2;
+    device->physical_device.getProperties2(&props2);
+    std::string device_name = props2.properties.deviceName.data();
+
     std::vector<std::future<void>> compiles;
     auto const &ggml_vk_create_pipeline = [&](vk_device& device, vk_pipeline& pipeline, const std::string &name, size_t spv_size, const void* spv_data, const std::string &entrypoint,
                                               uint32_t parameter_count, uint32_t push_constant_size, std::array<uint32_t, 3> wg_denoms, const std::vector<uint32_t>& specialization_constants,
                                               uint32_t align, bool disable_robustness = false, bool require_full_subgroups = false, uint32_t required_subgroup_size = 0) {
 
+        required_subgroup_size = get_subgroup_size(name, device_name);
+
         if (!pipeline) {
             pipeline = std::make_shared<vk_pipeline_struct>();
             pipeline->name = name;
@@ -2699,7 +2748,9 @@ static void ggml_vk_print_gpu_info(size_t idx) {
     subgroup_props.pNext = &driver_props;
     physical_device.getProperties2(&props2);
 
-    const size_t subgroup_size = subgroup_props.subgroupSize;
+    uint32_t default_subgroup_size = get_subgroup_size("", props2.properties.deviceName.data());
+    const size_t subgroup_size = (default_subgroup_size != 0) ? default_subgroup_size : subgroup_props.subgroupSize;
+
     const bool uma = props2.properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu;
 
     bool fp16_storage = false;
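The lookup added above is a two-level fallback: the first alias that appears as a substring of the reported device name selects a config, a per-pipeline entry overrides that GPU's default, and a return value of 0 signals "no device-specific override". The sketch below reproduces only that lookup logic outside the Vulkan backend for illustration; the device name strings are hypothetical stand-ins for vk::PhysicalDeviceProperties::deviceName, and the config table is trimmed to two pipeline entries for brevity.

```cpp
#include <cassert>
#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

// Same shape as the GpuPipelineConfig added in the diff.
struct GpuPipelineConfig {
    std::vector<std::string> device_names;                // GPU name aliases
    std::unordered_map<std::string, uint32_t> pipelines;  // per-pipeline overrides
    uint32_t default_subgroup_size = 0;                   // fallback for this GPU
};

// Trimmed copy of the table from the diff (illustrative, not exhaustive).
static std::vector<GpuPipelineConfig> gpu_pipeline_configs = {
    {
        {"NAVI10", "NAVI14", "RX 5700", "RX 5600", "RX 5500"},
        {{"soft_max_f32", 64}, {"im2col_f32", 64}},
        32
    },
};

// Mirrors get_subgroup_size(): alias substring match selects the config,
// a per-pipeline entry wins over the GPU default, 0 means "no override".
static uint32_t get_subgroup_size(const std::string &pipeline_name, const std::string &device_name) {
    for (const auto &config : gpu_pipeline_configs) {
        for (const auto &alias : config.device_names) {
            if (device_name.find(alias) != std::string::npos) {
                auto it = config.pipelines.find(pipeline_name);
                if (it != config.pipelines.end() && it->second != 0) {
                    return it->second;
                }
                return config.default_subgroup_size;
            }
        }
    }
    return 0;
}

int main() {
    const std::string rdna1 = "AMD Radeon RX 5700 XT";            // hypothetical reported name
    assert(get_subgroup_size("soft_max_f32", rdna1) == 64);       // per-pipeline override
    assert(get_subgroup_size("mul_mat_vec_f32", rdna1) == 32);    // GPU-wide default
    assert(get_subgroup_size("soft_max_f32", "Other GPU") == 0);  // unknown device: no override
    return 0;
}
```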