|
1 | 1 | #include "ggml-vulkan.h"
|
2 | 2 | #include <vulkan/vulkan_core.h>
|
| 3 | +#include <cstdint> |
3 | 4 | #if defined(GGML_VULKAN_RUN_TESTS) || defined(GGML_VULKAN_PERF) || defined(GGML_VULKAN_CHECK_RESULTS)
|
4 | 5 | #include <chrono>
|
5 | 6 | #include "ggml-cpu.h"
|
@@ -1423,33 +1424,46 @@ static bool ggml_vk_matmul_shmem_support(const vk_device& device, const std::vec
|
1423 | 1424 | return supported;
|
1424 | 1425 | }
|
1425 | 1426 |
|
// Subgroup size settings for one GPU family.
//
// Member order matters: the configuration table initializes entries
// positionally as { aliases, per-pipeline sizes, default size }.
struct GpuPipelineConfig {
    // Name fragments identifying this GPU. A device is matched when its
    // name contains any of these strings, e.g. "NAVI10" or "RX 5700".
    std::vector<std::string> device_names;

    // Pipeline name -> subgroup size for that pipeline on this GPU,
    // e.g. {"soft_max_f32", 64}.
    std::unordered_map<std::string, uint32_t> pipelines;

    // Subgroup size returned for pipelines that have no entry in `pipelines`.
    // Stays 0 when an entry does not provide it.
    uint32_t default_subgroup_size = 0;
};
| 1440 | + |
| 1441 | +// Define configurations for different GPUs. |
| 1442 | +static std::vector<GpuPipelineConfig> gpu_pipeline_configs = { |
| 1443 | + { |
| 1444 | + {"NAVI10", "NAVI14", "RX 5700", "RX 5600", "RX 5500"}, |
| 1445 | + { |
| 1446 | + {"soft_max_f32", 64}, {"soft_max_f32_wg512", 64}, |
| 1447 | + {"soft_max_f32_f16", 64}, {"soft_max_f32_f16_wg512", 64}, |
| 1448 | + {"im2col_f32", 64}, {"im2col_f32_f16", 64}, |
| 1449 | + }, |
| 1450 | + 32 |
| 1451 | + }, |
1435 | 1452 | };
|
1436 | 1453 |
|
1437 | 1454 | static uint32_t get_subgroup_size(const std::string &pipeline_name, const std::string &device_name) {
|
1438 |
| - std::string foundKey; |
1439 |
| - for (const auto &entry : gpu_pipeline_config) { |
1440 |
| - if (device_name.find(entry.first) != std::string::npos) { |
1441 |
| - foundKey = entry.first; |
1442 |
| - break; |
1443 |
| - } |
1444 |
| - } |
1445 |
| - if (!foundKey.empty()) { |
1446 |
| - auto &pipelineMap = gpu_pipeline_config[foundKey]; |
1447 |
| - auto pipIt = pipelineMap.find(pipeline_name); |
1448 |
| - if (pipIt != pipelineMap.end() && pipIt->second != 0) { |
1449 |
| - return pipIt->second; |
| 1455 | + for (const auto &config : gpu_pipeline_configs) { |
| 1456 | + for (const auto &alias : config.device_names) { |
| 1457 | + if (device_name.find(alias) != std::string::npos) { |
| 1458 | + auto pipIt = config.pipelines.find(pipeline_name); |
| 1459 | + if (pipIt != config.pipelines.end() && pipIt->second != 0) { |
| 1460 | + return pipIt->second; |
| 1461 | + } |
| 1462 | + return config.default_subgroup_size; |
| 1463 | + } |
1450 | 1464 | }
|
1451 | 1465 | }
|
1452 |
| - // If not defined, return 0. |
| 1466 | + // If no matching configuration is found, return 0. |
1453 | 1467 | return 0;
|
1454 | 1468 | }
|
1455 | 1469 |
|
@@ -1583,9 +1597,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
|
1583 | 1597 | uint32_t align, bool disable_robustness = false, bool require_full_subgroups = false, uint32_t required_subgroup_size = 0) {
|
1584 | 1598 |
|
1585 | 1599 | required_subgroup_size = get_subgroup_size(name, device_name);
|
1586 |
| - if (required_subgroup_size == 0) { |
1587 |
| - required_subgroup_size = (device_name.find("NAVI1") != std::string::npos) ? 32 : required_subgroup_size; |
1588 |
| - } |
1589 | 1600 |
|
1590 | 1601 | if (!pipeline) {
|
1591 | 1602 | pipeline = std::make_shared<vk_pipeline_struct>();
|
|
0 commit comments