Skip to content

Commit 6369be0

Browse files
ckastner and slaren authored
Implement GGML_CPU_ALL_VARIANTS for PowerPC (#14286)
* Add PowerPC feature detection and scoring
* ggml-cpu: Implement GGML_CPU_ALL_VARIANTS for PowerPC
* ggml-cpu: Delay some initializations until function is called

When using GGML_BACKEND_DL=ON, these initializations might use instructions that are not supported by the current CPU.

---------

Co-authored-by: Diego Devesa <[email protected]>
1 parent 88fc854 commit 6369be0

File tree

4 files changed

+135
-14
lines changed

4 files changed

+135
-14
lines changed

ggml/src/CMakeLists.txt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,10 @@ function(ggml_add_cpu_backend_variant tag_name)
286286
foreach (feat ${ARGN})
287287
set(GGML_INTERNAL_${feat} ON)
288288
endforeach()
289+
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
290+
foreach (feat ${ARGN})
291+
set(GGML_INTERNAL_${feat} ON)
292+
endforeach()
289293
endif()
290294

291295
ggml_add_cpu_backend_variant_impl(${tag_name})
@@ -337,6 +341,19 @@ if (GGML_CPU_ALL_VARIANTS)
337341
else()
338342
message(FATAL_ERROR "Unsupported ARM target OS: ${CMAKE_SYSTEM_NAME}")
339343
endif()
344+
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
345+
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
346+
ggml_add_cpu_backend_variant(power0)
347+
ggml_add_cpu_backend_variant(power7_1 POWER7)
348+
ggml_add_cpu_backend_variant(power7_2 POWER7 VSX)
349+
ggml_add_cpu_backend_variant(power8_1 POWER8)
350+
ggml_add_cpu_backend_variant(power8_2 POWER8 VSX)
351+
ggml_add_cpu_backend_variant(power9 POWER9 VSX)
352+
ggml_add_cpu_backend_variant(power10 POWER10 VSX)
353+
ggml_add_cpu_backend_variant(power11 POWER11 VSX)
354+
else()
355+
message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}")
356+
endif()
340357
else()
341358
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
342359
endif()

ggml/src/ggml-cpu/CMakeLists.txt

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,27 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
388388
else()
389389
list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64)
390390
endif()
391+
elseif(GGML_CPU_ALL_VARIANTS)
392+
# Begin with the lowest baseline
393+
set(ARCH_DEFINITIONS "")
394+
395+
# When a feature is selected, bump the MCPU to the first
396+
# version that supported it
397+
foreach(PVER RANGE 7 11)
398+
if(DEFINED GGML_INTERNAL_POWER${PVER})
399+
set(POWERPC_MCPU "power${PVER}")
400+
list(APPEND ARCH_DEFINITIONS GGML_USE_POWER${PVER})
401+
endif()
402+
endforeach()
403+
if (GGML_INTERNAL_VSX)
404+
list(APPEND ARCH_DEFINITIONS GGML_USE_VSX)
405+
list(APPEND ARCH_FLAGS -mvsx)
406+
endif()
407+
408+
if (DEFINED POWERPC_MCPU)
409+
list(APPEND ARCH_FLAGS -mcpu=${POWERPC_MCPU})
410+
endif()
411+
ggml_add_cpu_backend_features(${GGML_CPU_NAME} powerpc ${ARCH_DEFINITIONS})
391412
else()
392413
if (GGML_CPU_POWERPC_CPUTYPE)
393414
list(APPEND ARCH_FLAGS -mcpu=${GGML_CPU_POWERPC_CPUTYPE})
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# include "ggml-backend-impl.h"
2+
3+
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__)
4+
5+
#if defined(__linux__)
6+
#include <sys/auxv.h>
7+
#endif
8+
9+
#include <string>
10+
11+
// Runtime description of the PowerPC CPU the process is executing on.
//
//   platform      - the AT_PLATFORM string from the auxiliary vector; empty
//                   when it is unavailable (non-Linux builds, or no entry).
//   power_version - POWER generation parsed from `platform`; -1 when unknown.
//   has_vsx       - true when `power_version` is 9 or newer.
struct powerpc_features {
    std::string platform      = "";
    int         power_version = -1;

    bool has_vsx = false;

    // Describe the CPU the process is currently running on.
    powerpc_features() : powerpc_features(detect_platform()) {}

    // Describe a CPU from an explicit platform name instead of querying the
    // system; e.g. "power9" gives power_version == 9.
    explicit powerpc_features(const std::string & platform_name) :
        platform(platform_name),
        power_version(parse_power_version(platform_name)) {
        // NOTE(review): VSX is only reported for POWER9 and newer, so a build
        // that requires VSX together with an older POWER baseline can never
        // be selected - confirm that this is intended.
        has_vsx = power_version >= 9;
    }

    // Returns the AT_PLATFORM string of the running system, or "" when it
    // cannot be determined.
    static std::string detect_platform() {
#if defined(__linux__)
        const unsigned long auxval = getauxval(AT_PLATFORM);
        if (auxval) {
            return std::string(reinterpret_cast<const char *>(auxval));
        }
#endif
        return "";
    }

    // Extract the POWER generation from a platform name such as "power10".
    // Returns -1 when the name does not start with "power" or when no digit
    // follows that prefix.
    //
    // The digits are read directly after the prefix rather than from the end
    // of the string: a decorated name such as "power7+" still yields 7, and a
    // name that merely ends in digits (e.g. "powerpc64") is not mistaken for
    // a POWER generation. Never throws.
    static int parse_power_version(const std::string & platform_name) {
        static const char prefix[]   = "power";
        const std::size_t prefix_len = sizeof(prefix) - 1;
        const int         max_value  = 9999; // saturation bound, keeps the arithmetic overflow-free

        // TBD: Do systems exist that return this in uppercase?
        if (platform_name.compare(0, prefix_len, prefix) != 0) {
            return -1;
        }

        int version = -1;
        for (std::size_t i = prefix_len; i < platform_name.size(); i++) {
            const char c = platform_name[i];
            if (c < '0' || c > '9') {
                break;
            }
            version = (version < 0 ? 0 : version) * 10 + (c - '0');
            if (version > max_value) {
                version = max_value;
                break;
            }
        }
        return version;
    }
};
44+
45+
// Score this build of the CPU backend against the CPU it is running on.
//
// Returns 0 as soon as the detected CPU lacks something this build was
// compiled to require (one of the GGML_USE_POWER<n> baselines, or
// GGML_USE_VSX). Otherwise returns 1 plus a distinct power-of-two weight for
// every requirement that is compiled in, so a build with more (and newer)
// requirements gets a larger value.
static int ggml_backend_cpu_powerpc_score() {
    powerpc_features detected;

    // Base value returned when no requirement is compiled in
    int total = 1;

    // Platform scores
#if defined(GGML_USE_POWER7)
    if (detected.power_version < 7) {
        return 0;
    }
    total += 1 << 1;
#endif
#if defined(GGML_USE_POWER8)
    if (detected.power_version < 8) {
        return 0;
    }
    total += 1 << 2;
#endif
#if defined(GGML_USE_POWER9)
    if (detected.power_version < 9) {
        return 0;
    }
    total += 1 << 3;
#endif
#if defined(GGML_USE_POWER10)
    if (detected.power_version < 10) {
        return 0;
    }
    total += 1 << 4;
#endif
#if defined(GGML_USE_POWER11)
    if (detected.power_version < 11) {
        return 0;
    }
    total += 1 << 5;
#endif

    // Feature scores
#if defined(GGML_USE_VSX)
    if (!detected.has_vsx) {
        return 0;
    }
    total += 1 << 6;
#endif

    return total;
}
79+
80+
// Pass the scoring function to GGML_BACKEND_DL_SCORE_IMPL. The macro is
// defined outside this file; presumably it exposes the function to the
// dynamic backend loader - TODO confirm against ggml-backend-impl.h.
GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_powerpc_score)
81+
82+
#endif // defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__)

ggml/src/ggml-cpu/repack.cpp

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1411,44 +1411,45 @@ template <typename BLOC_TYPE, int64_t INTER_SIZE, int64_t NB_COLS, ggml_type PAR
14111411
}
14121412
};
14131413

1414-
// instance for Q4
1415-
static const tensor_traits<block_q4_0, 4, 4, GGML_TYPE_Q8_0> q4_0_4x4_q8_0;
1416-
static const tensor_traits<block_q4_0, 8, 4, GGML_TYPE_Q8_0> q4_0_4x8_q8_0;
1417-
static const tensor_traits<block_q4_0, 8, 8, GGML_TYPE_Q8_0> q4_0_8x8_q8_0;
1418-
static const tensor_traits<block_q4_K, 8, 8, GGML_TYPE_Q8_K> q4_K_8x8_q8_K;
1419-
1420-
// instance for IQ4
1421-
static const tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0;
1422-
14231414
} // namespace ggml::cpu::repack
14241415

14251416
static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(const struct ggml_tensor * cur) {
1417+
1418+
// instance for Q4
1419+
static const ggml::cpu::repack::tensor_traits<block_q4_0, 4, 4, GGML_TYPE_Q8_0> q4_0_4x4_q8_0;
1420+
static const ggml::cpu::repack::tensor_traits<block_q4_0, 8, 4, GGML_TYPE_Q8_0> q4_0_4x8_q8_0;
1421+
static const ggml::cpu::repack::tensor_traits<block_q4_0, 8, 8, GGML_TYPE_Q8_0> q4_0_8x8_q8_0;
1422+
static const ggml::cpu::repack::tensor_traits<block_q4_K, 8, 8, GGML_TYPE_Q8_K> q4_K_8x8_q8_K;
1423+
1424+
// instance for IQ4
1425+
static const ggml::cpu::repack::tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0;
1426+
14261427
if (cur->type == GGML_TYPE_Q4_0) {
14271428
if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
14281429
if (cur->ne[1] % 8 == 0) {
1429-
return &ggml::cpu::repack::q4_0_8x8_q8_0;
1430+
return &q4_0_8x8_q8_0;
14301431
}
14311432
}
14321433
if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
14331434
if (cur->ne[1] % 4 == 0) {
1434-
return &ggml::cpu::repack::q4_0_4x8_q8_0;
1435+
return &q4_0_4x8_q8_0;
14351436
}
14361437
}
14371438
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
14381439
if (cur->ne[1] % 4 == 0) {
1439-
return &ggml::cpu::repack::q4_0_4x4_q8_0;
1440+
return &q4_0_4x4_q8_0;
14401441
}
14411442
}
14421443
} else if (cur->type == GGML_TYPE_Q4_K) {
14431444
if (ggml_cpu_has_avx2()) {
14441445
if (cur->ne[1] % 8 == 0) {
1445-
return &ggml::cpu::repack::q4_K_8x8_q8_K;
1446+
return &q4_K_8x8_q8_K;
14461447
}
14471448
}
14481449
} else if (cur->type == GGML_TYPE_IQ4_NL) {
14491450
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
14501451
if (cur->ne[1] % 4 == 0) {
1451-
return &ggml::cpu::repack::iq4_nl_4x4_q8_0;
1452+
return &iq4_nl_4x4_q8_0;
14521453
}
14531454
}
14541455
}

0 commit comments

Comments
 (0)