@@ -175,6 +175,39 @@ typedef double ggml_float;
175
175
#define GGML_COMPUTE_FP16_TO_FP32 (x ) _cvtsh_ss(x)
176
176
#define GGML_COMPUTE_FP32_TO_FP16 (x ) _cvtss_sh(x, 0)
177
177
178
+ #elif defined(__POWER9_VECTOR__ )
179
+
180
+ #define GGML_COMPUTE_FP16_TO_FP32 (x ) ggml_compute_fp16_to_fp32(x)
181
+ #define GGML_COMPUTE_FP32_TO_FP16 (x ) ggml_compute_fp32_to_fp16(x)
182
+ /* the inline asm below is about 12% faster than the lookup method */
183
+ #define GGML_FP16_TO_FP32 (x ) GGML_COMPUTE_FP16_TO_FP32(x)
184
+ #define GGML_FP32_TO_FP16 (x ) GGML_COMPUTE_FP32_TO_FP16(x)
185
+
186
+ static inline float ggml_compute_fp16_to_fp32 (ggml_fp16_t h ) {
187
+ register float f ;
188
+ register double d ;
189
+ __asm__(
190
+ "mtfprd %0,%2\n"
191
+ "xscvhpdp %0,%0\n"
192
+ "frsp %1,%0\n" :
193
+ /* temp */ "=d" (d ),
194
+ /* out */ "=f" (f ):
195
+ /* in */ "r" (h ));
196
+ return f ;
197
+ }
198
+
199
+ static inline ggml_fp16_t ggml_compute_fp32_to_fp16 (float f ) {
200
+ register double d ;
201
+ register ggml_fp16_t r ;
202
+ __asm__( /* xscvdphp can work on double or single precision */
203
+ "xscvdphp %0,%2\n"
204
+ "mffprd %1,%0\n" :
205
+ /* temp */ "=d" (d ),
206
+ /* out */ "=r" (r ):
207
+ /* in */ "f" (f ));
208
+ return r ;
209
+ }
210
+
178
211
#else
179
212
180
213
// FP16 <-> FP32
@@ -272,6 +305,7 @@ static float table_f32_f16[1 << 16];
272
305
273
306
// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
274
307
// so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
308
+ // This is also true for POWER9.
275
309
#if !defined(GGML_FP16_TO_FP32 ) || !defined(GGML_FP32_TO_FP16 )
276
310
277
311
inline static float ggml_lookup_fp16_to_fp32 (ggml_fp16_t f ) {
@@ -462,7 +496,7 @@ static void quantize_row_q4_0_reference(const float * restrict x, void * restric
462
496
void quantize_row_q4_0 (const float * restrict x , void * restrict y , int k ) {
463
497
assert (k % QK == 0 );
464
498
465
- #if __ARM_NEON || defined(__AVX2__ ) || defined(__wasm_simd128__ )
499
+ #if __ARM_NEON || defined(__AVX2__ ) || defined(__wasm_simd128__ ) || defined( __POWER9_VECTOR__ )
466
500
const int nb = k / QK ;
467
501
const size_t bs = sizeof (float ) + QK /2 ;
468
502
@@ -472,7 +506,52 @@ void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) {
472
506
uint8_t pp [QK /2 ];
473
507
#endif
474
508
475
- #if __ARM_NEON
509
+ #if defined(__POWER9_VECTOR__ )
510
+ #if QK == 32
511
+ const vector float v85 = vec_splats (8.5f );
512
+ for (int i = 0 ; i < nb ; i ++ ) {
513
+ float amax = 0.0f ; // absolute max
514
+
515
+ vector float srcv [8 ];
516
+ vector float asrcv [8 ];
517
+ vector float amaxv [8 ];
518
+
519
+ for (int l = 0 ; l < 8 ; l ++ ) srcv [l ] = * (vector float * )(x + i * 32 + 4 * l );
520
+ for (int l = 0 ; l < 8 ; l ++ ) asrcv [l ] = vec_abs (srcv [l ]);
521
+
522
+ for (int l = 0 ; l < 4 ; l ++ ) amaxv [2 * l ] = vec_max (asrcv [2 * l ], asrcv [2 * l + 1 ]);
523
+ //for (int l = 0; l < 2; l++) amaxv[4*l] = vec_max(amaxv[4*l], amaxv[4*l+2]);
524
+ amaxv [0 ] = vec_max (amaxv [0 ], amaxv [2 ]);
525
+ amaxv [4 ] = vec_max (amaxv [4 ], amaxv [6 ]);
526
+ //for (int l = 0; l < 1; l++) amaxv[8*l] = vec_max(amaxv[8*l], amaxv[8*l+4]);
527
+ amaxv [0 ] = vec_max (amaxv [0 ], amaxv [4 ]);
528
+
529
+ amax = MAX (
530
+ MAX (vec_extract (amaxv [0 ], 0 ), vec_extract (amaxv [0 ], 1 )),
531
+ MAX (vec_extract (amaxv [0 ], 2 ), vec_extract (amaxv [0 ], 3 )));
532
+
533
+ const float d = amax / ((1 << 3 ) - 1 );
534
+ const float id = d ? 1.0 /d : 0.0 ;
535
+
536
+ * (float * )pd = d ;
537
+ pd += bs ;
538
+
539
+ const vector float vid = vec_splats (id );
540
+ for (int l = 0 ; l < 8 ; l ++ ) {
541
+ const vector float vf = vec_madd (srcv [l ], vid , v85 );
542
+ const vector signed int vi = vec_signed (vf );
543
+
544
+ pb [2 * l + 0 ] = vec_extract (vi , 0 ) | (vec_extract (vi , 1 ) << 4 );
545
+ pb [2 * l + 1 ] = vec_extract (vi , 2 ) | (vec_extract (vi , 3 ) << 4 );
546
+ }
547
+
548
+ //memcpy(pb, pp, sizeof(pp));
549
+ pb += bs ;
550
+ }
551
+ #else
552
+ #error "not implemented for QK"
553
+ #endif
554
+ #elif __ARM_NEON
476
555
#if QK == 32
477
556
for (int i = 0 ; i < nb ; i ++ ) {
478
557
float amax = 0.0f ; // absolute max
0 commit comments