Skip to content

Commit cd4fddb

Browse files
authored
cuda : fix 2-bit quants on amd hip (#5105)
* cuda : fix 2-bit quants on amd hip
* use __low2float intrinsic function for new quants
1 parent c9b316c commit cd4fddb

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

ggml-cuda.cu

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4283,7 +4283,7 @@ static __device__ __forceinline__ float vec_dot_iq2_xxs_q8_1(
42834283
q8 += 8;
42844284
aux32 >>= 7;
42854285
}
4286-
const float d = (float)bq2->d * (0.5f + aux32) * (float)bq8_1[ib32].ds.x * 0.25f;
4286+
const float d = (float)bq2->d * (0.5f + aux32) * __low2float(bq8_1[ib32].ds) * 0.25f;
42874287
return d * sumi;
42884288
#else
42894289
// iqs is 0...15
@@ -4294,7 +4294,7 @@ static __device__ __forceinline__ float vec_dot_iq2_xxs_q8_1(
42944294
const uint8_t * grid1 = (const uint8_t *)(iq2xxs_grid + aux8[2*il+0]);
42954295
const uint8_t * grid2 = (const uint8_t *)(iq2xxs_grid + aux8[2*il+1]);
42964296
const uint32_t aux32 = q2[2] | (q2[3] << 16);
4297-
const float d = (float)bq2->d * (0.5f + (aux32 >> 28)) * (float)bq8_1[ib32].ds.x * 0.25f;
4297+
const float d = (float)bq2->d * (0.5f + (aux32 >> 28)) * __low2float(bq8_1[ib32].ds) * 0.25f;
42984298
const uint8_t signs1 = ksigns_iq2xs[(aux32 >> 14*il) & 127];
42994299
const uint8_t signs2 = ksigns_iq2xs[(aux32 >> (14*il + 7)) & 127];
43004300
const int8_t * q8 = bq8_1[ib32].qs + 16*il;
@@ -4339,7 +4339,7 @@ static __device__ __forceinline__ float vec_dot_iq2_xs_q8_1(
43394339
}
43404340
q8 += 8;
43414341
}
4342-
const float d = (float)bq2->d * (float)bq8_1[ib32].ds.x * 0.25f;
4342+
const float d = (float)bq2->d * __low2float(bq8_1[ib32].ds) * 0.25f;
43434343
return d * ((0.5f + ls1) * sumi1 + (0.5f + ls2) * sumi2);
43444344
#else
43454345
assert(false);

0 commit comments

Comments (0)