Skip to content

Commit 72af259

Browse files
committed
Fix misaligned memory access in Q4_1 kernel
1 parent e5d23f2 commit 72af259

File tree

1 file changed

+3
-1
lines changed

1 file changed

+3
-1
lines changed

ggml-cuda.cu

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1368,7 +1368,9 @@ static __device__ __forceinline__ float vec_dot_q4_1_q8_1(
1368 1368
#if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics
1369 1369
const block_q4_1 * bq4_1 = (const block_q4_1 *) vbq;
1370 1370

1371-
const int vi = *((int *) &bq4_1->qs[sizeof(int) * (iqs + 0)]);
1371+
int vi;
1372+
memcpy(&vi, &bq4_1->qs[sizeof(int) * (iqs + 0)], sizeof(vi));
1373+
//const int vi = *((int *) &bq4_1->qs[sizeof(int) * (iqs + 0)]);
1372 1374
const int ui0 = *((int *) &bq8_1->qs[sizeof(int) * (iqs + 0)]);
1373 1375
const int ui1 = *((int *) &bq8_1->qs[sizeof(int) * (iqs + QI4_1)]);
1374 1376

0 commit comments

Comments
 (0)