We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent e5d23f2, commit 72af259 (Copy full SHA for 72af259)
ggml-cuda.cu
@@ -1368,7 +1368,9 @@ static __device__ __forceinline__ float vec_dot_q4_1_q8_1(
1368
#if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics
1369
const block_q4_1 * bq4_1 = (const block_q4_1 *) vbq;
1370
1371
- const int vi = *((int *) &bq4_1->qs[sizeof(int) * (iqs + 0)]);
+ int vi;
1372
+ memcpy(&vi, &bq4_1->qs[sizeof(int) * (iqs + 0)], sizeof(vi));
1373
+ //const int vi = *((int *) &bq4_1->qs[sizeof(int) * (iqs + 0)]);
1374
const int ui0 = *((int *) &bq8_1->qs[sizeof(int) * (iqs + 0)]);
1375
const int ui1 = *((int *) &bq8_1->qs[sizeof(int) * (iqs + QI4_1)]);
1376
0 commit comments