Skip to content

Commit 6f2ea98

Browse files
reenable CUDA arch check
1 parent ee71f36 commit 6f2ea98

File tree

1 file changed

+4
-5
lines changed

1 file changed

+4
-5
lines changed

ggml-cuda.cu

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1400,7 +1400,7 @@ static __device__ __forceinline__ float vec_dot_q8_0_q8_1(const void * __restric
14001400
static __device__ __forceinline__ float vec_dot_q2_K_q8_1(
14011401
const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int iqs) {
14021402

1403-
// #if __CUDA_ARCH__ >= 600 // lowest compute capability for integer intrinsics
1403+
#if __CUDA_ARCH__ >= 600 // lowest compute capability for integer intrinsics
14041404
const block_q2_K * bq2_K = (const block_q2_K *) vbq;
14051405

14061406
const int bq8_offset = 4 * (iqs/8);
@@ -1426,11 +1426,10 @@ static __device__ __forceinline__ float vec_dot_q2_K_q8_1(
14261426
sumf_m += d8i * (__dp4a(0x01010101, uii, 0) * (sc >> 4));
14271427
}
14281428

1429-
14301429
return d*sumf_d - dmin*sumf_m;
1431-
// #else
1432-
// return 0.0f; // only to satisfy the compiler
1433-
// #endif // __CUDA_ARCH__ >= 600
1430+
#else
1431+
return 0.0f; // only to satisfy the compiler
1432+
#endif // __CUDA_ARCH__ >= 600
14341433
}
14351434

14361435
template <int qk, int qi, typename block_q_t, vec_dot_q_cuda_t vec_dot_q_cuda>

0 commit comments

Comments
 (0)