reenable CUDA arch check

JohannesGaessler · JohannesGaessler · commit 6f2ea98609ca · 2023-07-12T10:43:37.000+02:00
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
@@ -1400,7 +1400,7 @@ static __device__ __forceinline__ float vec_dot_q8_0_q8_1(const void * __restric
 static __device__ __forceinline__ float vec_dot_q2_K_q8_1(
     const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int iqs) {
 
-// #if __CUDA_ARCH__ >= 600 // lowest compute capability for integer intrinsics
+#if __CUDA_ARCH__ >= 600 // lowest compute capability for integer intrinsics
     const block_q2_K * bq2_K = (const block_q2_K *) vbq;
 
     const int bq8_offset = 4 * (iqs/8);
@@ -1426,11 +1426,10 @@ static __device__ __forceinline__ float vec_dot_q2_K_q8_1(
         sumf_m += d8i * (__dp4a(0x01010101, uii, 0) * (sc >>  4));
     }
 
-
     return d*sumf_d - dmin*sumf_m;
-// #else
-//     return 0.0f; // only to satisfy the compiler
-// #endif // __CUDA_ARCH__ >= 600
+#else
+    return 0.0f; // only to satisfy the compiler
+#endif // __CUDA_ARCH__ >= 600
 }
 
 template <int qk, int qi, typename block_q_t, vec_dot_q_cuda_t vec_dot_q_cuda>