@@ -1258,7 +1258,7 @@ static __global__ void dequantize_block(const void * __restrict__ vx, float * __
 }
 
 static __device__ __forceinline__ float vec_dot_q4_0_q8_1(const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int iqs) {
-#if __CUDA_ARCH__ >= 600 // lowest compute capability for integer intrinsics
+#if __CUDA_ARCH__ >= 610 // lowest compute capability for integer intrinsics
     const block_q4_0 * bq4_0 = (const block_q4_0 *) vbq;
 
     int vi;
@@ -1279,11 +1279,11 @@ static __device__ __forceinline__ float vec_dot_q4_0_q8_1(const void * __restric
     return sumi*d;
 #else
     return 0.0f; // only to satisfy the compiler
-#endif // __CUDA_ARCH__ >= 600
+#endif // __CUDA_ARCH__ >= 610
 }
 
 static __device__ __forceinline__ float vec_dot_q4_1_q8_1(const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int iqs) {
-#if __CUDA_ARCH__ >= 600 // lowest compute capability for integer intrinsics
+#if __CUDA_ARCH__ >= 610 // lowest compute capability for integer intrinsics
     const block_q4_1 * bq4_1 = (const block_q4_1 *) vbq;
 
     const int vi = *((int *) &bq4_1->qs[sizeof(int) * (iqs + 0)]);
@@ -1304,11 +1304,11 @@ static __device__ __forceinline__ float vec_dot_q4_1_q8_1(const void * __restric
     return sumi*d + m*s / QI4_1; // scale sum by QI4_1 because there are QI4_1 threads working on this block
 #else
     return 0.0f; // only to satisfy the compiler
-#endif // __CUDA_ARCH__ >= 600
+#endif // __CUDA_ARCH__ >= 610
 }
 
 static __device__ __forceinline__ float vec_dot_q5_0_q8_1(const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int iqs) {
-#if __CUDA_ARCH__ >= 600 // lowest compute capability for integer intrinsics
+#if __CUDA_ARCH__ >= 610 // lowest compute capability for integer intrinsics
     const block_q5_0 * bq5_0 = (const block_q5_0 *) vbq;
 
     int qs;
@@ -1339,11 +1339,11 @@ static __device__ __forceinline__ float vec_dot_q5_0_q8_1(const void * __restric
     return sumi*d;
 #else
     return 0.0f; // only to satisfy the compiler
-#endif // __CUDA_ARCH__ >= 600
+#endif // __CUDA_ARCH__ >= 610
 }
 
 static __device__ __forceinline__ float vec_dot_q5_1_q8_1(const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int iqs) {
-#if __CUDA_ARCH__ >= 600 // lowest compute capability for integer intrinsics
+#if __CUDA_ARCH__ >= 610 // lowest compute capability for integer intrinsics
     const block_q5_1 * bq5_1 = (const block_q5_1 *) vbq;
 
     const int qs = *((int *) &bq5_1->qs[sizeof(int) * (iqs + 0)]);
@@ -1373,11 +1373,11 @@ static __device__ __forceinline__ float vec_dot_q5_1_q8_1(const void * __restric
     return sumi*d + m*s / QI5_1; // scale sum by QI5_1 because there are QI5_1 threads working on this block
 #else
     return 0.0f; // only to satisfy the compiler
-#endif // __CUDA_ARCH__ >= 600
+#endif // __CUDA_ARCH__ >= 610
 }
 
 static __device__ __forceinline__ float vec_dot_q8_0_q8_1(const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int iqs) {
-#if __CUDA_ARCH__ >= 600 // lowest compute capability for integer intrinsics
+#if __CUDA_ARCH__ >= 610 // lowest compute capability for integer intrinsics
    const block_q8_0 * bq8_0 = (const block_q8_0 *) vbq;
 
    int vi;
@@ -1392,7 +1392,7 @@ static __device__ __forceinline__ float vec_dot_q8_0_q8_1(const void * __restric
     return sumi*d;
 #else
     return 0.0f; // only to satisfy the compiler
-#endif // __CUDA_ARCH__ >= 600
+#endif // __CUDA_ARCH__ >= 610
 }
 
 template <int qk, int qi, typename block_q_t, vec_dot_q_cuda_t vec_dot_q_cuda>
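The device-side hunks above all make the same change: the preprocessor guard around the integer dot-product path moves from compute capability 6.0 to 6.1. The likely reason (an assumption here, since the kernel bodies are elided from this diff) is that these kernels accumulate their integer sum with the __dp4a intrinsic, which the hardware only exposes from sm_61 (Pascal GeForce parts) upward; sm_60 (P100) lacks it. Below is a minimal self-contained sketch of the same guard pattern, with a hypothetical toy_vec_dot standing in for the vec_dot_*_q8_1 functions:

#include <cstdio>

// Hypothetical stand-in for the guarded vec_dot_*_q8_1 kernels: dot two
// int8x4 values packed into ints with __dp4a, then scale by a float, the
// way the kernels above scale their integer sum by the block scale d.
static __device__ __forceinline__ float toy_vec_dot(int a, int b, float d) {
#if __CUDA_ARCH__ >= 610 // __dp4a exists from compute capability 6.1 on
    return __dp4a(a, b, 0) * d; // four int8 multiply-adds in one instruction
#else
    return 0.0f; // only to satisfy the compiler, as in the kernels above
#endif
}

__global__ void toy_kernel(float * out) {
    const int a = 0x01020304; // packs the bytes 4, 3, 2, 1
    const int b = 0x01010101; // packs the bytes 1, 1, 1, 1
    *out = toy_vec_dot(a, b, 0.5f); // (4+3+2+1) * 0.5 = 5.0 on sm_61+
}

int main() {
    float * out = nullptr;
    cudaMallocManaged(&out, sizeof(float));
    toy_kernel<<<1, 1>>>(out);
    cudaDeviceSynchronize();
    printf("%f\n", *out); // 5.000000 on sm_61+, 0.000000 on older arches
    cudaFree(out);
    return 0;
}

Because __CUDA_ARCH__ is a compile-time constant per architecture, the #else branch only exists to keep the function well-formed when nvcc compiles the file for targets below sm_61; a matching runtime check keeps those targets off this path entirely, as in the host-side hunk below.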
@@ -2417,7 +2417,7 @@ inline void ggml_cuda_op_mul_mat_vec(
         src0->type == GGML_TYPE_Q5_1 ||
         src0->type == GGML_TYPE_Q8_0;
 
-    const bool use_mul_mat_vec_q = g_compute_capabilities[id] >= 600 && mul_mat_vec_q_implemented;
+    const bool use_mul_mat_vec_q = g_compute_capabilities[id] >= 610 && mul_mat_vec_q_implemented;
 #endif
 
     if (use_mul_mat_vec_q) {
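The host-side counterpart mirrors the same threshold: g_compute_capabilities[id] is compared against 610 before the quantized mat-vec path is selected for a device. A hedged sketch of how such a value can be derived from the CUDA device properties; the 100*major + 10*minor encoding is an assumption inferred from the comparison above, not something shown in this diff:

#include <cstdio>

int main() {
    int device_count = 0;
    cudaGetDeviceCount(&device_count);
    for (int id = 0; id < device_count; ++id) {
        cudaDeviceProp prop;
        cudaGetDeviceProperties(&prop, id);
        // Assumed encoding: 100*major + 10*minor, so sm_61 -> 610.
        const int cc = 100*prop.major + 10*prop.minor;
        // Pascal GeForce parts (sm_61) pass the new threshold; an sm_60
        // P100 would take the non-integer-intrinsic fallback instead.
        printf("device %d: compute capability %d -> %s\n", id, cc,
               cc >= 610 ? "mul_mat_vec_q" : "fallback");
    }
    return 0;
}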