@@ -1244,7 +1244,7 @@ static __global__ void dequantize_mul_mat_vec(const void * vx, const dfloat * y,
1244
1244
}
1245
1245
1246
1246
static __global__ void mul_mat_p021_f16_f32 (const void * vx, const float * y, float * dst, const int ncols_x, const int nrows_x, const int nchannels_x) {
1247
- const half * x = (half *) vx;
1247
+ const half * x = (const half *) vx;
1248
1248
1249
1249
const int row_x = blockDim .y *blockIdx .y + threadIdx .y ;
1250
1250
const int channel = blockDim .z *blockIdx .z + threadIdx .z ;
@@ -1294,7 +1294,7 @@ static __global__ void mul_mat_vec_nc_f16_f32( // nc == non-contiguous
1294
1294
const void * vx, const float * y, float * dst, const int ncols_x, const int nrows_x,
1295
1295
const int row_stride_x, const int nchannels_x, const int channel_stride_x) {
1296
1296
1297
- const half * x = (half *) vx;
1297
+ const half * x = (const half *) vx;
1298
1298
1299
1299
const int row_x = blockDim .y *blockIdx .y + threadIdx .y ;
1300
1300
const int channel = blockDim .z *blockIdx .z + threadIdx .z ;
@@ -1337,14 +1337,14 @@ static __global__ void mul_mat_vec_nc_f16_f32( // nc == non-contiguous
1337
1337
}
1338
1338
1339
1339
static __device__ void cpy_1_f32_f32 (const char * cxi, char * cdsti) {
1340
- const float * xi = (float *) cxi;
1340
+ const float * xi = (const float *) cxi;
1341
1341
float * dsti = (float *) cdsti;
1342
1342
1343
1343
*dsti = *xi;
1344
1344
}
1345
1345
1346
1346
static __device__ void cpy_1_f32_f16 (const char * cxi, char * cdsti) {
1347
- const float * xi = (float *) cxi;
1347
+ const float * xi = (const float *) cxi;
1348
1348
half * dsti = (half *) cdsti;
1349
1349
1350
1350
*dsti = __float2half (*xi);
0 commit comments