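small refactor: rename quantize_q8_1 parameters/locals (x -> src, vy -> vdst, y -> dst, row_stride -> ky_stride) and comment the padded vs. unpadded indexing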

JohannesGaessler · JohannesGaessler · commit 3c4a83ecbbcc · 2023-09-05T11:48:53.000+02:00
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
@@ -1415,8 +1415,8 @@ static __device__ void convert_f16(const void * vx, const int ib, const int iqs,
 }
 
 static __global__ void quantize_q8_1(
-    const float * __restrict__ x, void * __restrict__ vy, const int kx, const int kx_padded, const int ky,
-    const int row_stride, const int channel_stride) {
+    const float * __restrict__ src, void * __restrict__ vdst, const int kx, const int kx_padded, const int ky,
+    const int ky_stride, const int channel_stride) {
 
     const int ix = blockDim.x*blockIdx.x + threadIdx.x;
 
@@ -1427,14 +1427,17 @@ static __global__ void quantize_q8_1(
     const int iy      = blockDim.y*blockIdx.y + threadIdx.y;
     const int channel = blockDim.z*blockIdx.z + threadIdx.z;
 
+    // padded and contiguous:
     const int i_padded = channel*ky*kx_padded + iy*kx_padded + ix;
 
-    block_q8_1 * y = (block_q8_1 *) vy;
+    block_q8_1 * dst = (block_q8_1 *) vdst;
 
     const int ib = i_padded / QK8_1; // block index
     const int iqs = i_padded % QK8_1; // quant index
 
-    const float xi = ix < kx ? x[channel*channel_stride + iy*row_stride + ix] : 0.0f;
+    // not padded and not necessarily contiguous:
+    const float xi = ix < kx ? src[channel*channel_stride + iy*ky_stride + ix] : 0.0f;
+
     float amax = fabsf(xi);
     float sum = xi;
 
@@ -1447,14 +1450,14 @@ static __global__ void quantize_q8_1(
     const float d = amax / 127;
     const int8_t q = amax == 0.0f ? 0 : roundf(xi / d);
 
-    y[ib].qs[iqs] = q;
+    dst[ib].qs[iqs] = q;
 
     if (iqs > 0) {
         return;
     }
 
-    reinterpret_cast<half&>(y[ib].ds.x) = d;
-    reinterpret_cast<half&>(y[ib].ds.y) = sum;
+    reinterpret_cast<half&>(dst[ib].ds.x) = d;
+    reinterpret_cast<half&>(dst[ib].ds.y) = sum;
 }
 
 template <int qk, int qr, dequantize_kernel_t dequantize_kernel>