@@ -34,7 +34,7 @@ __device__ inline int calculate_input_coord(int out_coord, int kern_coord, int s
 
 // ───────────── Memory layout abstractions ─────────────
 
-struct WHCNLayout {
+struct whcn_layout {
     __device__ static int input_index(int n, int c, int y, int x, const conv_params & params) {
         return n * (params.channels * params.in_w * params.in_h) + c * params.in_w * params.in_h + y * params.in_w + x;
     }
@@ -57,7 +57,7 @@ struct WHCNLayout {
     }
 };
 
-struct CWHNLayout {
+struct cwhn_layout {
     __device__ static int input_index(int n, int c, int y, int x, const conv_params & params) {
         return n * (params.channels * params.in_w * params.in_h) + (y * params.in_w + x) * params.channels + c;
     }
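
The two structs differ only in which dimension varies fastest in memory: WHCN keeps width innermost (NCHW-style), CWHN keeps channels innermost (NHWC-style). A minimal host-side sketch, reusing the index formulas from the diff with a stripped-down conv_params that carries only the fields the math touches (an assumption for illustration, not the PR's struct), shows the practical difference: stepping x by one advances the flat index by 1 in WHCN but by the channel count in CWHN.

// Host-side sketch of the two index schemes; conv_params here is a
// simplified stand-in, not the full struct from the PR.
#include <cstdio>

struct conv_params { int in_w, in_h, channels; };

struct whcn_layout { // width innermost (NCHW-style)
    static int input_index(int n, int c, int y, int x, const conv_params & p) {
        return n * (p.channels * p.in_w * p.in_h) + c * p.in_w * p.in_h + y * p.in_w + x;
    }
};

struct cwhn_layout { // channels innermost (NHWC-style)
    static int input_index(int n, int c, int y, int x, const conv_params & p) {
        return n * (p.channels * p.in_w * p.in_h) + (y * p.in_w + x) * p.channels + c;
    }
};

int main() {
    conv_params p{ 4, 3, 2 }; // 4x3 image, 2 channels
    printf("whcn: %d -> %d\n", whcn_layout::input_index(0, 0, 1, 1, p),
           whcn_layout::input_index(0, 0, 1, 2, p)); // 5 -> 6: x-step of 1
    printf("cwhn: %d -> %d\n", cwhn_layout::input_index(0, 0, 1, 1, p),
           cwhn_layout::input_index(0, 0, 1, 2, p)); // 10 -> 12: x-step of channels
    return 0;
}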
@@ -125,10 +125,10 @@ __global__ void conv2d_dw_whcn_kernel(const T * __restrict__ in, const T * __res
                            stride_y, padding_x, padding_y, dilation_x, dilation_y, channels, batches };
 
     int batch_idx, channel_idx, out_y_idx, out_x_idx;
-    WHCNLayout::unpack_indices(global_idx, params, batch_idx, channel_idx, out_y_idx, out_x_idx);
+    whcn_layout::unpack_indices(global_idx, params, batch_idx, channel_idx, out_y_idx, out_x_idx);
 
-    T result = compute_conv2d_dw_pixel<T, WHCNLayout>(in, kern, params, batch_idx, channel_idx, out_y_idx, out_x_idx);
-    out[WHCNLayout::output_index(batch_idx, channel_idx, out_y_idx, out_x_idx, params)] = result;
+    T result = compute_conv2d_dw_pixel<T, whcn_layout>(in, kern, params, batch_idx, channel_idx, out_y_idx, out_x_idx);
+    out[whcn_layout::output_index(batch_idx, channel_idx, out_y_idx, out_x_idx, params)] = result;
 }
 
 template <typename T>
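
Each kernel maps one thread to one output element: the flat global index is unpacked into (batch, channel, y, x) in the layout's own order, the pixel is computed, and the result is stored through the same layout's output_index. The body of unpack_indices is outside this diff; purely as an illustration, a WHCN-order decomposition could look like the sketch below (field names out_w, out_h, channels assumed from the params initializer above).

// Hypothetical sketch only, not the body from this PR: decompose a flat
// thread index with x fastest, then y, then channel, then batch (WHCN).
__device__ static void unpack_indices(int idx, const conv_params & params,
                                      int & n, int & c, int & y, int & x) {
    x = idx % params.out_w;    idx /= params.out_w;    // width varies fastest
    y = idx % params.out_h;    idx /= params.out_h;    // then height
    c = idx % params.channels; idx /= params.channels; // then channel
    n = idx;                                           // batch is slowest
}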
@@ -148,11 +148,11 @@ __global__ void conv_2d_dw_cwhn_kernel(const T * __restrict__ in, const T * __re
                            stride_y, padding_x, padding_y, dilation_x, dilation_y, channels, batches };
 
     int batch_idx, channel_idx, out_y_idx, out_x_idx;
-    CWHNLayout::unpack_indices(global_idx, params, batch_idx, channel_idx, out_y_idx, out_x_idx);
+    cwhn_layout::unpack_indices(global_idx, params, batch_idx, channel_idx, out_y_idx, out_x_idx);
 
     const T result =
-        compute_conv2d_dw_pixel<T, CWHNLayout>(in, kern, params, batch_idx, channel_idx, out_y_idx, out_x_idx);
-    out[CWHNLayout::output_index(batch_idx, channel_idx, out_y_idx, out_x_idx, params)] = result;
+        compute_conv2d_dw_pixel<T, cwhn_layout>(in, kern, params, batch_idx, channel_idx, out_y_idx, out_x_idx);
+    out[cwhn_layout::output_index(batch_idx, channel_idx, out_y_idx, out_x_idx, params)] = result;
 }
 
 // ───────────── dispatcher ─────────────
@@ -197,7 +197,6 @@ void ggml_cuda_op_conv2d_dw(ggml_backend_cuda_context & ctx, ggml_tensor * dst)
             x_d, w_d, y_d, in_w, in_h, out_w, out_h, kernel_w, kernel_h, stride_x, stride_y, padding_x, padding_y,
             dilation_x, dilation_y, channels, batches);
     } else {
-        // Unsupported memory layout
         GGML_ABORT("Unsupported memory layout for conv_2d_dw");
     }
 }