Skip to content

Commit 82755ed

Browse files
committed
fix some compilation warnings
1 parent 0421009 commit 82755ed

File tree

1 file changed

+27
-16
lines changed

1 file changed

+27
-16
lines changed

ggml/src/ggml-amx/mmq.cpp

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2099,19 +2099,22 @@ void tinygemm_kernel_amx(int M, int N, int KB, const void * RESTRICT _A, const v
20992099
int32_t * C_cur = TileC0;
21002100
int32_t * C_pre = TileC1;
21012101

2102-
#define Tile4(base) base
2103-
#define Tile5(base) base + TILE_M * TILE_N
2104-
#define Tile6(base) base + 2 * TILE_M * TILE_N
2105-
#define Tile7(base) base + 3 * TILE_M * TILE_N
2102+
auto Tile4 = [&](int32_t * base) { return base; };
2103+
auto Tile5 = [&](int32_t * base) { return base + TILE_M * TILE_N; };
2104+
auto Tile6 = [&](int32_t * base) { return base + 2 * TILE_M * TILE_N; };
2105+
auto Tile7 = [&](int32_t * base) { return base + 3 * TILE_M * TILE_N; };
21062106

21072107
if (M == 2 * TILE_M) {
21082108
// i = 0
2109+
const char * B_blk0 = B + PACKED_INDEX(0, 0, KB, TILE_SIZE);
2110+
const char * B_blk1 = B + PACKED_INDEX(1, 0, KB, TILE_SIZE);
21092111
if (need_unpack) {
2110-
unpack_B<TB>(Tile0, B + PACKED_INDEX(0, 0, KB, TILE_SIZE));
2112+
unpack_B<TB>(Tile0, B_blk0);
21112113
_tile_loadd(TMM0, Tile0, TILE_N * VNNI_BLK);
21122114
} else {
2113-
_tile_loadd(TMM0, B + PACKED_INDEX(0, 0, KB, TILE_SIZE), TILE_N * VNNI_BLK);
2115+
_tile_loadd(TMM0, B_blk0, TILE_N * VNNI_BLK);
21142116
}
2117+
21152118
_tile_zero(TMM4);
21162119
_tile_loadd(TMM2, A[0].qs, lda);
21172120
_tile_dpbssd(TMM4, TMM2, TMM0);
@@ -2123,11 +2126,12 @@ void tinygemm_kernel_amx(int M, int N, int KB, const void * RESTRICT _A, const v
21232126
_tile_stored(TMM5, Tile5(C_pre), TILE_N * sizeof(int32_t));
21242127

21252128
if (need_unpack) {
2126-
unpack_B<TB>(Tile1, B + PACKED_INDEX(1, 0, KB, TILE_SIZE));
2129+
unpack_B<TB>(Tile1, B_blk1); // Tile1 holds the second B block (index 1), same source as the non-unpack branch
21272130
_tile_loadd(TMM1, Tile1, TILE_N * VNNI_BLK);
21282131
} else {
2129-
_tile_loadd(TMM1, B + PACKED_INDEX(1, 0, KB, TILE_SIZE), TILE_N * VNNI_BLK);
2132+
_tile_loadd(TMM1, B_blk1, TILE_N * VNNI_BLK);
21302133
}
2134+
21312135
_tile_zero(TMM6);
21322136
_tile_dpbssd(TMM6, TMM2, TMM1);
21332137
_tile_stored(TMM6, Tile6(C_pre), TILE_N * sizeof(int32_t));
@@ -2139,12 +2143,14 @@ void tinygemm_kernel_amx(int M, int N, int KB, const void * RESTRICT _A, const v
21392143
for (int i = 1; i < KB; ++i) {
21402144
// index of previous iter
21412145
const int ii = i - 1;
2146+
const char * B_blk0 = B + PACKED_INDEX(0, i, KB, TILE_SIZE);
2147+
const char * B_blk1 = B + PACKED_INDEX(1, i, KB, TILE_SIZE);
21422148
GGML_DISPATCH_BOOL(ii > 0, is_acc, [&] {
21432149
if (need_unpack) {
2144-
unpack_B<TB>(Tile0, B + PACKED_INDEX(0, i, KB, TILE_SIZE));
2150+
unpack_B<TB>(Tile0, B_blk0);
21452151
_tile_loadd(TMM0, Tile0, TILE_N * VNNI_BLK);
21462152
} else {
2147-
_tile_loadd(TMM0, B + PACKED_INDEX(0, i, KB, TILE_SIZE), TILE_N * VNNI_BLK);
2153+
_tile_loadd(TMM0, B_blk0, TILE_N * VNNI_BLK);
21482154
}
21492155
_tile_zero(TMM4);
21502156
_tile_loadd(TMM2, A[i].qs, lda);
@@ -2161,10 +2167,10 @@ void tinygemm_kernel_amx(int M, int N, int KB, const void * RESTRICT _A, const v
21612167
_tile_stored(TMM5, Tile5(C_cur), TILE_N * sizeof(int32_t));
21622168

21632169
if (need_unpack) {
2164-
unpack_B<TB>(Tile1, B + PACKED_INDEX(1, i, KB, TILE_SIZE));
2170+
unpack_B<TB>(Tile1, B_blk1);
21652171
_tile_loadd(TMM1, Tile1, TILE_N * VNNI_BLK);
21662172
} else {
2167-
_tile_loadd(TMM1, B + PACKED_INDEX(1, i, KB, TILE_SIZE), TILE_N * VNNI_BLK);
2173+
_tile_loadd(TMM1, B_blk1, TILE_N * VNNI_BLK);
21682174
}
21692175
_tile_zero(TMM6);
21702176
acc_C<TA, TB, is_acc>::apply(C + TILE_N, ldc, Tile6(C_pre), &A[ii], KB, B + PACKED_INDEX(1, ii, KB, TILE_SIZE), TILE_M);
@@ -2198,18 +2204,20 @@ void tinygemm_kernel_amx(int M, int N, int KB, const void * RESTRICT _A, const v
21982204
_tile_zero(TMM7);
21992205
}
22002206

2207+
const char * B_blk0 = B + PACKED_INDEX(0, i, KB, TILE_SIZE);
2208+
const char * B_blk1 = B + PACKED_INDEX(1, i, KB, TILE_SIZE);
22012209
if (need_unpack) {
2202-
unpack_B<TB>(Tile0, B + PACKED_INDEX(0, i, KB, TILE_SIZE));
2210+
unpack_B<TB>(Tile0, B_blk0);
22032211
_tile_loadd(TMM0, Tile0, TILE_N * VNNI_BLK);
22042212
} else {
2205-
_tile_loadd(TMM0, B + PACKED_INDEX(0, i, KB, TILE_SIZE), TILE_N * VNNI_BLK);
2213+
_tile_loadd(TMM0, B_blk0, TILE_N * VNNI_BLK);
22062214
}
22072215

22082216
if (need_unpack) {
2209-
unpack_B<TB>(Tile1, B + PACKED_INDEX(1, i, KB, TILE_SIZE));
2217+
unpack_B<TB>(Tile1, B_blk1);
22102218
_tile_loadd(TMM1, Tile1, TILE_N * VNNI_BLK);
22112219
} else {
2212-
_tile_loadd(TMM1, B + PACKED_INDEX(1, i, KB, TILE_SIZE), TILE_N * VNNI_BLK);
2220+
_tile_loadd(TMM1, B_blk1, TILE_N * VNNI_BLK);
22132221
}
22142222

22152223
if (m0 == TILE_M) {
@@ -2364,6 +2372,8 @@ bool ggml_compute_forward_mul_mat_use_amx(struct ggml_tensor * dst) {
23642372
const enum ggml_type type = src0->type;
23652373
const int64_t ne0 = dst->ne[0];
23662374

2375+
bool is_training = src0->grad || src1->grad;
2376+
23672377
// amx kernels enabled for Q4_0, Q4_1, Q8_0, F16
23682378
// Q4_K, Q5_K, Q6_K, IQ4_XS enabled for QK_K = 256
23692379
bool has_amx_kernels = (type == GGML_TYPE_Q4_0) ||
@@ -2386,6 +2396,7 @@ bool ggml_compute_forward_mul_mat_use_amx(struct ggml_tensor * dst) {
23862396
return dst->op != GGML_OP_MUL_MAT_ID &&
23872397
is_contiguous_2d(src0) &&
23882398
is_contiguous_2d(src1) &&
2399+
!is_training &&
23892400
src1->type == GGML_TYPE_F32 &&
23902401
has_amx_kernels &&
23912402
// out features is 32x

0 commit comments

Comments
 (0)