Skip to content

Commit 15d06d0

Browse files
committed
iq1_m: Metal now works
About the same performance as iq1_s.
1 parent 6011ac3 commit 15d06d0

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

ggml-metal.metal

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4520,7 +4520,7 @@ void kernel_mul_mv_iq1_m_f32_impl(
4520 4520   device const block_iq1_m * xr = x + ibl;
4521 4521   device const uint8_t * qs = xr->qs + 4 * ib;
4522 4522   device const uint8_t * qh = xr->qh + 2 * ib;
4523      - device const uint16_t * sc = (device const uint16_t *)xr->scales + ib/2;
     4523 + device const uint16_t * sc = (device const uint16_t *)xr->scales;
4524 4524
4525 4525   for (int row = 0; row < N_DST; row++) {
4526 4526
@@ -4540,8 +4540,8 @@ void kernel_mul_mv_iq1_m_f32_impl(
4540 4540   }
4541 4541   const float delta1 = sumy[0] * (qh[0] & 0x08 ? -1 - IQ1M_DELTA : -1 + IQ1M_DELTA) + sumy[1] * (qh[0] & 0x80 ? -1 - IQ1M_DELTA : -1 + IQ1M_DELTA);
4542 4542   const float delta2 = sumy[2] * (qh[1] & 0x08 ? -1 - IQ1M_DELTA : -1 + IQ1M_DELTA) + sumy[3] * (qh[1] & 0x80 ? -1 - IQ1M_DELTA : -1 + IQ1M_DELTA);
4543      - sumf[row] += (float)scale.f16 * ((sum[0] + delta1) * (2*((sc[0] >> (6*(ib%2)+0)) & 7) + 1) +
4544      - (sum[1] + delta2) * (2*((sc[0] >> (6*(ib%2)+3)) & 7) + 1));
     4543 + sumf[row] += (float)scale.f16 * ((sum[0] + delta1) * (2*((sc[ib/2] >> (6*(ib%2)+0)) & 7) + 1) +
     4544 + (sum[1] + delta2) * (2*((sc[ib/2] >> (6*(ib%2)+3)) & 7) + 1));
4545 4545
4546 4546   sc += nb*sizeof(block_iq1_m)/2;
4547 4547   qs += nb*sizeof(block_iq1_m);

0 commit comments

Comments
 (0)