Skip to content

Commit 3eff3c3

Browse files
committed
Call syclcompat::dp4a inside dpct::dp4a
This reverts commit 90cb61d.
1 parent 1c16516 commit 3eff3c3

File tree

3 files changed

+66
-60
lines changed

3 files changed

+66
-60
lines changed

ggml/src/ggml-sycl/dpct/helper.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
#include <sycl/sycl.hpp>
1717
#include <sycl/half_type.hpp>
18+
#include <syclcompat/math.hpp>
1819
#include <oneapi/mkl.hpp>
1920
#include <map>
2021

@@ -1830,6 +1831,12 @@ namespace dpct
18301831
: id);
18311832
}
18321833

1834+
template <typename T1, typename T2, typename T3>
1835+
inline auto dp4a(T1 a, T2 b, T3 c)
1836+
{
1837+
return syclcompat::dp4a(a, b, c);
1838+
}
1839+
18331840
struct sub_sat
18341841
{
18351842
template <typename T>

ggml/src/ggml-sycl/mmq.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -575,8 +575,8 @@ vec_dot_q2_K_q8_1_impl_mmq(const int *__restrict__ v, const int *__restrict__ u,
575575

576576
#pragma unroll
577577
for (int i = i0; i < i0 + QI8_1/2; ++i) {
578-
sumi_d_sc = syclcompat::dp4a(v[i], u[i], sumi_d_sc); // SIMD dot product
579-
sumi_m = syclcompat::dp4a(m, u[i],
578+
sumi_d_sc = dpct::dp4a(v[i], u[i], sumi_d_sc); // SIMD dot product
579+
sumi_m = dpct::dp4a(m, u[i],
580580
sumi_m); // multiply sum of q8_1 values with m
581581
}
582582

@@ -730,7 +730,7 @@ vec_dot_q3_K_q8_1_impl_mmq(const int *__restrict__ v, const int *__restrict__ u,
730730
int sumi_sc = 0;
731731

732732
for (int i = i0; i < i0 + QI8_1/2; ++i) {
733-
sumi_sc = syclcompat::dp4a(v[i], u[i], sumi_sc); // SIMD dot product
733+
sumi_sc = dpct::dp4a(v[i], u[i], sumi_sc); // SIMD dot product
734734
}
735735

736736
sumi += sumi_sc * scales[i0 / (QI8_1/2)];
@@ -873,7 +873,7 @@ static __dpct_inline__ float vec_dot_q4_K_q8_1_impl_mmq(
873873

874874
#pragma unroll
875875
for (int j = 0; j < QI8_1; ++j) {
876-
sumi_d = syclcompat::dp4a((v[j] >> (4 * i)) & 0x0F0F0F0F,
876+
sumi_d = dpct::dp4a((v[j] >> (4 * i)) & 0x0F0F0F0F,
877877
u[i * QI8_1 + j], sumi_d); // SIMD dot product
878878
}
879879

@@ -1018,7 +1018,7 @@ static __dpct_inline__ float vec_dot_q5_K_q8_1_impl_mmq(
10181018

10191019
#pragma unroll
10201020
for (int j = 0; j < QI8_1; ++j) {
1021-
sumi_d = syclcompat::dp4a(v[i * QI8_1 + j], u[i * QI8_1 + j],
1021+
sumi_d = dpct::dp4a(v[i * QI8_1 + j], u[i * QI8_1 + j],
10221022
sumi_d); // SIMD dot product
10231023
}
10241024

@@ -1156,14 +1156,14 @@ vec_dot_q6_K_q8_1_impl_mmq(const int *__restrict__ v, const int *__restrict__ u,
11561156

11571157
#pragma unroll
11581158
for (int i = i0; i < i0 + 2; ++i) {
1159-
sumi_d.x() = syclcompat::dp4a(v[2 * i + 0], u[2 * i + 0],
1159+
sumi_d.x() = dpct::dp4a(v[2 * i + 0], u[2 * i + 0],
11601160
sumi_d.x()); // SIMD dot product
1161-
sumi_d.x() = syclcompat::dp4a(v[2 * i + 1], u[2 * i + 1],
1161+
sumi_d.x() = dpct::dp4a(v[2 * i + 1], u[2 * i + 1],
11621162
sumi_d.x()); // SIMD dot product
11631163

1164-
sumi_d.y() = syclcompat::dp4a(v[2 * i + 4], u[2 * i + 4],
1164+
sumi_d.y() = dpct::dp4a(v[2 * i + 4], u[2 * i + 4],
11651165
sumi_d.y()); // SIMD dot product
1166-
sumi_d.y() = syclcompat::dp4a(v[2 * i + 5], u[2 * i + 5],
1166+
sumi_d.y() = dpct::dp4a(v[2 * i + 5], u[2 * i + 5],
11671167
sumi_d.y()); // SIMD dot product
11681168
}
11691169

ggml/src/ggml-sycl/vecdotq.hpp

Lines changed: 50 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
#define GGML_SYCL_VECDOTQ_HPP
1515

1616
#include "dpct/helper.hpp"
17-
#include "syclcompat/math.hpp"
1817

1918
typedef float (*vec_dot_q_sycl_t)(const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & iqs);
2019

@@ -90,14 +89,14 @@ static __dpct_inline__ float vec_dot_q2_K_q8_1_impl_mmvq(
9089
const int vi = (v >> (2*i)) & 0x03030303;
9190

9291
sumf_d +=
93-
d8[i] * (syclcompat::dp4a(vi, u[i], 0) * (sc & 0xF)); // SIMD dot product
92+
d8[i] * (dpct::dp4a(vi, u[i], 0) * (sc & 0xF)); // SIMD dot product
9493

9594
// fill int with 4x m
9695
int m = sc >> 4;
9796
m |= m << 8;
9897
m |= m << 16;
9998
sumf_m += d8[i] *
100-
syclcompat::dp4a(
99+
dpct::dp4a(
101100
m, u[i],
102101
0); // multiply constant q2_K part with sum of q8_1 values
103102
}
@@ -140,7 +139,7 @@ static __dpct_inline__ float vec_dot_q3_K_q8_1_impl_mmvq(
140139
const int vi =
141140
dpct::vectorized_binary<sycl::char4>(vil, vih, dpct::sub_sat());
142141

143-
sumf += d8[i] * (syclcompat::dp4a(vi, u[i], 0) * sc); // SIMD dot product
142+
sumf += d8[i] * (dpct::dp4a(vi, u[i], 0) * sc); // SIMD dot product
144143
}
145144

146145
return d3 * sumf;
@@ -163,11 +162,11 @@ static __dpct_inline__ float vec_dot_q4_K_q8_1_impl_vmmq(
163162
const int v1i = (v[1] >> (4*i)) & 0x0F0F0F0F;
164163

165164
const int dot1 =
166-
syclcompat::dp4a(v1i, u[2 * i + 1],
167-
syclcompat::dp4a(v0i, u[2 * i + 0], 0)); // SIMD dot product
165+
dpct::dp4a(v1i, u[2 * i + 1],
166+
dpct::dp4a(v0i, u[2 * i + 0], 0)); // SIMD dot product
168167
const int dot2 =
169-
syclcompat::dp4a(0x01010101, u[2 * i + 1],
170-
syclcompat::dp4a(0x01010101, u[2 * i + 0], 0)); // sum of u
168+
dpct::dp4a(0x01010101, u[2 * i + 1],
169+
dpct::dp4a(0x01010101, u[2 * i + 0], 0)); // sum of u
171170

172171
sumf_d += d8[i] * (dot1 * sc[i]);
173172
sumf_m += d8[i] * (dot2 * m[i]); // multiply constant part of q4_K with sum of q8_1 values
@@ -204,11 +203,11 @@ static __dpct_inline__ float vec_dot_q5_K_q8_1_impl_vmmq(
204203
const int v1i = vl1i | vh1i;
205204

206205
const int dot1 =
207-
syclcompat::dp4a(v0i, u[2 * i + 0],
208-
syclcompat::dp4a(v1i, u[2 * i + 1], 0)); // SIMD dot product
206+
dpct::dp4a(v0i, u[2 * i + 0],
207+
dpct::dp4a(v1i, u[2 * i + 1], 0)); // SIMD dot product
209208
const int dot2 =
210-
syclcompat::dp4a(0x01010101, u[2 * i + 0],
211-
syclcompat::dp4a(0x01010101, u[2 * i + 1], 0)); // sum of u
209+
dpct::dp4a(0x01010101, u[2 * i + 0],
210+
dpct::dp4a(0x01010101, u[2 * i + 1], 0)); // sum of u
212211

213212
sumf_d += d8[i] * (dot1 * sc[i]);
214213
sumf_m += d8[i] * (dot2 * m[i]);
@@ -244,7 +243,7 @@ vec_dot_q6_K_q8_1_impl_mmvq(const int &vl, const int &vh,
244243
const int vi = dpct::vectorized_binary<sycl::char4>(
245244
(vil | vih), 0x20202020, dpct::sub_sat()); // vi = (vil | vih) - 32
246245

247-
sumf += d8[i] * (syclcompat::dp4a(vi, u[i], 0) * sc); // SIMD dot product
246+
sumf += d8[i] * (dpct::dp4a(vi, u[i], 0) * sc); // SIMD dot product
248247
}
249248

250249
return d*sumf;
@@ -267,8 +266,8 @@ static __dpct_inline__ float vec_dot_q4_0_q8_1_impl(const int *v, const int *u,
267266
const int vi1 = (v[i] >> 4) & 0x0F0F0F0F;
268267

269268
// SIMD dot product of quantized values
270-
sumi = syclcompat::dp4a(vi0, u[2 * i + 0], sumi);
271-
sumi = syclcompat::dp4a(vi1, u[2 * i + 1], sumi);
269+
sumi = dpct::dp4a(vi0, u[2 * i + 0], sumi);
270+
sumi = dpct::dp4a(vi1, u[2 * i + 1], sumi);
272271
}
273272

274273
const sycl::float2 ds8f =
@@ -294,8 +293,8 @@ static __dpct_inline__ float vec_dot_q4_1_q8_1_impl(const int *v, const int *u,
294293
const int vi1 = (v[i] >> 4) & 0x0F0F0F0F;
295294

296295
// SIMD dot product of quantized values
297-
sumi = syclcompat::dp4a(vi0, u[2 * i + 0], sumi);
298-
sumi = syclcompat::dp4a(vi1, u[2 * i + 1], sumi);
296+
sumi = dpct::dp4a(vi0, u[2 * i + 0], sumi);
297+
sumi = dpct::dp4a(vi1, u[2 * i + 1], sumi);
299298
}
300299

301300
#ifdef GGML_SYCL_F16
@@ -332,15 +331,15 @@ vec_dot_q5_0_q8_1_impl(const int *vl, const int *vh, const int *u,
332331
vi0 |= (vh[i] << 11) & 0x00001000; // 1 -> 12
333332
vi0 |= (vh[i] << 18) & 0x00100000; // 2 -> 20
334333
vi0 |= (vh[i] << 25) & 0x10000000; // 3 -> 28
335-
sumi = syclcompat::dp4a(vi0, u[2 * i + 0],
334+
sumi = dpct::dp4a(vi0, u[2 * i + 0],
336335
sumi); // SIMD dot product of quantized values
337336

338337
int vi1 = (vl[i] >> 4) & 0x0F0F0F0F; // upper 4 qs bits, still need qh as 5th bits
339338
vi1 |= (vh[i] >> 12) & 0x00000010; // 16 -> 4
340339
vi1 |= (vh[i] >> 5) & 0x00001000; // 17 -> 12
341340
vi1 |= (vh[i] << 2) & 0x00100000; // 18 -> 20
342341
vi1 |= (vh[i] << 9) & 0x10000000; // 19 -> 28
343-
sumi = syclcompat::dp4a(vi1, u[2 * i + 1],
342+
sumi = dpct::dp4a(vi1, u[2 * i + 1],
344343
sumi); // SIMD dot product of quantized values
345344
}
346345

@@ -368,15 +367,15 @@ vec_dot_q5_1_q8_1_impl(const int *vl, const int *vh, const int *u,
368367
vi0 |= (vh[i] << 11) & 0x00001000; // 1 -> 12
369368
vi0 |= (vh[i] << 18) & 0x00100000; // 2 -> 20
370369
vi0 |= (vh[i] << 25) & 0x10000000; // 3 -> 28
371-
sumi = syclcompat::dp4a(vi0, u[2 * i + 0],
370+
sumi = dpct::dp4a(vi0, u[2 * i + 0],
372371
sumi); // SIMD dot product of quantized values
373372

374373
int vi1 = (vl[i] >> 4) & 0x0F0F0F0F; // upper 4 qs bits, still need qh as 5th bits
375374
vi1 |= (vh[i] >> 12) & 0x00000010; // 16 -> 4
376375
vi1 |= (vh[i] >> 5) & 0x00001000; // 17 -> 12
377376
vi1 |= (vh[i] << 2) & 0x00100000; // 18 -> 20
378377
vi1 |= (vh[i] << 9) & 0x10000000; // 19 -> 28
379-
sumi = syclcompat::dp4a(vi1, u[2 * i + 1],
378+
sumi = dpct::dp4a(vi1, u[2 * i + 1],
380379
sumi); // SIMD dot product of quantized values
381380
}
382381

@@ -413,7 +412,7 @@ static __dpct_inline__ float vec_dot_q8_0_q8_1_impl(const int *v, const int *u,
413412
#pragma unroll
414413
for (int i = 0; i < vdr; ++i) {
415414
// SIMD dot product of quantized values
416-
sumi = syclcompat::dp4a(v[i], u[i], sumi);
415+
sumi = dpct::dp4a(v[i], u[i], sumi);
417416
}
418417

419418
return d8_0*d8_1 * sumi;
@@ -429,7 +428,7 @@ static __dpct_inline__ float vec_dot_q8_1_q8_1_impl(const int *v, const int *u,
429428
#pragma unroll
430429
for (int i = 0; i < vdr; ++i) {
431430
// SIMD dot product of quantized values
432-
sumi = syclcompat::dp4a(v[i], u[i], sumi);
431+
sumi = dpct::dp4a(v[i], u[i], sumi);
433432
}
434433

435434
#ifdef GGML_SYCL_F16
@@ -678,10 +677,10 @@ vec_dot_q4_K_q8_1(const void *__restrict__ vbq,
678677
const int v1 = q4[0];
679678
const int v2 = q4[4];
680679

681-
const int dot1 = syclcompat::dp4a(ui2, v2 & 0x0f0f0f0f, syclcompat::dp4a(ui1, v1 & 0x0f0f0f0f, 0));
682-
const int dot2 = syclcompat::dp4a(ui4, (v2 >> 4) & 0x0f0f0f0f, syclcompat::dp4a(ui3, (v1 >> 4) & 0x0f0f0f0f, 0));
683-
const int dot3 = syclcompat::dp4a(0x01010101, ui2, syclcompat::dp4a(0x01010101, ui1, 0));
684-
const int dot4 = syclcompat::dp4a(0x01010101, ui4, syclcompat::dp4a(0x01010101, ui3, 0));
680+
const int dot1 = dpct::dp4a(ui2, v2 & 0x0f0f0f0f, dpct::dp4a(ui1, v1 & 0x0f0f0f0f, 0));
681+
const int dot2 = dpct::dp4a(ui4, (v2 >> 4) & 0x0f0f0f0f, dpct::dp4a(ui3, (v1 >> 4) & 0x0f0f0f0f, 0));
682+
const int dot3 = dpct::dp4a(0x01010101, ui2, dpct::dp4a(0x01010101, ui1, 0));
683+
const int dot4 = dpct::dp4a(0x01010101, ui4, dpct::dp4a(0x01010101, ui3, 0));
685684

686685
sumf_d += d8_1 * (dot1 * s[0]) + d8_2 * (dot2 * s[1]);
687686
sumf_m += d8_1 * (dot3 * s[2]) + d8_2 * (dot4 * s[3]);
@@ -773,8 +772,8 @@ vec_dot_q5_K_q8_1(const void *__restrict__ vbq,
773772
const int v3 = (((vh >> 0) & 0x10101010) ^ 0x10101010) | ((vl1 >> 4) & 0x0f0f0f0f);
774773
const int v4 = (((vh >> 2) & 0x10101010) ^ 0x10101010) | ((vl2 >> 4) & 0x0f0f0f0f);
775774

776-
const float sumf_d = d8_1 * (syclcompat::dp4a(ui1, v1, 0) * s[0] + syclcompat::dp4a(ui2, v2, 0) * s[1])
777-
+ d8_2 * (syclcompat::dp4a(ui3, v3, 0) * s[2] + syclcompat::dp4a(ui4, v4, 0) * s[3]);
775+
const float sumf_d = d8_1 * (dpct::dp4a(ui1, v1, 0) * s[0] + dpct::dp4a(ui2, v2, 0) * s[1])
776+
+ d8_2 * (dpct::dp4a(ui3, v3, 0) * s[2] + dpct::dp4a(ui4, v4, 0) * s[3]);
778777

779778
return d * sumf_d;
780779

@@ -866,8 +865,8 @@ vec_dot_iq2_xs_q8_1(const void *__restrict__ vbq,
866865
grid[0] ^ signs[0], signs[0], std::minus<>());
867866
const int grid_h = dpct::vectorized_binary<sycl::uchar4>(
868867
grid[1] ^ signs[1], signs[1], std::minus<>());
869-
sumi1 = syclcompat::dp4a(grid_l, *((const int *)q8 + 0), sumi1);
870-
sumi1 = syclcompat::dp4a(grid_h, *((const int *)q8 + 1), sumi1);
868+
sumi1 = dpct::dp4a(grid_l, *((const int *)q8 + 0), sumi1);
869+
sumi1 = dpct::dp4a(grid_h, *((const int *)q8 + 1), sumi1);
871870
q8 += 8;
872871
}
873872
int sumi2 = 0;
@@ -878,8 +877,8 @@ vec_dot_iq2_xs_q8_1(const void *__restrict__ vbq,
878877
grid[0] ^ signs[0], signs[0], std::minus<>());
879878
const int grid_h = dpct::vectorized_binary<sycl::uchar4>(
880879
grid[1] ^ signs[1], signs[1], std::minus<>());
881-
sumi2 = syclcompat::dp4a(grid_l, *((const int *)q8 + 0), sumi2);
882-
sumi2 = syclcompat::dp4a(grid_h, *((const int *)q8 + 1), sumi2);
880+
sumi2 = dpct::dp4a(grid_l, *((const int *)q8 + 0), sumi2);
881+
sumi2 = dpct::dp4a(grid_h, *((const int *)q8 + 1), sumi2);
883882
q8 += 8;
884883
}
885884
const float d = (float)bq2->d * bq8_1[ib32].ds[0] * 0.25f;
@@ -918,8 +917,8 @@ vec_dot_iq2_s_q8_1(const void *__restrict__ vbq,
918917
grid[0] ^ signs0, signs0, std::minus<>());
919918
const int grid_h = dpct::vectorized_binary<sycl::uchar4>(
920919
grid[1] ^ signs1, signs1, std::minus<>());
921-
sumi1 = syclcompat::dp4a(grid_l, *((const int *)q8 + 0), sumi1);
922-
sumi1 = syclcompat::dp4a(grid_h, *((const int *)q8 + 1), sumi1);
920+
sumi1 = dpct::dp4a(grid_l, *((const int *)q8 + 0), sumi1);
921+
sumi1 = dpct::dp4a(grid_h, *((const int *)q8 + 1), sumi1);
923922
q8 += 8;
924923
}
925924
int sumi2 = 0;
@@ -935,8 +934,8 @@ vec_dot_iq2_s_q8_1(const void *__restrict__ vbq,
935934
grid[0] ^ signs0, signs0, std::minus<>());
936935
const int grid_h = dpct::vectorized_binary<sycl::uchar4>(
937936
grid[1] ^ signs1, signs1, std::minus<>());
938-
sumi2 = syclcompat::dp4a(grid_l, *((const int *)q8 + 0), sumi2);
939-
sumi2 = syclcompat::dp4a(grid_h, *((const int *)q8 + 1), sumi2);
937+
sumi2 = dpct::dp4a(grid_l, *((const int *)q8 + 0), sumi2);
938+
sumi2 = dpct::dp4a(grid_h, *((const int *)q8 + 1), sumi2);
940939
q8 += 8;
941940
}
942941
const float d = (float)bq2->d * bq8_1[ib32].ds[0] * 0.25f;
@@ -969,8 +968,8 @@ vec_dot_iq3_xxs_q8_1(const void *__restrict__ vbq,
969968
grid1[0] ^ signs[0], signs[0], std::minus<>());
970969
const int grid_h = dpct::vectorized_binary<sycl::uchar4>(
971970
grid2[0] ^ signs[1], signs[1], std::minus<>());
972-
sumi = syclcompat::dp4a(grid_l, *((const int *)q8 + 0), sumi);
973-
sumi = syclcompat::dp4a(grid_h, *((const int *)q8 + 1), sumi);
971+
sumi = dpct::dp4a(grid_l, *((const int *)q8 + 0), sumi);
972+
sumi = dpct::dp4a(grid_h, *((const int *)q8 + 1), sumi);
974973
q8 += 8;
975974
aux32 >>= 7;
976975
}
@@ -1010,8 +1009,8 @@ vec_dot_iq3_s_q8_1(const void *__restrict__ vbq,
10101009
grid1[0] ^ signs0, signs0, std::minus<>());
10111010
const int grid_h = dpct::vectorized_binary<sycl::uchar4>(
10121011
grid2[0] ^ signs1, signs1, std::minus<>());
1013-
sumi = syclcompat::dp4a(grid_l, *((const int *)q8 + 0), sumi);
1014-
sumi = syclcompat::dp4a(grid_h, *((const int *)q8 + 1), sumi);
1012+
sumi = dpct::dp4a(grid_l, *((const int *)q8 + 0), sumi);
1013+
sumi = dpct::dp4a(grid_h, *((const int *)q8 + 1), sumi);
10151014
q8 += 8;
10161015
}
10171016
const float d =
@@ -1038,8 +1037,8 @@ vec_dot_iq1_s_q8_1(const void *__restrict__ vbq,
10381037
const int * grid = (const int *)(iq1s_grid_gpu + (bq1->qs[4*ib32+l] | (((bq1->qh[ib32] >> 3*l) & 7) << 8)));
10391038
int grid0 = grid[0] & 0x0f0f0f0f;
10401039
int grid1 = (grid[0] >> 4) & 0x0f0f0f0f;
1041-
sumi = syclcompat::dp4a(q8[2 * l + 1], grid1,
1042-
syclcompat::dp4a(q8[2 * l + 0], grid0, sumi));
1040+
sumi = dpct::dp4a(q8[2 * l + 1], grid1,
1041+
dpct::dp4a(q8[2 * l + 0], grid0, sumi));
10431042
}
10441043

10451044
const float delta = bq1->qh[ib32] & 0x8000 ? -1-IQ1S_DELTA : -1+IQ1S_DELTA;
@@ -1067,11 +1066,11 @@ vec_dot_iq1_m_q8_1(const void *__restrict__ vbq,
10671066
const int * grid = (const int *)(iq1s_grid_gpu + (bq1->qs[4*ib32+l] | (((bq1->qh[2*ib32+l/2] >> 4*(l%2)) & 7) << 8)));
10681067
int grid0 = grid[0] & 0x0f0f0f0f;
10691068
int grid1 = (grid[0] >> 4) & 0x0f0f0f0f;
1070-
sumi[l / 2] = syclcompat::dp4a(q8[2 * l + 1], grid1,
1071-
syclcompat::dp4a(q8[2 * l + 0], grid0, sumi[l / 2]));
1069+
sumi[l / 2] = dpct::dp4a(q8[2 * l + 1], grid1,
1070+
dpct::dp4a(q8[2 * l + 0], grid0, sumi[l / 2]));
10721071
const float delta = (bq1->qh[2*ib32+l/2] >> 4*(l%2)) & 0x08 ? -1-IQ1M_DELTA : -1+IQ1M_DELTA;
1073-
const int sumy = syclcompat::dp4a(q8[2 * l + 1], 0x01010101,
1074-
syclcompat::dp4a(q8[2 * l + 0], 0x01010101, 0));
1072+
const int sumy = dpct::dp4a(q8[2 * l + 1], 0x01010101,
1073+
dpct::dp4a(q8[2 * l + 0], 0x01010101, 0));
10751074
sumf[l/2] += delta*sumy;
10761075
}
10771076

@@ -1102,8 +1101,8 @@ vec_dot_iq4_nl_q8_1(const void *__restrict__ vbq,
11021101
for (int l = 0; l < VDR_Q4_0_Q8_1_MMVQ; ++l) {
11031102
const uint32_t aux = q4[2*l] | (q4[2*l+1] << 16);
11041103
get_int_from_table_16(aux, values, v1, v2);
1105-
sumi1 = syclcompat::dp4a(v1, q8[l + 0], sumi1);
1106-
sumi2 = syclcompat::dp4a(v2, q8[l + 4], sumi2);
1104+
sumi1 = dpct::dp4a(v1, q8[l + 0], sumi1);
1105+
sumi2 = dpct::dp4a(v2, q8[l + 4], sumi2);
11071106
}
11081107

11091108
const float d = (float)bq->d * bq8_1->ds[0];
@@ -1129,8 +1128,8 @@ vec_dot_iq4_xs_q8_1(const void *__restrict__ vbq,
11291128
int sumi1 = 0, sumi2 = 0;
11301129
for (int j = 0; j < 4; ++j) {
11311130
get_int_from_table_16(q4[j], values, v1, v2);
1132-
sumi1 = syclcompat::dp4a(v1, q8[j + 0], sumi1);
1133-
sumi2 = syclcompat::dp4a(v2, q8[j + 4], sumi2);
1131+
sumi1 = dpct::dp4a(v1, q8[j + 0], sumi1);
1132+
sumi2 = dpct::dp4a(v2, q8[j + 4], sumi2);
11341133
}
11351134
return d * (sumi1 + sumi2);
11361135
#else

0 commit comments

Comments
 (0)