Skip to content

Commit 8022c6a

Browse files
authored
[SYCL][ESIMD] Allow raw send API to use non standard types like sycl::half (#9090)
1 parent 09ceb10 commit 8022c6a

File tree

2 files changed

+56
-41
lines changed

2 files changed

+56
-41
lines changed

sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,12 @@ raw_sends(__ESIMD_NS::simd<T1, n1> msgDst, __ESIMD_NS::simd<T2, n2> msgSrc0,
8282
constexpr unsigned _Width3 = n3 * sizeof(T3);
8383
static_assert(_Width3 % 32 == 0, "Invalid size for raw send msgSrc1");
8484

85+
using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
86+
using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
87+
using ElemT3 = __ESIMD_DNS::__raw_t<T3>;
88+
8589
uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
86-
return __esimd_raw_sends2<T1, n1, T2, n2, T3, n3, N>(
90+
return __esimd_raw_sends2<ElemT1, n1, ElemT2, n2, ElemT3, n3, N>(
8791
modifier, execSize, mask.data(), numSrc0, numSrc1, numDst, sfid, exDesc,
8892
msgDesc, msgSrc0.data(), msgSrc1.data(), msgDst.data());
8993
}
@@ -133,8 +137,11 @@ raw_send(__ESIMD_NS::simd<T1, n1> msgDst, __ESIMD_NS::simd<T2, n2> msgSrc0,
133137
constexpr unsigned _Width2 = n2 * sizeof(T2);
134138
static_assert(_Width2 % 32 == 0, "Invalid size for raw send msgSrc0");
135139

140+
using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
141+
using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
142+
136143
uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
137-
return __esimd_raw_send2<T1, n1, T2, n2, N>(
144+
return __esimd_raw_send2<ElemT1, n1, ElemT2, n2, N>(
138145
modifier, execSize, mask.data(), numSrc0, numDst, sfid, exDesc, msgDesc,
139146
msgSrc0.data(), msgDst.data());
140147
}
@@ -181,8 +188,11 @@ raw_sends(__ESIMD_NS::simd<T1, n1> msgSrc0, __ESIMD_NS::simd<T2, n2> msgSrc1,
181188
constexpr unsigned _Width2 = n2 * sizeof(T2);
182189
static_assert(_Width2 % 32 == 0, "Invalid size for raw send msgSrc1");
183190

191+
using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
192+
using ElemT2 = __ESIMD_DNS::__raw_t<T2>;
193+
184194
uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
185-
__esimd_raw_sends2_noresult<T1, n1, T2, n2, N>(
195+
__esimd_raw_sends2_noresult<ElemT1, n1, ElemT2, n2, N>(
186196
modifier, execSize, mask.data(), numSrc0, numSrc1, sfid, exDesc, msgDesc,
187197
msgSrc0.data(), msgSrc1.data());
188198
}
@@ -225,11 +235,11 @@ raw_send(__ESIMD_NS::simd<T1, n1> msgSrc0, uint32_t exDesc, uint32_t msgDesc,
225235
uint8_t isSendc = 0, __ESIMD_NS::simd_mask<N> mask = 1) {
226236
constexpr unsigned _Width1 = n1 * sizeof(T1);
227237
static_assert(_Width1 % 32 == 0, "Invalid size for raw send msgSrc0");
228-
238+
using ElemT1 = __ESIMD_DNS::__raw_t<T1>;
229239
uint8_t modifier = ((isEOT & 0x1) << 1) | (isSendc & 0x1);
230-
__esimd_raw_send2_noresult<T1, n1, N>(modifier, execSize, mask.data(),
231-
numSrc0, sfid, exDesc, msgDesc,
232-
msgSrc0.data());
240+
__esimd_raw_send2_noresult<ElemT1, n1, N>(modifier, execSize, mask.data(),
241+
numSrc0, sfid, exDesc, msgDesc,
242+
msgSrc0.data());
233243
}
234244

235245
template <typename T1, int n1, int N = 16>
@@ -3216,4 +3226,4 @@ template <int SLMAmount> class slm_allocator {
32163226
} // namespace esimd
32173227
} // namespace ext::intel
32183228
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
3219-
} // namespace sycl
3229+
} // namespace sycl

sycl/test-e2e/ESIMD/vadd_raw_send.cpp

Lines changed: 38 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ ESIMD_INLINE void block_write2(AccessorTy acc, unsigned int offset,
8484
auto src0_ref2 = src0.template select<8, 1>(8);
8585

8686
src0_ref1.template select<1, 1>(2) = offset >> 4;
87-
src0_ref2 = data;
87+
src0_ref2 = data.template bit_cast_view<unsigned int>();
8888
uint32_t exDesc = 0xA;
8989
SurfaceIndex desc = esimd::get_surface_index(acc);
9090
desc += 0x40A0200;
@@ -100,50 +100,43 @@ ESIMD_INLINE void block_write2(AccessorTy acc, unsigned int offset,
100100
#endif
101101
}
102102

103-
int main(void) {
103+
template <typename T> int test(queue q) {
104104
constexpr unsigned Size = 1024 * 128;
105-
constexpr unsigned VL = 16;
106-
107-
float *A = new float[Size];
108-
float *B = new float[Size];
109-
float *C = new float[Size];
105+
constexpr unsigned VL = sizeof(T) == 4 ? 16 : 32;
106+
T *A = new T[Size];
107+
T *B = new T[Size];
108+
T *C = new T[Size];
110109

111110
for (unsigned i = 0; i < Size; ++i) {
112111
A[i] = B[i] = i;
113-
C[i] = 0.0f;
112+
C[i] = 0;
114113
}
115114

116115
try {
117-
buffer<float, 1> bufa(A, range<1>(Size));
118-
buffer<float, 1> bufb(B, range<1>(Size));
119-
buffer<float, 1> bufc(C, range<1>(Size));
116+
buffer<T, 1> bufa(A, range<1>(Size));
117+
buffer<T, 1> bufb(B, range<1>(Size));
118+
buffer<T, 1> bufc(C, range<1>(Size));
120119

121120
// We need that many workgroups
122121
range<1> GlobalRange{Size / VL};
123122

124123
// We need that many threads in each group
125124
range<1> LocalRange{1};
126125

127-
queue q(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler());
128-
129-
auto dev = q.get_device();
130-
std::cout << "Running on " << dev.get_info<sycl::info::device::name>()
131-
<< "\n";
132-
133126
auto e = q.submit([&](handler &cgh) {
134-
auto PA = bufa.get_access<access::mode::read>(cgh);
135-
auto PB = bufb.get_access<access::mode::read>(cgh);
136-
auto PC = bufc.get_access<access::mode::write>(cgh);
137-
cgh.parallel_for<class Test>(
127+
auto PA = bufa.template get_access<access::mode::read>(cgh);
128+
auto PB = bufb.template get_access<access::mode::read>(cgh);
129+
auto PC = bufc.template get_access<access::mode::write>(cgh);
130+
cgh.parallel_for(
138131
GlobalRange * LocalRange, [=](id<1> i) SYCL_ESIMD_KERNEL {
139-
unsigned int offset = i * VL * sizeof(float);
140-
simd<float, VL> va = dwaligned_block_read<float, VL>(PA, offset);
141-
simd<float, VL> vb = dwaligned_block_read<float, VL>(PB, offset);
142-
simd<float, VL> vc = va + vb;
132+
unsigned int offset = i * VL * sizeof(T);
133+
simd<T, VL> va = dwaligned_block_read<T, VL>(PA, offset);
134+
simd<T, VL> vb = dwaligned_block_read<T, VL>(PB, offset);
135+
simd<T, VL> vc = va + vb;
143136
constexpr int SIZE = VL / 2;
144-
block_write1(PC, offset, vc.select<SIZE, 1>(0).read());
145-
offset += SIZE * sizeof(float);
146-
block_write2(PC, offset, vc.select<SIZE, 1>(SIZE).read());
137+
block_write1(PC, offset, vc.template select<SIZE, 1>(0).read());
138+
offset += SIZE * sizeof(T);
139+
block_write2(PC, offset, vc.template select<SIZE, 1>(SIZE).read());
147140
});
148141
});
149142
e.wait();
@@ -166,16 +159,28 @@ int main(void) {
166159
}
167160
}
168161
}
169-
if (err_cnt > 0) {
170-
std::cout << " pass rate: "
171-
<< ((float)(Size - err_cnt) / (float)Size) * 100.0f << "% ("
172-
<< (Size - err_cnt) << "/" << Size << ")\n";
173-
}
174162

175163
delete[] A;
176164
delete[] B;
177165
delete[] C;
178166

179167
std::cout << (err_cnt > 0 ? "FAILED\n" : "Passed\n");
168+
return err_cnt;
169+
}
170+
171+
int main(void) {
172+
173+
queue q(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler());
174+
175+
auto dev = q.get_device();
176+
std::cout << "Running on " << dev.get_info<sycl::info::device::name>()
177+
<< "\n";
178+
int err_cnt = 0;
179+
180+
err_cnt += test<float>(q);
181+
err_cnt += test<sycl::ext::intel::experimental::esimd::tfloat32>(q);
182+
if (dev.has(sycl::aspect::fp16)) {
183+
err_cnt += test<sycl::half>(q);
184+
}
180185
return err_cnt > 0 ? 1 : 0;
181186
}

0 commit comments

Comments
 (0)