@@ -251,13 +251,13 @@ void IR_Builder::generateSingleBarrier(G4_Predicate *prd) {
251
251
// single barrier: # producer = # consumer = # threads, barrier id = 0
252
252
// For now produce no fence
253
253
// Number of threads per threadgroup is r0.2[31:24]
254
- // mov (1) Hdr.2 <1>:ud 0x0
254
+ // mov (1) Hdr.4 <1>:uw 0x0:uw
255
255
// mov (2) Hdr.10<1>:ub R0.11<0;1,0>:ub
256
256
// This SIMD2 byte move is broadcasting the thread group size
257
257
// from the r0 header into both the producer and consumer slots.
258
258
// Hdr.2:d[31:24,23:16]
259
259
G4_Declare *header = createTempVar (8 , Type_UD, getGRFAlign ());
260
- auto dst = createDst (header->getRegVar (), 0 , 2 , 1 , Type_UD );
260
+ auto dst = createDst (header->getRegVar (), 0 , 4 , 1 , Type_UW );
261
261
uint32_t headerInitValDw2 = 0x0 ; // initial value for DWord2
262
262
if (getPlatform () >= Xe2 && getOption (vISA_ActiveThreadsOnlyBarrier)) {
263
263
headerInitValDw2 |= (1 << 8 );
@@ -268,7 +268,7 @@ void IR_Builder::generateSingleBarrier(G4_Predicate *prd) {
268
268
// bits[15:14] = 0 (producer/consumer)
269
269
// bits[23:16] = num producers = r0.11:b (r0.2[31:24] = num threads in tg)
270
270
// bits[31:24] = num consumers = r0.11:b (r0.2[31:24] = num threads in tg)
271
- auto src = createImm (headerInitValDw2, Type_UD );
271
+ auto src = createImm (headerInitValDw2, Type_UW );
272
272
auto inst0 = createMov (g4::SIMD1, dst, src, InstOpt_WriteEnable, true );
273
273
if (getPlatform () >= Xe2 && getOption (vISA_ActiveThreadsOnlyBarrier)) {
274
274
inst0->addComment (" signal barrier payload init (active only)" );
0 commit comments