Skip to content

Commit f8b61aa

Browse files
bowenxue-inteligcbot
authored andcommitted
WaveShuffleIndexSinking Bugfix and LIT coverage
Enhance pass to handle commutative ops, fix bug with lastAnchorIdx, and add/modify LIT tests to get more code coverage
1 parent 0145d7e commit f8b61aa

File tree

4 files changed

+154
-29
lines changed

4 files changed

+154
-29
lines changed

IGC/Compiler/Optimizer/WaveShuffleIndexSinking.cpp

Lines changed: 31 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,6 @@ namespace IGC
7373
// Attempt to match a new WaveShuffleIndex instruction to this ShuffleGroup
7474
bool match( WaveShuffleIndexIntrinsic* shuffleInst )
7575
{
76-
// ensure same source operand
77-
if( shuffleInst->getSrc() != ShuffleOps.front()->getSrc() )
78-
return false;
79-
// ensures lane/channel is uniform and WaveShuffleIndex is a broadcast operation
80-
if( !isa<ConstantInt>( shuffleInst->getChannel() ) )
81-
return false;
82-
8376
if( ShuffleOps.size() == 1 )
8477
{
8578
// Attempting to match with fresh ShuffleGroup, match the maximal number of instructions
@@ -270,24 +263,31 @@ namespace IGC
270263
// %3 = shl i32 %1, 2 <- anchorHoistedInst (Anchor path)
271264
// %4 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %2, i32 0, i32 0)
272265
// %5 = add i32 %4, %2 <- Anchor
266+
// Find the operand that originates from outside the chain to use in anchorHoistedInst
273267
auto* anchorOp0 = InstChains.front()[ anchorIdx ]->getOperand( 0 );
274268
auto* anchorOp1 = InstChains.front()[ anchorIdx ]->getOperand( 1 );
275269
Instruction* anchorOpPrev = ( anchorIdx == 0 ) ? cast<Instruction>( ShuffleOps.front() ) : InstChains.front()[ anchorIdx - 1 ];
276270
if( anchorOp0 == anchorOpPrev )
277271
{
278272
anchorHoistedInst->setOperand( chainOpIdx, anchorOp1 );
279-
for( auto& instChain : InstChains )
280-
{
281-
instChain[ anchorIdx ]->setOperand( 1, anchorHoistedInst );
282-
}
283-
284273
}
285274
else
286275
{
287276
anchorHoistedInst->setOperand( chainOpIdx, anchorOp0 );
288-
for( auto& instChain : InstChains )
277+
}
278+
279+
// Properly set the anchor instructions in all chains to use the new anchorHoistedInst
280+
for( unsigned i = 0; i < ShuffleOps.size(); i++ )
281+
{
282+
auto* anchorOp0 = InstChains[ i ][ anchorIdx ]->getOperand( 0 );
283+
Instruction* anchorOpPrev = ( anchorIdx == 0 ) ? cast<Instruction>( ShuffleOps[ i ] ) : InstChains[ i ][ anchorIdx - 1 ];
284+
if( anchorOp0 == anchorOpPrev )
289285
{
290-
instChain[ anchorIdx ]->setOperand( 0, anchorHoistedInst );
286+
InstChains[ i ][ anchorIdx ]->setOperand( 1, anchorHoistedInst );
287+
}
288+
else
289+
{
290+
InstChains[ i ][ anchorIdx ]->setOperand( 0, anchorHoistedInst );
291291
}
292292
}
293293
}
@@ -326,7 +326,7 @@ namespace IGC
326326
Instruction* rewirePrev = ( rewireIdx == 0 ) ? cast<Instruction>(ShuffleOps[ i ]) : InstChains[ i ][ rewireIdx - 1 ];
327327
unsigned rewireOpIdx = InstChains[ i ][ rewireIdx ]->getOperand( 0 ) == rewirePrev ? 0 : 1;
328328
InstChains[ i ][ rewireIdx ]->setOperand( rewireOpIdx, lastAnchor );
329-
lastAnchorIdx++;
329+
lastAnchorIdx = rewireIdx;
330330
}
331331
}
332332

@@ -586,25 +586,31 @@ unsigned WaveShuffleIndexSinkingImpl::compareWaveShuffleIndexes( WaveShuffleInde
586586
break;
587587

588588
// Check that both operands match
589-
// TODO: could be less restrictive for commutative instructions
590589
auto* opA0 = instA->getOperand( 0 );
591590
auto* opA1 = instA->getOperand( 1 );
592591
auto* opB0 = instB->getOperand( 0 );
593592
auto* opB1 = instB->getOperand( 1 );
594593

595-
if( opA0 == curInstA && opB0 == curInstB )
594+
if( instA->isCommutative() )
596595
{
597-
if( opA1 != opB1 )
598-
break;
599-
}
600-
else if( opA1 == curInstA && opB1 == curInstB )
601-
{
602-
if( opA0 != opB0 )
596+
// covers all four cases
597+
// ex.
598+
// add i32 %ws1, %a | add i32 %a, %ws1
599+
// ... | ...
600+
// add i32 %ws2, %a | add i32 %a, %ws2
601+
//-------------------|-----------------
602+
// add i32 %ws1, %a | add i32 %a, %ws1
603+
// ... | ...
604+
// add i32 %a, %ws2 | add i32 %ws2, %a
605+
if( !( opA0 == curInstA && opB0 == curInstB && opA1 == opB1 ) && !( opA0 == curInstA && opB1 == curInstB && opA1 == opB0 ) &&
606+
!( opA1 == curInstA && opB0 == curInstB && opA0 == opB1 ) && !( opA1 == curInstA && opB1 == curInstB && opA0 == opB0 ) )
603607
break;
604608
}
605609
else
606610
{
607-
break;
611+
// covers the 2 cases in row 1 above
612+
if( !( opA0 == curInstA && opB0 == curInstB && opA1 == opB1 ) && !( opA1 == curInstA && opB1 == curInstB && opA0 == opB0 ) )
613+
break;
608614
}
609615

610616
if( isHoistable( instA ) )
@@ -666,6 +672,7 @@ bool WaveShuffleIndexSinkingImpl::isHoistableOverAnchor( BinaryOperator* instToH
666672

667673
// X & (Y | Z) <--> (X & Y) | (X & Z)
668674
// X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
675+
// In practice, FirstOp is unlikely to be And, Or, or Xor as they would themselves be hoistable and thus, never an anchor inst
669676
if( SecondOp == Instruction::And )
670677
return FirstOp == Instruction::Or || FirstOp == Instruction::Xor;
671678

IGC/Compiler/tests/WaveShuffleIndexSinking/basic.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
;
1212
; Verifies that four WaveShuffleIndex instructions with the same source and a constant channel get subsequent instructions checked and hoisted
1313
; Each WaveShuffleIndex instruction is in turn fed into an add, and then a shl
14-
; The second operand of the add is not a constant, so the add is considered an anchor instruction
14+
; The first operand of the add is not a constant, so the add is considered an anchor instruction
1515
; The second operand of the shl is a constant, so the shl is considered a hoistable instruction
1616
; Due to distributive properties, the shl is allowed to be hoisted above the add, and afterwards, above all the WaveShuffleIndex instructions
1717
; Since there are 4 WaveShuffleIndex instructions in the ShuffleGroup, we can trade a shl on the source of the WaveShuffleIndex and a shl on the second operand of the add for removing all 4 shl instructions operating on the result of each add
@@ -23,8 +23,8 @@ define void @test_wave_shuffle_index_sinking(i32* %dst0, i32* %dst1, i32* %dst2,
2323
; CHECK: [[WS0:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 [[HOISTED]], i32 0, i32 0)
2424
%ws0 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 0, i32 0)
2525
; CHECK: [[ANCHOR_HOISTED:%.*]] = shl i32 %b, 2
26-
; CHECK-NEXT: [[ANCHOR0:%.*]] = add i32 [[WS0]], [[ANCHOR_HOISTED]]
27-
%add0 = add i32 %ws0, %b
26+
; CHECK-NEXT: [[ANCHOR0:%.*]] = add i32 [[ANCHOR_HOISTED]], [[WS0]]
27+
%add0 = add i32 %b, %ws0
2828
%shl0 = shl i32 %add0, 2
2929
; CHECK: store i32 [[ANCHOR0]], i32* %dst0
3030
store i32 %shl0, i32* %dst0
@@ -37,8 +37,8 @@ define void @test_wave_shuffle_index_sinking(i32* %dst0, i32* %dst1, i32* %dst2,
3737
store i32 %shl1, i32* %dst1
3838
; CHECK: [[WS2:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 [[HOISTED]], i32 2, i32 0)
3939
%ws2 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 2, i32 0)
40-
; CHECK: [[ANCHOR2:%.*]] = add i32 [[WS2]], [[ANCHOR_HOISTED]]
41-
%add2 = add i32 %ws2, %b
40+
; CHECK: [[ANCHOR2:%.*]] = add i32 [[ANCHOR_HOISTED]], [[WS2]]
41+
%add2 = add i32 %b, %ws2
4242
%shl2 = shl i32 %add2, 2
4343
; CHECK: store i32 [[ANCHOR2]], i32* %dst2
4444
store i32 %shl2, i32* %dst2
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
; RUN: igc_opt -igc-wave-shuffle-index-sinking -S < %s | FileCheck %s
9+
; ------------------------------------------------
10+
; WaveShuffleIndexSinking
11+
;
12+
; Test distributive property for commutative mul instruction, and also test unhoistable add with constant operand since add does not distribute over mul
13+
; However, without any preceding anchor, add i32 %ws, 1 is hoistable since it is just an add with a constant
14+
; ------------------------------------------------
15+
16+
define void @test_compare_cases(i32* %dst0, i32* %dst1, i32* %dst2, i32* %dst3, i32* %dst4, i32* %dst5, i32 %a, i32 %b) {
17+
; CHECK: [[HOISTED:%.*]] = mul i32 %a, 2
18+
; CHECK: [[WS0:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 [[HOISTED]], i32 0, i32 0)
19+
%ws0 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 0, i32 0)
20+
; CHECK: [[ANCHOR0_HOISTED:%.*]] = mul i32 %b, 2
21+
; CHECK: [[ANCHOR0_WS0:%.*]] = add i32 [[ANCHOR0_HOISTED]], [[WS0]]
22+
%add0 = add i32 %b, %ws0
23+
%mul0 = mul i32 %add0, 2
24+
; CHECK: [[ANCHOR1_WS0:%.*]] = add i32 [[ANCHOR0_WS0]], 1
25+
%add0_2 = add i32 %mul0, 1
26+
; CHECK: store i32 [[ANCHOR1_WS0]], i32* %dst0
27+
store i32 %add0_2, i32* %dst0
28+
; CHECK: [[WS1:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 [[HOISTED]], i32 1, i32 0)
29+
%ws1 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 1, i32 0)
30+
; CHECK: [[ANCHOR0_WS1:%.*]] = add i32 [[ANCHOR0_HOISTED]], [[WS1]]
31+
%add1 = add i32 %b, %ws1
32+
%mul1 = mul i32 %add1, 2
33+
; CHECK: [[ANCHOR1_WS1:%.*]] = add i32 [[ANCHOR0_WS1]], 1
34+
%add1_2 = add i32 %mul1, 1
35+
; CHECK: store i32 [[ANCHOR1_WS1]], i32* %dst1
36+
store i32 %add1_2, i32* %dst1
37+
; CHECK: [[WS2:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 [[HOISTED]], i32 2, i32 0)
38+
%ws2 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 2, i32 0)
39+
; CHECK: [[ANCHOR0_WS2:%.*]] = add i32 [[ANCHOR0_HOISTED]], [[WS2]]
40+
%add2 = add i32 %b, %ws2
41+
%mul2 = mul i32 %add2, 2
42+
; CHECK: [[ANCHOR1_WS2:%.*]] = add i32 [[ANCHOR0_WS2]], 1
43+
%add2_2 = add i32 %mul2, 1
44+
; CHECK: store i32 [[ANCHOR1_WS2]], i32* %dst2
45+
store i32 %add2_2, i32* %dst2
46+
; CHECK: [[WS3:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 [[HOISTED]], i32 3, i32 0)
47+
%ws3 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 3, i32 0)
48+
; CHECK: [[ANCHOR0_WS3:%.*]] = add i32 [[ANCHOR0_HOISTED]], [[WS3]]
49+
%add3 = add i32 %b, %ws3
50+
%mul3 = mul i32 %add3, 2
51+
; CHECK: [[ANCHOR1_WS3:%.*]] = add i32 [[ANCHOR0_WS3]], 1
52+
%add3_2 = add i32 %mul3, 1
53+
; CHECK: store i32 [[ANCHOR1_WS3]], i32* %dst3
54+
store i32 %add3_2, i32* %dst3
55+
56+
; CHECK: [[HOISTED2:%.*]] = add i32 %a, 4
57+
; CHECK: [[WS4:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 [[HOISTED2]], i32 4, i32 0)
58+
%ws4 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 4, i32 0)
59+
%add4 = add i32 %ws4, 4
60+
; CHECK: store i32 [[WS4]], i32* %dst4
61+
store i32 %add4, i32* %dst4
62+
; CHECK: [[WS5:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 [[HOISTED2]], i32 5, i32 0)
63+
%ws5 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 5, i32 0)
64+
%add5 = add i32 %ws5, 4
65+
; CHECK: store i32 [[WS5]], i32* %dst5
66+
store i32 %add5, i32* %dst5
67+
ret void
68+
}
69+
70+
; Function Attrs: convergent nounwind readnone
71+
declare i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32, i32, i32) #0
72+
73+
attributes #0 = { convergent nounwind readnone }
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
; RUN: igc_opt -igc-wave-shuffle-index-sinking -S < %s | FileCheck %s
9+
; ------------------------------------------------
10+
; WaveShuffleIndexSinking
11+
;
12+
; Unprofitable to hoist the shl up even though it is allowed, because doing so will add 3 shl in exchange for removing 2 shl
13+
; ------------------------------------------------
14+
15+
define void @test_unprofitable(i32* %dst0, i32* %dst1, i32 %a, i32 %b, i32 %c) {
16+
; CHECK-NOT: shl i32 %a, 2
17+
; CHECK: [[WS0:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 0, i32 0)
18+
%ws0 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 0, i32 0)
19+
; CHECK-NOT: shl i32 %b, 2
20+
; CHECK: [[ANCHOR0_WS0:%.*]] = add i32 [[WS0]], %b
21+
%add0 = add i32 %ws0, %b
22+
; CHECK-NOT: shl i32 %c, 2
23+
; CHECK: [[ANCHOR1_WS0:%.*]] = mul i32 [[ANCHOR0_WS0]], %c
24+
%mul0 = mul i32 %add0, %c
25+
; CHECK: [[UNPROFITABLE_WS0:%.*]] = shl i32 [[ANCHOR1_WS0]], 2
26+
%shl0 = shl i32 %mul0, 2
27+
; CHECK: store i32 [[UNPROFITABLE_WS0]], i32* %dst0
28+
store i32 %shl0, i32* %dst0
29+
; CHECK: [[WS1:%.*]] = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 1, i32 0)
30+
%ws1 = call i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32 %a, i32 1, i32 0)
31+
; CHECK: [[ANCHOR0_WS1:%.*]] = add i32 [[WS1]], %b
32+
%add1 = add i32 %ws1, %b
33+
; CHECK: [[ANCHOR1_WS1:%.*]] = mul i32 [[ANCHOR0_WS1]], %c
34+
%mul1 = mul i32 %add1, %c
35+
; CHECK: [[UNPROFITABLE_WS1:%.*]] = shl i32 [[ANCHOR1_WS1]], 2
36+
%shl1 = shl i32 %mul1, 2
37+
; CHECK: store i32 [[UNPROFITABLE_WS1]], i32* %dst1
38+
store i32 %shl1, i32* %dst1
39+
ret void
40+
}
41+
42+
; Function Attrs: convergent nounwind readnone
43+
declare i32 @llvm.genx.GenISA.WaveShuffleIndex.i32(i32, i32, i32) #0
44+
45+
attributes #0 = { convergent nounwind readnone }

0 commit comments

Comments
 (0)