Skip to content

Commit 86a934c

Browse files
dguzhaev and igcbot
authored and committed
SplitIndirectEEtoSel on OverflowingBinaryOperators
* Don't return early on overflowing index instructions that lack nsw/nuw; continue without updating the index instead
* Check for overflow on the shl pattern
* Added/updated LIT tests
1 parent ae59de1 commit 86a934c

File tree

4 files changed

+477
-106
lines changed

4 files changed

+477
-106
lines changed

IGC/Compiler/CustomSafeOptPass.cpp

Lines changed: 30 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -5828,15 +5828,6 @@ void SplitIndirectEEtoSel::visitExtractElementInst(llvm::ExtractElementInst& I)
58285828
return;
58295829
}
58305830

5831-
// ignore if index instruction is OverflowingBinaryOperator and doesn't have nsw or nuw
5832-
if (OverflowingBinaryOperator* indexOp = dyn_cast<OverflowingBinaryOperator>(index))
5833-
{
5834-
if (!indexOp->hasNoSignedWrap() && !indexOp->hasNoUnsignedWrap())
5835-
{
5836-
return;
5837-
}
5838-
}
5839-
58405831
// used to calculate offsets
58415832
int64_t add = 0;
58425833
int64_t mul = 1;
@@ -5847,26 +5838,46 @@ void SplitIndirectEEtoSel::visitExtractElementInst(llvm::ExtractElementInst& I)
58475838
%271 = extractelement <12 x float> %234, i32 %270
58485839
*/
58495840
Value* Val1 = nullptr;
5841+
Value* Val2 = nullptr;
58505842
ConstantInt* ci_add = nullptr;
58515843
ConstantInt* ci_mul = nullptr;
58525844

5853-
auto pat1 = m_Add(m_Mul(m_Value(Val1), m_ConstantInt(ci_mul)), m_ConstantInt(ci_add));
5854-
auto pat2 = m_Mul(m_Value(Val1), m_ConstantInt(ci_mul));
5845+
auto pat_add = m_Add(m_Value(Val2), m_ConstantInt(ci_add));
5846+
auto pat_mul = m_Mul(m_Value(Val1), m_ConstantInt(ci_mul));
58555847
// Some code shows `shl+or` instead of mul+add.
5856-
auto pat21 = m_Or(m_Shl(m_Value(Val1), m_ConstantInt(ci_mul)), m_ConstantInt(ci_add));
5857-
auto pat22 = m_Shl(m_Value(Val1), m_ConstantInt(ci_mul));
5848+
auto pat_or = m_Or(m_Value(Val2), m_ConstantInt(ci_add));
5849+
auto pat_shl = m_Shl(m_Value(Val1), m_ConstantInt(ci_mul));
58585850

5859-
if (match(index, pat1) || match(index, pat2))
5851+
if (match(index, pat_mul) || (match(index, pat_add) && match(Val2, pat_mul)))
58605852
{
5861-
add = ci_add ? ci_add->getSExtValue() : 0;
58625853
mul = ci_mul ? ci_mul->getSExtValue() : 1;
5863-
index = Val1;
58645854
}
5865-
else if (match(index, pat21) || match(index, pat22))
5855+
else if (match(index, pat_shl) || (match(index, pat_or) && match(Val2, pat_shl)))
58665856
{
5867-
add = ci_add ? ci_add->getSExtValue() : 0;
58685857
mul = ci_mul ? (1LL << ci_mul->getSExtValue()) : 1LL;
5869-
index = Val1;
5858+
}
5859+
// Instruction::hasPoisonGeneratingFlags() could be used instead
5860+
// after llvm9 support is dropped
5861+
auto hasNoOverflow = [](Value* value) {
5862+
if (OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(value))
5863+
return OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap();
5864+
return true;
5865+
};
5866+
5867+
// If a pattern matched, check that the corresponding index calculation has nsw or nuw
5868+
if (Val1)
5869+
{
5870+
// Transformation could still be profitable,
5871+
// but index and its multiplier shouldn't be modified
5872+
if (!hasNoOverflow(index) || (Val2 && !hasNoOverflow(Val2)))
5873+
{
5874+
mul = 1;
5875+
}
5876+
else
5877+
{
5878+
add = ci_add ? ci_add->getSExtValue() : 0;
5879+
index = Val1;
5880+
}
58705881
}
58715882

58725883
if (!isProfitableToSplit(num, mul, add))
Lines changed: 62 additions & 87 deletions
Original file line number | Diff line number | Diff line change
@@ -1,110 +1,85 @@
11
;=========================== begin_copyright_notice ============================
22
;
3-
; Copyright (C) 2022 Intel Corporation
3+
; Copyright (C) 2024 Intel Corporation
44
;
55
; SPDX-License-Identifier: MIT
66
;
77
;============================ end_copyright_notice =============================
88
;
9-
; RUN: igc_opt -enable-debugify -SplitIndirectEEtoSel -S < %s 2>&1 | FileCheck %s
9+
; RUN: igc_opt -SplitIndirectEEtoSel -S < %s 2>&1 | FileCheck %s
1010
; ------------------------------------------------
1111
; SplitIndirectEEtoSel
1212
; ------------------------------------------------
13-
; Debug-info related check
1413
;
15-
; CHECK-NOT: WARNING
16-
; CHECK: CheckModuleDebugify: PASS
17-
define void @test_nuw(i32 %src1, <12 x float> %src2, float* %dst) {
18-
; CHECK-LABEL: @test_nuw(
19-
; CHECK: [[TMP1:%.*]] = icmp eq i32 [[SRC1:%.*]], 0
20-
; CHECK: [[TMP2:%.*]] = extractelement <12 x float> [[SRC2:%.*]], i32 0
21-
; CHECK: [[TMP3:%.*]] = select i1 [[TMP1]], float [[TMP2]], float undef
22-
; CHECK: [[TMP4:%.*]] = icmp eq i32 [[SRC1]], 1
23-
; CHECK: [[TMP5:%.*]] = extractelement <12 x float> [[SRC2]], i32 3
24-
; CHECK: [[TMP6:%.*]] = select i1 [[TMP4]], float [[TMP5]], float [[TMP3]]
25-
; CHECK: [[TMP7:%.*]] = icmp eq i32 [[SRC1]], 2
26-
; CHECK: [[TMP8:%.*]] = extractelement <12 x float> [[SRC2]], i32 6
27-
; CHECK: [[TMP9:%.*]] = select i1 [[TMP7]], float [[TMP8]], float [[TMP6]]
28-
; CHECK: [[TMP10:%.*]] = icmp eq i32 [[SRC1]], 3
29-
; CHECK: [[TMP11:%.*]] = extractelement <12 x float> [[SRC2]], i32 9
30-
; CHECK: [[TMP12:%.*]] = select i1 [[TMP10]], float [[TMP11]], float [[TMP9]]
31-
; CHECK: store float [[TMP12]], float* [[DST:%.*]], align 4
32-
; CHECK: [[TMP13:%.*]] = add i32 [[TMP18:%.*]], 1
33-
; CHECK: [[TMP14:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP13]]
34-
; CHECK: store float [[TMP14]], float* [[DST]], align 4
35-
; CHECK: [[TMP15:%.*]] = add i32 [[TMP18]], 2
36-
; CHECK: [[TMP17:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP15]]
37-
; CHECK: store float [[TMP17]], float* [[DST]], align 4
38-
; CHECK: ret void
14+
; This test checks argument and instruction as index
15+
; for extractelement for SplitIndirectEEtoSel pass
16+
; no special patterns matched
17+
; ------------------------------------------------
18+
19+
; ------------------------------------------------
20+
; Case1: index is argument, profitable
21+
; ------------------------------------------------
22+
23+
define void @test_arg(i32 %src1, <4 x float> %src2, float* %dst) {
24+
; CHECK-LABEL: @test_arg(
25+
; CHECK: [[TMP1:%.*]] = extractelement <4 x float> [[SRC2:%.*]], i32 [[SRC1:%.*]]
26+
; CHECK: [[TMP2:%.*]] = icmp eq i32 [[SRC1]], 0
27+
; CHECK: [[TMP3:%.*]] = extractelement <4 x float> [[SRC2]], i32 0
28+
; CHECK: [[TMP4:%.*]] = select i1 [[TMP2]], float [[TMP3]], float undef
29+
; CHECK: [[TMP5:%.*]] = icmp eq i32 [[SRC1]], 1
30+
; CHECK: [[TMP6:%.*]] = extractelement <4 x float> [[SRC2]], i32 1
31+
; CHECK: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP6]], float [[TMP4]]
32+
; CHECK: [[TMP8:%.*]] = icmp eq i32 [[SRC1]], 2
33+
; CHECK: [[TMP9:%.*]] = extractelement <4 x float> [[SRC2]], i32 2
34+
; CHECK: [[TMP10:%.*]] = select i1 [[TMP8]], float [[TMP9]], float [[TMP7]]
35+
; CHECK: [[TMP11:%.*]] = icmp eq i32 [[SRC1]], 3
36+
; CHECK: [[TMP12:%.*]] = extractelement <4 x float> [[SRC2]], i32 3
37+
; CHECK: [[TMP13:%.*]] = select i1 [[TMP11]], float [[TMP12]], float [[TMP10]]
38+
; CHECK: store float [[TMP13]], float* [[DST:%.*]], align 4
3939
;
40-
%1 = mul nuw i32 %src1, 3
41-
%2 = extractelement <12 x float> %src2, i32 %1
42-
store float %2, float* %dst, align 4
43-
%3 = add i32 %1, 1
44-
%4 = extractelement <12 x float> %src2, i32 %3
45-
store float %4, float* %dst, align 4
46-
%5 = add i32 %1, 2
47-
%6 = extractelement <12 x float> %src2, i32 %5
48-
store float %6, float* %dst, align 4
40+
%1 = extractelement <4 x float> %src2, i32 %src1
41+
store float %1, float* %dst, align 4
4942
ret void
5043
}
5144

52-
define void @test_nsw(i32 %src1, <12 x float> %src2, float* %dst) {
53-
; CHECK-LABEL: @test_nsw(
54-
; CHECK: [[TMP1:%.*]] = icmp eq i32 [[SRC1:%.*]], 0
55-
; CHECK: [[TMP2:%.*]] = extractelement <12 x float> [[SRC2:%.*]], i32 0
56-
; CHECK: [[TMP3:%.*]] = select i1 [[TMP1]], float [[TMP2]], float undef
57-
; CHECK: [[TMP4:%.*]] = icmp eq i32 [[SRC1]], 1
58-
; CHECK: [[TMP5:%.*]] = extractelement <12 x float> [[SRC2]], i32 3
59-
; CHECK: [[TMP6:%.*]] = select i1 [[TMP4]], float [[TMP5]], float [[TMP3]]
60-
; CHECK: [[TMP7:%.*]] = icmp eq i32 [[SRC1]], 2
61-
; CHECK: [[TMP8:%.*]] = extractelement <12 x float> [[SRC2]], i32 6
62-
; CHECK: [[TMP9:%.*]] = select i1 [[TMP7]], float [[TMP8]], float [[TMP6]]
63-
; CHECK: [[TMP10:%.*]] = icmp eq i32 [[SRC1]], 3
64-
; CHECK: [[TMP11:%.*]] = extractelement <12 x float> [[SRC2]], i32 9
65-
; CHECK: [[TMP12:%.*]] = select i1 [[TMP10]], float [[TMP11]], float [[TMP9]]
66-
; CHECK: store float [[TMP12]], float* [[DST:%.*]], align 4
67-
; CHECK: [[TMP13:%.*]] = add i32 [[TMP18:%.*]], 1
68-
; CHECK: [[TMP14:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP13]]
69-
; CHECK: store float [[TMP14]], float* [[DST]], align 4
70-
; CHECK: [[TMP15:%.*]] = add i32 [[TMP18]], 2
71-
; CHECK: [[TMP17:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP15]]
72-
; CHECK: store float [[TMP17]], float* [[DST]], align 4
73-
; CHECK: ret void
45+
; ------------------------------------------------
46+
; Case2: index is instruction, profitable
47+
; ------------------------------------------------
48+
49+
define void @test_instr(i32 %src1, <4 x float> %src2, float* %dst) {
50+
; CHECK-LABEL: @test_instr(
51+
; CHECK: [[TMP1:%.*]] = add i32 [[SRC1:%.*]], 13
52+
; CHECK: [[TMP2:%.*]] = extractelement <4 x float> [[SRC2:%.*]], i32 [[TMP1]]
53+
; CHECK: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
54+
; CHECK: [[TMP4:%.*]] = extractelement <4 x float> [[SRC2]], i32 0
55+
; CHECK: [[TMP5:%.*]] = select i1 [[TMP3]], float [[TMP4]], float undef
56+
; CHECK: [[TMP6:%.*]] = icmp eq i32 [[TMP1]], 1
57+
; CHECK: [[TMP7:%.*]] = extractelement <4 x float> [[SRC2]], i32 1
58+
; CHECK: [[TMP8:%.*]] = select i1 [[TMP6]], float [[TMP7]], float [[TMP5]]
59+
; CHECK: [[TMP9:%.*]] = icmp eq i32 [[TMP1]], 2
60+
; CHECK: [[TMP10:%.*]] = extractelement <4 x float> [[SRC2]], i32 2
61+
; CHECK: [[TMP11:%.*]] = select i1 [[TMP9]], float [[TMP10]], float [[TMP8]]
62+
; CHECK: [[TMP12:%.*]] = icmp eq i32 [[TMP1]], 3
63+
; CHECK: [[TMP13:%.*]] = extractelement <4 x float> [[SRC2]], i32 3
64+
; CHECK: [[TMP14:%.*]] = select i1 [[TMP12]], float [[TMP13]], float [[TMP11]]
65+
; CHECK: store float [[TMP14]], float* [[DST:%.*]], align 4
7466
;
75-
%1 = mul nsw i32 %src1, 3
76-
%2 = extractelement <12 x float> %src2, i32 %1
67+
%1 = add i32 %src1, 13
68+
%2 = extractelement <4 x float> %src2, i32 %1
7769
store float %2, float* %dst, align 4
78-
%3 = add i32 %1, 1
79-
%4 = extractelement <12 x float> %src2, i32 %3
80-
store float %4, float* %dst, align 4
81-
%5 = add i32 %1, 2
82-
%6 = extractelement <12 x float> %src2, i32 %5
83-
store float %6, float* %dst, align 4
8470
ret void
8571
}
8672

87-
define void @test(i32 %src1, <12 x float> %src2, float* %dst) {
88-
; CHECK-LABEL: @test(
89-
; CHECK: [[TMP1:%.*]] = mul i32 [[SRC1:%.*]], 3
90-
; CHECK: [[TMP2:%.*]] = extractelement <12 x float> [[SRC2:%.*]], i32 [[TMP1]]
91-
; CHECK: store float [[TMP2]], float* [[DST:%.*]], align 4
92-
; CHECK: [[TMP3:%.*]] = add i32 [[TMP1]], 1
93-
; CHECK: [[TMP4:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP3]]
94-
; CHECK: store float [[TMP4]], float* [[DST]], align 4
95-
; CHECK: [[TMP5:%.*]] = add i32 [[TMP1]], 2
96-
; CHECK: [[TMP6:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP5]]
97-
; CHECK: store float [[TMP6]], float* [[DST]], align 4
98-
; CHECK: ret void
73+
; ------------------------------------------------
74+
; Case3: index is argument but transformation not profitable
75+
; ------------------------------------------------
76+
77+
define void @test_arg_not_profit(i32 %src1, <12 x float> %src2, float* %dst) {
78+
; CHECK-LABEL: @test_arg_not_profit(
79+
; CHECK: [[TMP1:%.*]] = extractelement <12 x float> [[SRC2:%.*]], i32 [[SRC1:%.*]]
80+
; CHECK: store float [[TMP1]], float* [[DST:%.*]], align 4
9981
;
100-
%1 = mul i32 %src1, 3
101-
%2 = extractelement <12 x float> %src2, i32 %1
102-
store float %2, float* %dst, align 4
103-
%3 = add i32 %1, 1
104-
%4 = extractelement <12 x float> %src2, i32 %3
105-
store float %4, float* %dst, align 4
106-
%5 = add i32 %1, 2
107-
%6 = extractelement <12 x float> %src2, i32 %5
108-
store float %6, float* %dst, align 4
82+
%1 = extractelement <12 x float> %src2, i32 %src1
83+
store float %1, float* %dst, align 4
10984
ret void
11085
}

0 commit comments

Comments
 (0)