Skip to content

Commit 85512dd

Browse files
dguzhaev authored and igcbot committed
[Autobackout][FuncReg] Revert of change: 98fae51
SplitIndirectEEtoSel: set poison on idx exceeding vector length * Set poison instead of undef for the non-matched idx case, from: select i1 %idx_cond, float %element0, undef to: select i1 %idx_cond, float %element0, poison * Don't return on overflowing instructions without nsw/nuw, but continue without updating the index * Check for overflow on the shl pattern * Added/updated LIT tests
1 parent cff95df commit 85512dd

File tree

4 files changed

+106
-496
lines changed

4 files changed

+106
-496
lines changed

IGC/Compiler/CustomSafeOptPass.cpp

Lines changed: 19 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -5828,6 +5828,15 @@ void SplitIndirectEEtoSel::visitExtractElementInst(llvm::ExtractElementInst& I)
58285828
return;
58295829
}
58305830

5831+
// ignore if index instruction is OverflowingBinaryOperator and doesn't have nsw or nuw
5832+
if (OverflowingBinaryOperator* indexOp = dyn_cast<OverflowingBinaryOperator>(index))
5833+
{
5834+
if (!indexOp->hasNoSignedWrap() && !indexOp->hasNoUnsignedWrap())
5835+
{
5836+
return;
5837+
}
5838+
}
5839+
58315840
// used to calculate offsets
58325841
int64_t add = 0;
58335842
int64_t mul = 1;
@@ -5838,56 +5847,32 @@ void SplitIndirectEEtoSel::visitExtractElementInst(llvm::ExtractElementInst& I)
58385847
%271 = extractelement <12 x float> %234, i32 %270
58395848
*/
58405849
Value* Val1 = nullptr;
5841-
Value* Val2 = nullptr;
58425850
ConstantInt* ci_add = nullptr;
58435851
ConstantInt* ci_mul = nullptr;
58445852

5845-
auto pat_add = m_Add(m_Value(Val2), m_ConstantInt(ci_add));
5846-
auto pat_mul = m_Mul(m_Value(Val1), m_ConstantInt(ci_mul));
5853+
auto pat1 = m_Add(m_Mul(m_Value(Val1), m_ConstantInt(ci_mul)), m_ConstantInt(ci_add));
5854+
auto pat2 = m_Mul(m_Value(Val1), m_ConstantInt(ci_mul));
58475855
// Some code shows `shl+or` instead of mul+add.
5848-
auto pat_or = m_Or(m_Value(Val2), m_ConstantInt(ci_add));
5849-
auto pat_shl = m_Shl(m_Value(Val1), m_ConstantInt(ci_mul));
5856+
auto pat21 = m_Or(m_Shl(m_Value(Val1), m_ConstantInt(ci_mul)), m_ConstantInt(ci_add));
5857+
auto pat22 = m_Shl(m_Value(Val1), m_ConstantInt(ci_mul));
58505858

5851-
if (match(index, pat_mul) || (match(index, pat_add) && match(Val2, pat_mul)))
5859+
if (match(index, pat1) || match(index, pat2))
58525860
{
5861+
add = ci_add ? ci_add->getSExtValue() : 0;
58535862
mul = ci_mul ? ci_mul->getSExtValue() : 1;
5863+
index = Val1;
58545864
}
5855-
else if (match(index, pat_shl) || (match(index, pat_or) && match(Val2, pat_shl)))
5865+
else if (match(index, pat21) || match(index, pat22))
58565866
{
5867+
add = ci_add ? ci_add->getSExtValue() : 0;
58575868
mul = ci_mul ? (1LL << ci_mul->getSExtValue()) : 1LL;
5858-
}
5859-
// Instruction::hasPoisonGeneratingFlags() could be used instead
5860-
// after llvm9 support is dropped
5861-
auto hasNoOverflow = [](Value* value) {
5862-
if (OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(value))
5863-
return OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap();
5864-
return true;
5865-
};
5866-
5867-
// If pattern matched check that corresponding index calculation has nsw or nuw
5868-
if (Val1)
5869-
{
5870-
// Transformation could still be profitable,
5871-
// but index and it's multiplier shouldn't be modified
5872-
if (!hasNoOverflow(index) || (Val2 && !hasNoOverflow(Val2)))
5873-
{
5874-
mul = 1;
5875-
}
5876-
else
5877-
{
5878-
add = ci_add ? ci_add->getSExtValue() : 0;
5879-
index = Val1;
5880-
}
5869+
index = Val1;
58815870
}
58825871

58835872
if (!isProfitableToSplit(num, mul, add))
58845873
return;
58855874

5886-
#if LLVM_VERSION_MAJOR < 14
58875875
Value* vTemp = llvm::UndefValue::get(eleType);
5888-
#else
5889-
Value* vTemp = llvm::PoisonValue::get(eleType);
5890-
#endif
58915876
IRBuilder<> builder(I.getNextNode());
58925877

58935878
// returns true if we can skip this icmp, such as:
Lines changed: 87 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,87 +1,110 @@
11
;=========================== begin_copyright_notice ============================
22
;
3-
; Copyright (C) 2024 Intel Corporation
3+
; Copyright (C) 2022 Intel Corporation
44
;
55
; SPDX-License-Identifier: MIT
66
;
77
;============================ end_copyright_notice =============================
88
;
9-
; RUN: igc_opt -SplitIndirectEEtoSel -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,%LLVM_DEPENDENT_CHECK_PREFIX%
9+
; RUN: igc_opt -enable-debugify -SplitIndirectEEtoSel -S < %s 2>&1 | FileCheck %s
1010
; ------------------------------------------------
1111
; SplitIndirectEEtoSel
1212
; ------------------------------------------------
13+
; Debug-info related check
1314
;
14-
; This test checks argument and instruction as index
15-
; for extractelement for SplitIndirectEEtoSel pass
16-
; no special patterns matched
17-
; ------------------------------------------------
18-
19-
; ------------------------------------------------
20-
; Case1: index is argument, profitable
21-
; ------------------------------------------------
22-
23-
define void @test_arg(i32 %src1, <4 x float> %src2, float* %dst) {
24-
; CHECK-LABEL: @test_arg(
25-
; CHECK: [[TMP1:%.*]] = extractelement <4 x float> [[SRC2:%.*]], i32 [[SRC1:%.*]]
26-
; CHECK: [[TMP2:%.*]] = icmp eq i32 [[SRC1]], 0
27-
; CHECK: [[TMP3:%.*]] = extractelement <4 x float> [[SRC2]], i32 0
28-
; CHECK-LLVM-14-PLUS: [[TMP4:%.*]] = select i1 [[TMP2]], float [[TMP3]], float poison
29-
; CHECK-PRE-LLVM-14: [[TMP4:%.*]] = select i1 [[TMP2]], float [[TMP3]], float undef
30-
; CHECK: [[TMP5:%.*]] = icmp eq i32 [[SRC1]], 1
31-
; CHECK: [[TMP6:%.*]] = extractelement <4 x float> [[SRC2]], i32 1
32-
; CHECK: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP6]], float [[TMP4]]
33-
; CHECK: [[TMP8:%.*]] = icmp eq i32 [[SRC1]], 2
34-
; CHECK: [[TMP9:%.*]] = extractelement <4 x float> [[SRC2]], i32 2
35-
; CHECK: [[TMP10:%.*]] = select i1 [[TMP8]], float [[TMP9]], float [[TMP7]]
36-
; CHECK: [[TMP11:%.*]] = icmp eq i32 [[SRC1]], 3
37-
; CHECK: [[TMP12:%.*]] = extractelement <4 x float> [[SRC2]], i32 3
38-
; CHECK: [[TMP13:%.*]] = select i1 [[TMP11]], float [[TMP12]], float [[TMP10]]
39-
; CHECK: store float [[TMP13]], float* [[DST:%.*]], align 4
15+
; CHECK-NOT: WARNING
16+
; CHECK: CheckModuleDebugify: PASS
17+
define void @test_nuw(i32 %src1, <12 x float> %src2, float* %dst) {
18+
; CHECK-LABEL: @test_nuw(
19+
; CHECK: [[TMP1:%.*]] = icmp eq i32 [[SRC1:%.*]], 0
20+
; CHECK: [[TMP2:%.*]] = extractelement <12 x float> [[SRC2:%.*]], i32 0
21+
; CHECK: [[TMP3:%.*]] = select i1 [[TMP1]], float [[TMP2]], float undef
22+
; CHECK: [[TMP4:%.*]] = icmp eq i32 [[SRC1]], 1
23+
; CHECK: [[TMP5:%.*]] = extractelement <12 x float> [[SRC2]], i32 3
24+
; CHECK: [[TMP6:%.*]] = select i1 [[TMP4]], float [[TMP5]], float [[TMP3]]
25+
; CHECK: [[TMP7:%.*]] = icmp eq i32 [[SRC1]], 2
26+
; CHECK: [[TMP8:%.*]] = extractelement <12 x float> [[SRC2]], i32 6
27+
; CHECK: [[TMP9:%.*]] = select i1 [[TMP7]], float [[TMP8]], float [[TMP6]]
28+
; CHECK: [[TMP10:%.*]] = icmp eq i32 [[SRC1]], 3
29+
; CHECK: [[TMP11:%.*]] = extractelement <12 x float> [[SRC2]], i32 9
30+
; CHECK: [[TMP12:%.*]] = select i1 [[TMP10]], float [[TMP11]], float [[TMP9]]
31+
; CHECK: store float [[TMP12]], float* [[DST:%.*]], align 4
32+
; CHECK: [[TMP13:%.*]] = add i32 [[TMP18:%.*]], 1
33+
; CHECK: [[TMP14:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP13]]
34+
; CHECK: store float [[TMP14]], float* [[DST]], align 4
35+
; CHECK: [[TMP15:%.*]] = add i32 [[TMP18]], 2
36+
; CHECK: [[TMP17:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP15]]
37+
; CHECK: store float [[TMP17]], float* [[DST]], align 4
38+
; CHECK: ret void
4039
;
41-
%1 = extractelement <4 x float> %src2, i32 %src1
42-
store float %1, float* %dst, align 4
40+
%1 = mul nuw i32 %src1, 3
41+
%2 = extractelement <12 x float> %src2, i32 %1
42+
store float %2, float* %dst, align 4
43+
%3 = add i32 %1, 1
44+
%4 = extractelement <12 x float> %src2, i32 %3
45+
store float %4, float* %dst, align 4
46+
%5 = add i32 %1, 2
47+
%6 = extractelement <12 x float> %src2, i32 %5
48+
store float %6, float* %dst, align 4
4349
ret void
4450
}
4551

46-
; ------------------------------------------------
47-
; Case2: index is instruction, profitable
48-
; ------------------------------------------------
49-
50-
define void @test_instr(i32 %src1, <4 x float> %src2, float* %dst) {
51-
; CHECK-LABEL: @test_instr(
52-
; CHECK: [[TMP1:%.*]] = add i32 [[SRC1:%.*]], 13
53-
; CHECK: [[TMP2:%.*]] = extractelement <4 x float> [[SRC2:%.*]], i32 [[TMP1]]
54-
; CHECK: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
55-
; CHECK: [[TMP4:%.*]] = extractelement <4 x float> [[SRC2]], i32 0
56-
; CHECK-LLVM-14-PLUS: [[TMP5:%.*]] = select i1 [[TMP3]], float [[TMP4]], float poison
57-
; CHECK-PRE-LLVM-14: [[TMP5:%.*]] = select i1 [[TMP3]], float [[TMP4]], float undef
58-
; CHECK: [[TMP6:%.*]] = icmp eq i32 [[TMP1]], 1
59-
; CHECK: [[TMP7:%.*]] = extractelement <4 x float> [[SRC2]], i32 1
60-
; CHECK: [[TMP8:%.*]] = select i1 [[TMP6]], float [[TMP7]], float [[TMP5]]
61-
; CHECK: [[TMP9:%.*]] = icmp eq i32 [[TMP1]], 2
62-
; CHECK: [[TMP10:%.*]] = extractelement <4 x float> [[SRC2]], i32 2
63-
; CHECK: [[TMP11:%.*]] = select i1 [[TMP9]], float [[TMP10]], float [[TMP8]]
64-
; CHECK: [[TMP12:%.*]] = icmp eq i32 [[TMP1]], 3
65-
; CHECK: [[TMP13:%.*]] = extractelement <4 x float> [[SRC2]], i32 3
66-
; CHECK: [[TMP14:%.*]] = select i1 [[TMP12]], float [[TMP13]], float [[TMP11]]
67-
; CHECK: store float [[TMP14]], float* [[DST:%.*]], align 4
52+
define void @test_nsw(i32 %src1, <12 x float> %src2, float* %dst) {
53+
; CHECK-LABEL: @test_nsw(
54+
; CHECK: [[TMP1:%.*]] = icmp eq i32 [[SRC1:%.*]], 0
55+
; CHECK: [[TMP2:%.*]] = extractelement <12 x float> [[SRC2:%.*]], i32 0
56+
; CHECK: [[TMP3:%.*]] = select i1 [[TMP1]], float [[TMP2]], float undef
57+
; CHECK: [[TMP4:%.*]] = icmp eq i32 [[SRC1]], 1
58+
; CHECK: [[TMP5:%.*]] = extractelement <12 x float> [[SRC2]], i32 3
59+
; CHECK: [[TMP6:%.*]] = select i1 [[TMP4]], float [[TMP5]], float [[TMP3]]
60+
; CHECK: [[TMP7:%.*]] = icmp eq i32 [[SRC1]], 2
61+
; CHECK: [[TMP8:%.*]] = extractelement <12 x float> [[SRC2]], i32 6
62+
; CHECK: [[TMP9:%.*]] = select i1 [[TMP7]], float [[TMP8]], float [[TMP6]]
63+
; CHECK: [[TMP10:%.*]] = icmp eq i32 [[SRC1]], 3
64+
; CHECK: [[TMP11:%.*]] = extractelement <12 x float> [[SRC2]], i32 9
65+
; CHECK: [[TMP12:%.*]] = select i1 [[TMP10]], float [[TMP11]], float [[TMP9]]
66+
; CHECK: store float [[TMP12]], float* [[DST:%.*]], align 4
67+
; CHECK: [[TMP13:%.*]] = add i32 [[TMP18:%.*]], 1
68+
; CHECK: [[TMP14:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP13]]
69+
; CHECK: store float [[TMP14]], float* [[DST]], align 4
70+
; CHECK: [[TMP15:%.*]] = add i32 [[TMP18]], 2
71+
; CHECK: [[TMP17:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP15]]
72+
; CHECK: store float [[TMP17]], float* [[DST]], align 4
73+
; CHECK: ret void
6874
;
69-
%1 = add i32 %src1, 13
70-
%2 = extractelement <4 x float> %src2, i32 %1
75+
%1 = mul nsw i32 %src1, 3
76+
%2 = extractelement <12 x float> %src2, i32 %1
7177
store float %2, float* %dst, align 4
78+
%3 = add i32 %1, 1
79+
%4 = extractelement <12 x float> %src2, i32 %3
80+
store float %4, float* %dst, align 4
81+
%5 = add i32 %1, 2
82+
%6 = extractelement <12 x float> %src2, i32 %5
83+
store float %6, float* %dst, align 4
7284
ret void
7385
}
7486

75-
; ------------------------------------------------
76-
; Case3: index is argument but transformation not profitable
77-
; ------------------------------------------------
78-
79-
define void @test_arg_not_profit(i32 %src1, <12 x float> %src2, float* %dst) {
80-
; CHECK-LABEL: @test_arg_not_profit(
81-
; CHECK: [[TMP1:%.*]] = extractelement <12 x float> [[SRC2:%.*]], i32 [[SRC1:%.*]]
82-
; CHECK: store float [[TMP1]], float* [[DST:%.*]], align 4
87+
define void @test(i32 %src1, <12 x float> %src2, float* %dst) {
88+
; CHECK-LABEL: @test(
89+
; CHECK: [[TMP1:%.*]] = mul i32 [[SRC1:%.*]], 3
90+
; CHECK: [[TMP2:%.*]] = extractelement <12 x float> [[SRC2:%.*]], i32 [[TMP1]]
91+
; CHECK: store float [[TMP2]], float* [[DST:%.*]], align 4
92+
; CHECK: [[TMP3:%.*]] = add i32 [[TMP1]], 1
93+
; CHECK: [[TMP4:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP3]]
94+
; CHECK: store float [[TMP4]], float* [[DST]], align 4
95+
; CHECK: [[TMP5:%.*]] = add i32 [[TMP1]], 2
96+
; CHECK: [[TMP6:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP5]]
97+
; CHECK: store float [[TMP6]], float* [[DST]], align 4
98+
; CHECK: ret void
8399
;
84-
%1 = extractelement <12 x float> %src2, i32 %src1
85-
store float %1, float* %dst, align 4
100+
%1 = mul i32 %src1, 3
101+
%2 = extractelement <12 x float> %src2, i32 %1
102+
store float %2, float* %dst, align 4
103+
%3 = add i32 %1, 1
104+
%4 = extractelement <12 x float> %src2, i32 %3
105+
store float %4, float* %dst, align 4
106+
%5 = add i32 %1, 2
107+
%6 = extractelement <12 x float> %src2, i32 %5
108+
store float %6, float* %dst, align 4
86109
ret void
87110
}

0 commit comments

Comments
 (0)