Skip to content

Commit 98fae51

Browse files
dguzhaev authored and igcbot committed
SplitIndirectEEtoSel set poison on idx exceeding vector length
* Set poison instead of undef for the non-matched idx case, i.e. change
  `select i1 %idx_cond, float %element0, undef` to
  `select i1 %idx_cond, float %element0, poison`
* Don't return early on overflowing instructions without nsw/nuw; continue without updating the index
* Check for overflow on the shl pattern
* Added/updated LIT tests
1 parent 376355a commit 98fae51

File tree

4 files changed

+496
-106
lines changed

4 files changed

+496
-106
lines changed

IGC/Compiler/CustomSafeOptPass.cpp

Lines changed: 34 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -5817,15 +5817,6 @@ void SplitIndirectEEtoSel::visitExtractElementInst(llvm::ExtractElementInst& I)
58175817
return;
58185818
}
58195819

5820-
// ignore if index instruction is OverflowingBinaryOperator and doesn't have nsw or nuw
5821-
if (OverflowingBinaryOperator* indexOp = dyn_cast<OverflowingBinaryOperator>(index))
5822-
{
5823-
if (!indexOp->hasNoSignedWrap() && !indexOp->hasNoUnsignedWrap())
5824-
{
5825-
return;
5826-
}
5827-
}
5828-
58295820
// used to calculate offsets
58305821
int64_t add = 0;
58315822
int64_t mul = 1;
@@ -5836,32 +5827,56 @@ void SplitIndirectEEtoSel::visitExtractElementInst(llvm::ExtractElementInst& I)
58365827
%271 = extractelement <12 x float> %234, i32 %270
58375828
*/
58385829
Value* Val1 = nullptr;
5830+
Value* Val2 = nullptr;
58395831
ConstantInt* ci_add = nullptr;
58405832
ConstantInt* ci_mul = nullptr;
58415833

5842-
auto pat1 = m_Add(m_Mul(m_Value(Val1), m_ConstantInt(ci_mul)), m_ConstantInt(ci_add));
5843-
auto pat2 = m_Mul(m_Value(Val1), m_ConstantInt(ci_mul));
5834+
auto pat_add = m_Add(m_Value(Val2), m_ConstantInt(ci_add));
5835+
auto pat_mul = m_Mul(m_Value(Val1), m_ConstantInt(ci_mul));
58445836
// Some code shows `shl+or` instead of mul+add.
5845-
auto pat21 = m_Or(m_Shl(m_Value(Val1), m_ConstantInt(ci_mul)), m_ConstantInt(ci_add));
5846-
auto pat22 = m_Shl(m_Value(Val1), m_ConstantInt(ci_mul));
5837+
auto pat_or = m_Or(m_Value(Val2), m_ConstantInt(ci_add));
5838+
auto pat_shl = m_Shl(m_Value(Val1), m_ConstantInt(ci_mul));
58475839

5848-
if (match(index, pat1) || match(index, pat2))
5840+
if (match(index, pat_mul) || (match(index, pat_add) && match(Val2, pat_mul)))
58495841
{
5850-
add = ci_add ? ci_add->getSExtValue() : 0;
58515842
mul = ci_mul ? ci_mul->getSExtValue() : 1;
5852-
index = Val1;
58535843
}
5854-
else if (match(index, pat21) || match(index, pat22))
5844+
else if (match(index, pat_shl) || (match(index, pat_or) && match(Val2, pat_shl)))
58555845
{
5856-
add = ci_add ? ci_add->getSExtValue() : 0;
58575846
mul = ci_mul ? (1LL << ci_mul->getSExtValue()) : 1LL;
5858-
index = Val1;
5847+
}
5848+
// Instruction::hasPoisonGeneratingFlags() could be used instead
5849+
// after llvm9 support is dropped
5850+
auto hasNoOverflow = [](Value* value) {
5851+
if (OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(value))
5852+
return OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap();
5853+
return true;
5854+
};
5855+
5856+
// If a pattern matched, check that the corresponding index calculation has nsw or nuw
5857+
if (Val1)
5858+
{
5859+
// Transformation could still be profitable,
5860+
// but index and its multiplier shouldn't be modified
5861+
if (!hasNoOverflow(index) || (Val2 && !hasNoOverflow(Val2)))
5862+
{
5863+
mul = 1;
5864+
}
5865+
else
5866+
{
5867+
add = ci_add ? ci_add->getSExtValue() : 0;
5868+
index = Val1;
5869+
}
58595870
}
58605871

58615872
if (!isProfitableToSplit(num, mul, add))
58625873
return;
58635874

5875+
#if LLVM_VERSION_MAJOR < 14
58645876
Value* vTemp = llvm::UndefValue::get(eleType);
5877+
#else
5878+
Value* vTemp = llvm::PoisonValue::get(eleType);
5879+
#endif
58655880
IRBuilder<> builder(I.getNextNode());
58665881

58675882
// returns true if we can skip this icmp, such as:
Lines changed: 64 additions & 87 deletions
Original file line number | Diff line number | Diff line change
@@ -1,110 +1,87 @@
11
;=========================== begin_copyright_notice ============================
22
;
3-
; Copyright (C) 2022 Intel Corporation
3+
; Copyright (C) 2024 Intel Corporation
44
;
55
; SPDX-License-Identifier: MIT
66
;
77
;============================ end_copyright_notice =============================
88
;
9-
; RUN: igc_opt -enable-debugify -SplitIndirectEEtoSel -S < %s 2>&1 | FileCheck %s
9+
; RUN: igc_opt -SplitIndirectEEtoSel -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,%LLVM_DEPENDENT_CHECK_PREFIX%
1010
; ------------------------------------------------
1111
; SplitIndirectEEtoSel
1212
; ------------------------------------------------
13-
; Debug-info related check
1413
;
15-
; CHECK-NOT: WARNING
16-
; CHECK: CheckModuleDebugify: PASS
17-
define void @test_nuw(i32 %src1, <12 x float> %src2, float* %dst) {
18-
; CHECK-LABEL: @test_nuw(
19-
; CHECK: [[TMP1:%.*]] = icmp eq i32 [[SRC1:%.*]], 0
20-
; CHECK: [[TMP2:%.*]] = extractelement <12 x float> [[SRC2:%.*]], i32 0
21-
; CHECK: [[TMP3:%.*]] = select i1 [[TMP1]], float [[TMP2]], float undef
22-
; CHECK: [[TMP4:%.*]] = icmp eq i32 [[SRC1]], 1
23-
; CHECK: [[TMP5:%.*]] = extractelement <12 x float> [[SRC2]], i32 3
24-
; CHECK: [[TMP6:%.*]] = select i1 [[TMP4]], float [[TMP5]], float [[TMP3]]
25-
; CHECK: [[TMP7:%.*]] = icmp eq i32 [[SRC1]], 2
26-
; CHECK: [[TMP8:%.*]] = extractelement <12 x float> [[SRC2]], i32 6
27-
; CHECK: [[TMP9:%.*]] = select i1 [[TMP7]], float [[TMP8]], float [[TMP6]]
28-
; CHECK: [[TMP10:%.*]] = icmp eq i32 [[SRC1]], 3
29-
; CHECK: [[TMP11:%.*]] = extractelement <12 x float> [[SRC2]], i32 9
30-
; CHECK: [[TMP12:%.*]] = select i1 [[TMP10]], float [[TMP11]], float [[TMP9]]
31-
; CHECK: store float [[TMP12]], float* [[DST:%.*]], align 4
32-
; CHECK: [[TMP13:%.*]] = add i32 [[TMP18:%.*]], 1
33-
; CHECK: [[TMP14:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP13]]
34-
; CHECK: store float [[TMP14]], float* [[DST]], align 4
35-
; CHECK: [[TMP15:%.*]] = add i32 [[TMP18]], 2
36-
; CHECK: [[TMP17:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP15]]
37-
; CHECK: store float [[TMP17]], float* [[DST]], align 4
38-
; CHECK: ret void
14+
; This test checks argument and instruction as index
15+
; for extractelement for SplitIndirectEEtoSel pass
16+
; no special patterns matched
17+
; ------------------------------------------------
18+
19+
; ------------------------------------------------
20+
; Case1: index is argument, profitable
21+
; ------------------------------------------------
22+
23+
define void @test_arg(i32 %src1, <4 x float> %src2, float* %dst) {
24+
; CHECK-LABEL: @test_arg(
25+
; CHECK: [[TMP1:%.*]] = extractelement <4 x float> [[SRC2:%.*]], i32 [[SRC1:%.*]]
26+
; CHECK: [[TMP2:%.*]] = icmp eq i32 [[SRC1]], 0
27+
; CHECK: [[TMP3:%.*]] = extractelement <4 x float> [[SRC2]], i32 0
28+
; CHECK-LLVM-14-PLUS: [[TMP4:%.*]] = select i1 [[TMP2]], float [[TMP3]], float poison
29+
; CHECK-PRE-LLVM-14: [[TMP4:%.*]] = select i1 [[TMP2]], float [[TMP3]], float undef
30+
; CHECK: [[TMP5:%.*]] = icmp eq i32 [[SRC1]], 1
31+
; CHECK: [[TMP6:%.*]] = extractelement <4 x float> [[SRC2]], i32 1
32+
; CHECK: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP6]], float [[TMP4]]
33+
; CHECK: [[TMP8:%.*]] = icmp eq i32 [[SRC1]], 2
34+
; CHECK: [[TMP9:%.*]] = extractelement <4 x float> [[SRC2]], i32 2
35+
; CHECK: [[TMP10:%.*]] = select i1 [[TMP8]], float [[TMP9]], float [[TMP7]]
36+
; CHECK: [[TMP11:%.*]] = icmp eq i32 [[SRC1]], 3
37+
; CHECK: [[TMP12:%.*]] = extractelement <4 x float> [[SRC2]], i32 3
38+
; CHECK: [[TMP13:%.*]] = select i1 [[TMP11]], float [[TMP12]], float [[TMP10]]
39+
; CHECK: store float [[TMP13]], float* [[DST:%.*]], align 4
3940
;
40-
%1 = mul nuw i32 %src1, 3
41-
%2 = extractelement <12 x float> %src2, i32 %1
42-
store float %2, float* %dst, align 4
43-
%3 = add i32 %1, 1
44-
%4 = extractelement <12 x float> %src2, i32 %3
45-
store float %4, float* %dst, align 4
46-
%5 = add i32 %1, 2
47-
%6 = extractelement <12 x float> %src2, i32 %5
48-
store float %6, float* %dst, align 4
41+
%1 = extractelement <4 x float> %src2, i32 %src1
42+
store float %1, float* %dst, align 4
4943
ret void
5044
}
5145

52-
define void @test_nsw(i32 %src1, <12 x float> %src2, float* %dst) {
53-
; CHECK-LABEL: @test_nsw(
54-
; CHECK: [[TMP1:%.*]] = icmp eq i32 [[SRC1:%.*]], 0
55-
; CHECK: [[TMP2:%.*]] = extractelement <12 x float> [[SRC2:%.*]], i32 0
56-
; CHECK: [[TMP3:%.*]] = select i1 [[TMP1]], float [[TMP2]], float undef
57-
; CHECK: [[TMP4:%.*]] = icmp eq i32 [[SRC1]], 1
58-
; CHECK: [[TMP5:%.*]] = extractelement <12 x float> [[SRC2]], i32 3
59-
; CHECK: [[TMP6:%.*]] = select i1 [[TMP4]], float [[TMP5]], float [[TMP3]]
60-
; CHECK: [[TMP7:%.*]] = icmp eq i32 [[SRC1]], 2
61-
; CHECK: [[TMP8:%.*]] = extractelement <12 x float> [[SRC2]], i32 6
62-
; CHECK: [[TMP9:%.*]] = select i1 [[TMP7]], float [[TMP8]], float [[TMP6]]
63-
; CHECK: [[TMP10:%.*]] = icmp eq i32 [[SRC1]], 3
64-
; CHECK: [[TMP11:%.*]] = extractelement <12 x float> [[SRC2]], i32 9
65-
; CHECK: [[TMP12:%.*]] = select i1 [[TMP10]], float [[TMP11]], float [[TMP9]]
66-
; CHECK: store float [[TMP12]], float* [[DST:%.*]], align 4
67-
; CHECK: [[TMP13:%.*]] = add i32 [[TMP18:%.*]], 1
68-
; CHECK: [[TMP14:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP13]]
69-
; CHECK: store float [[TMP14]], float* [[DST]], align 4
70-
; CHECK: [[TMP15:%.*]] = add i32 [[TMP18]], 2
71-
; CHECK: [[TMP17:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP15]]
72-
; CHECK: store float [[TMP17]], float* [[DST]], align 4
73-
; CHECK: ret void
46+
; ------------------------------------------------
47+
; Case2: index is instruction, profitable
48+
; ------------------------------------------------
49+
50+
define void @test_instr(i32 %src1, <4 x float> %src2, float* %dst) {
51+
; CHECK-LABEL: @test_instr(
52+
; CHECK: [[TMP1:%.*]] = add i32 [[SRC1:%.*]], 13
53+
; CHECK: [[TMP2:%.*]] = extractelement <4 x float> [[SRC2:%.*]], i32 [[TMP1]]
54+
; CHECK: [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
55+
; CHECK: [[TMP4:%.*]] = extractelement <4 x float> [[SRC2]], i32 0
56+
; CHECK-LLVM-14-PLUS: [[TMP5:%.*]] = select i1 [[TMP3]], float [[TMP4]], float poison
57+
; CHECK-PRE-LLVM-14: [[TMP5:%.*]] = select i1 [[TMP3]], float [[TMP4]], float undef
58+
; CHECK: [[TMP6:%.*]] = icmp eq i32 [[TMP1]], 1
59+
; CHECK: [[TMP7:%.*]] = extractelement <4 x float> [[SRC2]], i32 1
60+
; CHECK: [[TMP8:%.*]] = select i1 [[TMP6]], float [[TMP7]], float [[TMP5]]
61+
; CHECK: [[TMP9:%.*]] = icmp eq i32 [[TMP1]], 2
62+
; CHECK: [[TMP10:%.*]] = extractelement <4 x float> [[SRC2]], i32 2
63+
; CHECK: [[TMP11:%.*]] = select i1 [[TMP9]], float [[TMP10]], float [[TMP8]]
64+
; CHECK: [[TMP12:%.*]] = icmp eq i32 [[TMP1]], 3
65+
; CHECK: [[TMP13:%.*]] = extractelement <4 x float> [[SRC2]], i32 3
66+
; CHECK: [[TMP14:%.*]] = select i1 [[TMP12]], float [[TMP13]], float [[TMP11]]
67+
; CHECK: store float [[TMP14]], float* [[DST:%.*]], align 4
7468
;
75-
%1 = mul nsw i32 %src1, 3
76-
%2 = extractelement <12 x float> %src2, i32 %1
69+
%1 = add i32 %src1, 13
70+
%2 = extractelement <4 x float> %src2, i32 %1
7771
store float %2, float* %dst, align 4
78-
%3 = add i32 %1, 1
79-
%4 = extractelement <12 x float> %src2, i32 %3
80-
store float %4, float* %dst, align 4
81-
%5 = add i32 %1, 2
82-
%6 = extractelement <12 x float> %src2, i32 %5
83-
store float %6, float* %dst, align 4
8472
ret void
8573
}
8674

87-
define void @test(i32 %src1, <12 x float> %src2, float* %dst) {
88-
; CHECK-LABEL: @test(
89-
; CHECK: [[TMP1:%.*]] = mul i32 [[SRC1:%.*]], 3
90-
; CHECK: [[TMP2:%.*]] = extractelement <12 x float> [[SRC2:%.*]], i32 [[TMP1]]
91-
; CHECK: store float [[TMP2]], float* [[DST:%.*]], align 4
92-
; CHECK: [[TMP3:%.*]] = add i32 [[TMP1]], 1
93-
; CHECK: [[TMP4:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP3]]
94-
; CHECK: store float [[TMP4]], float* [[DST]], align 4
95-
; CHECK: [[TMP5:%.*]] = add i32 [[TMP1]], 2
96-
; CHECK: [[TMP6:%.*]] = extractelement <12 x float> [[SRC2]], i32 [[TMP5]]
97-
; CHECK: store float [[TMP6]], float* [[DST]], align 4
98-
; CHECK: ret void
75+
; ------------------------------------------------
76+
; Case3: index is argument but transformation not profitable
77+
; ------------------------------------------------
78+
79+
define void @test_arg_not_profit(i32 %src1, <12 x float> %src2, float* %dst) {
80+
; CHECK-LABEL: @test_arg_not_profit(
81+
; CHECK: [[TMP1:%.*]] = extractelement <12 x float> [[SRC2:%.*]], i32 [[SRC1:%.*]]
82+
; CHECK: store float [[TMP1]], float* [[DST:%.*]], align 4
9983
;
100-
%1 = mul i32 %src1, 3
101-
%2 = extractelement <12 x float> %src2, i32 %1
102-
store float %2, float* %dst, align 4
103-
%3 = add i32 %1, 1
104-
%4 = extractelement <12 x float> %src2, i32 %3
105-
store float %4, float* %dst, align 4
106-
%5 = add i32 %1, 2
107-
%6 = extractelement <12 x float> %src2, i32 %5
108-
store float %6, float* %dst, align 4
84+
%1 = extractelement <12 x float> %src2, i32 %src1
85+
store float %1, float* %dst, align 4
10986
ret void
11087
}

0 commit comments

Comments
 (0)