Skip to content

Commit 5d940b7

Browse files
committed
Reapply "SROA: Enhance speculateSelectInstLoads"
Originally committed as ffc3fb6. Reverted in fcf2d5f due to an assertion failure. Original commit message: Allow the folding even if there is an intervening bitcast. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D106667
1 parent b7611ad commit 5d940b7

File tree

3 files changed

+101
-19
lines changed

3 files changed

+101
-19
lines changed

llvm/lib/Transforms/Scalar/SROA.cpp

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1330,14 +1330,21 @@ static void speculatePHINodeLoads(PHINode &PN) {
13301330
/// %V = select i1 %cond, i32 %V1, i32 %V2
13311331
///
13321332
/// We can do this to a select if its only uses are loads and if the operand
1333-
/// to the select can be loaded unconditionally.
1333+
/// to the select can be loaded unconditionally. If there is an intervening bitcast
1334+
/// with a single use of the load, allow the promotion.
13341335
static bool isSafeSelectToSpeculate(SelectInst &SI) {
13351336
Value *TValue = SI.getTrueValue();
13361337
Value *FValue = SI.getFalseValue();
13371338
const DataLayout &DL = SI.getModule()->getDataLayout();
13381339

13391340
for (User *U : SI.users()) {
1340-
LoadInst *LI = dyn_cast<LoadInst>(U);
1341+
LoadInst *LI;
1342+
BitCastInst *BC = dyn_cast<BitCastInst>(U);
1343+
if (BC && BC->hasOneUse())
1344+
LI = dyn_cast<LoadInst>(*BC->user_begin());
1345+
else
1346+
LI = dyn_cast<LoadInst>(U);
1347+
13411348
if (!LI || !LI->isSimple())
13421349
return false;
13431350

@@ -1363,13 +1370,27 @@ static void speculateSelectInstLoads(SelectInst &SI) {
13631370
Value *FV = SI.getFalseValue();
13641371
// Replace the loads of the select with a select of two loads.
13651372
while (!SI.use_empty()) {
1366-
LoadInst *LI = cast<LoadInst>(SI.user_back());
1373+
LoadInst *LI;
1374+
BitCastInst *BC = dyn_cast<BitCastInst>(SI.user_back());
1375+
if (BC) {
1376+
assert(BC->hasOneUse() && "Bitcast should have a single use.");
1377+
LI = cast<LoadInst>(BC->user_back());
1378+
} else {
1379+
LI = cast<LoadInst>(SI.user_back());
1380+
}
1381+
13671382
assert(LI->isSimple() && "We only speculate simple loads");
13681383

13691384
IRB.SetInsertPoint(LI);
1370-
LoadInst *TL = IRB.CreateLoad(LI->getType(), TV,
1385+
Value *NewTV =
1386+
BC ? IRB.CreateBitCast(TV, BC->getType(), TV->getName() + ".sroa.cast")
1387+
: TV;
1388+
Value *NewFV =
1389+
BC ? IRB.CreateBitCast(FV, BC->getType(), FV->getName() + ".sroa.cast")
1390+
: FV;
1391+
LoadInst *TL = IRB.CreateLoad(LI->getType(), NewTV,
13711392
LI->getName() + ".sroa.speculate.load.true");
1372-
LoadInst *FL = IRB.CreateLoad(LI->getType(), FV,
1393+
LoadInst *FL = IRB.CreateLoad(LI->getType(), NewFV,
13731394
LI->getName() + ".sroa.speculate.load.false");
13741395
NumLoadsSpeculated += 2;
13751396

@@ -1390,6 +1411,8 @@ static void speculateSelectInstLoads(SelectInst &SI) {
13901411
LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n");
13911412
LI->replaceAllUsesWith(V);
13921413
LI->eraseFromParent();
1414+
if (BC)
1415+
BC->eraseFromParent();
13931416
}
13941417
SI.eraseFromParent();
13951418
}

llvm/test/Transforms/SROA/phi-and-select.ll

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -60,23 +60,14 @@ entry:
6060
ret i32 %result
6161
}
6262

63-
; If bitcast isn't considered a safe phi/select use, the alloca
64-
; remains as an array.
65-
; FIXME: Why isn't this identical to test2?
6663
define float @test2_bitcast() {
6764
; CHECK-LABEL: @test2_bitcast(
6865
; CHECK-NEXT: entry:
69-
; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4
70-
; CHECK-NEXT: [[A_SROA_3:%.*]] = alloca i32, align 4
71-
; CHECK-NEXT: store i32 0, i32* [[A_SROA_0]], align 4
72-
; CHECK-NEXT: store i32 1, i32* [[A_SROA_3]], align 4
73-
; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_V0:%.*]] = load i32, i32* [[A_SROA_0]], align 4
74-
; CHECK-NEXT: [[A_SROA_3_0_A_SROA_3_4_V1:%.*]] = load i32, i32* [[A_SROA_3]], align 4
75-
; CHECK-NEXT: [[COND:%.*]] = icmp sle i32 [[A_SROA_0_0_A_SROA_0_0_V0]], [[A_SROA_3_0_A_SROA_3_4_V1]]
76-
; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], i32* [[A_SROA_3]], i32* [[A_SROA_0]]
77-
; CHECK-NEXT: [[SELECT_BC:%.*]] = bitcast i32* [[SELECT]] to float*
78-
; CHECK-NEXT: [[RESULT:%.*]] = load float, float* [[SELECT_BC]], align 4
79-
; CHECK-NEXT: ret float [[RESULT]]
66+
; CHECK-NEXT: [[COND:%.*]] = icmp sle i32 0, 1
67+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 1 to float
68+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 0 to float
69+
; CHECK-NEXT: [[RESULT_SROA_SPECULATED:%.*]] = select i1 [[COND]], float [[TMP0]], float [[TMP1]]
70+
; CHECK-NEXT: ret float [[RESULT_SROA_SPECULATED]]
8071
;
8172
entry:
8273
%a = alloca [2 x i32]
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -S -sroa < %s | FileCheck %s
3+
4+
%st.half = type { half }
5+
6+
; Allow speculateSelectInstLoads to fold load and select
7+
; even if there is an intervening bitcast.
8+
define <2 x i16> @test_load_bitcast_select(i1 %cond1, i1 %cond2) {
9+
; CHECK-LABEL: @test_load_bitcast_select(
10+
; CHECK-NEXT: entry:
11+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast half 0xHFFFF to i16
12+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast half 0xH0000 to i16
13+
; CHECK-NEXT: [[LD1_SROA_SPECULATED:%.*]] = select i1 [[COND1:%.*]], i16 [[TMP0]], i16 [[TMP1]]
14+
; CHECK-NEXT: [[V1:%.*]] = insertelement <2 x i16> undef, i16 [[LD1_SROA_SPECULATED]], i32 0
15+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast half 0xHFFFF to i16
16+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast half 0xH0000 to i16
17+
; CHECK-NEXT: [[LD2_SROA_SPECULATED:%.*]] = select i1 [[COND2:%.*]], i16 [[TMP2]], i16 [[TMP3]]
18+
; CHECK-NEXT: [[V2:%.*]] = insertelement <2 x i16> [[V1]], i16 [[LD2_SROA_SPECULATED]], i32 1
19+
; CHECK-NEXT: ret <2 x i16> [[V2]]
20+
;
21+
entry:
22+
%true = alloca half, align 2
23+
%false = alloca half, align 2
24+
store half 0xHFFFF, half* %true, align 2
25+
store half 0xH0000, half* %false, align 2
26+
%false.cast = bitcast half* %false to %st.half*
27+
%true.cast = bitcast half* %true to %st.half*
28+
%sel1 = select i1 %cond1, %st.half* %true.cast, %st.half* %false.cast
29+
%cast1 = bitcast %st.half* %sel1 to i16*
30+
%ld1 = load i16, i16* %cast1, align 2
31+
%v1 = insertelement <2 x i16> undef, i16 %ld1, i32 0
32+
%sel2 = select i1 %cond2, %st.half* %true.cast, %st.half* %false.cast
33+
%cast2 = bitcast %st.half* %sel2 to i16*
34+
%ld2 = load i16, i16* %cast2, align 2
35+
%v2 = insertelement <2 x i16> %v1, i16 %ld2, i32 1
36+
ret <2 x i16> %v2
37+
}
38+
39+
%st.args = type { i32, i32* }
40+
41+
; A bitcasted load and a direct load of select.
42+
define void @test_multiple_loads_select(i1 %cmp){
43+
; CHECK-LABEL: @test_multiple_loads_select(
44+
; CHECK-NEXT: entry:
45+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* undef to i8*
46+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* undef to i8*
47+
; CHECK-NEXT: [[ADDR_I8_SROA_SPECULATED:%.*]] = select i1 [[CMP:%.*]], i8* [[TMP0]], i8* [[TMP1]]
48+
; CHECK-NEXT: call void @foo_i8(i8* [[ADDR_I8_SROA_SPECULATED]])
49+
; CHECK-NEXT: [[ADDR_I32_SROA_SPECULATED:%.*]] = select i1 [[CMP]], i32* undef, i32* undef
50+
; CHECK-NEXT: call void @foo_i32(i32* [[ADDR_I32_SROA_SPECULATED]])
51+
; CHECK-NEXT: ret void
52+
;
53+
entry:
54+
%args = alloca [2 x %st.args], align 16
55+
%arr0 = getelementptr inbounds [2 x %st.args], [2 x %st.args]* %args, i64 0, i64 0
56+
%arr1 = getelementptr inbounds [2 x %st.args], [2 x %st.args]* %args, i64 0, i64 1
57+
%sel = select i1 %cmp, %st.args* %arr1, %st.args* %arr0
58+
%addr = getelementptr inbounds %st.args, %st.args* %sel, i64 0, i32 1
59+
%bcast.i8 = bitcast i32** %addr to i8**
60+
%addr.i8 = load i8*, i8** %bcast.i8, align 8
61+
call void @foo_i8(i8* %addr.i8)
62+
%addr.i32 = load i32*, i32** %addr, align 8
63+
call void @foo_i32 (i32* %addr.i32)
64+
ret void
65+
}
66+
67+
declare void @foo_i8(i8*)
68+
declare void @foo_i32(i32*)

0 commit comments

Comments
 (0)