Skip to content

Commit 23ef75b

Browse files
committed
[GVN] Support load of pointer-select to value-select conversion.
This patch extends the available-value logic to detect loads of pointer-selects that can be replaced by a value select. For example, consider the code below: loop: %sel.phi = phi i32* [ %start, %ph ], [ %sel, %ph ] %l = load %ptr %l.sel = load %sel.phi %sel = select cond, %ptr, %sel.phi ... exit: %res = load %sel use(%res) The load of the pointer phi can be replaced by a load of the start value outside the loop and a new phi/select chain based on the loaded values, as illustrated below %l.start = load %start loop: sel.phi.prom = phi i32 [ %l.start, %ph ], [ %sel.prom, %ph ] %l = load %ptr %sel.prom = select cond, %l, %sel.phi.prom ... exit: use(%sel.prom) This is a first step towards alllowing vectorizing loops using common libc++ library functions, like std::min_element (https://clang.godbolt.org/z/6czGzzqbs) #include <vector> #include <algorithm> int foo(const std::vector<int> &V) { return *std::min_element(V.begin(), V.end()); } Reviewed By: reames Differential Revision: https://reviews.llvm.org/D118143 (cherry-picked from 8a12cae)
1 parent e360e77 commit 23ef75b

File tree

3 files changed

+188
-40
lines changed

3 files changed

+188
-40
lines changed

llvm/lib/Transforms/Scalar/GVN.cpp

Lines changed: 104 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -181,12 +181,14 @@ struct llvm::gvn::AvailableValue {
181181
SimpleVal, // A simple offsetted value that is accessed.
182182
LoadVal, // A value produced by a load.
183183
MemIntrin, // A memory intrinsic which is loaded from.
184-
UndefVal // A UndefValue representing a value from dead block (which
184+
UndefVal, // A UndefValue representing a value from dead block (which
185185
// is not yet physically removed from the CFG).
186+
SelectVal, // A pointer select which is loaded from and for which the load
187+
// can be replace by a value select.
186188
};
187189

188190
/// V - The value that is live out of the block.
189-
PointerIntPair<Value *, 2, ValType> Val;
191+
PointerIntPair<Value *, 3, ValType> Val;
190192

191193
/// Offset - The byte offset in Val that is interesting for the load query.
192194
unsigned Offset = 0;
@@ -223,10 +225,19 @@ struct llvm::gvn::AvailableValue {
223225
return Res;
224226
}
225227

228+
static AvailableValue getSelect(SelectInst *Sel) {
229+
AvailableValue Res;
230+
Res.Val.setPointer(Sel);
231+
Res.Val.setInt(SelectVal);
232+
Res.Offset = 0;
233+
return Res;
234+
}
235+
226236
bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
227237
bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
228238
bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
229239
bool isUndefValue() const { return Val.getInt() == UndefVal; }
240+
bool isSelectValue() const { return Val.getInt() == SelectVal; }
230241

231242
Value *getSimpleValue() const {
232243
assert(isSimpleValue() && "Wrong accessor");
@@ -243,6 +254,11 @@ struct llvm::gvn::AvailableValue {
243254
return cast<MemIntrinsic>(Val.getPointer());
244255
}
245256

257+
SelectInst *getSelectValue() const {
258+
assert(isSelectValue() && "Wrong accessor");
259+
return cast<SelectInst>(Val.getPointer());
260+
}
261+
246262
/// Emit code at the specified insertion point to adjust the value defined
247263
/// here to the specified type. This handles various coercion cases.
248264
Value *MaterializeAdjustedValue(LoadInst *Load, Instruction *InsertPt,
@@ -274,6 +290,10 @@ struct llvm::gvn::AvailableValueInBlock {
274290
return get(BB, AvailableValue::getUndef());
275291
}
276292

293+
static AvailableValueInBlock getSelect(BasicBlock *BB, SelectInst *Sel) {
294+
return get(BB, AvailableValue::getSelect(Sel));
295+
}
296+
277297
/// Emit code at the end of this block to adjust the value defined here to
278298
/// the specified type. This handles various coercion cases.
279299
Value *MaterializeAdjustedValue(LoadInst *Load, GVN &gvn) const {
@@ -876,6 +896,16 @@ ConstructSSAForLoadSet(LoadInst *Load,
876896
return SSAUpdate.GetValueInMiddleOfBlock(Load->getParent());
877897
}
878898

899+
static LoadInst *findDominatingLoad(Value *Ptr, SelectInst *Sel,
900+
DominatorTree &DT) {
901+
for (Value *U : Ptr->users()) {
902+
auto *LI = dyn_cast<LoadInst>(U);
903+
if (LI && LI->getParent() == Sel->getParent() && DT.dominates(LI, Sel))
904+
return LI;
905+
}
906+
return nullptr;
907+
}
908+
879909
Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
880910
Instruction *InsertPt,
881911
GVN &gvn) const {
@@ -916,6 +946,17 @@ Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
916946
<< " " << *getMemIntrinValue() << '\n'
917947
<< *Res << '\n'
918948
<< "\n\n\n");
949+
} else if (isSelectValue()) {
950+
// Introduce a new value select for a load from an eligible pointer select.
951+
SelectInst *Sel = getSelectValue();
952+
LoadInst *L1 =
953+
findDominatingLoad(Sel->getOperand(1), Sel, gvn.getDominatorTree());
954+
LoadInst *L2 =
955+
findDominatingLoad(Sel->getOperand(2), Sel, gvn.getDominatorTree());
956+
assert(L1 && L2 &&
957+
"must be able to obtain dominating loads for both value operands of "
958+
"the select");
959+
Res = SelectInst::Create(Sel->getCondition(), L1, L2, "", Sel);
919960
} else {
920961
llvm_unreachable("Should not materialize value from dead block");
921962
}
@@ -1002,8 +1043,53 @@ static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo,
10021043
ORE->emit(R);
10031044
}
10041045

1046+
/// Check if a load from pointer-select \p Address in \p DepBB can be converted
1047+
/// to a value select. The following conditions need to be satisfied:
1048+
/// 1. The pointer select (\p Address) must be defined in \p DepBB.
1049+
/// 2. Both value operands of the pointer select must be loaded in the same
1050+
/// basic block, before the pointer select.
1051+
/// 3. There must be no instructions between the found loads and \p End that may
1052+
/// clobber the loads.
1053+
static Optional<AvailableValue>
1054+
tryToConvertLoadOfPtrSelect(BasicBlock *DepBB, BasicBlock::iterator End,
1055+
Value *Address, DominatorTree &DT, AAResults *AA) {
1056+
1057+
auto *Sel = dyn_cast_or_null<SelectInst>(Address);
1058+
if (!Sel || DepBB != Sel->getParent())
1059+
return None;
1060+
1061+
LoadInst *L1 = findDominatingLoad(Sel->getOperand(1), Sel, DT);
1062+
LoadInst *L2 = findDominatingLoad(Sel->getOperand(2), Sel, DT);
1063+
if (!L1 || !L2)
1064+
return None;
1065+
1066+
// Ensure there are no accesses that may modify the locations referenced by
1067+
// either L1 or L2 between L1, L2 and the specified End iterator.
1068+
Instruction *EarlierLoad = L1->comesBefore(L2) ? L1 : L2;
1069+
MemoryLocation L1Loc = MemoryLocation::get(L1);
1070+
MemoryLocation L2Loc = MemoryLocation::get(L2);
1071+
if (any_of(make_range(EarlierLoad->getIterator(), End), [&](Instruction &I) {
1072+
return isModSet(AA->getModRefInfo(&I, L1Loc)) ||
1073+
isModSet(AA->getModRefInfo(&I, L2Loc));
1074+
}))
1075+
return None;
1076+
1077+
return AvailableValue::getSelect(Sel);
1078+
}
1079+
10051080
bool GVN::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
10061081
Value *Address, AvailableValue &Res) {
1082+
if (!DepInfo.isDef() && !DepInfo.isClobber()) {
1083+
assert(isa<SelectInst>(Address));
1084+
if (auto R = tryToConvertLoadOfPtrSelect(
1085+
Load->getParent(), Load->getIterator(), Address, getDominatorTree(),
1086+
getAliasAnalysis())) {
1087+
Res = *R;
1088+
return true;
1089+
}
1090+
return false;
1091+
}
1092+
10071093
assert((DepInfo.isDef() || DepInfo.isClobber()) &&
10081094
"expected a local dependence");
10091095
assert(Load->isUnordered() && "rules below are incorrect for ordered access");
@@ -1071,6 +1157,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
10711157
}
10721158
}
10731159
}
1160+
10741161
// Nothing known about this clobber, have to be conservative
10751162
LLVM_DEBUG(
10761163
// fast print dep, using operator<< on instruction is too slow.
@@ -1156,16 +1243,23 @@ void GVN::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
11561243
continue;
11571244
}
11581245

1159-
if (!DepInfo.isDef() && !DepInfo.isClobber()) {
1160-
UnavailableBlocks.push_back(DepBB);
1161-
continue;
1162-
}
1163-
11641246
// The address being loaded in this non-local block may not be the same as
11651247
// the pointer operand of the load if PHI translation occurs. Make sure
11661248
// to consider the right address.
11671249
Value *Address = Deps[i].getAddress();
11681250

1251+
if (!DepInfo.isDef() && !DepInfo.isClobber()) {
1252+
if (auto R = tryToConvertLoadOfPtrSelect(DepBB, DepBB->end(), Address,
1253+
getDominatorTree(),
1254+
getAliasAnalysis())) {
1255+
ValuesPerBlock.push_back(
1256+
AvailableValueInBlock::get(DepBB, std::move(*R)));
1257+
continue;
1258+
}
1259+
UnavailableBlocks.push_back(DepBB);
1260+
continue;
1261+
}
1262+
11691263
AvailableValue AV;
11701264
if (AnalyzeLoadAvailability(Load, DepInfo, Address, AV)) {
11711265
// subtlety: because we know this was a non-local dependency, we know
@@ -1902,8 +1996,9 @@ bool GVN::processLoad(LoadInst *L) {
19021996
if (Dep.isNonLocal())
19031997
return processNonLocalLoad(L);
19041998

1999+
Value *Address = L->getPointerOperand();
19052000
// Only handle the local case below
1906-
if (!Dep.isDef() && !Dep.isClobber()) {
2001+
if (!Dep.isDef() && !Dep.isClobber() && !isa<SelectInst>(Address)) {
19072002
// This might be a NonFuncLocal or an Unknown
19082003
LLVM_DEBUG(
19092004
// fast print dep, using operator<< on instruction is too slow.
@@ -1913,7 +2008,7 @@ bool GVN::processLoad(LoadInst *L) {
19132008
}
19142009

19152010
AvailableValue AV;
1916-
if (AnalyzeLoadAvailability(L, Dep, L->getPointerOperand(), AV)) {
2011+
if (AnalyzeLoadAvailability(L, Dep, Address, AV)) {
19172012
Value *AvailableValue = AV.MaterializeAdjustedValue(L, L, *this);
19182013

19192014
// Replace the load!

llvm/test/Transforms/GVN/PRE/pre-load-through-select.ll

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,15 @@ define i32 @test_pointer_phi_select_simp_1(i32* %a, i32* %b, i1 %cond) {
99
; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[A:%.*]], align 4
1010
; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[B:%.*]], align 4
1111
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
12+
; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_I_I_I]], i32 [[L_1]], i32 [[L_2]]
1213
; CHECK-NEXT: [[MIN_SELECT:%.*]] = select i1 [[CMP_I_I_I]], i32* [[A]], i32* [[B]]
1314
; CHECK-NEXT: br label [[EXIT:%.*]]
1415
; CHECK: else:
16+
; CHECK-NEXT: [[RES_2_PRE:%.*]] = load i32, i32* [[A]], align 4
1517
; CHECK-NEXT: br label [[EXIT]]
1618
; CHECK: exit:
19+
; CHECK-NEXT: [[RES_2:%.*]] = phi i32 [ [[TMP0]], [[THEN]] ], [ [[RES_2_PRE]], [[ELSE]] ]
1720
; CHECK-NEXT: [[P:%.*]] = phi i32* [ [[MIN_SELECT]], [[THEN]] ], [ [[A]], [[ELSE]] ]
18-
; CHECK-NEXT: [[RES_2:%.*]] = load i32, i32* [[P]], align 4
1921
; CHECK-NEXT: ret i32 [[RES_2]]
2022
;
2123
entry:
@@ -118,13 +120,15 @@ define i32 @test_pointer_phi_select_simp_store_noclobber(i32* %a, i32* %b, i32*
118120
; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[B:%.*]], align 4
119121
; CHECK-NEXT: store i32 99, i32* [[C:%.*]], align 4
120122
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
123+
; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_I_I_I]], i32 [[L_1]], i32 [[L_2]]
121124
; CHECK-NEXT: [[MIN_SELECT:%.*]] = select i1 [[CMP_I_I_I]], i32* [[A]], i32* [[B]]
122125
; CHECK-NEXT: br label [[EXIT:%.*]]
123126
; CHECK: else:
127+
; CHECK-NEXT: [[RES_2_PRE:%.*]] = load i32, i32* [[A]], align 4
124128
; CHECK-NEXT: br label [[EXIT]]
125129
; CHECK: exit:
130+
; CHECK-NEXT: [[RES_2:%.*]] = phi i32 [ [[TMP0]], [[THEN]] ], [ [[RES_2_PRE]], [[ELSE]] ]
126131
; CHECK-NEXT: [[P:%.*]] = phi i32* [ [[MIN_SELECT]], [[THEN]] ], [ [[A]], [[ELSE]] ]
127-
; CHECK-NEXT: [[RES_2:%.*]] = load i32, i32* [[P]], align 4
128132
; CHECK-NEXT: ret i32 [[RES_2]]
129133
;
130134
entry:
@@ -648,9 +652,9 @@ define i32 @test_pointer_phi_select_single_block_store(i32* %a, i32* %b) {
648652
; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[A:%.*]], align 4
649653
; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[B:%.*]], align 4
650654
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
655+
; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_I_I_I]], i32 [[L_1]], i32 [[L_2]]
651656
; CHECK-NEXT: [[MIN_SELECT:%.*]] = select i1 [[CMP_I_I_I]], i32* [[A]], i32* [[B]]
652-
; CHECK-NEXT: [[RES_0:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
653-
; CHECK-NEXT: ret i32 [[RES_0]]
657+
; CHECK-NEXT: ret i32 [[TMP0]]
654658
;
655659
entry:
656660
%l.1 = load i32, i32* %a, align 4
@@ -723,3 +727,24 @@ entry:
723727
%res.0 = load i32, i32* %min.select, align 4
724728
ret i32 %res.0
725729
}
730+
731+
define i32 @test_pointer_phi_select_single_block_store_after(i32* %a, i32* %b, i32* %c) {
732+
; CHECK-LABEL: @test_pointer_phi_select_single_block_store_after(
733+
; CHECK-NEXT: entry:
734+
; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[A:%.*]], align 4
735+
; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[B:%.*]], align 4
736+
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
737+
; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_I_I_I]], i32 [[L_1]], i32 [[L_2]]
738+
; CHECK-NEXT: [[MIN_SELECT:%.*]] = select i1 [[CMP_I_I_I]], i32* [[A]], i32* [[B]]
739+
; CHECK-NEXT: store i32 99, i32* [[C:%.*]], align 4
740+
; CHECK-NEXT: ret i32 [[TMP0]]
741+
;
742+
entry:
743+
%l.1 = load i32, i32* %a, align 4
744+
%l.2 = load i32, i32* %b, align 4
745+
%cmp.i.i.i = icmp ult i32 %l.1, %l.2
746+
%min.select = select i1 %cmp.i.i.i, i32* %a, i32* %b
747+
%res.0 = load i32, i32* %min.select, align 4
748+
store i32 99, i32* %c
749+
ret i32 %res.0
750+
}

0 commit comments

Comments
 (0)