Skip to content

Commit 8a12cae

Browse files
committed
[GVN] Support load of pointer-select to value-select conversion.
This patch extends the available-value logic to detect loads of pointer-selects that can be replaced by a value select. For example, consider the code below: loop: %sel.phi = phi i32* [ %start, %ph ], [ %sel, %ph ] %l = load %ptr %l.sel = load %sel.phi %sel = select cond, %ptr, %sel.phi ... exit: %res = load %sel use(%res) The load of the pointer phi can be replaced by a load of the start value outside the loop and a new phi/select chain based on the loaded values, as illustrated below %l.start = load %start loop: sel.phi.prom = phi i32 [ %l.start, %ph ], [ %sel.prom, %ph ] %l = load %ptr %sel.prom = select cond, %l, %sel.phi.prom ... exit: use(%sel.prom) This is a first step towards allowing vectorization of loops using common libc++ library functions, like std::min_element (https://clang.godbolt.org/z/6czGzzqbs) #include <vector> #include <algorithm> int foo(const std::vector<int> &V) { return *std::min_element(V.begin(), V.end()); } Reviewed By: reames Differential Revision: https://reviews.llvm.org/D118143
1 parent dc82547 commit 8a12cae

File tree

3 files changed

+188
-40
lines changed

3 files changed

+188
-40
lines changed

llvm/lib/Transforms/Scalar/GVN.cpp

Lines changed: 104 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -182,12 +182,14 @@ struct llvm::gvn::AvailableValue {
182182
SimpleVal, // A simple offsetted value that is accessed.
183183
LoadVal, // A value produced by a load.
184184
MemIntrin, // A memory intrinsic which is loaded from.
185-
UndefVal // A UndefValue representing a value from dead block (which
185+
UndefVal, // A UndefValue representing a value from dead block (which
186186
// is not yet physically removed from the CFG).
187+
SelectVal, // A pointer select which is loaded from and for which the load
188+
// can be replaced by a value select.
187189
};
188190

189191
/// V - The value that is live out of the block.
190-
PointerIntPair<Value *, 2, ValType> Val;
192+
PointerIntPair<Value *, 3, ValType> Val;
191193

192194
/// Offset - The byte offset in Val that is interesting for the load query.
193195
unsigned Offset = 0;
@@ -224,10 +226,19 @@ struct llvm::gvn::AvailableValue {
224226
return Res;
225227
}
226228

229+
/// Build an AvailableValue of kind SelectVal that wraps the pointer select
/// \p Sel. The byte offset is set to 0.
static AvailableValue getSelect(SelectInst *Sel) {
230+
AvailableValue Res;
231+
Res.Val.setPointer(Sel);
232+
Res.Val.setInt(SelectVal);
233+
Res.Offset = 0;
234+
return Res;
235+
}
236+
227237
// Kind predicates: each one compares the ValType tag stored in the integer
// part of Val against a single enumerator.
bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
228238
bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
229239
bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
230240
bool isUndefValue() const { return Val.getInt() == UndefVal; }
241+
bool isSelectValue() const { return Val.getInt() == SelectVal; }
231242

232243
Value *getSimpleValue() const {
233244
assert(isSimpleValue() && "Wrong accessor");
@@ -244,6 +255,11 @@ struct llvm::gvn::AvailableValue {
244255
return cast<MemIntrinsic>(Val.getPointer());
245256
}
246257

258+
/// Return the pointer select wrapped by this value. Asserts that the value is
/// of kind SelectVal.
SelectInst *getSelectValue() const {
259+
assert(isSelectValue() && "Wrong accessor");
260+
return cast<SelectInst>(Val.getPointer());
261+
}
262+
247263
/// Emit code at the specified insertion point to adjust the value defined
248264
/// here to the specified type. This handles various coercion cases.
249265
Value *MaterializeAdjustedValue(LoadInst *Load, Instruction *InsertPt,
@@ -275,6 +291,10 @@ struct llvm::gvn::AvailableValueInBlock {
275291
return get(BB, AvailableValue::getUndef());
276292
}
277293

294+
/// Convenience wrapper: forwards to get() with \p BB and a SelectVal
/// AvailableValue built from \p Sel.
static AvailableValueInBlock getSelect(BasicBlock *BB, SelectInst *Sel) {
295+
return get(BB, AvailableValue::getSelect(Sel));
296+
}
297+
278298
/// Emit code at the end of this block to adjust the value defined here to
279299
/// the specified type. This handles various coercion cases.
280300
Value *MaterializeAdjustedValue(LoadInst *Load, GVNPass &gvn) const {
@@ -897,6 +917,16 @@ ConstructSSAForLoadSet(LoadInst *Load,
897917
return SSAUpdate.GetValueInMiddleOfBlock(Load->getParent());
898918
}
899919

920+
/// Search the users of \p Ptr for a load that lives in the same basic block as
/// \p Sel and dominates \p Sel.
///
/// \returns the first such load encountered while iterating over the users of
/// \p Ptr, or nullptr if there is none.
///
/// NOTE(review): neither the loaded type nor the volatile/atomic state of the
/// returned load is checked here -- confirm that every caller only uses the
/// result where it is compatible with the load being replaced.
static LoadInst *findDominatingLoad(Value *Ptr, SelectInst *Sel,
921+
DominatorTree &DT) {
922+
for (Value *U : Ptr->users()) {
923+
auto *LI = dyn_cast<LoadInst>(U);
924+
// Only loads in the select's own block that dominate the select qualify.
if (LI && LI->getParent() == Sel->getParent() && DT.dominates(LI, Sel))
925+
return LI;
926+
}
927+
return nullptr;
928+
}
929+
900930
Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
901931
Instruction *InsertPt,
902932
GVNPass &gvn) const {
@@ -937,6 +967,17 @@ Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
937967
<< " " << *getMemIntrinValue() << '\n'
938968
<< *Res << '\n'
939969
<< "\n\n\n");
970+
} else if (isSelectValue()) {
971+
// Introduce a new value select for a load from an eligible pointer select.
972+
SelectInst *Sel = getSelectValue();
973+
LoadInst *L1 =
974+
findDominatingLoad(Sel->getOperand(1), Sel, gvn.getDominatorTree());
975+
LoadInst *L2 =
976+
findDominatingLoad(Sel->getOperand(2), Sel, gvn.getDominatorTree());
977+
assert(L1 && L2 &&
978+
"must be able to obtain dominating loads for both value operands of "
979+
"the select");
980+
Res = SelectInst::Create(Sel->getCondition(), L1, L2, "", Sel);
940981
} else {
941982
llvm_unreachable("Should not materialize value from dead block");
942983
}
@@ -1023,8 +1064,53 @@ static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo,
10231064
ORE->emit(R);
10241065
}
10251066

1067+
/// Check if a load from pointer-select \p Address in \p DepBB can be converted
1068+
/// to a value select. The following conditions need to be satisfied:
1069+
/// 1. The pointer select (\p Address) must be defined in \p DepBB.
1070+
/// 2. Both value operands of the pointer select must be loaded in the same
1071+
/// basic block, before the pointer select.
1072+
/// 3. There must be no instructions between the found loads and \p End that may
1073+
/// clobber the loads.
///
/// \returns an AvailableValue of kind SelectVal wrapping the select when all
/// conditions hold, None otherwise.
1074+
static Optional<AvailableValue>
1075+
tryToConvertLoadOfPtrSelect(BasicBlock *DepBB, BasicBlock::iterator End,
1076+
Value *Address, DominatorTree &DT, AAResults *AA) {
1077+
1078+
// Address may be null or something other than a select; both cases, as well
// as a select defined outside DepBB, bail out here (condition 1).
auto *Sel = dyn_cast_or_null<SelectInst>(Address);
1079+
if (!Sel || DepBB != Sel->getParent())
1080+
return None;
1081+
1082+
// Condition 2: each of the two value operands of the select needs a
// dominating load in the select's block.
LoadInst *L1 = findDominatingLoad(Sel->getOperand(1), Sel, DT);
1083+
LoadInst *L2 = findDominatingLoad(Sel->getOperand(2), Sel, DT);
1084+
if (!L1 || !L2)
1085+
return None;
1086+
1087+
// Ensure there are no accesses that may modify the locations referenced by
1088+
// either L1 or L2 between L1, L2 and the specified End iterator.
1089+
// The scan starts at whichever of the two loads comes first in the block.
Instruction *EarlierLoad = L1->comesBefore(L2) ? L1 : L2;
1090+
MemoryLocation L1Loc = MemoryLocation::get(L1);
1091+
MemoryLocation L2Loc = MemoryLocation::get(L2);
1092+
if (any_of(make_range(EarlierLoad->getIterator(), End), [&](Instruction &I) {
1093+
// Condition 3 fails if I may write to either loaded location.
return isModSet(AA->getModRefInfo(&I, L1Loc)) ||
1094+
isModSet(AA->getModRefInfo(&I, L2Loc));
1095+
}))
1096+
return None;
1097+
1098+
return AvailableValue::getSelect(Sel);
1099+
}
1100+
10261101
bool GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
10271102
Value *Address, AvailableValue &Res) {
1103+
if (!DepInfo.isDef() && !DepInfo.isClobber()) {
1104+
assert(isa<SelectInst>(Address));
1105+
if (auto R = tryToConvertLoadOfPtrSelect(
1106+
Load->getParent(), Load->getIterator(), Address, getDominatorTree(),
1107+
getAliasAnalysis())) {
1108+
Res = *R;
1109+
return true;
1110+
}
1111+
return false;
1112+
}
1113+
10281114
assert((DepInfo.isDef() || DepInfo.isClobber()) &&
10291115
"expected a local dependence");
10301116
assert(Load->isUnordered() && "rules below are incorrect for ordered access");
@@ -1092,6 +1178,7 @@ bool GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
10921178
}
10931179
}
10941180
}
1181+
10951182
// Nothing known about this clobber, have to be conservative
10961183
LLVM_DEBUG(
10971184
// fast print dep, using operator<< on instruction is too slow.
@@ -1176,16 +1263,23 @@ void GVNPass::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
11761263
continue;
11771264
}
11781265

1179-
if (!DepInfo.isDef() && !DepInfo.isClobber()) {
1180-
UnavailableBlocks.push_back(DepBB);
1181-
continue;
1182-
}
1183-
11841266
// The address being loaded in this non-local block may not be the same as
11851267
// the pointer operand of the load if PHI translation occurs. Make sure
11861268
// to consider the right address.
11871269
Value *Address = Deps[i].getAddress();
11881270

1271+
if (!DepInfo.isDef() && !DepInfo.isClobber()) {
1272+
if (auto R = tryToConvertLoadOfPtrSelect(DepBB, DepBB->end(), Address,
1273+
getDominatorTree(),
1274+
getAliasAnalysis())) {
1275+
ValuesPerBlock.push_back(
1276+
AvailableValueInBlock::get(DepBB, std::move(*R)));
1277+
continue;
1278+
}
1279+
UnavailableBlocks.push_back(DepBB);
1280+
continue;
1281+
}
1282+
11891283
AvailableValue AV;
11901284
if (AnalyzeLoadAvailability(Load, DepInfo, Address, AV)) {
11911285
// subtlety: because we know this was a non-local dependency, we know
@@ -1923,8 +2017,9 @@ bool GVNPass::processLoad(LoadInst *L) {
19232017
if (Dep.isNonLocal())
19242018
return processNonLocalLoad(L);
19252019

2020+
Value *Address = L->getPointerOperand();
19262021
// Only handle the local case below
1927-
if (!Dep.isDef() && !Dep.isClobber()) {
2022+
if (!Dep.isDef() && !Dep.isClobber() && !isa<SelectInst>(Address)) {
19282023
// This might be a NonFuncLocal or an Unknown
19292024
LLVM_DEBUG(
19302025
// fast print dep, using operator<< on instruction is too slow.
@@ -1934,7 +2029,7 @@ bool GVNPass::processLoad(LoadInst *L) {
19342029
}
19352030

19362031
AvailableValue AV;
1937-
if (AnalyzeLoadAvailability(L, Dep, L->getPointerOperand(), AV)) {
2032+
if (AnalyzeLoadAvailability(L, Dep, Address, AV)) {
19382033
Value *AvailableValue = AV.MaterializeAdjustedValue(L, L, *this);
19392034

19402035
// Replace the load!

llvm/test/Transforms/GVN/PRE/pre-load-through-select.ll

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,15 @@ define i32 @test_pointer_phi_select_simp_1(i32* %a, i32* %b, i1 %cond) {
99
; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[A:%.*]], align 4
1010
; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[B:%.*]], align 4
1111
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
12+
; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_I_I_I]], i32 [[L_1]], i32 [[L_2]]
1213
; CHECK-NEXT: [[MIN_SELECT:%.*]] = select i1 [[CMP_I_I_I]], i32* [[A]], i32* [[B]]
1314
; CHECK-NEXT: br label [[EXIT:%.*]]
1415
; CHECK: else:
16+
; CHECK-NEXT: [[RES_2_PRE:%.*]] = load i32, i32* [[A]], align 4
1517
; CHECK-NEXT: br label [[EXIT]]
1618
; CHECK: exit:
19+
; CHECK-NEXT: [[RES_2:%.*]] = phi i32 [ [[TMP0]], [[THEN]] ], [ [[RES_2_PRE]], [[ELSE]] ]
1720
; CHECK-NEXT: [[P:%.*]] = phi i32* [ [[MIN_SELECT]], [[THEN]] ], [ [[A]], [[ELSE]] ]
18-
; CHECK-NEXT: [[RES_2:%.*]] = load i32, i32* [[P]], align 4
1921
; CHECK-NEXT: ret i32 [[RES_2]]
2022
;
2123
entry:
@@ -118,13 +120,15 @@ define i32 @test_pointer_phi_select_simp_store_noclobber(i32* %a, i32* %b, i32*
118120
; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[B:%.*]], align 4
119121
; CHECK-NEXT: store i32 99, i32* [[C:%.*]], align 4
120122
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
123+
; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_I_I_I]], i32 [[L_1]], i32 [[L_2]]
121124
; CHECK-NEXT: [[MIN_SELECT:%.*]] = select i1 [[CMP_I_I_I]], i32* [[A]], i32* [[B]]
122125
; CHECK-NEXT: br label [[EXIT:%.*]]
123126
; CHECK: else:
127+
; CHECK-NEXT: [[RES_2_PRE:%.*]] = load i32, i32* [[A]], align 4
124128
; CHECK-NEXT: br label [[EXIT]]
125129
; CHECK: exit:
130+
; CHECK-NEXT: [[RES_2:%.*]] = phi i32 [ [[TMP0]], [[THEN]] ], [ [[RES_2_PRE]], [[ELSE]] ]
126131
; CHECK-NEXT: [[P:%.*]] = phi i32* [ [[MIN_SELECT]], [[THEN]] ], [ [[A]], [[ELSE]] ]
127-
; CHECK-NEXT: [[RES_2:%.*]] = load i32, i32* [[P]], align 4
128132
; CHECK-NEXT: ret i32 [[RES_2]]
129133
;
130134
entry:
@@ -648,9 +652,9 @@ define i32 @test_pointer_phi_select_single_block_store(i32* %a, i32* %b) {
648652
; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[A:%.*]], align 4
649653
; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[B:%.*]], align 4
650654
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
655+
; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_I_I_I]], i32 [[L_1]], i32 [[L_2]]
651656
; CHECK-NEXT: [[MIN_SELECT:%.*]] = select i1 [[CMP_I_I_I]], i32* [[A]], i32* [[B]]
652-
; CHECK-NEXT: [[RES_0:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
653-
; CHECK-NEXT: ret i32 [[RES_0]]
657+
; CHECK-NEXT: ret i32 [[TMP0]]
654658
;
655659
entry:
656660
%l.1 = load i32, i32* %a, align 4
@@ -723,3 +727,24 @@ entry:
723727
%res.0 = load i32, i32* %min.select, align 4
724728
ret i32 %res.0
725729
}
730+
731+
; The store to %c comes after the load of the pointer select, so the checks
; expect the load to be replaced by a value select while the store stays put.
define i32 @test_pointer_phi_select_single_block_store_after(i32* %a, i32* %b, i32* %c) {
732+
; CHECK-LABEL: @test_pointer_phi_select_single_block_store_after(
733+
; CHECK-NEXT: entry:
734+
; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[A:%.*]], align 4
735+
; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[B:%.*]], align 4
736+
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
737+
; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_I_I_I]], i32 [[L_1]], i32 [[L_2]]
738+
; CHECK-NEXT: [[MIN_SELECT:%.*]] = select i1 [[CMP_I_I_I]], i32* [[A]], i32* [[B]]
739+
; CHECK-NEXT: store i32 99, i32* [[C:%.*]], align 4
740+
; CHECK-NEXT: ret i32 [[TMP0]]
741+
;
742+
entry:
743+
%l.1 = load i32, i32* %a, align 4
744+
%l.2 = load i32, i32* %b, align 4
745+
%cmp.i.i.i = icmp ult i32 %l.1, %l.2
746+
%min.select = select i1 %cmp.i.i.i, i32* %a, i32* %b
747+
%res.0 = load i32, i32* %min.select, align 4
748+
store i32 99, i32* %c
749+
ret i32 %res.0
750+
}

0 commit comments

Comments
 (0)