Skip to content

Commit 72f7b02

Browse files
committed
[GVN] Support load of pointer-select to value-select conversion.
This patch extends the available-value logic to detect loads of pointer-selects that can be replaced by a value select. For example, consider the code below: loop: %sel.phi = phi i32* [ %start, %ph ], [ %sel, %ph ] %l = load %ptr %l.sel = load %sel.phi %sel = select cond, %ptr, %sel.phi ... exit: %res = load %sel use(%res) The load of the pointer phi can be replaced by a load of the start value outside the loop and a new phi/select chain based on the loaded values, as illustrated below %l.start = load %start loop: sel.phi.prom = phi i32 [ %l.start, %ph ], [ %sel.prom, %ph ] %l = load %ptr %sel.prom = select cond, %l, %sel.phi.prom ... exit: use(%sel.prom) This is a first step towards alllowing vectorizing loops using common libc++ library functions, like std::min_element (https://clang.godbolt.org/z/6czGzzqbs) #include <vector> #include <algorithm> int foo(const std::vector<int> &V) { return *std::min_element(V.begin(), V.end()); } Reviewed By: reames Differential Revision: https://reviews.llvm.org/D118143 (cherry-picked from 8a12cae)
1 parent 1b11d48 commit 72f7b02

File tree

3 files changed

+188
-40
lines changed

3 files changed

+188
-40
lines changed

llvm/lib/Transforms/Scalar/GVN.cpp

Lines changed: 104 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -181,12 +181,14 @@ struct llvm::gvn::AvailableValue {
181181
SimpleVal, // A simple offsetted value that is accessed.
182182
LoadVal, // A value produced by a load.
183183
MemIntrin, // A memory intrinsic which is loaded from.
184-
UndefVal // A UndefValue representing a value from dead block (which
184+
UndefVal, // A UndefValue representing a value from dead block (which
185185
// is not yet physically removed from the CFG).
186+
SelectVal, // A pointer select which is loaded from and for which the load
187+
// can be replace by a value select.
186188
};
187189

188190
/// V - The value that is live out of the block.
189-
PointerIntPair<Value *, 2, ValType> Val;
191+
PointerIntPair<Value *, 3, ValType> Val;
190192

191193
/// Offset - The byte offset in Val that is interesting for the load query.
192194
unsigned Offset = 0;
@@ -223,10 +225,19 @@ struct llvm::gvn::AvailableValue {
223225
return Res;
224226
}
225227

228+
static AvailableValue getSelect(SelectInst *Sel) {
229+
AvailableValue Res;
230+
Res.Val.setPointer(Sel);
231+
Res.Val.setInt(SelectVal);
232+
Res.Offset = 0;
233+
return Res;
234+
}
235+
226236
bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
227237
bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
228238
bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
229239
bool isUndefValue() const { return Val.getInt() == UndefVal; }
240+
bool isSelectValue() const { return Val.getInt() == SelectVal; }
230241

231242
Value *getSimpleValue() const {
232243
assert(isSimpleValue() && "Wrong accessor");
@@ -243,6 +254,11 @@ struct llvm::gvn::AvailableValue {
243254
return cast<MemIntrinsic>(Val.getPointer());
244255
}
245256

257+
SelectInst *getSelectValue() const {
258+
assert(isSelectValue() && "Wrong accessor");
259+
return cast<SelectInst>(Val.getPointer());
260+
}
261+
246262
/// Emit code at the specified insertion point to adjust the value defined
247263
/// here to the specified type. This handles various coercion cases.
248264
Value *MaterializeAdjustedValue(LoadInst *Load, Instruction *InsertPt,
@@ -274,6 +290,10 @@ struct llvm::gvn::AvailableValueInBlock {
274290
return get(BB, AvailableValue::getUndef());
275291
}
276292

293+
static AvailableValueInBlock getSelect(BasicBlock *BB, SelectInst *Sel) {
294+
return get(BB, AvailableValue::getSelect(Sel));
295+
}
296+
277297
/// Emit code at the end of this block to adjust the value defined here to
278298
/// the specified type. This handles various coercion cases.
279299
Value *MaterializeAdjustedValue(LoadInst *Load, GVN &gvn) const {
@@ -893,6 +913,16 @@ ConstructSSAForLoadSet(LoadInst *Load,
893913
return SSAUpdate.GetValueInMiddleOfBlock(Load->getParent());
894914
}
895915

916+
static LoadInst *findDominatingLoad(Value *Ptr, SelectInst *Sel,
917+
DominatorTree &DT) {
918+
for (Value *U : Ptr->users()) {
919+
auto *LI = dyn_cast<LoadInst>(U);
920+
if (LI && LI->getParent() == Sel->getParent() && DT.dominates(LI, Sel))
921+
return LI;
922+
}
923+
return nullptr;
924+
}
925+
896926
Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
897927
Instruction *InsertPt,
898928
GVN &gvn) const {
@@ -933,6 +963,17 @@ Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
933963
<< " " << *getMemIntrinValue() << '\n'
934964
<< *Res << '\n'
935965
<< "\n\n\n");
966+
} else if (isSelectValue()) {
967+
// Introduce a new value select for a load from an eligible pointer select.
968+
SelectInst *Sel = getSelectValue();
969+
LoadInst *L1 =
970+
findDominatingLoad(Sel->getOperand(1), Sel, gvn.getDominatorTree());
971+
LoadInst *L2 =
972+
findDominatingLoad(Sel->getOperand(2), Sel, gvn.getDominatorTree());
973+
assert(L1 && L2 &&
974+
"must be able to obtain dominating loads for both value operands of "
975+
"the select");
976+
Res = SelectInst::Create(Sel->getCondition(), L1, L2, "", Sel);
936977
} else {
937978
llvm_unreachable("Should not materialize value from dead block");
938979
}
@@ -1019,8 +1060,53 @@ static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo,
10191060
ORE->emit(R);
10201061
}
10211062

1063+
/// Check if a load from pointer-select \p Address in \p DepBB can be converted
1064+
/// to a value select. The following conditions need to be satisfied:
1065+
/// 1. The pointer select (\p Address) must be defined in \p DepBB.
1066+
/// 2. Both value operands of the pointer select must be loaded in the same
1067+
/// basic block, before the pointer select.
1068+
/// 3. There must be no instructions between the found loads and \p End that may
1069+
/// clobber the loads.
1070+
static Optional<AvailableValue>
1071+
tryToConvertLoadOfPtrSelect(BasicBlock *DepBB, BasicBlock::iterator End,
1072+
Value *Address, DominatorTree &DT, AAResults *AA) {
1073+
1074+
auto *Sel = dyn_cast_or_null<SelectInst>(Address);
1075+
if (!Sel || DepBB != Sel->getParent())
1076+
return None;
1077+
1078+
LoadInst *L1 = findDominatingLoad(Sel->getOperand(1), Sel, DT);
1079+
LoadInst *L2 = findDominatingLoad(Sel->getOperand(2), Sel, DT);
1080+
if (!L1 || !L2)
1081+
return None;
1082+
1083+
// Ensure there are no accesses that may modify the locations referenced by
1084+
// either L1 or L2 between L1, L2 and the specified End iterator.
1085+
Instruction *EarlierLoad = L1->comesBefore(L2) ? L1 : L2;
1086+
MemoryLocation L1Loc = MemoryLocation::get(L1);
1087+
MemoryLocation L2Loc = MemoryLocation::get(L2);
1088+
if (any_of(make_range(EarlierLoad->getIterator(), End), [&](Instruction &I) {
1089+
return isModSet(AA->getModRefInfo(&I, L1Loc)) ||
1090+
isModSet(AA->getModRefInfo(&I, L2Loc));
1091+
}))
1092+
return None;
1093+
1094+
return AvailableValue::getSelect(Sel);
1095+
}
1096+
10221097
bool GVN::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
10231098
Value *Address, AvailableValue &Res) {
1099+
if (!DepInfo.isDef() && !DepInfo.isClobber()) {
1100+
assert(isa<SelectInst>(Address));
1101+
if (auto R = tryToConvertLoadOfPtrSelect(
1102+
Load->getParent(), Load->getIterator(), Address, getDominatorTree(),
1103+
getAliasAnalysis())) {
1104+
Res = *R;
1105+
return true;
1106+
}
1107+
return false;
1108+
}
1109+
10241110
assert((DepInfo.isDef() || DepInfo.isClobber()) &&
10251111
"expected a local dependence");
10261112
assert(Load->isUnordered() && "rules below are incorrect for ordered access");
@@ -1088,6 +1174,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
10881174
}
10891175
}
10901176
}
1177+
10911178
// Nothing known about this clobber, have to be conservative
10921179
LLVM_DEBUG(
10931180
// fast print dep, using operator<< on instruction is too slow.
@@ -1173,16 +1260,23 @@ void GVN::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
11731260
continue;
11741261
}
11751262

1176-
if (!DepInfo.isDef() && !DepInfo.isClobber()) {
1177-
UnavailableBlocks.push_back(DepBB);
1178-
continue;
1179-
}
1180-
11811263
// The address being loaded in this non-local block may not be the same as
11821264
// the pointer operand of the load if PHI translation occurs. Make sure
11831265
// to consider the right address.
11841266
Value *Address = Deps[i].getAddress();
11851267

1268+
if (!DepInfo.isDef() && !DepInfo.isClobber()) {
1269+
if (auto R = tryToConvertLoadOfPtrSelect(DepBB, DepBB->end(), Address,
1270+
getDominatorTree(),
1271+
getAliasAnalysis())) {
1272+
ValuesPerBlock.push_back(
1273+
AvailableValueInBlock::get(DepBB, std::move(*R)));
1274+
continue;
1275+
}
1276+
UnavailableBlocks.push_back(DepBB);
1277+
continue;
1278+
}
1279+
11861280
AvailableValue AV;
11871281
if (AnalyzeLoadAvailability(Load, DepInfo, Address, AV)) {
11881282
// subtlety: because we know this was a non-local dependency, we know
@@ -1921,8 +2015,9 @@ bool GVN::processLoad(LoadInst *L) {
19212015
if (Dep.isNonLocal())
19222016
return processNonLocalLoad(L);
19232017

2018+
Value *Address = L->getPointerOperand();
19242019
// Only handle the local case below
1925-
if (!Dep.isDef() && !Dep.isClobber()) {
2020+
if (!Dep.isDef() && !Dep.isClobber() && !isa<SelectInst>(Address)) {
19262021
// This might be a NonFuncLocal or an Unknown
19272022
LLVM_DEBUG(
19282023
// fast print dep, using operator<< on instruction is too slow.
@@ -1932,7 +2027,7 @@ bool GVN::processLoad(LoadInst *L) {
19322027
}
19332028

19342029
AvailableValue AV;
1935-
if (AnalyzeLoadAvailability(L, Dep, L->getPointerOperand(), AV)) {
2030+
if (AnalyzeLoadAvailability(L, Dep, Address, AV)) {
19362031
Value *AvailableValue = AV.MaterializeAdjustedValue(L, L, *this);
19372032

19382033
// Replace the load!

llvm/test/Transforms/GVN/PRE/pre-load-through-select.ll

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,15 @@ define i32 @test_pointer_phi_select_simp_1(i32* %a, i32* %b, i1 %cond) {
99
; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[A:%.*]], align 4
1010
; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[B:%.*]], align 4
1111
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
12+
; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_I_I_I]], i32 [[L_1]], i32 [[L_2]]
1213
; CHECK-NEXT: [[MIN_SELECT:%.*]] = select i1 [[CMP_I_I_I]], i32* [[A]], i32* [[B]]
1314
; CHECK-NEXT: br label [[EXIT:%.*]]
1415
; CHECK: else:
16+
; CHECK-NEXT: [[RES_2_PRE:%.*]] = load i32, i32* [[A]], align 4
1517
; CHECK-NEXT: br label [[EXIT]]
1618
; CHECK: exit:
19+
; CHECK-NEXT: [[RES_2:%.*]] = phi i32 [ [[TMP0]], [[THEN]] ], [ [[RES_2_PRE]], [[ELSE]] ]
1720
; CHECK-NEXT: [[P:%.*]] = phi i32* [ [[MIN_SELECT]], [[THEN]] ], [ [[A]], [[ELSE]] ]
18-
; CHECK-NEXT: [[RES_2:%.*]] = load i32, i32* [[P]], align 4
1921
; CHECK-NEXT: ret i32 [[RES_2]]
2022
;
2123
entry:
@@ -118,13 +120,15 @@ define i32 @test_pointer_phi_select_simp_store_noclobber(i32* %a, i32* %b, i32*
118120
; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[B:%.*]], align 4
119121
; CHECK-NEXT: store i32 99, i32* [[C:%.*]], align 4
120122
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
123+
; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_I_I_I]], i32 [[L_1]], i32 [[L_2]]
121124
; CHECK-NEXT: [[MIN_SELECT:%.*]] = select i1 [[CMP_I_I_I]], i32* [[A]], i32* [[B]]
122125
; CHECK-NEXT: br label [[EXIT:%.*]]
123126
; CHECK: else:
127+
; CHECK-NEXT: [[RES_2_PRE:%.*]] = load i32, i32* [[A]], align 4
124128
; CHECK-NEXT: br label [[EXIT]]
125129
; CHECK: exit:
130+
; CHECK-NEXT: [[RES_2:%.*]] = phi i32 [ [[TMP0]], [[THEN]] ], [ [[RES_2_PRE]], [[ELSE]] ]
126131
; CHECK-NEXT: [[P:%.*]] = phi i32* [ [[MIN_SELECT]], [[THEN]] ], [ [[A]], [[ELSE]] ]
127-
; CHECK-NEXT: [[RES_2:%.*]] = load i32, i32* [[P]], align 4
128132
; CHECK-NEXT: ret i32 [[RES_2]]
129133
;
130134
entry:
@@ -648,9 +652,9 @@ define i32 @test_pointer_phi_select_single_block_store(i32* %a, i32* %b) {
648652
; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[A:%.*]], align 4
649653
; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[B:%.*]], align 4
650654
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
655+
; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_I_I_I]], i32 [[L_1]], i32 [[L_2]]
651656
; CHECK-NEXT: [[MIN_SELECT:%.*]] = select i1 [[CMP_I_I_I]], i32* [[A]], i32* [[B]]
652-
; CHECK-NEXT: [[RES_0:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
653-
; CHECK-NEXT: ret i32 [[RES_0]]
657+
; CHECK-NEXT: ret i32 [[TMP0]]
654658
;
655659
entry:
656660
%l.1 = load i32, i32* %a, align 4
@@ -723,3 +727,24 @@ entry:
723727
%res.0 = load i32, i32* %min.select, align 4
724728
ret i32 %res.0
725729
}
730+
731+
define i32 @test_pointer_phi_select_single_block_store_after(i32* %a, i32* %b, i32* %c) {
732+
; CHECK-LABEL: @test_pointer_phi_select_single_block_store_after(
733+
; CHECK-NEXT: entry:
734+
; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[A:%.*]], align 4
735+
; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[B:%.*]], align 4
736+
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
737+
; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[CMP_I_I_I]], i32 [[L_1]], i32 [[L_2]]
738+
; CHECK-NEXT: [[MIN_SELECT:%.*]] = select i1 [[CMP_I_I_I]], i32* [[A]], i32* [[B]]
739+
; CHECK-NEXT: store i32 99, i32* [[C:%.*]], align 4
740+
; CHECK-NEXT: ret i32 [[TMP0]]
741+
;
742+
entry:
743+
%l.1 = load i32, i32* %a, align 4
744+
%l.2 = load i32, i32* %b, align 4
745+
%cmp.i.i.i = icmp ult i32 %l.1, %l.2
746+
%min.select = select i1 %cmp.i.i.i, i32* %a, i32* %b
747+
%res.0 = load i32, i32* %min.select, align 4
748+
store i32 99, i32* %c
749+
ret i32 %res.0
750+
}

0 commit comments

Comments
 (0)