Skip to content

Commit 7723f5d

Browse files
author
git apple-llvm automerger
committed
Merge commit 'a23df1e871e4' from apple/main into swift/next
2 parents e43ba0d + a23df1e commit 7723f5d

12 files changed

+472
-147
lines changed

llvm/include/llvm/Transforms/IPO/IROutliner.h

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,13 @@ class IROutliner {
154154
pruneIncompatibleRegions(std::vector<IRSimilarityCandidate> &CandidateVec,
155155
OutlinableGroup &CurrentGroup);
156156

157+
/// Identify the needed extracted inputs in a section, and add to the overall
158+
/// function if needed.
159+
///
160+
/// \param [in] M - The module to outline from.
161+
/// \param [in,out] Region - The region to be extracted
162+
void findAddInputsOutputs(Module &M, OutlinableRegion &Region);
163+
157164
/// Extract \p Region into its own function.
158165
///
159166
/// \param [in] Region - The region to be extracted into its own function.
@@ -182,8 +189,7 @@ class IROutliner {
182189
/// Custom InstVisitor to classify different instructions for whether it can
183190
/// be analyzed for similarity. This is needed as there may be instruction we
184191
/// can identify as having similarity, but are more complicated to outline.
185-
struct InstructionAllowed
186-
: public InstVisitor<InstructionAllowed, bool> {
192+
struct InstructionAllowed : public InstVisitor<InstructionAllowed, bool> {
187193
InstructionAllowed() {}
188194

189195
// TODO: Determine a scheme to resolve when the label is similar enough.
@@ -203,13 +209,9 @@ class IROutliner {
203209
// DebugInfo should be included in the regions, but should not be
204210
// analyzed for similarity as it has no bearing on the outcome of the
205211
// program.
206-
bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) {
207-
return true;
208-
}
212+
bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; }
209213
// TODO: Handle GetElementPtrInsts
210-
bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
211-
return false;
212-
}
214+
bool visitGetElementPtrInst(GetElementPtrInst &GEPI) { return false; }
213215
// TODO: Handle specific intrinsics individually from those that can be
214216
// handled.
215217
bool IntrinsicInst(IntrinsicInst &II) { return false; }
@@ -226,9 +228,7 @@ class IROutliner {
226228
bool visitCallBrInst(CallBrInst &CBI) { return false; }
227229
// TODO: Handle interblock similarity.
228230
bool visitTerminator(Instruction &I) { return false; }
229-
bool visitInstruction(Instruction &I) {
230-
return true;
231-
}
231+
bool visitInstruction(Instruction &I) { return true; }
232232
};
233233

234234
/// A InstVisitor used to exclude certain instructions from being outlined.

llvm/lib/Transforms/IPO/IROutliner.cpp

Lines changed: 221 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,13 @@ struct OutlinableGroup {
4141
/// Flag for whether we should not consider this group of OutlinableRegions
4242
/// for extraction.
4343
bool IgnoreGroup = false;
44+
45+
/// For the \ref Regions, we look at every Value. If it is a constant,
46+
/// we check whether it is the same in Region.
47+
///
48+
/// \param [in,out] NotSame contains the global value numbers where the
49+
/// constant is not always the same, and must be passed in as an argument.
50+
void findSameConstants(DenseSet<unsigned> &NotSame);
4451
};
4552

4653
/// Move the contents of \p SourceBB to before the last instruction of \p
@@ -144,6 +151,198 @@ void OutlinableRegion::reattachCandidate() {
144151
CandidateSplit = false;
145152
}
146153

154+
/// Find whether \p V matches the Constants previously found for the \p GVN.
155+
///
156+
/// \param V - The value to check for consistency.
157+
/// \param GVN - The global value number assigned to \p V.
158+
/// \param GVNToConstant - The mapping of global value number to Constants.
159+
/// \returns true if the Value matches the Constant mapped to by V and false if
160+
/// it \p V is a Constant but does not match.
161+
/// \returns None if \p V is not a Constant.
162+
static Optional<bool>
163+
constantMatches(Value *V, unsigned GVN,
164+
DenseMap<unsigned, Constant *> &GVNToConstant) {
165+
// See if we have a constants
166+
Constant *CST = dyn_cast<Constant>(V);
167+
if (!CST)
168+
return None;
169+
170+
// Holds a mapping from a global value number to a Constant.
171+
DenseMap<unsigned, Constant *>::iterator GVNToConstantIt;
172+
bool Inserted;
173+
174+
// If we have a constant, try to make a new entry in the GVNToConstant.
175+
std::tie(GVNToConstantIt, Inserted) =
176+
GVNToConstant.insert(std::make_pair(GVN, CST));
177+
// If it was found and is not equal, it is not the same. We do not
178+
// handle this case yet, and exit early.
179+
if (Inserted || (GVNToConstantIt->second == CST))
180+
return true;
181+
182+
return false;
183+
}
184+
185+
/// Find whether \p Region matches the global value numbering to Constant mapping
186+
/// found so far.
187+
///
188+
/// \param Region - The OutlinableRegion we are checking for constants
189+
/// \param NotSame - The set of global value numbers that do not have the same
190+
/// constant in each region.
191+
/// \returns true if all Constants are the same in every use of a Constant in \p
192+
/// Region and false if not
193+
static bool
194+
collectRegionsConstants(OutlinableRegion &Region,
195+
DenseMap<unsigned, Constant *> &GVNToConstant,
196+
DenseSet<unsigned> &NotSame) {
197+
IRSimilarityCandidate &C = *Region.Candidate;
198+
for (IRInstructionData &ID : C) {
199+
200+
// Iterate over the operands in an instruction. If the global value number,
201+
// assigned by the IRSimilarityCandidate, has been seen before, we check if
202+
// the the number has been found to be not the same value in each instance.
203+
for (Value *V : ID.OperVals) {
204+
Optional<unsigned> GVNOpt = C.getGVN(V);
205+
assert(GVNOpt.hasValue() && "Expected a GVN for operand?");
206+
unsigned GVN = GVNOpt.getValue();
207+
208+
// If this global value has been found to not be the same, it could have
209+
// just been a register, check that it is not a constant value.
210+
if (NotSame.find(GVN) != NotSame.end()) {
211+
if (isa<Constant>(V))
212+
return false;
213+
continue;
214+
}
215+
216+
// If it has been the same so far, we check the value for if the
217+
// associated Constant value match the previous instances of the same
218+
// global value number. If the global value does not map to a Constant,
219+
// it is considered to not be the same value.
220+
Optional<bool> ConstantMatches = constantMatches(V, GVN, GVNToConstant);
221+
if (ConstantMatches.hasValue()) {
222+
if (ConstantMatches.getValue())
223+
continue;
224+
else
225+
return false;
226+
}
227+
228+
// While this value is a register, it might not have been previously,
229+
// make sure we don't already have a constant mapped to this global value
230+
// number.
231+
if (GVNToConstant.find(GVN) != GVNToConstant.end())
232+
return false;
233+
234+
NotSame.insert(GVN);
235+
}
236+
}
237+
238+
return true;
239+
}
240+
241+
void OutlinableGroup::findSameConstants(DenseSet<unsigned> &NotSame) {
242+
DenseMap<unsigned, Constant *> GVNToConstant;
243+
244+
for (OutlinableRegion *Region : Regions)
245+
if (!collectRegionsConstants(*Region, GVNToConstant, NotSame)) {
246+
IgnoreGroup = true;
247+
return;
248+
}
249+
}
250+
251+
/// Find the GVN for the inputs that have been found by the CodeExtractor,
252+
/// excluding the ones that will be removed by llvm.assumes as these will be
253+
/// removed by the CodeExtractor.
254+
///
255+
/// \param [in] C - The IRSimilarityCandidate containing the region we are
256+
/// analyzing.
257+
/// \param [in] CurrentInputs - The set of inputs found by the
258+
/// CodeExtractor.
259+
/// \param [out] CurrentInputNumbers - The global value numbers for the extracted
260+
/// arguments.
261+
static void mapInputsToGVNs(IRSimilarityCandidate &C,
262+
SetVector<Value *> &CurrentInputs,
263+
std::vector<unsigned> &EndInputNumbers) {
264+
// Get the global value number for each input.
265+
for (Value *Input : CurrentInputs) {
266+
assert(Input && "Have a nullptr as an input");
267+
assert(C.getGVN(Input).hasValue() &&
268+
"Could not find a numbering for the given input");
269+
EndInputNumbers.push_back(C.getGVN(Input).getValue());
270+
}
271+
}
272+
273+
/// Find the input GVNs and the output values for a region of Instructions.
274+
/// Using the code extractor, we collect the inputs to the extracted function.
275+
///
276+
/// The \p Region can be identifed as needing to be ignored in this function.
277+
/// It should be checked whether it should be ignored after a call to this
278+
/// function.
279+
///
280+
/// \param [in,out] Region - The region of code to be analyzed.
281+
/// \param [out] Inputs - The global value numbers for the extracted arguments.
282+
/// \param [out] ArgInputs - The values of the inputs to the extracted function.
283+
static void getCodeExtractorArguments(OutlinableRegion &Region,
284+
std::vector<unsigned> &InputGVNs,
285+
SetVector<Value *> &ArgInputs) {
286+
IRSimilarityCandidate &C = *Region.Candidate;
287+
288+
// OverallInputs are the inputs to the region found by the CodeExtractor,
289+
// SinkCands and HoistCands are used by the CodeExtractor to find sunken
290+
// allocas of values whose lifetimes are contained completely within the
291+
// outlined region. Outputs are values used outside of the outlined region
292+
// found by the CodeExtractor.
293+
SetVector<Value *> OverallInputs, SinkCands, HoistCands, Outputs;
294+
295+
// Use the code extractor to get the inputs and outputs, without sunken
296+
// allocas or removing llvm.assumes.
297+
CodeExtractor *CE = Region.CE;
298+
CE->findInputsOutputs(OverallInputs, Outputs, SinkCands);
299+
assert(Region.StartBB && "Region must have a start BasicBlock!");
300+
Function *OrigF = Region.StartBB->getParent();
301+
CodeExtractorAnalysisCache CEAC(*OrigF);
302+
BasicBlock *Dummy = nullptr;
303+
304+
// The region may be ineligible due to VarArgs in the parent function. In this
305+
// case we ignore the region.
306+
if (!CE->isEligible()) {
307+
Region.IgnoreRegion = true;
308+
return;
309+
}
310+
311+
// Find if any values are going to be sunk into the function when extracted
312+
CE->findAllocas(CEAC, SinkCands, HoistCands, Dummy);
313+
CE->findInputsOutputs(ArgInputs, Outputs, SinkCands);
314+
315+
// TODO: Support regions with output values. Outputs add an extra layer of
316+
// resolution that adds too much complexity at this stage.
317+
if (Outputs.size() > 0) {
318+
Region.IgnoreRegion = true;
319+
return;
320+
}
321+
322+
// TODO: Support regions with sunken allocas: values whose lifetimes are
323+
// contained completely within the outlined region. These are not guaranteed
324+
// to be the same in every region, so we must elevate them all to arguments
325+
// when they appear. If these values are not equal, it means there is some
326+
// Input in OverallInputs that was removed for ArgInputs.
327+
if (ArgInputs.size() != OverallInputs.size()) {
328+
Region.IgnoreRegion = true;
329+
return;
330+
}
331+
332+
mapInputsToGVNs(C, OverallInputs, InputGVNs);
333+
}
334+
335+
void IROutliner::findAddInputsOutputs(
336+
Module &M, OutlinableRegion &Region) {
337+
std::vector<unsigned> Inputs;
338+
SetVector<Value *> ArgInputs;
339+
340+
getCodeExtractorArguments(Region, Inputs, ArgInputs);
341+
342+
if (Region.IgnoreRegion)
343+
return;
344+
}
345+
147346
void IROutliner::pruneIncompatibleRegions(
148347
std::vector<IRSimilarityCandidate> &CandidateVec,
149348
OutlinableGroup &CurrentGroup) {
@@ -271,6 +470,7 @@ unsigned IROutliner::doOutline(Module &M) {
271470
RHS[0].getLength() * RHS.size();
272471
});
273472

473+
DenseSet<unsigned> NotSame;
274474
// Iterate over the possible sets of similarity.
275475
for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
276476
OutlinableGroup CurrentGroup;
@@ -284,7 +484,18 @@ unsigned IROutliner::doOutline(Module &M) {
284484
if (CurrentGroup.Regions.size() < 2)
285485
continue;
286486

287-
// Create a CodeExtractor for each outlinable region.
487+
// Determine if there are any values that are the same constant throughout
488+
// each section in the set.
489+
NotSame.clear();
490+
CurrentGroup.findSameConstants(NotSame);
491+
492+
if (CurrentGroup.IgnoreGroup)
493+
continue;
494+
495+
// Create a CodeExtractor for each outlinable region. Identify inputs and
496+
// outputs for each section using the code extractor and create the argument
497+
// types for the Aggregate Outlining Function.
498+
std::vector<OutlinableRegion *> OutlinedRegions;
288499
for (OutlinableRegion *OS : CurrentGroup.Regions) {
289500
// Break the outlinable region out of its parent BasicBlock into its own
290501
// BasicBlocks (see function implementation).
@@ -293,10 +504,17 @@ unsigned IROutliner::doOutline(Module &M) {
293504
OS->CE = new (ExtractorAllocator.Allocate())
294505
CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
295506
false, "outlined");
507+
findAddInputsOutputs(M, *OS);
508+
if (!OS->IgnoreRegion)
509+
OutlinedRegions.push_back(OS);
510+
else
511+
OS->reattachCandidate();
296512
}
297513

298-
// Create functions out of all the sections, and mark them as outlined
299-
std::vector<OutlinableRegion *> OutlinedRegions;
514+
CurrentGroup.Regions = std::move(OutlinedRegions);
515+
516+
// Create functions out of all the sections, and mark them as outlined.
517+
OutlinedRegions.clear();
300518
for (OutlinableRegion *OS : CurrentGroup.Regions) {
301519
OutlinedFunctionNum++;
302520
bool FunctionOutlined = extractSection(*OS);

llvm/test/Transforms/IROutliner/extraction.ll

Lines changed: 21 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -48,26 +48,24 @@ entry:
4848
ret void
4949
}
5050

51+
; There are potential ouptuts in this sections, but we do not extract sections
52+
; with outputs right now, since they cannot be consolidated.
5153
define void @extract_outs1() #0 {
5254
; CHECK-LABEL: @extract_outs1(
5355
; CHECK-NEXT: entry:
54-
; CHECK-NEXT: [[DOTLOC:%.*]] = alloca i32, align 4
55-
; CHECK-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4
5656
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
5757
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
5858
; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4
5959
; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4
60-
; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[ADD_LOC]] to i8*
61-
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
62-
; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8*
63-
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
64-
; CHECK-NEXT: call void @extract_outs1.outlined(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]])
65-
; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
66-
; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4
67-
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
68-
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
69-
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[OUTPUT]], align 4
70-
; CHECK-NEXT: call void @extract_outs1.outlined.1(i32 [[DOTRELOAD]], i32 [[ADD_RELOAD]], i32* [[RESULT]])
60+
; CHECK-NEXT: store i32 2, i32* [[A]], align 4
61+
; CHECK-NEXT: store i32 3, i32* [[B]], align 4
62+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
63+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4
64+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[TMP1]]
65+
; CHECK-NEXT: store i32 [[ADD]], i32* [[OUTPUT]], align 4
66+
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[OUTPUT]], align 4
67+
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[OUTPUT]], align 4
68+
; CHECK-NEXT: call void @extract_outs1.outlined(i32 [[TMP2]], i32 [[ADD]], i32* [[RESULT]])
7169
; CHECK-NEXT: ret void
7270
;
7371
entry:
@@ -88,25 +86,23 @@ entry:
8886
ret void
8987
}
9088

89+
; There are potential ouptuts in this sections, but we do not extract sections
90+
; with outputs right now, since they cannot be consolidated.
9191
define void @extract_outs2() #0 {
9292
; CHECK-LABEL: @extract_outs2(
9393
; CHECK-NEXT: entry:
94-
; CHECK-NEXT: [[DOTLOC:%.*]] = alloca i32, align 4
95-
; CHECK-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4
9694
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
9795
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
9896
; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4
9997
; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4
100-
; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[ADD_LOC]] to i8*
101-
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
102-
; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8*
103-
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
104-
; CHECK-NEXT: call void @extract_outs2.outlined(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]])
105-
; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
106-
; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4
107-
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
108-
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
109-
; CHECK-NEXT: call void @extract_outs2.outlined.2(i32 [[DOTRELOAD]], i32 [[ADD_RELOAD]], i32* [[RESULT]])
98+
; CHECK-NEXT: store i32 2, i32* [[A]], align 4
99+
; CHECK-NEXT: store i32 3, i32* [[B]], align 4
100+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
101+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4
102+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[TMP1]]
103+
; CHECK-NEXT: store i32 [[ADD]], i32* [[OUTPUT]], align 4
104+
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[OUTPUT]], align 4
105+
; CHECK-NEXT: call void @extract_outs2.outlined(i32 [[TMP2]], i32 [[ADD]], i32* [[RESULT]])
110106
; CHECK-NEXT: ret void
111107
;
112108
entry:

0 commit comments

Comments
 (0)