Skip to content

Commit c52bcf3

Browse files
[IRSim][IROutliner] Limit to extracting regions that only require
inputs. Extracted regions can have both inputs and outputs. In addition, the CodeExtractor removes inputs that are only used in llvm.assumes, and sunken allocas (values are used entirely in the extracted region as denoted by lifetime intrinsics). We also cannot combine sections that have different constants in the same structural location, and these constants will have to elevated to argument. This patch limits the extracted regions to those that only require inputs, and do not have any other special cases. We test that we do not outline the wrong constants in: test/Transforms/IROutliner/outliner-different-constants.ll test/Transforms/IROutliner/outliner-different-globals.ll test/Transforms/IROutliner/outliner-constant-vs-registers.ll We test that correctly outline in: test/Transforms/IROutliner/outlining-same-globals.ll test/Transforms/IROutliner/outlining-same-constants.ll test/Transforms/IROutliner/outlining-different-structure.ll Reviewers: paquette, plofti Differential Revision: https://reviews.llvm.org/D86977
1 parent f47b073 commit c52bcf3

12 files changed

+472
-147
lines changed

llvm/include/llvm/Transforms/IPO/IROutliner.h

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,13 @@ class IROutliner {
154154
pruneIncompatibleRegions(std::vector<IRSimilarityCandidate> &CandidateVec,
155155
OutlinableGroup &CurrentGroup);
156156

157+
/// Identify the needed extracted inputs in a section, and add to the overall
158+
/// function if needed.
159+
///
160+
/// \param [in] M - The module to outline from.
161+
/// \param [in,out] Region - The region to be extracted
162+
void findAddInputsOutputs(Module &M, OutlinableRegion &Region);
163+
157164
/// Extract \p Region into its own function.
158165
///
159166
/// \param [in] Region - The region to be extracted into its own function.
@@ -182,8 +189,7 @@ class IROutliner {
182189
/// Custom InstVisitor to classify different instructions for whether it can
183190
/// be analyzed for similarity. This is needed as there may be instruction we
184191
/// can identify as having similarity, but are more complicated to outline.
185-
struct InstructionAllowed
186-
: public InstVisitor<InstructionAllowed, bool> {
192+
struct InstructionAllowed : public InstVisitor<InstructionAllowed, bool> {
187193
InstructionAllowed() {}
188194

189195
// TODO: Determine a scheme to resolve when the label is similar enough.
@@ -203,13 +209,9 @@ class IROutliner {
203209
// DebugInfo should be included in the regions, but should not be
204210
// analyzed for similarity as it has no bearing on the outcome of the
205211
// program.
206-
bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) {
207-
return true;
208-
}
212+
bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; }
209213
// TODO: Handle GetElementPtrInsts
210-
bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
211-
return false;
212-
}
214+
bool visitGetElementPtrInst(GetElementPtrInst &GEPI) { return false; }
213215
// TODO: Handle specific intrinsics individually from those that can be
214216
// handled.
215217
bool IntrinsicInst(IntrinsicInst &II) { return false; }
@@ -226,9 +228,7 @@ class IROutliner {
226228
bool visitCallBrInst(CallBrInst &CBI) { return false; }
227229
// TODO: Handle interblock similarity.
228230
bool visitTerminator(Instruction &I) { return false; }
229-
bool visitInstruction(Instruction &I) {
230-
return true;
231-
}
231+
bool visitInstruction(Instruction &I) { return true; }
232232
};
233233

234234
/// A InstVisitor used to exclude certain instructions from being outlined.

llvm/lib/Transforms/IPO/IROutliner.cpp

Lines changed: 221 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,13 @@ struct OutlinableGroup {
4141
/// Flag for whether we should not consider this group of OutlinableRegions
4242
/// for extraction.
4343
bool IgnoreGroup = false;
44+
45+
/// For the \ref Regions, we look at every Value. If it is a constant,
46+
/// we check whether it is the same in Region.
47+
///
48+
/// \param [in,out] NotSame contains the global value numbers where the
49+
/// constant is not always the same, and must be passed in as an argument.
50+
void findSameConstants(DenseSet<unsigned> &NotSame);
4451
};
4552

4653
/// Move the contents of \p SourceBB to before the last instruction of \p
@@ -144,6 +151,198 @@ void OutlinableRegion::reattachCandidate() {
144151
CandidateSplit = false;
145152
}
146153

154+
/// Find whether \p V matches the Constants previously found for the \p GVN.
155+
///
156+
/// \param V - The value to check for consistency.
157+
/// \param GVN - The global value number assigned to \p V.
158+
/// \param GVNToConstant - The mapping of global value number to Constants.
159+
/// \returns true if the Value matches the Constant mapped to by V and false if
160+
/// it \p V is a Constant but does not match.
161+
/// \returns None if \p V is not a Constant.
162+
static Optional<bool>
163+
constantMatches(Value *V, unsigned GVN,
164+
DenseMap<unsigned, Constant *> &GVNToConstant) {
165+
// See if we have a constants
166+
Constant *CST = dyn_cast<Constant>(V);
167+
if (!CST)
168+
return None;
169+
170+
// Holds a mapping from a global value number to a Constant.
171+
DenseMap<unsigned, Constant *>::iterator GVNToConstantIt;
172+
bool Inserted;
173+
174+
// If we have a constant, try to make a new entry in the GVNToConstant.
175+
std::tie(GVNToConstantIt, Inserted) =
176+
GVNToConstant.insert(std::make_pair(GVN, CST));
177+
// If it was found and is not equal, it is not the same. We do not
178+
// handle this case yet, and exit early.
179+
if (Inserted || (GVNToConstantIt->second == CST))
180+
return true;
181+
182+
return false;
183+
}
184+
185+
/// Find whether \p Region matches the global value numbering to Constant mapping
186+
/// found so far.
187+
///
188+
/// \param Region - The OutlinableRegion we are checking for constants
189+
/// \param NotSame - The set of global value numbers that do not have the same
190+
/// constant in each region.
191+
/// \returns true if all Constants are the same in every use of a Constant in \p
192+
/// Region and false if not
193+
static bool
194+
collectRegionsConstants(OutlinableRegion &Region,
195+
DenseMap<unsigned, Constant *> &GVNToConstant,
196+
DenseSet<unsigned> &NotSame) {
197+
IRSimilarityCandidate &C = *Region.Candidate;
198+
for (IRInstructionData &ID : C) {
199+
200+
// Iterate over the operands in an instruction. If the global value number,
201+
// assigned by the IRSimilarityCandidate, has been seen before, we check if
202+
// the the number has been found to be not the same value in each instance.
203+
for (Value *V : ID.OperVals) {
204+
Optional<unsigned> GVNOpt = C.getGVN(V);
205+
assert(GVNOpt.hasValue() && "Expected a GVN for operand?");
206+
unsigned GVN = GVNOpt.getValue();
207+
208+
// If this global value has been found to not be the same, it could have
209+
// just been a register, check that it is not a constant value.
210+
if (NotSame.find(GVN) != NotSame.end()) {
211+
if (isa<Constant>(V))
212+
return false;
213+
continue;
214+
}
215+
216+
// If it has been the same so far, we check the value for if the
217+
// associated Constant value match the previous instances of the same
218+
// global value number. If the global value does not map to a Constant,
219+
// it is considered to not be the same value.
220+
Optional<bool> ConstantMatches = constantMatches(V, GVN, GVNToConstant);
221+
if (ConstantMatches.hasValue()) {
222+
if (ConstantMatches.getValue())
223+
continue;
224+
else
225+
return false;
226+
}
227+
228+
// While this value is a register, it might not have been previously,
229+
// make sure we don't already have a constant mapped to this global value
230+
// number.
231+
if (GVNToConstant.find(GVN) != GVNToConstant.end())
232+
return false;
233+
234+
NotSame.insert(GVN);
235+
}
236+
}
237+
238+
return true;
239+
}
240+
241+
void OutlinableGroup::findSameConstants(DenseSet<unsigned> &NotSame) {
242+
DenseMap<unsigned, Constant *> GVNToConstant;
243+
244+
for (OutlinableRegion *Region : Regions)
245+
if (!collectRegionsConstants(*Region, GVNToConstant, NotSame)) {
246+
IgnoreGroup = true;
247+
return;
248+
}
249+
}
250+
251+
/// Find the GVN for the inputs that have been found by the CodeExtractor,
252+
/// excluding the ones that will be removed by llvm.assumes as these will be
253+
/// removed by the CodeExtractor.
254+
///
255+
/// \param [in] C - The IRSimilarityCandidate containing the region we are
256+
/// analyzing.
257+
/// \param [in] CurrentInputs - The set of inputs found by the
258+
/// CodeExtractor.
259+
/// \param [out] CurrentInputNumbers - The global value numbers for the extracted
260+
/// arguments.
261+
static void mapInputsToGVNs(IRSimilarityCandidate &C,
262+
SetVector<Value *> &CurrentInputs,
263+
std::vector<unsigned> &EndInputNumbers) {
264+
// Get the global value number for each input.
265+
for (Value *Input : CurrentInputs) {
266+
assert(Input && "Have a nullptr as an input");
267+
assert(C.getGVN(Input).hasValue() &&
268+
"Could not find a numbering for the given input");
269+
EndInputNumbers.push_back(C.getGVN(Input).getValue());
270+
}
271+
}
272+
273+
/// Find the input GVNs and the output values for a region of Instructions.
274+
/// Using the code extractor, we collect the inputs to the extracted function.
275+
///
276+
/// The \p Region can be identifed as needing to be ignored in this function.
277+
/// It should be checked whether it should be ignored after a call to this
278+
/// function.
279+
///
280+
/// \param [in,out] Region - The region of code to be analyzed.
281+
/// \param [out] Inputs - The global value numbers for the extracted arguments.
282+
/// \param [out] ArgInputs - The values of the inputs to the extracted function.
283+
static void getCodeExtractorArguments(OutlinableRegion &Region,
284+
std::vector<unsigned> &InputGVNs,
285+
SetVector<Value *> &ArgInputs) {
286+
IRSimilarityCandidate &C = *Region.Candidate;
287+
288+
// OverallInputs are the inputs to the region found by the CodeExtractor,
289+
// SinkCands and HoistCands are used by the CodeExtractor to find sunken
290+
// allocas of values whose lifetimes are contained completely within the
291+
// outlined region. Outputs are values used outside of the outlined region
292+
// found by the CodeExtractor.
293+
SetVector<Value *> OverallInputs, SinkCands, HoistCands, Outputs;
294+
295+
// Use the code extractor to get the inputs and outputs, without sunken
296+
// allocas or removing llvm.assumes.
297+
CodeExtractor *CE = Region.CE;
298+
CE->findInputsOutputs(OverallInputs, Outputs, SinkCands);
299+
assert(Region.StartBB && "Region must have a start BasicBlock!");
300+
Function *OrigF = Region.StartBB->getParent();
301+
CodeExtractorAnalysisCache CEAC(*OrigF);
302+
BasicBlock *Dummy = nullptr;
303+
304+
// The region may be ineligible due to VarArgs in the parent function. In this
305+
// case we ignore the region.
306+
if (!CE->isEligible()) {
307+
Region.IgnoreRegion = true;
308+
return;
309+
}
310+
311+
// Find if any values are going to be sunk into the function when extracted
312+
CE->findAllocas(CEAC, SinkCands, HoistCands, Dummy);
313+
CE->findInputsOutputs(ArgInputs, Outputs, SinkCands);
314+
315+
// TODO: Support regions with output values. Outputs add an extra layer of
316+
// resolution that adds too much complexity at this stage.
317+
if (Outputs.size() > 0) {
318+
Region.IgnoreRegion = true;
319+
return;
320+
}
321+
322+
// TODO: Support regions with sunken allocas: values whose lifetimes are
323+
// contained completely within the outlined region. These are not guaranteed
324+
// to be the same in every region, so we must elevate them all to arguments
325+
// when they appear. If these values are not equal, it means there is some
326+
// Input in OverallInputs that was removed for ArgInputs.
327+
if (ArgInputs.size() != OverallInputs.size()) {
328+
Region.IgnoreRegion = true;
329+
return;
330+
}
331+
332+
mapInputsToGVNs(C, OverallInputs, InputGVNs);
333+
}
334+
335+
void IROutliner::findAddInputsOutputs(
336+
Module &M, OutlinableRegion &Region) {
337+
std::vector<unsigned> Inputs;
338+
SetVector<Value *> ArgInputs;
339+
340+
getCodeExtractorArguments(Region, Inputs, ArgInputs);
341+
342+
if (Region.IgnoreRegion)
343+
return;
344+
}
345+
147346
void IROutliner::pruneIncompatibleRegions(
148347
std::vector<IRSimilarityCandidate> &CandidateVec,
149348
OutlinableGroup &CurrentGroup) {
@@ -271,6 +470,7 @@ unsigned IROutliner::doOutline(Module &M) {
271470
RHS[0].getLength() * RHS.size();
272471
});
273472

473+
DenseSet<unsigned> NotSame;
274474
// Iterate over the possible sets of similarity.
275475
for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
276476
OutlinableGroup CurrentGroup;
@@ -284,7 +484,18 @@ unsigned IROutliner::doOutline(Module &M) {
284484
if (CurrentGroup.Regions.size() < 2)
285485
continue;
286486

287-
// Create a CodeExtractor for each outlinable region.
487+
// Determine if there are any values that are the same constant throughout
488+
// each section in the set.
489+
NotSame.clear();
490+
CurrentGroup.findSameConstants(NotSame);
491+
492+
if (CurrentGroup.IgnoreGroup)
493+
continue;
494+
495+
// Create a CodeExtractor for each outlinable region. Identify inputs and
496+
// outputs for each section using the code extractor and create the argument
497+
// types for the Aggregate Outlining Function.
498+
std::vector<OutlinableRegion *> OutlinedRegions;
288499
for (OutlinableRegion *OS : CurrentGroup.Regions) {
289500
// Break the outlinable region out of its parent BasicBlock into its own
290501
// BasicBlocks (see function implementation).
@@ -293,10 +504,17 @@ unsigned IROutliner::doOutline(Module &M) {
293504
OS->CE = new (ExtractorAllocator.Allocate())
294505
CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
295506
false, "outlined");
507+
findAddInputsOutputs(M, *OS);
508+
if (!OS->IgnoreRegion)
509+
OutlinedRegions.push_back(OS);
510+
else
511+
OS->reattachCandidate();
296512
}
297513

298-
// Create functions out of all the sections, and mark them as outlined
299-
std::vector<OutlinableRegion *> OutlinedRegions;
514+
CurrentGroup.Regions = std::move(OutlinedRegions);
515+
516+
// Create functions out of all the sections, and mark them as outlined.
517+
OutlinedRegions.clear();
300518
for (OutlinableRegion *OS : CurrentGroup.Regions) {
301519
OutlinedFunctionNum++;
302520
bool FunctionOutlined = extractSection(*OS);

llvm/test/Transforms/IROutliner/extraction.ll

Lines changed: 21 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -48,26 +48,24 @@ entry:
4848
ret void
4949
}
5050

51+
; There are potential ouptuts in this sections, but we do not extract sections
52+
; with outputs right now, since they cannot be consolidated.
5153
define void @extract_outs1() #0 {
5254
; CHECK-LABEL: @extract_outs1(
5355
; CHECK-NEXT: entry:
54-
; CHECK-NEXT: [[DOTLOC:%.*]] = alloca i32, align 4
55-
; CHECK-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4
5656
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
5757
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
5858
; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4
5959
; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4
60-
; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[ADD_LOC]] to i8*
61-
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
62-
; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8*
63-
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
64-
; CHECK-NEXT: call void @extract_outs1.outlined(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]])
65-
; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
66-
; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4
67-
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
68-
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
69-
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[OUTPUT]], align 4
70-
; CHECK-NEXT: call void @extract_outs1.outlined.1(i32 [[DOTRELOAD]], i32 [[ADD_RELOAD]], i32* [[RESULT]])
60+
; CHECK-NEXT: store i32 2, i32* [[A]], align 4
61+
; CHECK-NEXT: store i32 3, i32* [[B]], align 4
62+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
63+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4
64+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[TMP1]]
65+
; CHECK-NEXT: store i32 [[ADD]], i32* [[OUTPUT]], align 4
66+
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[OUTPUT]], align 4
67+
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[OUTPUT]], align 4
68+
; CHECK-NEXT: call void @extract_outs1.outlined(i32 [[TMP2]], i32 [[ADD]], i32* [[RESULT]])
7169
; CHECK-NEXT: ret void
7270
;
7371
entry:
@@ -88,25 +86,23 @@ entry:
8886
ret void
8987
}
9088

89+
; There are potential ouptuts in this sections, but we do not extract sections
90+
; with outputs right now, since they cannot be consolidated.
9191
define void @extract_outs2() #0 {
9292
; CHECK-LABEL: @extract_outs2(
9393
; CHECK-NEXT: entry:
94-
; CHECK-NEXT: [[DOTLOC:%.*]] = alloca i32, align 4
95-
; CHECK-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4
9694
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
9795
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
9896
; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4
9997
; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4
100-
; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[ADD_LOC]] to i8*
101-
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
102-
; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8*
103-
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
104-
; CHECK-NEXT: call void @extract_outs2.outlined(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]])
105-
; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
106-
; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4
107-
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
108-
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
109-
; CHECK-NEXT: call void @extract_outs2.outlined.2(i32 [[DOTRELOAD]], i32 [[ADD_RELOAD]], i32* [[RESULT]])
98+
; CHECK-NEXT: store i32 2, i32* [[A]], align 4
99+
; CHECK-NEXT: store i32 3, i32* [[B]], align 4
100+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
101+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4
102+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[TMP1]]
103+
; CHECK-NEXT: store i32 [[ADD]], i32* [[OUTPUT]], align 4
104+
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[OUTPUT]], align 4
105+
; CHECK-NEXT: call void @extract_outs2.outlined(i32 [[TMP2]], i32 [[ADD]], i32* [[RESULT]])
110106
; CHECK-NEXT: ret void
111107
;
112108
entry:

0 commit comments

Comments
 (0)