Skip to content

Commit 4f7e5d2

Browse files
committed
[SROA] For non-speculatable loads of selects -- split block, insert then/else blocks, form two-entry PHI node, take 2
Currently, SROA is CFG-preserving. Not doing so does not affect any pipeline test. (???) Internally, SROA requires Dominator Tree, and uses it solely for the final `-mem2reg` call. By design, we can't really SROA alloca if their address escapes somehow, but we have logic to deal with `load` of `select`/`PHI`, where at least one of the possible addresses prevents promotion, by speculating the `load`s and `select`ing between loaded values. As one would expect, that requires ensuring that the speculation is actually legal. Even ignoring complexity bailouts, that logic does not deal with everything, e.g. `isSafeToLoadUnconditionally()` does not recurse into hands of `select`. There can also be cases where the load is genuinely non-speculate. So if we can't prove that the load can be speculated, unfold the select, produce two-entry phi node, and perform predicated load. Now, that transformation must obviously update Dominator Tree, since we require it later on. Doing so is trivial. Additionally, we don't want to do this for the final SROA invocation (D136806). In the end, this ends up having negative (!) compile-time cost: https://llvm-compile-time-tracker.com/compare.php?from=c6d7e80ec4c17a415673b1cfd25924f98ac83608&to=ddf9600365093ea50d7e278696cbfa01641c959d&stat=instructions:u Though indeed, this only deals with `select`s, `PHI`s are still using speculation. Should we update some more analysis? Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D138238 This reverts commit 7396118, and recommits 03e6d9d with a fixed assertion - we should check that DTU is there, not just assert false...
1 parent ad3870d commit 4f7e5d2

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+948
-291
lines changed

llvm/include/llvm/Transforms/Scalar.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ FunctionPass *createBitTrackingDCEPass();
105105
//
106106
// SROA - Replace aggregates or pieces of aggregates with scalar SSA values.
107107
//
108-
FunctionPass *createSROAPass();
108+
FunctionPass *createSROAPass(bool PreserveCFG = true);
109109

110110
//===----------------------------------------------------------------------===//
111111
//

llvm/include/llvm/Transforms/Scalar/SROA.h

Lines changed: 61 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//===- SROA.h - Scalar Replacement Of Aggregates ----------------*- C++ -*-===//
1+
//===- SROA.h - Scalar Replacement Of Aggregates ----------------*- C++ -*-===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.
@@ -15,6 +15,8 @@
1515
#ifndef LLVM_TRANSFORMS_SCALAR_SROA_H
1616
#define LLVM_TRANSFORMS_SCALAR_SROA_H
1717

18+
#include "llvm/ADT/MapVector.h"
19+
#include "llvm/ADT/PointerIntPair.h"
1820
#include "llvm/ADT/SetVector.h"
1921
#include "llvm/ADT/SmallVector.h"
2022
#include "llvm/IR/PassManager.h"
@@ -24,8 +26,10 @@
2426
namespace llvm {
2527

2628
class AllocaInst;
29+
class LoadInst;
2730
class AssumptionCache;
2831
class DominatorTree;
32+
class DomTreeUpdater;
2933
class Function;
3034
class LLVMContext;
3135
class PHINode;
@@ -41,8 +45,31 @@ class AllocaSlices;
4145
class Partition;
4246
class SROALegacyPass;
4347

48+
class SelectHandSpeculativity {
49+
unsigned char Storage = 0;
50+
using TrueVal = Bitfield::Element<bool, 0, 1>; // Low 0'th bit.
51+
using FalseVal = Bitfield::Element<bool, 1, 1>; // Low 1'th bit.
52+
public:
53+
SelectHandSpeculativity() = default;
54+
SelectHandSpeculativity &setAsSpeculatable(bool isTrueVal);
55+
bool isSpeculatable(bool isTrueVal) const;
56+
bool areAllSpeculatable() const;
57+
bool areAnySpeculatable() const;
58+
bool areNoneSpeculatable() const;
59+
// For interop as int half of PointerIntPair.
60+
explicit operator intptr_t() const { return static_cast<intptr_t>(Storage); }
61+
explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}
62+
};
63+
static_assert(sizeof(SelectHandSpeculativity) == sizeof(unsigned char));
64+
65+
using PossiblySpeculatableLoad =
66+
PointerIntPair<LoadInst *, 2, sroa::SelectHandSpeculativity>;
67+
using PossiblySpeculatableLoads = SmallVector<PossiblySpeculatableLoad, 2>;
68+
4469
} // end namespace sroa
4570

71+
enum class SROAOptions : bool { ModifyCFG, PreserveCFG };
72+
4673
/// An optimization pass providing Scalar Replacement of Aggregates.
4774
///
4875
/// This pass takes allocations which can be completely analyzed (that is, they
@@ -63,8 +90,9 @@ class SROALegacyPass;
6390
/// SSA vector values.
6491
class SROAPass : public PassInfoMixin<SROAPass> {
6592
LLVMContext *C = nullptr;
66-
DominatorTree *DT = nullptr;
93+
DomTreeUpdater *DTU = nullptr;
6794
AssumptionCache *AC = nullptr;
95+
const bool PreserveCFG;
6896

6997
/// Worklist of alloca instructions to simplify.
7098
///
@@ -98,35 +126,58 @@ class SROAPass : public PassInfoMixin<SROAPass> {
98126
/// All of these PHIs have been checked for the safety of speculation and by
99127
/// being speculated will allow promoting allocas currently in the promotable
100128
/// queue.
101-
SetVector<PHINode *, SmallVector<PHINode *, 2>> SpeculatablePHIs;
129+
SetVector<PHINode *, SmallVector<PHINode *, 8>> SpeculatablePHIs;
102130

103-
/// A worklist of select instructions to speculate prior to promoting
131+
/// A worklist of select instructions to rewrite prior to promoting
104132
/// allocas.
133+
SmallMapVector<SelectInst *, sroa::PossiblySpeculatableLoads, 8>
134+
SelectsToRewrite;
135+
136+
/// Select instructions that use an alloca and are subsequently loaded can be
137+
/// rewritten to load both input pointers and then select between the result,
138+
/// allowing the load of the alloca to be promoted.
139+
/// From this:
140+
/// %P2 = select i1 %cond, ptr %Alloca, ptr %Other
141+
/// %V = load <type>, ptr %P2
142+
/// to:
143+
/// %V1 = load <type>, ptr %Alloca -> will be mem2reg'd
144+
/// %V2 = load <type>, ptr %Other
145+
/// %V = select i1 %cond, <type> %V1, <type> %V2
105146
///
106-
/// All of these select instructions have been checked for the safety of
107-
/// speculation and by being speculated will allow promoting allocas
108-
/// currently in the promotable queue.
109-
SetVector<SelectInst *, SmallVector<SelectInst *, 2>> SpeculatableSelects;
147+
/// We can do this to a select if its only uses are loads
148+
/// and if either the operand to the select can be loaded unconditionally,
149+
/// or if we are allowed to perform CFG modifications.
150+
/// If found an intervening bitcast with a single use of the load,
151+
/// allow the promotion.
152+
static std::optional<sroa::PossiblySpeculatableLoads>
153+
isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG);
110154

111155
public:
112-
SROAPass() = default;
156+
/// If \p PreserveCFG is set, then the pass is not allowed to modify CFG
157+
/// in any way, even if it would update CFG analyses.
158+
SROAPass(SROAOptions PreserveCFG);
113159

114160
/// Run the pass over the function.
115161
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
116162

163+
void printPipeline(raw_ostream &OS,
164+
function_ref<StringRef(StringRef)> MapClassName2PassName);
165+
117166
private:
118167
friend class sroa::AllocaSliceRewriter;
119168
friend class sroa::SROALegacyPass;
120169

121170
/// Helper used by both the public run method and by the legacy pass.
171+
PreservedAnalyses runImpl(Function &F, DomTreeUpdater &RunDTU,
172+
AssumptionCache &RunAC);
122173
PreservedAnalyses runImpl(Function &F, DominatorTree &RunDT,
123174
AssumptionCache &RunAC);
124175

125176
bool presplitLoadsAndStores(AllocaInst &AI, sroa::AllocaSlices &AS);
126177
AllocaInst *rewritePartition(AllocaInst &AI, sroa::AllocaSlices &AS,
127178
sroa::Partition &P);
128179
bool splitAlloca(AllocaInst &AI, sroa::AllocaSlices &AS);
129-
bool runOnAlloca(AllocaInst &AI);
180+
std::pair<bool /*Changed*/, bool /*CFGChanged*/> runOnAlloca(AllocaInst &AI);
130181
void clobberUse(Use &U);
131182
bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
132183
bool promoteAllocas(Function &F);

llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -464,10 +464,13 @@ Instruction *SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
464464
/// ElseBlock
465465
/// SplitBefore
466466
/// Tail
467+
///
468+
/// Updates DT if given.
467469
void SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
468470
Instruction **ThenTerm,
469471
Instruction **ElseTerm,
470-
MDNode *BranchWeights = nullptr);
472+
MDNode *BranchWeights = nullptr,
473+
DomTreeUpdater *DTU = nullptr);
471474

472475
/// Check whether BB is the merge point of a if-region.
473476
/// If so, return the branch instruction that determines which entry into

llvm/lib/Passes/PassBuilder.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -836,6 +836,19 @@ Expected<GVNOptions> parseGVNOptions(StringRef Params) {
836836
return Result;
837837
}
838838

839+
Expected<SROAOptions> parseSROAOptions(StringRef Params) {
840+
if (Params.empty() || Params == "modify-cfg")
841+
return SROAOptions::ModifyCFG;
842+
if (Params == "preserve-cfg")
843+
return SROAOptions::PreserveCFG;
844+
return make_error<StringError>(
845+
formatv("invalid SROA pass parameter '{0}' (either preserve-cfg or "
846+
"modify-cfg can be specified)",
847+
Params)
848+
.str(),
849+
inconvertibleErrorCode());
850+
}
851+
839852
Expected<StackLifetime::LivenessType>
840853
parseStackLifetimeOptions(StringRef Params) {
841854
StackLifetime::LivenessType Result = StackLifetime::LivenessType::May;

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
328328

329329
// Form SSA out of local memory accesses after breaking apart aggregates into
330330
// scalars.
331-
FPM.addPass(SROAPass());
331+
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
332332

333333
// Catch trivial redundancies
334334
FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
@@ -427,7 +427,7 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
427427
/*UseBlockFrequencyInfo=*/false));
428428

429429
// Delete small array after loop unroll.
430-
FPM.addPass(SROAPass());
430+
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
431431

432432
// Specially optimize memory movement as it doesn't look like dataflow in SSA.
433433
FPM.addPass(MemCpyOptPass());
@@ -478,7 +478,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
478478

479479
// Form SSA out of local memory accesses after breaking apart aggregates into
480480
// scalars.
481-
FPM.addPass(SROAPass());
481+
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
482482

483483
// Catch trivial redundancies
484484
FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
@@ -613,7 +613,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
613613
/*UseBlockFrequencyInfo=*/false));
614614

615615
// Delete small array after loop unroll.
616-
FPM.addPass(SROAPass());
616+
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
617617

618618
// Try vectorization/scalarization transforms that are both improvements
619619
// themselves and can allow further folds with GVN and InstCombine.
@@ -714,7 +714,7 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
714714
CGSCCPassManager &CGPipeline = MIWP.getPM();
715715

716716
FunctionPassManager FPM;
717-
FPM.addPass(SROAPass());
717+
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
718718
FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
719719
FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
720720
true))); // Merge & remove basic blocks.
@@ -963,7 +963,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
963963
// Compare/branch metadata may alter the behavior of passes like SimplifyCFG.
964964
EarlyFPM.addPass(LowerExpectIntrinsicPass());
965965
EarlyFPM.addPass(SimplifyCFGPass());
966-
EarlyFPM.addPass(SROAPass());
966+
EarlyFPM.addPass(SROAPass(SROAOptions::ModifyCFG));
967967
EarlyFPM.addPass(EarlyCSEPass());
968968
if (Level == OptimizationLevel::O3)
969969
EarlyFPM.addPass(CallSiteSplittingPass());
@@ -1113,7 +1113,10 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
11131113
// Now that we are done with loop unrolling, be it either by LoopVectorizer,
11141114
// or LoopUnroll passes, some variable-offset GEP's into alloca's could have
11151115
// become constant-offset, thus enabling SROA and alloca promotion. Do so.
1116-
FPM.addPass(SROAPass());
1116+
// NOTE: we are very late in the pipeline, and we don't have any LICM
1117+
// or SimplifyCFG passes scheduled after us, that would cleanup
1118+
// the CFG mess this may created if allowed to modify CFG, so forbid that.
1119+
FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
11171120
}
11181121

11191122
if (!IsFullLTO) {
@@ -1204,7 +1207,10 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
12041207
// Now that we are done with loop unrolling, be it either by LoopVectorizer,
12051208
// or LoopUnroll passes, some variable-offset GEP's into alloca's could have
12061209
// become constant-offset, thus enabling SROA and alloca promotion. Do so.
1207-
FPM.addPass(SROAPass());
1210+
// NOTE: we are very late in the pipeline, and we don't have any LICM
1211+
// or SimplifyCFG passes scheduled after us, that would cleanup
1212+
// the CFG mess this may created if allowed to modify CFG, so forbid that.
1213+
FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
12081214
FPM.addPass(InstCombinePass());
12091215
FPM.addPass(
12101216
RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
@@ -1745,7 +1751,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
17451751
}
17461752

17471753
// Break up allocas
1748-
FPM.addPass(SROAPass());
1754+
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
17491755

17501756
// LTO provides additional opportunities for tailcall elimination due to
17511757
// link-time inlining, and visibility of nocapture attribute.

llvm/lib/Passes/PassRegistry.def

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,6 @@ FUNCTION_PASS("sink", SinkingPass())
376376
FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass())
377377
FUNCTION_PASS("slsr", StraightLineStrengthReducePass())
378378
FUNCTION_PASS("speculative-execution", SpeculativeExecutionPass())
379-
FUNCTION_PASS("sroa", SROAPass())
380379
FUNCTION_PASS("strip-gc-relocates", StripGCRelocates())
381380
FUNCTION_PASS("structurizecfg", StructurizeCFGPass())
382381
FUNCTION_PASS("tailcallelim", TailCallElimPass())
@@ -473,6 +472,13 @@ FUNCTION_PASS_WITH_PARAMS("gvn",
473472
"no-load-pre;load-pre;"
474473
"no-split-backedge-load-pre;split-backedge-load-pre;"
475474
"no-memdep;memdep")
475+
FUNCTION_PASS_WITH_PARAMS("sroa",
476+
"SROAPass",
477+
[](SROAOptions PreserveCFG) {
478+
return SROAPass(PreserveCFG);
479+
},
480+
parseSROAOptions,
481+
"preserve-cfg;modify-cfg")
476482
FUNCTION_PASS_WITH_PARAMS("print<stack-lifetime>",
477483
"StackLifetimePrinterPass",
478484
[](StackLifetime::LivenessType Type) {

0 commit comments

Comments
 (0)