|
| 1 | +//===-- MoveAutoInit.cpp - move auto-init inst closer to their use site----===// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | +// |
| 9 | +// This pass moves instructions marked as auto-init closer to the basic block that |
| 10 | +// uses them, eventually removing them from some control paths of the function. |
| 11 | +// |
| 12 | +//===----------------------------------------------------------------------===// |
| 13 | + |
| 14 | +#include "llvm/Transforms/Utils/MoveAutoInit.h" |
| 15 | +#include "llvm/ADT/STLExtras.h" |
| 16 | +#include "llvm/ADT/Statistic.h" |
| 17 | +#include "llvm/ADT/StringSet.h" |
| 18 | +#include "llvm/Analysis/MemorySSA.h" |
| 19 | +#include "llvm/Analysis/MemorySSAUpdater.h" |
| 20 | +#include "llvm/IR/DebugInfo.h" |
| 21 | +#include "llvm/IR/Dominators.h" |
| 22 | +#include "llvm/IR/IRBuilder.h" |
| 23 | +#include "llvm/IR/Instructions.h" |
| 24 | +#include "llvm/IR/IntrinsicInst.h" |
| 25 | +#include "llvm/Support/CommandLine.h" |
| 26 | +#include "llvm/Transforms/Utils.h" |
| 27 | +#include "llvm/Transforms/Utils/LoopUtils.h" |
| 28 | + |
| 29 | +using namespace llvm; |
| 30 | + |
| 31 | +#define DEBUG_TYPE "move-auto-init" |
| 32 | + |
| 33 | +STATISTIC(NumMoved, "Number of instructions moved"); |
| 34 | + |
| 35 | +static cl::opt<unsigned> MoveAutoInitThreshold( |
| 36 | + "move-auto-init-threshold", cl::Hidden, cl::init(128), |
| 37 | + cl::desc("Maximum instructions to analyze per moved initialization")); |
| 38 | + |
| 39 | +static bool hasAutoInitMetadata(const Instruction &I) { |
| 40 | + return I.hasMetadata(LLVMContext::MD_annotation) && |
| 41 | + any_of(I.getMetadata(LLVMContext::MD_annotation)->operands(), |
| 42 | + [](const MDOperand &Op) { |
| 43 | + return cast<MDString>(Op.get())->getString() == "auto-init"; |
| 44 | + }); |
| 45 | +} |
| 46 | + |
| 47 | +/// Finds a BasicBlock in the CFG where instruction `I` can be moved to while |
| 48 | +/// not changing the Memory SSA ordering and being guarded by at least one |
| 49 | +/// condition. |
| 50 | +static BasicBlock *usersDominator(Instruction *I, DominatorTree &DT, |
| 51 | + MemorySSA &MSSA) { |
| 52 | + BasicBlock *CurrentDominator = nullptr; |
| 53 | + MemoryLocation ML; |
| 54 | + if (auto *MI = dyn_cast<MemIntrinsic>(I)) |
| 55 | + ML = MemoryLocation::getForDest(MI); |
| 56 | + else if (auto *SI = dyn_cast<StoreInst>(I)) |
| 57 | + ML = MemoryLocation::get(SI); |
| 58 | + else |
| 59 | + assert(false && "memory location set"); |
| 60 | + |
| 61 | + MemoryUseOrDef &IMA = *MSSA.getMemoryAccess(I); |
| 62 | + BatchAAResults AA(MSSA.getAA()); |
| 63 | + |
| 64 | + SmallPtrSet<MemoryAccess *, 8> Visited; |
| 65 | + |
| 66 | + auto AsMemoryAccess = [](User *U) { return cast<MemoryAccess>(U); }; |
| 67 | + SmallVector<MemoryAccess *> WorkList(map_range(IMA.users(), AsMemoryAccess)); |
| 68 | + |
| 69 | + while (!WorkList.empty()) { |
| 70 | + MemoryAccess *MA = WorkList.pop_back_val(); |
| 71 | + if (!Visited.insert(MA).second) |
| 72 | + continue; |
| 73 | + |
| 74 | + if (Visited.size() > MoveAutoInitThreshold) |
| 75 | + return nullptr; |
| 76 | + |
| 77 | + bool FoundClobberingUser = false; |
| 78 | + if (auto *M = dyn_cast<MemoryUseOrDef>(MA)) { |
| 79 | + Instruction *MI = M->getMemoryInst(); |
| 80 | + |
| 81 | + // If this memory instruction may not clobber `I`, we can skip it. |
| 82 | + // LifetimeEnd is a valid user, but we do not want it in the user |
| 83 | + // dominator. |
| 84 | + if (AA.getModRefInfo(MI, ML) != ModRefInfo::NoModRef && |
| 85 | + !MI->isLifetimeStartOrEnd() && MI != I) { |
| 86 | + FoundClobberingUser = true; |
| 87 | + CurrentDominator = CurrentDominator |
| 88 | + ? DT.findNearestCommonDominator(CurrentDominator, |
| 89 | + MI->getParent()) |
| 90 | + : MI->getParent(); |
| 91 | + } |
| 92 | + } |
| 93 | + if (!FoundClobberingUser) { |
| 94 | + auto UsersAsMemoryAccesses = map_range(MA->users(), AsMemoryAccess); |
| 95 | + append_range(WorkList, UsersAsMemoryAccesses); |
| 96 | + } |
| 97 | + } |
| 98 | + return CurrentDominator; |
| 99 | +} |
| 100 | + |
| 101 | +static bool runMoveAutoInit(Function &F, DominatorTree &DT, MemorySSA &MSSA) { |
| 102 | + BasicBlock &EntryBB = F.getEntryBlock(); |
| 103 | + SmallVector<std::pair<Instruction *, BasicBlock *>> JobList; |
| 104 | + |
| 105 | + // |
| 106 | + // Compute movable instructions. |
| 107 | + // |
| 108 | + for (Instruction &I : EntryBB) { |
| 109 | + if (!hasAutoInitMetadata(I)) |
| 110 | + continue; |
| 111 | + |
| 112 | + assert(!I.isVolatile() && "auto init instructions cannot be volatile."); |
| 113 | + |
| 114 | + BasicBlock *UsersDominator = usersDominator(&I, DT, MSSA); |
| 115 | + if (!UsersDominator) |
| 116 | + continue; |
| 117 | + |
| 118 | + if (UsersDominator == &EntryBB) |
| 119 | + continue; |
| 120 | + |
| 121 | + // Traverse the CFG to detect cycles `UsersDominator` would be part of. |
| 122 | + SmallPtrSet<BasicBlock *, 8> TransitiveSuccessors; |
| 123 | + SmallVector<BasicBlock *> WorkList(successors(UsersDominator)); |
| 124 | + bool HasCycle = false; |
| 125 | + while (!WorkList.empty()) { |
| 126 | + BasicBlock *CurrBB = WorkList.pop_back_val(); |
| 127 | + if (CurrBB == UsersDominator) |
| 128 | + // No early exit because we want to compute the full set of transitive |
| 129 | + // successors. |
| 130 | + HasCycle = true; |
| 131 | + for (BasicBlock *Successor : successors(CurrBB)) { |
| 132 | + if (!TransitiveSuccessors.insert(Successor).second) |
| 133 | + continue; |
| 134 | + WorkList.push_back(Successor); |
| 135 | + } |
| 136 | + } |
| 137 | + |
| 138 | + // Don't insert if that could create multiple execution of I, |
| 139 | + // but we can insert it in the non back-edge predecessors, if it exists. |
| 140 | + if (HasCycle) { |
| 141 | + BasicBlock *UsersDominatorHead = UsersDominator; |
| 142 | + while (BasicBlock *UniquePredecessor = |
| 143 | + UsersDominatorHead->getUniquePredecessor()) |
| 144 | + UsersDominatorHead = UniquePredecessor; |
| 145 | + |
| 146 | + if (UsersDominatorHead == &EntryBB) |
| 147 | + continue; |
| 148 | + |
| 149 | + BasicBlock *DominatingPredecessor = nullptr; |
| 150 | + for (BasicBlock *Pred : predecessors(UsersDominatorHead)) { |
| 151 | + // If one of the predecessor of the dominator also transitively is a |
| 152 | + // successor, moving to the dominator would do the inverse of loop |
| 153 | + // hoisting, and we don't want that. |
| 154 | + if (TransitiveSuccessors.count(Pred)) |
| 155 | + continue; |
| 156 | + |
| 157 | + DominatingPredecessor = |
| 158 | + DominatingPredecessor |
| 159 | + ? DT.findNearestCommonDominator(DominatingPredecessor, Pred) |
| 160 | + : Pred; |
| 161 | + } |
| 162 | + |
| 163 | + if (!DominatingPredecessor || DominatingPredecessor == &EntryBB) |
| 164 | + continue; |
| 165 | + |
| 166 | + UsersDominator = DominatingPredecessor; |
| 167 | + } |
| 168 | + |
| 169 | + // CatchSwitchInst blocks can only have one instruction, so they are not |
| 170 | + // good candidates for insertion. |
| 171 | + while (isa<CatchSwitchInst>(UsersDominator->getFirstInsertionPt())) { |
| 172 | + for (BasicBlock *Pred : predecessors(UsersDominator)) |
| 173 | + UsersDominator = DT.findNearestCommonDominator(UsersDominator, Pred); |
| 174 | + } |
| 175 | + |
| 176 | + // We finally found a place where I can be moved while not introducing extra |
| 177 | + // execution, and guarded by at least one condition. |
| 178 | + if (UsersDominator != &EntryBB) |
| 179 | + JobList.emplace_back(&I, UsersDominator); |
| 180 | + } |
| 181 | + |
| 182 | + // |
| 183 | + // Perform the actual substitution. |
| 184 | + // |
| 185 | + if (JobList.empty()) |
| 186 | + return false; |
| 187 | + |
| 188 | + MemorySSAUpdater MSSAU(&MSSA); |
| 189 | + |
| 190 | + // Reverse insertion to respect relative order between instructions: |
| 191 | + // if two instructions are moved from the same BB to the same BB, we insert |
| 192 | + // the second one in the front, then the first on top of it. |
| 193 | + for (auto &Job : reverse(JobList)) { |
| 194 | + Job.first->moveBefore(&*Job.second->getFirstInsertionPt()); |
| 195 | + MSSAU.moveToPlace(MSSA.getMemoryAccess(Job.first), Job.first->getParent(), |
| 196 | + MemorySSA::InsertionPlace::Beginning); |
| 197 | + } |
| 198 | + |
| 199 | + if (VerifyMemorySSA) |
| 200 | + MSSA.verifyMemorySSA(); |
| 201 | + |
| 202 | + NumMoved += JobList.size(); |
| 203 | + |
| 204 | + return true; |
| 205 | +} |
| 206 | + |
| 207 | +PreservedAnalyses MoveAutoInitPass::run(Function &F, |
| 208 | + FunctionAnalysisManager &AM) { |
| 209 | + |
| 210 | + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); |
| 211 | + auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA(); |
| 212 | + if (!runMoveAutoInit(F, DT, MSSA)) |
| 213 | + return PreservedAnalyses::all(); |
| 214 | + |
| 215 | + PreservedAnalyses PA; |
| 216 | + PA.preserve<DominatorTreeAnalysis>(); |
| 217 | + PA.preserve<MemorySSAAnalysis>(); |
| 218 | + PA.preserveSet<CFGAnalyses>(); |
| 219 | + return PA; |
| 220 | +} |
0 commit comments