|
7 | 7 | //===----------------------------------------------------------------------===//
|
8 | 8 |
|
9 | 9 | #include "llvm/SYCLLowerIR/GlobalOffset.h"
|
10 |
| -#include "llvm/ADT/SmallSet.h" |
| 10 | +#include "llvm/IR/DIBuilder.h" |
11 | 11 | #include "llvm/IR/IRBuilder.h"
|
12 | 12 | #include "llvm/IR/Instructions.h"
|
13 | 13 | #include "llvm/IR/Intrinsics.h"
|
|
18 | 18 | #include "llvm/Target/TargetIntrinsicInfo.h"
|
19 | 19 | #include "llvm/TargetParser/Triple.h"
|
20 | 20 | #include "llvm/Transforms/Utils/Cloning.h"
|
| 21 | +#include <deque> |
21 | 22 |
|
22 | 23 | using namespace llvm;
|
23 | 24 |
|
@@ -91,6 +92,72 @@ static void validateKernels(Module &M, TargetHelpers::KernelCache &KCache) {
|
91 | 92 | }
|
92 | 93 | }
|
93 | 94 |
|
| 95 | +void GlobalOffsetPass::createClonesAndPopulateVMap( |
| 96 | + const TargetHelpers::KernelCache &KCache, |
| 97 | + Function *ImplicitOffsetIntrinsic) { |
| 98 | + std::deque<User *> WorkList; |
| 99 | + for (auto *U : ImplicitOffsetIntrinsic->users()) |
| 100 | + WorkList.emplace_back(U); |
| 101 | + |
| 102 | + while (!WorkList.empty()) { |
| 103 | + auto *WI = WorkList.front(); |
| 104 | + WorkList.pop_front(); |
| 105 | + auto *Call = dyn_cast<CallInst>(WI); |
| 106 | + if (!Call) |
| 107 | + continue; // Not interesting. |
| 108 | + |
| 109 | + auto *Func = Call->getFunction(); |
| 110 | + if (0 != GlobalVMap.count(Func)) |
| 111 | + continue; // Already processed. |
| 112 | + |
| 113 | + const bool IsKernel = KCache.isKernel(*Func); |
| 114 | + FunctionType *FuncTy = Func->getFunctionType(); |
| 115 | + Type *ImplicitArgumentType = |
| 116 | + IsKernel ? KernelImplicitArgumentType->getPointerTo() |
| 117 | + : ImplicitOffsetPtrType; |
| 118 | + |
| 119 | + // Construct an argument list containing all of the previous arguments. |
| 120 | + SmallVector<Type *, 8> Arguments; |
| 121 | + for (const auto &A : Func->args()) |
| 122 | + Arguments.push_back(A.getType()); |
| 123 | + |
| 124 | + // Add the offset argument. Must be the same type as returned by |
| 125 | + // `llvm.{amdgcn|nvvm}.implicit.offset`. |
| 126 | + Arguments.push_back(ImplicitArgumentType); |
| 127 | + |
| 128 | + // Build the new function. |
| 129 | + if (FuncTy->isVarArg()) |
| 130 | + llvm_unreachable("Variadic arguments prohibited in SYCL"); |
| 131 | + FunctionType *NewFuncTy = FunctionType::get(FuncTy->getReturnType(), |
| 132 | + Arguments, FuncTy->isVarArg()); |
| 133 | + Function *NewFunc = Function::Create(NewFuncTy, Func->getLinkage(), |
| 134 | + Func->getAddressSpace()); |
| 135 | + NewFunc->setName(Func->getName() + "_with_offset"); |
| 136 | + // Remove the subprogram, if exists, as it will be pointing to an incorrect |
| 137 | + // data. |
| 138 | + if (Func->getSubprogram()) |
| 139 | + NewFunc->setSubprogram(nullptr); |
| 140 | + |
| 141 | + // Keep original function ordering, clone goes right after the original. |
| 142 | + Func->getParent()->getFunctionList().insertAfter(Func->getIterator(), |
| 143 | + NewFunc); |
| 144 | + |
| 145 | + // Populate the global value to value map with function arguments as well |
| 146 | + // as the cloned function itself. |
| 147 | + for (Function::arg_iterator FuncArg = Func->arg_begin(), |
| 148 | + FuncEnd = Func->arg_end(), |
| 149 | + NewFuncArg = NewFunc->arg_begin(); |
| 150 | + FuncArg != FuncEnd; ++FuncArg, ++NewFuncArg) { |
| 151 | + GlobalVMap[FuncArg] = NewFuncArg; |
| 152 | + } |
| 153 | + GlobalVMap[Func] = NewFunc; |
| 154 | + |
| 155 | + // Extend the work list with the users of the function. |
| 156 | + for (auto *U : Func->users()) |
| 157 | + WorkList.emplace_back(U); |
| 158 | + } |
| 159 | +} |
| 160 | + |
94 | 161 | // New PM implementation.
|
95 | 162 | PreservedAnalyses GlobalOffsetPass::run(Module &M, ModuleAnalysisManager &) {
|
96 | 163 | // Only run this pass on SYCL device code
|
@@ -128,6 +195,8 @@ PreservedAnalyses GlobalOffsetPass::run(Module &M, ModuleAnalysisManager &) {
|
128 | 195 | // Validate kernels
|
129 | 196 | validateKernels(M, KCache);
|
130 | 197 |
|
| 198 | + createClonesAndPopulateVMap(KCache, ImplicitOffsetIntrinsic); |
| 199 | + |
131 | 200 | // Add implicit parameters to all direct and indirect users of the offset
|
132 | 201 | addImplicitParameterToCallers(M, ImplicitOffsetIntrinsic, nullptr, KCache);
|
133 | 202 | }
|
@@ -163,6 +232,7 @@ PreservedAnalyses GlobalOffsetPass::run(Module &M, ModuleAnalysisManager &) {
|
163 | 232 | assert(ImplicitOffsetIntrinsic->use_empty() &&
|
164 | 233 | "Not all uses of intrinsic removed");
|
165 | 234 | ImplicitOffsetIntrinsic->eraseFromParent();
|
| 235 | + |
166 | 236 | return PreservedAnalyses::none();
|
167 | 237 | }
|
168 | 238 |
|
@@ -226,10 +296,10 @@ void GlobalOffsetPass::addImplicitParameterToCallers(
|
226 | 296 | if (AlreadyProcessed) {
|
227 | 297 | NewFunc = Caller;
|
228 | 298 | } else {
|
229 |
| - std::tie(NewFunc, ImplicitOffset) = |
230 |
| - addOffsetArgumentToFunction(M, Caller, |
231 |
| - /*KernelImplicitArgumentType*/ nullptr, |
232 |
| - /*KeepOriginal=*/true); |
| 299 | + std::tie(NewFunc, ImplicitOffset) = addOffsetArgumentToFunction( |
| 300 | + M, Caller, |
| 301 | + /*KernelImplicitArgumentType*/ nullptr, |
| 302 | + /*KeepOriginal=*/true, /*IsKernel=*/false); |
233 | 303 | }
|
234 | 304 | CallToOld = cast<CallInst>(GlobalVMap[CallToOld]);
|
235 | 305 | if (!CalleeWithImplicitParam) {
|
@@ -296,32 +366,17 @@ std::pair<Function *, Value *> GlobalOffsetPass::addOffsetArgumentToFunction(
|
296 | 366 | AttributeList NAttrs =
|
297 | 367 | AttributeList::get(Func->getContext(), FuncAttrs.getFnAttrs(),
|
298 | 368 | FuncAttrs.getRetAttrs(), ArgumentAttributes);
|
299 |
| - assert(!FuncTy->isVarArg() && "Variadic arguments prohibited in SYCL"); |
300 |
| - FunctionType *NewFuncTy = |
301 |
| - FunctionType::get(FuncTy->getReturnType(), Arguments, FuncTy->isVarArg()); |
302 |
| - |
303 |
| - Function *NewFunc = |
304 |
| - Function::Create(NewFuncTy, Func->getLinkage(), Func->getAddressSpace()); |
305 |
| - |
306 |
| - // Keep original function ordering. |
307 |
| - M.getFunctionList().insertAfter(Func->getIterator(), NewFunc); |
| 369 | + assert(GlobalVMap.count(Func) != 0 && |
| 370 | + "All relevant functions must be prepared ahead of time."); |
| 371 | + Function *NewFunc = dyn_cast<Function>(GlobalVMap[Func]); |
308 | 372 |
|
309 | 373 | Value *ImplicitOffset = nullptr;
|
310 | 374 | bool ImplicitOffsetAllocaInserted = false;
|
311 | 375 | if (KeepOriginal) {
|
312 |
| - // TODO: Are there better naming alternatives that allow for unmangling? |
313 |
| - NewFunc->setName(Func->getName() + "_with_offset"); |
314 |
| - |
315 |
| - for (Function::arg_iterator FuncArg = Func->arg_begin(), |
316 |
| - FuncEnd = Func->arg_end(), |
317 |
| - NewFuncArg = NewFunc->arg_begin(); |
318 |
| - FuncArg != FuncEnd; ++FuncArg, ++NewFuncArg) { |
319 |
| - GlobalVMap[FuncArg] = NewFuncArg; |
320 |
| - } |
321 |
| - |
322 | 376 | SmallVector<ReturnInst *, 8> Returns;
|
323 | 377 | CloneFunctionInto(NewFunc, Func, GlobalVMap,
|
324 | 378 | CloneFunctionChangeType::GlobalChanges, Returns);
|
| 379 | + |
325 | 380 | // In order to keep the signatures of functions called by the kernel
|
326 | 381 | // unified, the pass has to copy global offset to an array allocated in
|
327 | 382 | // addrspace(3). This is done as kernels can't allocate and fill the
|
|
0 commit comments