Skip to content

Commit 3e3f204

Browse files
committed
PR updates
1 parent 62350a4 commit 3e3f204

File tree

4 files changed

+134
-89
lines changed

4 files changed

+134
-89
lines changed

llvm/include/llvm/SYCLLowerIR/GlobalOffset.h

Lines changed: 64 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
//
77
//===----------------------------------------------------------------------===//
88
//
9-
// This pass operates on SYCL kernels being compiled to CUDA. It looks for uses
10-
// of the `llvm.nvvm.implicit.offset` intrinsic and replaces it with a offset
11-
// parameter which will be threaded through from the kernel entry point.
9+
// This pass operates on SYCL kernels. It looks for uses of the
10+
// `llvm.{amdgcn|nvvm}.implicit.offset` intrinsic and replaces it with an
11+
// offset parameter which will be threaded through from the kernel entry point.
1212
//
1313
//===----------------------------------------------------------------------===//
1414

@@ -36,22 +36,80 @@ class GlobalOffsetPass : public PassInfoMixin<GlobalOffsetPass> {
3636
static StringRef getPassName() { return "Add implicit SYCL global offset"; }
3737

3838
private:
39-
void processKernelEntryPoint(Module &M, Function *Func);
39+
/// After the execution of this function, the module to which the kernel
40+
/// `Func` belongs contains a clone of the original kernel with the signature
41+
/// extended with the implicit offset parameter and `_with_offset` appended
42+
/// to the name.
43+
/// An alloca of 3 zeros (corresponding to offsets in x, y and z) is added to
44+
/// the original kernel, in order to keep the interface of kernel's call
45+
/// graph unified, regardless of whether the global offset has been used.
46+
///
47+
/// \param Func Kernel to be processed.
48+
void processKernelEntryPoint(Function *Func);
49+
50+
/// This function adds an implicit parameter to the function containing a
51+
/// call instruction to the implicit offset intrinsic or another function
52+
/// (which eventually calls the intrinsic). If the call instruction is to
53+
/// the implicit offset intrinsic, then the intrinsic is replaced with the
54+
/// parameter that was added.
55+
///
56+
/// Once the function, say `F`, containing a call to `Callee` has the
57+
/// implicit parameter added, callers of `F` are processed by recursively
58+
/// calling this function, passing `F` to `CalleeWithImplicitParam`.
59+
///
60+
/// Since the cloning of entry points may alter the users of a function, the
61+
/// cloning must be done as early as possible, as to ensure that no users are
62+
/// added to previous callees in the call-tree.
63+
///
64+
/// \param Callee is the function (to which this transformation has already
65+
/// been applied), or the implicit offset intrinsic.
66+
///
67+
/// \param CalleeWithImplicitParam indicates whether Callee is to the
68+
/// implicit intrinsic (when `nullptr`) or to another function (not
69+
/// `nullptr`) - this is used to know whether calls to it need to have the
70+
/// implicit parameter added to it or replaced with the implicit parameter.
4071
void addImplicitParameterToCallers(Module &M, Value *Callee,
4172
Function *CalleeWithImplicitParam);
73+
74+
/// For a given function `Func` extend signature to contain an implicit
75+
/// offset argument.
76+
///
77+
/// \param Func A function to add offset to.
78+
///
79+
/// \param ImplicitArgumentType Architecture-dependent type of the implicit
80+
/// argument holding the global offset.
81+
///
82+
/// \param KeepOriginal If set to true, rather than splicing the old `Func`,
83+
/// keep it intact and create a clone of it with `_with_offset` appended to
84+
/// the name.
85+
///
86+
/// \returns A pair of new function with the offset argument added and a
87+
/// pointer to the implicit argument (either a func argument or a bitcast
88+
/// turning it to the correct type).
4289
std::pair<Function *, Value *>
4390
addOffsetArgumentToFunction(Module &M, Function *Func,
4491
Type *ImplicitArgumentType = nullptr,
4592
bool KeepOriginal = false);
93+
94+
/// This function makes sure that a given kernel entry point has no llvm
95+
/// uses.
96+
///
97+
/// \param KernelPayloads A collection of kernel functions present in a
98+
/// module `M`.
99+
///
100+
/// \returns A map of kernel functions to corresponding metadata nodes.
46101
DenseMap<Function *, MDNode *>
47102
validateKernels(Module &M, SmallVectorImpl<KernelPayload> &KernelPayloads);
48103

49104
private:
50-
// Keep track of which functions have been processed to avoid processing twice
105+
/// Keep track of which functions have been processed to avoid processing
106+
/// twice.
51107
llvm::DenseMap<Function *, Value *> ProcessedFunctions;
52-
// Keep a map of all entry point functions with metadata
108+
/// Keep a map of all entry point functions with metadata.
53109
llvm::DenseMap<Function *, MDNode *> EntryPointMetadata;
110+
/// A type of implicit argument added to the kernel signature.
54111
llvm::Type *KernelImplicitArgumentType = nullptr;
112+
/// A type used for the alloca holding the values of global offsets.
55113
llvm::Type *ImplicitOffsetPtrType = nullptr;
56114

57115
ArchType AT;

llvm/include/llvm/SYCLLowerIR/LocalAccessorToSharedMemory.h

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66
//
77
//===----------------------------------------------------------------------===//
88
//
9-
// This pass operates on SYCL kernels being compiled to CUDA. It modifies
10-
// kernel entry points which take pointers to shared memory and modifies them
11-
// to take offsets into shared memory (represented by a symbol in the shared
12-
// address space). The SYCL runtime is expected to provide offsets rather than
13-
// pointers to these functions.
9+
// This pass operates on SYCL kernels. It modifies kernel entry points which
10+
// take pointers to shared memory and alters them to take offsets into shared
11+
// memory (represented by a symbol in the shared address space). The SYCL
12+
// runtime is expected to provide offsets rather than pointers to these
13+
// functions.
1414
//
1515
//===----------------------------------------------------------------------===//
1616

@@ -41,13 +41,26 @@ class LocalAccessorToSharedMemoryPass
4141
}
4242

4343
private:
44+
/// This function replaces pointers to shared memory with offsets to a global
45+
/// symbol in shared memory.
46+
/// It alters the signature of the kernel (pointer vs offset value) as well
47+
/// as the access (dereferencing the argument pointer vs GEP to the global
48+
/// symbol).
49+
///
50+
/// \param F The kernel to be processed.
51+
///
52+
/// \returns A new function with global symbol accesses.
4453
Function *processKernel(Module &M, Function *F);
54+
55+
/// Update kernel metadata to reflect the change in the signature.
56+
///
57+
/// \param NewToOldKernels A map of original kernels to the modified ones.
4558
void postProcessKernels(
4659
SmallVectorImpl<std::pair<Function *, KernelPayload>> &NewToOldKernels);
4760

4861
private:
49-
// The value for NVVM's ADDRESS_SPACE_SHARED and AMD's LOCAL_ADDRESS happen to
50-
// be 3.
62+
/// The values for NVVM's ADDRESS_SPACE_SHARED and AMD's LOCAL_ADDRESS happen
63+
/// to be 3.
5164
const unsigned SharedASValue = 3;
5265
};
5366

llvm/lib/SYCLLowerIR/GlobalOffset.cpp

Lines changed: 30 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ PreservedAnalyses GlobalOffsetPass::run(Module &M, ModuleAnalysisManager &) {
7171
return PreservedAnalyses::all();
7272

7373
AT = TargetHelpers::getArchType(M);
74-
llvm::Function *ImplicitOffsetIntrinsic = M.getFunction(Intrinsic::getName(
74+
Function *ImplicitOffsetIntrinsic = M.getFunction(Intrinsic::getName(
7575
AT == ArchType::Cuda
7676
? static_cast<unsigned>(Intrinsic::nvvm_implicit_offset)
7777
: static_cast<unsigned>(Intrinsic::amdgcn_implicit_offset)));
@@ -82,12 +82,13 @@ PreservedAnalyses GlobalOffsetPass::run(Module &M, ModuleAnalysisManager &) {
8282
// For AMD allocas and pointers have to be to CONSTANT_PRIVATE (5), NVVM is
8383
// happy with ADDRESS_SPACE_GENERIC (0).
8484
TargetAS = AT == ArchType::Cuda ? 0 : 5;
85+
/// The implicit kernel argument is an array of 3 i32 values, one offset for
86+
/// each of the x, y and z dimensions.
8587
KernelImplicitArgumentType =
8688
ArrayType::get(Type::getInt32Ty(M.getContext()), 3);
8789
ImplicitOffsetPtrType =
8890
Type::getInt32Ty(M.getContext())->getPointerTo(TargetAS);
89-
assert((!ImplicitOffsetIntrinsic ||
90-
ImplicitOffsetIntrinsic->getReturnType() == ImplicitOffsetPtrType) &&
91+
assert((ImplicitOffsetIntrinsic->getReturnType() == ImplicitOffsetPtrType) &&
9192
"Implicit offset intrinsic does not return the expected type");
9293

9394
SmallVector<KernelPayload, 4> KernelPayloads;
@@ -108,10 +109,11 @@ PreservedAnalyses GlobalOffsetPass::run(Module &M, ModuleAnalysisManager &) {
108109
return PreservedAnalyses::none();
109110
}
110111

111-
void GlobalOffsetPass::processKernelEntryPoint(Module &M, Function *Func) {
112+
void GlobalOffsetPass::processKernelEntryPoint(Function *Func) {
112113
assert(EntryPointMetadata.count(Func) != 0 &&
113114
"Function must be an entry point");
114115

116+
auto &M = *Func->getParent();
115117
LLVMContext &Ctx = M.getContext();
116118
MDNode *FuncMetadata = EntryPointMetadata[Func];
117119

@@ -128,7 +130,7 @@ void GlobalOffsetPass::processKernelEntryPoint(Module &M, Function *Func) {
128130
M, Func, KernelImplicitArgumentType->getPointerTo(),
129131
/*KeepOriginal=*/true)
130132
.first;
131-
Argument *NewArgument = NewFunc->arg_begin() + (NewFunc->arg_size() - 1);
133+
Argument *NewArgument = std::prev(NewFunc->arg_end());
132134
// Pass byval to the kernel for NVIDIA, AMD's calling convention disallows
133135
// byval args, use byref.
134136
auto Attr =
@@ -161,26 +163,6 @@ void GlobalOffsetPass::processKernelEntryPoint(Module &M, Function *Func) {
161163
ImplicitOffsetType, ImplicitOffset, 0, 0);
162164
}
163165

164-
// This function adds an implicit parameter to the function containing a call
165-
// instruction to the implicit offset intrinsic or another function (which
166-
// eventually calls the instrinsic). If the call instruction is to the
167-
// implicit offset intrinsic, then the intrinisic is replaced with the
168-
// parameter that was added.
169-
//
170-
// `Callee` is the function (to which this transformation has already been
171-
// applied), or to the implicit offset intrinsic. `CalleeWithImplicitParam`
172-
// indicates whether Callee is to the implicit intrinsic (when `nullptr`) or
173-
// to another function (not `nullptr`) - this is used to know whether calls to
174-
// it needs to have the implicit parameter added to it or replaced with the
175-
// implicit parameter.
176-
//
177-
// Once the function, say `F`, containing a call to `Callee` has the implicit
178-
// parameter added, callers of `F` are processed by recursively calling this
179-
// function, passing `F` to `CalleeWithImplicitParam`.
180-
//
181-
// Since the cloning of entry points may alter the users of a function, the
182-
// cloning must be done as early as possible, as to ensure that no users are
183-
// added to previous callees in the call-tree.
184166
void GlobalOffsetPass::addImplicitParameterToCallers(
185167
Module &M, Value *Callee, Function *CalleeWithImplicitParam) {
186168

@@ -193,7 +175,7 @@ void GlobalOffsetPass::addImplicitParameterToCallers(
193175

194176
Function *Caller = Call->getFunction();
195177
if (EntryPointMetadata.count(Caller) != 0) {
196-
processKernelEntryPoint(M, Caller);
178+
processKernelEntryPoint(Caller);
197179
}
198180
}
199181

@@ -223,40 +205,41 @@ void GlobalOffsetPass::addImplicitParameterToCallers(
223205
CallToOld->replaceAllUsesWith(ImplicitOffset);
224206
} else {
225207
// Build up a list of arguments to call the modified function using.
226-
llvm::SmallVector<Value *, 8> ImplicitOffsets;
208+
SmallVector<Value *, 8> ImplicitOffsets;
227209
for (Use &U : CallToOld->args()) {
228210
ImplicitOffsets.push_back(U);
229211
}
230212
ImplicitOffsets.push_back(ImplicitOffset);
231213

232214
// Replace call to other function (which now has a new parameter),
233215
// with a call including the new parameter to that same function.
234-
auto *NewCaller = CallInst::Create(
216+
auto *NewCallInst = CallInst::Create(
235217
/* Ty= */ CalleeWithImplicitParam->getFunctionType(),
236218
/* Func= */ CalleeWithImplicitParam,
237219
/* Args= */ ImplicitOffsets,
238220
/* NameStr= */ Twine(),
239221
/* InsertBefore= */ CallToOld);
240-
NewCaller->setTailCallKind(CallToOld->getTailCallKind());
241-
NewCaller->copyMetadata(*CallToOld);
242-
CallToOld->replaceAllUsesWith(NewCaller);
222+
NewCallInst->setTailCallKind(CallToOld->getTailCallKind());
223+
NewCallInst->copyMetadata(*CallToOld);
224+
CallToOld->replaceAllUsesWith(NewCallInst);
243225

244226
if (CallToOld->hasName()) {
245-
NewCaller->takeName(CallToOld);
227+
NewCallInst->takeName(CallToOld);
246228
}
247229
}
248230

249231
// Remove the caller now that it has been replaced.
250232
CallToOld->eraseFromParent();
251233

252-
if (!AlreadyProcessed) {
253-
// Process callers of the old function.
254-
addImplicitParameterToCallers(M, Caller, NewFunc);
234+
if (AlreadyProcessed)
235+
continue;
255236

256-
// Now that the old function is dead, delete it.
257-
Caller->dropAllReferences();
258-
Caller->eraseFromParent();
259-
}
237+
// Process callers of the old function.
238+
addImplicitParameterToCallers(M, Caller, NewFunc);
239+
240+
// Now that the old function is dead, delete it.
241+
Caller->dropAllReferences();
242+
Caller->eraseFromParent();
260243
}
261244
}
262245

@@ -270,13 +253,9 @@ std::pair<Function *, Value *> GlobalOffsetPass::addOffsetArgumentToFunction(
270253
// Construct an argument list containing all of the previous arguments.
271254
SmallVector<Type *, 8> Arguments;
272255
SmallVector<AttributeSet, 8> ArgumentAttributes;
273-
274-
unsigned i = 0;
275-
for (Function::arg_iterator FuncArg = Func->arg_begin(),
276-
FuncEnd = Func->arg_end();
277-
FuncArg != FuncEnd; ++FuncArg, ++i) {
278-
Arguments.push_back(FuncArg->getType());
279-
ArgumentAttributes.push_back(FuncAttrs.getParamAttrs(i));
256+
for (const auto &I : enumerate(Func->args())) {
257+
Arguments.push_back(I.value().getType());
258+
ArgumentAttributes.push_back(FuncAttrs.getParamAttrs(I.index()));
280259
}
281260

282261
// Add the offset argument. Must be the same type as returned by
@@ -325,8 +304,7 @@ std::pair<Function *, Value *> GlobalOffsetPass::addOffsetArgumentToFunction(
325304
IRBuilder<> Builder(EntryBlock, EntryBlock->getFirstInsertionPt());
326305
Type *ImplicitOffsetType =
327306
ArrayType::get(Type::getInt32Ty(M.getContext()), 3);
328-
Value *OrigImplicitOffset =
329-
NewFunc->arg_begin() + (NewFunc->arg_size() - 1);
307+
Value *OrigImplicitOffset = std::prev(NewFunc->arg_end());
330308
AllocaInst *ImplicitOffsetAlloca =
331309
Builder.CreateAlloca(ImplicitOffsetType, TargetAS);
332310
auto DL = M.getDataLayout();
@@ -346,7 +324,7 @@ std::pair<Function *, Value *> GlobalOffsetPass::addOffsetArgumentToFunction(
346324
ImplicitArgumentType = ImplicitOffset->getType();
347325
ImplicitOffsetAllocaInserted = true;
348326
} else {
349-
ImplicitOffset = NewFunc->arg_begin() + (NewFunc->arg_size() - 1);
327+
ImplicitOffset = std::prev(NewFunc->arg_end());
350328
}
351329
} else {
352330
NewFunc->copyAttributesFrom(Func);
@@ -371,7 +349,7 @@ std::pair<Function *, Value *> GlobalOffsetPass::addOffsetArgumentToFunction(
371349
for (auto MD : MDs)
372350
NewFunc->addMetadata(MD.first, *MD.second);
373351

374-
ImplicitOffset = NewFunc->arg_begin() + (NewFunc->arg_size() - 1);
352+
ImplicitOffset = std::prev(NewFunc->arg_end());
375353
}
376354
assert(ImplicitOffset && "Value of implicit offset must be set.");
377355

@@ -381,7 +359,7 @@ std::pair<Function *, Value *> GlobalOffsetPass::addOffsetArgumentToFunction(
381359
// Make sure bitcast is inserted after alloca, if present.
382360
BasicBlock::iterator InsertionPt =
383361
ImplicitOffsetAllocaInserted
384-
? std::next(((AllocaInst *)ImplicitOffset)->getIterator())
362+
? std::next(cast<AllocaInst>(ImplicitOffset)->getIterator())
385363
: EntryBlock->getFirstInsertionPt();
386364
IRBuilder<> Builder(EntryBlock, InsertionPt);
387365
ImplicitOffset = Builder.CreateBitCast(
@@ -409,7 +387,7 @@ DenseMap<Function *, MDNode *> GlobalOffsetPass::validateKernels(
409387
return !GV->hasOneUse() || !Used.count(GV);
410388
};
411389

412-
llvm::DenseMap<Function *, MDNode *> EntryPointMetadata;
390+
DenseMap<Function *, MDNode *> EntryPointMetadata;
413391
for (auto &KP : KernelPayloads) {
414392
if (HasUseOtherThanLLVMUsed(KP.Kernel))
415393
llvm_unreachable("Kernel entry point can't have uses.");

0 commit comments

Comments
 (0)