Skip to content

Commit 8a4810a

Browse files
[SYCL][PTX][CUDA] Fixes debug info cloning for implicit global offset (#2129)
Signed-off-by: Steffen Larsen <[email protected]>
1 parent 97a3150 commit 8a4810a

File tree

2 files changed: 24 additions and 4 deletions

2 files changed: 24 additions and 4 deletions

llvm/lib/Target/NVPTX/SYCL/GlobalOffset.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,9 @@ class GlobalOffset : public ModulePass {
259 259
Function *NewFunc = Function::Create(NewFuncTy, Func->getLinkage(),
260 260
Func->getAddressSpace());
261 261

262+
// Keep original function ordering.
263+
M.getFunctionList().insertAfter(Func->getIterator(), NewFunc);
264+
262 265
if (KeepOriginal) {
263 266
// TODO: Are there better naming alternatives that allow for unmangling?
264 267
NewFunc->setName(Func->getName() + "_with_offset");
@@ -272,7 +275,7 @@ class GlobalOffset : public ModulePass {
272 275
}
273 276

274 277
SmallVector<ReturnInst *, 8> Returns;
275-
CloneFunctionInto(NewFunc, Func, VMap, /*ModuleLevelChanges=*/false,
278+
CloneFunctionInto(NewFunc, Func, VMap, /*ModuleLevelChanges=*/true,
276 279
Returns);
277 280
} else {
278 281
NewFunc->copyAttributesFrom(Func);
@@ -298,9 +301,6 @@ class GlobalOffset : public ModulePass {
298 301
NewFunc->addMetadata(MD.first, *MD.second);
299 302
}
300 303

301-
// Keep original function ordering.
302-
M.getFunctionList().insertAfter(Func->getIterator(), NewFunc);
303-
304 304
Value *ImplicitOffset = NewFunc->arg_begin() + (NewFunc->arg_size() - 1);
305 305
// Add bitcast to match the return type of the intrinsic if needed.
306 306
if (ImplicitArgumentType != ImplicitOffsetPtrType) {
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// RUN: %clangxx -g -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
3+
// REQUIRES: cuda
4+
5+
// NOTE: Tests that the implicit global offset pass copies debug information
6+
7+
#include <CL/sycl.hpp>
8+
using namespace cl::sycl;
9+
10+
int main() {
11+
queue q;
12+
buffer<uint64_t, 1> t1(10);
13+
q.submit([&](handler &cgh) {
14+
auto table = t1.get_access<access::mode::write>(cgh);
15+
cgh.parallel_for<class kernel>(10, [=](id<1> gtid) {
16+
table[gtid] = gtid[0];
17+
});
18+
});
19+
q.wait();
20+
}

0 commit comments

Comments
 (0)