Skip to content

Commit 57e935a

Browse files
authored
[SYCL] Recognise visited nodes in local accessor to shared mem pass (#5859)
It's possible for clang to store multiple pointers to the same `MDNode` in the `annotations` metadata, so keep track of visited nodes while populating the kernels. Fixes #5600
1 parent 1445528 commit 57e935a

File tree

2 files changed

+42
-1
lines changed

2 files changed

+42
-1
lines changed

llvm/lib/SYCLLowerIR/LocalAccessorToSharedMemory.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
//===----------------------------------------------------------------------===//
1616

1717
#include "llvm/SYCLLowerIR/LocalAccessorToSharedMemory.h"
18+
#include "llvm/ADT/SmallSet.h"
1819
#include "llvm/IR/Constants.h"
1920
#include "llvm/IR/GlobalValue.h"
2021
#include "llvm/IR/Instructions.h"
@@ -248,10 +249,15 @@ class LocalAccessorToSharedMemory : public ModulePass {
248249
if (!NvvmMetadata)
249250
return;
250251

252+
// It is possible that the annotations node contains multiple pointers to
253+
// the same metadata; recognise visited ones so each is handled only once.
254+
SmallSet<MDNode *, 4> Visited;
251255
for (auto *MetadataNode : NvvmMetadata->operands()) {
252-
if (MetadataNode->getNumOperands() != 3)
256+
if (Visited.contains(MetadataNode) || MetadataNode->getNumOperands() != 3)
253257
continue;
254258

259+
Visited.insert(MetadataNode);
260+
255261
// NVPTX identifies kernel entry points using metadata nodes of the form:
256262
// !X = !{<function>, !"kernel", i32 1}
257263
const MDOperand &TypeOperand = MetadataNode->getOperand(1);
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
; RUN: opt -enable-new-pm=0 -localaccessortosharedmemory -sycl-enable-local-accessor %s -S -o - | FileCheck %s
2+
; ModuleID = 'multiple-annotations.bc'
3+
source_filename = "multiple-annotations.ll"
4+
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
5+
target triple = "nvptx64-nvidia-cuda"
6+
7+
; This test checks that the transformation is applied in the basic case when the same annotation node is listed multiple times in !nvvm.annotations.
8+
9+
; CHECK: @_ZTS14example_kernel_shared_mem = external addrspace(3) global [0 x i8], align 4
10+
11+
; Function Attrs: noinline
12+
define weak_odr dso_local void @_ZTS14example_kernel(i32 addrspace(3)* %a, i32 addrspace(1)* %b, i32 %c) {
13+
; CHECK: define weak_odr dso_local void @_ZTS14example_kernel(i32 %0, i32 addrspace(1)* %b, i32 %c) {
14+
entry:
15+
; CHECK: %1 = getelementptr inbounds [0 x i8], [0 x i8] addrspace(3)* @_ZTS14example_kernel_shared_mem, i32 0, i32 %0
16+
; CHECK: %a = bitcast i8 addrspace(3)* %1 to i32 addrspace(3)*
17+
%0 = load i32, i32 addrspace(3)* %a
18+
; CHECK: %2 = load i32, i32 addrspace(3)* %a
19+
%1 = load i32, i32 addrspace(1)* %b
20+
; CHECK: %3 = load i32, i32 addrspace(1)* %b
21+
%2 = add i32 %c, %c
22+
; CHECK: %4 = add i32 %c, %c
23+
ret void
24+
}
25+
26+
!nvvm.annotations = !{!0, !0}
27+
!nvvmir.version = !{!5}
28+
29+
!0 = distinct !{void (i32 addrspace(3)*, i32 addrspace(1)*, i32)* @_ZTS14example_kernel, !"kernel", i32 1}
30+
; CHECK: !0 = distinct !{void (i32, i32 addrspace(1)*, i32)* @_ZTS14example_kernel, !"kernel", i32 1}
31+
!1 = !{null, !"align", i32 8}
32+
!2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
33+
!3 = !{null, !"align", i32 16}
34+
!4 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088}
35+
!5 = !{i32 1, i32 4}

0 commit comments

Comments
 (0)