[SYCL] Recognise visited nodes in local accessor to shared mem pass (#5859)

jchlanda · web-flow · commit 57e935ac1894 · 2022-03-23T14:57:43.000+03:00
Clang may store multiple pointers to the same `MDNode` in the `nvvm.annotations` metadata, so keep track of the nodes that have already been visited while populating the kernels and skip any repeats. Fixes #5600
diff --git a/llvm/lib/SYCLLowerIR/LocalAccessorToSharedMemory.cpp b/llvm/lib/SYCLLowerIR/LocalAccessorToSharedMemory.cpp
@@ -15,6 +15,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/SYCLLowerIR/LocalAccessorToSharedMemory.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Instructions.h"
@@ -248,10 +249,15 @@ class LocalAccessorToSharedMemory : public ModulePass {
     if (!NvvmMetadata)
       return;
 
+    // The annotations node may contain multiple pointers to the same metadata
+    // node; track the visited ones so that each is processed only once.
+    SmallSet<MDNode *, 4> Visited;
     for (auto *MetadataNode : NvvmMetadata->operands()) {
-      if (MetadataNode->getNumOperands() != 3)
+      if (Visited.contains(MetadataNode) || MetadataNode->getNumOperands() != 3)
         continue;
 
+      Visited.insert(MetadataNode);
+
       // NVPTX identifies kernel entry points using metadata nodes of the form:
       //   !X = !{<function>, !"kernel", i32 1}
       const MDOperand &TypeOperand = MetadataNode->getOperand(1);
diff --git a/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-multiple-annotations.ll b/llvm/test/CodeGen/NVPTX/local-accessor-to-shared-memory-multiple-annotations.ll
@@ -0,0 +1,35 @@
+; RUN: opt -enable-new-pm=0 -localaccessortosharedmemory -sycl-enable-local-accessor %s -S -o - | FileCheck %s
+; ModuleID = 'multiple-annotations.bc'
+source_filename = "multiple-annotations.ll"
+target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; This test checks that the transformation is applied in the basic case when !nvvm.annotations references the same metadata node more than once.
+
+; CHECK: @_ZTS14example_kernel_shared_mem = external addrspace(3) global [0 x i8], align 4
+
+; Function Attrs: noinline
+define weak_odr dso_local void @_ZTS14example_kernel(i32 addrspace(3)* %a, i32 addrspace(1)* %b, i32 %c) {
+; CHECK: define weak_odr dso_local void @_ZTS14example_kernel(i32 %0, i32 addrspace(1)* %b, i32 %c) {
+entry:
+; CHECK: %1 = getelementptr inbounds [0 x i8], [0 x i8] addrspace(3)* @_ZTS14example_kernel_shared_mem, i32 0, i32 %0
+; CHECK: %a = bitcast i8 addrspace(3)* %1 to i32 addrspace(3)*
+  %0 = load i32, i32 addrspace(3)* %a
+; CHECK: %2 = load i32, i32 addrspace(3)* %a
+  %1 = load i32, i32 addrspace(1)* %b
+; CHECK: %3 = load i32, i32 addrspace(1)* %b
+  %2 = add i32 %c, %c
+; CHECK: %4 = add i32 %c, %c
+  ret void
+}
+
+!nvvm.annotations = !{!0, !0}
+!nvvmir.version = !{!5}
+
+!0 = distinct !{void (i32 addrspace(3)*, i32 addrspace(1)*, i32)* @_ZTS14example_kernel, !"kernel", i32 1}
+; CHECK: !0 = distinct !{void (i32, i32 addrspace(1)*, i32)* @_ZTS14example_kernel, !"kernel", i32 1}
+!1 = !{null, !"align", i32 8}
+!2 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080}
+!3 = !{null, !"align", i32 16}
+!4 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088}
+!5 = !{i32 1, i32 4}