intel · sommerlukas · Dec 11, 2024 · Dec 10, 2024 · Dec 11, 2024
@@ -205,6 +205,7 @@ static bool dynamicWGLocalMemory(Module &M) {
         GlobalVariable::NotThreadLocal, // ThreadLocalMode
         LocalAS                         // AddressSpace
     );
+    LocalMemArrayGV->setUnnamedAddr(GlobalVariable::UnnamedAddr::Local);
     constexpr int DefaultMaxAlignment = 128;
     if (!TT.isSPIROrSPIRV())
       LocalMemArrayGV->setAlignment(Align{DefaultMaxAlignment});

@@ -6,7 +6,7 @@
 target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
 target triple = "spir64-unknown-unknown"
 
-; CHECK: @__sycl_dynamicLocalMemoryPlaceholder_GV = linkonce_odr addrspace(3) global ptr addrspace(3) undef
+; CHECK: @__sycl_dynamicLocalMemoryPlaceholder_GV = linkonce_odr local_unnamed_addr addrspace(3) global ptr addrspace(3) undef
 
 ; Function Attrs: convergent norecurse
 ; CHECK: @_ZTS7KernelA(ptr addrspace(1) %0, ptr addrspace(3) noalias "sycl-implicit-local-arg" %[[IMPLICT_ARG:[a-zA-Z0-9]+]]{{.*}} !kernel_arg_addr_space ![[ADDR_SPACE_MD:[0-9]+]]

@@ -5,7 +5,7 @@
 
 target triple = "nvptx64-nvidia-cuda"
 
-; CHECK: @__sycl_dynamicLocalMemoryPlaceholder_GV = external addrspace(3) global [0 x i8], align 128
+; CHECK: @__sycl_dynamicLocalMemoryPlaceholder_GV = external local_unnamed_addr addrspace(3) global [0 x i8], align 128
 
 ; Function Attrs: convergent norecurse
 ; CHECK: @_ZTS7KernelA(ptr addrspace(1) %0)

@@ -2,7 +2,7 @@
 // RUN: %{run} %t.out
 //
 
-// UNSUPPORTED: gpu-intel-gen12, cpu
+// UNSUPPORTED: gpu-intel-gen12
 // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16072
 
 // Test work_group_dynamic extension with allocation size specified at runtime
@@ -35,8 +35,8 @@ int main() {
     sycl_ext::properties properties{static_size};
     auto LocalAccessor =
         sycl::local_accessor<int>(WgSize * RepeatWG * sizeof(int), Cgh);
-    Cgh.parallel_for(nd_range<1>(range<1>(Size), range<1>(WgSize)), properties,
-                     [=](nd_item<1> Item) {
+    Cgh.parallel_for(nd_range<1>(range<1>(WgSize * WgCount), range<1>(WgSize)),
+                     properties, [=](nd_item<1> Item) {
                        int *Ptr = reinterpret_cast<int *>(
                            sycl_ext::get_work_group_scratch_memory());
                        size_t GroupOffset =

@@ -2,7 +2,7 @@
 // RUN: %{run} %t.out
 //
 
-// UNSUPPORTED: gpu-intel-gen12, cpu
+// UNSUPPORTED: gpu-intel-gen12
 // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16072
 
 // Test work_group_dynamic extension with allocation size specified at runtime
@@ -33,8 +33,8 @@ int main() {
     sycl_ext::work_group_scratch_size static_size(WgSize * RepeatWG *
                                                   sizeof(int));
     sycl_ext::properties properties{static_size};
-    Cgh.parallel_for(nd_range<1>(range<1>(Size), range<1>(WgSize)), properties,
-                     [=](nd_item<1> Item) {
+    Cgh.parallel_for(nd_range<1>(range<1>(WgSize * WgCount), range<1>(WgSize)),
+                     properties, [=](nd_item<1> Item) {
                        int *Ptr = reinterpret_cast<int *>(
                            sycl_ext::get_work_group_scratch_memory());
                        size_t GroupOffset =

@@ -2,7 +2,7 @@
 // RUN: %{run} %t.out
 //
 
-// UNSUPPORTED: gpu-intel-gen12, cpu
+// UNSUPPORTED: gpu-intel-gen12
 // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/16072
 
 // Test work_group_dynamic extension with allocation size specified at runtime.
@@ -32,8 +32,8 @@ int main() {
     sycl_ext::work_group_scratch_size static_size(WgSize * RepeatWG *
                                                   sizeof(int));
     sycl_ext::properties properties{static_size};
-    Cgh.parallel_for(nd_range<1>(range<1>(Size), range<1>(WgSize)), properties,
-                     [=](nd_item<1> Item) {
+    Cgh.parallel_for(nd_range<1>(range<1>(WgSize * WgCount), range<1>(WgSize)),
+                     properties, [=](nd_item<1> Item) {
                        int *Ptr = reinterpret_cast<int *>(
                            sycl_ext::get_work_group_scratch_memory());
                        size_t GroupOffset =