intel · vladimirlaz · Mar 10, 2022 · Mar 2, 2022 · Mar 5, 2022 · Mar 7, 2022
@@ -1,150 +1,7 @@
-// Each combination of 64/32 bit atomic, relaxed/acquire/release/acq_rel
-// semantic order and sub_group/work_group/device/system scope is tested
-// separately. This is controlled by macros, defined by RUN commands. Defaults
-// (no macro for a group) are: 32 bit, relaxed and device.
-
 // See https://github.com/intel/llvm-test-suite/issues/867 for detailed status
 // UNSUPPORTED: hip
 
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DATOMIC64
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DWORK_GROUP
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DWORK_GROUP -DATOMIC64
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DSYSTEM
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 -DSYSTEM -DATOMIC64
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DATOMIC64
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DWORK_GROUP
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DWORK_GROUP -DATOMIC64
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DSYSTEM
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQUIRE -DSYSTEM -DATOMIC64
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DATOMIC64
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DWORK_GROUP
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DWORK_GROUP -DATOMIC64
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DSYSTEM
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DRELEASE -DSYSTEM -DATOMIC64
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DATOMIC64
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DWORK_GROUP
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DWORK_GROUP -DATOMIC64
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DSYSTEM
-// RUN: %HOST_RUN_PLACEHOLDER %t.out
-// RUN: %GPU_RUN_PLACEHOLDER %t.out
-// RUN: %CPU_RUN_PLACEHOLDER %t.out
-// RUN: %ACC_RUN_PLACEHOLDER %t.out
-
-// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70 -DACQ_REL -DSYSTEM -DATOMIC64
+// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-device-code-split=per_kernel -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_70
 // RUN: %HOST_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out

@@ -243,14 +243,18 @@ void add_test(queue q, size_t N) {
       (space == access::address_space::generic_space && !TEST_GENERIC_IN_LOCAL);
   constexpr bool do_ext_tests = space != access::address_space::generic_space;
   if constexpr (do_local_tests) {
+#ifdef RUN_DEPRECATED
     if constexpr (do_ext_tests) {
       add_fetch_local_test<::sycl::ext::oneapi::atomic_ref, space, T,
                            Difference, order, scope>(q, N);
     }
+#else
     add_fetch_local_test<::sycl::atomic_ref, space, T, Difference, order,
                          scope>(q, N);
+#endif
   }
   if constexpr (do_global_tests) {
+#ifdef RUN_DEPRECATED
     if constexpr (do_ext_tests) {
       add_fetch_test<::sycl::ext::oneapi::atomic_ref, space, T, Difference,
                      order, scope>(q, N);
@@ -263,6 +267,7 @@ void add_test(queue q, size_t N) {
                           order, scope>(q, N);
       }
     }
+#else
     add_fetch_test<::sycl::atomic_ref, space, T, Difference, order, scope>(q,
                                                                            N);
     add_plus_equal_test<::sycl::atomic_ref, space, T, Difference, order, scope>(
@@ -273,6 +278,7 @@ void add_test(queue q, size_t N) {
       add_post_inc_test<::sycl::atomic_ref, space, T, Difference, order, scope>(
           q, N);
     }
+#endif
   }
 }
 
@@ -281,74 +287,46 @@ template <access::address_space space, typename T, typename Difference = T,
 void add_test_scopes(queue q, size_t N) {
   std::vector<memory_scope> scopes =
       q.get_device().get_info<info::device::atomic_memory_scope_capabilities>();
-#if defined(SYSTEM)
-  if (std::find(scopes.begin(), scopes.end(), memory_scope::system) ==
+  if (std::find(scopes.begin(), scopes.end(), memory_scope::system) !=
       scopes.end()) {
-    std::cout << "Skipping test\n";
-    return;
+    add_test<space, T, Difference, order, memory_scope::system>(q, N);
   }
-  add_test<space, T, Difference, order, memory_scope::system>(q, N);
-#elif defined(WORK_GROUP)
-  if (std::find(scopes.begin(), scopes.end(), memory_scope::system) ==
+  if (std::find(scopes.begin(), scopes.end(), memory_scope::work_group) !=
       scopes.end()) {
-    std::cout << "Skipping test\n";
-    return;
+    add_test<space, T, Difference, order, memory_scope::work_group>(q, N);
   }
-  add_test<space, T, Difference, order, memory_scope::work_group>(q, N);
-#elif defined(SUB_GROUP)
-  if (std::find(scopes.begin(), scopes.end(), memory_scope::system) ==
+  if (std::find(scopes.begin(), scopes.end(), memory_scope::sub_group) !=
       scopes.end()) {
-    std::cout << "Skipping test\n";
-    return;
+    add_test<space, T, Difference, order, memory_scope::sub_group>(q, N);
   }
-  add_test<space, T, Difference, order, memory_scope::sub_group>(q, N);
-#else
   add_test<space, T, Difference, order, memory_scope::device>(q, N);
-#endif
 }
 
 template <access::address_space space, typename T, typename Difference = T>
 void add_test_orders_scopes(queue q, size_t N) {
   std::vector<memory_order> orders =
       q.get_device().get_info<info::device::atomic_memory_order_capabilities>();
-#if defined(ACQ_REL)
-  if (std::find(orders.begin(), orders.end(), memory_order::acq_rel) ==
+  if (std::find(orders.begin(), orders.end(), memory_order::acq_rel) !=
       orders.end()) {
-    std::cout << "Skipping test\n";
-    return;
+    add_test_scopes<space, T, Difference, memory_order::acq_rel>(q, N);
   }
-  add_test_scopes<space, T, Difference, memory_order::acq_rel>(q, N);
-#elif defined(ACQUIRE)
-  if (std::find(orders.begin(), orders.end(), memory_order::acquire) ==
+  if (std::find(orders.begin(), orders.end(), memory_order::acquire) !=
       orders.end()) {
-    std::cout << "Skipping test\n";
-    return;
+    add_test_scopes<space, T, Difference, memory_order::acquire>(q, N);
   }
-  add_test_scopes<space, T, Difference, memory_order::acquire>(q, N);
-#elif defined(RELEASE)
-  if (std::find(orders.begin(), orders.end(), memory_order::release) ==
+  if (std::find(orders.begin(), orders.end(), memory_order::release) !=
       orders.end()) {
-    std::cout << "Skipping test\n";
-    return;
+    add_test_scopes<space, T, Difference, memory_order::release>(q, N);
   }
-  add_test_scopes<space, T, Difference, memory_order::release>(q, N);
-#else
   add_test_scopes<space, T, Difference, memory_order::relaxed>(q, N);
-#endif
 }
 
 template <access::address_space space> void add_test_all() {
   queue q;
 
   constexpr int N = 32;
-#ifdef ATOMIC64
-  if (!q.get_device().has(aspect::atomic64)) {
-    std::cout << "Skipping test\n";
-    return;
-  }
-
+#ifdef FULL_ATOMIC64_COVERAGE
   add_test_orders_scopes<space, double>(q, N);
-#ifndef FP_TESTS_ONLY
   if constexpr (sizeof(long) == 8) {
     add_test_orders_scopes<space, long>(q, N);
     add_test_orders_scopes<space, unsigned long>(q, N);
@@ -361,9 +339,8 @@ template <access::address_space space> void add_test_all() {
     add_test_orders_scopes<space, char *, ptrdiff_t>(q, N);
   }
 #endif
-#else
   add_test_orders_scopes<space, float>(q, N);
-#ifndef FP_TESTS_ONLY
+#ifdef FULL_ATOMIC32_COVERAGE
   add_test_orders_scopes<space, int>(q, N);
   add_test_orders_scopes<space, unsigned int>(q, N);
   if constexpr (sizeof(long) == 4) {
@@ -374,7 +351,5 @@ template <access::address_space space> void add_test_all() {
     add_test_orders_scopes<space, char *, ptrdiff_t>(q, N);
   }
 #endif
-#endif
-
   std::cout << "Test passed." << std::endl;
 }