intel · vladimirlaz · Nov 9, 2020 · Nov 6, 2020 · Nov 6, 2020 · Nov 6, 2020
@@ -1,10 +1,10 @@
-//==----- aot.cpp - Simple vector addition (AOT compilation example)  --------==//
+//==--- aot.cpp - Simple vector addition (AOT compilation example) --------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
-//===------------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
 
 #include <CL/sycl.hpp>
 
@@ -14,8 +14,7 @@
 constexpr cl::sycl::access::mode sycl_read = cl::sycl::access::mode::read;
 constexpr cl::sycl::access::mode sycl_write = cl::sycl::access::mode::write;
 
-template <typename T>
-class SimpleVadd;
+template <typename T> class SimpleVadd;
 
 template <typename T, size_t N>
 void simple_vadd(const std::array<T, N> &VA, const std::array<T, N> &VB,

@@ -1,10 +1,10 @@
-//==----- accelerator.cpp - AOT compilation for fpga devices using aoc  ------==//
+//==--- accelerator.cpp - AOT compilation for fpga devices using aoc ------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
-//===------------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
 
 // REQUIRES: aoc, accelerator
 

@@ -1,10 +1,10 @@
-//==----- cpu.cpp - AOT compilation for cpu devices using opencl-aot  --------==//
+//==--- cpu.cpp - AOT compilation for cpu devices using opencl-aot --------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
-//===------------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
 
 // REQUIRES: opencl-aot, cpu
 

@@ -1,10 +1,10 @@
-//==----- gpu.cpp - AOT compilation for gen devices using GEN compiler  ------==//
+//==--- gpu.cpp - AOT compilation for gen devices using GEN compiler ------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
-//===------------------------------------------------------------------------===//
+//===---------------------------------------------------------------------===//
 
 // REQUIRES: ocloc, gpu
 // UNSUPPORTED: cuda

@@ -0,0 +1,67 @@
+//==-- multiple-devices.cpp - Appropriate AOT-compiled image selection -----==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// REQUIRES: opencl-aot, ocloc, aoc, cpu, gpu, accelerator
+// UNSUPPORTED: cuda
+// CUDA is not compatible with SPIR.
+
+// 1-command compilation case
+// Targeting CPU, GPU, FPGA
+// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice,spir64_gen-unknown-unknown-sycldevice,spir64_fpga-unknown-unknown-sycldevice -Xsycl-target-backend=spir64_gen-unknown-unknown-sycldevice "-device *" %S/Inputs/aot.cpp -o %t_all.out
+// RUN: %HOST_RUN_PLACEHOLDER %t_all.out
+// RUN: %CPU_RUN_PLACEHOLDER %t_all.out
+// RUN: %GPU_RUN_PLACEHOLDER %t_all.out
+// RUN: %ACC_RUN_PLACEHOLDER %t_all.out
+
+// Produce object file, spirv, device images to combine these differently
+// at link-time, thus testing various AOT-compiled images configurations
+// RUN: %clangxx -fsycl %S/Inputs/aot.cpp -c -o %t.o
+// RUN: %clangxx -fsycl -fsycl-link-targets=spir64-unknown-unknown-sycldevice %t.o -o %t.spv
+// AOT-compile device binary images
+// RUN: opencl-aot %t.spv -o=%t_cpu.ir --device=cpu
+// RUN: ocloc -file %t.spv -spirv_input -output %t_gen.out -output_no_suffix -device cfl
+// RUN: aoc %t.spv -o %t_fpga.aocx -sycl -dep-files=%t.d
+
+// CPU, GPU
+// RUN: %clangxx -fsycl -fsycl-add-targets=spir64_x86_64:%t_cpu.ir,spir64_gen:%t_gen.out %t.o -o %t_cpu_gpu.out
+// RUN: %HOST_RUN_PLACEHOLDER %t_cpu_gpu.out
+// RUN: %CPU_RUN_PLACEHOLDER %t_cpu_gpu.out
+// RUN: %GPU_RUN_PLACEHOLDER %t_cpu_gpu.out
+
+// CPU, FPGA
+// RUN: %clangxx -fsycl -fsycl-add-targets=spir64_x86_64:%t_cpu.ir,spir64_fpga:%t_fpga.aocx %t.o -o %t_cpu_fpga.out
+// RUN: %HOST_RUN_PLACEHOLDER %t_cpu_fpga.out
+// RUN: %CPU_RUN_PLACEHOLDER %t_cpu_fpga.out
+// RUN: %ACC_RUN_PLACEHOLDER %t_cpu_fpga.out
+
+// GPU, FPGA
+// RUN: %clangxx -fsycl -fsycl-add-targets=spir64_gen:%t_gen.out,spir64_fpga:%t_fpga.aocx %t.o -o %t_gpu_fpga.out
+// RUN: %HOST_RUN_PLACEHOLDER %t_gpu_fpga.out
+// RUN: %GPU_RUN_PLACEHOLDER %t_gpu_fpga.out
+// RUN: %ACC_RUN_PLACEHOLDER %t_gpu_fpga.out
+
+// No AOT-compiled image for CPU
+// RUN: %clangxx -fsycl -fsycl-add-targets=spir64:%t.spv,spir64_gen:%t_gen.out,spir64_fpga:%t_fpga.aocx %t.o -o %t_spv_gpu_fpga.out
+// RUN: %CPU_RUN_PLACEHOLDER %t_spv_gpu_fpga.out
+// Check that execution on AOT-compatible devices is unaffected
+// RUN: %GPU_RUN_PLACEHOLDER %t_spv_gpu_fpga.out
+// RUN: %ACC_RUN_PLACEHOLDER %t_spv_gpu_fpga.out
+
+// No AOT-compiled image for GPU
+// RUN: %clangxx -fsycl -fsycl-add-targets=spir64:%t.spv,spir64_x86_64:%t_cpu.ir,spir64_fpga:%t_fpga.aocx %t.o -o %t_spv_cpu_fpga.out
+// RUN: %GPU_RUN_PLACEHOLDER %t_spv_cpu_fpga.out
+// Check that execution on AOT-compatible devices is unaffected
+// RUN: %CPU_RUN_PLACEHOLDER %t_spv_cpu_fpga.out
+// RUN: %ACC_RUN_PLACEHOLDER %t_spv_cpu_fpga.out
+
+// No AOT-compiled image for FPGA
+// RUN: %clangxx -fsycl -fsycl-add-targets=spir64:%t.spv,spir64_x86_64:%t_cpu.ir,spir64_gen:%t_gen.out %t.o -o %t_spv_cpu_gpu.out
+// RUN: %ACC_RUN_PLACEHOLDER %t_spv_cpu_gpu.out
+// Check that execution on AOT-compatible devices is unaffected
+// RUN: %CPU_RUN_PLACEHOLDER %t_spv_cpu_gpu.out
+// RUN: %GPU_RUN_PLACEHOLDER %t_spv_cpu_gpu.out
@@ -0,0 +1,64 @@
+// REQUIRES: opencl-aot, cpu
+//
+// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+//
+// The test checks that the specialization constant feature works with ahead
+// of time compilation.
+
+#include <CL/sycl.hpp>
+
+#include <iostream>
+#include <vector>
+
+class MyInt32Const;
+
+using namespace sycl;
+
+class Kernel;
+
+int main(int argc, char **argv) {
+  cl::sycl::queue q(default_selector{}, [](exception_list l) {
+    for (auto ep : l) {
+      try {
+        std::rethrow_exception(ep);
+      } catch (cl::sycl::exception &e0) {
+        std::cout << e0.what();
+      } catch (std::exception &e1) {
+        std::cout << e1.what();
+      } catch (...) {
+        std::cout << "*** catch (...)\n";
+      }
+    }
+  });
+
+  std::cout << "Running on " << q.get_device().get_info<info::device::name>()
+            << "\n";
+  cl::sycl::program prog(q.get_context());
+
+  cl::sycl::ONEAPI::experimental::spec_constant<int32_t, MyInt32Const> i32 =
+      prog.set_spec_constant<MyInt32Const>(10);
+
+  prog.build_with_kernel_type<Kernel>();
+
+  std::vector<int> vec(1);
+  {
+    cl::sycl::buffer<int, 1> buf(vec.data(), vec.size());
+
+    q.submit([&](cl::sycl::handler &cgh) {
+      auto acc = buf.get_access<cl::sycl::access::mode::write>(cgh);
+      cgh.single_task<Kernel>(prog.get_kernel<Kernel>(),
+                              [=]() { acc[0] = i32.get(); });
+    });
+  }
+  bool passed = true;
+  int val = vec[0];
+  int gold = 0; // with AOT, spec constant is set to C++ default for the type
+
+  if (val != gold) {
+    std::cout << "*** ERROR: " << val << " != " << gold << "(gold)\n";
+    passed = false;
+  }
+  std::cout << (passed ? "passed\n" : "FAILED\n");
+  return passed ? 0 : 1;
+}
@@ -0,0 +1,21 @@
+#include "split-per-source.h"
+
+void runKernelsFromFile2() {
+  cl::sycl::queue Q;
+  int Data = 0;
+  {
+    cl::sycl::program Prg(Q.get_context());
+    cl::sycl::buffer<int, 1> Buf(&Data, cl::sycl::range<1>(1));
+    Prg.build_with_kernel_type<File2Kern1>();
+    cl::sycl::kernel Krn = Prg.get_kernel<File2Kern1>();
+
+    assert(!Prg.has_kernel<File1Kern1>());
+    assert(!Prg.has_kernel<File1Kern2>());
+
+    Q.submit([&](cl::sycl::handler &Cgh) {
+      auto Acc = Buf.get_access<cl::sycl::access::mode::read_write>(Cgh);
+      Cgh.single_task<File2Kern1>(Krn, [=]() { Acc[0] = 3; });
+    });
+  }
+  assert(Data == 3);
+}
@@ -0,0 +1,7 @@
+#include <CL/sycl.hpp>
+
+class File1Kern1;
+class File1Kern2;
+class File2Kern1;
+
+void runKernelsFromFile2();
@@ -0,0 +1,4 @@
+// REQUIRES: aoc, accelerator
+
+// RUN: %clangxx -fsycl -fsycl-device-code-split=per_source -fsycl-targets=spir64_fpga-unknown-unknown-sycldevice -I %S/Inputs -o %t.out %S/split-per-source-main.cpp %S/Inputs/split-per-source-second-file.cpp
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
@@ -0,0 +1,4 @@
+// REQUIRES: opencl-aot, cpu
+
+// RUN: %clangxx -fsycl -fsycl-device-code-split=per_source -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice -I %S/Inputs -o %t.out %S/split-per-source-main.cpp %S/Inputs/split-per-source-second-file.cpp
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
@@ -0,0 +1,11 @@
+// REQUIRES: ocloc, gpu
+// UNSUPPORTED: cuda
+// CUDA does neither support device code splitting nor SPIR.
+//
+// RUN: %clangxx -fsycl -fsycl-device-code-split=per_source \
+// RUN:   -fsycl-targets=spir64_gen-unknown-unknown-sycldevice \
+// RUN:   -Xsycl-target-backend=spir64_gen-unknown-unknown-sycldevice \
+// RUN:   "-device *" -I %S/Inputs -o %t.out \
+// RUN:   %S/split-per-source-main.cpp \
+// RUN:   %S/Inputs/split-per-source-second-file.cpp
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
@@ -0,0 +1,67 @@
+// UNSUPPORTED: cuda
+// CUDA does not support device code splitting.
+//
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_kernel -o %t.out %s
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+
+#include <CL/sycl.hpp>
+
+class Kern1;
+class Kern2;
+class Kern3;
+
+int main() {
+  cl::sycl::queue Q;
+  int Data = 0;
+  {
+    cl::sycl::buffer<int, 1> Buf(&Data, cl::sycl::range<1>(1));
+    cl::sycl::program Prg(Q.get_context());
+    Prg.build_with_kernel_type<Kern1>();
+    cl::sycl::kernel Krn = Prg.get_kernel<Kern1>();
+
+    assert(!Prg.has_kernel<Kern2>());
+    assert(!Prg.has_kernel<Kern3>());
+
+    Q.submit([&](cl::sycl::handler &Cgh) {
+      auto Acc = Buf.get_access<cl::sycl::access::mode::read_write>(Cgh);
+      Cgh.single_task<Kern1>(Krn, [=]() { Acc[0] = 1; });
+    });
+  }
+  assert(Data == 1);
+
+  {
+    cl::sycl::buffer<int, 1> Buf(&Data, cl::sycl::range<1>(1));
+    cl::sycl::program Prg(Q.get_context());
+    Prg.build_with_kernel_type<Kern2>();
+    cl::sycl::kernel Krn = Prg.get_kernel<Kern2>();
+
+    assert(!Prg.has_kernel<Kern1>());
+    assert(!Prg.has_kernel<Kern3>());
+
+    Q.submit([&](cl::sycl::handler &Cgh) {
+      auto Acc = Buf.get_access<cl::sycl::access::mode::read_write>(Cgh);
+      Cgh.single_task<Kern2>(Krn, [=]() { Acc[0] = 2; });
+    });
+  }
+  assert(Data == 2);
+
+  {
+    cl::sycl::buffer<int, 1> Buf(&Data, cl::sycl::range<1>(1));
+    cl::sycl::program Prg(Q.get_context());
+    Prg.build_with_kernel_type<Kern3>();
+    cl::sycl::kernel Krn = Prg.get_kernel<Kern3>();
+
+    assert(!Prg.has_kernel<Kern1>());
+    assert(!Prg.has_kernel<Kern2>());
+
+    Q.submit([&](cl::sycl::handler &Cgh) {
+      auto Acc = Buf.get_access<cl::sycl::access::mode::read_write>(Cgh);
+      Cgh.single_task<Kern3>(Krn, [=]() { Acc[0] = 3; });
+    });
+  }
+  assert(Data == 3);
+
+  return 0;
+}
@@ -0,0 +1,53 @@
+// UNSUPPORTED: cuda
+// CUDA does not support device code splitting.
+//
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_source -I %S/Inputs -o %t.out %s %S/Inputs/split-per-source-second-file.cpp
+// RUN: %CPU_RUN_PLACEHOLDER %t.out
+// RUN: %GPU_RUN_PLACEHOLDER %t.out
+// RUN: %ACC_RUN_PLACEHOLDER %t.out
+
+#include "Inputs/split-per-source.h"
+
+int main() {
+  cl::sycl::queue Q;
+  int Data = 0;
+  {
+    cl::sycl::buffer<int, 1> Buf(&Data, cl::sycl::range<1>(1));
+    cl::sycl::program Prg(Q.get_context());
+    Prg.build_with_kernel_type<File1Kern1>();
+    cl::sycl::kernel Krn = Prg.get_kernel<File1Kern1>();
+
+    assert(Prg.has_kernel<File1Kern2>());
+    // TODO uncomment once the KernelInfo in multiple translation units
+    // bug is fixed.
+    // assert(!Prg.has_kernel<File2Kern1>());
+
+    Q.submit([&](cl::sycl::handler &Cgh) {
+      auto Acc = Buf.get_access<cl::sycl::access::mode::read_write>(Cgh);
+      Cgh.single_task<File1Kern1>(/*Krn,*/ [=]() { Acc[0] = 1; });
+    });
+  }
+  assert(Data == 1);
+
+  {
+    cl::sycl::buffer<int, 1> Buf(&Data, cl::sycl::range<1>(1));
+    cl::sycl::program Prg(Q.get_context());
+    Prg.build_with_kernel_type<File1Kern2>();
+    cl::sycl::kernel Krn = Prg.get_kernel<File1Kern2>();
+
+    assert(Prg.has_kernel<File1Kern1>());
+    // TODO uncomment once the KernelInfo in multiple translation units
+    // bug is fixed.
+    // assert(!Prg.has_kernel<File2Kern1>());
+
+    Q.submit([&](cl::sycl::handler &Cgh) {
+      auto Acc = Buf.get_access<cl::sycl::access::mode::read_write>(Cgh);
+      Cgh.single_task<File1Kern2>(/*Krn,*/ [=]() { Acc[0] = 2; });
+    });
+  }
+  assert(Data == 2);
+
+  runKernelsFromFile2();
+
+  return 0;
+}