Skip to content

Commit 98acd59

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web'
2 parents e375b37 + 7c9bd09 commit 98acd59

File tree

15 files changed

+162
-102
lines changed

15 files changed

+162
-102
lines changed

.github/workflows/windows_test_comment_trigger.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ jobs:
2828
const pr = await github.rest.pulls.get({
2929
owner: context.issue.owner,
3030
repo: context.issue.repo,
31-
pull_number: number
31+
pull_number: context.issue.number
3232
});
3333
return pr.data.head.sha
3434
- name: update_pr_status_pending

libclc/generic/libspirv/integer/ctz.cl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,15 +34,15 @@ _CLC_OVERLOAD _CLC_DEF ulong __spirv_ocl_ctz(ulong x) {
3434
}
3535

3636
_CLC_OVERLOAD _CLC_DEF char __spirv_ocl_ctz(char x) {
37-
return __spirv_ocl_ctz((ushort)(uchar)x) - 8;
37+
return x ? __spirv_ocl_ctz((ushort)(uchar)x) : 8;
3838
}
3939

4040
_CLC_OVERLOAD _CLC_DEF schar __spirv_ocl_ctz(schar x) {
41-
return __spirv_ocl_ctz((ushort)(uchar)x) - 8;
41+
return x ? __spirv_ocl_ctz((ushort)(uchar)x) : 8;
4242
}
4343

4444
_CLC_OVERLOAD _CLC_DEF uchar __spirv_ocl_ctz(uchar x) {
45-
return __spirv_ocl_ctz((ushort)x) - 8;
45+
return x ? __spirv_ocl_ctz((ushort)x) : 8;
4646
}
4747

4848
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, __spirv_ocl_ctz, char)

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -635,9 +635,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
635635

636636
// Try vectorization/scalarization transforms that are both improvements
637637
// themselves and can allow further folds with GVN and InstCombine.
638-
// Disable for SYCL until SPIR-V reader is updated for all drivers.
639-
if (!SYCLOptimizationMode)
640-
FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
638+
FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
641639

642640
// Eliminate redundancies.
643641
FPM.addPass(MergedLoadStoreMotionPass());

llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1195,6 +1195,25 @@ translateSpirvGlobalUses(LoadInst *LI, StringRef SpirvGlobalName,
11951195
return;
11961196
}
11971197

1198+
// As an optimization of accesses of the first element for vector SPIRV
1199+
// globals sometimes the load will return a scalar and the uses can be of any
1200+
// pattern. In this case, generate GenX calls to access the first element and
1201+
// update the use to instead use the GenX call result rather than the load
1202+
// result.
1203+
if (!LI->getType()->isVectorTy()) {
1204+
// Copy users to seperate container for safe modification
1205+
// during iteration.
1206+
SmallVector<User *> Users(LI->users());
1207+
for (User *LU : Users) {
1208+
Instruction *Inst = cast<Instruction>(LU);
1209+
NewInst =
1210+
generateSpirvGlobalGenX(Inst, SpirvGlobalName, /*IndexValue=*/0);
1211+
LU->replaceUsesOfWith(LI, NewInst);
1212+
}
1213+
InstsToErase.push_back(LI);
1214+
return;
1215+
}
1216+
11981217
// Only loads from _vector_ SPIRV globals reach here now. Their users are
11991218
// expected to be ExtractElementInst only, and they are
12001219
// replaced in this loop. When loads from _scalar_ SPIRV globals are handled
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
; RUN: opt -opaque-pointers < %s -passes=LowerESIMD -S | FileCheck %s
2+
3+
; This test checks we lower vector SPIRV globals correctly if
4+
; it is accessed as a scalar as an optimization to get the first element
5+
6+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
7+
target triple = "spir64-unknown-unknown"
8+
9+
@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
10+
11+
define spir_kernel void @"__spirv_GlobalInvocationId_xyz"(i64 addrspace(1)* %_arg_) {
12+
; CHECK-LABEL: @__spirv_GlobalInvocationId_xyz(
13+
; CHECK-NEXT: entry:
14+
; CHECK-NEXT: [[DOTESIMD6:%.*]] = call <3 x i32> @llvm.genx.local.id.v3i32()
15+
; CHECK-NEXT: [[LOCAL_ID_X:%.*]] = extractelement <3 x i32> [[DOTESIMD6]], i32 0
16+
; CHECK-NEXT: [[LOCAL_ID_X_CAST_TY:%.*]] = zext i32 [[LOCAL_ID_X]] to i64
17+
; CHECK-NEXT: [[DOTESIMD7:%.*]] = call <3 x i32> @llvm.genx.local.size.v3i32()
18+
; CHECK-NEXT: [[WGSIZE_X:%.*]] = extractelement <3 x i32> [[DOTESIMD7]], i32 0
19+
; CHECK-NEXT: [[WGSIZE_X_CAST_TY:%.*]] = zext i32 [[WGSIZE_X]] to i64
20+
; CHECK-NEXT: [[GROUP_ID_X:%.*]] = call i32 @llvm.genx.group.id.x()
21+
; CHECK-NEXT: [[GROUP_ID_X_CAST_TY:%.*]] = zext i32 [[GROUP_ID_X]] to i64
22+
; CHECK-NEXT: [[MUL8:%.*]] = mul i64 [[WGSIZE_X_CAST_TY]], [[GROUP_ID_X_CAST_TY]]
23+
; CHECK-NEXT: [[ADD9:%.*]] = add i64 [[LOCAL_ID_X_CAST_TY]], [[MUL8]]
24+
; CHECK-NEXT: [[MUL10:%.*]] = shl nuw nsw i64 [[ADD9]], 5
25+
26+
; Verify that the attribute is deleted from GenX declaration
27+
; CHECK-NOT: readnone
28+
entry:
29+
%0 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32
30+
%mul.i = shl nuw nsw i64 %0, 5
31+
ret void
32+
}

sycl/cmake/modules/AddBoostMp11Headers.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ add_custom_target(boost_mp11-headers
4545
# ${BOOST_MP11_DESTINATION_DIR}
4646
add_custom_command(
4747
OUTPUT ${OUT_HEADERS_BOOST_MP11}
48-
DEPENDS ${HEADERS_BOOST_MP11}
48+
DEPENDS ${HEADERS_BOOST_MP11} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/PreprocessBoostMp11Headers.cmake
4949
COMMAND ${CMAKE_COMMAND}
5050
-DIN=${BOOST_MP11_SOURCE_DIR}/include/boost
5151
-DOUT=${BOOST_MP11_DESTINATION_DIR}

sycl/cmake/modules/PreprocessBoostMp11Headers.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,12 @@ function(preprocess_mp11_header)
3737
# `namespace sycl { namespace detail { namespace boost { ... } } }`
3838
string(REGEX REPLACE
3939
"(\n[ \t]*namespace[ \t\n\r]+boost)"
40-
"namespace sycl\n{\nnamespace detail\n{\\1"
40+
"namespace sycl\n{\ninline namespace _V1\n{\nnamespace detail\n{\\1"
4141
FILE_CONTENTS "${FILE_CONTENTS}")
4242
# ... use '} // namespace boost' as a marker for end-of-scope '}' replacement
4343
string(REGEX REPLACE
4444
"(\n[ \t]*}[ \t]*//[ \t]*namespace[ \t]+boost[ \t]*\n)"
45-
"\\1} // namespace detail\n} // namespace sycl\n"
45+
"\\1} // namespace detail\n} // namespace _V1\n} // namespace sycl\n"
4646
FILE_CONTENTS "${FILE_CONTENTS}")
4747
# 3) replace `boost` in `#include <boost/...>` or `#include "boost/..."` with
4848
# `sycl/detail/boost`

sycl/include/sycl/ext/oneapi/properties/property_utils.hpp

Lines changed: 11 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
#include <sycl/detail/property_helper.hpp>
1212
#include <sycl/ext/oneapi/properties/property.hpp>
13+
#include <sycl/detail/boost/mp11.hpp>
1314

1415
#include <tuple>
1516

@@ -106,93 +107,20 @@ template <typename RHS> struct SelectNonVoid<void, RHS> {
106107
using type = RHS;
107108
};
108109

109-
// Merges two tuples by recursively extracting the type with the minimum
110-
// PropertyID in the two tuples and prepending it to the merging of the
111-
// remaining elements.
112-
template <typename T1, typename T2> struct Merge {};
113-
template <typename... LTs> struct Merge<std::tuple<LTs...>, std::tuple<>> {
114-
using type = std::tuple<LTs...>;
110+
// Sort types accoring to their PropertyID.
111+
struct SortByPropertyId {
112+
template <typename T1, typename T2>
113+
using fn = sycl::detail::boost::mp11::mp_bool<(PropertyID<T1>::value <
114+
PropertyID<T2>::value)>;
115115
};
116-
template <typename... RTs> struct Merge<std::tuple<>, std::tuple<RTs...>> {
117-
using type = std::tuple<RTs...>;
118-
};
119-
template <typename... LTs, typename... RTs>
120-
struct Merge<std::tuple<LTs...>, std::tuple<RTs...>> {
121-
using l_head = GetFirstType<LTs...>;
122-
using r_head = GetFirstType<RTs...>;
123-
static constexpr bool left_has_min =
124-
PropertyID<l_head>::value < PropertyID<r_head>::value;
125-
using l_split = HeadSplit<std::tuple<LTs...>, left_has_min>;
126-
using r_split = HeadSplit<std::tuple<RTs...>, !left_has_min>;
127-
using min = typename SelectNonVoid<typename l_split::htype,
128-
typename r_split::htype>::type;
129-
using merge_tails =
130-
typename Merge<typename l_split::ttype, typename r_split::ttype>::type;
131-
using type = typename PrependTuple<min, merge_tails>::type;
132-
};
133-
134-
// Creates pairs of tuples with a single element from a tuple with N elements.
135-
// Resulting tuple will have ceil(N/2) elements.
136-
template <typename...> struct CreateTuplePairs {
137-
using type = typename std::tuple<>;
138-
};
139-
template <typename T> struct CreateTuplePairs<T> {
140-
using type = typename std::tuple<std::pair<std::tuple<T>, std::tuple<>>>;
141-
};
142-
template <typename L, typename R, typename... Rest>
143-
struct CreateTuplePairs<L, R, Rest...> {
144-
using type =
145-
typename PrependTuple<std::pair<std::tuple<L>, std::tuple<R>>,
146-
typename CreateTuplePairs<Rest...>::type>::type;
147-
};
148-
149-
// Merges pairs of tuples and creates new pairs of the merged pairs. Let N be
150-
// the number of pairs in the supplied tuple, then the resulting tuple will
151-
// contain ceil(N/2) pairs of tuples.
152-
template <typename T> struct MergePairs {
153-
using type = std::tuple<>;
154-
};
155-
template <typename... LTs, typename... RTs, typename... Rest>
156-
struct MergePairs<
157-
std::tuple<std::pair<std::tuple<LTs...>, std::tuple<RTs...>>, Rest...>> {
158-
using merged = typename Merge<std::tuple<LTs...>, std::tuple<RTs...>>::type;
159-
using type = std::tuple<std::pair<merged, std::tuple<>>>;
160-
};
161-
template <typename... LLTs, typename... LRTs, typename... RLTs,
162-
typename... RRTs, typename... Rest>
163-
struct MergePairs<
164-
std::tuple<std::pair<std::tuple<LLTs...>, std::tuple<LRTs...>>,
165-
std::pair<std::tuple<RLTs...>, std::tuple<RRTs...>>, Rest...>> {
166-
using lmerged =
167-
typename Merge<std::tuple<LLTs...>, std::tuple<LRTs...>>::type;
168-
using rmerged =
169-
typename Merge<std::tuple<RLTs...>, std::tuple<RRTs...>>::type;
170-
using type = typename PrependTuple<
171-
std::pair<lmerged, rmerged>,
172-
typename MergePairs<std::tuple<Rest...>>::type>::type;
173-
};
174-
175-
// Recursively merges all pairs of tuples until only a single pair of tuples
176-
// is left, where the right element of the pair is an empty tuple.
177-
template <typename T> struct MergeAll {};
178-
template <typename... Ts> struct MergeAll<std::tuple<Ts...>> {
179-
using type = std::tuple<Ts...>;
180-
};
181-
template <typename... Ts>
182-
struct MergeAll<std::tuple<std::pair<std::tuple<Ts...>, std::tuple<>>>> {
183-
using type = std::tuple<Ts...>;
184-
};
185-
template <typename T, typename... Ts> struct MergeAll<std::tuple<T, Ts...>> {
186-
using reduced = typename MergePairs<std::tuple<T, Ts...>>::type;
187-
using type = typename MergeAll<reduced>::type;
188-
};
189-
190-
// Performs merge-sort on types with PropertyID.
191116
template <typename... Ts> struct Sorted {
192117
static_assert(detail::AllPropertyValues<std::tuple<Ts...>>::value,
193118
"Unrecognized property in property list.");
194-
using split = typename CreateTuplePairs<Ts...>::type;
195-
using type = typename MergeAll<split>::type;
119+
using properties = sycl::detail::boost::mp11::mp_list<Ts...>;
120+
using sortedProperties =
121+
sycl::detail::boost::mp11::mp_sort_q<properties, SortByPropertyId>;
122+
using type =
123+
sycl::detail::boost::mp11::mp_rename<sortedProperties, std::tuple>;
196124
};
197125

198126
// Checks if the types in a tuple are sorted w.r.t. their PropertyID.

sycl/include/sycl/marray.hpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,17 +72,25 @@ template <typename DataT, typename ArgT, typename... ArgTN>
7272
struct ArrayCreator<DataT, ArgT, ArgTN...> {
7373
static constexpr std::array<DataT, GetMArrayArgsSize<ArgT, ArgTN...>::value>
7474
Create(const ArgT &Arg, const ArgTN &...Args) {
75-
return ConcatArrays(std::array<DataT, 1>{static_cast<DataT>(Arg)},
76-
ArrayCreator<DataT, ArgTN...>::Create(Args...));
75+
std::array<DataT, 1> ImmArray{static_cast<DataT>(Arg)};
76+
if constexpr (sizeof...(Args))
77+
return ConcatArrays(ImmArray,
78+
ArrayCreator<DataT, ArgTN...>::Create(Args...));
79+
else
80+
return ImmArray;
7781
}
7882
};
7983
template <typename DataT, typename T, std::size_t N, typename... ArgTN>
8084
struct ArrayCreator<DataT, marray<T, N>, ArgTN...> {
8185
static constexpr std::array<DataT,
8286
GetMArrayArgsSize<marray<T, N>, ArgTN...>::value>
8387
Create(const marray<T, N> &Arg, const ArgTN &...Args) {
84-
return ConcatArrays(MArrayToArray<DataT>(Arg),
85-
ArrayCreator<DataT, ArgTN...>::Create(Args...));
88+
auto ImmArray = MArrayToArray<DataT>(Arg);
89+
if constexpr (sizeof...(Args))
90+
return ConcatArrays(ImmArray,
91+
ArrayCreator<DataT, ArgTN...>::Create(Args...));
92+
else
93+
return ImmArray;
8694
}
8795
};
8896
template <typename DataT> struct ArrayCreator<DataT> {

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1966,6 +1966,53 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
19661966
name.data());
19671967
}
19681968

1969+
case PI_DEVICE_INFO_MAX_MEM_BANDWIDTH: {
1970+
int major = 0;
1971+
sycl::detail::pi::assertion(
1972+
cuDeviceGetAttribute(&major,
1973+
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
1974+
device->get()) == CUDA_SUCCESS);
1975+
1976+
int minor = 0;
1977+
sycl::detail::pi::assertion(
1978+
cuDeviceGetAttribute(&minor,
1979+
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
1980+
device->get()) == CUDA_SUCCESS);
1981+
1982+
// Some specific devices seem to need special handling. See reference
1983+
// https://github.com/jeffhammond/HPCInfo/blob/master/cuda/gpu-detect.cu
1984+
bool is_xavier_agx = major == 7 && minor == 2;
1985+
bool is_orin_agx = major == 8 && minor == 7;
1986+
1987+
int memory_clock_khz = 0;
1988+
if (is_xavier_agx) {
1989+
memory_clock_khz = 2133000;
1990+
} else if (is_orin_agx) {
1991+
memory_clock_khz = 3200000;
1992+
} else {
1993+
sycl::detail::pi::assertion(
1994+
cuDeviceGetAttribute(&memory_clock_khz,
1995+
CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE,
1996+
device->get()) == CUDA_SUCCESS);
1997+
}
1998+
1999+
int memory_bus_width = 0;
2000+
if (is_orin_agx) {
2001+
memory_bus_width = 256;
2002+
} else {
2003+
sycl::detail::pi::assertion(
2004+
cuDeviceGetAttribute(&memory_bus_width,
2005+
CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH,
2006+
device->get()) == CUDA_SUCCESS);
2007+
}
2008+
2009+
uint64_t memory_bandwidth =
2010+
uint64_t(memory_clock_khz) * memory_bus_width * 250;
2011+
2012+
return getInfo(param_value_size, param_value, param_value_size_ret,
2013+
memory_bandwidth);
2014+
}
2015+
19692016
// TODO: Investigate if this information is available on CUDA.
19702017
case PI_DEVICE_INFO_PCI_ADDRESS:
19712018
case PI_DEVICE_INFO_GPU_EU_COUNT:
@@ -1974,7 +2021,6 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
19742021
case PI_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE:
19752022
case PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE:
19762023
case PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU:
1977-
case PI_DEVICE_INFO_MAX_MEM_BANDWIDTH:
19782024
return PI_ERROR_INVALID_VALUE;
19792025

19802026
default:
@@ -2985,7 +3031,7 @@ pi_result cuda_piEnqueueKernelLaunch(
29853031
return PI_ERROR_INVALID_WORK_GROUP_SIZE;
29863032

29873033
if (local_work_size[dim] > maxThreadsPerBlock[dim])
2988-
return PI_ERROR_INVALID_WORK_ITEM_SIZE;
3034+
return PI_ERROR_INVALID_WORK_GROUP_SIZE;
29893035
// Checks that local work sizes are a divisor of the global work sizes
29903036
// which includes that the local work sizes are neither larger than
29913037
// the global work sizes and not 0.

sycl/plugins/hip/pi_hip.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2867,7 +2867,7 @@ pi_result hip_piEnqueueKernelLaunch(
28672867
if (providedLocalWorkGroupSize) {
28682868
auto isValid = [&](int dim) {
28692869
if (local_work_size[dim] > maxThreadsPerBlock[dim])
2870-
return PI_ERROR_INVALID_WORK_ITEM_SIZE;
2870+
return PI_ERROR_INVALID_WORK_GROUP_SIZE;
28712871
// Checks that local work sizes are a divisor of the global work sizes
28722872
// which includes that the local work sizes are neither larger than the
28732873
// global work sizes and not 0.

sycl/source/detail/context_impl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,7 @@ std::vector<RT::PiEvent> context_impl::initializeDeviceGlobals(
339339
// Write the pointer to the device global and store the event in the
340340
// initialize events list.
341341
RT::PiEvent InitEvent;
342-
void *USMPtr = DeviceGlobalUSM.getPtr();
342+
void *const &USMPtr = DeviceGlobalUSM.getPtr();
343343
Plugin.call<PiApiKind::piextEnqueueDeviceGlobalVariableWrite>(
344344
QueueImpl->getHandleRef(), NativePrg,
345345
DeviceGlobalEntry->MUniqueId.c_str(), false, sizeof(void *), 0,

sycl/source/detail/device_global_map_entry.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ struct DeviceGlobalUSMMem {
6464
DeviceGlobalUSMMem(void *Ptr) : MPtr(Ptr) {}
6565
~DeviceGlobalUSMMem();
6666

67-
void *getPtr() const noexcept { return MPtr; }
67+
void *const &getPtr() const noexcept { return MPtr; }
6868

6969
// Gets the zero-initialization event if it exists. If not the OwnedPiEvent
7070
// will contain no event.

sycl/test-e2e/AOT/cpu.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
//==--- cpu.cpp - AOT compilation for cpu devices using opencl-aot --------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===---------------------------------------------------------------------===//
8+
9+
// REQUIRES: opencl-aot, cpu
10+
11+
// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 %S/Inputs/aot.cpp -o %t.out
12+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
13+
14+
// Test that opencl-aot can handle multiple build options.
15+
// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 %S/Inputs/aot.cpp -Xsycl-target-backend "--bo=-g" -Xsycl-target-backend "--bo=-cl-opt-disable" -o %t2.out

sycl/test-e2e/AOT/gpu.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
//==--- gpu.cpp - AOT compilation for gen devices using GEN compiler ------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===---------------------------------------------------------------------===//
8+
9+
// REQUIRES: ocloc, gpu
10+
// UNSUPPORTED: cuda
11+
// CUDA is not compatible with SPIR.
12+
//
13+
// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen %gpu_aot_target_opts %S/Inputs/aot.cpp -o %t.out
14+
// RUN: %GPU_RUN_PLACEHOLDER %t.out

0 commit comments

Comments
 (0)