
Commit 61356e3

Merge from 'sycl' to 'sycl-web' (3 commits)
CONFLICT (content): Merge conflict in clang/lib/Basic/Targets/NVPTX.cpp
2 parents: 25ae464 + 37a7b45
27 files changed: +459 -47 lines

clang/include/clang/Basic/LangOptions.def

Lines changed: 4 additions & 0 deletions
@@ -318,6 +318,10 @@ LANGOPT(
     "SYCL compiler assumes value fits within MAX_INT for member function of "
     "get/operator[], get_id/operator[] and get_global_id/get_global_linear_id "
     "in SYCL class id, item and nd_item")
+LANGOPT(SYCLCUDACompat, 1, 0,
+        "Enable CUDA definitions and implicit includes when building for the "
+        "NVPTX backend. This mode helps SYCL programs run on top of the CUDA "
+        "infrastructure on Nvidia platforms.")
 ENUM_LANGOPT(SYCLRangeRounding, SYCLRangeRoundingPreference, 2,
              SYCLRangeRoundingPreference::On,
              "Preference for SYCL parallel_for range rounding")

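For context, a hypothetical translation unit this mode targets (a sketch assuming the intel/llvm SYCL headers and a USM device pointer; not part of this commit): with SYCLCUDACompat on, CUDA specifiers such as __device__ and the implicit CUDA wrapper headers become available inside a SYCL compilation, so existing CUDA helpers can be reused from SYCL device code.

#include <sycl/sycl.hpp>

__device__ float scale(float x) { return 2.0f * x; } // CUDA device function

void run(sycl::queue &q, float *p) { // p: USM device allocation
  q.single_task([=] {
    p[0] = scale(p[0]); // SYCL device code calling the CUDA __device__ helper
  });
}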
clang/include/clang/Driver/Options.td

Lines changed: 7 additions & 0 deletions
@@ -7104,6 +7104,13 @@ defm sycl_decompose_functor
       NegFlag<SetFalse, [], [ClangOption, CLOption], "Do not">,
       BothFlags<[], [ClangOption, CLOption, CC1Option],
                 " decompose SYCL functor if possible (experimental, CUDA only)">>;
+defm sycl_cuda_compat
+    : BoolFOption<"sycl-cuda-compatibility", LangOpts<"SYCLCUDACompat">, DefaultFalse,
+                  PosFlag<SetTrue, [], [ClangOption, CLOption, CC1Option], "Enable CUDA compatibility mode (experimental). "
+                          "Enable the use of CUDA device code with SYCL device code. "
+                          "Under this mode, a SYCL device function can call a CUDA device function (but not the other way around). "
+                          "This implies the definition of CUDA macros and the inclusion of implicit header files.">,
+                  NegFlag<SetFalse, [], [ClangOption, CLOption, CC1Option], "Disable CUDA compatibility mode.">>;
 def flink_huge_device_code : Flag<["-"], "flink-huge-device-code">,
   HelpText<"Generate and use a custom linker script for huge device code "
            "sections">;

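To make the help text's direction rule concrete, a hypothetical sketch (not from this commit); a plausible invocation would be along the lines of clang++ -fsycl -fsycl-targets=nvptx64-nvidia-cuda -fsycl-cuda-compatibility a.cpp:

__device__ int cuda_helper(int x) { return x + 1; } // CUDA device function

int sycl_fn(int x) {     // ordinary function, reachable from a SYCL kernel
  return cuda_helper(x); // OK: SYCL device code -> CUDA __device__ function
}
// The reverse direction, CUDA device code calling into SYCL-specific device
// code, is not supported under this mode.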
clang/include/clang/Sema/SemaBase.h

Lines changed: 1 addition & 0 deletions
@@ -111,6 +111,7 @@ class SemaBase {
     CudaAll = CudaDevice | CudaHost,
     /// SYCL specific diagnostic.
     Sycl = 1 << 4,
+    SyclCudaCompat = Sycl | CudaAll,
     /// ESIMD specific diagnostic.
     Esimd = 1 << 5,
     /// A flag representing 'all'. This can be used to avoid the check

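Note that SyclCudaCompat is a mask over existing bits rather than a new bit. A standalone sketch (the CudaDevice/CudaHost values are assumptions for illustration):

#include <cstdint>

enum DeviceDiagnosticReason : uint32_t {
  CudaDevice = 1 << 2, // assumed value, for illustration
  CudaHost = 1 << 3,   // assumed value, for illustration
  CudaAll = CudaDevice | CudaHost,
  Sycl = 1 << 4,
  SyclCudaCompat = Sycl | CudaAll, // a mask, not a fresh bit
};

// A diagnostic tagged SyclCudaCompat is picked up both by filters looking
// for the SYCL reason and by those looking for the CUDA reasons:
static_assert((SyclCudaCompat & Sycl) != 0, "matches SYCL filters");
static_assert((SyclCudaCompat & CudaAll) == CudaAll, "matches CUDA filters");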
clang/include/clang/Sema/SemaCUDA.h

Lines changed: 3 additions & 0 deletions
@@ -157,6 +157,9 @@ class SemaCUDA : public SemaBase {
 
   // CUDA function call preference. Must be ordered numerically from
   // worst to best.
+  // Note: in SYCL-CUDA compatibility mode, Native, SameSide and HostDevice
+  // don't follow the naming, only the ranking system (i.e. 1st, 2nd or 3rd
+  // choice). See the table near IdentifyPreference.
   enum CUDAFunctionPreference {
     CFP_Never,     // Invalid caller/callee combination.
     CFP_WrongSide, // Calls from host-device to host or device

clang/lib/Basic/LangOptions.cpp

Lines changed: 1 addition & 1 deletion
@@ -183,7 +183,7 @@ void LangOptions::setLangDefaults(LangOptions &Opts, Language Lang,
   }
 
   Opts.HIP = Lang == Language::HIP;
-  Opts.CUDA = Lang == Language::CUDA || Opts.HIP;
+  Opts.CUDA = Lang == Language::CUDA || Opts.HIP || Opts.SYCLCUDACompat;
   if (Opts.HIP) {
     // HIP toolchain does not support 'Fast' FPOpFusion in backends since it
     // fuses multiplication/addition instructions without contract flag from

clang/lib/Basic/Targets/NVPTX.cpp

Lines changed: 10 additions & 3 deletions
@@ -305,11 +305,14 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
       }
       llvm_unreachable("unhandled OffloadArch");
     }();
-    if (Opts.SYCLIsDevice) {
+
+    if (Opts.SYCLIsDevice)
       Builder.defineMacro("__SYCL_CUDA_ARCH__", CUDAArchCode);
-    } else {
+    // Don't define __CUDA_ARCH__ if in SYCL device mode unless we are in
+    // SYCL-CUDA compatibility mode.
+    // For all other cases, define the macro.
+    if (!Opts.SYCLIsDevice || Opts.SYCLCUDACompat)
       Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
-    }
     switch (GPU) {
     case OffloadArch::SM_90a:
     case OffloadArch::SM_100a:
@@ -321,6 +324,10 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
       // Do nothing if this is not an enhanced architecture.
       break;
     }
+    if (GPU == OffloadArch::SM_90a)
+      Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
+    if (GPU == OffloadArch::SM_100a)
+      Builder.defineMacro("__CUDA_ARCH_FEAT_SM100_ALL", "1");
   }
 }

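The net effect on user-visible macros, sketched for an sm_90a device compile (hypothetical snippet, not from this commit): plain SYCL NVPTX device compilation defines only __SYCL_CUDA_ARCH__, while SYCL-CUDA compatibility mode also defines __CUDA_ARCH__, so unmodified CUDA sources keep working.

#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 900
#if defined(__CUDA_ARCH_FEAT_SM90_ALL)
// sm_90a-only path (architecture-specific features)
#endif
#endif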
clang/lib/CodeGen/CodeGenFunction.cpp

Lines changed: 16 additions & 11 deletions
@@ -1864,16 +1864,6 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
   if (Body && isa_and_nonnull<CoroutineBodyStmt>(Body))
     llvm::append_range(FnArgs, FD->parameters());
 
-  // Generate a dummy __host__ function for compiling CUDA sources in SYCL.
-  if (getLangOpts().CUDA && !getLangOpts().CUDAIsDevice &&
-      getLangOpts().SYCLIsHost && !FD->hasAttr<CUDAHostAttr>() &&
-      FD->hasAttr<CUDADeviceAttr>()) {
-    if (FD->getReturnType()->isVoidType())
-      Builder.CreateRetVoid();
-    else
-      Builder.CreateRet(llvm::UndefValue::get(Fn->getReturnType()));
-    return;
-  }
   // When compiling a CUDA file in SYCL device mode,
   // set weak ODR linkage for possibly duplicated functions.
   if (getLangOpts().CUDA && !getLangOpts().CUDAIsDevice &&
@@ -1890,7 +1880,22 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
 
   // Generate the body of the function.
   PGO.assignRegionCounters(GD, CurFn);
-  if (isa<CXXDestructorDecl>(FD))
+  if (getLangOpts().CUDA && !getLangOpts().CUDAIsDevice &&
+      getLangOpts().SYCLIsHost && !FD->hasAttr<CUDAHostAttr>() &&
+      FD->hasAttr<CUDADeviceAttr>()) {
+    // SYCL host compilation with CUDA compatibility enabled requires the
+    // creation of a host stub function for functions declared with the
+    // __device__ specifier but without the __host__ specifier. This is
+    // because SYCL doesn't use specifiers the way CUDA does, so the host
+    // side may contain what appears to be a call from host to device. As we
+    // can't prevent the emission of such a call, we need to produce a symbol
+    // for each function declared with __device__ only.
+    if (FD->getReturnType()->isVoidType())
+      Builder.CreateRetVoid();
+    else
+      Builder.CreateRet(llvm::UndefValue::get(Fn->getReturnType()));
+    Builder.ClearInsertionPoint();
+  } else if (isa<CXXDestructorDecl>(FD))
     EmitDestructorBody(Args);
   else if (isa<CXXConstructorDecl>(FD))
     EmitConstructorBody(Args);

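Why the host needs a stub, in a hypothetical source (assuming the intel/llvm SYCL headers and a USM pointer; not from this commit): the kernel lambda is compiled for the host as well, so the host object file must resolve the reference even though the __device__ body is never meaningfully run there.

#include <sycl/sycl.hpp>

__device__ int norm2(int x, int y) { return x * x + y * y; } // no __host__

void submit(sycl::queue &q, int *out) {
  q.single_task([=] {
    *out = norm2(3, 4); // the host compilation of this lambda still
                        // references norm2, so codegen emits a host stub
                        // returning undef (or void) instead of a real body
  });
}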
clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 32 additions & 1 deletion
@@ -75,6 +75,11 @@ using namespace clang::driver::tools;
 using namespace clang;
 using namespace llvm::opt;
 
+static bool isSYCLCudaCompatEnabled(const ArgList &Args) {
+  return Args.hasFlag(options::OPT_fsycl_cuda_compat,
+                      options::OPT_fno_sycl_cuda_compat, false);
+}
+
 static void CheckPreprocessingOptions(const Driver &D, const ArgList &Args) {
   if (Arg *A = Args.getLastArg(clang::driver::options::OPT_C, options::OPT_CC,
                                options::OPT_fminimize_whitespace,
@@ -1174,7 +1179,8 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
 
   if (JA.isOffloading(Action::OFK_SYCL)) {
     getToolChain().addSYCLIncludeArgs(Args, CmdArgs);
-    if (Inputs[0].getType() == types::TY_CUDA) {
+    if (Inputs[0].getType() == types::TY_CUDA ||
+        isSYCLCudaCompatEnabled(Args)) {
       // Include __clang_cuda_runtime_wrapper.h in .cu SYCL compilation.
       getToolChain().AddCudaIncludeArgs(Args, CmdArgs);
     }
@@ -5508,6 +5514,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   bool IsFPGASYCLOffloadDevice =
       IsSYCLDevice && Triple.getSubArch() == llvm::Triple::SPIRSubArch_fpga;
   const bool IsSYCLNativeCPU = isSYCLNativeCPU(TC);
+  const bool IsSYCLCUDACompat = isSYCLCudaCompatEnabled(Args);
 
   // Perform the SYCL host compilation using an external compiler if the user
   // requested.
@@ -5877,6 +5884,17 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("-fno-sycl-esimd-build-host-code");
   }
 
+  if (IsSYCLCUDACompat) {
+    Args.addOptInFlag(CmdArgs, options::OPT_fsycl_cuda_compat,
+                      options::OPT_fno_sycl_cuda_compat);
+    // FIXME: clang's CUDA headers require this; remove it when
+    // clang/lib/Headers/__clang_cuda_builtin_vars.h no longer
+    // requires it.
+    CmdArgs.push_back("-fdeclspec");
+    // Note: assumes CUDA 9.0 or newer (required by SYCL for CUDA).
+    CmdArgs.push_back("-fcuda-allow-variadic-functions");
+  }
+
   // Set options for both host and device
   if (SYCLStdArg) {
     SYCLStdArg->render(Args, CmdArgs);
@@ -5943,6 +5961,19 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   bool HasFPGA = false;
   for (auto TI = SYCLTCRange.first, TE = SYCLTCRange.second; TI != TE; ++TI) {
     llvm::Triple SYCLTriple = TI->second->getTriple();
+    if (SYCLTriple.isNVPTX() && IsSYCLCUDACompat && !IsSYCLDevice) {
+      CmdArgs.push_back("-aux-triple");
+      CmdArgs.push_back(Args.MakeArgString(SYCLTriple.normalize()));
+      // We need to figure out which CUDA version we're compiling for, as that
+      // determines how we load and launch GPU kernels.
+      auto *CTC = static_cast<const toolchains::CudaToolChain *>(TI->second);
+      assert(CTC && "Expected valid CUDA Toolchain.");
+      if (CTC->CudaInstallation.version() != CudaVersion::UNKNOWN)
+        CmdArgs.push_back(Args.MakeArgString(
+            Twine("-target-sdk-version=") +
+            CudaVersionToString(CTC->CudaInstallation.version())));
+      break;
+    }
     if (SYCLTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga) {
       HasFPGA = true;
       if (!IsSYCLDevice) {

clang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 3 additions & 0 deletions
@@ -4221,6 +4221,9 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
   Opts.IncludeDefaultHeader = Args.hasArg(OPT_finclude_default_header);
   Opts.DeclareOpenCLBuiltins = Args.hasArg(OPT_fdeclare_opencl_builtins);
 
+  Opts.SYCLCUDACompat =
+      Args.hasFlag(OPT_fsycl_cuda_compat, OPT_fno_sycl_cuda_compat, false);
+
   LangOptions::setLangDefaults(Opts, IK.getLanguage(), T, Includes, LangStd);
 
   // The key paths of codegen options defined in Options.td start with

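The placement matters: setLangDefaults consults this option (see the LangOptions.cpp hunk above), so the flag has to be parsed first. Schematically:

// Parse the flag before language defaults are derived...
Opts.SYCLCUDACompat =
    Args.hasFlag(OPT_fsycl_cuda_compat, OPT_fno_sycl_cuda_compat, false);
// ...because setLangDefaults then computes (LangOptions.cpp):
//   Opts.CUDA = Lang == Language::CUDA || Opts.HIP || Opts.SYCLCUDACompat;
LangOptions::setLangDefaults(Opts, IK.getLanguage(), T, Includes, LangStd);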
clang/lib/Frontend/InitPreprocessor.cpp

Lines changed: 7 additions & 2 deletions
@@ -1511,10 +1511,15 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
   }
 
   // CUDA device path compilation
-  if (LangOpts.CUDAIsDevice && !LangOpts.HIP && !LangOpts.isSYCL()) {
+  // Enabled if CUDA device compilation mode is on, unless HIP is active or
+  // SYCL is active without CUDA compatibility enabled.
+  bool EnableCUDADevicePath = LangOpts.CUDAIsDevice && !LangOpts.HIP &&
+                              (!LangOpts.isSYCL() || LangOpts.SYCLCUDACompat);
+  if (EnableCUDADevicePath) {
     // The CUDA_ARCH value is set for the GPU target specified in the NVPTX
     // backend's target defines.
-    // Note: SYCL targeting nvptx-cuda relies on __SYCL_CUDA_ARCH__ instead.
+    // Note: SYCL targeting nvptx-cuda without SYCL-CUDA compatibility relies
+    // on __SYCL_CUDA_ARCH__ only instead.
     Builder.defineMacro("__CUDA_ARCH__");
   }

clang/lib/Sema/Sema.cpp

Lines changed: 11 additions & 1 deletion
@@ -2150,9 +2150,19 @@ Sema::targetDiag(SourceLocation Loc, unsigned DiagID, const FunctionDecl *FD) {
     return LangOpts.OpenMPIsTargetDevice
                ? OpenMP().diagIfOpenMPDeviceCode(Loc, DiagID, FD)
                : OpenMP().diagIfOpenMPHostCode(Loc, DiagID, FD);
-  if (getLangOpts().CUDA)
+
+  // If SYCLCUDACompat is active, use the SYCL logic instead of CUDA when
+  // compiling the device side but the CUDA logic when compiling the host
+  // side. When compiling the device side, we need this as CUDA looks for the
+  // presence of __device__, __host__ etc. attributes to emit or defer
+  // diagnostics. These aren't always there as SYCL doesn't use such
+  // attributes.
+  if (getLangOpts().CUDA && !getLangOpts().SYCLCUDACompat)
     return getLangOpts().CUDAIsDevice ? CUDA().DiagIfDeviceCode(Loc, DiagID)
                                       : CUDA().DiagIfHostCode(Loc, DiagID);
+  // On the host side, __device__ acts as a guard like the
+  // __SYCL_DEVICE_ONLY__ macro, so use the CUDA logic here.
+  if (getLangOpts().SYCLIsHost && getLangOpts().SYCLCUDACompat)
+    return CUDA().DiagIfHostCode(Loc, DiagID);
 
   if (getLangOpts().SYCLIsDevice)
     return SYCL().DiagIfDeviceCode(Loc, DiagID);

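The practical difference shows up with deferred diagnostics. A hypothetical example (the construct is chosen for illustration) of code that is only an error if actually emitted for the device:

int maybe_throw(int x) {
  if (x < 0)
    throw x; // not allowed in device code; routing through the SYCL logic
             // defers this error until the function is known to be emitted
             // for the device, rather than keying off __device__/__host__
             // attributes that SYCL code doesn't carry
  return x;
}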
clang/lib/Sema/SemaCUDA.cpp

Lines changed: 54 additions & 22 deletions
@@ -222,11 +222,20 @@ SemaCUDA::CUDAVariableTarget SemaCUDA::IdentifyTarget(const VarDecl *Var) {
 // | hd | hd |  HD  |  HD  | (b) |
 //
 // In combined SYCL - CUDA mode
-// Sh - SYCL is host
-// Sd - SYCL is device
+// Sh - SYCL is host (SYCLIsDevice == false and SYCLIsHost == true)
+// Sd - SYCL is device (SYCLIsDevice == true and SYCLIsHost == false)
 //
 // Priority order: N, SS, HD, WS, --
 //
+// Note: we deviate from the actual meaning of
+// N, SS, HD, WS, --.
+// Wrong side (WS) and -- (Never) are still used to raise errors (delayed and
+// immediate respectively). Native (N), SameSide (SS) and HostDevice (HD) are
+// used to rank preference as 1st, 2nd or 3rd choice (N > SS > HD) to determine
+// the best viable function.
+//
+// Extra (x) specifies an alternative handling location from the one in H.
+//
 // |    |    |   host   |  cuda-dev  | sycl-dev  |     |
 // | F  | T  | Ph - Sh  |  Pd - Sh   |  Ph - Sd  |  H  |
 // |----+----+----------+------------+-----------+-----+
@@ -238,14 +247,14 @@ SemaCUDA::CUDAVariableTarget SemaCUDA::IdentifyTarget(const VarDecl *Var) {
 // | g  | g  |    --    |     --     |    --     | (a) |
 // | g  | h  |    --    |     --     |    --     | (e) |
 // | g  | hd |    HD    |     HD     |    HD     | (c) |
-// | h  | d  |   HD(y)  |   WS(v)    |   N(x)    | ( ) |
+// | h  | d  |  HD(y1)  |   WS(z)    |  N (x1)   | ( ) |
 // | h  | g  |    N     |     N      |    N      | (c) |
-// | h  | h  |    N     |     N      |   SS(p)   | ( ) |
-// | h  | hd |    HD    |     HD     |    HD     | ( ) |
-// | hd | d  |   HD(y)  |     SS     |   N(x)    | ( ) |
-// | hd | g  |    SS    |     --     |   --(z)   |(d/a)|
-// | hd | h  |    SS    |     WS     |    SS     | (d) |
-// | hd | hd |    HD    |     HD     |    HD     | (b) |
+// | h  | h  |    N     |     N      |  SS(x2)   | (c) |
+// | h  | hd |  SS(y5)  |     HD     |    HD     | (b) |
+// | hd | d  |  HD(y3)  |     SS     |  N (x1)   | (d) |
+// | hd | g  |  N (y2)  |     --     |  --(x3)   |(d/a)|
+// | hd | h  |  N (y2)  |     WS     |  HD(x4)   | (d) |
+// | hd | hd |  SS(y4)  |     HD     |  SS(x5)   | (b) |
 
 SemaCUDA::CUDAFunctionPreference
 SemaCUDA::IdentifyPreference(const FunctionDecl *Caller,
@@ -266,7 +275,7 @@ SemaCUDA::IdentifyPreference(const FunctionDecl *Caller,
   // Pd - Sh -> CUDA device compilation for SYCL+CUDA
   if (getLangOpts().SYCLIsHost && getLangOpts().CUDA &&
       getLangOpts().CUDAIsDevice) {
-    // (v) allows a __host__ function to call a __device__ one. This is allowed
+    // (z) allows a __host__ function to call a __device__ one. This is allowed
     // for sycl-device compilation, since a regular function (implicitly
     // __host__) called by a SYCL kernel could end up calling a __device__ one.
     // In any case, __host__ functions are not emitted by the cuda-dev
@@ -280,36 +289,59 @@ SemaCUDA::IdentifyPreference(const FunctionDecl *Caller,
   if (getLangOpts().SYCLIsDevice && getLangOpts().CUDA &&
       !getLangOpts().CUDAIsDevice) {
     // (x1) and (x2) prefer __device__ functions in SYCL-device compilation.
-    // (x) allows to pick a __device__ function.
+    // (x1) allows picking a __device__ function.
     if ((CallerTarget == CUDAFunctionTarget::Host ||
          CallerTarget == CUDAFunctionTarget::HostDevice) &&
         CalleeTarget == CUDAFunctionTarget::Device)
       return CFP_Native;
-    // (p) lowers the preference of __host__ functions for favoring __device__
+    // (x2) lowers the preference of __host__ functions for favoring __device__
     // ones.
     if (CallerTarget == CUDAFunctionTarget::Host &&
         CalleeTarget == CUDAFunctionTarget::Host)
      return CFP_SameSide;
 
-    // (z)
+    // (x3)
    if (CallerTarget == CUDAFunctionTarget::HostDevice &&
        CalleeTarget == CUDAFunctionTarget::Global)
      return CFP_Never;
+    // (x4)
+    if (CallerTarget == CUDAFunctionTarget::HostDevice &&
+        CalleeTarget == CUDAFunctionTarget::Host)
+      return CFP_HostDevice;
+    // (x5)
+    if (CallerTarget == CUDAFunctionTarget::HostDevice &&
+        CalleeTarget == CUDAFunctionTarget::HostDevice)
+      return CFP_SameSide;
   }
 
-  // Ph - Sh -> host compilation for SYCL+CUDA
+  // (y) Ph - Sh -> host compilation for SYCL+CUDA
   if (getLangOpts().SYCLIsHost && getLangOpts().CUDA &&
       !getLangOpts().CUDAIsDevice) {
-    // (y) allows __host__ and __host__ __device__ functions to call a
-    // __device__ one. This could happen, if a __device__ function is defined
-    // without having a corresponding __host__. In this case, a dummy __host__
-    // function is generated. This dummy function is required since the lambda
-    // that forms the SYCL kernel (having host device attr.) needs to be
-    // compiled also for the host. (CallerTarget == CUDAFunctionTarget::Host) is added in case a
-    // regular function (implicitly __host__) is called by a SYCL kernel lambda.
-    if ((CallerTarget == CUDAFunctionTarget::Host || CallerTarget == CUDAFunctionTarget::HostDevice) &&
+    // In host mode, allow __host__ and __host__ __device__ functions to call
+    // a __device__ one, but we shouldn't emit the call as __device__
+    // functions are replaced with a trap. __host__ -> __device__ is normally
+    // CFP_Never, but we need to make it a deferred diagnostic.
+    // (y1) h -> d
+    if (CallerTarget == CUDAFunctionTarget::Host &&
         CalleeTarget == CUDAFunctionTarget::Device)
       return CFP_HostDevice;
+    // (y2) hd -> h or hd -> g
+    if (CallerTarget == CUDAFunctionTarget::HostDevice &&
+        (CalleeTarget == CUDAFunctionTarget::Host ||
+         CalleeTarget == CUDAFunctionTarget::Global))
+      return CFP_Native;
+    // (y3) hd -> d
+    if (CallerTarget == CUDAFunctionTarget::HostDevice &&
+        CalleeTarget == CUDAFunctionTarget::Device)
+      return CFP_HostDevice;
+    // (y4) hd -> hd
+    if (CallerTarget == CUDAFunctionTarget::HostDevice &&
+        CalleeTarget == CUDAFunctionTarget::HostDevice)
+      return CFP_SameSide;
+    // (y5) h -> hd
+    if (CallerTarget == CUDAFunctionTarget::Host &&
+        CalleeTarget == CUDAFunctionTarget::HostDevice)
+      return CFP_SameSide;
   }
 
   // If one of the targets is invalid, the check always fails, no matter what

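How the re-ranked preferences play out in overload resolution, sketched for the Ph - Sd column (hypothetical CUDA host/device overloads, not from this commit):

__device__ float fast(float x); // callee d: CFP_Native, 1st choice (x1)
__host__ float fast(float x);   // callee h: CFP_SameSide, 2nd choice (x2)

void caller(float *p) { // implicitly __host__, i.e. SYCL device code here
  *p = fast(*p);        // in SYCL device compilation the __device__ overload
                        // wins, matching the 'h | d' table row
}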
clang/lib/Sema/SemaDecl.cpp

Lines changed: 9 additions & 3 deletions
@@ -20573,9 +20573,13 @@ Sema::DeviceDiagnosticReason Sema::getEmissionReason(const FunctionDecl *FD) {
   if (FD->hasAttr<SYCLSimdAttr>())
     return Sema::DeviceDiagnosticReason::Esimd;
   if (FD->hasAttr<SYCLDeviceAttr>() || FD->hasAttr<SYCLKernelAttr>())
-    return Sema::DeviceDiagnosticReason::Sycl;
+    return getLangOpts().SYCLCUDACompat
+               ? Sema::DeviceDiagnosticReason::SyclCudaCompat
+               : Sema::DeviceDiagnosticReason::Sycl;
   // FIXME: Refine the logic for CUDA and OpenMP.
-  if (getLangOpts().CUDA)
+  // In SYCL-CUDA compat mode, don't return CudaDevice or CudaHost; return
+  // All, just like in normal SYCL.
+  if (getLangOpts().CUDA && !getLangOpts().SYCLCUDACompat)
     return getLangOpts().CUDAIsDevice ? Sema::DeviceDiagnosticReason::CudaDevice
                                       : Sema::DeviceDiagnosticReason::CudaHost;
   if (getLangOpts().OpenMP)
@@ -20657,7 +20661,9 @@ Sema::FunctionEmissionStatus Sema::getEmissionStatus(const FunctionDecl *FD,
       (T == CUDAFunctionTarget::Device || T == CUDAFunctionTarget::Global))
     return FunctionEmissionStatus::CUDADiscarded;
 
-  if (IsEmittedForExternalSymbol())
+  // Defer to the SYCL device logic if in CUDA compat mode.
+  if ((LangOpts.CUDAIsDevice || !LangOpts.SYCLCUDACompat) &&
+      IsEmittedForExternalSymbol())
     return FunctionEmissionStatus::Emitted;
 
   // If FD is a virtual destructor of an explicit instantiation
