@@ -100,6 +100,15 @@ static CGCXXABI *createCXXABI(CodeGenModule &CGM) {
100
100
llvm_unreachable (" invalid C++ ABI kind" );
101
101
}
102
102
103
+ static bool SYCLCUDAIsHost(const clang::LangOptions &LangOpts) {
+   // True only for the host half of a SYCL compilation of CUDA sources:
+   // CUDA mode is active, but this is neither the CUDA-device nor the
+   // SYCL-device pass.
+   const bool CUDAHostPass = LangOpts.CUDA && !LangOpts.CUDAIsDevice;
+   return CUDAHostPass && LangOpts.SYCLIsHost;
+ }
107
+ static bool SYCLCUDAIsSYCLDevice(const clang::LangOptions &LangOpts) {
+   // True only for the SYCL-device half of a SYCL compilation of CUDA
+   // sources: CUDA mode is active, but this is not the CUDA-device pass.
+   const bool CUDAHostPass = LangOpts.CUDA && !LangOpts.CUDAIsDevice;
+   return CUDAHostPass && LangOpts.SYCLIsDevice;
+ }
111
+
103
112
CodeGenModule::CodeGenModule (ASTContext &C,
104
113
IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
105
114
const HeaderSearchOptions &HSO,
@@ -2913,13 +2922,23 @@ void CodeGenModule::EmitDeferred() {
2913
2922
for (GlobalDecl &D : CurDeclsToEmit) {
2914
2923
// Emit a dummy __host__ function if a legit one is not already present in
2915
2924
// case of SYCL compilation of CUDA sources.
2916
- if (LangOpts. CUDA && !LangOpts. CUDAIsDevice && LangOpts. SYCLIsHost ) {
2925
+ if (SYCLCUDAIsHost ( LangOpts) ) {
2917
2926
GlobalDecl OtherD;
2918
2927
if (lookupRepresentativeDecl (getMangledName (D), OtherD) &&
2919
2928
(D.getCanonicalDecl ().getDecl () !=
2920
- OtherD.getCanonicalDecl ().getDecl ())) {
2929
+ OtherD.getCanonicalDecl ().getDecl ()) &&
2930
+ D.getCanonicalDecl ().getDecl ()->hasAttr <CUDADeviceAttr>())
2931
+ continue ;
2932
+ }
2933
+ // Emit a dummy __device__ function if a legit one is not already present in
2934
+ // case of SYCL device compilation of CUDA sources.
2935
+ if (SYCLCUDAIsSYCLDevice (LangOpts)) {
2936
+ GlobalDecl OtherD;
2937
+ if (lookupRepresentativeDecl (getMangledName (D), OtherD) &&
2938
+ (D.getCanonicalDecl ().getDecl () !=
2939
+ OtherD.getCanonicalDecl ().getDecl ()) &&
2940
+ D.getCanonicalDecl ().getDecl ()->hasAttr <CUDAHostAttr>())
2921
2941
continue ;
2922
- }
2923
2942
}
2924
2943
const ValueDecl *VD = cast<ValueDecl>(D.getDecl ());
2925
2944
// If emitting for SYCL device, emit the deferred alias
@@ -3571,16 +3590,10 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
3571
3590
// their device-side incarnations.
3572
3591
3573
3592
// So device-only functions are the only things we skip, except for SYCL.
3574
- if (isa<FunctionDecl>(Global) && !Global->hasAttr <CUDAHostAttr>() &&
3575
- Global->hasAttr <CUDADeviceAttr>()) {
3576
- // In SYCL, every (CUDA) __device__ function needs to have a __host__
3577
- // counterpart that will be emitted in case of it is not already
3578
- // present.
3579
- if (LangOpts.SYCLIsHost && MustBeEmitted (Global) &&
3580
- MayBeEmittedEagerly (Global))
3581
- addDeferredDeclToEmit (GD);
3593
+ if (!LangOpts.isSYCL () && isa<FunctionDecl>(Global) &&
3594
+ !Global->hasAttr <CUDAHostAttr>() && Global->hasAttr <CUDADeviceAttr>())
3582
3595
return ;
3583
- }
3596
+
3584
3597
assert ((isa<FunctionDecl>(Global) || isa<VarDecl>(Global)) &&
3585
3598
" Expected Variable or Function" );
3586
3599
}
@@ -3605,8 +3618,13 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
3605
3618
if (const auto *FD = dyn_cast<FunctionDecl>(Global)) {
3606
3619
// Forward declarations are emitted lazily on first use.
3607
3620
if (!FD->doesThisDeclarationHaveABody ()) {
3608
- if (!FD->doesDeclarationForceExternallyVisibleDefinition ())
3609
- return ;
3621
+ if (!FD->doesDeclarationForceExternallyVisibleDefinition ()) {
3622
+ // Force the declaration in SYCL compilation of CUDA sources.
3623
+ if (!((SYCLCUDAIsHost (LangOpts) && Global->hasAttr <CUDAHostAttr>()) ||
3624
+ (SYCLCUDAIsSYCLDevice (LangOpts) &&
3625
+ Global->hasAttr <CUDADeviceAttr>())))
3626
+ return ;
3627
+ }
3610
3628
3611
3629
StringRef MangledName = getMangledName (GD);
3612
3630
@@ -3665,6 +3683,20 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
3665
3683
// function. If the global must always be emitted, do it eagerly if possible
3666
3684
// to benefit from cache locality.
3667
3685
if (MustBeEmitted (Global) && MayBeEmittedEagerly (Global)) {
3686
+ // Avoid emitting the same __host__ __device__ functions,
3687
+ // in SYCL-CUDA-host compilation, and
3688
+ if (SYCLCUDAIsHost (LangOpts) && isa<FunctionDecl>(Global) &&
3689
+ !Global->hasAttr <CUDAHostAttr>() && Global->hasAttr <CUDADeviceAttr>()) {
3690
+ addDeferredDeclToEmit (GD);
3691
+ return ;
3692
+ }
3693
+ // in SYCL-CUDA-device compilation.
3694
+ if (SYCLCUDAIsSYCLDevice (LangOpts) && isa<FunctionDecl>(Global) &&
3695
+ Global->hasAttr <CUDAHostAttr>() && !Global->hasAttr <CUDADeviceAttr>()) {
3696
+ addDeferredDeclToEmit (GD);
3697
+ return ;
3698
+ }
3699
+
3668
3700
// Emit the definition if it can't be deferred.
3669
3701
EmitGlobalDefinition (GD);
3670
3702
return ;
@@ -3688,6 +3720,39 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
3688
3720
addDeferredDeclToEmit (GD);
3689
3721
EmittedDeferredDecls[MangledName] = GD;
3690
3722
} else {
3723
+
3724
+ // For SYCL compilation of CUDA sources,
3725
+ if (LangOpts.isSYCL () && LangOpts.CUDA && !LangOpts.CUDAIsDevice ) {
3726
+ // in case of SYCL-CUDA-host,
3727
+ if (LangOpts.SYCLIsHost ) {
3728
+ if (Global->hasAttr <CUDAHostAttr>()) {
3729
+ // remove already present __device__ function.
3730
+ auto DDI = DeferredDecls.find (MangledName);
3731
+ if (DDI != DeferredDecls.end ())
3732
+ DeferredDecls.erase (DDI);
3733
+ } else if (Global->hasAttr <CUDADeviceAttr>()) {
3734
+ // do not insert a __device__ function if a __host__ one is present.
3735
+ auto DDI = DeferredDecls.find (MangledName);
3736
+ if (DDI != DeferredDecls.end ())
3737
+ return ;
3738
+ }
3739
+ }
3740
+ // in case of SYCL-CUDA-device,
3741
+ if (LangOpts.SYCLIsDevice ) {
3742
+ if (Global->hasAttr <CUDADeviceAttr>()) {
3743
+ // remove already present __host__ function.
3744
+ auto DDI = DeferredDecls.find (MangledName);
3745
+ if (DDI != DeferredDecls.end ())
3746
+ DeferredDecls.erase (DDI);
3747
+ } else if (Global->hasAttr <CUDAHostAttr>()) {
3748
+ // do not insert a __host__ function if a __device__ one is present.
3749
+ auto DDI = DeferredDecls.find (MangledName);
3750
+ if (DDI != DeferredDecls.end ())
3751
+ return ;
3752
+ }
3753
+ }
3754
+ }
3755
+
3691
3756
// Otherwise, remember that we saw a deferred decl with this name. The
3692
3757
// first use of the mangled name will cause it to move into
3693
3758
// DeferredDeclsToEmit.
@@ -4399,8 +4464,16 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
4399
4464
// This is the first use or definition of a mangled name. If there is a
4400
4465
// deferred decl with this name, remember that we need to emit it at the end
4401
4466
// of the file.
4467
+ // In SYCL compilation of CUDA sources, avoid the emission if the
4468
+ // __device__/__host__ attributes do not match.
4402
4469
auto DDI = DeferredDecls.find (MangledName);
4403
- if (DDI != DeferredDecls.end ()) {
4470
+ if (DDI != DeferredDecls.end () &&
4471
+ (!(getLangOpts ().isSYCL () && getLangOpts ().CUDA &&
4472
+ !getLangOpts ().CUDAIsDevice ) ||
4473
+ ((DDI->second ).getDecl ()->hasAttr <CUDAHostAttr>() ==
4474
+ D->hasAttr <CUDAHostAttr>() &&
4475
+ (DDI->second ).getDecl ()->hasAttr <CUDADeviceAttr>() ==
4476
+ D->hasAttr <CUDADeviceAttr>()))) {
4404
4477
// Move the potentially referenced deferred decl to the
4405
4478
// DeferredDeclsToEmit list, and remove it from DeferredDecls (since we
4406
4479
// don't need it anymore).
0 commit comments