Skip to content

Commit 6d50a79

Browse files
authored
[flang][cuda] Implicitly load cudadevice module in device/global subprogram (#92038)
This is a reworked version of #91668. It adds the `cudadevice` module and sets the `device` attribute on its functions/subroutines so that no special case is needed in the semantic checks. The `cudadevice` module is implicitly USE'd in `global`/`device` subprograms.
1 parent 1a5bc7c commit 6d50a79

File tree

7 files changed

+166
-5
lines changed

7 files changed

+166
-5
lines changed

flang/include/flang/Semantics/semantics.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,8 +215,10 @@ class SemanticsContext {
215215
void UseFortranBuiltinsModule();
216216
const Scope *GetBuiltinsScope() const { return builtinsScope_; }
217217

218-
void UsePPCBuiltinTypesModule();
219218
const Scope &GetCUDABuiltinsScope();
219+
const Scope &GetCUDADeviceScope();
220+
221+
void UsePPCBuiltinTypesModule();
220222
void UsePPCBuiltinsModule();
221223
Scope *GetPPCBuiltinTypesScope() { return ppcBuiltinTypesScope_; }
222224
const Scope *GetPPCBuiltinsScope() const { return ppcBuiltinsScope_; }
@@ -292,6 +294,7 @@ class SemanticsContext {
292294
const Scope *builtinsScope_{nullptr}; // module __Fortran_builtins
293295
Scope *ppcBuiltinTypesScope_{nullptr}; // module __Fortran_PPC_types
294296
std::optional<const Scope *> cudaBuiltinsScope_; // module __CUDA_builtins
297+
std::optional<const Scope *> cudaDeviceScope_; // module cudadevice
295298
const Scope *ppcBuiltinsScope_{nullptr}; // module __ppc_intrinsics
296299
std::list<parser::Program> modFileParseTrees_;
297300
std::unique_ptr<CommonBlockMap> commonBlockMap_;

flang/lib/Semantics/resolve-names.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3797,6 +3797,26 @@ bool SubprogramVisitor::Pre(const parser::PrefixSpec::Attributes &attrs) {
37973797
subp->set_cudaSubprogramAttrs(attr);
37983798
}
37993799
}
3800+
if (auto attrs{subp->cudaSubprogramAttrs()}) {
3801+
if (*attrs == common::CUDASubprogramAttrs::Global ||
3802+
*attrs == common::CUDASubprogramAttrs::Device) {
3803+
const Scope &scope{currScope()};
3804+
const Scope *mod{FindModuleContaining(scope)};
3805+
if (mod && mod->GetName().value() == "cudadevice") {
3806+
return false;
3807+
}
3808+
// Implicitly USE the cudadevice module by copying its symbols in the
3809+
// current scope.
3810+
const Scope &cudaDeviceScope{context().GetCUDADeviceScope()};
3811+
for (auto sym : cudaDeviceScope.GetSymbols()) {
3812+
if (!currScope().FindSymbol(sym->name())) {
3813+
auto &localSymbol{MakeSymbol(
3814+
sym->name(), Attrs{}, UseDetails{sym->name(), *sym})};
3815+
localSymbol.flags() = sym->flags();
3816+
}
3817+
}
3818+
}
3819+
}
38003820
}
38013821
return false;
38023822
}

flang/lib/Semantics/semantics.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -543,6 +543,14 @@ const Scope &SemanticsContext::GetCUDABuiltinsScope() {
543543
return **cudaBuiltinsScope_;
544544
}
545545

546+
const Scope &SemanticsContext::GetCUDADeviceScope() {
547+
if (!cudaDeviceScope_) {
548+
cudaDeviceScope_ = GetBuiltinModule("cudadevice");
549+
CHECK(cudaDeviceScope_.value() != nullptr);
550+
}
551+
return **cudaDeviceScope_;
552+
}
553+
546554
void SemanticsContext::UsePPCBuiltinsModule() {
547555
if (ppcBuiltinsScope_ == nullptr) {
548556
ppcBuiltinsScope_ = GetBuiltinModule("__ppc_intrinsics");

flang/module/cudadevice.f90

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
!===-- module/cudadevice.f90 -----------------------------------------------===!
2+
!
3+
! Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
! See https://llvm.org/LICENSE.txt for license information.
5+
! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
!
7+
!===------------------------------------------------------------------------===!
8+
9+
! CUDA Fortran procedures available in device subprograms
10+
11+
module cudadevice
12+
implicit none
13+
14+
! Set PRIVATE by default to explicitly only export what is meant
15+
! to be exported by this MODULE.
16+
private
17+
18+
! Synchronization Functions
19+
20+
interface
21+
attributes(device) subroutine syncthreads()
22+
end subroutine
23+
end interface
24+
public :: syncthreads
25+
26+
interface
27+
attributes(device) integer function syncthreads_and(value)
28+
integer :: value
29+
end function
30+
end interface
31+
public :: syncthreads_and
32+
33+
interface
34+
attributes(device) integer function syncthreads_count(value)
35+
integer :: value
36+
end function
37+
end interface
38+
public :: syncthreads_count
39+
40+
interface
41+
attributes(device) integer function syncthreads_or(value)
42+
integer :: value
43+
end function
44+
end interface
45+
public :: syncthreads_or
46+
47+
interface
48+
attributes(device) subroutine syncwarp(mask)
49+
integer :: mask
50+
end subroutine
51+
end interface
52+
public :: syncwarp
53+
54+
! Memory Fences
55+
56+
interface
57+
attributes(device) subroutine threadfence()
58+
end subroutine
59+
end interface
60+
public :: threadfence
61+
62+
interface
63+
attributes(device) subroutine threadfence_block()
64+
end subroutine
65+
end interface
66+
public :: threadfence_block
67+
68+
interface
69+
attributes(device) subroutine threadfence_system()
70+
end subroutine
71+
end interface
72+
public :: threadfence_system
73+
74+
end module
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
! RUN: %flang_fc1 -fdebug-dump-symbols %s | FileCheck %s
2+
3+
! Test that CUDA Fortran intrinsics pass semantic checks
4+
5+
attributes(global) subroutine devsub()
6+
implicit none
7+
integer :: ret
8+
9+
! 3.6.4. Synchronization Functions
10+
call syncthreads()
11+
call syncwarp(1)
12+
call threadfence()
13+
call threadfence_block()
14+
call threadfence_system()
15+
ret = syncthreads_and(1)
16+
ret = syncthreads_count(1)
17+
ret = syncthreads_or(1)
18+
end
19+
20+
! CHECK-LABEL: Subprogram scope: devsub
21+
! CHECK: syncthreads (Subroutine): Use from syncthreads in cudadevice
22+
! CHECK: syncthreads_and (Function): Use from syncthreads_and in cudadevice
23+
! CHECK: syncthreads_count (Function): Use from syncthreads_count in cudadevice
24+
! CHECK: syncthreads_or (Function): Use from syncthreads_or in cudadevice
25+
! CHECK: syncwarp (Subroutine): Use from syncwarp in cudadevice
26+
! CHECK: threadfence (Subroutine): Use from threadfence in cudadevice
27+
! CHECK: threadfence_block (Subroutine): Use from threadfence_block in cudadevice
28+
! CHECK: threadfence_system (Subroutine): Use from threadfence_system in cudadevice
29+
30+
subroutine host()
31+
call syncthreads()
32+
end subroutine
33+
34+
! CHECK-LABEL: Subprogram scope: host
35+
! CHECK: syncthreads, EXTERNAL: HostAssoc{{$}}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
! RUN: %python %S/test_errors.py %s %flang_fc1
2+
3+
module dev
4+
integer, device :: syncthreads
5+
6+
contains
7+
8+
attributes(device) subroutine sub1()
9+
syncthreads = 1 ! syncthreads not overwritten by cudadevice
10+
end subroutine
11+
12+
attributes(global) subroutine sub2()
13+
!ERROR: 'threadfence' is use-associated from module 'cudadevice' and cannot be re-declared
14+
integer :: threadfence
15+
end subroutine
16+
end module
17+

flang/tools/f18/CMakeLists.txt

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ set(MODULES
1212
"__ppc_intrinsics"
1313
"mma"
1414
"__cuda_builtins"
15+
"cudadevice"
1516
"ieee_arithmetic"
1617
"ieee_exceptions"
1718
"ieee_features"
@@ -26,11 +27,15 @@ set(MODULES
2627
if (NOT CMAKE_CROSSCOMPILING)
2728
foreach(filename ${MODULES})
2829
set(depends "")
30+
set(opts "")
2931
if(${filename} STREQUAL "__fortran_builtins" OR
3032
${filename} STREQUAL "__ppc_types")
3133
elseif(${filename} STREQUAL "__ppc_intrinsics" OR
3234
${filename} STREQUAL "mma")
3335
set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod)
36+
elseif(${filename} STREQUAL "cudadevice")
37+
set(opts -fc1 -xcuda)
38+
set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_builtins.mod)
3439
else()
3540
set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod)
3641
if(NOT ${filename} STREQUAL "__fortran_type_info")
@@ -43,9 +48,8 @@ if (NOT CMAKE_CROSSCOMPILING)
4348
endif()
4449

4550
# The module contains PPC vector types that need the PPC target.
46-
set(opts "")
47-
if(${filename} STREQUAL "__ppc_intrinsics" OR
48-
${filename} STREQUAL "mma")
51+
if(${filename} STREQUAL "__ppc_intrinsics" OR
52+
${filename} STREQUAL "mma")
4953
if (PowerPC IN_LIST LLVM_TARGETS_TO_BUILD)
5054
set(opts "--target=ppc64le")
5155
else()
@@ -58,7 +62,7 @@ if (NOT CMAKE_CROSSCOMPILING)
5862
# TODO: We may need to flag this with conditional, in case Flang is built w/o OpenMP support
5963
add_custom_command(OUTPUT ${base}.mod
6064
COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR}
61-
COMMAND flang-new -cpp -fsyntax-only ${opts} -module-dir ${FLANG_INTRINSIC_MODULES_DIR}
65+
COMMAND flang-new ${opts} -cpp -fsyntax-only -module-dir ${FLANG_INTRINSIC_MODULES_DIR}
6266
${FLANG_SOURCE_DIR}/module/${filename}.f90
6367
DEPENDS flang-new ${FLANG_SOURCE_DIR}/module/${filename}.f90 ${FLANG_SOURCE_DIR}/module/__fortran_builtins.f90 ${depends}
6468
)

0 commit comments

Comments
 (0)