Skip to content

[flang][cuda] Implicitly load cudadevice module in device/global subprogram #91668

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion flang/include/flang/Semantics/semantics.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,10 @@ class SemanticsContext {
void UseFortranBuiltinsModule();
const Scope *GetBuiltinsScope() const { return builtinsScope_; }

void UsePPCBuiltinTypesModule();
const Scope &GetCUDABuiltinsScope();
const Scope &GetCUDADeviceScope();

void UsePPCBuiltinTypesModule();
void UsePPCBuiltinsModule();
Scope *GetPPCBuiltinTypesScope() { return ppcBuiltinTypesScope_; }
const Scope *GetPPCBuiltinsScope() const { return ppcBuiltinsScope_; }
Expand Down Expand Up @@ -292,6 +294,7 @@ class SemanticsContext {
const Scope *builtinsScope_{nullptr}; // module __Fortran_builtins
Scope *ppcBuiltinTypesScope_{nullptr}; // module __Fortran_PPC_types
std::optional<const Scope *> cudaBuiltinsScope_; // module __CUDA_builtins
std::optional<const Scope *> cudaDeviceScope_; // module cudadevice
const Scope *ppcBuiltinsScope_{nullptr}; // module __ppc_intrinsics
std::list<parser::Program> modFileParseTrees_;
std::unique_ptr<CommonBlockMap> commonBlockMap_;
Expand Down
5 changes: 5 additions & 0 deletions flang/lib/Semantics/check-cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ struct DeviceExprChecker
}
}
}
if (sym->owner().IsModule() &&
sym->owner().parent().IsIntrinsicModules() &&
DEREF(sym->owner().symbol()).name() == "__cuda_device_builtins") {
return {};
}
} else if (x.GetSpecificIntrinsic()) {
// TODO(CUDA): Check for unsupported intrinsics here
return {};
Expand Down
13 changes: 13 additions & 0 deletions flang/lib/Semantics/resolve-names.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3797,6 +3797,19 @@ bool SubprogramVisitor::Pre(const parser::PrefixSpec::Attributes &attrs) {
subp->set_cudaSubprogramAttrs(attr);
}
}
if (auto attrs{subp->cudaSubprogramAttrs()}) {
if (*attrs == common::CUDASubprogramAttrs::Global ||
*attrs == common::CUDASubprogramAttrs::Device) {
// Implicitly USE the cudadevice module by copying its symbols in the
// current scope.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/symbol/symbols/

What about clashes with names that are already in scope (or declared later)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to the reference compiler, we should not overwrite what is already in scope, so I updated the symbol copy accordingly and added a test.
For names declared afterwards in a device or global procedure, we emit an error message. I added a test for this case as well.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the semantics of these names are not really what you get from either a USE statement or from intrinsics.

There is already precedent here with names like threadIdx that are automatically imported into device subprogram scopes, so I guess this isn't making things any worse.

const Scope &scope{context().GetCUDADeviceScope()};
for (auto sym : scope.GetSymbols()) {
if (!currScope().FindSymbol(sym->name())) {
currScope().CopySymbol(sym);
}
}
}
}
}
return false;
}
Expand Down
8 changes: 8 additions & 0 deletions flang/lib/Semantics/semantics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,14 @@ const Scope &SemanticsContext::GetCUDABuiltinsScope() {
return **cudaBuiltinsScope_;
}

// Returns the scope of the builtin module "cudadevice".
// The module is looked up through GetBuiltinModule() on the first call only;
// the resulting pointer is cached in cudaDeviceScope_ and reused afterwards.
const Scope &SemanticsContext::GetCUDADeviceScope() {
  if (cudaDeviceScope_.has_value()) {
    // Already resolved by an earlier call.
    return **cudaDeviceScope_;
  }
  cudaDeviceScope_ = GetBuiltinModule("cudadevice");
  // The lookup must have produced a scope before it is dereferenced below.
  CHECK(*cudaDeviceScope_ != nullptr);
  return **cudaDeviceScope_;
}

void SemanticsContext::UsePPCBuiltinsModule() {
if (ppcBuiltinsScope_ == nullptr) {
ppcBuiltinsScope_ = GetBuiltinModule("__ppc_intrinsics");
Expand Down
74 changes: 74 additions & 0 deletions flang/module/__cuda_device_builtins.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
!===-- module/__cuda_device_builtins.f90 -----------------------------------===!
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These could all be in __fortran_builtins, which already contains some built-in CUDA definitions.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason I added a new file is that we can check on the module name alone instead of having to check both the module name and the procedure prefix.
I can move them into __fortran_builtins, but we would then also need to check whether the procedure name starts with __cuda_device_builtins_.

!
! Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
! See https://llvm.org/LICENSE.txt for license information.
! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
!
!===------------------------------------------------------------------------===!

! CUDA Fortran procedures available in device subprograms

module __CUDA_device_builtins

  implicit none

  ! Set PRIVATE by default to explicitly only export what is meant
  ! to be exported by this MODULE.
  private

  ! Every procedure below is declared through an interface body only; this
  ! module provides no implementation for any of them.

  ! Synchronization Functions

  interface
    subroutine __cuda_device_builtins_syncthreads()
    end subroutine
  end interface
  public :: __cuda_device_builtins_syncthreads

  interface
    integer function __cuda_device_builtins_syncthreads_and(value)
      integer :: value
    end function
  end interface
  public :: __cuda_device_builtins_syncthreads_and

  interface
    integer function __cuda_device_builtins_syncthreads_count(value)
      integer :: value
    end function
  end interface
  public :: __cuda_device_builtins_syncthreads_count

  interface
    integer function __cuda_device_builtins_syncthreads_or(int_value)
      ! Declared explicitly, like the sibling interfaces. The module-level
      ! IMPLICIT NONE does not apply inside an interface body, so without
      ! this line the type would silently come from default implicit typing
      ! of the dummy argument's initial letter.
      integer :: int_value
    end function
  end interface
  public :: __cuda_device_builtins_syncthreads_or

  interface
    subroutine __cuda_device_builtins_syncwarp(mask)
      integer :: mask
    end subroutine
  end interface
  public :: __cuda_device_builtins_syncwarp

  ! Memory Fences

  interface
    subroutine __cuda_device_builtins_threadfence()
    end subroutine
  end interface
  public :: __cuda_device_builtins_threadfence

  interface
    subroutine __cuda_device_builtins_threadfence_block()
    end subroutine
  end interface
  public :: __cuda_device_builtins_threadfence_block

  interface
    subroutine __cuda_device_builtins_threadfence_system()
    end subroutine
  end interface
  public :: __cuda_device_builtins_threadfence_system

end module
21 changes: 21 additions & 0 deletions flang/module/cudadevice.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
!===-- module/cudadevice.f90 -----------------------------------------------===!
!
! Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
! See https://llvm.org/LICENSE.txt for license information.
! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
!
!===------------------------------------------------------------------------===!

! CUDA Fortran procedures available in device subprograms

module cudadevice
  ! Re-exports the procedures of __cuda_device_builtins under their
  ! unprefixed names by renaming each one on USE.

  ! Synchronization functions
  use __cuda_device_builtins, only: &
    syncthreads => __cuda_device_builtins_syncthreads, &
    syncthreads_and => __cuda_device_builtins_syncthreads_and, &
    syncthreads_count => __cuda_device_builtins_syncthreads_count, &
    syncthreads_or => __cuda_device_builtins_syncthreads_or, &
    syncwarp => __cuda_device_builtins_syncwarp

  ! Memory fences
  use __cuda_device_builtins, only: &
    threadfence => __cuda_device_builtins_threadfence, &
    threadfence_block => __cuda_device_builtins_threadfence_block, &
    threadfence_system => __cuda_device_builtins_threadfence_system
end module
35 changes: 35 additions & 0 deletions flang/test/Semantics/cuf-device-procedures01.cuf
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
! RUN: %flang_fc1 -fdebug-dump-symbols %s | FileCheck %s

! Test that the CUDA Fortran device procedures pass semantic analysis

! Global subprogram with no USE statement: each name called below is expected
! to resolve to a symbol use-associated from __cuda_device_builtins, as
! verified by the symbol dump checks that follow this subprogram.
attributes(global) subroutine devsub()
implicit none
! Receives the integer result of the function-form procedures.
integer :: ret

! 3.6.4. Synchronization Functions
! Subroutine forms
call syncthreads()
call syncwarp(1)
call threadfence()
call threadfence_block()
call threadfence_system()
! Function forms
ret = syncthreads_and(1)
ret = syncthreads_count(1)
ret = syncthreads_or(1)
end

! CHECK-LABEL: Subprogram scope: devsub
! CHECK: syncthreads, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_syncthreads in __cuda_device_builtins
! CHECK: syncthreads_and, EXTERNAL, PUBLIC (Function): Use from __cuda_device_builtins_syncthreads_and in __cuda_device_builtins
! CHECK: syncthreads_count, EXTERNAL, PUBLIC (Function): Use from __cuda_device_builtins_syncthreads_count in __cuda_device_builtins
! CHECK: syncthreads_or, EXTERNAL, PUBLIC (Function): Use from __cuda_device_builtins_syncthreads_or in __cuda_device_builtins
! CHECK: syncwarp, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_syncwarp in __cuda_device_builtins
! CHECK: threadfence, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_threadfence in __cuda_device_builtins
! CHECK: threadfence_block, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_threadfence_block in __cuda_device_builtins
! CHECK: threadfence_system, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_threadfence_system in __cuda_device_builtins

! Subprogram without a CUDA attributes prefix: the expected symbol dump below
! lists syncthreads as a plain EXTERNAL symbol here, not as one use-associated
! from __cuda_device_builtins.
subroutine host()
call syncthreads()
end subroutine

! CHECK-LABEL: Subprogram scope: host
! CHECK: syncthreads, EXTERNAL: HostAssoc{{$}}
17 changes: 17 additions & 0 deletions flang/test/Semantics/cuf-device-procedures02.cuf
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
! RUN: %python %S/test_errors.py %s %flang_fc1

! Exercises name clashes between user declarations and the cudadevice names
! that are made available in device/global subprograms.
module dev
! Module variable that shares its name with a cudadevice procedure.
integer, device :: syncthreads

contains

! The name already in scope wins: syncthreads is assignable as a variable.
attributes(device) subroutine sub1()
syncthreads = 1 ! syncthreads not overwritten by cudadevice
end subroutine

! Declaring a cudadevice name locally inside a global subprogram is an error.
attributes(global) subroutine sub2()
!ERROR: 'threadfence' is use-associated from module '__cuda_device_builtins' and cannot be re-declared
integer :: threadfence
end subroutine
end module

4 changes: 4 additions & 0 deletions flang/tools/f18/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ set(MODULES
"__ppc_intrinsics"
"mma"
"__cuda_builtins"
"__cuda_device_builtins"
"cudadevice"
"ieee_arithmetic"
"ieee_exceptions"
"ieee_features"
Expand All @@ -31,6 +33,8 @@ if (NOT CMAKE_CROSSCOMPILING)
elseif(${filename} STREQUAL "__ppc_intrinsics" OR
${filename} STREQUAL "mma")
set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod)
elseif(${filename} STREQUAL "cudadevice")
set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_device_builtins.mod)
else()
set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod)
if(NOT ${filename} STREQUAL "__fortran_type_info")
Expand Down