-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[flang][cuda] Implicitly load cudadevice module in device/global subprogram #91668
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3797,6 +3797,19 @@ bool SubprogramVisitor::Pre(const parser::PrefixSpec::Attributes &attrs) { | |
subp->set_cudaSubprogramAttrs(attr); | ||
} | ||
} | ||
if (auto attrs{subp->cudaSubprogramAttrs()}) { | ||
if (*attrs == common::CUDASubprogramAttrs::Global || | ||
*attrs == common::CUDASubprogramAttrs::Device) { | ||
// Implicitly USE the cudadevice module by copying its symbols in the | ||
// current scope. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. s/symbol/symbols/ What about clashes with names that are already in scope (or declared later)? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So according to the reference compiler we should not overwrite what is already in scope, so I updated the copy of symbols and added a test. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So the semantics of these names are not really what you get from either a USE statement or from intrinsics. There is already precedent here with names like |
||
const Scope &scope{context().GetCUDADeviceScope()}; | ||
for (auto sym : scope.GetSymbols()) { | ||
if (!currScope().FindSymbol(sym->name())) { | ||
currScope().CopySymbol(sym); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
return false; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
!===-- module/__cuda_device_builtins.f90 -----------------------------------===! | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These could all be in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The reason I added a new file is that we can check on the module name instead of having to check on the module name and the procedure prefix. |
||
! | ||
! Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
! See https://llvm.org/LICENSE.txt for license information. | ||
! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
! | ||
!===------------------------------------------------------------------------===! | ||
|
||
! CUDA Fortran procedures available in device subprogram | ||
|
||
module __CUDA_device_builtins | ||
|
||
  implicit none | ||
|
||
  ! Set PRIVATE by default to explicitly only export what is meant | ||
  ! to be exported by this MODULE. | ||
  private | ||
|
||
  ! Every procedure below is declared through an interface body only; no | ||
  ! implementation lives in this module. Interface bodies are separate | ||
  ! scoping units that do not inherit the IMPLICIT NONE above, so each dummy | ||
  ! argument is declared explicitly inside its own interface body. | ||
|
||
  ! Synchronization Functions | ||
|
||
  interface | ||
    subroutine __cuda_device_builtins_syncthreads() | ||
    end subroutine | ||
  end interface | ||
  public :: __cuda_device_builtins_syncthreads | ||
|
||
  interface | ||
    integer function __cuda_device_builtins_syncthreads_and(value) | ||
      integer :: value | ||
    end function | ||
  end interface | ||
  public :: __cuda_device_builtins_syncthreads_and | ||
|
||
  interface | ||
    integer function __cuda_device_builtins_syncthreads_count(value) | ||
      integer :: value | ||
    end function | ||
  end interface | ||
  public :: __cuda_device_builtins_syncthreads_count | ||
|
||
  interface | ||
    integer function __cuda_device_builtins_syncthreads_or(int_value) | ||
      ! Declared explicitly, like the dummies of the sibling syncthreads_and | ||
      ! and syncthreads_count interfaces, instead of relying on the implicit | ||
      ! typing of a name that starts with 'i'. | ||
      integer :: int_value | ||
    end function | ||
  end interface | ||
  public :: __cuda_device_builtins_syncthreads_or | ||
|
||
  interface | ||
    subroutine __cuda_device_builtins_syncwarp(mask) | ||
      integer :: mask | ||
    end subroutine | ||
  end interface | ||
  public :: __cuda_device_builtins_syncwarp | ||
|
||
  ! Memory Fences | ||
|
||
  interface | ||
    subroutine __cuda_device_builtins_threadfence() | ||
    end subroutine | ||
  end interface | ||
  public :: __cuda_device_builtins_threadfence | ||
|
||
  interface | ||
    subroutine __cuda_device_builtins_threadfence_block() | ||
    end subroutine | ||
  end interface | ||
  public :: __cuda_device_builtins_threadfence_block | ||
|
||
  interface | ||
    subroutine __cuda_device_builtins_threadfence_system() | ||
    end subroutine | ||
  end interface | ||
  public :: __cuda_device_builtins_threadfence_system | ||
|
||
end module __CUDA_device_builtins |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
!===-- module/cudadevice.f90 -----------------------------------------------===! | ||
klausler marked this conversation as resolved.
Show resolved
Hide resolved
|
||
! | ||
! Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
! See https://llvm.org/LICENSE.txt for license information. | ||
! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
! | ||
!===------------------------------------------------------------------------===! | ||
|
||
! CUDA Fortran procedures available in device subprogram | ||
|
||
module cudadevice | ||
  ! Re-export every public entity of __cuda_device_builtins under its | ||
  ! unprefixed, user-facing name. | ||
  use __cuda_device_builtins, only: & | ||
    syncthreads        => __cuda_device_builtins_syncthreads, & | ||
    syncthreads_and    => __cuda_device_builtins_syncthreads_and, & | ||
    syncthreads_count  => __cuda_device_builtins_syncthreads_count, & | ||
    syncthreads_or     => __cuda_device_builtins_syncthreads_or, & | ||
    syncwarp           => __cuda_device_builtins_syncwarp, & | ||
    threadfence        => __cuda_device_builtins_threadfence, & | ||
    threadfence_block  => __cuda_device_builtins_threadfence_block, & | ||
    threadfence_system => __cuda_device_builtins_threadfence_system | ||
end module cudadevice |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
! RUN: %flang_fc1 -fdebug-dump-symbols %s | FileCheck %s | ||
|
||
! Test that the CUDA Fortran device intrinsics pass semantic analysis | ||
|
||
attributes(global) subroutine devsub() | ||
! Global subprogram with no USE statement. Under IMPLICIT NONE, every | ||
! procedure referenced below has to come from the symbols that are implicitly | ||
! made available in device/global subprograms; the symbol dump verified after | ||
! this subprogram expects each of them to be use-associated from | ||
! __cuda_device_builtins. | ||
implicit none | ||
integer :: ret | ||
|
||
! 3.6.4. Synchronization Functions | ||
call syncthreads() | ||
call syncwarp(1) | ||
call threadfence() | ||
call threadfence_block() | ||
call threadfence_system() | ||
! The syncthreads_* variants are integer functions of one integer argument. | ||
ret = syncthreads_and(1) | ||
ret = syncthreads_count(1) | ||
ret = syncthreads_or(1) | ||
end | ||
|
||
! CHECK-LABEL: Subprogram scope: devsub | ||
! CHECK: syncthreads, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_syncthreads in __cuda_device_builtins | ||
! CHECK: syncthreads_and, EXTERNAL, PUBLIC (Function): Use from __cuda_device_builtins_syncthreads_and in __cuda_device_builtins | ||
! CHECK: syncthreads_count, EXTERNAL, PUBLIC (Function): Use from __cuda_device_builtins_syncthreads_count in __cuda_device_builtins | ||
! CHECK: syncthreads_or, EXTERNAL, PUBLIC (Function): Use from __cuda_device_builtins_syncthreads_or in __cuda_device_builtins | ||
! CHECK: syncwarp, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_syncwarp in __cuda_device_builtins | ||
! CHECK: threadfence, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_threadfence in __cuda_device_builtins | ||
! CHECK: threadfence_block, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_threadfence_block in __cuda_device_builtins | ||
! CHECK: threadfence_system, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_threadfence_system in __cuda_device_builtins | ||
|
||
! Host subprogram (no CUDA attributes): the expectations that follow require | ||
! syncthreads to appear here as a plain EXTERNAL symbol rather than as a | ||
! use-associated one. | ||
subroutine host() | ||
call syncthreads() | ||
end subroutine | ||
|
||
! CHECK-LABEL: Subprogram scope: host | ||
! CHECK: syncthreads, EXTERNAL: HostAssoc{{$}} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
! RUN: %python %S/test_errors.py %s %flang_fc1 | ||
|
||
module dev | ||
! Module-level device variable that shares its name with one of the | ||
! procedures exported by cudadevice. | ||
integer, device :: syncthreads | ||
|
||
contains | ||
|
||
! Device subprogram: the module variable above must remain the meaning of | ||
! syncthreads here, so the assignment below is valid. | ||
attributes(device) subroutine sub1() | ||
syncthreads = 1 ! syncthreads not overwritten by cudadevice | ||
end subroutine | ||
|
||
! Global subprogram: declaring a local with the name of an implicitly | ||
! available cudadevice procedure is expected to be diagnosed as a | ||
! re-declaration of a use-associated name. | ||
attributes(global) subroutine sub2() | ||
!ERROR: 'threadfence' is use-associated from module '__cuda_device_builtins' and cannot be re-declared | ||
integer :: threadfence | ||
end subroutine | ||
end module | ||
|
Uh oh!
There was an error while loading. Please reload this page.