Skip to content

Commit 4bdec58

Browse files
authored
[flang][runtime] Enable more code for offload device builds. (#67489)
I extended the "closure" of the device code containing the initial transformational.cpp. The device side of the library should now be complete at least for some APIs. For example, I tested with C OpenMP code calling BesselJnX0 with a nullptr descriptor, which failed with a runtime error when executing on a GPU. I added `--expt-relaxed-constexpr` for the NVCC compiler to avoid multiple warnings about missing `__attribute__((device))` on constexpr methods coming from C++ header files.
1 parent 21c2ba4 commit 4bdec58

File tree

12 files changed

+396
-150
lines changed

12 files changed

+396
-150
lines changed

flang/include/flang/Runtime/api-attrs.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,18 @@
4242
#endif
4343
#endif /* !defined(RT_EXT_API_GROUP_END) */
4444

45+
/*
46+
* RT_OFFLOAD_API_GROUP_BEGIN/END pair is placed around definitions
47+
* of functions that can be referenced in other modules of Flang
48+
* runtime. For OpenMP offload these functions are made "declare target"
49+
* making sure they are compiled for the target even though direct
50+
* references to them from other "declare target" functions may not
51+
* be seen. Host-only functions should not be put in between these
52+
* two macros.
53+
*/
54+
#define RT_OFFLOAD_API_GROUP_BEGIN RT_EXT_API_GROUP_BEGIN
55+
#define RT_OFFLOAD_API_GROUP_END RT_EXT_API_GROUP_END
56+
4557
/*
4658
* RT_VAR_GROUP_BEGIN/END pair is placed around definitions
4759
* of module scope variables referenced by Flang runtime (directly
@@ -88,4 +100,16 @@
88100
#endif
89101
#endif /* !defined(RT_CONST_VAR_ATTRS) */
90102

103+
/*
104+
* RT_DEVICE_COMPILATION is defined for any device compilation.
105+
* Note that it can only be used reliably with compilers that perform
106+
* separate host and device compilations.
107+
*/
108+
#if ((defined(__CUDACC__) || defined(__CUDA__)) && defined(__CUDA_ARCH__)) || \
109+
(defined(_OPENMP) && (defined(__AMDGCN__) || defined(__NVPTX__)))
110+
#define RT_DEVICE_COMPILATION 1
111+
#else
112+
#undef RT_DEVICE_COMPILATION
113+
#endif
114+
91115
#endif /* !FORTRAN_RUNTIME_API_ATTRS_H_ */

flang/include/flang/Runtime/descriptor.h

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -181,20 +181,21 @@ class Descriptor {
181181
ISO::CFI_attribute_t attribute = CFI_attribute_other);
182182

183183
// CUDA_TODO: Clang does not support unique_ptr on device.
184-
static OwningPtr<Descriptor> Create(TypeCode t, std::size_t elementBytes,
185-
void *p = nullptr, int rank = maxRank,
184+
static RT_API_ATTRS OwningPtr<Descriptor> Create(TypeCode t,
185+
std::size_t elementBytes, void *p = nullptr, int rank = maxRank,
186186
const SubscriptValue *extent = nullptr,
187187
ISO::CFI_attribute_t attribute = CFI_attribute_other,
188188
int derivedTypeLenParameters = 0);
189-
static OwningPtr<Descriptor> Create(TypeCategory, int kind, void *p = nullptr,
190-
int rank = maxRank, const SubscriptValue *extent = nullptr,
189+
static RT_API_ATTRS OwningPtr<Descriptor> Create(TypeCategory, int kind,
190+
void *p = nullptr, int rank = maxRank,
191+
const SubscriptValue *extent = nullptr,
191192
ISO::CFI_attribute_t attribute = CFI_attribute_other);
192-
static OwningPtr<Descriptor> Create(int characterKind,
193+
static RT_API_ATTRS OwningPtr<Descriptor> Create(int characterKind,
193194
SubscriptValue characters, void *p = nullptr, int rank = maxRank,
194195
const SubscriptValue *extent = nullptr,
195196
ISO::CFI_attribute_t attribute = CFI_attribute_other);
196-
static OwningPtr<Descriptor> Create(const typeInfo::DerivedType &dt,
197-
void *p = nullptr, int rank = maxRank,
197+
static RT_API_ATTRS OwningPtr<Descriptor> Create(
198+
const typeInfo::DerivedType &dt, void *p = nullptr, int rank = maxRank,
198199
const SubscriptValue *extent = nullptr,
199200
ISO::CFI_attribute_t attribute = CFI_attribute_other);
200201

flang/include/flang/Runtime/memory.h

Lines changed: 91 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,31 +12,116 @@
1212
#ifndef FORTRAN_RUNTIME_MEMORY_H_
1313
#define FORTRAN_RUNTIME_MEMORY_H_
1414

15+
#include "flang/Runtime/api-attrs.h"
16+
#include <cassert>
1517
#include <memory>
18+
#include <type_traits>
1619

1720
namespace Fortran::runtime {
1821

1922
class Terminator;
2023

21-
[[nodiscard]] void *AllocateMemoryOrCrash(
24+
[[nodiscard]] RT_API_ATTRS void *AllocateMemoryOrCrash(
2225
const Terminator &, std::size_t bytes);
2326
template <typename A> [[nodiscard]] A &AllocateOrCrash(const Terminator &t) {
2427
return *reinterpret_cast<A *>(AllocateMemoryOrCrash(t, sizeof(A)));
2528
}
26-
void FreeMemory(void *);
27-
template <typename A> void FreeMemory(A *p) {
29+
RT_API_ATTRS void FreeMemory(void *);
30+
template <typename A> RT_API_ATTRS void FreeMemory(A *p) {
2831
FreeMemory(reinterpret_cast<void *>(p));
2932
}
3033
template <typename A> void FreeMemoryAndNullify(A *&p) {
3134
FreeMemory(p);
3235
p = nullptr;
3336
}
3437

35-
template <typename A> struct OwningPtrDeleter {
36-
void operator()(A *p) { FreeMemory(p); }
38+
// Very basic implementation mimicking std::unique_ptr.
39+
// It should work for any offload device compiler.
40+
// It uses a fixed memory deleter based on FreeMemory(),
41+
// and does not support array objects with runtime length.
42+
template <typename A> class OwningPtr {
43+
public:
44+
using pointer_type = A *;
45+
46+
OwningPtr() = default;
47+
RT_API_ATTRS explicit OwningPtr(pointer_type p) : ptr_(p) {}
48+
RT_API_ATTRS OwningPtr(const OwningPtr &) = delete;
49+
RT_API_ATTRS OwningPtr &operator=(const OwningPtr &) = delete;
50+
RT_API_ATTRS OwningPtr(OwningPtr &&other) {
51+
ptr_ = other.ptr_;
52+
other.ptr_ = pointer_type{};
53+
}
54+
RT_API_ATTRS OwningPtr &operator=(OwningPtr &&other) {
55+
if (this != &other) {
56+
delete_ptr(ptr_);
57+
ptr_ = other.ptr_;
58+
other.ptr_ = pointer_type{};
59+
}
60+
return *this;
61+
}
62+
constexpr RT_API_ATTRS OwningPtr(std::nullptr_t) : OwningPtr() {}
63+
64+
// Delete the pointer, if owns one.
65+
RT_API_ATTRS ~OwningPtr() {
66+
if (ptr_ != pointer_type{}) {
67+
delete_ptr(ptr_);
68+
ptr_ = pointer_type{};
69+
}
70+
}
71+
72+
// Release the ownership.
73+
RT_API_ATTRS pointer_type release() {
74+
pointer_type p = ptr_;
75+
ptr_ = pointer_type{};
76+
return p;
77+
}
78+
79+
// Replace the pointer.
80+
RT_API_ATTRS void reset(pointer_type p = pointer_type{}) {
81+
std::swap(ptr_, p);
82+
if (p != pointer_type{}) {
83+
// Delete the owned pointer.
84+
delete_ptr(p);
85+
}
86+
}
87+
88+
// Exchange the pointer with another object.
89+
RT_API_ATTRS void swap(OwningPtr &other) { std::swap(ptr_, other.ptr_); }
90+
91+
// Get the stored pointer.
92+
RT_API_ATTRS pointer_type get() const { return ptr_; }
93+
94+
RT_API_ATTRS explicit operator bool() const {
95+
return get() != pointer_type{};
96+
}
97+
98+
RT_API_ATTRS typename std::add_lvalue_reference<A>::type operator*() const {
99+
assert(get() != pointer_type{});
100+
return *get();
101+
}
102+
103+
RT_API_ATTRS pointer_type operator->() const { return get(); }
104+
105+
private:
106+
RT_API_ATTRS void delete_ptr(pointer_type p) { FreeMemory(p); }
107+
pointer_type ptr_{};
37108
};
38109

39-
template <typename A> using OwningPtr = std::unique_ptr<A, OwningPtrDeleter<A>>;
110+
template <typename X, typename Y>
111+
inline RT_API_ATTRS bool operator!=(
112+
const OwningPtr<X> &x, const OwningPtr<Y> &y) {
113+
return x.get() != y.get();
114+
}
115+
116+
template <typename X>
117+
inline RT_API_ATTRS bool operator!=(const OwningPtr<X> &x, std::nullptr_t) {
118+
return (bool)x;
119+
}
120+
121+
template <typename X>
122+
inline RT_API_ATTRS bool operator!=(std::nullptr_t, const OwningPtr<X> &x) {
123+
return (bool)x;
124+
}
40125

41126
template <typename A> class SizedNew {
42127
public:

flang/include/flang/Runtime/type-code.h

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,29 +26,33 @@ class TypeCode {
2626

2727
RT_API_ATTRS int raw() const { return raw_; }
2828

29-
constexpr bool IsValid() const {
29+
constexpr RT_API_ATTRS bool IsValid() const {
3030
return raw_ >= CFI_type_signed_char && raw_ <= CFI_TYPE_LAST;
3131
}
32-
constexpr bool IsInteger() const {
32+
constexpr RT_API_ATTRS bool IsInteger() const {
3333
return raw_ >= CFI_type_signed_char && raw_ <= CFI_type_ptrdiff_t;
3434
}
35-
constexpr bool IsReal() const {
35+
constexpr RT_API_ATTRS bool IsReal() const {
3636
return raw_ >= CFI_type_half_float && raw_ <= CFI_type_float128;
3737
}
38-
constexpr bool IsComplex() const {
38+
constexpr RT_API_ATTRS bool IsComplex() const {
3939
return raw_ >= CFI_type_half_float_Complex &&
4040
raw_ <= CFI_type_float128_Complex;
4141
}
42-
constexpr bool IsCharacter() const {
42+
constexpr RT_API_ATTRS bool IsCharacter() const {
4343
return raw_ == CFI_type_char || raw_ == CFI_type_char16_t ||
4444
raw_ == CFI_type_char32_t;
4545
}
46-
constexpr bool IsLogical() const {
46+
constexpr RT_API_ATTRS bool IsLogical() const {
4747
return raw_ == CFI_type_Bool ||
4848
(raw_ >= CFI_type_int_least8_t && raw_ <= CFI_type_int_least64_t);
4949
}
50-
constexpr bool IsDerived() const { return raw_ == CFI_type_struct; }
51-
constexpr bool IsIntrinsic() const { return IsValid() && !IsDerived(); }
50+
constexpr RT_API_ATTRS bool IsDerived() const {
51+
return raw_ == CFI_type_struct;
52+
}
53+
constexpr RT_API_ATTRS bool IsIntrinsic() const {
54+
return IsValid() && !IsDerived();
55+
}
5256

5357
RT_API_ATTRS std::optional<std::pair<TypeCategory, int>>
5458
GetCategoryAndKind() const;
@@ -65,7 +69,7 @@ class TypeCode {
6569
return thisCK && thatCK && *thisCK == *thatCK;
6670
}
6771
}
68-
bool operator!=(TypeCode that) const { return !(*this == that); }
72+
RT_API_ATTRS bool operator!=(TypeCode that) const { return !(*this == that); }
6973

7074
private:
7175
ISO::CFI_type_t raw_{CFI_type_other};

flang/runtime/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,10 @@ option(FLANG_EXPERIMENTAL_CUDA_RUNTIME
150150

151151
# List of files that are buildable for all devices.
152152
set(supported_files
153+
descriptor.cpp
154+
terminator.cpp
153155
transformational.cpp
156+
type-code.cpp
154157
)
155158

156159
if (FLANG_EXPERIMENTAL_CUDA_RUNTIME)
@@ -175,6 +178,11 @@ if (FLANG_EXPERIMENTAL_CUDA_RUNTIME)
175178
-Xclang -fcuda-allow-variadic-functions
176179
)
177180
endif()
181+
if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "NVIDIA")
182+
set(CUDA_COMPILE_OPTIONS
183+
--expt-relaxed-constexpr
184+
)
185+
endif()
178186
set_source_files_properties(${supported_files} PROPERTIES COMPILE_OPTIONS
179187
"${CUDA_COMPILE_OPTIONS}"
180188
)

flang/runtime/ISO_Fortran_util.h

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,15 @@
1818
#include <cstdlib>
1919

2020
namespace Fortran::ISO {
21-
static inline constexpr bool IsCharacterType(CFI_type_t ty) {
21+
static inline constexpr RT_API_ATTRS bool IsCharacterType(CFI_type_t ty) {
2222
return ty == CFI_type_char || ty == CFI_type_char16_t ||
2323
ty == CFI_type_char32_t;
2424
}
25-
static inline constexpr bool IsAssumedSize(const CFI_cdesc_t *dv) {
25+
static inline constexpr RT_API_ATTRS bool IsAssumedSize(const CFI_cdesc_t *dv) {
2626
return dv->rank > 0 && dv->dim[dv->rank - 1].extent == -1;
2727
}
2828

29-
static inline std::size_t MinElemLen(CFI_type_t type) {
29+
static inline RT_API_ATTRS std::size_t MinElemLen(CFI_type_t type) {
3030
auto typeParams{Fortran::runtime::TypeCode{type}.GetCategoryAndKind()};
3131
if (!typeParams) {
3232
Fortran::runtime::Terminator terminator{__FILE__, __LINE__};
@@ -38,10 +38,10 @@ static inline std::size_t MinElemLen(CFI_type_t type) {
3838
typeParams->first, typeParams->second);
3939
}
4040

41-
static inline int VerifyEstablishParameters(CFI_cdesc_t *descriptor,
42-
void *base_addr, CFI_attribute_t attribute, CFI_type_t type,
43-
std::size_t elem_len, CFI_rank_t rank, const CFI_index_t extents[],
44-
bool external) {
41+
static inline RT_API_ATTRS int VerifyEstablishParameters(
42+
CFI_cdesc_t *descriptor, void *base_addr, CFI_attribute_t attribute,
43+
CFI_type_t type, std::size_t elem_len, CFI_rank_t rank,
44+
const CFI_index_t extents[], bool external) {
4545
if (attribute != CFI_attribute_other && attribute != CFI_attribute_pointer &&
4646
attribute != CFI_attribute_allocatable) {
4747
return CFI_INVALID_ATTRIBUTE;
@@ -77,9 +77,9 @@ static inline int VerifyEstablishParameters(CFI_cdesc_t *descriptor,
7777
return CFI_SUCCESS;
7878
}
7979

80-
static inline void EstablishDescriptor(CFI_cdesc_t *descriptor, void *base_addr,
81-
CFI_attribute_t attribute, CFI_type_t type, std::size_t elem_len,
82-
CFI_rank_t rank, const CFI_index_t extents[]) {
80+
static inline RT_API_ATTRS void EstablishDescriptor(CFI_cdesc_t *descriptor,
81+
void *base_addr, CFI_attribute_t attribute, CFI_type_t type,
82+
std::size_t elem_len, CFI_rank_t rank, const CFI_index_t extents[]) {
8383
descriptor->base_addr = base_addr;
8484
descriptor->elem_len = elem_len;
8585
descriptor->version = CFI_VERSION;

flang/runtime/derived.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
#ifndef FORTRAN_RUNTIME_DERIVED_H_
1212
#define FORTRAN_RUNTIME_DERIVED_H_
1313

14+
#include "flang/Runtime/api-attrs.h"
15+
1416
namespace Fortran::runtime::typeInfo {
1517
class DerivedType;
1618
}
@@ -21,21 +23,21 @@ class Terminator;
2123

2224
// Perform default component initialization, allocate automatic components.
2325
// Returns a STAT= code (0 when all's well).
24-
int Initialize(const Descriptor &, const typeInfo::DerivedType &, Terminator &,
25-
bool hasStat = false, const Descriptor *errMsg = nullptr);
26+
RT_API_ATTRS int Initialize(const Descriptor &, const typeInfo::DerivedType &,
27+
Terminator &, bool hasStat = false, const Descriptor *errMsg = nullptr);
2628

2729
// Call FINAL subroutines, if any
28-
void Finalize(
30+
RT_API_ATTRS void Finalize(
2931
const Descriptor &, const typeInfo::DerivedType &derived, Terminator *);
3032

3133
// Call FINAL subroutines, deallocate allocatable & automatic components.
3234
// Does not deallocate the original descriptor.
33-
void Destroy(const Descriptor &, bool finalize, const typeInfo::DerivedType &,
34-
Terminator *);
35+
RT_API_ATTRS void Destroy(const Descriptor &, bool finalize,
36+
const typeInfo::DerivedType &, Terminator *);
3537

3638
// Return true if the passed descriptor is for a derived type
3739
// entity that has a dynamic (allocatable, automatic) component.
38-
bool HasDynamicComponent(const Descriptor &);
40+
RT_API_ATTRS bool HasDynamicComponent(const Descriptor &);
3941

4042
} // namespace Fortran::runtime
4143
#endif // FORTRAN_RUNTIME_DERIVED_H_

0 commit comments

Comments
 (0)