Skip to content

Commit 208544f

Browse files
authored
[flang][runtime] Added noinline for some functions in device build. (#93128)
This helps reduce the compilation time spent by the device compiler's optimizer and then by the code generator. Since the F18 RT is going to be distributed as LLVM BC for some targets (the same way the LLVM liboffload device library is distributed) and linked with the user's offload code, the compilation time of the produced LLVM BC will be critical.
1 parent 42b5dab commit 208544f

File tree

3 files changed

+27
-4
lines changed

3 files changed

+27
-4
lines changed

flang/include/flang/Common/api-attrs.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,4 +156,26 @@
156156
#define RT_DIAG_DISABLE_CALL_HOST_FROM_DEVICE_WARN
157157
#endif /* !defined(__CUDACC__) */
158158

159+
/*
160+
* RT_DEVICE_NOINLINE may be used for non-performance-critical
161+
* functions that should not be inlined to minimize the amount
162+
* of code that needs to be processed by the device compiler's
163+
* optimizer.
164+
*/
165+
#ifndef __has_attribute
166+
#define __has_attribute(x) 0
167+
#endif
168+
#if __has_attribute(noinline)
169+
#define RT_NOINLINE_ATTR __attribute__((noinline))
170+
#else
171+
#define RT_NOINLINE_ATTR
172+
#endif
173+
#if (defined(__CUDACC__) || defined(__CUDA__)) && defined(__CUDA_ARCH__)
174+
#define RT_DEVICE_NOINLINE RT_NOINLINE_ATTR
175+
#define RT_DEVICE_NOINLINE_HOST_INLINE
176+
#else
177+
#define RT_DEVICE_NOINLINE
178+
#define RT_DEVICE_NOINLINE_HOST_INLINE inline
179+
#endif
180+
159181
#endif /* !FORTRAN_RUNTIME_API_ATTRS_H_ */

flang/include/flang/Common/visit.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ namespace log2visit {
3030

3131
template <std::size_t LOW, std::size_t HIGH, typename RESULT, typename VISITOR,
3232
typename... VARIANT>
33-
inline RT_API_ATTRS RESULT Log2VisitHelper(
33+
RT_DEVICE_NOINLINE_HOST_INLINE RT_API_ATTRS RESULT Log2VisitHelper(
3434
VISITOR &&visitor, std::size_t which, VARIANT &&...u) {
3535
if constexpr (LOW + 7 >= HIGH) {
3636
switch (which - LOW) {
@@ -68,8 +68,9 @@ inline RT_API_ATTRS RESULT Log2VisitHelper(
6868
}
6969

7070
template <typename VISITOR, typename... VARIANT>
71-
inline RT_API_ATTRS auto visit(VISITOR &&visitor, VARIANT &&...u)
72-
-> decltype(visitor(std::get<0>(std::forward<VARIANT>(u))...)) {
71+
RT_DEVICE_NOINLINE_HOST_INLINE RT_API_ATTRS auto
72+
visit(VISITOR &&visitor, VARIANT &&...u) -> decltype(visitor(std::get<0>(
73+
std::forward<VARIANT>(u))...)) {
7374
using Result = decltype(visitor(std::get<0>(std::forward<VARIANT>(u))...));
7475
if constexpr (sizeof...(u) == 1) {
7576
static constexpr std::size_t high{

flang/runtime/terminator.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ class Terminator {
5454
// to regular printf for the device compilation.
5555
// Try to keep the inline implementations as small as possible.
5656
template <typename... Args>
57-
[[noreturn]] RT_API_ATTRS const char *Crash(
57+
[[noreturn]] RT_DEVICE_NOINLINE RT_API_ATTRS const char *Crash(
5858
const char *message, Args... args) const {
5959
#if !defined(RT_DEVICE_COMPILATION)
6060
// Invoke handler set up by the test harness.

0 commit comments

Comments
 (0)