Skip to content

Commit 07410dc

Browse files
gregrodgers authored and ronlieb committed
[OPENMP] Support simple FORTRAN print by implementing the Fortran Runtime on the device using hostexec. Only 4 RT functions are implemented in this initial commit. It is easy to add more Fortran Runtime functions as they are encountered in different test cases. Currently I know print* works with strings and integers. Comment on which Fortran Runtime functions we need by trying things and seeing what unresolved references the compiler generates.
Change-Id: Ic70e82510c09c05ccfaa51dab1a996903754276a
1 parent f308f16 commit 07410dc

File tree

4 files changed

+128
-4
lines changed

4 files changed

+128
-4
lines changed

offload/hostexec/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ endif()
6666

6767
add_compile_options($<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:-UNDEBUG>)
6868
set_property(TARGET amdgcn_hostexec_services PROPERTY POSITION_INDEPENDENT_CODE ON)
69+
# Link the amdgcn_hostexec_services target against FortranRuntime. The two -L
# flags add linker search paths: the lib directory two levels above
# CMAKE_BINARY_DIR and the lib directory under CMAKE_INSTALL_PREFIX.
target_link_libraries(amdgcn_hostexec_services FortranRuntime -L${CMAKE_BINARY_DIR}/../../lib -L${CMAKE_INSTALL_PREFIX}/lib)
6970

7071
if (LIBOMPTARGET_DEP_CUDA_FOUND)
7172
list(APPEND HOSTRPC_ARCHS "nvptx")
@@ -126,6 +127,7 @@ set(hostexec_stubs_filename ${CMAKE_CURRENT_SOURCE_DIR}/src/hostexec_stubs.cpp)
126127
set(h_file ${CMAKE_CURRENT_SOURCE_DIR}/src/hostexec.h)
127128
set(internal_h_file ${CMAKE_CURRENT_SOURCE_DIR}/src/hostexec_internal.h)
128129

130+
129131
foreach(archname ${HOSTRPC_ARCHS})
130132
if (${archname} STREQUAL "amdgcn")
131133
set(triple "amdgcn-amd-amdhsa")

offload/hostexec/services/execute_service.cpp

Lines changed: 64 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -184,9 +184,15 @@ static void handler_SERVICE_FREE(uint32_t device_id, uint64_t *payload) {
184184

185185
static bool trace_init = false;
186186
static bool host_exec_trace;
187-
static char* TrcStrs[HOSTEXEC_SID_VOID+1] = {"unsed", "terminate", "device_malloc",
188-
"host_malloc", "free", "printf", "fprintf", "ftnassign", "sanatizer",
189-
"uint", "uint64", "double", "int", "long", "float" , "void"};
187+
#define _CCHAR (const char *)
188+
static const char *TrcStrs[HOSTEXEC_SID_VOID + 1] = {
189+
_CCHAR "unsed", _CCHAR "terminate", _CCHAR "device_malloc",
190+
_CCHAR "host_malloc", _CCHAR "free", _CCHAR "printf",
191+
_CCHAR "fprintf", _CCHAR "ftnassign", _CCHAR "sanatizer",
192+
_CCHAR "uint", _CCHAR "uint64", _CCHAR "double",
193+
_CCHAR "int", _CCHAR "long", _CCHAR "float",
194+
_CCHAR "void"};
195+
#undef _CCAR
190196
// The consumer thread will serialize each active lane and call execute_service
191197
// for the service request. These services are intended to be architecturally
192198
// independent.
@@ -926,6 +932,43 @@ static service_rc hostrpc_call_fnptr(uint32_t NumArgs, void *fnptr,
926932
return _RC_SUCCESS;
927933
}
928934

935+
extern "C" {
936+
937+
extern void *V_FortranAioBeginExternalListOutput(void *fnptr, ...) {
938+
va_list args;
939+
va_start(args, fnptr);
940+
uint32_t v0 = va_arg(args, uint32_t);
941+
char *v1 = va_arg(args, char *);
942+
uint32_t v2 = va_arg(args, uint32_t);
943+
va_end(args);
944+
return _FortranAioBeginExternalListOutput(v0, v1, v2);
945+
}
946+
extern bool V_FortranAioOutputAscii(void *fnptr, ...) {
947+
va_list args;
948+
va_start(args, fnptr);
949+
void *v0 = va_arg(args, void *);
950+
char *v1 = va_arg(args, char *);
951+
uint64_t v2 = va_arg(args, uint64_t);
952+
va_end(args);
953+
return _FortranAioOutputAscii(v0, v1, v2);
954+
}
955+
extern bool V_FortranAioOutputInteger32(void *fnptr, ...) {
956+
va_list args;
957+
va_start(args, fnptr);
958+
void *v0 = va_arg(args, void *);
959+
uint32_t v1 = va_arg(args, uint32_t);
960+
va_end(args);
961+
return _FortranAioOutputInteger32(v0, v1);
962+
}
963+
extern uint32_t V_FortranAioEndIoStatement(void *fnptr, ...) {
964+
va_list args;
965+
va_start(args, fnptr);
966+
void *v0 = va_arg(args, void *);
967+
va_end(args);
968+
return _FortranAioEndIoStatement(v0);
969+
}
970+
}
971+
929972
template <typename T, typename FT>
930973
static service_rc hostexec_service(char *buf, size_t bufsz, T *return_value) {
931974
if (bufsz == 0)
@@ -959,6 +1002,24 @@ static service_rc hostexec_service(char *buf, size_t bufsz, T *return_value) {
9591002
&data_not_used, a) != _RC_SUCCESS)
9601003
return _RC_ERROR_INVALID_REQUEST;
9611004

1005+
uint64_t DeviceRuntime_idx = (uint64_t)fnptr;
1006+
switch (DeviceRuntime_idx) {
1007+
case _FortranAioBeginExternalListOutput_idx:
1008+
fnptr = (void *)V_FortranAioBeginExternalListOutput;
1009+
break;
1010+
case _FortranAioOutputAscii_idx:
1011+
fnptr = (void *)V_FortranAioOutputAscii;
1012+
break;
1013+
case _FortranAioOutputInteger32_idx:
1014+
fnptr = (void *)V_FortranAioOutputInteger32;
1015+
break;
1016+
case _FortranAioEndIoStatement_idx:
1017+
fnptr = (void *)V_FortranAioEndIoStatement;
1018+
break;
1019+
case _FortranAio_INVALID:
1020+
default:
1021+
break;
1022+
}
9621023
if (hostrpc_call_fnptr<T, FT>(NumArgs, fnptr, a, return_value) != _RC_SUCCESS)
9631024
return _RC_ERROR_INVALID_REQUEST;
9641025

offload/hostexec/src/hostexec_internal.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,4 +109,20 @@ enum hostexec_sid {
109109
HOSTEXEC_SID_VOID,
110110
};
111111

112+
// Indices of the device runtime functions that are executed on the host.
// A device stub passes one of these values, cast to a pointer, in place of a
// function pointer; the host maps the index back to the matching V_ function.
// Values are written out explicitly because both sides must agree on them.
enum KnownDeviceRuntime_idx {
  _FortranAio_INVALID = 0, // not a known runtime function; no remapping
  _FortranAioBeginExternalListOutput_idx = 1,
  _FortranAioOutputAscii_idx = 2,
  _FortranAioOutputInteger32_idx = 3,
  _FortranAioEndIoStatement_idx = 4
};
119+
120+
// C-linkage prototypes of the Fortran runtime I/O functions that hostexec
// currently knows how to forward. One prototype per function; the parameter
// lists here are the ones the device stubs and the host V_ trampolines use.
extern "C" {

// Returns an opaque pointer; presumably the statement handle that the
// functions below take as their first argument -- confirm against the
// Fortran runtime I/O API.
void *_FortranAioBeginExternalListOutput(uint32_t a1, char *a2, uint32_t a3);

// a2/a3 are a character buffer and its length.
bool _FortranAioOutputAscii(void *a1, char *a2, uint64_t a3);

// a2 is the 32-bit integer value.
bool _FortranAioOutputInteger32(void *a1, uint32_t a2);

uint32_t _FortranAioEndIoStatement(void *a1);

} // end of extern "C"
127+
112128
#endif // __HOSTEXEC_INTERNAL_H__

offload/hostexec/src/hostexec_stubs.cpp

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,51 @@ uint32_t __strlen_max(char *instr, uint32_t maxstrlen) {
233233
return maxstrlen;
234234
}
235235

236-
} // end extern "C"
236+
#if defined(__NVPTX__) || defined(__AMDGCN__)
237+
// These are the function definitions of known device runtime functions
238+
// that will be executed on the host using hostexec. The host functions
239+
// that implement the service are defined in services/execute_service.cpp.
240+
// These functions begin with V_ because they are variadic functions.
241+
// Variadic functions make it easy to reconstruct the exact arguments for
242+
// the call to the actual host runtime function.
243+
244+
// Since these variadic functions are defined only in execute_service.cpp,
245+
// we only send the index from the enum set of known functions as a fake
246+
// function pointer in the first arg. See the switch(DeviceRuntime_idx) in
247+
// execute_service.cpp, which determines the correct function pointer to
248+
// the V_ function based on this index.
249+
//
250+
// If you get an unresolved reference at device link time, then you have an
251+
// unimplemented device runtime function. To implement a new function, take
252+
// these steps.
253+
// 1. Find or build the C++ interface to this function and put the declaration in
254+
// hostexec_internal.h. Add a new value to the enum KnownDeviceRuntime_idx.
255+
// 2. Add the definition of the device function below with the same interface.
256+
// The definition should call the hostexec_<RT> with the proper return type
257+
// Use c-style typecasting if the return type you need does not have a
258+
// corresponding hostexec function.
259+
// 3. Add a new V_ function in execute_service.cpp and the new case in the
260+
// switch(DeviceRuntime_idx).
237261

262+
void *_FortranAioBeginExternalListOutput(uint32_t a1, char *a2, uint32_t a3) {
263+
void *enum2ptr = (void *)_FortranAioBeginExternalListOutput_idx;
264+
return (void *)hostexec_uint64(enum2ptr, a1, a2, a3);
265+
}
266+
bool _FortranAioOutputAscii(void *a1, char *a2, uint64_t a3) {
267+
// Must terminate the print string because it gets sent to host.
268+
a2[a3 - 1] = (char)0;
269+
void *enum2ptr = (void *)_FortranAioOutputAscii_idx;
270+
return (bool)hostexec_uint(enum2ptr, a1, a2, a3);
271+
}
272+
bool _FortranAioOutputInteger32(void *a1, uint32_t a2) {
273+
void *enum2ptr = (void *)_FortranAioOutputInteger32_idx;
274+
return (bool)hostexec_uint(enum2ptr, a1, a2);
275+
}
276+
uint32_t _FortranAioEndIoStatement(void *a1) {
277+
void *enum2ptr = (void *)_FortranAioEndIoStatement_idx;
278+
return (uint32_t)hostexec_uint(enum2ptr, a1);
279+
}
280+
#endif
281+
282+
} // end extern "C"
238283
#pragma omp end declare target

0 commit comments

Comments
 (0)