Skip to content

Commit 91f5f97

Browse files
authored
[OpenMP] Unconditionally provide an RPC client interface for OpenMP (#117933)
Summary: This patch adds an RPC interface that lives directly in the OpenMP device runtime. This allows OpenMP to implement custom opcodes. Currently this is only providing the host call interface, which is the raw version of reverse offloading. Previously this lived in `libc/` as an extension which is not the correct place. The interface here uses a weak symbol for the RPC client by the same name that the `libc` interface uses. This means that it will defer to the libc one if both are present so we don't need to set up multiple instances. The presense of this symbol is what controls whether or not we set up the RPC server. Because this is an external symbol it normally won't be optimized out, so there's a special pass in OpenMPOpt that deletes this symbol if it is unused during linking. That means at `O0` the RPC server will always be present now, but will be removed trivially if it's not used at O1 and higher.
1 parent 9b64811 commit 91f5f97

File tree

11 files changed

+133
-74
lines changed

11 files changed

+133
-74
lines changed

offload/DeviceRTL/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ set(bc_flags -c -foffload-lto -std=c++17 -fvisibility=hidden
131131
-DOMPTARGET_DEVICE_RUNTIME
132132
-I${include_directory}
133133
-I${devicertl_base_directory}/../include
134+
-I${LLVM_MAIN_SRC_DIR}/../libc
134135
${LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL}
135136
)
136137

@@ -275,6 +276,7 @@ function(compileDeviceRTLLibrary target_cpu target_name target_triple)
275276
target_compile_definitions(${ide_target_name} PRIVATE SHARED_SCRATCHPAD_SIZE=512)
276277
target_include_directories(${ide_target_name} PRIVATE
277278
${include_directory}
279+
${LLVM_MAIN_SRC_DIR}/../libc
278280
${devicertl_base_directory}/../include
279281
${LIBOMPTARGET_LLVM_INCLUDE_DIRS}
280282
)

offload/DeviceRTL/src/Misc.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
#include "Allocator.h"
1313
#include "Configuration.h"
1414
#include "DeviceTypes.h"
15+
#include "Shared/RPCOpcodes.h"
16+
#include "shared/rpc.h"
1517

1618
#include "Debug.h"
1719

@@ -110,6 +112,12 @@ void *indirectCallLookup(void *HstPtr) {
110112
return HstPtr;
111113
}
112114

115+
/// The openmp client instance used to communicate with the server.
116+
/// FIXME: This is marked as 'retain' so that it is not removed via
117+
/// `-mlink-builtin-bitcode`
118+
[[gnu::visibility("protected"), gnu::weak,
119+
gnu::retain]] rpc::Client Client asm("__llvm_rpc_client");
120+
113121
} // namespace impl
114122
} // namespace ompx
115123

@@ -156,6 +164,20 @@ void omp_free(void *ptr, omp_allocator_handle_t allocator) {
156164
return;
157165
}
158166
}
167+
168+
unsigned long long __llvm_omp_host_call(void *fn, void *data, size_t size) {
169+
rpc::Client::Port Port = ompx::impl::Client.open<OFFLOAD_HOST_CALL>();
170+
Port.send_n(data, size);
171+
Port.send([=](rpc::Buffer *buffer, uint32_t) {
172+
buffer->data[0] = reinterpret_cast<uintptr_t>(fn);
173+
});
174+
unsigned long long Ret;
175+
Port.recv([&](rpc::Buffer *Buffer, uint32_t) {
176+
Ret = static_cast<unsigned long long>(Buffer->data[0]);
177+
});
178+
Port.close();
179+
return Ret;
180+
}
159181
}
160182

161183
///}

offload/DeviceRTL/src/exports

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,5 @@ malloc
1515
free
1616
memcmp
1717
printf
18+
__llvm_rpc_client
1819
__assert_fail

offload/include/Shared/RPCOpcodes.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
//===-- Shared/RPCOpcodes.h - Offload specific RPC opcodes ----- C++ ------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Defines RPC opcodes that are specifically used by the OpenMP device runtime.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#ifndef OMPTARGET_SHARED_RPC_OPCODES_H
14+
#define OMPTARGET_SHARED_RPC_OPCODES_H
15+
16+
#define LLVM_OFFLOAD_RPC_BASE 'o'
17+
#define LLVM_OFFLOAD_OPCODE(n) (LLVM_OFFLOAD_RPC_BASE << 24 | n)
18+
19+
typedef enum {
20+
OFFLOAD_HOST_CALL = LLVM_OFFLOAD_OPCODE(0),
21+
} offload_opcode_t;
22+
23+
#undef LLVM_OFFLOAD_OPCODE
24+
25+
#endif // OMPTARGET_SHARED_RPC_OPCODES_H

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2148,9 +2148,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
21482148

21492149
/// We want to set up the RPC server for host services to the GPU if it is
21502150
/// availible.
2151-
bool shouldSetupRPCServer() const override {
2152-
return libomptargetSupportsRPC();
2153-
}
2151+
bool shouldSetupRPCServer() const override { return true; }
21542152

21552153
/// The RPC interface should have enough space for all availible parallelism.
21562154
uint64_t requestedRPCPortCount() const override {

offload/plugins-nextgen/common/CMakeLists.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,15 @@ endif()
2323

2424
# Include the RPC server from the `libc` project if availible.
2525
include(FindLibcCommonUtils)
26+
target_link_libraries(PluginCommon PRIVATE llvm-libc-common-utilities)
2627
if(TARGET llvmlibc_rpc_server AND ${LIBOMPTARGET_GPU_LIBC_SUPPORT})
27-
target_link_libraries(PluginCommon PRIVATE llvmlibc_rpc_server llvm-libc-common-utilities)
28+
target_link_libraries(PluginCommon PRIVATE llvmlibc_rpc_server)
2829
target_compile_definitions(PluginCommon PRIVATE LIBOMPTARGET_RPC_SUPPORT)
2930
elseif(${LIBOMPTARGET_GPU_LIBC_SUPPORT})
3031
find_library(llvmlibc_rpc_server NAMES llvmlibc_rpc_server
3132
PATHS ${LIBOMPTARGET_LLVM_LIBRARY_DIR} NO_DEFAULT_PATH)
3233
if(llvmlibc_rpc_server)
33-
target_link_libraries(PluginCommon PRIVATE ${llvmlibc_rpc_server} llvm-libc-common-utilities)
34+
target_link_libraries(PluginCommon PRIVATE ${llvmlibc_rpc_server})
3435
target_compile_definitions(PluginCommon PRIVATE LIBOMPTARGET_RPC_SUPPORT)
3536
endif()
3637
endif()

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1580,9 +1580,6 @@ template <typename ResourceRef> class GenericDeviceResourceManagerTy {
15801580
std::deque<ResourceRef> ResourcePool;
15811581
};
15821582

1583-
/// A static check on whether or not we support RPC in libomptarget.
1584-
bool libomptargetSupportsRPC();
1585-
15861583
} // namespace plugin
15871584
} // namespace target
15881585
} // namespace omp

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2179,11 +2179,3 @@ int32_t GenericPluginTy::get_function(__tgt_device_binary Binary,
21792179
*KernelPtr = &Kernel;
21802180
return OFFLOAD_SUCCESS;
21812181
}
2182-
2183-
bool llvm::omp::target::plugin::libomptargetSupportsRPC() {
2184-
#ifdef LIBOMPTARGET_RPC_SUPPORT
2185-
return true;
2186-
#else
2187-
return false;
2188-
#endif
2189-
}

offload/plugins-nextgen/common/src/RPC.cpp

Lines changed: 63 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -9,37 +9,85 @@
99
#include "RPC.h"
1010

1111
#include "Shared/Debug.h"
12+
#include "Shared/RPCOpcodes.h"
1213

1314
#include "PluginInterface.h"
1415

15-
// TODO: This should be included unconditionally and cleaned up.
16-
#if defined(LIBOMPTARGET_RPC_SUPPORT)
1716
#include "shared/rpc.h"
1817
#include "shared/rpc_opcodes.h"
19-
#endif
2018

2119
using namespace llvm;
2220
using namespace omp;
2321
using namespace target;
2422

23+
template <uint32_t NumLanes>
24+
rpc::Status handle_offload_opcodes(plugin::GenericDeviceTy &Device,
25+
rpc::Server::Port &Port) {
26+
27+
switch (Port.get_opcode()) {
28+
case RPC_MALLOC: {
29+
Port.recv_and_send([&](rpc::Buffer *Buffer, uint32_t) {
30+
Buffer->data[0] = reinterpret_cast<uintptr_t>(Device.allocate(
31+
Buffer->data[0], nullptr, TARGET_ALLOC_DEVICE_NON_BLOCKING));
32+
});
33+
break;
34+
}
35+
case RPC_FREE: {
36+
Port.recv([&](rpc::Buffer *Buffer, uint32_t) {
37+
Device.free(reinterpret_cast<void *>(Buffer->data[0]),
38+
TARGET_ALLOC_DEVICE_NON_BLOCKING);
39+
});
40+
break;
41+
}
42+
case OFFLOAD_HOST_CALL: {
43+
uint64_t Sizes[NumLanes] = {0};
44+
unsigned long long Results[NumLanes] = {0};
45+
void *Args[NumLanes] = {nullptr};
46+
Port.recv_n(Args, Sizes, [&](uint64_t Size) { return new char[Size]; });
47+
Port.recv([&](rpc::Buffer *buffer, uint32_t ID) {
48+
using FuncPtrTy = unsigned long long (*)(void *);
49+
auto Func = reinterpret_cast<FuncPtrTy>(buffer->data[0]);
50+
Results[ID] = Func(Args[ID]);
51+
});
52+
Port.send([&](rpc::Buffer *Buffer, uint32_t ID) {
53+
Buffer->data[0] = static_cast<uint64_t>(Results[ID]);
54+
delete[] reinterpret_cast<char *>(Args[ID]);
55+
});
56+
break;
57+
}
58+
default:
59+
return rpc::UNHANDLED_OPCODE;
60+
break;
61+
}
62+
return rpc::SUCCESS;
63+
}
64+
65+
static rpc::Status handle_offload_opcodes(plugin::GenericDeviceTy &Device,
66+
rpc::Server::Port &Port,
67+
uint32_t NumLanes) {
68+
if (NumLanes == 1)
69+
return handle_offload_opcodes<1>(Device, Port);
70+
else if (NumLanes == 32)
71+
return handle_offload_opcodes<32>(Device, Port);
72+
else if (NumLanes == 64)
73+
return handle_offload_opcodes<64>(Device, Port);
74+
else
75+
return rpc::ERROR;
76+
}
77+
2578
RPCServerTy::RPCServerTy(plugin::GenericPluginTy &Plugin)
2679
: Buffers(Plugin.getNumDevices()) {}
2780

2881
llvm::Expected<bool>
2982
RPCServerTy::isDeviceUsingRPC(plugin::GenericDeviceTy &Device,
3083
plugin::GenericGlobalHandlerTy &Handler,
3184
plugin::DeviceImageTy &Image) {
32-
#ifdef LIBOMPTARGET_RPC_SUPPORT
3385
return Handler.isSymbolInImage(Device, Image, "__llvm_rpc_client");
34-
#else
35-
return false;
36-
#endif
3786
}
3887

3988
Error RPCServerTy::initDevice(plugin::GenericDeviceTy &Device,
4089
plugin::GenericGlobalHandlerTy &Handler,
4190
plugin::DeviceImageTy &Image) {
42-
#ifdef LIBOMPTARGET_RPC_SUPPORT
4391
uint64_t NumPorts =
4492
std::min(Device.requestedRPCPortCount(), rpc::MAX_PORT_COUNT);
4593
void *RPCBuffer = Device.allocate(
@@ -62,13 +110,9 @@ Error RPCServerTy::initDevice(plugin::GenericDeviceTy &Device,
62110
Buffers[Device.getDeviceId()] = RPCBuffer;
63111

64112
return Error::success();
65-
66-
#endif
67-
return Error::success();
68113
}
69114

70115
Error RPCServerTy::runServer(plugin::GenericDeviceTy &Device) {
71-
#ifdef LIBOMPTARGET_RPC_SUPPORT
72116
uint64_t NumPorts =
73117
std::min(Device.requestedRPCPortCount(), rpc::MAX_PORT_COUNT);
74118
rpc::Server Server(NumPorts, Buffers[Device.getDeviceId()]);
@@ -77,41 +121,22 @@ Error RPCServerTy::runServer(plugin::GenericDeviceTy &Device) {
77121
if (!Port)
78122
return Error::success();
79123

80-
int Status = rpc::SUCCESS;
81-
switch (Port->get_opcode()) {
82-
case RPC_MALLOC: {
83-
Port->recv_and_send([&](rpc::Buffer *Buffer, uint32_t) {
84-
Buffer->data[0] = reinterpret_cast<uintptr_t>(Device.allocate(
85-
Buffer->data[0], nullptr, TARGET_ALLOC_DEVICE_NON_BLOCKING));
86-
});
87-
break;
88-
}
89-
case RPC_FREE: {
90-
Port->recv([&](rpc::Buffer *Buffer, uint32_t) {
91-
Device.free(reinterpret_cast<void *>(Buffer->data[0]),
92-
TARGET_ALLOC_DEVICE_NON_BLOCKING);
93-
});
94-
break;
95-
}
96-
default:
97-
// Let the `libc` library handle any other unhandled opcodes.
124+
int Status = handle_offload_opcodes(Device, *Port, Device.getWarpSize());
125+
126+
// Let the `libc` library handle any other unhandled opcodes.
127+
#ifdef LIBOMPTARGET_RPC_SUPPORT
128+
if (Status == rpc::UNHANDLED_OPCODE)
98129
Status = handle_libc_opcodes(*Port, Device.getWarpSize());
99-
break;
100-
}
101-
Port->close();
130+
#endif
102131

132+
Port->close();
103133
if (Status != rpc::SUCCESS)
104134
return createStringError("RPC server given invalid opcode!");
105135

106-
return Error::success();
107-
#endif
108136
return Error::success();
109137
}
110138

111139
Error RPCServerTy::deinitDevice(plugin::GenericDeviceTy &Device) {
112-
#ifdef LIBOMPTARGET_RPC_SUPPORT
113140
Device.free(Buffers[Device.getDeviceId()], TARGET_ALLOC_HOST);
114141
return Error::success();
115-
#endif
116-
return Error::success();
117142
}

offload/plugins-nextgen/cuda/src/rtl.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -496,9 +496,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
496496

497497
/// We want to set up the RPC server for host services to the GPU if it is
498498
/// availible.
499-
bool shouldSetupRPCServer() const override {
500-
return libomptargetSupportsRPC();
501-
}
499+
bool shouldSetupRPCServer() const override { return true; }
502500

503501
/// The RPC interface should have enough space for all availible parallelism.
504502
uint64_t requestedRPCPortCount() const override {
Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,18 @@
11
// RUN: %libomptarget-compile-run-and-check-generic
22

3-
// REQUIRES: libc
4-
53
#include <assert.h>
64
#include <omp.h>
75
#include <stdio.h>
86

97
#pragma omp begin declare variant match(device = {kind(gpu)})
108
// Extension provided by the 'libc' project.
11-
unsigned long long rpc_host_call(void *fn, void *args, size_t size);
12-
#pragma omp declare target to(rpc_host_call) device_type(nohost)
9+
unsigned long long __llvm_omp_host_call(void *fn, void *args, size_t size);
10+
#pragma omp declare target to(__llvm_omp_host_call) device_type(nohost)
1311
#pragma omp end declare variant
1412

1513
#pragma omp begin declare variant match(device = {kind(cpu)})
1614
// Dummy host implementation to make this work for all targets.
17-
unsigned long long rpc_host_call(void *fn, void *args, size_t size) {
15+
unsigned long long __llvm_omp_host_call(void *fn, void *args, size_t size) {
1816
return ((unsigned long long (*)(void *))fn)(args);
1917
}
2018
#pragma omp end declare variant
@@ -25,22 +23,14 @@ typedef struct args_s {
2523
} args_t;
2624

2725
// CHECK-DAG: Thread: 0, Block: 0
28-
// CHECK-DAG: Result: 42
2926
// CHECK-DAG: Thread: 1, Block: 0
30-
// CHECK-DAG: Result: 42
3127
// CHECK-DAG: Thread: 0, Block: 1
32-
// CHECK-DAG: Result: 42
3328
// CHECK-DAG: Thread: 1, Block: 1
34-
// CHECK-DAG: Result: 42
3529
// CHECK-DAG: Thread: 0, Block: 2
36-
// CHECK-DAG: Result: 42
3730
// CHECK-DAG: Thread: 1, Block: 2
38-
// CHECK-DAG: Result: 42
3931
// CHECK-DAG: Thread: 0, Block: 3
40-
// CHECK-DAG: Result: 42
4132
// CHECK-DAG: Thread: 1, Block: 3
42-
// CHECK-DAG: Result: 42
43-
long long foo(void *data) {
33+
unsigned long long foo(void *data) {
4434
assert(omp_is_initial_device() && "Not executing on host?");
4535
args_t *args = (args_t *)data;
4636
printf("Thread: %d, Block: %d\n", args->thread_id, args->block_id);
@@ -54,11 +44,19 @@ int main() {
5444
fn_ptr = (void *)&foo;
5545
#pragma omp target update to(fn_ptr)
5646

57-
#pragma omp target teams num_teams(4)
47+
int failed = 0;
48+
#pragma omp target teams num_teams(4) map(tofrom : failed)
5849
#pragma omp parallel num_threads(2)
5950
{
6051
args_t args = {omp_get_thread_num(), omp_get_team_num()};
61-
unsigned long long res = rpc_host_call(fn_ptr, &args, sizeof(args_t));
62-
printf("Result: %d\n", (int)res);
52+
unsigned long long res =
53+
__llvm_omp_host_call(fn_ptr, &args, sizeof(args_t));
54+
if (res != 42)
55+
#pragma omp atomic write
56+
failed = 1;
6357
}
58+
59+
// CHECK: PASS
60+
if (!failed)
61+
printf("PASS\n");
6462
}

0 commit comments

Comments
 (0)