Skip to content

Commit 4d50803

Browse files
[libomptarget] Build DeviceRTL for amdgpu
Passes the same tests as the current DeviceRTL. Includes the CMake change from D111987. CI is showing a different set of passes/failures than local runs, so this is committed without the tests enabled by default while that difference is debugged. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D112227
1 parent d736002 commit 4d50803

21 files changed

+90
-13
lines changed

clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions(
252252
std::string BitcodeSuffix;
253253
if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime,
254254
options::OPT_fno_openmp_target_new_runtime, false))
255-
BitcodeSuffix = "new-amdgcn-" + GPUArch;
255+
BitcodeSuffix = "new-amdgpu-" + GPUArch;
256256
else
257257
BitcodeSuffix = "amdgcn-" + GPUArch;
258258

openmp/libomptarget/DeviceRTL/CMakeLists.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,5 @@ foreach(sm ${nvptx_sm_list})
226226
endforeach()
227227

228228
foreach(mcpu ${amdgpu_mcpus})
229-
# require D112227 or similar to enable the compilation for amdgpu
230-
# compileDeviceRTLLibrary(${mcpu} amdgpu -target amdgcn-amd-amdhsa -D__AMDGCN__ -fvisibility=default -nogpulib)
229+
compileDeviceRTLLibrary(${mcpu} amdgpu -target amdgcn-amd-amdhsa -D__AMDGCN__ -fvisibility=default -nogpulib)
231230
endforeach()

openmp/libomptarget/DeviceRTL/src/Configuration.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@ using namespace _OMP;
2020

2121
#pragma omp declare target
2222

23-
extern uint32_t __omp_rtl_debug_kind;
23+
extern uint32_t __omp_rtl_debug_kind; // defined by CGOpenMPRuntimeGPU
2424

25-
// TOOD: We want to change the name as soon as the old runtime is gone.
25+
// TODO: We want to change the name as soon as the old runtime is gone.
2626
DeviceEnvironmentTy CONSTANT(omptarget_device_environment)
2727
__attribute__((used));
2828

openmp/libomptarget/DeviceRTL/src/Synchronization.cpp

Lines changed: 69 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,23 @@ uint64_t atomicAdd(uint64_t *Address, uint64_t Val, int Ordering) {
6868
///{
6969
#pragma omp begin declare variant match(device = {arch(amdgcn)})
7070

71-
uint32_t atomicInc(uint32_t *Address, uint32_t Val, int Ordering) {
72-
return __builtin_amdgcn_atomic_inc32(Address, Val, Ordering, "");
71+
uint32_t atomicInc(uint32_t *A, uint32_t V, int Ordering) {
72+
// __builtin_amdgcn_atomic_inc32 should expand to this switch when
73+
// passed a runtime value, but does not do so yet. Workaround here.
74+
switch (Ordering) {
75+
default:
76+
__builtin_unreachable();
77+
case __ATOMIC_RELAXED:
78+
return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_RELAXED, "");
79+
case __ATOMIC_ACQUIRE:
80+
return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_ACQUIRE, "");
81+
case __ATOMIC_RELEASE:
82+
return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_RELEASE, "");
83+
case __ATOMIC_ACQ_REL:
84+
return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_ACQ_REL, "");
85+
case __ATOMIC_SEQ_CST:
86+
return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_SEQ_CST, "");
87+
}
7388
}
7489

7590
uint32_t SHARED(namedBarrierTracker);
@@ -126,18 +141,65 @@ void namedBarrier() {
126141
fence::team(__ATOMIC_RELEASE);
127142
}
128143

144+
// Sema checking of __builtin_amdgcn_fence is aggressive. The intention is to patch clang
145+
// so that it is usable within a template environment and so that a runtime
146+
// value of the memory order is expanded to this switch within clang/llvm.
147+
void fenceTeam(int Ordering) {
148+
switch (Ordering) {
149+
default:
150+
__builtin_unreachable();
151+
case __ATOMIC_ACQUIRE:
152+
return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");
153+
case __ATOMIC_RELEASE:
154+
return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup");
155+
case __ATOMIC_ACQ_REL:
156+
return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "workgroup");
157+
case __ATOMIC_SEQ_CST:
158+
return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
159+
}
160+
}
161+
void fenceKernel(int Ordering) {
162+
switch (Ordering) {
163+
default:
164+
__builtin_unreachable();
165+
case __ATOMIC_ACQUIRE:
166+
return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
167+
case __ATOMIC_RELEASE:
168+
return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "agent");
169+
case __ATOMIC_ACQ_REL:
170+
return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "agent");
171+
case __ATOMIC_SEQ_CST:
172+
return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent");
173+
}
174+
}
175+
void fenceSystem(int Ordering) {
176+
switch (Ordering) {
177+
default:
178+
__builtin_unreachable();
179+
case __ATOMIC_ACQUIRE:
180+
return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "");
181+
case __ATOMIC_RELEASE:
182+
return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "");
183+
case __ATOMIC_ACQ_REL:
184+
return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "");
185+
case __ATOMIC_SEQ_CST:
186+
return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
187+
}
188+
}
189+
129190
void syncWarp(__kmpc_impl_lanemask_t) {
130191
// AMDGCN doesn't need to sync threads in a warp
131192
}
132193

133194
void syncThreads() { __builtin_amdgcn_s_barrier(); }
134195
void syncThreadsAligned() { syncThreads(); }
135196

136-
void fenceTeam(int Ordering) { __builtin_amdgcn_fence(Ordering, "workgroup"); }
137-
138-
void fenceKernel(int Ordering) { __builtin_amdgcn_fence(Ordering, "agent"); }
139-
140-
void fenceSystem(int Ordering) { __builtin_amdgcn_fence(Ordering, ""); }
197+
// TODO: Wavefront lane locks are not implemented (and possibly cannot be), so the lock entry points below trap.
198+
void unsetLock(omp_lock_t *) { __builtin_trap(); }
199+
int testLock(omp_lock_t *) { __builtin_trap(); }
200+
void initLock(omp_lock_t *) { __builtin_trap(); }
201+
void destroyLock(omp_lock_t *) { __builtin_trap(); }
202+
void setLock(omp_lock_t *) { __builtin_trap(); }
141203

142204
#pragma omp end declare variant
143205
///}

openmp/libomptarget/test/mapping/data_member_ref.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
// amdgcn does not have printf definition
44
// XFAIL: amdgcn-amd-amdhsa
5+
// XFAIL: amdgcn-amd-amdhsa-newRTL
56

67
#include <stdio.h>
78

openmp/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
// amdgcn does not have printf definition
44
// XFAIL: amdgcn-amd-amdhsa
5+
// XFAIL: amdgcn-amd-amdhsa-newRTL
56

67
#include <cstdio>
78
#include <cstdlib>

openmp/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
// amdgcn does not have printf definition
44
// XFAIL: amdgcn-amd-amdhsa
5+
// XFAIL: amdgcn-amd-amdhsa-newRTL
56

67
#include <cstdio>
78
#include <cstdlib>

openmp/libomptarget/test/mapping/delete_inf_refcount.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
// fails with error message 'Unable to generate target entries' on amdgcn
44
// XFAIL: amdgcn-amd-amdhsa
5+
// XFAIL: amdgcn-amd-amdhsa-newRTL
56

67
#include <stdio.h>
78
#include <omp.h>

openmp/libomptarget/test/mapping/lambda_by_value.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
// amdgcn does not have printf definition
44
// XFAIL: amdgcn-amd-amdhsa
5+
// XFAIL: amdgcn-amd-amdhsa-newRTL
56

67
#include <stdio.h>
78
#include <stdint.h>

openmp/libomptarget/test/mapping/ompx_hold/struct.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
// amdgcn does not have printf definition
55
// XFAIL: amdgcn-amd-amdhsa
6+
// XFAIL: amdgcn-amd-amdhsa-newRTL
67

78
#include <omp.h>
89
#include <stdio.h>

openmp/libomptarget/test/mapping/ptr_and_obj_motion.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
// amdgcn does not have printf definition
44
// XFAIL: amdgcn-amd-amdhsa
5+
// XFAIL: amdgcn-amd-amdhsa-newRTL
56

67
#include <stdio.h>
78

openmp/libomptarget/test/mapping/reduction_implicit_map.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
// amdgcn does not have printf definition
44
// UNSUPPORTED: amdgcn-amd-amdhsa
5+
// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
56

67
#include <stdio.h>
78

openmp/libomptarget/test/offloading/bug49021.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
// Wrong results on amdgcn
44
// UNSUPPORTED: amdgcn-amd-amdhsa
5+
// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
56

67
#include <iostream>
78

openmp/libomptarget/test/offloading/bug49334.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
// Currently hangs on amdgpu
44
// UNSUPPORTED: amdgcn-amd-amdhsa
5-
5+
// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
66
// UNSUPPORTED: x86_64-pc-linux-gnu
77

88
#include <cassert>

openmp/libomptarget/test/offloading/bug50022.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// RUN: %libomptarget-compilexx-and-run-generic
22

33
// UNSUPPORTED: amdgcn-amd-amdhsa
4+
// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
45

56
#include <cassert>
67
#include <iostream>

openmp/libomptarget/test/offloading/global_constructor.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
// Fails in DAGToDAG on an address space problem
44
// UNSUPPORTED: amdgcn-amd-amdhsa
5+
// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
56

67
#include <cmath>
78
#include <cstdio>

openmp/libomptarget/test/offloading/host_as_target.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
// amdgcn does not have printf definition
1111
// XFAIL: amdgcn-amd-amdhsa
12+
// XFAIL: amdgcn-amd-amdhsa-newRTL
1213

1314
#include <stdio.h>
1415
#include <omp.h>

openmp/libomptarget/test/unified_shared_memory/api.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
// Fails on amdgcn with error: GPU Memory Error
66
// XFAIL: amdgcn-amd-amdhsa
7+
// XFAIL: amdgcn-amd-amdhsa-newRTL
78

89
#include <stdio.h>
910
#include <omp.h>

openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
// Fails on amdgcn with error: GPU Memory Error
77
// XFAIL: amdgcn-amd-amdhsa
8+
// XFAIL: amdgcn-amd-amdhsa-newRTL
89

910
#include <omp.h>
1011
#include <stdio.h>

openmp/libomptarget/test/unified_shared_memory/close_modifier.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
// amdgcn does not have printf definition
77
// XFAIL: amdgcn-amd-amdhsa
8+
// XFAIL: amdgcn-amd-amdhsa-newRTL
89

910
#include <omp.h>
1011
#include <stdio.h>

openmp/libomptarget/test/unified_shared_memory/shared_update.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
// amdgcn does not have printf definition
66
// XFAIL: amdgcn-amd-amdhsa
7+
// XFAIL: amdgcn-amd-amdhsa-newRTL
78

89
#include <stdio.h>
910
#include <omp.h>

0 commit comments

Comments
 (0)