Skip to content

Commit 5b8bf76

Browse files
doru1004 authored and ronlieb committed
Add upstream driver steps.
The default driver is the current driver, i.e. the one using the opaque linker. To use the upstream driver, i.e. the one using the clang-linker-wrapper, set the following environment variable to 1: `export CLANG_USE_LINKER_WRAPPER=1`. Change-Id: I13f542fd1afc16eedfcf4c4c7d1b9d0d72daa059
1 parent eda7d3b commit 5b8bf76

File tree

18 files changed

+52
-144
lines changed

18 files changed

+52
-144
lines changed

clang/lib/Driver/ToolChains/AMDGPU.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -665,7 +665,11 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
665665
StringRef TcTargetID) {
666666
// Add target ID features to -target-feature options. No diagnostics should
667667
// be emitted here since invalid target ID is diagnosed at other places.
668-
StringRef TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
668+
StringRef TargetID;
669+
if (Args.hasArg(options::OPT_mcpu_EQ))
670+
TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
671+
else if (Args.hasArg(options::OPT_march_EQ))
672+
TargetID = Args.getLastArgValue(options::OPT_march_EQ);
669673

670674
// Use this toolchain's TargetID if mcpu is not defined
671675
if (TargetID.empty() && !TcTargetID.empty())

clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -402,8 +402,8 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions(
402402
StringRef GPUArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
403403
assert(!GPUArch.empty() && "Must have an explicit GPU arch.");
404404

405-
CC1Args.push_back("-target-cpu");
406-
CC1Args.push_back(DriverArgs.MakeArgStringRef(GPUArch));
405+
assert(DeviceOffloadingKind == Action::OFK_OpenMP &&
406+
"Only OpenMP offloading kinds are supported.");
407407

408408
// Extract all the -m options
409409
std::vector<llvm::StringRef> Features;
@@ -435,7 +435,10 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions(
435435
options::OPT_fno_gpu_allow_device_init, false))
436436
CC1Args.push_back("-fgpu-allow-device-init");
437437

438-
CC1Args.push_back("-fcuda-allow-variadic-functions");
438+
// TODO: check if flag is needed for the opaque linker case
439+
const char *UseLinkerWrapper = std::getenv("CLANG_USE_LINKER_WRAPPER");
440+
if (!UseLinkerWrapper || atoi(UseLinkerWrapper) == 0)
441+
CC1Args.push_back("-fcuda-allow-variadic-functions");
439442

440443
// Default to "hidden" visibility, as object level linking will not be
441444
// supported for the foreseeable future.

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9577,9 +9577,15 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
95779577
}
95789578
}
95799579

9580-
if (!OpenMPTCs.empty() &&
9581-
Args.hasFlag(options::OPT_opaque_offload_linker,
9582-
options::OPT_no_opaque_offload_linker, isAMDGPU)) {
9580+
// Overwrite the default driver choice using an env var:
9581+
// CLANG_USE_LINKER_WRAPPER which can be 0 or 1.
9582+
bool UseOpaqueOffloadLinker = isAMDGPU;
9583+
if (const char *UseLinkerWrapper = std::getenv("CLANG_USE_LINKER_WRAPPER"))
9584+
UseOpaqueOffloadLinker = !(atoi(UseLinkerWrapper) == 1);
9585+
9586+
if (!OpenMPTCs.empty() && Args.hasFlag(options::OPT_opaque_offload_linker,
9587+
options::OPT_no_opaque_offload_linker,
9588+
UseOpaqueOffloadLinker)) {
95839589
ConstructOpaqueJob(C, JA, Output, Inputs, Args, TC->getTriple(),
95849590
LinkingOutput);
95859591
return;

clang/test/Driver/amdgpu-openmp-toolchain.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,8 @@
9494

9595
// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a:sramecc-:xnack+ \
9696
// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-TARGET-ID
97-
// CHECK-TARGET-ID: "-cc1" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-target-cpu" "gfx90a:sramecc-:xnack+" "-target-feature" "-sramecc" "-target-feature" "+xnack"
97+
// CHECK-TARGET-ID: "-cc1" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-target-cpu" "gfx90a" "-target-feature" "-sramecc" "-target-feature" "+xnack"
98+
9899
// CHECK-TARGET-ID: clang-offload-packager{{.*}}arch=gfx90a:sramecc-:xnack+,kind=openmp,feature=-sramecc,feature=+xnack
99100

100101
// RUN: not %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a,gfx90a:xnack+ \

clang/test/Driver/openmp-runtimelib.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
// REQUIRES: amdgpu-registered-target
22

3-
// Asan-Debug: /lib-debug/asan/libomptarget
4-
// Asan-Devel: /lib/asan/libomptarget
5-
// Asan-Perf: /lib-perf/asan/libomptarget
3+
// Asan-Debug: /lib-debug/asan
4+
// Asan-Devel: /lib/asan
5+
// Asan-Perf: /lib-perf/asan
66

77
// RUN: %clang -### -fopenmp -nogpuinc -nogpulib --offload-arch=gfx90a -fopenmp-runtimelib=lib-debug %s -O3 2>&1 \
88
// RUN: | FileCheck -check-prefixes=Debug %s
@@ -31,8 +31,8 @@
3131
// RUN: %clang -### -fopenmp -nogpuinc -nogpulib --offload-arch=gfx90a:xnack+ -fopenmp-target-fast -fsanitize=address -shared-libasan %s -O3 2>&1 \
3232
// RUN: | FileCheck -check-prefix=Asan-Devel %s
3333

34-
// Debug: /lib-debug/libomptarget
35-
// Perf: /lib-perf/libomptarget
36-
// Devel: /lib/../runtimes/runtimes-bins/offload/libomptarget
37-
// Default: /lib/../runtimes/runtimes-bins/offload/libomptarget
38-
// Error: clang: error: unsupported argument
34+
// Debug: /lib-debug
35+
// Perf: /lib-perf
36+
// Devel: /../lib
37+
// Default: /../lib
38+
// Error: clang: error: unsupported argument 'oopsy' to option '-fopenmp-runtimelib='

clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1211,27 +1211,10 @@ DerivedArgList getLinkerArgs(ArrayRef<OffloadFile> Input,
12111211

12121212
// Set the subarchitecture and target triple for this compilation.
12131213
const OptTable &Tbl = getOptTable();
1214-
StringRef Triple = Input.front().getBinary()->getTriple();
1215-
std::string AMDGPUFeatures;
1216-
1217-
if (llvm::Triple(Triple).isAMDGPU()) {
1218-
// Extract Features from the binary and append them in arch
1219-
auto Features = getTargetFeatures(Input);
1220-
for (auto feature : Features) {
1221-
AMDGPUFeatures.append(feature.substr(1, feature.size()) +
1222-
feature.substr(0, 1) + ":");
1223-
}
1224-
}
1214+
DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_arch_EQ),
1215+
Args.MakeArgString(Input.front().getBinary()->getArch()));
12251216
DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_triple_EQ),
1226-
Args.MakeArgString(Triple));
1227-
if (llvm::Triple(Triple).isAMDGPU() && !AMDGPUFeatures.empty())
1228-
DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_arch_EQ),
1229-
Args.MakeArgString(
1230-
Input.front().getBinary()->getArch() + ":" +
1231-
AMDGPUFeatures.substr(0, AMDGPUFeatures.size() - 1)));
1232-
else
1233-
DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_arch_EQ),
1234-
Args.MakeArgString(Input.front().getBinary()->getArch()));
1217+
Args.MakeArgString(Input.front().getBinary()->getTriple()));
12351218

12361219
// If every input file is bitcode we have whole program visibility as we
12371220
// do only support static linking with bitcode.

llvm/lib/Transforms/IPO/OpenMPOpt.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5598,13 +5598,12 @@ void OpenMPOpt::registerAAsForFunction(Attributor &A, const Function &F) {
55985598
UsedAssumedInformation, AA::Interprocedural);
55995599
continue;
56005600
}
5601-
#if 0 // fixme snap2 mi-teams nest_call_par2
56025601
if (auto *CI = dyn_cast<CallBase>(&I)) {
5603-
if (CI->isIndirectCall())
5602+
if (CI->isIndirectCall() && !F.getName().contains("__kmpc_parallel_51") &&
5603+
!F.getName().contains("__kmpc_parallel_spmd"))
56045604
A.getOrCreateAAFor<AAIndirectCallInfo>(
56055605
IRPosition::callsite_function(*CI));
56065606
}
5607-
#endif
56085607
if (auto *SI = dyn_cast<StoreInst>(&I)) {
56095608
A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI));
56105609
continue;

offload/DeviceRTL/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ set(src_files
123123
# propagation. That said, we will run the vectorizer again after the runtime
124124
# has been linked into the user program.
125125
set(clang_opt_flags -O3 -mllvm -openmp-opt-disable -DSHARED_SCRATCHPAD_SIZE=512 -mllvm -vectorize-slp=false )
126-
set(link_opt_flags -O3 -openmp-opt-disable -vectorize-slp=false )
126+
set(link_opt_flags -O3 -openmp-opt-disable -attributor-enable=module -vectorize-slp=false )
127127
set(link_export_flag -passes=internalize -internalize-public-api-file=${source_directory}/exports)
128128

129129
# If the user built with the GPU C library enabled we will use that instead.

offload/DeviceRTL/include/State.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ struct ThreadStateTy {
116116
}
117117
};
118118

119-
extern ThreadStateTy *ThreadStates[mapping::MaxThreadsPerTeam];
119+
extern ThreadStateTy **ThreadStates;
120120
#pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc)
121121

122122
/// Initialize the state machinery. Must be called by all threads.

offload/DeviceRTL/include/extra_allocators.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@ void __kmpc_flush_acqrel(IdentTy *Loc);
8888
void *__kmpc_task_allow_completion_event(IdentTy *loc_ref, uint32_t gtid,
8989
TaskDescriptorTy *task);
9090
///}
91-
9291
} // extern "C"
9392

9493
/// Extra API exposed by ROCm

offload/DeviceRTL/src/State.cpp

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -267,10 +267,7 @@ void state::TeamStateTy::assertEqual(TeamStateTy &Other) const {
267267
}
268268

269269
state::TeamStateTy SHARED(ompx::state::TeamState);
270-
271-
__attribute__((loader_uninitialized))
272-
state::ThreadStateTy *ompx::state::ThreadStates[mapping::MaxThreadsPerTeam];
273-
#pragma omp allocate(ompx::state::ThreadStates) allocator(omp_pteam_mem_alloc)
270+
state::ThreadStateTy **SHARED(ompx::state::ThreadStates);
274271

275272
namespace {
276273

@@ -294,11 +291,10 @@ void state::init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
294291
SharedMemorySmartStack.init(IsSPMD);
295292
if (mapping::isInitialThreadInLevel0(IsSPMD)) {
296293
TeamState.init(IsSPMD);
294+
ThreadStates = nullptr;
297295
KernelEnvironmentPtr = &KernelEnvironment;
298296
KernelLaunchEnvironmentPtr = &KernelLaunchEnvironment;
299297
}
300-
301-
ThreadStates[mapping::getThreadIdInBlock()] = nullptr;
302298
}
303299

304300
KernelEnvironmentTy &state::getKernelEnvironment() {
@@ -312,11 +308,12 @@ KernelLaunchEnvironmentTy &state::getKernelLaunchEnvironment() {
312308
void state::enterDataEnvironment(IdentTy *Ident) {
313309
ASSERT(config::mayUseThreadStates(),
314310
"Thread state modified while explicitly disabled!");
311+
if (!config::mayUseThreadStates())
312+
return;
315313

316314
unsigned TId = mapping::getThreadIdInBlock();
317315
ThreadStateTy *NewThreadState = static_cast<ThreadStateTy *>(
318316
memory::allocGlobal(sizeof(ThreadStateTy), "ThreadStates alloc"));
319-
#ifdef FIXME // breaks snap_red nested_par3 nest_call_par2
320317
uintptr_t *ThreadStatesBitsPtr = reinterpret_cast<uintptr_t *>(&ThreadStates);
321318
if (!atomic::load(ThreadStatesBitsPtr, atomic::seq_cst)) {
322319
uint32_t Bytes =
@@ -332,7 +329,6 @@ void state::enterDataEnvironment(IdentTy *Ident) {
332329
ASSERT(atomic::load(ThreadStatesBitsPtr, atomic::seq_cst),
333330
"Expected valid thread states bit!");
334331
}
335-
#endif
336332
NewThreadState->init(ThreadStates[TId]);
337333
TeamState.HasThreadState = true;
338334
ThreadStates[TId] = NewThreadState;
@@ -347,6 +343,9 @@ void state::exitDataEnvironment() {
347343
}
348344

349345
void state::resetStateForThread(uint32_t TId) {
346+
if (!config::mayUseThreadStates())
347+
return;
348+
350349
if (OMP_LIKELY(!TeamState.HasThreadState || !ThreadStates[TId]))
351350
return;
352351

offload/test/api/assert.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
// XFAIL: amdgcn-amd-amdhsa
21
// RUN: %libomptarget-compile-run-and-check-generic
32
// RUN: %libomptarget-compileopt-run-and-check-generic
43

offload/test/api/omp_dynamic_shared_memory_mixed.c

Lines changed: 0 additions & 53 deletions
This file was deleted.

offload/test/hsa/xnack-check.cpp

Lines changed: 0 additions & 31 deletions
This file was deleted.

offload/test/jit/empty_kernel_lvl1.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
// XFAIL: amdgcn-amd-amdhsa
21
// clang-format off
32
// RUN: %libomptarget-compileopt-generic -fopenmp-target-jit \
43
// RUN: -DTGT1_DIRECTIVE="target"

offload/test/jit/type_punning.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
// XFAIL: amdgcn-amd-amdhsa
21
// clang-format off
32
//
43
// RUN: %libomptarget-compileopt-generic -fopenmp-target-jit

offload/test/lit.cfg

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ if 'OMP_TARGET_OFFLOAD' in os.environ:
3434
if 'HSA_ENABLE_SDMA' in os.environ:
3535
config.environment['HSA_ENABLE_SDMA'] = os.environ['HSA_ENABLE_SDMA']
3636

37+
if 'CLANG_USE_LINKER_WRAPPER' in os.environ:
38+
config.environment['CLANG_USE_LINKER_WRAPPER'] = os.environ['CLANG_USE_LINKER_WRAPPER']
39+
3740
# Architectures like gfx942 may or may not be APUs so an additional environment
3841
# variable is required as some tests can be APU specific.
3942
if 'IS_APU' in os.environ:
@@ -146,11 +149,6 @@ elif config.libomptarget_current_target.startswith('amdgcn'):
146149
config.available_features.add('apu')
147150
if is_mi200:
148151
config.available_features.add('mi200')
149-
hsa_inc_dir = config.llvm_lib_directory + "/../../../rocr-runtime/src/inc"
150-
hsa_lib_dir = config.llvm_lib_directory + "/../../llvm-project/runtimes/rocr-runtime-prefix/src/rocr-runtime-build"
151-
if os.path.isdir(hsa_inc_dir) and os.path.isdir(hsa_lib_dir):
152-
config.available_features.add('hsa')
153-
config.test_flags = config.test_flags + " -I " + hsa_inc_dir + " -L " + hsa_lib_dir
154152

155153

156154
# Setup environment to find dynamic library at runtime
@@ -205,7 +203,7 @@ def add_libraries(source):
205203
# error:
206204
# error: Linking globals named '_ZN4ompx5state9TeamStateE': symbol multiply defined!
207205
# return source + " " + config.llvm_library_intdir + "/libomptarget.devicertl.a"
208-
return source
206+
return source + " " + config.llvm_library_intdir + "/../runtimes/runtimes-bins/offload/libomptarget.devicertl.a"
209207

210208
def get_arch_from_target(libomptarget_target):
211209
if libomptarget_target.startswith('amdgcn'):
@@ -265,6 +263,8 @@ for libomptarget_target in config.libomptarget_all_targets:
265263
"%libomptarget-compilexx-" + libomptarget_target))
266264
config.substitutions.append(("%libomptarget-compilexxx-generic-force-usm",
267265
"%libomptarget-compilexxx-force-usm-" + libomptarget_target))
266+
config.substitutions.append(("%libomptarget-compilexxx-generic-cuda",
267+
"%clangxxx-cuda-" + libomptarget_target))
268268
config.substitutions.append(("%libomptarget-compile-generic",
269269
"%libomptarget-compile-" + libomptarget_target))
270270
config.substitutions.append(("%libomptarget-compile-fortran-generic",
@@ -377,6 +377,8 @@ for libomptarget_target in config.libomptarget_all_targets:
377377
config.substitutions.append(("%flang-" + libomptarget_target, \
378378
"%flang %openmp_flags %flags %flags_flang --offload-arch=" +\
379379
get_arch_from_target(libomptarget_target)))
380+
config.substitutions.append(("%clangxxx-cuda-" + libomptarget_target, \
381+
"%clangxx %flags %flags_clang -foffload-via-llvm --offload-arch=native"))
380382
config.substitutions.append(("%fcheck-" + libomptarget_target, \
381383
config.libomptarget_filecheck + " %s"))
382384
else:

offload/test/offloading/static_linking.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
// XFAIL: amdgcn-amd-amdhsa
21
// RUN: %libomptarget-compile-generic -DLIBRARY -c -o %t.o
32
// RUN: ar rcs %t.a %t.o
43
// RUN: %libomptarget-compile-generic %t.a && %libomptarget-run-generic 2>&1 | %fcheck-generic

0 commit comments

Comments
 (0)