Skip to content

Commit 5511b18

Browse files
committed
[HIP] support 128 bit int division
Currently nvcc supports 128 bit int division in device code. This patch adds support of 128 bit int division to HIP. It builds lib functions for 128 bit division in compiler-rt for amdgcn target. Then links compiler-rt with -mlink-bitcode-file. It adds support of archive of bitcode to -mlink-bitcode-file. Fixes: #71223 Fixes: SWDEV-426193
1 parent c6ecbcb commit 5511b18

File tree

12 files changed

+222
-12
lines changed

12 files changed

+222
-12
lines changed

clang/include/clang/CodeGen/CodeGenAction.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,16 @@
99
#ifndef LLVM_CLANG_CODEGEN_CODEGENACTION_H
1010
#define LLVM_CLANG_CODEGEN_CODEGENACTION_H
1111

12+
#include "clang/Basic/CodeGenOptions.h"
1213
#include "clang/Frontend/FrontendAction.h"
1314
#include <memory>
1415

1516
namespace llvm {
1617
class LLVMContext;
1718
class Module;
19+
namespace object {
20+
class Archive;
21+
}
1822
}
1923

2024
namespace clang {
@@ -54,8 +58,15 @@ class CodeGenAction : public ASTFrontendAction {
5458
std::unique_ptr<llvm::Module> loadModule(llvm::MemoryBufferRef MBRef);
5559

5660
/// Load bitcode modules to link into our module from the options.
61+
/// \returns true if error happens.
5762
bool loadLinkModules(CompilerInstance &CI);
5863

64+
/// Add bitcode modules in an archive to LinkModules.
65+
/// \returns true if error happens.
66+
bool addArchiveToLinkModules(llvm::object::Archive *Archive,
67+
const CodeGenOptions::BitcodeFileToLink &F,
68+
CompilerInstance &CI);
69+
5970
protected:
6071
/// Create a new code generation action. If the optional \p _VMContext
6172
/// parameter is supplied, the action uses it without taking ownership,

clang/lib/CodeGen/CodeGenAction.cpp

Lines changed: 99 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include "llvm/IRReader/IRReader.h"
4242
#include "llvm/LTO/LTOBackend.h"
4343
#include "llvm/Linker/Linker.h"
44+
#include "llvm/Object/Archive.h"
4445
#include "llvm/Pass.h"
4546
#include "llvm/Support/MemoryBuffer.h"
4647
#include "llvm/Support/SourceMgr.h"
@@ -940,33 +941,120 @@ CodeGenAction::~CodeGenAction() {
940941
delete VMContext;
941942
}
942943

944+
bool CodeGenAction::addArchiveToLinkModules(
945+
llvm::object::Archive *Archive, const CodeGenOptions::BitcodeFileToLink &F,
946+
CompilerInstance &CI) {
947+
Error Err = Error::success();
948+
949+
for (auto &Child : Archive->children(Err)) {
950+
Expected<llvm::MemoryBufferRef> ChildBufOrErr = Child.getMemoryBufferRef();
951+
if (!ChildBufOrErr) {
952+
handleAllErrors(ChildBufOrErr.takeError(),
953+
[&](const llvm::ErrorInfoBase &EIB) {
954+
CI.getDiagnostics().Report(diag::err_cannot_open_file)
955+
<< F.Filename << EIB.message();
956+
});
957+
LinkModules.clear();
958+
return true;
959+
}
960+
auto ChildBuffer = llvm::MemoryBuffer::getMemBufferCopy(
961+
ChildBufOrErr->getBuffer(), ChildBufOrErr->getBufferIdentifier());
962+
963+
if (!ChildBuffer) {
964+
handleAllErrors(ChildBufOrErr.takeError(),
965+
[&](const llvm::ErrorInfoBase &EIB) {
966+
CI.getDiagnostics().Report(diag::err_cannot_open_file)
967+
<< F.Filename << EIB.message();
968+
});
969+
LinkModules.clear();
970+
return true;
971+
}
972+
973+
Expected<std::unique_ptr<llvm::Module>> ChildModuleOrErr =
974+
getOwningLazyBitcodeModule(std::move(ChildBuffer), *VMContext);
975+
if (!ChildModuleOrErr) {
976+
handleAllErrors(ChildModuleOrErr.takeError(),
977+
[&](const llvm::ErrorInfoBase &EIB) {
978+
CI.getDiagnostics().Report(diag::err_cannot_open_file)
979+
<< F.Filename << EIB.message();
980+
});
981+
LinkModules.clear();
982+
return true;
983+
}
984+
985+
LinkModules.push_back({std::move(ChildModuleOrErr.get()), F.PropagateAttrs,
986+
F.Internalize, F.LinkFlags});
987+
}
988+
if (Err) {
989+
CI.getDiagnostics().Report(diag::err_cannot_open_file)
990+
<< F.Filename << toString(std::move(Err));
991+
LinkModules.clear();
992+
return true;
993+
}
994+
return false;
995+
}
996+
943997
bool CodeGenAction::loadLinkModules(CompilerInstance &CI) {
944998
if (!LinkModules.empty())
945999
return false;
9461000

9471001
for (const CodeGenOptions::BitcodeFileToLink &F :
9481002
CI.getCodeGenOpts().LinkBitcodeFiles) {
949-
auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename);
950-
if (!BCBuf) {
1003+
1004+
auto BCBufOrErr = CI.getFileManager().getBufferForFile(F.Filename);
1005+
if (!BCBufOrErr) {
9511006
CI.getDiagnostics().Report(diag::err_cannot_open_file)
952-
<< F.Filename << BCBuf.getError().message();
1007+
<< F.Filename << BCBufOrErr.getError().message();
9531008
LinkModules.clear();
9541009
return true;
9551010
}
9561011

1012+
auto &BCBuf = *BCBufOrErr;
1013+
9571014
Expected<std::unique_ptr<llvm::Module>> ModuleOrErr =
958-
getOwningLazyBitcodeModule(std::move(*BCBuf), *VMContext);
959-
if (!ModuleOrErr) {
960-
handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
961-
CI.getDiagnostics().Report(diag::err_cannot_open_file)
962-
<< F.Filename << EIB.message();
963-
});
1015+
getOwningLazyBitcodeModule(std::move(BCBuf), *VMContext);
1016+
1017+
if (ModuleOrErr) {
1018+
LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs,
1019+
F.Internalize, F.LinkFlags});
1020+
continue;
1021+
}
1022+
1023+
// If parsing as bitcode failed, clear the error and try to parse as an
1024+
// archive.
1025+
handleAllErrors(ModuleOrErr.takeError(),
1026+
[&](const llvm::ErrorInfoBase &EIB) {});
1027+
1028+
Expected<std::unique_ptr<llvm::object::Binary>> BinOrErr =
1029+
llvm::object::createBinary(BCBuf->getMemBufferRef(), VMContext);
1030+
1031+
if (!BinOrErr) {
1032+
handleAllErrors(BinOrErr.takeError(),
1033+
[&](const llvm::ErrorInfoBase &EIB) {
1034+
CI.getDiagnostics().Report(diag::err_cannot_open_file)
1035+
<< F.Filename << EIB.message();
1036+
});
1037+
LinkModules.clear();
1038+
return true;
1039+
}
1040+
1041+
std::unique_ptr<llvm::object::Binary> &Bin = *BinOrErr;
1042+
1043+
if (Bin->isArchive()) {
1044+
llvm::object::Archive *Archive =
1045+
llvm::cast<llvm::object::Archive>(Bin.get());
1046+
if (addArchiveToLinkModules(Archive, F, CI))
1047+
return true;
1048+
} else {
1049+
// It's not an archive, and we failed to parse it as bitcode, so report
1050+
// an error.
1051+
CI.getDiagnostics().Report(diag::err_cannot_open_file)
1052+
<< F.Filename << "Unrecognized file format";
9641053
LinkModules.clear();
9651054
return true;
9661055
}
967-
LinkModules.push_back({std::move(ModuleOrErr.get()), F.PropagateAttrs,
968-
F.Internalize, F.LinkFlags});
9691056
}
1057+
9701058
return false;
9711059
}
9721060

clang/lib/Driver/ToolChains/HIPAMD.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/Support/Alignment.h"
2222
#include "llvm/Support/FileSystem.h"
2323
#include "llvm/Support/Path.h"
24+
#include "llvm/Support/VirtualFileSystem.h"
2425
#include "llvm/TargetParser/TargetParser.h"
2526

2627
using namespace clang::driver;
@@ -403,6 +404,10 @@ HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
403404
BCLibs.emplace_back(AsanRTL, /*ShouldInternalize=*/false);
404405
}
405406

407+
auto BuiltinCRT = getCompilerRT(DriverArgs, "builtins");
408+
if (getVFS().exists(BuiltinCRT))
409+
BCLibs.emplace_back(BuiltinCRT, /*ShouldInternalize=*/false);
410+
406411
// Add the HIP specific bitcode library.
407412
BCLibs.push_back(RocmInstallation->getHIPPath());
408413

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
// Prepare archive of bitcode file.
2+
3+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm-bc \
4+
// RUN: -fcuda-is-device \
5+
// RUN: -disable-llvm-passes -DIS_LIB -o %t.bc -xhip %s
6+
7+
// RUN: rm -f %t.a
8+
// RUN: llvm-ar rcs %t.a %t.bc
9+
10+
// Link archive of bitcode file.
11+
12+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
13+
// RUN: -mlink-builtin-bitcode %t.a -emit-llvm \
14+
// RUN: -disable-llvm-passes -o - -xhip %s \
15+
// RUN: | FileCheck %s
16+
17+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
18+
// RUN: -mlink-bitcode-file %t.a -emit-llvm \
19+
// RUN: -disable-llvm-passes -o - -xhip %s \
20+
// RUN: | FileCheck %s
21+
22+
// Test empty file as arhive.
23+
24+
// RUN: rm -f %t.a
25+
// RUN: touch %t.a
26+
27+
// RUN: not %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
28+
// RUN: -mlink-builtin-bitcode %t.a -emit-llvm \
29+
// RUN: -disable-llvm-passes -o - -xhip %s 2>&1\
30+
// RUN: | FileCheck %s -check-prefix=INVLID
31+
32+
// Test invalid arhive.
33+
34+
// RUN: echo "!<arch>\nfake" >%t.a
35+
// RUN: not %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
36+
// RUN: -mlink-builtin-bitcode %t.a -emit-llvm \
37+
// RUN: -disable-llvm-passes -o - -xhip %s 2>&1 \
38+
// RUN: | FileCheck %s -check-prefix=INVLID
39+
40+
// Test archive of invalid bitcode file.
41+
42+
// RUN: echo "BC\xC0\xDE" >%t.bc
43+
// RUN: rm -f %t.a
44+
// RUN: llvm-ar rcs %t.a %t.bc
45+
// RUN: not %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
46+
// RUN: -mlink-builtin-bitcode %t.a -emit-llvm \
47+
// RUN: -disable-llvm-passes -o - -xhip %s 2>&1 \
48+
// RUN: | FileCheck %s -check-prefix=INVLID-BC
49+
50+
#include "Inputs/cuda.h"
51+
52+
#ifdef IS_LIB
53+
__device__ void libfun() {}
54+
#else
55+
__device__ void libfun();
56+
__global__ void kern() {
57+
libfun();
58+
}
59+
#endif
60+
61+
// CHECK: define {{.*}}void @_Z6libfunv()
62+
// INVLID: fatal error: cannot open file {{.*}}: The file was not recognized as a valid object file
63+
// INVLID-BC: fatal error: cannot open file {{.*}}: Invalid bitcode signature
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
!<arch>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
!<arch>
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// REQUIRES: x86-registered-target
2+
// REQUIRES: amdgpu-registered-target
3+
4+
// Check device compiler-rt is linked when available.
5+
6+
// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx1010 \
7+
// RUN: -nogpuinc -nostdinc --offload-device-only --rocm-path=%S/Inputs/rocm \
8+
// RUN: -resource-dir=%S/Inputs/device_compiler_rt_resource_dir \
9+
// RUN: %s 2>&1 | FileCheck %s
10+
11+
// CHECK: "-mlink-bitcode-file" "{{[^"]+(/|\\\\)device_compiler_rt_resource_dir(/|\\\\)lib(64)?(/|\\\\)amdgcn-amd-amdhsa(/|\\\\).*}}libclang_rt.builtins.a"
12+
// CHECK-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR:[^"]+(/|\\\\)rocm(/|\\\\)amdgcn(/|\\\\).*]]hip.bc"
13+
// CHECK-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR]]ocml.bc"
14+
// CHECK-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR]]ockl.bc"
15+
// CHECK-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR]]oclc_isa_version_{{[0-9]+}}.bc"

compiler-rt/cmake/Modules/CompilerRTUtils.cmake

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,8 @@ function(get_compiler_rt_target arch variable)
456456
endif()
457457
endif()
458458
set(target "${arch}${triple_suffix}")
459+
elseif(${arch} STREQUAL "amdgcn")
460+
set(target "amdgcn-amd-amdhsa")
459461
else()
460462
set(target "${arch}${triple_suffix}")
461463
endif()

compiler-rt/cmake/base-config-ix.cmake

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,11 @@ macro(test_targets)
194194
endif()
195195
endif()
196196

197+
set(COMPILER_RT_ENABLE_TARGET_AMDGCN OFF CACHE BOOL "Option to enable AMDGCN in Compiler RT")
198+
if (COMPILER_RT_ENABLE_TARGET_AMDGCN)
199+
add_default_target_arch("amdgcn")
200+
endif()
201+
197202
# Generate the COMPILER_RT_SUPPORTED_ARCH list.
198203
if(ANDROID)
199204
# Examine compiler output to determine target architecture.

compiler-rt/cmake/builtin-config-ix.cmake

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ set(SPARCV9 sparcv9)
6565
set(WASM32 wasm32)
6666
set(WASM64 wasm64)
6767
set(VE ve)
68+
set(AMDGCN amdgcn)
6869

6970
if(APPLE)
7071
set(ARM64 arm64 arm64e)
@@ -76,7 +77,7 @@ set(ALL_BUILTIN_SUPPORTED_ARCH
7677
${X86} ${X86_64} ${ARM32} ${ARM64} ${AVR}
7778
${HEXAGON} ${MIPS32} ${MIPS64} ${PPC32} ${PPC64}
7879
${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9}
79-
${WASM32} ${WASM64} ${VE} ${LOONGARCH64})
80+
${WASM32} ${WASM64} ${VE} ${LOONGARCH64} ${AMDGCN})
8081

8182
include(CompilerRTUtils)
8283
include(CompilerRTDarwinUtils)

compiler-rt/lib/builtins/CMakeLists.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,13 @@ set(aarch64_SOURCES
560560
aarch64/fp_mode.c
561561
)
562562

563+
set(amdgcn_SOURCES
564+
divti3.c
565+
udivmodti4.c
566+
truncdfbf2.c
567+
truncsfbf2.c
568+
)
569+
563570
if(COMPILER_RT_HAS_ASM_SME AND (COMPILER_RT_HAS_AUXV OR COMPILER_RT_BAREMETAL_BUILD))
564571
list(APPEND aarch64_SOURCES aarch64/sme-abi.S aarch64/sme-abi-init.c)
565572
message(STATUS "AArch64 SME ABI routines enabled")
@@ -846,6 +853,15 @@ else ()
846853
list(APPEND BUILTIN_CFLAGS_${arch} -fomit-frame-pointer -DCOMPILER_RT_ARMHF_TARGET)
847854
endif()
848855

856+
if (${arch} STREQUAL "amdgcn")
857+
list(APPEND BUILTIN_CFLAGS_${arch}
858+
--target=amdgcn-amd-amdhsa
859+
-emit-llvm
860+
-nogpuinc
861+
-nogpulib
862+
-Xclang -mcode-object-version=none )
863+
endif()
864+
849865
# For RISCV32, we must force enable int128 for compiling long
850866
# double routines.
851867
if(COMPILER_RT_ENABLE_SOFTWARE_INT128 OR "${arch}" STREQUAL "riscv32")

compiler-rt/lib/builtins/int_lib.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
#else
2727
#define COMPILER_RT_ABI __attribute__((__pcs__("aapcs")))
2828
#endif
29+
#elif __AMDGPU__
30+
#define COMPILER_RT_ABI __attribute__((amdgpu_lib_fun, weak))
2931
#else
3032
#define COMPILER_RT_ABI
3133
#endif

0 commit comments

Comments
 (0)