Skip to content

Commit 9c0632f

Browse files
committed
merge main into amd-staging
lands and reverts: 134401d [Offload] Move RPC server handling to a dedicated thread (llvm#112988); bd8a818 [Offload] Add cuLaunchHostFunc to dynamic cuda
2 parents 46dcee6 + 04d5608 commit 9c0632f

File tree

395 files changed

+18494
-2927
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

395 files changed

+18494
-2927
lines changed

.ci/generate-buildkite-pipeline-premerge

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -128,9 +128,8 @@ if [[ "${windows_projects}" != "" ]]; then
128128
limit: 2
129129
timeout_in_minutes: 150
130130
env:
131-
CC: 'cl'
132-
CXX: 'cl'
133-
LD: 'link'
131+
MAX_PARALLEL_COMPILE_JOBS: '16'
132+
MAX_PARALLEL_LINK_JOBS: '4'
134133
commands:
135134
- 'C:\\BuildTools\\Common7\\Tools\\VsDevCmd.bat -arch=amd64 -host_arch=amd64'
136135
- 'bash .ci/monolithic-windows.sh "$(echo ${windows_projects} | tr ' ' ';')" "$(echo ${windows_check_targets})"'

.ci/monolithic-windows.sh

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ echo "--- cmake"
5050
pip install -q -r "${MONOREPO_ROOT}"/mlir/python/requirements.txt
5151
pip install -q -r "${MONOREPO_ROOT}"/.ci/requirements.txt
5252

53+
export CC=cl
54+
export CXX=cl
55+
export LD=link
56+
5357
# The CMAKE_*_LINKER_FLAGS to disable the manifest come from research
5458
# on fixing a build reliability issue on the build server, please
5559
# see https://github.com/llvm/llvm-project/pull/82393 and
@@ -72,8 +76,8 @@ cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \
7276
-D CMAKE_EXE_LINKER_FLAGS="/MANIFEST:NO" \
7377
-D CMAKE_MODULE_LINKER_FLAGS="/MANIFEST:NO" \
7478
-D CMAKE_SHARED_LINKER_FLAGS="/MANIFEST:NO" \
75-
-D LLVM_PARALLEL_COMPILE_JOBS=16 \
76-
-D LLVM_PARALLEL_LINK_JOBS=4
79+
-D LLVM_PARALLEL_COMPILE_JOBS=${MAX_PARALLEL_COMPILE_JOBS} \
80+
-D LLVM_PARALLEL_LINK_JOBS=${MAX_PARALLEL_LINK_JOBS}
7781

7882
echo "--- ninja"
7983
# Targets are not escaped as they are passed as separate arguments.

.github/workflows/containers/github-action-ci-windows/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ RUN choco install -y handle
108108
109109
RUN pip3 install pywin32 buildbot-worker==2.8.4
110110
111-
ARG RUNNER_VERSION=2.321.0
111+
ARG RUNNER_VERSION=2.322.0
112112
ENV RUNNER_VERSION=$RUNNER_VERSION
113113
114114
RUN powershell -Command \

.github/workflows/containers/github-action-ci/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ WORKDIR /home/gha
9696

9797
FROM ci-container as ci-container-agent
9898

99-
ENV GITHUB_RUNNER_VERSION=2.321.0
99+
ENV GITHUB_RUNNER_VERSION=2.322.0
100100

101101
RUN mkdir actions-runner && \
102102
cd actions-runner && \

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1426,11 +1426,12 @@ class MCPlusBuilder {
14261426
}
14271427

14281428
/// Creates an indirect call to the function within the \p DirectCall PLT
1429-
/// stub. The function's memory location is pointed by the \p TargetLocation
1429+
/// stub. The function's address location is pointed to by the \p TargetLocation
14301430
/// symbol.
1431+
/// Moves instruction annotations from \p DirectCall to the indirect call.
14311432
virtual InstructionListType
1432-
createIndirectPltCall(const MCInst &DirectCall,
1433-
const MCSymbol *TargetLocation, MCContext *Ctx) {
1433+
createIndirectPLTCall(MCInst &&DirectCall, const MCSymbol *TargetLocation,
1434+
MCContext *Ctx) {
14341435
llvm_unreachable("not implemented");
14351436
return {};
14361437
}

bolt/lib/Passes/PLTCall.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,8 @@ Error PLTCall::runOnFunctions(BinaryContext &BC) {
7070
const BinaryFunction *CalleeBF = BC.getFunctionForSymbol(CallSymbol);
7171
if (!CalleeBF || !CalleeBF->isPLTFunction())
7272
continue;
73-
const InstructionListType NewCode = BC.MIB->createIndirectPltCall(
74-
*II, CalleeBF->getPLTSymbol(), BC.Ctx.get());
73+
const InstructionListType NewCode = BC.MIB->createIndirectPLTCall(
74+
std::move(*II), CalleeBF->getPLTSymbol(), BC.Ctx.get());
7575
II = BB.replaceInstruction(II, NewCode);
7676
assert(!NewCode.empty() && "PLT Call replacement must be non-empty");
7777
std::advance(II, NewCode.size() - 1);

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 26 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -834,6 +834,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
834834
/// # of this BB)
835835
/// br x0 # Indirect jump instruction
836836
///
837+
/// Return true on successful jump table instruction sequence match, false
838+
/// otherwise.
837839
bool analyzeIndirectBranchFragment(
838840
const MCInst &Inst,
839841
DenseMap<const MCInst *, SmallVector<MCInst *, 4>> &UDChain,
@@ -842,6 +844,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
842844
// Expect AArch64 BR
843845
assert(Inst.getOpcode() == AArch64::BR && "Unexpected opcode");
844846

847+
JumpTable = nullptr;
848+
845849
// Match the indirect branch pattern for aarch64
846850
SmallVector<MCInst *, 4> &UsesRoot = UDChain[&Inst];
847851
if (UsesRoot.size() == 0 || UsesRoot[0] == nullptr)
@@ -879,8 +883,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
879883
// Parsed as ADDXrs reg:x8 reg:x8 reg:x12 imm:0
880884
return false;
881885
}
882-
assert(DefAdd->getOpcode() == AArch64::ADDXrx &&
883-
"Failed to match indirect branch!");
886+
if (DefAdd->getOpcode() != AArch64::ADDXrx)
887+
return false;
884888

885889
// Validate ADD operands
886890
int64_t OperandExtension = DefAdd->getOperand(3).getImm();
@@ -897,8 +901,8 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
897901
// ldr w7, [x6]
898902
// add x6, x6, w7, sxtw => no shift amount
899903
// br x6
900-
errs() << "BOLT-WARNING: "
901-
"Failed to match indirect branch: ShiftVAL != 2 \n";
904+
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: "
905+
"failed to match indirect branch: ShiftVAL != 2\n");
902906
return false;
903907
}
904908

@@ -909,7 +913,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
909913
else if (ExtendType == AArch64_AM::SXTW)
910914
ScaleValue = 4LL;
911915
else
912-
llvm_unreachable("Failed to match indirect branch! (fragment 3)");
916+
return false;
913917

914918
// Match an ADR to load base address to be used when addressing JT targets
915919
SmallVector<MCInst *, 4> &UsesAdd = UDChain[DefAdd];
@@ -920,18 +924,15 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
920924
return false;
921925
}
922926
MCInst *DefBaseAddr = UsesAdd[1];
923-
assert(DefBaseAddr->getOpcode() == AArch64::ADR &&
924-
"Failed to match indirect branch pattern! (fragment 3)");
927+
if (DefBaseAddr->getOpcode() != AArch64::ADR)
928+
return false;
925929

926930
PCRelBase = DefBaseAddr;
927931
// Match LOAD to load the jump table (relative) target
928932
const MCInst *DefLoad = UsesAdd[2];
929-
assert(mayLoad(*DefLoad) &&
930-
"Failed to match indirect branch load pattern! (1)");
931-
assert((ScaleValue != 1LL || isLDRB(*DefLoad)) &&
932-
"Failed to match indirect branch load pattern! (2)");
933-
assert((ScaleValue != 2LL || isLDRH(*DefLoad)) &&
934-
"Failed to match indirect branch load pattern! (3)");
933+
if (!mayLoad(*DefLoad) || (ScaleValue == 1LL && !isLDRB(*DefLoad)) ||
934+
(ScaleValue == 2LL && !isLDRH(*DefLoad)))
935+
return false;
935936

936937
// Match ADD that calculates the JumpTable Base Address (not the offset)
937938
SmallVector<MCInst *, 4> &UsesLoad = UDChain[DefLoad];
@@ -941,7 +942,6 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
941942
isRegToRegMove(*DefJTBaseAdd, From, To)) {
942943
// Sometimes base address may have been defined in another basic block
943944
// (hoisted). Return with no jump table info.
944-
JumpTable = nullptr;
945945
return true;
946946
}
947947

@@ -953,24 +953,27 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
953953
// adr x12, 0x247b30 <__gettextparse+0x5b0>
954954
// add x13, x12, w13, sxth #2
955955
// br x13
956-
errs() << "BOLT-WARNING: Failed to match indirect branch: "
957-
"nop/adr instead of adrp/add \n";
956+
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: failed to match indirect branch: "
957+
"nop/adr instead of adrp/add\n");
958958
return false;
959959
}
960960

961-
assert(DefJTBaseAdd->getOpcode() == AArch64::ADDXri &&
962-
"Failed to match jump table base address pattern! (1)");
961+
if (DefJTBaseAdd->getOpcode() != AArch64::ADDXri) {
962+
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: failed to match jump table base "
963+
"address pattern! (1)\n");
964+
return false;
965+
}
963966

964967
if (DefJTBaseAdd->getOperand(2).isImm())
965968
Offset = DefJTBaseAdd->getOperand(2).getImm();
966969
SmallVector<MCInst *, 4> &UsesJTBaseAdd = UDChain[DefJTBaseAdd];
967970
const MCInst *DefJTBasePage = UsesJTBaseAdd[1];
968971
if (DefJTBasePage == nullptr || isLoadFromStack(*DefJTBasePage)) {
969-
JumpTable = nullptr;
970972
return true;
971973
}
972-
assert(DefJTBasePage->getOpcode() == AArch64::ADRP &&
973-
"Failed to match jump table base page pattern! (2)");
974+
if (DefJTBasePage->getOpcode() != AArch64::ADRP)
975+
return false;
976+
974977
if (DefJTBasePage->getOperand(1).isExpr())
975978
JumpTable = DefJTBasePage->getOperand(1).getExpr();
976979
return true;
@@ -1263,7 +1266,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
12631266
return true;
12641267
}
12651268

1266-
InstructionListType createIndirectPltCall(const MCInst &DirectCall,
1269+
InstructionListType createIndirectPLTCall(MCInst &&DirectCall,
12671270
const MCSymbol *TargetLocation,
12681271
MCContext *Ctx) override {
12691272
const bool IsTailCall = isTailCall(DirectCall);
@@ -1297,8 +1300,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
12971300
MCInst InstCall;
12981301
InstCall.setOpcode(IsTailCall ? AArch64::BR : AArch64::BLR);
12991302
InstCall.addOperand(MCOperand::createReg(AArch64::X17));
1300-
if (IsTailCall)
1301-
setTailCall(InstCall);
1303+
moveAnnotations(std::move(DirectCall), InstCall);
13021304
Code.emplace_back(InstCall);
13031305

13041306
return Code;

bolt/lib/Target/X86/X86MCPlusBuilder.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1605,7 +1605,7 @@ class X86MCPlusBuilder : public MCPlusBuilder {
16051605
return true;
16061606
}
16071607

1608-
InstructionListType createIndirectPltCall(const MCInst &DirectCall,
1608+
InstructionListType createIndirectPLTCall(MCInst &&DirectCall,
16091609
const MCSymbol *TargetLocation,
16101610
MCContext *Ctx) override {
16111611
assert((DirectCall.getOpcode() == X86::CALL64pcrel32 ||

bolt/test/AArch64/exceptions-plt.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// Verify that PLT optimization in BOLT preserves exception-handling info.
2+
3+
// REQUIRES: system-linux
4+
5+
// RUN: %clangxx %cxxflags -O1 -Wl,-q,-znow %s -o %t.exe
6+
// RUN: llvm-bolt %t.exe -o %t.bolt.exe --plt=all --print-only=.*main.* \
7+
// RUN: --print-finalized 2>&1 | FileCheck %s
8+
9+
// CHECK-LABEL: Binary Function
10+
// CHECK: adrp {{.*}}__cxa_throw
11+
// CHECK-NEXT: ldr {{.*}}__cxa_throw
12+
// CHECK-NEXT: blr x17 {{.*}} handler: {{.*}} PLTCall:
13+
14+
int main() {
15+
try {
16+
throw new int;
17+
} catch (...) {
18+
return 0;
19+
}
20+
return 1;
21+
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
## Verify that BOLT does not crash when encountering an instruction sequence that
2+
## does not perfectly match the jump table pattern.
3+
4+
# REQUIRES: system-linux
5+
6+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
7+
# RUN: %clang %cflags --target=aarch64-unknown-linux %t.o -o %t.exe -Wl,-q
8+
# RUN: llvm-bolt %t.exe -o %t.bolt --print-cfg 2>&1 | FileCheck %s
9+
10+
.section .text
11+
.align 4
12+
.globl _start
13+
.type _start, %function
14+
_start:
15+
sub w0, w0, #0x4a
16+
## The address loaded into x22 is undefined. However, the instructions that
17+
## follow the ldr use the x22 address as a regular jump table.
18+
ldr x22, [x29, #0x98]
19+
ldrb w0, [x22, w0, uxtw]
20+
adr x1, #12
21+
add x0, x1, w0, sxtb #2
22+
br x0
23+
# CHECK: br x0 # UNKNOWN
24+
.L0:
25+
ret
26+
.size _start, .-_start
27+
28+
## Force relocation mode.
29+
.reloc 0, R_AARCH64_NONE

bolt/test/AArch64/test-indirect-branch.s

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,11 @@
33

44
// clang-format off
55

6-
// REQUIRES: system-linux
6+
// REQUIRES: system-linux, asserts
7+
78
// RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
89
// RUN: %clang %cflags --target=aarch64-unknown-linux %t.o -o %t.exe -Wl,-q
9-
// RUN: llvm-bolt %t.exe -o %t.bolt --print-cfg --strict\
10+
// RUN: llvm-bolt %t.exe -o %t.bolt --print-cfg --strict --debug-only=mcplus \
1011
// RUN: -v=1 2>&1 | FileCheck %s
1112

1213
// Pattern 1: there is no shift amount after the 'add' instruction.
@@ -39,7 +40,7 @@ _start:
3940
// svc #0
4041

4142
// Pattern 1
42-
// CHECK: BOLT-WARNING: Failed to match indirect branch: ShiftVAL != 2
43+
// CHECK: BOLT-DEBUG: failed to match indirect branch: ShiftVAL != 2
4344
.globl test1
4445
.type test1, %function
4546
test1:
@@ -57,7 +58,7 @@ test1_2:
5758
ret
5859

5960
// Pattern 2
60-
// CHECK: BOLT-WARNING: Failed to match indirect branch: nop/adr instead of adrp/add
61+
// CHECK: BOLT-DEBUG: failed to match indirect branch: nop/adr instead of adrp/add
6162
.globl test2
6263
.type test2, %function
6364
test2:

bolt/test/runtime/exceptions-plt.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// Verify that PLT optimization in BOLT preserves exception-handling info.
2+
3+
// REQUIRES: system-linux
4+
5+
// RUN: %clangxx %cxxflags -O1 -Wl,-q,-znow %s -o %t.exe
6+
// RUN: llvm-bolt %t.exe -o %t.bolt.exe --plt=all
7+
// RUN: %t.bolt.exe
8+
9+
int main() {
10+
try {
11+
throw new int;
12+
} catch (...) {
13+
return 0;
14+
}
15+
return 1;
16+
}

clang/CMakeLists.txt

Lines changed: 0 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -897,58 +897,6 @@ if (CLANG_ENABLE_BOOTSTRAP)
897897
endforeach()
898898
endif()
899899

900-
set(CLANG_BOLT OFF CACHE STRING "Apply BOLT optimization to Clang. \
901-
May be specified as Instrument or Perf or LBR to use a particular profiling \
902-
mechanism.")
903-
string(TOUPPER "${CLANG_BOLT}" CLANG_BOLT)
904-
905-
if (CLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED)
906-
set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
907-
set(CLANG_INSTRUMENTED ${LLVM_RUNTIME_OUTPUT_INTDIR}/${CLANG_BOLT_INSTRUMENTED})
908-
set(BOLT_FDATA ${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/prof.fdata)
909-
910-
# Pass extra flag in no-LBR mode
911-
if (CLANG_BOLT STREQUAL "PERF")
912-
set(BOLT_NO_LBR "-nl")
913-
endif()
914-
915-
if (CLANG_BOLT STREQUAL "INSTRUMENT")
916-
# Instrument clang with BOLT
917-
add_custom_target(clang-instrumented
918-
DEPENDS ${CLANG_INSTRUMENTED}
919-
)
920-
add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
921-
DEPENDS clang llvm-bolt
922-
COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
923-
-instrument --instrumentation-file-append-pid
924-
--instrumentation-file=${BOLT_FDATA}
925-
COMMENT "Instrumenting clang binary with BOLT"
926-
USES_TERMINAL
927-
VERBATIM
928-
)
929-
add_custom_target(clang-bolt-training-deps DEPENDS clang-instrumented)
930-
else() # perf or LBR
931-
add_custom_target(clang-bolt-training-deps DEPENDS clang)
932-
endif()
933-
934-
# Optimize original (pre-bolt) Clang using the collected profile
935-
add_custom_target(clang-bolt
936-
DEPENDS clang-bolt-profile
937-
COMMAND ${CMAKE_COMMAND} -E rename $<TARGET_FILE:clang> ${CLANG_PATH}-prebolt
938-
COMMAND ${CMAKE_COMMAND} -E create_symlink ${CLANG_PATH}-prebolt ${CLANG_PATH}++-prebolt
939-
COMMAND llvm-bolt ${CLANG_PATH}-prebolt
940-
-o $<TARGET_FILE:clang>
941-
-data ${BOLT_FDATA}
942-
-reorder-blocks=ext-tsp -reorder-functions=cdsort -split-functions
943-
-split-all-cold -split-eh -dyno-stats -use-gnu-stack
944-
-update-debug-sections
945-
${BOLT_NO_LBR}
946-
COMMENT "Optimizing Clang with BOLT"
947-
USES_TERMINAL
948-
VERBATIM
949-
)
950-
endif()
951-
952900
if (LLVM_ADD_NATIVE_VISUALIZERS_TO_SOLUTION)
953901
add_subdirectory(utils/ClangVisualizers)
954902
endif()

0 commit comments

Comments
 (0)