Skip to content

Commit 2bd8484

Browse files
committed
Merge remote-tracking branch 'intel/sycl' into steffen/rework_redu_res_acq
2 parents 5cd62c9 + ca9fea6 commit 2bd8484

File tree

9,146 files changed

+513990
-359248
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

9,146 files changed

+513990
-359248
lines changed

.github/CODEOWNERS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ clang/ @intel/dpcpp-cfe-reviewers
66
# Clang driver
77
clang/**/Driver/ @intel/dpcpp-clang-driver-reviewers
88

9+
# Clang tools
10+
clang-tools-extra/ @intel/dpcpp-cfe-reviewers
11+
912
# LLVM-SPIRV translator
1013
llvm-spirv/ @intel/dpcpp-spirv-reviewers
1114

.github/workflows/llvm-bugs.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ jobs:
4848
4949
subject: `[Bug ${issue.data.number}] ${issue.data.title}`,
5050
template: "new-github-issue",
51+
'o:tracking-clicks': 'no',
5152
'h:X-Mailgun-Variables': JSON.stringify(payload)
5253
};
5354

.github/workflows/sycl_linux_build_and_test.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ on:
3232
build_configure_extra_args:
3333
type: string
3434
required: false
35-
default: "--hip --cuda"
35+
default: "--hip --cuda --enable-esimd-emulator"
3636
build_artifact_suffix:
3737
type: string
3838
required: true
@@ -200,6 +200,11 @@ jobs:
200200
image: ${{ matrix.image }}
201201
options: ${{ matrix.container_options }}
202202
steps:
203+
- name: Reset GPU
204+
if: ${{ contains(matrix.config, 'gen9') }}
205+
run: |
206+
sudo mount -t debugfs none /sys/kernel/debug
207+
sudo bash -c 'echo 1 > /sys/kernel/debug/dri/0/i915_wedged'
203208
- run: cp -r /actions .
204209
- name: Register cleanup after job is finished
205210
uses: ./actions/cleanup

.github/workflows/sycl_precommit.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,4 +39,4 @@ jobs:
3939
build_cache_size: "8G"
4040
build_artifact_suffix: "default"
4141
build_cache_suffix: "default"
42-
lts_config: "hip_amdgpu;ocl_x64;ocl_gen9;l0_gen9"
42+
lts_config: "hip_amdgpu;ocl_x64;ocl_gen9;l0_gen9;esimd_emu"

.mailmap

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
# Combinations of both are possible too, see
1717
# https://git-scm.com/docs/gitmailmap for format details.
1818
#
19-
# You can commit changes for your own names and email addresses without review.
19+
# You can commit changes for your own names and email addresses without review.
2020
# If you want to add entries for other people, please have them review the
2121
# addition.
2222
#

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,9 @@ class BinaryContext {
489489
void adjustCodePadding();
490490

491491
/// Regular page size.
492-
static constexpr unsigned RegularPageSize = 0x1000;
492+
unsigned RegularPageSize{0x1000};
493+
static constexpr unsigned RegularPageSizeX86 = 0x1000;
494+
static constexpr unsigned RegularPageSizeAArch64 = 0x10000;
493495

494496
/// Huge page size to use.
495497
static constexpr unsigned HugePageSize = 0x200000;

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1299,10 +1299,11 @@ class BinaryFunction {
12991299
case ELF::R_X86_64_32:
13001300
case ELF::R_X86_64_32S:
13011301
case ELF::R_X86_64_64:
1302+
case ELF::R_X86_64_PC8:
1303+
case ELF::R_X86_64_PC32:
1304+
case ELF::R_X86_64_PC64:
13021305
Relocations[Offset] = Relocation{Offset, Symbol, RelType, Addend, Value};
13031306
return;
1304-
case ELF::R_X86_64_PC32:
1305-
case ELF::R_X86_64_PC8:
13061307
case ELF::R_X86_64_PLT32:
13071308
case ELF::R_X86_64_GOTPCRELX:
13081309
case ELF::R_X86_64_REX_GOTPCRELX:

bolt/include/bolt/Core/DebugData.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,7 @@ class DebugInfoBinaryPatcher : public SimpleBinaryPatcher {
490490
PatchBaseClass,
491491
PatchValue32,
492492
PatchValue64to32,
493+
PatchValue32GenericSize,
493494
PatchValue64,
494495
PatchValueVariable,
495496
ReferencePatchValue,
@@ -536,6 +537,22 @@ class DebugInfoBinaryPatcher : public SimpleBinaryPatcher {
536537
uint32_t Value;
537538
};
538539

540+
/// Patch for 4 byte entry, where original entry size is not 4 bytes or 8
541+
/// bytes.
542+
struct DebugPatch32GenericSize : public Patch {
543+
DebugPatch32GenericSize(uint32_t O, uint32_t V, uint32_t OVS)
544+
: Patch(O, DebugPatchKind::PatchValue32GenericSize) {
545+
Value = V;
546+
OldValueSize = OVS;
547+
}
548+
549+
static bool classof(const Patch *Writer) {
550+
return Writer->getKind() == DebugPatchKind::PatchValue32GenericSize;
551+
}
552+
uint32_t Value;
553+
uint32_t OldValueSize;
554+
};
555+
539556
struct DebugPatch64 : public Patch {
540557
DebugPatch64(uint32_t O, uint64_t V)
541558
: Patch(O, DebugPatchKind::PatchValue64) {
@@ -693,6 +710,9 @@ class DebugInfoBinaryPatcher : public SimpleBinaryPatcher {
693710
case DebugPatchKind::PatchValue64to32:
694711
delete reinterpret_cast<DebugPatch64to32 *>(P);
695712
break;
713+
case DebugPatchKind::PatchValue32GenericSize:
714+
delete reinterpret_cast<DebugPatch32GenericSize *>(P);
715+
break;
696716
case DebugPatchKind::PatchValue64:
697717
delete reinterpret_cast<DebugPatch64 *>(P);
698718
break;

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -424,9 +424,7 @@ class MCPlusBuilder {
424424

425425
/// Return a register number that is guaranteed to not match with
426426
/// any real register on the underlying architecture.
427-
virtual MCPhysReg getNoRegister() const {
428-
llvm_unreachable("not implemented");
429-
}
427+
MCPhysReg getNoRegister() const {
  // Plain (non-virtual) member: every caller gets MCRegister::NoRegister.
  return MCRegister::NoRegister;
}
430428

431429
/// Return a register corresponding to a function integer argument \p ArgNo
432430
/// if the argument is passed in a register. Or return the result of
@@ -528,11 +526,6 @@ class MCPlusBuilder {
528526
return false;
529527
}
530528

531-
virtual bool isMOVSX64rm32(const MCInst &Inst) const {
532-
llvm_unreachable("not implemented");
533-
return false;
534-
}
535-
536529
virtual bool isLeave(const MCInst &Inst) const {
537530
llvm_unreachable("not implemented");
538531
return false;
@@ -1294,6 +1287,16 @@ class MCPlusBuilder {
12941287
return false;
12951288
}
12961289

1290+
/// Convert a move instruction into a conditional move instruction, given a
1291+
/// condition code.
1292+
virtual bool
1293+
convertMoveToConditionalMove(MCInst &Inst, unsigned CC,
1294+
bool AllowStackMemOp = false,
1295+
bool AllowBasePtrStackMemOp = false) const {
1296+
llvm_unreachable("not implemented");
1297+
return false;
1298+
}
1299+
12971300
/// Lower a tail call instruction \p Inst if required by target.
12981301
virtual bool lowerTailCall(MCInst &Inst) {
12991302
llvm_unreachable("not implemented");
@@ -1327,6 +1330,16 @@ class MCPlusBuilder {
13271330
return IndirectBranchType::UNKNOWN;
13281331
}
13291332

1333+
/// Analyze branch \p Instruction in PLT section and try to determine
1334+
/// the associated GOT entry address.
1335+
virtual uint64_t analyzePLTEntry(MCInst &Instruction,
                                 InstructionIterator Begin,
                                 InstructionIterator End,
                                 uint64_t BeginPC) const {
  // Default stub: this base version performs no analysis and flags the call
  // through llvm_unreachable. The return below only satisfies the signature.
  llvm_unreachable("not implemented");
  return 0;
}
1342+
13301343
virtual bool analyzeVirtualMethodCall(InstructionIterator Begin,
13311344
InstructionIterator End,
13321345
std::vector<MCInst *> &MethodFetchInsns,

bolt/include/bolt/Core/Relocation.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ struct Relocation {
4949
/// Used to validate relocation correctness.
5050
uint64_t Value;
5151

52-
/// Return size of the given relocation \p Type.
52+
/// Return size in bytes of the given relocation \p Type.
5353
static size_t getSizeForType(uint64_t Type);
5454

5555
/// Return size of this relocation.
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
//===- bolt/Passes/CMOVConversion.h ----------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This pass finds the following patterns:
10+
// jcc
11+
// / \
12+
// (empty) mov src, dst
13+
// \ /
14+
//
15+
// and replaces them with:
16+
//
17+
// cmovcc src, dst
18+
//
19+
// The advantage of performing this conversion in BOLT (compared to compiler
20+
// heuristic driven instruction selection) is that BOLT can use LBR
21+
// misprediction information and only convert poorly predictable branches.
22+
// Note that branch misprediction rate is different from branch bias.
23+
// For well-predictable branches, it might be beneficial to leave jcc+mov as is
24+
// from microarchitectural perspective to avoid unneeded dependencies (CMOV
25+
// instruction has a dataflow dependence on flags and both operands).
26+
//
27+
//===----------------------------------------------------------------------===//
28+
29+
#ifndef BOLT_PASSES_CMOVCONVERSION_H
30+
#define BOLT_PASSES_CMOVCONVERSION_H
31+
32+
#include "bolt/Passes/BinaryPasses.h"
33+
34+
namespace llvm {
35+
namespace bolt {
36+
37+
/// Pass for folding eligible hammocks into CMOV's if profitable.
38+
class CMOVConversion : public BinaryFunctionPass {
39+
struct Stats {
  /// Record how many possible cases there are.
  uint64_t StaticPossible = 0;
  uint64_t DynamicPossible = 0;

  /// Record how many cases were converted.
  uint64_t StaticPerformed = 0;
  uint64_t DynamicPerformed = 0;

  /// Record how many mispredictions were eliminated.
  uint64_t PossibleMP = 0;
  uint64_t RemovedMP = 0;

  /// Add every counter of \p O into this object and return it.
  Stats &operator+=(const Stats &O) {
    StaticPossible += O.StaticPossible;
    DynamicPossible += O.DynamicPossible;
    StaticPerformed += O.StaticPerformed;
    DynamicPerformed += O.DynamicPerformed;
    PossibleMP += O.PossibleMP;
    RemovedMP += O.RemovedMP;
    return *this;
  }

  /// Return the counter-wise sum of this object and \p O.
  /// Neither operand is modified, so `A + B` leaves `A` intact while
  /// `A = A + B` still accumulates as before.
  Stats operator+(const Stats &O) const {
    Stats Sum = *this;
    Sum += O;
    return Sum;
  }

  // NOTE(review): none of the ratio helpers guards against a zero
  // denominator; callers should only rely on the result when the matching
  // "possible" counter is non-zero.

  /// Fraction of statically possible cases that were converted.
  double getStaticRatio() const {
    return static_cast<double>(StaticPerformed) / StaticPossible;
  }

  /// Fraction of dynamically possible cases that were converted.
  double getDynamicRatio() const {
    return static_cast<double>(DynamicPerformed) / DynamicPossible;
  }

  /// Fraction of possible mispredictions that were removed.
  double getMPRatio() const {
    return static_cast<double>(RemovedMP) / PossibleMP;
  }

  void dump();
};
69+
// BinaryContext-wide stats
70+
Stats Global;
71+
72+
void runOnFunction(BinaryFunction &Function);
73+
74+
public:
75+
explicit CMOVConversion() : BinaryFunctionPass(false) {}
76+
77+
const char *getName() const override { return "CMOV conversion"; }
78+
79+
void runOnFunctions(BinaryContext &BC) override;
80+
};
81+
82+
} // namespace bolt
83+
} // namespace llvm
84+
85+
#endif

bolt/include/bolt/Rewrite/RewriteInstance.h

Lines changed: 29 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ class RewriteInstance {
5656
Error setProfile(StringRef Filename);
5757

5858
/// Run all the necessary steps to read, optimize and rewrite the binary.
59-
void run();
59+
Error run();
6060

6161
/// Diff this instance against another one. Non-const since we may run passes
6262
/// to fold identical functions.
@@ -96,7 +96,7 @@ class RewriteInstance {
9696

9797
/// Read info from special sections. E.g. eh_frame and .gcc_except_table
9898
/// for exception and stack unwinding information.
99-
void readSpecialSections();
99+
Error readSpecialSections();
100100

101101
/// Adjust supplied command-line options based on input data.
102102
void adjustCommandLineOptions();
@@ -218,7 +218,7 @@ class RewriteInstance {
218218

219219
/// Detect addresses and offsets available in the binary for allocating
220220
/// new sections.
221-
void discoverStorage();
221+
Error discoverStorage();
222222

223223
/// Adjust function sizes and set proper maximum size values after the whole
224224
/// symbol table has been processed.
@@ -246,10 +246,23 @@ class RewriteInstance {
246246
/// Disassemble and create function entries for PLT.
247247
void disassemblePLT();
248248

249+
/// Auxiliary function to create .plt BinaryFunction at \p EntryAddress
250+
/// with the \p EntrySize size. \p TargetAddress is the .got entry
251+
/// associated address.
252+
void createPLTBinaryFunction(uint64_t TargetAddress, uint64_t EntryAddress,
253+
uint64_t EntrySize);
254+
255+
/// Disassemble aarch64-specific .plt \p Section auxiliary function
256+
void disassemblePLTSectionAArch64(BinarySection &Section);
257+
258+
/// Auxiliary function that disassembles the X86-specific .plt \p Section. \p EntrySize
259+
/// is the expected .plt \p Section entry function size.
260+
void disassemblePLTSectionX86(BinarySection &Section, uint64_t EntrySize);
261+
249262
/// ELF-specific part. TODO: refactor into new class.
250-
#define ELF_FUNCTION(FUNC) \
251-
template <typename ELFT> void FUNC(object::ELFObjectFile<ELFT> *Obj); \
252-
void FUNC() { \
263+
#define ELF_FUNCTION(TYPE, FUNC) \
264+
template <typename ELFT> TYPE FUNC(object::ELFObjectFile<ELFT> *Obj); \
265+
TYPE FUNC() { \
253266
if (auto *ELF32LE = dyn_cast<object::ELF32LEObjectFile>(InputFile)) \
254267
return FUNC(ELF32LE); \
255268
if (auto *ELF64LE = dyn_cast<object::ELF64LEObjectFile>(InputFile)) \
@@ -264,25 +277,25 @@ class RewriteInstance {
264277
void patchELFPHDRTable();
265278

266279
/// Create section header table.
267-
ELF_FUNCTION(patchELFSectionHeaderTable);
280+
ELF_FUNCTION(void, patchELFSectionHeaderTable);
268281

269282
/// Create the regular symbol table and patch dyn symbol tables.
270-
ELF_FUNCTION(patchELFSymTabs);
283+
ELF_FUNCTION(void, patchELFSymTabs);
271284

272285
/// Read dynamic section/segment of ELF.
273-
ELF_FUNCTION(readELFDynamic);
286+
ELF_FUNCTION(Error, readELFDynamic);
274287

275288
/// Patch dynamic section/segment of ELF.
276-
ELF_FUNCTION(patchELFDynamic);
289+
ELF_FUNCTION(void, patchELFDynamic);
277290

278291
/// Patch .got
279-
ELF_FUNCTION(patchELFGOT);
292+
ELF_FUNCTION(void, patchELFGOT);
280293

281294
/// Patch allocatable relocation sections.
282-
ELF_FUNCTION(patchELFAllocatableRelaSections);
295+
ELF_FUNCTION(void, patchELFAllocatableRelaSections);
283296

284297
/// Finalize memory image of section header string table.
285-
ELF_FUNCTION(finalizeSectionStringTable);
298+
ELF_FUNCTION(void, finalizeSectionStringTable);
286299

287300
/// Return a name of the input file section in the output file.
288301
template <typename ELFObjType, typename ELFShdrTy>
@@ -473,7 +486,7 @@ class RewriteInstance {
473486
/// multiple variants generated by different linkers.
474487
struct PLTSectionInfo {
475488
const char *Name;
476-
uint64_t EntrySize;
489+
uint64_t EntrySize{0};
477490
};
478491

479492
/// Different types of X86-64 PLT sections.
@@ -485,10 +498,8 @@ class RewriteInstance {
485498
};
486499

487500
/// AArch64 PLT sections.
488-
const PLTSectionInfo AArch64_PLTSections[2] = {
489-
{ ".plt", 16 },
490-
{ nullptr, 0 }
491-
};
501+
const PLTSectionInfo AArch64_PLTSections[3] = {
502+
{".plt"}, {".iplt"}, {nullptr}};
492503

493504
/// Return PLT information for a section with \p SectionName or nullptr
494505
/// if the section is not PLT.

bolt/include/bolt/Utils/CommandLineOpts.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ extern llvm::cl::OptionCategory BoltInstrCategory;
3030
extern llvm::cl::OptionCategory HeatmapCategory;
3131

3232
extern llvm::cl::opt<unsigned> AlignText;
33+
extern llvm::cl::opt<unsigned> AlignFunctions;
3334
extern llvm::cl::opt<bool> AggregateOnly;
3435
extern llvm::cl::opt<unsigned> BucketsPerLine;
3536
extern llvm::cl::opt<bool> DiffOnly;

bolt/lib/Core/BinaryContext.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
101101
InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)),
102102
MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)) {
103103
Relocation::Arch = this->TheTriple->getArch();
104+
RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
104105
PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
105106
}
106107

0 commit comments

Comments
 (0)