Skip to content

Commit 77cafed

Browse files
committed
Merge branch 'sycl' into async_barrier
2 parents f110d25 + 1fe5eaa commit 77cafed

File tree

10,044 files changed

+550949
-367643
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

10,044 files changed

+550949
-367643
lines changed

.github/CODEOWNERS

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ clang/ @intel/dpcpp-cfe-reviewers
66
# Clang driver
77
clang/**/Driver/ @intel/dpcpp-clang-driver-reviewers
88

9+
# Clang tools
10+
clang-tools-extra/ @intel/dpcpp-cfe-reviewers
11+
912
# LLVM-SPIRV translator
1013
llvm-spirv/ @intel/dpcpp-spirv-reviewers
1114

@@ -45,7 +48,7 @@ xptifw/ @intel/llvm-reviewers-runtime
4548
llvm/ @intel/dpcpp-tools-reviewers
4649

4750
# Clang offload tools
48-
clang/tools/clang-offload-*/ @intel/dpcpp-tools-reviewers
51+
clang/tools/clang-offload-*/ @sndmitriev @intel/dpcpp-tools-reviewers
4952

5053
# Explicit SIMD
5154
ESIMD/ @intel/dpcpp-esimd-reviewers

.github/workflows/llvm-bugs.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ jobs:
4848
4949
subject: `[Bug ${issue.data.number}] ${issue.data.title}`,
5050
template: "new-github-issue",
51+
'o:tracking-clicks': 'no',
5152
'h:X-Mailgun-Variables': JSON.stringify(payload)
5253
};
5354

.github/workflows/sycl_linux_build_and_test.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ on:
3232
build_configure_extra_args:
3333
type: string
3434
required: false
35-
default: "--hip --cuda"
35+
default: "--hip --cuda --enable-esimd-emulator"
3636
build_artifact_suffix:
3737
type: string
3838
required: true
@@ -200,6 +200,11 @@ jobs:
200200
image: ${{ matrix.image }}
201201
options: ${{ matrix.container_options }}
202202
steps:
203+
- name: Reset GPU
204+
if: ${{ contains(matrix.config, 'gen9') }}
205+
run: |
206+
sudo mount -t debugfs none /sys/kernel/debug
207+
sudo bash -c 'echo 1 > /sys/kernel/debug/dri/0/i915_wedged'
203208
- run: cp -r /actions .
204209
- name: Register cleanup after job is finished
205210
uses: ./actions/cleanup

.github/workflows/sycl_precommit.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,4 +39,4 @@ jobs:
3939
build_cache_size: "8G"
4040
build_artifact_suffix: "default"
4141
build_cache_suffix: "default"
42-
lts_config: "hip_amdgpu;ocl_x64;ocl_gen9;l0_gen9"
42+
lts_config: "hip_amdgpu;ocl_x64;ocl_gen9;l0_gen9;esimd_emu"

.mailmap

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
# Combinations of both are possible too, see
1717
# https://git-scm.com/docs/gitmailmap for format details.
1818
#
19-
# You can commit changes for your own names and email addresses without review.
19+
# You can commit changes for your own names and email addresses without review.
2020
# If you want to add entries for other people, please have them review the
2121
# addition.
2222
#

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ Intel LLVM-based projects:
1010

1111
[![](https://spec.oneapi.io/oneapi-logo-white-scaled.jpg)](https://www.oneapi.io/)
1212

13-
[![Linux Post Commit Checks](https://github.com/intel/llvm/workflows/Linux%20Post%20Commit%20Checks/badge.svg)](https://github.com/intel/llvm/actions?query=workflow%3A%22Linux+Post+Commit+Checks%22)
14-
[![Generate Doxygen documentation](https://github.com/intel/llvm/workflows/Generate%20Doxygen%20documentation/badge.svg)](https://github.com/intel/llvm/actions?query=workflow%3A%22Generate+Doxygen+documentation%22)
13+
[![SYCL Post Commit](https://github.com/intel/llvm/actions/workflows/sycl_post_commit.yml/badge.svg?branch=sycl)](https://github.com/intel/llvm/actions/workflows/sycl_post_commit.yml)
14+
[![Generate Doxygen documentation](https://github.com/intel/llvm/actions/workflows/gh_pages.yml/badge.svg?branch=sycl)](https://github.com/intel/llvm/actions/workflows/gh_pages.yml)
1515

1616
The Data Parallel C++ or DPC++ is a LLVM-based compiler project that implements
1717
compiler and runtime support for the SYCL\* language. The project is hosted in

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,9 @@ class BinaryContext {
489489
void adjustCodePadding();
490490

491491
/// Regular page size.
492-
static constexpr unsigned RegularPageSize = 0x1000;
492+
unsigned RegularPageSize{0x1000};
493+
static constexpr unsigned RegularPageSizeX86 = 0x1000;
494+
static constexpr unsigned RegularPageSizeAArch64 = 0x10000;
493495

494496
/// Huge page size to use.
495497
static constexpr unsigned HugePageSize = 0x200000;

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1299,10 +1299,11 @@ class BinaryFunction {
12991299
case ELF::R_X86_64_32:
13001300
case ELF::R_X86_64_32S:
13011301
case ELF::R_X86_64_64:
1302+
case ELF::R_X86_64_PC8:
1303+
case ELF::R_X86_64_PC32:
1304+
case ELF::R_X86_64_PC64:
13021305
Relocations[Offset] = Relocation{Offset, Symbol, RelType, Addend, Value};
13031306
return;
1304-
case ELF::R_X86_64_PC32:
1305-
case ELF::R_X86_64_PC8:
13061307
case ELF::R_X86_64_PLT32:
13071308
case ELF::R_X86_64_GOTPCRELX:
13081309
case ELF::R_X86_64_REX_GOTPCRELX:

bolt/include/bolt/Core/DebugData.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,7 @@ class DebugInfoBinaryPatcher : public SimpleBinaryPatcher {
490490
PatchBaseClass,
491491
PatchValue32,
492492
PatchValue64to32,
493+
PatchValue32GenericSize,
493494
PatchValue64,
494495
PatchValueVariable,
495496
ReferencePatchValue,
@@ -536,6 +537,22 @@ class DebugInfoBinaryPatcher : public SimpleBinaryPatcher {
536537
uint32_t Value;
537538
};
538539

540+
/// Patch for 4 byte entry, where original entry size is not 4 bytes or 8
541+
/// bytes.
542+
struct DebugPatch32GenericSize : public Patch {
543+
DebugPatch32GenericSize(uint32_t O, uint32_t V, uint32_t OVS)
544+
: Patch(O, DebugPatchKind::PatchValue32GenericSize) {
545+
Value = V;
546+
OldValueSize = OVS;
547+
}
548+
549+
static bool classof(const Patch *Writer) {
550+
return Writer->getKind() == DebugPatchKind::PatchValue32GenericSize;
551+
}
552+
uint32_t Value;
553+
uint32_t OldValueSize;
554+
};
555+
539556
struct DebugPatch64 : public Patch {
540557
DebugPatch64(uint32_t O, uint64_t V)
541558
: Patch(O, DebugPatchKind::PatchValue64) {
@@ -693,6 +710,9 @@ class DebugInfoBinaryPatcher : public SimpleBinaryPatcher {
693710
case DebugPatchKind::PatchValue64to32:
694711
delete reinterpret_cast<DebugPatch64to32 *>(P);
695712
break;
713+
case DebugPatchKind::PatchValue32GenericSize:
714+
delete reinterpret_cast<DebugPatch32GenericSize *>(P);
715+
break;
696716
case DebugPatchKind::PatchValue64:
697717
delete reinterpret_cast<DebugPatch64 *>(P);
698718
break;

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -424,9 +424,7 @@ class MCPlusBuilder {
424424

425425
/// Return a register number that is guaranteed to not match with
426426
/// any real register on the underlying architecture.
427-
virtual MCPhysReg getNoRegister() const {
428-
llvm_unreachable("not implemented");
429-
}
427+
MCPhysReg getNoRegister() const { return MCRegister::NoRegister; }
430428

431429
/// Return a register corresponding to a function integer argument \p ArgNo
432430
/// if the argument is passed in a register. Or return the result of
@@ -528,11 +526,6 @@ class MCPlusBuilder {
528526
return false;
529527
}
530528

531-
virtual bool isMOVSX64rm32(const MCInst &Inst) const {
532-
llvm_unreachable("not implemented");
533-
return false;
534-
}
535-
536529
virtual bool isLeave(const MCInst &Inst) const {
537530
llvm_unreachable("not implemented");
538531
return false;
@@ -1289,7 +1282,18 @@ class MCPlusBuilder {
12891282

12901283
/// Replace instruction with a shorter version that could be relaxed later
12911284
/// if needed.
1292-
virtual bool shortenInstruction(MCInst &Inst) const {
1285+
virtual bool shortenInstruction(MCInst &Inst,
1286+
const MCSubtargetInfo &STI) const {
1287+
llvm_unreachable("not implemented");
1288+
return false;
1289+
}
1290+
1291+
/// Convert a move instruction into a conditional move instruction, given a
1292+
/// condition code.
1293+
virtual bool
1294+
convertMoveToConditionalMove(MCInst &Inst, unsigned CC,
1295+
bool AllowStackMemOp = false,
1296+
bool AllowBasePtrStackMemOp = false) const {
12931297
llvm_unreachable("not implemented");
12941298
return false;
12951299
}
@@ -1327,6 +1331,16 @@ class MCPlusBuilder {
13271331
return IndirectBranchType::UNKNOWN;
13281332
}
13291333

1334+
/// Analyze branch \p Instruction in PLT section and try to determine
1335+
/// associated got entry address.
1336+
virtual uint64_t analyzePLTEntry(MCInst &Instruction,
1337+
InstructionIterator Begin,
1338+
InstructionIterator End,
1339+
uint64_t BeginPC) const {
1340+
llvm_unreachable("not implemented");
1341+
return 0;
1342+
}
1343+
13301344
virtual bool analyzeVirtualMethodCall(InstructionIterator Begin,
13311345
InstructionIterator End,
13321346
std::vector<MCInst *> &MethodFetchInsns,

bolt/include/bolt/Core/Relocation.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ struct Relocation {
4949
/// Used to validate relocation correctness.
5050
uint64_t Value;
5151

52-
/// Return size of the given relocation \p Type.
52+
/// Return size in bytes of the given relocation \p Type.
5353
static size_t getSizeForType(uint64_t Type);
5454

5555
/// Return size of this relocation.

bolt/include/bolt/Passes/CMOVConversion.h

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
//===- bolt/Passes/CMOVConversion.h ----------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This pass finds the following patterns:
10+
//         jcc
11+
//        /   \
12+
// (empty)  mov src, dst
13+
//        \   /
14+
//
15+
// and replaces them with:
16+
//
17+
//         cmovcc src, dst
18+
//
19+
// The advantage of performing this conversion in BOLT (compared to compiler
20+
// heuristic driven instruction selection) is that BOLT can use LBR
21+
// misprediction information and only convert poorly predictable branches.
22+
// Note that branch misprediction rate is different from branch bias.
23+
// For well-predictable branches, it might be beneficial to leave jcc+mov as is
24+
// from microarchitectural perspective to avoid unneeded dependencies (CMOV
25+
// instruction has a dataflow dependence on flags and both operands).
26+
//
27+
//===----------------------------------------------------------------------===//
28+
29+
#ifndef BOLT_PASSES_CMOVCONVERSION_H
30+
#define BOLT_PASSES_CMOVCONVERSION_H
31+
32+
#include "bolt/Passes/BinaryPasses.h"
33+
34+
namespace llvm {
35+
namespace bolt {
36+
37+
/// Pass for folding eligible hammocks into CMOV's if profitable.
38+
class CMOVConversion : public BinaryFunctionPass {
39+
struct Stats {
40+
/// Record how many possible cases there are.
41+
uint64_t StaticPossible = 0;
42+
uint64_t DynamicPossible = 0;
43+
44+
/// Record how many cases were converted.
45+
uint64_t StaticPerformed = 0;
46+
uint64_t DynamicPerformed = 0;
47+
48+
/// Record how many mispredictions were eliminated.
49+
uint64_t PossibleMP = 0;
50+
uint64_t RemovedMP = 0;
51+
52+
Stats operator+(const Stats &O) {
53+
StaticPossible += O.StaticPossible;
54+
DynamicPossible += O.DynamicPossible;
55+
StaticPerformed += O.StaticPerformed;
56+
DynamicPerformed += O.DynamicPerformed;
57+
PossibleMP += O.PossibleMP;
58+
RemovedMP += O.RemovedMP;
59+
return *this;
60+
}
61+
double getStaticRatio() { return (double)StaticPerformed / StaticPossible; }
62+
double getDynamicRatio() {
63+
return (double)DynamicPerformed / DynamicPossible;
64+
}
65+
double getMPRatio() { return (double)RemovedMP / PossibleMP; }
66+
67+
void dump();
68+
};
69+
// BinaryContext-wide stats
70+
Stats Global;
71+
72+
void runOnFunction(BinaryFunction &Function);
73+
74+
public:
75+
explicit CMOVConversion() : BinaryFunctionPass(false) {}
76+
77+
const char *getName() const override { return "CMOV conversion"; }
78+
79+
void runOnFunctions(BinaryContext &BC) override;
80+
};
81+
82+
} // namespace bolt
83+
} // namespace llvm
84+
85+
#endif

0 commit comments

Comments
 (0)