Skip to content

Commit d062d77

Browse files
committed
Merge remote-tracking branch 'intel/sycl' into optimize-build
2 parents bf57926 + 64e92cb commit d062d77

File tree

4,750 files changed

+112626
-57027
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

4,750 files changed

+112626
-57027
lines changed

.github/workflows/sycl_linux_build_and_test.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ on:
4040
build_configure_extra_args:
4141
type: string
4242
required: false
43-
default: "--hip --hip-amd-arch=gfx906 --cuda"
43+
default: "--hip --cuda"
4444
build_artifact_suffix:
4545
type: string
4646
required: true
@@ -86,7 +86,7 @@ jobs:
8686
\"build_cache_root\":\"/__w/\",
8787
\"build_cache_suffix\":\"default\",
8888
\"build_cache_size\":\"2G\",
89-
\"build_configure_extra_args\":\"--hip --hip-amd-arch=gfx906 --cuda\",
89+
\"build_configure_extra_args\":\"--hip --cuda\",
9090
\"build_artifact_suffix\":\"default\",
9191
\"build_upload_artifact\":\"false\",
9292
\"intel_drivers_image\":\"ghcr.io/intel/llvm/ubuntu2004_intel_drivers:latest\",

.github/workflows/sycl_nightly.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,5 +70,5 @@ jobs:
7070
build_cache_root: "/__w/"
7171
build_cache_suffix: new_pm
7272
build_artifact_suffix: new_pm
73-
build_configure_extra_args: '--hip --hip-amd-arch=gfx906 --cuda --cmake-opt=-DLLVM_ENABLE_NEW_PASS_MANAGER=ON'
73+
build_configure_extra_args: '--hip --cuda --cmake-opt=-DLLVM_ENABLE_NEW_PASS_MANAGER=ON'
7474
lts_config: "hip_amdgpu;ocl_x64"

.github/workflows/sycl_stale_issues.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,5 @@ jobs:
1717
exempt-issue-labels: 'confirmed,hip,cuda,enhancement,help wanted,upstream'
1818
stale-issue-label: 'stale'
1919
exempt-all-issue-assignees: true
20+
operations-per-run: 200
21+

.github/workflows/sycl_windows_build_and_test.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ jobs:
1717
run: |
1818
choco install -y cuda --version 11.6.0.51123
1919
choco install -y ninja
20+
choco install -y sccache --version 0.2.15
2021
refreshenv
2122
echo CUDA_PATH=%CUDA_PATH%
2223
echo CUDA_PATH=%CUDA_PATH% >> %GITHUB_ENV%
@@ -49,6 +50,8 @@ jobs:
4950
--cmake-opt="-DCMAKE_C_COMPILER=cl" ^
5051
--cmake-opt="-DCMAKE_CXX_COMPILER=cl" ^
5152
--cmake-opt="-DCMAKE_INSTALL_PREFIX=%GITHUB_WORKSPACE%\install" ^
53+
--cmake-opt="-DCMAKE_CXX_COMPILER_LAUNCHER=sccache" ^
54+
--cmake-opt="-DCMAKE_C_COMPILER_LAUNCHER=sccache" ^
5255
--cuda
5356
- name: Build
5457
shell: cmd
@@ -64,4 +67,4 @@ jobs:
6467
uses: actions/upload-artifact@v2
6568
with:
6669
name: sycl_windows_default
67-
path: install/**/*
70+
path: install/**/*

README.md

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,86 @@ See [LICENSE.txt](sycl/LICENSE.TXT) for details.
2828
See [CONTRIBUTING.md](CONTRIBUTING.md) for details.
2929

3030
*\*Other names and brands may be claimed as the property of others.*
31+
32+
This is an example workflow and configuration to get and build the LLVM source:
33+
34+
1. Checkout LLVM (including related sub-projects like Clang):
35+
36+
* ``git clone https://github.com/llvm/llvm-project.git``
37+
38+
* Or, on Windows, ``git clone --config core.autocrlf=false
39+
https://github.com/llvm/llvm-project.git``
40+
41+
2. Configure and build LLVM and Clang:
42+
43+
* ``cd llvm-project``
44+
45+
* ``cmake -S llvm -B build -G <generator> [options]``
46+
47+
Some common build system generators are:
48+
49+
* ``Ninja`` --- for generating [Ninja](https://ninja-build.org)
50+
build files. Most LLVM developers use Ninja.
51+
* ``Unix Makefiles`` --- for generating make-compatible parallel makefiles.
52+
* ``Visual Studio`` --- for generating Visual Studio projects and
53+
solutions.
54+
* ``Xcode`` --- for generating Xcode projects.
55+
56+
Some common options:
57+
58+
* ``-DLLVM_ENABLE_PROJECTS='...'`` and ``-DLLVM_ENABLE_RUNTIMES='...'`` ---
59+
semicolon-separated list of the LLVM sub-projects and runtimes you'd like to
60+
additionally build. ``LLVM_ENABLE_PROJECTS`` can include any of: clang,
61+
clang-tools-extra, cross-project-tests, flang, libc, libclc, lld, lldb,
62+
mlir, openmp, polly, or pstl. ``LLVM_ENABLE_RUNTIMES`` can include any of
63+
libcxx, libcxxabi, libunwind, compiler-rt, libc or openmp. Some runtime
64+
projects can be specified either in ``LLVM_ENABLE_PROJECTS`` or in
65+
``LLVM_ENABLE_RUNTIMES``.
66+
67+
For example, to build LLVM, Clang, libcxx, and libcxxabi, use
68+
``-DLLVM_ENABLE_PROJECTS="clang" -DLLVM_ENABLE_RUNTIMES="libcxx;libcxxabi"``.
69+
70+
* ``-DCMAKE_INSTALL_PREFIX=directory`` --- Specify for *directory* the full
71+
path name of where you want the LLVM tools and libraries to be installed
72+
(default ``/usr/local``). Be careful if you install runtime libraries: if
73+
your system uses those provided by LLVM (like libc++ or libc++abi), you
74+
must not overwrite your system's copy of those libraries, since that
75+
could render your system unusable. In general, using something like
76+
``/usr`` is not advised, but ``/usr/local`` is fine.
77+
78+
* ``-DCMAKE_BUILD_TYPE=type`` --- Valid options for *type* are Debug,
79+
Release, RelWithDebInfo, and MinSizeRel. Default is Debug.
80+
81+
* ``-DLLVM_ENABLE_ASSERTIONS=On`` --- Compile with assertion checks enabled
82+
(default is Yes for Debug builds, No for all other build types).
83+
84+
* ``cmake --build build [-- [options] <target>]`` or invoke the build system specified above
85+
directly.
86+
87+
* The default target (i.e. ``ninja`` or ``make``) will build all of LLVM.
88+
89+
* The ``check-all`` target (i.e. ``ninja check-all``) will run the
90+
regression tests to ensure everything is in working order.
91+
92+
* CMake will generate targets for each tool and library, and most
93+
LLVM sub-projects generate their own ``check-<project>`` target.
94+
95+
* Running a serial build will be **slow**. To improve speed, try running a
96+
parallel build. That's done by default in Ninja; for ``make``, use the option
97+
``-j NNN``, where ``NNN`` is the number of parallel jobs, e.g. the number of
98+
CPUs you have.
99+
100+
* For more information see [CMake](https://llvm.org/docs/CMake.html)
101+
102+
Consult the
103+
[Getting Started with LLVM](https://llvm.org/docs/GettingStarted.html#getting-started-with-llvm)
104+
page for detailed information on configuring and compiling LLVM. You can visit
105+
[Directory Layout](https://llvm.org/docs/GettingStarted.html#directory-layout)
106+
to learn about the layout of the source code tree.
107+
108+
## Getting in touch
109+
110+
Join the [LLVM Discourse forums](https://discourse.llvm.org/), [Discord chat](https://discord.gg/xS7Z362), or the #llvm IRC channel on [OFTC](https://oftc.net/).
111+
112+
The LLVM project has adopted a [code of conduct](https://llvm.org/docs/CodeOfConduct.html) for
113+
participants in all modes of communication within the project.

bolt/docs/Heatmaps.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,10 @@ $ perf record -e cycles:u -j any,u [-p PID|-a] -- sleep <interval>
2323
Note that at the moment running with LBR (`-j any,u` or `-b`) is
2424
a requirement.
2525

26-
Once the run is complete, and `perf.data` is generated, run BOLT in
27-
a heatmap mode:
26+
Once the run is complete, and `perf.data` is generated, run llvm-bolt-heatmap:
2827

2928
```bash
30-
$ llvm-bolt heatmap -p perf.data <executable>
29+
$ llvm-bolt-heatmap -p perf.data <executable>
3130
```
3231

3332
By default the heatmap will be dumped to *stdout*. You can change it

bolt/docs/OptimizingClang.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ Collecting accurate profile requires running `perf` on a hardware that
4141
implements taken branch sampling (`-b/-j` flag). For that reason, it may not be possible to
4242
collect the accurate profile in a virtualized environment, e.g. in the cloud.
4343
We do support regular sampling profiles, but the performance
44-
improvements are expected to be more modest.
44+
improvements are expected to be more modest.
4545

4646
```bash
4747
$ mkdir ${TOPLEV}/stage3
@@ -211,7 +211,8 @@ $ cd ${TOPLEV}/stage1
211211
$ cmake -G Ninja ${TOPLEV}/llvm-project/llvm -DLLVM_TARGETS_TO_BUILD=X86 \
212212
-DCMAKE_BUILD_TYPE=Release \
213213
-DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DCMAKE_ASM_COMPILER=gcc \
214-
-DLLVM_ENABLE_PROJECTS="clang;lld;compiler-rt" \
214+
-DLLVM_ENABLE_PROJECTS="clang;lld" \
215+
-DLLVM_ENABLE_RUNTIMES="compiler-rt" \
215216
-DCOMPILER_RT_BUILD_SANITIZERS=OFF -DCOMPILER_RT_BUILD_XRAY=OFF \
216217
-DCOMPILER_RT_BUILD_LIBFUZZER=OFF \
217218
-DCMAKE_INSTALL_PREFIX=${TOPLEV}/stage1/install

bolt/include/bolt/Passes/BinaryPasses.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,16 @@ class ShortenInstructions : public BinaryFunctionPass {
295295

296296
/// Perform simple peephole optimizations.
297297
class Peepholes : public BinaryFunctionPass {
298+
public:
299+
enum PeepholeOpts : char {
300+
PEEP_NONE = 0x0,
301+
PEEP_DOUBLE_JUMPS = 0x2,
302+
PEEP_TAILCALL_TRAPS = 0x4,
303+
PEEP_USELESS_BRANCHES = 0x8,
304+
PEEP_ALL = 0xf
305+
};
306+
307+
private:
298308
uint64_t NumDoubleJumps{0};
299309
uint64_t TailCallTraps{0};
300310
uint64_t NumUselessCondBranches{0};

bolt/include/bolt/Passes/LivenessAnalysis.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include "bolt/Passes/DataflowAnalysis.h"
1313
#include "bolt/Passes/RegAnalysis.h"
14+
#include "llvm/MC/MCRegisterInfo.h"
1415
#include "llvm/Support/CommandLine.h"
1516

1617
namespace opts {

bolt/include/bolt/Passes/ReachingDefOrUse.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "bolt/Passes/DataflowAnalysis.h"
1313
#include "bolt/Passes/RegAnalysis.h"
1414
#include "llvm/ADT/Optional.h"
15+
#include "llvm/MC/MCRegisterInfo.h"
1516
#include "llvm/Support/CommandLine.h"
1617
#include "llvm/Support/Timer.h"
1718

bolt/include/bolt/Passes/ShrinkWrapping.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#define BOLT_PASSES_SHRINKWRAPPING_H
1111

1212
#include "bolt/Passes/FrameAnalysis.h"
13+
#include "llvm/MC/MCRegisterInfo.h"
1314

1415
namespace llvm {
1516
namespace bolt {

bolt/include/bolt/Rewrite/DWARFRewriter.h

Lines changed: 3 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#define BOLT_REWRITE_DWARF_REWRITER_H
1111

1212
#include "bolt/Core/DebugData.h"
13+
#include "llvm/MC/MCAsmLayout.h"
1314
#include <cstdint>
1415
#include <memory>
1516
#include <mutex>
@@ -108,9 +109,6 @@ class DWARFRewriter {
108109
/// Output .dwp files.
109110
void writeDWP(std::unordered_map<uint64_t, std::string> &DWOIdToName);
110111

111-
/// Abbreviations that were converted to use DW_AT_ranges.
112-
std::set<const DWARFAbbreviationDeclaration *> ConvertedRangesAbbrevs;
113-
114112
/// DWARFDie contains a pointer to a DIE and hence gets invalidated once the
115113
/// embedded DIE is destroyed. This wrapper class stores a DIE internally and
116114
/// could be cast to a DWARFDie that is valid even after the initial DIE is
@@ -135,8 +133,6 @@ class DWARFRewriter {
135133
const DWARFAbbreviationDeclaration *,
136134
std::vector<std::pair<DWARFDieWrapper, DebugAddressRange>>>;
137135

138-
PendingRangesType PendingRanges;
139-
140136
/// Convert \p Abbrev from using a simple DW_AT_(low|high)_pc range to
141137
/// DW_AT_ranges with optional \p RangesBase.
142138
void convertToRangesPatchAbbrev(const DWARFUnit &Unit,
@@ -151,30 +147,10 @@ class DWARFRewriter {
151147
SimpleBinaryPatcher &DebugInfoPatcher,
152148
Optional<uint64_t> RangesBase = None);
153149

154-
/// Same as above, but takes a vector of \p Ranges as a parameter.
155-
void convertToRanges(DWARFDie DIE, const DebugAddressRangesVector &Ranges,
156-
SimpleBinaryPatcher &DebugInfoPatcher);
157-
158150
/// Patch DW_AT_(low|high)_pc values for the \p DIE based on \p Range.
159151
void patchLowHigh(DWARFDie DIE, DebugAddressRange Range,
160-
SimpleBinaryPatcher &DebugInfoPatcher);
161-
162-
/// Convert pending ranges associated with the given \p Abbrev.
163-
void convertPending(const DWARFUnit &Unit,
164-
const DWARFAbbreviationDeclaration *Abbrev,
165-
SimpleBinaryPatcher &DebugInfoPatcher,
166-
DebugAbbrevWriter &AbbrevWriter);
167-
168-
/// Adds to Pending Ranges.
169-
/// For Debug Fission also adding to .debug_addr to take care of a case where
170-
/// some entries are not converted to ranges and left as
171-
/// DW_AT_low_pc/DW_AT_high_pc.
172-
void addToPendingRanges(const DWARFAbbreviationDeclaration *Abbrev,
173-
DWARFDie DIE, DebugAddressRangesVector &Ranges,
174-
Optional<uint64_t> DWOId);
175-
176-
/// Once all DIEs were seen, update DW_AT_(low|high)_pc values.
177-
void flushPendingRanges(SimpleBinaryPatcher &DebugInfoPatcher);
152+
SimpleBinaryPatcher &DebugInfoPatcher,
153+
Optional<uint64_t> DWOId);
178154

179155
/// Helper function for creating and returning per-DWO patchers/writers.
180156
template <class T, class Patcher>

bolt/include/bolt/Utils/CommandLineOpts.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,7 @@ extern llvm::cl::OptionCategory BoltRelocCategory;
2727
extern llvm::cl::OptionCategory BoltOutputCategory;
2828
extern llvm::cl::OptionCategory AggregatorCategory;
2929
extern llvm::cl::OptionCategory BoltInstrCategory;
30-
31-
extern llvm::cl::SubCommand HeatmapCommand;
30+
extern llvm::cl::OptionCategory HeatmapCategory;
3231

3332
extern llvm::cl::opt<unsigned> AlignText;
3433
extern llvm::cl::opt<bool> AggregateOnly;
@@ -38,7 +37,6 @@ extern llvm::cl::opt<bool> EnableBAT;
3837
extern llvm::cl::opt<bool> RemoveSymtab;
3938
extern llvm::cl::opt<unsigned> ExecutionCountThreshold;
4039
extern llvm::cl::opt<unsigned> HeatmapBlock;
41-
extern llvm::cl::opt<std::string> HeatmapFile;
4240
extern llvm::cl::opt<unsigned long long> HeatmapMaxAddress;
4341
extern llvm::cl::opt<unsigned long long> HeatmapMinAddress;
4442
extern llvm::cl::opt<bool> HotData;

bolt/lib/Core/BinaryContext.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,10 @@
2626
#include "llvm/MC/MCInstPrinter.h"
2727
#include "llvm/MC/MCObjectStreamer.h"
2828
#include "llvm/MC/MCObjectWriter.h"
29+
#include "llvm/MC/MCRegisterInfo.h"
2930
#include "llvm/MC/MCSectionELF.h"
3031
#include "llvm/MC/MCStreamer.h"
32+
#include "llvm/MC/MCSubtargetInfo.h"
3133
#include "llvm/MC/MCSymbol.h"
3234
#include "llvm/Support/CommandLine.h"
3335
#include "llvm/Support/Regex.h"

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "llvm/MC/MCExpr.h"
2929
#include "llvm/MC/MCInst.h"
3030
#include "llvm/MC/MCInstPrinter.h"
31+
#include "llvm/MC/MCRegisterInfo.h"
3132
#include "llvm/Object/ObjectFile.h"
3233
#include "llvm/Support/CommandLine.h"
3334
#include "llvm/Support/Debug.h"

bolt/lib/Core/DebugData.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "bolt/Core/DebugData.h"
1414
#include "bolt/Core/BinaryContext.h"
1515
#include "bolt/Utils/Utils.h"
16+
#include "llvm/MC/MCAssembler.h"
1617
#include "llvm/MC/MCContext.h"
1718
#include "llvm/MC/MCObjectStreamer.h"
1819
#include "llvm/Support/CommandLine.h"

bolt/lib/Core/MCPlusBuilder.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "llvm/MC/MCInstrAnalysis.h"
1717
#include "llvm/MC/MCInstrDesc.h"
1818
#include "llvm/MC/MCInstrInfo.h"
19+
#include "llvm/MC/MCRegisterInfo.h"
1920
#include "llvm/Support/Debug.h"
2021
#include <cstdint>
2122
#include <queue>

bolt/lib/Core/Relocation.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@
1212

1313
#include "bolt/Core/Relocation.h"
1414
#include "llvm/MC/MCContext.h"
15+
#include "llvm/MC/MCExpr.h"
1516
#include "llvm/MC/MCStreamer.h"
17+
#include "llvm/MC/MCSymbol.h"
1618
#include "llvm/Object/ELF.h"
1719

1820
using namespace llvm;

0 commit comments

Comments
 (0)