Skip to content

Commit 85daf40

Browse files
committed
test
2 parents 30c7cef + e093109 commit 85daf40

File tree

7,698 files changed

+344162
-175976
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

7,698 files changed

+344162
-175976
lines changed

.github/CODEOWNERS

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,11 @@ clang/test/AST/Interp/ @tbaederr
6767
/mlir/include/mlir/Dialect/Linalg @dcaballe @nicolasvasilache @rengolin
6868
/mlir/lib/Dialect/Linalg @dcaballe @nicolasvasilache @rengolin
6969
/mlir/lib/Dialect/Linalg/Transforms/DecomposeLinalgOps.cpp @MaheshRavishankar @nicolasvasilache
70-
/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp @MaheshRavishankar @nicolasvasilache
70+
/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp @dcaballe @MaheshRavishankar @nicolasvasilache
7171
/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp @MaheshRavishankar @nicolasvasilache
7272
/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp @hanhanW @nicolasvasilache
73-
/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @hanhanW @nicolasvasilache
74-
/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @hanhanW @nicolasvasilache
73+
/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @dcaballe @hanhanW @nicolasvasilache
74+
/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @banach-space @dcaballe @hanhanW @nicolasvasilache
7575

7676
# MemRef Dialect in MLIR.
7777
/mlir/lib/Dialect/MemRef/Transforms/EmulateNarrowType.cpp @MaheshRavishankar @nicolasvasilache
@@ -85,10 +85,10 @@ clang/test/AST/Interp/ @tbaederr
8585
/mlir/**/*VectorToSCF* @banach-space @dcaballe @matthias-springer @nicolasvasilache
8686
/mlir/**/*VectorToLLVM* @banach-space @dcaballe @nicolasvasilache
8787
/mlir/**/*X86Vector* @aartbik @dcaballe @nicolasvasilache
88-
/mlir/include/mlir/Dialect/Vector @dcaballe @nicolasvasilache
89-
/mlir/lib/Dialect/Vector @dcaballe @nicolasvasilache
90-
/mlir/lib/Dialect/Vector/Transforms/* @hanhanW @nicolasvasilache
91-
/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp @MaheshRavishankar @nicolasvasilache
88+
/mlir/include/mlir/Dialect/Vector @banach-space @dcaballe @nicolasvasilache
89+
/mlir/lib/Dialect/Vector @banach-space @dcaballe @nicolasvasilache
90+
/mlir/lib/Dialect/Vector/Transforms/* @banach-space @dcaballe @hanhanW @nicolasvasilache
91+
/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp @banach-space @dcaballe @MaheshRavishankar @nicolasvasilache
9292
/mlir/**/*EmulateNarrowType* @dcaballe @hanhanW
9393

9494
# Presburger library in MLIR

.github/new-prs-labeler.yml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -775,6 +775,29 @@ backend:AArch64:
775775
- clang/include/clang/Sema/SemaARM.h
776776
- clang/lib/Sema/SemaARM.cpp
777777

778+
backend:Hexagon:
779+
- clang/include/clang/Basic/BuiltinsHexagon*.def
780+
- clang/include/clang/Sema/SemaHexagon.h
781+
- clang/lib/Basic/Targets/Hexagon.*
782+
- clang/lib/CodeGen/Targets/Hexagon.cpp
783+
- clang/lib/Driver/ToolChains/Hexagon.*
784+
- clang/lib/Sema/SemaHexagon.cpp
785+
- lld/ELF/Arch/Hexagon.cpp
786+
- lldb/source/Plugins/ABI/Hexagon/**
787+
- lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/**
788+
- llvm/include/llvm/BinaryFormat/ELFRelocs/Hexagon.def
789+
- llvm/include/llvm/IR/IntrinsicsHexagon*
790+
- llvm/include/llvm/Support/Hexagon*
791+
- llvm/lib/Support/Hexagon*
792+
- llvm/lib/Target/Hexagon/**
793+
- llvm/test/CodeGen/Hexagon/**
794+
- llvm/test/CodeGen/*/Hexagon/**
795+
- llvm/test/DebugInfo/*/Hexagon/**
796+
- llvm/test/Transforms/*/Hexagon
797+
- llvm/test/MC/Disassembler/Hexagon/**
798+
- llvm/test/MC/Hexagon/**
799+
- llvm/test/tools/llvm-objdump/ELF/Hexagon/**
800+
778801
backend:loongarch:
779802
- llvm/include/llvm/IR/IntrinsicsLoongArch.td
780803
- llvm/test/MC/LoongArch/**

.github/workflows/libcxx-build-and-test.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,8 @@ jobs:
6363
cxx: [ 'clang++-19' ]
6464
include:
6565
- config: 'generic-gcc'
66-
cc: 'gcc-13'
67-
cxx: 'g++-13'
66+
cc: 'gcc-14'
67+
cxx: 'g++-14'
6868
steps:
6969
- uses: actions/checkout@v4
7070
- name: ${{ matrix.config }}.${{ matrix.cxx }}
@@ -101,8 +101,8 @@ jobs:
101101
cxx: [ 'clang++-19' ]
102102
include:
103103
- config: 'generic-gcc-cxx11'
104-
cc: 'gcc-13'
105-
cxx: 'g++-13'
104+
cc: 'gcc-14'
105+
cxx: 'g++-14'
106106
- config: 'generic-cxx23'
107107
cc: 'clang-17'
108108
cxx: 'clang++-17'

bolt/docs/CommandLineArgumentReference.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,12 @@
283283

284284
List of functions to pad with amount of bytes
285285

286+
- `--print-mappings`
287+
288+
Print mappings in the legend, between characters/blocks and text sections
289+
(default false).
290+
291+
286292
- `--profile-format=<value>`
287293

288294
Format to dump profile output in aggregation mode, default is fdata
@@ -688,6 +694,10 @@
688694

689695
Use a modified clustering algorithm geared towards minimizing branches
690696

697+
- `--name-similarity-function-matching-threshold=<uint>`
698+
699+
Match functions using namespace and edit distance.
700+
691701
- `--no-inline`
692702

693703
Disable all inlining (overrides other inlining options)
@@ -1236,4 +1246,4 @@
12361246

12371247
- `--print-options`
12381248

1239-
Print non-default options after command line parsing
1249+
Print non-default options after command line parsing

bolt/docs/HeatmapHeader.png

75 KB
Loading

bolt/docs/Heatmaps.md

Lines changed: 56 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
# Code Heatmaps
22

33
BOLT has gained the ability to print code heatmaps based on
4-
sampling-based LBR profiles generated by `perf`. The output is produced
5-
in colored ASCII to be displayed in a color-capable terminal. It looks
6-
something like this:
4+
sampling-based profiles generated by `perf`, either with `LBR` data or not.
5+
The output is produced in colored ASCII to be displayed in a color-capable
6+
terminal. It looks something like this:
77

88
![](./Heatmap.png)
99

@@ -32,20 +32,64 @@ $ llvm-bolt-heatmap -p perf.data <executable>
3232
```
3333

3434
By default the heatmap will be dumped to *stdout*. You can change it
35-
with `-o <heatmapfile>` option. Each character/block in the heatmap
36-
shows the execution data accumulated for corresponding 64 bytes of
37-
code. You can change this granularity with a `-block-size` option.
38-
E.g. set it to 4096 to see code usage grouped by 4K pages.
39-
Other useful options are:
35+
with the `-o <heatmapfile>` option.
4036

41-
```bash
42-
-line-size=<uint> - number of entries per line (default 256)
43-
-max-address=<uint> - maximum address considered valid for heatmap (default 4GB)
44-
```
4537

4638
If you prefer to look at the data in a browser (or would like to share
4739
it that way), then you can use an HTML conversion tool. E.g.:
4840

4941
```bash
5042
$ aha -b -f <heatmapfile> > <heatmapfile>.html
5143
```
44+
45+
---
46+
47+
## Background on heatmaps:
48+
A heatmap is effectively a histogram that is rendered into a grid for better
49+
visualization.
50+
In theory we can generate a heatmap using any binary and a perf profile.
51+
52+
Each block/character in the heatmap shows the execution data accumulated for
53+
corresponding 64 bytes of code. You can change this granularity with a
54+
`-block-size` option.
55+
E.g. set it to 4096 to see code usage grouped by 4K pages.
56+
57+
58+
When a block is shown as a dot, it means that no samples were found for that
59+
address.
60+
When it is shown as a letter, it indicates a captured sample on a particular
61+
text section of the binary.
62+
To show a mapping between letters and text sections in the legend, use
63+
`-print-mappings`.
64+
When a sampled address does not belong to any of the text sections, the
65+
characters 'o' or 'O' will be shown.
66+
67+
The legend shows by default the ranges in the heatmap according to the number
68+
of samples per block.
69+
A color is assigned per range, except the first two ranges, which are distinguished by
70+
lower and upper case letters.
71+
72+
On the Y axis, each row/line starts with an actual address of the binary.
73+
Consecutive lines in the heatmap advance by the same amount, with the binary
74+
size covered by a line dependent on the block size and the line size.
75+
An empty new line is inserted for larger gaps between samples.
76+
77+
On the X axis, the horizontally emitted hex numbers can help *estimate* where
78+
in the line the samples lie, but they cannot be combined to provide a full
79+
address, as they are relative to both the bucket and line sizes.
80+
81+
In the example below, the highlighted `0x100` column is not an offset to each
82+
row's address, but instead, it points to the middle of the line.
83+
For the generation, the default bucket size was used with a line size of 128.
84+
85+
86+
![](./HeatmapHeader.png)
87+
88+
89+
Some useful options are:
90+
91+
```
92+
-line-size=<uint> - number of entries per line (default 256)
93+
-max-address=<uint> - maximum address considered valid for heatmap (default 4GB)
94+
-print-mappings - print mappings in the legend, between characters/blocks and text sections (default false)
95+
```

bolt/docs/OptimizingLinux.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,11 @@ $ perf2bolt -p perf.data -o perf.fdata vmlinux
4444

4545
Under a high load, `perf.data` should be several gigabytes in size and you should expect the converted `perf.fdata` not to exceed 100 MB.
4646

47+
Profiles collected from multiple workloads can be joined into a single profile using the `merge-fdata` utility:
48+
```bash
49+
$ merge-fdata perf.1.fdata perf.2.fdata ... perf.<N>.fdata > perf.merged.fdata
50+
```
51+
4752
Two changes are required for the kernel build. The first one is optional but highly recommended. It introduces a BOLT-reserved space into `vmlinux` code section:
4853

4954

bolt/include/bolt/Core/BinaryBasicBlock.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -842,15 +842,6 @@ class BinaryBasicBlock {
842842
bool analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB,
843843
MCInst *&CondBranch, MCInst *&UncondBranch);
844844

845-
/// Return true if iterator \p I is pointing to the first instruction in
846-
/// a pair that could be macro-fused.
847-
bool isMacroOpFusionPair(const_iterator I) const;
848-
849-
/// If the basic block has a pair of instructions suitable for macro-fusion,
850-
/// return iterator to the first instruction of the pair.
851-
/// Otherwise return end().
852-
const_iterator getMacroOpFusionPair() const;
853-
854845
/// Printer required for printing dominator trees.
855846
void printAsOperand(raw_ostream &OS, bool PrintType = true) {
856847
if (PrintType)

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -698,10 +698,6 @@ class BinaryContext {
698698

699699
/// Binary-wide aggregated stats.
700700
struct BinaryStats {
701-
/// Stats for macro-fusion.
702-
uint64_t MissedMacroFusionPairs{0};
703-
uint64_t MissedMacroFusionExecCount{0};
704-
705701
/// Stats for stale profile matching:
706702
/// the total number of basic blocks in the profile
707703
uint32_t NumStaleBlocks{0};

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -835,10 +835,6 @@ class BinaryFunction {
835835
/// them.
836836
void calculateLoopInfo();
837837

838-
/// Calculate missed macro-fusion opportunities and update BinaryContext
839-
/// stats.
840-
void calculateMacroOpFusionStats();
841-
842838
/// Returns if BinaryDominatorTree has been constructed for this function.
843839
bool hasDomTree() const { return BDT != nullptr; }
844840

0 commit comments

Comments
 (0)