Skip to content

Commit f1f2043

Browse files
committed
Merge with Conflict Markers
CONFLICT (content): Merge conflict in llvm/unittests/ExecutionEngine/Orc/ReOptimizeLayerTest.cpp
2 parents 2057848 + b221afc commit f1f2043

File tree

10,121 files changed

+667379
-163613
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

10,121 files changed

+667379
-163613
lines changed

.github/CODEOWNERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ sycl/doc/extensions/ @intel/dpcpp-specification-reviewers
3636

3737
# Unified Runtime
3838
sycl/cmake/modules/FetchUnifiedRuntime.cmake @intel/unified-runtime-reviewers
39+
sycl/cmake/modules/UnifiedRuntimeTag.cmake @intel/unified-runtime-reviewers
3940
sycl/include/sycl/detail/ur.hpp @intel/unified-runtime-reviewers
4041
sycl/source/detail/posix_ur.cpp @intel/unified-runtime-reviewers
4142
sycl/source/detail/ur.cpp @intel/unified-runtime-reviewers

.github/workflows/containers/github-action-ci/stage1.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ FROM docker.io/library/ubuntu:22.04 as base
22
ENV LLVM_SYSROOT=/opt/llvm
33

44
FROM base as stage1-toolchain
5-
ENV LLVM_VERSION=18.1.8
5+
ENV LLVM_VERSION=19.1.2
66

77
RUN apt-get update && \
88
apt-get install -y \

.github/workflows/libcxx-build-and-test.yaml

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ env:
4949
jobs:
5050
stage1:
5151
if: github.repository_owner == 'llvm'
52-
runs-on: libcxx-runners-8-set
52+
runs-on: libcxx-runners-set
53+
container: ghcr.io/libcxx/actions-builder:testing-2024-09-21
5354
continue-on-error: false
5455
strategy:
5556
fail-fast: false
@@ -79,12 +80,14 @@ jobs:
7980
path: |
8081
**/test-results.xml
8182
**/*.abilist
83+
**/CMakeConfigureLog.yaml
8284
**/CMakeError.log
8385
**/CMakeOutput.log
8486
**/crash_diagnostics/*
8587
stage2:
8688
if: github.repository_owner == 'llvm'
87-
runs-on: libcxx-runners-8-set
89+
runs-on: libcxx-runners-set
90+
container: ghcr.io/libcxx/actions-builder:testing-2024-09-21
8891
needs: [ stage1 ]
8992
continue-on-error: false
9093
strategy:
@@ -123,6 +126,7 @@ jobs:
123126
path: |
124127
**/test-results.xml
125128
**/*.abilist
129+
**/CMakeConfigureLog.yaml
126130
**/CMakeError.log
127131
**/CMakeOutput.log
128132
**/crash_diagnostics/*
@@ -160,20 +164,21 @@ jobs:
160164
'benchmarks',
161165
'bootstrapping-build'
162166
]
163-
machine: [ 'libcxx-runners-8-set' ]
167+
machine: [ 'libcxx-runners-set' ]
164168
include:
165169
- config: 'generic-cxx26'
166-
machine: libcxx-runners-8-set
170+
machine: libcxx-runners-set
167171
- config: 'generic-asan'
168-
machine: libcxx-runners-8-set
172+
machine: libcxx-runners-set
169173
- config: 'generic-tsan'
170-
machine: libcxx-runners-8-set
174+
machine: libcxx-runners-set
171175
- config: 'generic-ubsan'
172-
machine: libcxx-runners-8-set
176+
machine: libcxx-runners-set
173177
# Use a larger machine for MSAN to avoid timeout and memory allocation issues.
174178
- config: 'generic-msan'
175-
machine: libcxx-runners-8-set
179+
machine: libcxx-runners-set
176180
runs-on: ${{ matrix.machine }}
181+
container: ghcr.io/libcxx/actions-builder:testing-2024-09-21
177182
steps:
178183
- uses: actions/checkout@v4
179184
- name: ${{ matrix.config }}
@@ -188,6 +193,7 @@ jobs:
188193
path: |
189194
**/test-results.xml
190195
**/*.abilist
196+
**/CMakeConfigureLog.yaml
191197
**/CMakeError.log
192198
**/CMakeOutput.log
193199
**/crash_diagnostics/*
@@ -230,6 +236,7 @@ jobs:
230236
path: |
231237
**/test-results.xml
232238
**/*.abilist
239+
**/CMakeConfigureLog.yaml
233240
**/CMakeError.log
234241
**/CMakeOutput.log
235242
**/crash_diagnostics/*

.github/workflows/pr-code-format.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ on:
77
pull_request:
88
branches:
99
- main
10+
- sycl
11+
- sycl-devops-pr/**
12+
- sycl-rel-**
1013
- 'users/**'
1114

1215
jobs:

.github/workflows/scorecard.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,6 @@ jobs:
5757

5858
# Upload the results to GitHub's code scanning dashboard.
5959
- name: "Upload to code-scanning"
60-
uses: github/codeql-action/upload-sarif@e2b3eafc8d227b0241d48be5f425d47c2d750a13 # v3.26.10
60+
uses: github/codeql-action/upload-sarif@662472033e021d55d94146f66f6058822b0b39fd # v3.27.0
6161
with:
6262
sarif_file: results.sarif

.github/workflows/sycl-containers.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,13 @@ jobs:
5151
file: ubuntu2204_intel_drivers
5252
tag: unstable
5353
build_args: "use_latest=true"
54+
- name: Build + Intel Drivers Ubuntu 22.04 Docker image
55+
file: ubuntu2204_intel_drivers
56+
tag: alldeps
57+
build_args: |
58+
base_image=ghcr.io/intel/llvm/ubuntu2204_build
59+
base_tag=latest
60+
use_latest=false
5461
steps:
5562
- name: Checkout
5663
uses: actions/checkout@v4

.github/workflows/sycl-linux-build.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,12 @@ jobs:
212212
if: always() && !cancelled() && contains(inputs.changes, 'libdevice')
213213
run: |
214214
cmake --build $GITHUB_WORKSPACE/build --target check-libdevice
215+
- name: Check E2E test requirements
216+
if: always() && !cancelled() && !contains(inputs.changes, 'sycl')
217+
run: |
218+
# TODO consider moving this to Dockerfile.
219+
export LD_LIBRARY_PATH=/usr/local/cuda/compat/:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
220+
LIT_OPTS="--allow-empty-runs" LIT_FILTER="e2e_test_requirements" cmake --build $GITHUB_WORKSPACE/build --target check-sycl
215221
- name: Install
216222
if: ${{ always() && !cancelled() && steps.build.conclusion == 'success' }}
217223
# TODO replace utility installation with a single CMake target

.github/workflows/sycl-linux-run-tests.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,8 +185,8 @@ jobs:
185185
with:
186186
path: khronos_sycl_cts
187187
repository: 'KhronosGroup/SYCL-CTS'
188-
ref: 'SYCL-2020'
189-
default_branch: 'SYCL-2020'
188+
ref: 'main'
189+
default_branch: 'main'
190190
cache_path: "/__w/repo_cache/"
191191
- name: SYCL CTS GIT submodules init
192192
if: inputs.tests_selector == 'cts'

.github/workflows/sycl-macos-build-and-test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ permissions: read-all
2424
jobs:
2525
build:
2626
name: Build
27-
runs-on: macos-12
27+
runs-on: macos-13
2828
env:
2929
CCACHE_DIR: $GITHUB_WORKSPACE/build_cache_${{ inputs.build_cache_suffix }}
3030
CCACHE_MAXSIZE: ${{ inputs.build_cache_size }}

.github/workflows/sycl-nightly.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ jobs:
198198
echo "TAG=$(date +'%Y-%m-%d')-${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
199199
fi
200200
- name: Upload binaries
201-
uses: softprops/action-gh-release@c062e08bd532815e2082a85e87e3ef29c3e6d191
201+
uses: softprops/action-gh-release@e7a8f85e1c67a31e6ed99a94b41bd0b71bbee6b8
202202
with:
203203
files: |
204204
sycl_linux.tar.gz

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,9 @@ class BinaryFunction {
386386
/// Raw branch count for this function in the profile.
387387
uint64_t RawBranchCount{0};
388388

389+
/// Dynamically executed function bytes, used for density computation.
390+
uint64_t SampleCountInBytes{0};
391+
389392
/// Indicates the type of profile the function is using.
390393
uint16_t ProfileFlags{PF_NONE};
391394

@@ -1844,6 +1847,9 @@ class BinaryFunction {
18441847
/// to this function.
18451848
void setRawBranchCount(uint64_t Count) { RawBranchCount = Count; }
18461849

1850+
/// Return the number of dynamically executed bytes, from raw perf data.
1851+
uint64_t getSampleCountInBytes() const { return SampleCountInBytes; }
1852+
18471853
/// Return the execution count for functions with known profile.
18481854
/// Return 0 if the function has no profile.
18491855
uint64_t getKnownExecutionCount() const {

bolt/include/bolt/Utils/CommandLineOpts.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ extern llvm::cl::opt<bool> PrintSections;
5555
enum ProfileFormatKind { PF_Fdata, PF_YAML };
5656

5757
extern llvm::cl::opt<ProfileFormatKind> ProfileFormat;
58+
extern llvm::cl::opt<bool> ShowDensity;
5859
extern llvm::cl::opt<bool> SplitEH;
5960
extern llvm::cl::opt<bool> StrictMode;
6061
extern llvm::cl::opt<bool> TimeOpts;

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2577,6 +2577,7 @@ struct CFISnapshot {
25772577
case MCCFIInstruction::OpAdjustCfaOffset:
25782578
case MCCFIInstruction::OpWindowSave:
25792579
case MCCFIInstruction::OpNegateRAState:
2580+
case MCCFIInstruction::OpNegateRAStateWithPC:
25802581
case MCCFIInstruction::OpLLVMDefAspaceCfa:
25812582
case MCCFIInstruction::OpLabel:
25822583
llvm_unreachable("unsupported CFI opcode");
@@ -2715,6 +2716,7 @@ struct CFISnapshotDiff : public CFISnapshot {
27152716
case MCCFIInstruction::OpAdjustCfaOffset:
27162717
case MCCFIInstruction::OpWindowSave:
27172718
case MCCFIInstruction::OpNegateRAState:
2719+
case MCCFIInstruction::OpNegateRAStateWithPC:
27182720
case MCCFIInstruction::OpLLVMDefAspaceCfa:
27192721
case MCCFIInstruction::OpLabel:
27202722
llvm_unreachable("unsupported CFI opcode");
@@ -2864,6 +2866,7 @@ BinaryFunction::unwindCFIState(int32_t FromState, int32_t ToState,
28642866
case MCCFIInstruction::OpAdjustCfaOffset:
28652867
case MCCFIInstruction::OpWindowSave:
28662868
case MCCFIInstruction::OpNegateRAState:
2869+
case MCCFIInstruction::OpNegateRAStateWithPC:
28672870
case MCCFIInstruction::OpLLVMDefAspaceCfa:
28682871
case MCCFIInstruction::OpLabel:
28692872
llvm_unreachable("unsupported CFI opcode");

bolt/lib/Core/DIEBuilder.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -281,8 +281,7 @@ void DIEBuilder::buildTypeUnits(DebugStrOffsetsWriter *StrOffsetWriter,
281281
for (auto &Row : TUIndex.getRows()) {
282282
uint64_t Signature = Row.getSignature();
283283
// manually populate TypeUnit to UnitVector
284-
DwarfContext->getTypeUnitForHash(DwarfContext->getMaxVersion(), Signature,
285-
true);
284+
DwarfContext->getTypeUnitForHash(Signature, true);
286285
}
287286
}
288287
const unsigned int CUNum = getCUNum(DwarfContext, isDWO());

bolt/lib/Core/HashUtilities.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB) {
145145
continue;
146146
}
147147

148-
std::string Mnemonic = BC.InstPrinter->getMnemonic(&Inst).first;
148+
std::string Mnemonic = BC.InstPrinter->getMnemonic(Inst).first;
149149
llvm::erase_if(Mnemonic, [](unsigned char ch) { return std::isspace(ch); });
150150
Opcodes.insert(Mnemonic);
151151
}

bolt/lib/Passes/BinaryPasses.cpp

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "bolt/Core/ParallelUtilities.h"
1616
#include "bolt/Passes/ReorderAlgorithm.h"
1717
#include "bolt/Passes/ReorderFunctions.h"
18+
#include "bolt/Utils/CommandLineOpts.h"
1819
#include "llvm/Support/CommandLine.h"
1920
#include <atomic>
2021
#include <mutex>
@@ -223,6 +224,18 @@ static cl::opt<unsigned> TopCalledLimit(
223224
"functions section"),
224225
cl::init(100), cl::Hidden, cl::cat(BoltCategory));
225226

227+
// Profile density options, synced with llvm-profgen/ProfileGenerator.cpp
228+
static cl::opt<int> ProfileDensityCutOffHot(
229+
"profile-density-cutoff-hot", cl::init(990000),
230+
cl::desc("Total samples cutoff for functions used to calculate "
231+
"profile density."));
232+
233+
static cl::opt<double> ProfileDensityThreshold(
234+
"profile-density-threshold", cl::init(60),
235+
cl::desc("If the profile density is below the given threshold, it "
236+
"will be suggested to increase the sampling rate."),
237+
cl::Optional);
238+
226239
} // namespace opts
227240

228241
namespace llvm {
@@ -1383,6 +1396,7 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
13831396
uint64_t StaleSampleCount = 0;
13841397
uint64_t InferredSampleCount = 0;
13851398
std::vector<const BinaryFunction *> ProfiledFunctions;
1399+
std::vector<std::pair<double, uint64_t>> FuncDensityList;
13861400
const char *StaleFuncsHeader = "BOLT-INFO: Functions with stale profile:\n";
13871401
for (auto &BFI : BC.getBinaryFunctions()) {
13881402
const BinaryFunction &Function = BFI.second;
@@ -1441,6 +1455,22 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
14411455
StaleSampleCount += SampleCount;
14421456
++NumAllStaleFunctions;
14431457
}
1458+
1459+
if (opts::ShowDensity) {
1460+
uint64_t Size = Function.getSize();
1461+
// In case of BOLT split functions registered in BAT, executed traces are
1462+
// automatically attributed to the main fragment. Add up function sizes
1463+
// for all fragments.
1464+
if (IsHotParentOfBOLTSplitFunction)
1465+
for (const BinaryFunction *Fragment : Function.getFragments())
1466+
Size += Fragment->getSize();
1467+
double Density = (double)1.0 * Function.getSampleCountInBytes() / Size;
1468+
FuncDensityList.emplace_back(Density, SampleCount);
1469+
LLVM_DEBUG(BC.outs() << Function << ": executed bytes "
1470+
<< Function.getSampleCountInBytes() << ", size (b) "
1471+
<< Size << ", density " << Density
1472+
<< ", sample count " << SampleCount << '\n');
1473+
}
14441474
}
14451475
BC.NumProfiledFuncs = ProfiledFunctions.size();
14461476
BC.NumStaleProfileFuncs = NumStaleProfileFunctions;
@@ -1684,6 +1714,50 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
16841714
BC.outs() << ". Use -print-unknown to see the list.";
16851715
BC.outs() << '\n';
16861716
}
1717+
1718+
if (opts::ShowDensity) {
1719+
double Density = 0.0;
1720+
// Sorted by the density in descending order.
1721+
llvm::stable_sort(FuncDensityList,
1722+
[&](const std::pair<double, uint64_t> &A,
1723+
const std::pair<double, uint64_t> &B) {
1724+
if (A.first != B.first)
1725+
return A.first > B.first;
1726+
return A.second < B.second;
1727+
});
1728+
1729+
uint64_t AccumulatedSamples = 0;
1730+
uint32_t I = 0;
1731+
assert(opts::ProfileDensityCutOffHot <= 1000000 &&
1732+
"The cutoff value is greater than 1000000(100%)");
1733+
while (AccumulatedSamples <
1734+
TotalSampleCount *
1735+
static_cast<float>(opts::ProfileDensityCutOffHot) /
1736+
1000000 &&
1737+
I < FuncDensityList.size()) {
1738+
AccumulatedSamples += FuncDensityList[I].second;
1739+
Density = FuncDensityList[I].first;
1740+
I++;
1741+
}
1742+
if (Density == 0.0) {
1743+
BC.errs() << "BOLT-WARNING: the output profile is empty or the "
1744+
"--profile-density-cutoff-hot option is "
1745+
"set too low. Please check your command.\n";
1746+
} else if (Density < opts::ProfileDensityThreshold) {
1747+
BC.errs()
1748+
<< "BOLT-WARNING: BOLT is estimated to optimize better with "
1749+
<< format("%.1f", opts::ProfileDensityThreshold / Density)
1750+
<< "x more samples. Please consider increasing sampling rate or "
1751+
"profiling for longer duration to get more samples.\n";
1752+
}
1753+
1754+
BC.outs() << "BOLT-INFO: Functions with density >= "
1755+
<< format("%.1f", Density) << " account for "
1756+
<< format("%.2f",
1757+
static_cast<double>(opts::ProfileDensityCutOffHot) /
1758+
10000)
1759+
<< "% total sample counts.\n";
1760+
}
16871761
return Error::success();
16881762
}
16891763

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -638,8 +638,12 @@ void DataAggregator::processProfile(BinaryContext &BC) {
638638
: BinaryFunction::PF_LBR;
639639
for (auto &BFI : BC.getBinaryFunctions()) {
640640
BinaryFunction &BF = BFI.second;
641-
if (getBranchData(BF) || getFuncSampleData(BF.getNames()))
641+
FuncBranchData *FBD = getBranchData(BF);
642+
if (FBD || getFuncSampleData(BF.getNames())) {
642643
BF.markProfiled(Flags);
644+
if (FBD)
645+
BF.RawBranchCount = FBD->getNumExecutedBranches();
646+
}
643647
}
644648

645649
for (auto &FuncBranches : NamesToBranches)
@@ -845,6 +849,12 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
845849
return false;
846850
}
847851

852+
// Set ParentFunc to BAT parent function or FromFunc itself.
853+
BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc);
854+
if (!ParentFunc)
855+
ParentFunc = FromFunc;
856+
ParentFunc->SampleCountInBytes += Count * (Second.From - First.To);
857+
848858
std::optional<BoltAddressTranslation::FallthroughListTy> FTs =
849859
BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), First.To,
850860
Second.From)
@@ -864,13 +874,12 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
864874
<< FromFunc->getPrintName() << ":"
865875
<< Twine::utohexstr(First.To) << " to "
866876
<< Twine::utohexstr(Second.From) << ".\n");
867-
BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc);
868877
for (auto [From, To] : *FTs) {
869878
if (BAT) {
870879
From = BAT->translate(FromFunc->getAddress(), From, /*IsBranchSrc=*/true);
871880
To = BAT->translate(FromFunc->getAddress(), To, /*IsBranchSrc=*/false);
872881
}
873-
doIntraBranch(ParentFunc ? *ParentFunc : *FromFunc, From, To, Count, false);
882+
doIntraBranch(*ParentFunc, From, To, Count, false);
874883
}
875884

876885
return true;

0 commit comments

Comments
 (0)