llvm
diff --git a/‎.git-blame-ignore-revs
Lines changed: 3 additions & 0 deletions b/‎.git-blame-ignore-revs
Lines changed: 3 additions & 0 deletions
diff --git a/‎.github/workflows/pr-code-format.yml
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/pr-code-format.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/release-sources.yml
Lines changed: 104 additions & 0 deletions b/‎.github/workflows/release-sources.yml
Lines changed: 104 additions & 0 deletions
diff --git a/‎.github/workflows/release-tasks.yml
Lines changed: 11 additions & 0 deletions b/‎.github/workflows/release-tasks.yml
Lines changed: 11 additions & 0 deletions
diff --git a/‎bolt/docs/CommandLineArgumentReference.md
Lines changed: 6 additions & 1 deletion b/‎bolt/docs/CommandLineArgumentReference.md
Lines changed: 6 additions & 1 deletion
diff --git a/‎bolt/lib/Profile/StaleProfileMatching.cpp
Lines changed: 53 additions & 19 deletions b/‎bolt/lib/Profile/StaleProfileMatching.cpp
Lines changed: 53 additions & 19 deletions
diff --git a/‎bolt/lib/Rewrite/LinuxKernelRewriter.cpp
Lines changed: 7 additions & 9 deletions b/‎bolt/lib/Rewrite/LinuxKernelRewriter.cpp
Lines changed: 7 additions & 9 deletions
@@ -81,3 +81,6 @@ f6d557ee34b6bbdb1dc32f29e34b4a4a8ad35e81
 
 # [NFC] clang-format utils/TableGen (#80973)
 b9079baaddfed5e604fbfaa1d81a7a1c38e78c26
+
+# [libc++][NFC] Run clang-format on libcxx/include again (#95874)
+e2c2ffbe7a1b5d9e32a2ce64279475b50c4cba5b
@@ -55,7 +55,7 @@ jobs:
       - name: Install clang-format
         uses: aminya/setup-cpp@v1
         with:
-          clangformat: 18.1.1
+          clangformat: 18.1.7
 
       - name: Setup Python env
         uses: actions/setup-python@v5
 
@@ -0,0 +1,104 @@
+name: Release Sources
+
+permissions:
+  contents: read
+
+on:
+  workflow_dispatch:
+    inputs:
+      release-version:
+        description: Release Version
+        required: true
+        type: string
+  workflow_call:
+    inputs:
+      release-version:
+        description: Release Version
+        required: true
+        type: string
+  # Run on pull_requests for testing purposes.
+  pull_request:
+    paths:
+      - '.github/workflows/release-sources.yml'
+    types:
+      - opened
+      - synchronize
+      - reopened
+      # When a PR is closed, we still start this workflow, but then skip
+      # all the jobs, which makes it effectively a no-op.  The reason to
+      # do this is that it allows us to take advantage of concurrency groups
+      # to cancel in-progress CI jobs whenever the PR is closed.
+      - closed
+
+# One concurrency group per release version (or per PR number when there is no
+# release version); a newer run in the same group cancels the older one.
+concurrency:
+  group: ${{ github.workflow }}-${{ inputs.release-version || github.event.pull_request.number }}
+  cancel-in-progress: true
+
+jobs:
+  # Compute the git ref to check out and the arguments passed to export.sh.
+  # When a release version is supplied it is used for both; otherwise the
+  # triggering commit SHA is used.
+  inputs:
+    name: Collect Job Inputs
+    if: >-
+      github.repository_owner == 'llvm' &&
+      github.event.action != 'closed'
+    outputs:
+      ref: ${{ steps.inputs.outputs.ref }}
+      export-args: ${{ steps.inputs.outputs.export-args }}
+    runs-on: ubuntu-latest
+    steps:
+      - id: inputs
+        # Pass expression values through the environment instead of expanding
+        # them inside the script, so their contents are never parsed as shell.
+        env:
+          RELEASE_VERSION: ${{ inputs.release-version }}
+          GIT_SHA: ${{ github.sha }}
+        run: |
+          if [ -n "$RELEASE_VERSION" ]; then
+            ref="$RELEASE_VERSION"
+            export_args="-release $RELEASE_VERSION -final"
+          else
+            ref="$GIT_SHA"
+            export_args="-git-ref $GIT_SHA"
+          fi
+          echo "ref=$ref" >> "$GITHUB_OUTPUT"
+          echo "export-args=$export_args" >> "$GITHUB_OUTPUT"
+
+  # Check out the requested ref, build the source tarballs with export.sh and
+  # upload them as workflow artifacts. Outside of pull requests the job also
+  # checks the triggering user's permissions and attests build provenance.
+  release-sources:
+    name: Package Release Sources
+    if: github.repository_owner == 'llvm'
+    runs-on: ubuntu-latest
+    needs:
+      - inputs
+    permissions:
+      id-token: write
+      attestations: write
+    steps:
+      - name: Checkout LLVM
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+        with:
+          ref: ${{ needs.inputs.outputs.ref }}
+          fetch-tags: true
+      - name: Install Dependencies
+        run: |
+          pip install --require-hashes -r ./llvm/utils/git/requirements.txt
+
+      - name: Check Permissions
+        if: github.event_name != 'pull_request'
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+          USER_TOKEN: ${{ secrets.RELEASE_TASKS_USER_TOKEN }}
+          ACTOR: ${{ github.actor }}
+        run: |
+          ./llvm/utils/release/./github-upload-release.py --token "$GITHUB_TOKEN" --user "$ACTOR" --user-token "$USER_TOKEN" check-permissions
+      - name: Create Tarballs
+        env:
+          EXPORT_ARGS: ${{ needs.inputs.outputs.export-args }}
+        run: |
+          # EXPORT_ARGS holds several space-separated arguments, so it is
+          # intentionally left unquoted to let the shell split it into words.
+          ./llvm/utils/release/export.sh $EXPORT_ARGS
+      - name: Attest Build Provenance
+        if: github.event_name != 'pull_request'
+        id: provenance
+        uses: actions/attest-build-provenance@897ed5eab6ed058a474202017ada7f40bfa52940 # v1.0.0
+        with:
+          subject-path: "*.xz"
+      # Move the attestation bundle into the workspace so the upload step
+      # below can pick it up next to the tarballs.
+      - name: Collect Attestation Bundle
+        if: github.event_name != 'pull_request'
+        env:
+          BUNDLE_PATH: ${{ steps.provenance.outputs.bundle-path }}
+        run: |
+          mv "$BUNDLE_PATH" .
+      - name: Create Tarball Artifacts
+        uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 #v4.3.3
+        with:
+          path: |
+            *.xz
+            attestation.jsonl
+
+
@@ -85,3 +85,14 @@ jobs:
     with:
       release-version: ${{ needs.validate-tag.outputs.release-version }}
       upload: true
+
+  # Package the release source tarballs by calling the reusable
+  # release-sources workflow with the validated release version.
+  release-sources:
+    name: Package Release Sources
+    permissions:
+      id-token: write
+      attestations: write
+    needs:
+      - validate-tag
+    uses: ./.github/workflows/release-sources.yml
+    with:
+      release-version: ${{ needs.validate-tag.outputs.release-version }}
+    # Called workflows do not receive the caller's secrets by default, and
+    # release-sources.yml reads secrets.RELEASE_TASKS_USER_TOKEN.
+    secrets: inherit
@@ -802,6 +802,11 @@
 
   The maximum size of a function to consider for inference.
 
+- `--stale-matching-min-matched-block=<uint>`
+
+  Minimum percentage of exactly matched blocks required for a function to be
+  considered for profile inference.
+
 - `--stale-threshold=<uint>`
 
   Maximum percentage of stale functions to tolerate (default: 100)
@@ -1161,4 +1166,4 @@
 
 - `--print-options`
 
-  Print non-default options after command line parsing
+  Print non-default options after command line parsing
@@ -51,6 +51,12 @@ cl::opt<bool>
                       cl::desc("Infer counts from stale profile data."),
                       cl::init(false), cl::Hidden, cl::cat(BoltOptCategory));
 
+cl::opt<unsigned> StaleMatchingMinMatchedBlock(
+    "stale-matching-min-matched-block",
+    cl::desc("Percentage threshold of matched basic blocks at which stale "
+             "profile inference is executed."),
+    cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));
+
 cl::opt<unsigned> StaleMatchingMaxFuncSize(
     "stale-matching-max-func-size",
     cl::desc("The maximum size of a function to consider for inference."),
@@ -301,21 +307,21 @@ void BinaryFunction::computeBlockHashes(HashFunction HashFunction) const {
     BB->setHash(BlendedHashes[I].combine());
   }
 }
-
+// TODO: reconcile the difference between flow function construction here in
+// BOLT and in the compiler by splitting blocks with exception-throwing calls
+// at the call and adding the landing pad as the successor.
 /// Create a wrapper flow function to use with the profile inference algorithm,
 /// and initialize its jumps and metadata.
 FlowFunction
 createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
   FlowFunction Func;
 
   // Add a special "dummy" source so that there is always a unique entry point.
-  // Because of the extra source, for all other blocks in FlowFunction it holds
-  // that Block.Index == BB->getIndex() + 1
   FlowBlock EntryBlock;
   EntryBlock.Index = 0;
   Func.Blocks.push_back(EntryBlock);
 
-  // Create FlowBlock for every basic block in the binary function
+  // Create FlowBlock for every basic block in the binary function.
   for (const BinaryBasicBlock *BB : BlockOrder) {
     Func.Blocks.emplace_back();
     FlowBlock &Block = Func.Blocks.back();
@@ -325,7 +331,12 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
            "incorrectly assigned basic block index");
   }
 
-  // Create FlowJump for each jump between basic blocks in the binary function
+  // Add a special "dummy" sink block so there is always a unique sink.
+  FlowBlock SinkBlock;
+  SinkBlock.Index = Func.Blocks.size();
+  Func.Blocks.push_back(SinkBlock);
+
+  // Create FlowJump for each jump between basic blocks in the binary function.
   std::vector<uint64_t> InDegree(Func.Blocks.size(), 0);
   for (const BinaryBasicBlock *SrcBB : BlockOrder) {
     std::unordered_set<const BinaryBasicBlock *> UniqueSuccs;
@@ -342,6 +353,16 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
       InDegree[Jump.Target]++;
       UniqueSuccs.insert(DstBB);
     }
+    // TODO: set jump from exit block to landing pad to Unlikely.
+    // If the block is an exit, add a dummy edge from it to the sink block.
+    if (UniqueSuccs.empty()) {
+      Func.Jumps.emplace_back();
+      FlowJump &Jump = Func.Jumps.back();
+      Jump.Source = SrcBB->getIndex() + 1;
+      Jump.Target = Func.Blocks.size() - 1;
+      InDegree[Jump.Target]++;
+    }
+
     // Collect jumps to landing pads
     for (const BinaryBasicBlock *DstBB : SrcBB->landing_pads()) {
       // Ignoring parallel edges
@@ -358,9 +379,9 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
   }
 
   // Add dummy edges to the extra sources. If there are multiple entry blocks,
-  // add an unlikely edge from 0 to the subsequent ones
+  // add an unlikely edge from 0 to the subsequent ones. Skips the sink block.
   assert(InDegree[0] == 0 && "dummy entry blocks shouldn't have predecessors");
-  for (uint64_t I = 1; I < Func.Blocks.size(); I++) {
+  for (uint64_t I = 1; I < Func.Blocks.size() - 1; I++) {
     const BinaryBasicBlock *BB = BlockOrder[I - 1];
     if (BB->isEntryPoint() || InDegree[I] == 0) {
       Func.Jumps.emplace_back();
@@ -391,11 +412,10 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
 /// of the basic blocks in the binary, the count is "matched" to the block.
 /// Similarly, if both the source and the target of a count in the profile are
 /// matched to a jump in the binary, the count is recorded in CFG.
-void matchWeightsByHashes(BinaryContext &BC,
-                          const BinaryFunction::BasicBlockOrderType &BlockOrder,
-                          const yaml::bolt::BinaryFunctionProfile &YamlBF,
-                          FlowFunction &Func) {
-  assert(Func.Blocks.size() == BlockOrder.size() + 1);
+size_t matchWeightsByHashes(
+    BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder,
+    const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func) {
+  assert(Func.Blocks.size() == BlockOrder.size() + 2);
 
   std::vector<FlowBlock *> Blocks;
   std::vector<BlendedBlockHash> BlendedHashes;
@@ -500,6 +520,8 @@ void matchWeightsByHashes(BinaryContext &BC,
     Block.HasUnknownWeight = false;
     Block.Weight = std::max(OutWeight[Block.Index], InWeight[Block.Index]);
   }
+
+  return MatchedBlocks.size();
 }
 
 /// The function finds all blocks that are (i) reachable from the Entry block
@@ -575,13 +597,19 @@ void preprocessUnreachableBlocks(FlowFunction &Func) {
 /// Decide if stale profile matching can be applied for a given function.
 /// Currently we skip inference for (very) large instances and for instances
 /// having "unexpected" control flow (e.g., having no sink basic blocks).
-bool canApplyInference(const FlowFunction &Func) {
+bool canApplyInference(const FlowFunction &Func,
+                       const yaml::bolt::BinaryFunctionProfile &YamlBF,
+                       const uint64_t &MatchedBlocks) {
   if (Func.Blocks.size() > opts::StaleMatchingMaxFuncSize)
     return false;
 
-  bool HasExitBlocks = llvm::any_of(
-      Func.Blocks, [&](const FlowBlock &Block) { return Block.isExit(); });
-  if (!HasExitBlocks)
+  if (MatchedBlocks * 100 <
+      opts::StaleMatchingMinMatchedBlock * YamlBF.Blocks.size())
+    return false;
+
+  // Bail out if the artificial sink block has no predecessors, which means
+  // the function has no exit blocks.
+  if (Func.Blocks[Func.Blocks.size() - 1].isEntry())
     return false;
 
   return true;
@@ -618,7 +646,7 @@ void assignProfile(BinaryFunction &BF,
                    FlowFunction &Func) {
   BinaryContext &BC = BF.getBinaryContext();
 
-  assert(Func.Blocks.size() == BlockOrder.size() + 1);
+  assert(Func.Blocks.size() == BlockOrder.size() + 2);
   for (uint64_t I = 0; I < BlockOrder.size(); I++) {
     FlowBlock &Block = Func.Blocks[I + 1];
     BinaryBasicBlock *BB = BlockOrder[I];
@@ -640,6 +668,9 @@ void assignProfile(BinaryFunction &BF,
       if (Jump->Flow == 0)
         continue;
 
+      // Skip jumps to the artificial sink block; it has no entry in BlockOrder.
+      if (Jump->Target == Func.Blocks.size() - 1)
+        continue;
       BinaryBasicBlock &SuccBB = *BlockOrder[Jump->Target - 1];
       // Check if the edge corresponds to a regular jump or a landing pad
       if (BB->getSuccessor(SuccBB.getLabel())) {
@@ -725,18 +756,21 @@ bool YAMLProfileReader::inferStaleProfile(
   const BinaryFunction::BasicBlockOrderType BlockOrder(
       BF.getLayout().block_begin(), BF.getLayout().block_end());
 
+  // The number of matched blocks is returned by matchWeightsByHashes below.
+
   // Create a wrapper flow function to use with the profile inference algorithm.
   FlowFunction Func = createFlowFunction(BlockOrder);
 
   // Match as many block/jump counts from the stale profile as possible
-  matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func);
+  size_t MatchedBlocks =
+      matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func);
 
   // Adjust the flow function by marking unreachable blocks Unlikely so that
   // they don't get any counts assigned.
   preprocessUnreachableBlocks(Func);
 
   // Check if profile inference can be applied for the instance.
-  if (!canApplyInference(Func))
+  if (!canApplyInference(Func, YamlBF, MatchedBlocks))
     return false;
 
   // Apply the profile inference algorithm.
 
@@ -273,9 +273,9 @@ class LinuxKernelRewriter final : public MetadataRewriter {
 
   /// Handle alternative instruction info from .altinstructions.
   Error readAltInstructions();
+  void processAltInstructionsPostCFG();
   Error tryReadAltInstructions(uint32_t AltInstFeatureSize,
                                bool AltInstHasPadLen, bool ParseOnly);
-  Error rewriteAltInstructions();
 
   /// Read .pci_fixup
   Error readPCIFixupTable();
@@ -326,6 +326,8 @@ class LinuxKernelRewriter final : public MetadataRewriter {
     if (Error E = processORCPostCFG())
       return E;
 
+    processAltInstructionsPostCFG();
+
     return Error::success();
   }
 
@@ -335,9 +337,6 @@ class LinuxKernelRewriter final : public MetadataRewriter {
     if (Error E = rewriteExceptionTable())
       return E;
 
-    if (Error E = rewriteAltInstructions())
-      return E;
-
     if (Error E = rewriteParaInstructions())
       return E;
 
@@ -1486,12 +1485,11 @@ Error LinuxKernelRewriter::tryReadAltInstructions(uint32_t AltInstFeatureSize,
   return Error::success();
 }
 
-Error LinuxKernelRewriter::rewriteAltInstructions() {
-  // Disable output of functions with alt instructions before the rewrite
-  // support is complete.
+void LinuxKernelRewriter::processAltInstructionsPostCFG() {
+  // Disable optimization and output of functions with alt instructions until
+  // the rewrite support is complete. Alt instructions can modify the control
+  // flow, so optimizing such functions may delete seemingly unreachable code.
   skipFunctionsWithAnnotation("AltInst");
-
-  return Error::success();
 }
 
 /// When the Linux kernel needs to handle an error associated with a given PCI