llvm
diff --git a/‎.git-blame-ignore-revs
Lines changed: 3 additions & 0 deletions b/‎.git-blame-ignore-revs
Lines changed: 3 additions & 0 deletions
diff --git a/‎.github/CODEOWNERS
Lines changed: 8 additions & 7 deletions b/‎.github/CODEOWNERS
Lines changed: 8 additions & 7 deletions
diff --git a/‎.github/workflows/issue-write.yml
Lines changed: 13 additions & 4 deletions b/‎.github/workflows/issue-write.yml
Lines changed: 13 additions & 4 deletions
diff --git a/‎.github/workflows/release-sources.yml
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/release-sources.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/unprivileged-download-artifact/action.yml
Lines changed: 81 additions & 0 deletions b/‎.github/workflows/unprivileged-download-artifact/action.yml
Lines changed: 81 additions & 0 deletions
diff --git a/‎.github/workflows/version-check.yml
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/version-check.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎bolt/CMakeLists.txt
Lines changed: 53 additions & 1 deletion b/‎bolt/CMakeLists.txt
Lines changed: 53 additions & 1 deletion
diff --git a/‎bolt/docs/CommandLineArgumentReference.md
Lines changed: 12 additions & 7 deletions b/‎bolt/docs/CommandLineArgumentReference.md
Lines changed: 12 additions & 7 deletions
diff --git a/‎bolt/docs/HeatmapHeader.png
75 KB b/‎bolt/docs/HeatmapHeader.png
75 KB
diff --git a/‎bolt/docs/Heatmaps.md
Lines changed: 56 additions & 12 deletions b/‎bolt/docs/Heatmaps.md
Lines changed: 56 additions & 12 deletions
@@ -84,3 +84,6 @@ b9079baaddfed5e604fbfaa1d81a7a1c38e78c26
 
 # [libc++][NFC] Run clang-format on libcxx/include again (#95874)
 e2c2ffbe7a1b5d9e32a2ce64279475b50c4cba5b
+
+# [lldb][nfc] Deindent ProcessGDBRemote::SetThreadStopInfo by two levels
+b32931c5b32eb0d2cf37d688b34f8548c9674c19
@@ -67,11 +67,11 @@ clang/test/AST/Interp/ @tbaederr
 /mlir/include/mlir/Dialect/Linalg @dcaballe @nicolasvasilache @rengolin
 /mlir/lib/Dialect/Linalg @dcaballe @nicolasvasilache @rengolin
 /mlir/lib/Dialect/Linalg/Transforms/DecomposeLinalgOps.cpp @MaheshRavishankar @nicolasvasilache
-/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp @MaheshRavishankar @nicolasvasilache
+/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp @dcaballe @MaheshRavishankar @nicolasvasilache
 /mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp @MaheshRavishankar @nicolasvasilache
 /mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp @hanhanW @nicolasvasilache
-/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @hanhanW @nicolasvasilache
-/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @hanhanW @nicolasvasilache
+/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @dcaballe @hanhanW @nicolasvasilache
+/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @banach-space @dcaballe @hanhanW @nicolasvasilache
 
 # MemRef Dialect in MLIR.
 /mlir/lib/Dialect/MemRef/Transforms/EmulateNarrowType.cpp @MaheshRavishankar @nicolasvasilache
@@ -85,10 +85,11 @@ clang/test/AST/Interp/ @tbaederr
 /mlir/**/*VectorToSCF* @banach-space @dcaballe @matthias-springer @nicolasvasilache
 /mlir/**/*VectorToLLVM* @banach-space @dcaballe @nicolasvasilache
 /mlir/**/*X86Vector* @aartbik @dcaballe @nicolasvasilache
-/mlir/include/mlir/Dialect/Vector @dcaballe @nicolasvasilache
-/mlir/lib/Dialect/Vector @dcaballe @nicolasvasilache
-/mlir/lib/Dialect/Vector/Transforms/* @hanhanW @nicolasvasilache
-/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp @MaheshRavishankar @nicolasvasilache
+/mlir/include/mlir/Dialect/Vector @banach-space @dcaballe @nicolasvasilache
+/mlir/include/mlir/Dialect/Vector/IR @kuhar
+/mlir/lib/Dialect/Vector @banach-space @dcaballe @nicolasvasilache
+/mlir/lib/Dialect/Vector/Transforms/* @banach-space @dcaballe @hanhanW @nicolasvasilache
+/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp @banach-space @dcaballe @MaheshRavishankar @nicolasvasilache
 /mlir/**/*EmulateNarrowType* @dcaballe @hanhanW
 
 # Presburger library in MLIR
 
@@ -24,14 +24,21 @@ jobs:
         github.event.workflow_run.conclusion == 'failure'
       )
     steps:
+      - name: Fetch Sources
+        uses: actions/checkout@v4
+        with:
+          sparse-checkout: |
+            .github/workflows/unprivileged-download-artifact/action.yml
+          sparse-checkout-cone-mode: false
       - name: 'Download artifact'
-        uses: actions/download-artifact@6b208ae046db98c579e8a3aa621ab581ff575935 # v4.1.1
+        uses: ./.github/workflows/unprivileged-download-artifact
+        id: download-artifact
         with:
-          github-token: ${{ secrets.ISSUE_WRITE_DOWNLOAD_ARTIFACT }}
           run-id: ${{ github.event.workflow_run.id }}
-          name: workflow-args
+          artifact-name: workflow-args
 
       - name: 'Comment on PR'
+        if: steps.download-artifact.outputs.artifact-id != ''
         uses: actions/github-script@v3
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -144,5 +151,7 @@ jobs:
             });
 
       - name: Dump comments file
-        if: always()
+        if: >-
+          always() &&
+          steps.download-artifact.outputs.artifact-id != ''
         run: cat comments
@@ -47,7 +47,7 @@ jobs:
     steps:
       - id: inputs
         run: |
-          ref=${{ inputs.release-version || github.sha }}
+          ref=${{ (inputs.release-version && format('llvmorg-{0}', inputs.release-version)) || github.sha }}
           if [ -n "${{ inputs.release-version }}" ]; then
             export_args="-release ${{ inputs.release-version }} -final"
           else
 
@@ -0,0 +1,81 @@
+name: Unprivileged Download Artifact
+description: >-
+  Download artifacts from another workflow run without using an access token.
+inputs:
+  run-id:
+    description: >-
+      The run-id for the workflow run that you want to download the artifact
+      from.  If omitted it will download the most recently created artifact
+      from the repo with the artifact-name.
+    required: false
+  artifact-name:
+    description: The name of the artifact to download.
+    required: true
+
+# Both outputs are left empty when no matching artifact is found.
+outputs:
+  filename:
+    description: >-
+      The filename of the downloaded artifact or the empty string if the
+      artifact was not found.
+    value: ${{ steps.download-artifact.outputs.filename }}
+  artifact-id:
+    description: "The id of the artifact being downloaded."
+    value: ${{ steps.artifact-url.outputs.id }}
+
+
+runs:
+  using: "composite"
+  steps:
+    - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
+      id: artifact-url
+      with:
+        script: |
+          var response;
+          if (!"${{ inputs.run-id }}") { // No run-id: search repo-wide by name.
+            response = await github.rest.actions.listArtifactsForRepo({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              name: "${{ inputs.artifact-name }}"
+            })
+          } else { // Otherwise restrict the search to the given workflow run.
+            response = await github.rest.actions.listWorkflowRunArtifacts({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              run_id: "${{ inputs.run-id }}",
+              name: "${{ inputs.artifact-name }}"
+            })
+          }
+
+          console.log(response)
+
+          for (const artifact of response.data.artifacts) {
+            console.log(artifact);
+          }
+
+          if (response.data.artifacts.length == 0) {
+            console.log("Could not find artifact ${{ inputs.artifact-name }} for workflow run ${{ inputs.run-id }}")
+            return; // "url" and "id" stay unset, so the later steps are skipped.
+          }
+
+          const url_response = await github.rest.actions.downloadArtifact({
+            owner: context.repo.owner,
+            repo: context.repo.repo,
+            artifact_id: response.data.artifacts[0].id, // Only the first match is used.
+            archive_format: "zip"
+          })
+
+          core.setOutput("url", url_response.url);
+          core.setOutput("id", response.data.artifacts[0].id);
+
+    - shell: bash  # Download the archive; --fail stops an HTTP error body being saved as the zip.
+      if: steps.artifact-url.outputs.url != ''
+      id: download-artifact
+      run: |
+        curl --fail -L -o "${{ inputs.artifact-name }}.zip" "${{ steps.artifact-url.outputs.url }}"
+        echo "filename=${{ inputs.artifact-name }}.zip" >> "$GITHUB_OUTPUT"
+
+    - shell: bash  # Extract the downloaded archive into the current working directory.
+      if: steps.download-artifact.outputs.filename != ''
+      run: |
+        unzip "${{ steps.download-artifact.outputs.filename }}"
@@ -27,5 +27,5 @@ jobs:
 
       - name: Version Check
         run: |
-          version=$(grep -o 'LLVM_VERSION_\(MAJOR\|MINOR\|PATCH\) [0-9]\+' llvm/CMakeLists.txt  | cut -d ' ' -f 2 | tr "\n" "." | sed 's/.$//g')
+          version=$(grep -o 'LLVM_VERSION_\(MAJOR\|MINOR\|PATCH\) [0-9]\+' cmake/Modules/LLVMVersion.cmake  | cut -d ' ' -f 2 | tr "\n" "." | sed 's/.$//g')
           .github/workflows/version-check.py "$version"
@@ -1,6 +1,17 @@
+cmake_minimum_required(VERSION 3.20.0)
+
 set(LLVM_SUBPROJECT_TITLE "BOLT")
 
-include(ExternalProject)
+if(NOT DEFINED LLVM_COMMON_CMAKE_UTILS)
+  set(LLVM_COMMON_CMAKE_UTILS ${CMAKE_CURRENT_SOURCE_DIR}/../cmake)
+endif()
+include(${LLVM_COMMON_CMAKE_UTILS}/Modules/CMakePolicy.cmake
+  NO_POLICY_SCOPE)
+
+if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
+  project(bolt)
+  set(BOLT_BUILT_STANDALONE TRUE)
+endif()
 
 set(BOLT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 set(BOLT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
@@ -9,6 +20,42 @@ set(CMAKE_CXX_STANDARD 17)
 # Add path for custom modules.
 list(INSERT CMAKE_MODULE_PATH 0 "${BOLT_SOURCE_DIR}/cmake/modules")
 
+include(GNUInstallDirs)
+
+# Standalone build (copied from clang): find LLVM and load its CMake helpers.
+if(BOLT_BUILT_STANDALONE)
+  set(CMAKE_CXX_STANDARD 17 CACHE STRING "C++ standard to conform to")
+  set(CMAKE_CXX_STANDARD_REQUIRED YES)
+  set(CMAKE_CXX_EXTENSIONS NO)
+
+  if(NOT MSVC_IDE)
+    set(LLVM_ENABLE_ASSERTIONS ${ENABLE_ASSERTIONS}
+      CACHE BOOL "Enable assertions")
+    # Assertions should follow llvm-config's.
+    mark_as_advanced(LLVM_ENABLE_ASSERTIONS)
+  endif()
+
+  find_package(LLVM REQUIRED HINTS "${LLVM_CMAKE_DIR}") # REQUIRED: configuration stops if no LLVM package is found.
+  list(APPEND CMAKE_MODULE_PATH "${LLVM_DIR}") # Presumably so include(AddLLVM)/include(TableGen) below resolve; confirm.
+
+  set(LLVM_MAIN_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../llvm" CACHE PATH "Path to LLVM source tree")
+  find_program(LLVM_TABLEGEN_EXE "llvm-tblgen" ${LLVM_TOOLS_BINARY_DIR}
+    NO_DEFAULT_PATH) # NO_DEFAULT_PATH: only LLVM_TOOLS_BINARY_DIR is searched.
+
+  # They are used as destination of target generators.
+  set(LLVM_RUNTIME_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin)
+  set(LLVM_LIBRARY_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/lib${LLVM_LIBDIR_SUFFIX})
+
+  include(AddLLVM)
+  include(TableGen)
+  include_directories(${LLVM_INCLUDE_DIRS})
+  link_directories("${LLVM_LIBRARY_DIR}")
+
+  set( CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_INSTALL_BINDIR}" ) # NOTE(review): CMAKE_INSTALL_* come from GNUInstallDirs and may be relative paths; confirm the intended base directory.
+  set( CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_INSTALL_LIBDIR}/${LLVM_LIBDIR_SUFFIX}" ) # NOTE(review): suffix is joined with "/" here but appended directly (lib${LLVM_LIBDIR_SUFFIX}) above; confirm.
+  set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_INSTALL_LIBDIR}/${LLVM_LIBDIR_SUFFIX}")
+endif() # standalone
+
 # Determine default set of targets to build -- the intersection of
 # those BOLT supports and those LLVM is targeting.
 set(BOLT_TARGETS_TO_BUILD_all "AArch64;X86;RISCV")
@@ -94,6 +141,8 @@ if (BOLT_ENABLE_RUNTIME)
   if(CMAKE_SYSROOT)
     list(APPEND extra_args -DCMAKE_SYSROOT=${CMAKE_SYSROOT})
   endif()
+
+  include(ExternalProject)
   ExternalProject_Add(bolt_rt
     SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/runtime"
     STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-stamps
@@ -104,6 +153,7 @@ if (BOLT_ENABLE_RUNTIME)
                -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}
                -DLLVM_LIBDIR_SUFFIX=${LLVM_LIBDIR_SUFFIX}
                -DLLVM_LIBRARY_DIR=${LLVM_LIBRARY_DIR}
+               -DBOLT_BUILT_STANDALONE=${BOLT_BUILT_STANDALONE}
                ${extra_args}
     INSTALL_COMMAND ""
     BUILD_ALWAYS True
@@ -113,6 +163,8 @@ if (BOLT_ENABLE_RUNTIME)
   add_llvm_install_targets(install-bolt_rt
     DEPENDS bolt_rt bolt
     COMPONENT bolt)
+  set(LIBBOLT_RT_INSTR "${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-bins/lib/libbolt_rt_instr.a")
+  set(LIBBOLT_RT_HUGIFY "${CMAKE_CURRENT_BINARY_DIR}/bolt_rt-bins/lib/libbolt_rt_hugify.a")
 endif()
 
 find_program(GNU_LD_EXECUTABLE NAMES ${LLVM_DEFAULT_TARGET_TRIPLE}-ld.bfd ld.bfd DOC "GNU ld")
 
@@ -88,7 +88,7 @@
 
 - `--comp-dir-override=<string>`
 
-  Overrides DW_AT_comp_dir, and provides an alterantive base location, which is
+  Overrides DW_AT_comp_dir, and provides an alternative base location, which is
   used with DW_AT_dwo_name to construct a path to *.dwo files.
 
 - `--create-debug-names-section`
@@ -113,11 +113,6 @@
 
   Prints out offsets for abbrev and debug_info of Skeleton CUs that get patched.
 
-- `--deterministic-debuginfo`
-
-  Disables parallel execution of tasks that may produce nondeterministic debug
-  info
-
 - `--dot-tooltip-code`
 
   Add basic block instructions as tool tips on nodes
@@ -283,6 +278,12 @@
 
   List of functions to pad with amount of bytes
 
+- `--print-mappings`
+
+  Print mappings in the legend, between characters/blocks and text sections
+  (default false).
+
+
 - `--profile-format=<value>`
 
   Format to dump profile output in aggregation mode, default is fdata
@@ -680,6 +681,10 @@
   threshold means fewer functions to process. E.g threshold of 90 means only top
   10 percent of functions with profile will be processed.
 
+- `--match-with-call-graph`
+
+  Match functions with call graph
+
 - `--memcpy1-spec=<func1,func2:cs1:cs2,func3:cs1,...>`
 
   List of functions with call sites for which to specialize memcpy() for size 1
@@ -1240,4 +1245,4 @@
 
 - `--print-options`
 
-  Print non-default options after command line parsing
+  Print non-default options after command line parsing
@@ -1,9 +1,9 @@
 # Code Heatmaps
 
 BOLT has gained the ability to print code heatmaps based on
-sampling-based LBR profiles generated by `perf`. The output is produced
-in colored ASCII to be displayed in a color-capable terminal. It looks
-something like this:
+sampling-based profiles generated by `perf`, either with `LBR` data or not.
+The output is produced in colored ASCII to be displayed in a color-capable
+terminal. It looks something like this:
 
 ![](./Heatmap.png)
 
@@ -32,20 +32,64 @@ $ llvm-bolt-heatmap -p perf.data <executable>
 ```
 
 By default the heatmap will be dumped to *stdout*. You can change it
-with `-o <heatmapfile>` option. Each character/block in the heatmap
-shows the execution data accumulated for corresponding 64 bytes of
-code. You can change this granularity with a `-block-size` option.
-E.g. set it to 4096 to see code usage grouped by 4K pages.
-Other useful options are:
+with the `-o <heatmapfile>` option.
 
-```bash
--line-size=<uint>   - number of entries per line (default 256)
--max-address=<uint> - maximum address considered valid for heatmap (default 4GB)
-```
 
 If you prefer to look at the data in a browser (or would like to share
 it that way), then you can use an HTML conversion tool. E.g.:
 
 ```bash
 $ aha -b -f <heatmapfile> > <heatmapfile>.html
 ```
+
+---
+
+## Background on heatmaps:
+A heatmap is effectively a histogram that is rendered into a grid for better
+visualization.
+In theory we can generate a heatmap using any binary and a perf profile.
+
+Each block/character in the heatmap shows the execution data accumulated for
+corresponding 64 bytes of code. You can change this granularity with a
+`-block-size` option.
+E.g. set it to 4096 to see code usage grouped by 4K pages.
+
+
+When a block is shown as a dot, it means that no samples were found for that
+address.
+When it is shown as a letter, it indicates a captured sample on a particular
+text section of the binary.
+To show a mapping between letters and text sections in the legend, use
+`-print-mappings`.
+When a sampled address does not belong to any of the text sections, the
+characters 'o' or 'O' will be shown.
+
+The legend shows by default the ranges in the heatmap according to the number
+of samples per block.
+A color is assigned per range, except the first two ranges, which are
+distinguished by lower and upper case letters.
+
+On the Y axis, each row/line starts with an actual address of the binary.
+Consecutive lines in the heatmap advance by the same amount, with the binary
+size covered by a line dependent on the block size and the line size.
+An empty new line is inserted for larger gaps between samples.
+
+On the X axis, the horizontally emitted hex numbers can help *estimate* where
+in the line the samples lie, but they cannot be combined to provide a full
+address, as they are relative to both the bucket and line sizes.
+
+In the example below, the highlighted `0x100` column is not an offset to each
+row's address, but instead, it points to the middle of the line.
+For the generation, the default bucket size was used with a line size of 128.
+
+
+![](./HeatmapHeader.png)
+
+
+Some useful options are:
+
+```
+-line-size=<uint>   - number of entries per line (default 256)
+-max-address=<uint> - maximum address considered valid for heatmap (default 4GB)
+-print-mappings     - print mappings in the legend, between characters/blocks and text sections (default false)
+```