Skip to content

Commit 76b2915

Browse files
committed
[Clang][CMake] Use perf-training for Clang-BOLT
Leverage the perf-training flow for BOLT profile collection, enabling reproducible BOLT optimization. Remove the use of a bootstrapped build for profile collection.

Test Plan:

- Regular (single-stage) build

```
$ cmake ... -C .../clang/cmake/caches/BOLT.cmake
$ ninja clang-bolt
...
[21/24] Instrumenting clang binary with BOLT
[21/24] Generating BOLT profile for Clang
[23/24] Merging BOLT fdata
Profile from 2 files merged.
[24/24] Optimizing Clang with BOLT
...
1291202496 : executed instructions (-1.1%)
27005133 : taken branches (-71.5%)
...
```

- Two-stage build (ThinLTO+InstPGO)

```
$ cmake ... -C .../clang/cmake/caches/BOLT.cmake -C .../clang/cmake/caches/BOLT-PGO.cmake
$ ninja clang-bolt
$ ninja stage2-clang-bolt
...
[2756/2759] Instrumenting clang binary with BOLT
[2756/2759] Generating BOLT profile for Clang
[2758/2759] Merging BOLT fdata
[2759/2759] Optimizing Clang with BOLT
...
BOLT-INFO: 7092 out of 184104 functions in the binary (3.9%) have non-empty execution profile
756531927 : executed instructions (-0.5%)
15399400 : taken branches (-40.3%)
...
```

Reviewed By: beanz

Differential Revision: https://reviews.llvm.org/D143553
1 parent c19c248 commit 76b2915

File tree

5 files changed

+60
-69
lines changed

5 files changed

+60
-69
lines changed

clang/CMakeLists.txt

Lines changed: 3 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -851,9 +851,8 @@ endif()
851851

852852
if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
853853
set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
854-
set(CLANGXX_PATH ${CLANG_PATH}++)
855854
set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
856-
set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
855+
set(BOLT_FDATA ${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/prof.fdata)
857856

858857
# Instrument clang with BOLT
859858
add_custom_target(clang-instrumented
@@ -863,73 +862,11 @@ if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
863862
DEPENDS clang llvm-bolt
864863
COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
865864
-instrument --instrumentation-file-append-pid
866-
--instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
865+
--instrumentation-file=${BOLT_FDATA}
867866
COMMENT "Instrumenting clang binary with BOLT"
868867
VERBATIM
869868
)
870869

871-
# Make a symlink from clang-bolt.inst to clang++-bolt.inst
872-
add_custom_target(clang++-instrumented
873-
DEPENDS ${CLANGXX_INSTRUMENTED}
874-
)
875-
add_custom_command(OUTPUT ${CLANGXX_INSTRUMENTED}
876-
DEPENDS clang-instrumented
877-
COMMAND ${CMAKE_COMMAND} -E create_symlink
878-
${CLANG_INSTRUMENTED}
879-
${CLANGXX_INSTRUMENTED}
880-
COMMENT "Creating symlink from BOLT instrumented clang to clang++"
881-
VERBATIM
882-
)
883-
884-
# Build specified targets with instrumented Clang to collect the profile
885-
set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-stamps/)
886-
set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-bins/)
887-
set(build_configuration "$<CONFIG>")
888-
include(ExternalProject)
889-
ExternalProject_Add(bolt-instrumentation-profile
890-
DEPENDS clang++-instrumented
891-
PREFIX bolt-instrumentation-profile
892-
SOURCE_DIR ${CMAKE_SOURCE_DIR}
893-
STAMP_DIR ${STAMP_DIR}
894-
BINARY_DIR ${BINARY_DIR}
895-
EXCLUDE_FROM_ALL 1
896-
CMAKE_ARGS
897-
${CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS}
898-
# We shouldn't need to set this here, but INSTALL_DIR doesn't
899-
# seem to work, so instead I'm passing this through
900-
-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
901-
-DCMAKE_C_COMPILER=${CLANG_INSTRUMENTED}
902-
-DCMAKE_CXX_COMPILER=${CLANGXX_INSTRUMENTED}
903-
-DCMAKE_ASM_COMPILER=${CLANG_INSTRUMENTED}
904-
-DCMAKE_ASM_COMPILER_ID=Clang
905-
-DCMAKE_BUILD_TYPE=Release
906-
-DLLVM_ENABLE_PROJECTS=${CLANG_BOLT_INSTRUMENT_PROJECTS}
907-
-DLLVM_TARGETS_TO_BUILD=${LLVM_TARGETS_TO_BUILD}
908-
BUILD_COMMAND ${CMAKE_COMMAND} --build ${BINARY_DIR}
909-
--config ${build_configuration}
910-
--target ${CLANG_BOLT_INSTRUMENT_TARGETS}
911-
INSTALL_COMMAND ""
912-
STEP_TARGETS configure build
913-
USES_TERMINAL_CONFIGURE 1
914-
USES_TERMINAL_BUILD 1
915-
USES_TERMINAL_INSTALL 1
916-
)
917-
918-
# Merge profiles into one using merge-fdata
919-
add_custom_target(clang-bolt-profile
920-
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
921-
)
922-
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
923-
DEPENDS merge-fdata bolt-instrumentation-profile-build
924-
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
925-
COMMAND ${Python3_EXECUTABLE}
926-
${CMAKE_CURRENT_SOURCE_DIR}/utils/perf-training/perf-helper.py merge-fdata
927-
$<TARGET_FILE:merge-fdata> ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
928-
${CMAKE_CURRENT_BINARY_DIR}
929-
COMMENT "Preparing BOLT profile"
930-
VERBATIM
931-
)
932-
933870
# Optimize original (pre-bolt) Clang using the collected profile
934871
set(CLANG_OPTIMIZED ${CMAKE_CURRENT_BINARY_DIR}/clang.bolt)
935872
add_custom_target(clang-bolt
@@ -939,7 +876,7 @@ if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
939876
DEPENDS clang-bolt-profile
940877
COMMAND llvm-bolt ${CLANG_PATH}
941878
-o ${CLANG_OPTIMIZED}
942-
-data ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
879+
-data ${BOLT_FDATA}
943880
-reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions
944881
-split-all-cold -split-eh -dyno-stats -icf=1 -use-gnu-stack
945882
COMMAND ${CMAKE_COMMAND} -E rename ${CLANG_OPTIMIZED} $<TARGET_FILE:clang>

clang/cmake/caches/BOLT.cmake

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
set(CMAKE_BUILD_TYPE Release CACHE STRING "")
22
set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
3-
set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
4-
set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
53
set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
6-
set(CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
74

85
set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
96
set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")

clang/utils/perf-training/CMakeLists.txt

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,26 @@ if(APPLE AND DTRACE AND NOT LLVM_TOOL_LLVM_DRIVER_BUILD)
6161
COMMENT "Generating order file"
6262
DEPENDS generate-dtrace-logs)
6363
endif()
64+
65+
if(CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
66+
configure_lit_site_cfg(
67+
${CMAKE_CURRENT_SOURCE_DIR}/bolt.lit.site.cfg.in
68+
${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/lit.site.cfg
69+
)
70+
71+
add_lit_testsuite(generate-bolt-fdata "Generating BOLT profile for Clang"
72+
${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/
73+
EXCLUDE_FROM_CHECK_ALL
74+
DEPENDS clang-instrumented clear-bolt-fdata
75+
)
76+
77+
add_custom_target(clear-bolt-fdata
78+
COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} fdata
79+
COMMENT "Clearing old BOLT fdata")
80+
81+
# Merge profiles into one using merge-fdata
82+
add_custom_target(clang-bolt-profile
83+
COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge-fdata $<TARGET_FILE:merge-fdata> ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata ${CMAKE_CURRENT_BINARY_DIR}
84+
COMMENT "Merging BOLT fdata"
85+
DEPENDS merge-fdata generate-bolt-fdata)
86+
endif()

clang/utils/perf-training/bolt.lit.cfg

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# -*- Python -*-
2+
3+
from lit import Test
4+
import lit.formats
5+
import lit.util
6+
import os
7+
import subprocess
8+
9+
config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
10+
11+
config.name = 'Clang Perf Training'
12+
config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
13+
14+
use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
15+
config.test_format = lit.formats.ShTest(use_lit_shell == "0")
16+
config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g++ ' % (config.clang)))
17+
config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ ' % (config.clang)))
18+
config.substitutions.append( ('%clang_skip_driver', ' %s ' % (config.clang)))
19+
config.substitutions.append( ('%clang', ' %s ' % (config.clang) ) )
20+
config.substitutions.append( ('%test_root', config.test_exec_root ) )

clang/utils/perf-training/bolt.lit.site.cfg.in

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
@LIT_SITE_CFG_IN_HEADER@
2+
3+
import sys
4+
5+
config.clang_tools_dir = lit_config.substitute("@CURRENT_TOOLS_DIR@")
6+
config.perf_helper_dir = "@CMAKE_CURRENT_SOURCE_DIR@"
7+
config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@"
8+
config.test_source_root = "@CLANG_PGO_TRAINING_DATA@"
9+
config.target_triple = "@LLVM_TARGET_TRIPLE@"
10+
config.python_exe = "@Python3_EXECUTABLE@"
11+
config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
12+
13+
# Let the main config do the real work.
14+
lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")

0 commit comments

Comments
 (0)