Skip to content

Commit 1ca9ce9

Browse files
committed
[Clang][CMake] Support perf, LBR, and Instrument CLANG_BOLT options
Split up and refactor CLANG_BOLT_INSTRUMENT into support for perf no-LBR and perf with LBR profiling modes. Differential Revision: https://reviews.llvm.org/D143617
1 parent 446e11a commit 1ca9ce9

File tree

6 files changed

+167
-26
lines changed

6 files changed

+167
-26
lines changed

clang/CMakeLists.txt

Lines changed: 30 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -850,23 +850,38 @@ if (CLANG_ENABLE_BOOTSTRAP)
850850
endforeach()
851851
endif()
852852

853-
if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
853+
# CLANG_BOLT selects whether, and with which profiling mechanism, Clang is
# post-link optimized with BOLT. It must default to a false constant: the value
# is tested for truthiness below (`if (CLANG_BOLT AND ...)`), so a non-empty
# default such as "INSTRUMENT" would switch the BOLT pipeline on for every
# configuration that never asked for it. Cache files that want BOLT set the
# mode explicitly.
set(CLANG_BOLT OFF CACHE STRING "Apply BOLT optimization to Clang. \
May be specified as Instrument or Perf or LBR to use a particular profiling \
mechanism.")
# Mode comparisons are case-insensitive; keep the user's spelling in CLANG_BOLT
# and compare against the upper-cased copy.
string(TOUPPER "${CLANG_BOLT}" uppercase_CLANG_BOLT)
857+
858+
if (CLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED)
854859
set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
855-
set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
860+
set(CLANG_INSTRUMENTED ${LLVM_RUNTIME_OUTPUT_INTDIR}/${CLANG_BOLT_INSTRUMENTED})
856861
set(BOLT_FDATA ${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/prof.fdata)
857862

858-
# Instrument clang with BOLT
859-
add_custom_target(clang-instrumented
860-
DEPENDS ${CLANG_INSTRUMENTED}
861-
)
862-
add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
863-
DEPENDS clang llvm-bolt
864-
COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
865-
-instrument --instrumentation-file-append-pid
866-
--instrumentation-file=${BOLT_FDATA}
867-
COMMENT "Instrumenting clang binary with BOLT"
868-
VERBATIM
869-
)
863+
# Only INSTRUMENT, PERF and LBR are meaningful modes. Any other truthy value
# (for example ON) is not rejected: it takes the perf-based branch below, so
# make that fallback visible instead of silent.
if (NOT "${uppercase_CLANG_BOLT}" MATCHES "^(INSTRUMENT|PERF|LBR)$")
  message(WARNING "Unrecognized CLANG_BOLT value '${CLANG_BOLT}': expected "
    "Instrument, Perf or LBR. Falling back to perf-based profiling.")
endif()

# Pass extra flag in no-LBR mode. The variable is set on both paths so the
# optimization command never picks up a stale value from an enclosing scope;
# an empty value expands to no argument at all.
if ("${uppercase_CLANG_BOLT}" STREQUAL "PERF")
  set(BOLT_NO_LBR "-nl")
else()
  set(BOLT_NO_LBR "")
endif()

# clang-bolt-training-deps is what the training run must build first:
# the BOLT-instrumented binary in INSTRUMENT mode, plain clang otherwise.
if ("${uppercase_CLANG_BOLT}" STREQUAL "INSTRUMENT")
  # Instrument clang with BOLT
  add_custom_target(clang-instrumented
    DEPENDS ${CLANG_INSTRUMENTED}
  )
  add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
    DEPENDS clang llvm-bolt
    COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
      -instrument --instrumentation-file-append-pid
      --instrumentation-file=${BOLT_FDATA}
    COMMENT "Instrumenting clang binary with BOLT"
    VERBATIM
  )
  add_custom_target(clang-bolt-training-deps DEPENDS clang-instrumented)
else() # perf or LBR
  add_custom_target(clang-bolt-training-deps DEPENDS clang)
endif()
870885

871886
# Optimize original (pre-bolt) Clang using the collected profile
872887
set(CLANG_OPTIMIZED ${CMAKE_CURRENT_BINARY_DIR}/clang.bolt)
@@ -880,6 +895,7 @@ if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
880895
-data ${BOLT_FDATA}
881896
-reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions
882897
-split-all-cold -split-eh -dyno-stats -icf=1 -use-gnu-stack
898+
${BOLT_NO_LBR}
883899
COMMAND ${CMAKE_COMMAND} -E rename ${CLANG_OPTIMIZED} $<TARGET_FILE:clang>
884900
COMMENT "Optimizing Clang with BOLT"
885901
VERBATIM

clang/cmake/caches/BOLT.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
set(CMAKE_BUILD_TYPE Release CACHE STRING "")
2-
set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
2+
set(CLANG_BOLT "INSTRUMENT" CACHE STRING "")
33
set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
44

55
set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")

clang/utils/perf-training/CMakeLists.txt

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,9 @@ if(APPLE AND DTRACE AND NOT LLVM_TOOL_LLVM_DRIVER_BUILD)
6262
DEPENDS generate-dtrace-logs)
6363
endif()
6464

65-
if(CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
65+
if(CLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED)
66+
set(CLANG_BOLT_INSTRUMENTED "clang-bolt.inst" CACHE STRING
67+
"Name of BOLT-instrumented Clang binary")
6668
configure_lit_site_cfg(
6769
${CMAKE_CURRENT_SOURCE_DIR}/bolt.lit.site.cfg.in
6870
${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/lit.site.cfg
@@ -71,16 +73,37 @@ if(CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
7173
add_lit_testsuite(generate-bolt-fdata "Generating BOLT profile for Clang"
7274
${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/
7375
EXCLUDE_FROM_CHECK_ALL
74-
DEPENDS clang-instrumented clear-bolt-fdata
76+
DEPENDS clang-bolt-training-deps clear-bolt-fdata clear-perf-data
7577
)
7678

7779
add_custom_target(clear-bolt-fdata
7880
COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} fdata
7981
COMMENT "Clearing old BOLT fdata")
8082

83+
# Upper-case the requested mode once; the checks below compare against it.
string(TOUPPER "${CLANG_BOLT}" uppercase_CLANG_BOLT)

# Forward --lbr to the perf2bolt helper only in LBR mode. When unset,
# ${BOLT_LBR} expands to no argument.
if ("${uppercase_CLANG_BOLT}" STREQUAL "LBR")
  set(BOLT_LBR "--lbr")
endif()

# Runs the helper's `clean` action for perf.data files in the binary dir.
add_custom_target(clear-perf-data
  COMMAND "${Python3_EXECUTABLE}"
          ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean
          ${CMAKE_CURRENT_BINARY_DIR} perf.data
  COMMENT "Clearing old perf data")

# merge-fdata-deps collects whatever must finish before profiles are merged:
# the training run itself in INSTRUMENT mode, or the perf -> fdata conversion
# (which in turn depends on the training run) in the perf-based modes.
add_custom_target(merge-fdata-deps)
if (NOT "${uppercase_CLANG_BOLT}" STREQUAL "INSTRUMENT")
  # Convert perf profiles into fdata
  add_custom_target(convert-perf-fdata
    COMMAND "${Python3_EXECUTABLE}"
            ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py perf2bolt
            $<TARGET_FILE:llvm-bolt>
            ${CMAKE_CURRENT_BINARY_DIR}
            $<TARGET_FILE:clang>
            ${BOLT_LBR}
    COMMENT "Converting perf files to BOLT fdata"
    DEPENDS llvm-bolt generate-bolt-fdata)
  add_dependencies(merge-fdata-deps convert-perf-fdata)
else()
  add_dependencies(merge-fdata-deps generate-bolt-fdata)
endif()
103+
81104
# Merge profiles into one using merge-fdata
82105
add_custom_target(clang-bolt-profile
83106
COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge-fdata $<TARGET_FILE:merge-fdata> ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata ${CMAKE_CURRENT_BINARY_DIR}
84107
COMMENT "Merging BOLT fdata"
85-
DEPENDS merge-fdata generate-bolt-fdata)
108+
DEPENDS merge-fdata merge-fdata-deps)
86109
endif()

clang/utils/perf-training/bolt.lit.cfg

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,52 @@ import lit.util
66
import os
77
import subprocess
88

9-
config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
9+
# Pick the binary to train with and the command prefix used to profile it.
# In instrument mode the BOLT-instrumented binary is run directly, with no
# wrapper; in the perf-based modes the regular clang is run under
# `perf-helper.py perf`, with --lbr added in LBR mode.
bolt_mode = config.clang_bolt_mode.lower()
if bolt_mode == "instrument":
    clang_binary = config.clang_bolt_name
    perf_wrapper = ""
else:  # perf or LBR
    clang_binary = "clang"
    wrapper_parts = [
        "%s %s/perf-helper.py perf" % (config.python_exe, config.perf_helper_dir)
    ]
    if bolt_mode == "lbr":
        wrapper_parts.append("--lbr")
    # The trailing space after "--" separates the wrapper from the command.
    wrapper_parts.append("-- ")
    perf_wrapper = " ".join(wrapper_parts)
1021

11-
config.name = 'Clang Perf Training'
12-
config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
22+
# Locate the selected binary in the tools directory, resolve symlinks and
# normalise the path to forward slashes.
located_clang = lit.util.which(clang_binary, config.clang_tools_dir)
config.clang = os.path.realpath(located_clang).replace("\\", "/")

config.name = "Clang Perf Training"

# File extensions lit treats as training inputs.
config.suffixes = [
    ".c", ".cc", ".cpp", ".m", ".mm", ".cu",
    ".ll", ".cl", ".s", ".S", ".modulemap", ".test",
]
1341

1442
use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
1543
config.test_format = lit.formats.ShTest(use_lit_shell == "0")
16-
config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g++ ' % (config.clang)))
17-
config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ ' % (config.clang)))
18-
config.substitutions.append( ('%clang_skip_driver', ' %s ' % (config.clang)))
19-
config.substitutions.append( ('%clang', ' %s ' % (config.clang) ) )
20-
config.substitutions.append( ('%test_root', config.test_exec_root ) )
44+
# Every compiler substitution expands to the (possibly empty) perf wrapper
# followed by the clang path; the C++ variants additionally select the g++
# driver mode. Registration order is kept as-is: longer patterns come before
# the shorter ones they contain.
profiled_clang = "%s %s" % (perf_wrapper, config.clang)
for pattern, driver_flags in (
    ("%clang_cpp_skip_driver", "--driver-mode=g++ "),
    ("%clang_cpp", "--driver-mode=g++ "),
    ("%clang_skip_driver", ""),
    ("%clang", ""),
):
    config.substitutions.append(
        (pattern, " %s %s" % (profiled_clang, driver_flags))
    )
config.substitutions.append(("%test_root", config.test_exec_root))

clang/utils/perf-training/bolt.lit.site.cfg.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ config.test_source_root = "@CLANG_PGO_TRAINING_DATA@"
99
config.target_triple = "@LLVM_TARGET_TRIPLE@"
1010
config.python_exe = "@Python3_EXECUTABLE@"
1111
config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
12+
config.clang_bolt_mode = "@CLANG_BOLT@"
13+
config.clang_bolt_name = "@CLANG_BOLT_INSTRUMENTED@"
1214

1315
# Let the main config do the real work.
1416
lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")

clang/utils/perf-training/perf-helper.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,67 @@ def merge_fdata(args):
6767
return 0
6868

6969

70+
def perf(args):
    """Run a command under `perf record` to collect a profile for BOLT.

    `args` is a list of strings: leading tokens that start with "--" are
    treated as options of this wrapper (`--lbr`, and a bare `--` separator);
    the first token that does not start with "--" and everything after it is
    the command to profile and is passed to perf untouched.

    The profile is written to `<pid>.perf.data` in the current directory,
    where `<pid>` is this process's id. Returns 0 on success. Exits with an
    argparse usage error if no command is given, and raises
    `subprocess.CalledProcessError` if perf exits with a non-zero status.
    """
    parser = argparse.ArgumentParser(
        prog="perf-helper perf", description="perf wrapper for BOLT profile collection"
    )
    parser.add_argument(
        "--lbr", action="store_true", help="Use perf with branch stacks"
    )
    parser.add_argument("cmd", nargs="*", help="")

    # Use python's arg parser to handle all leading option arguments, but pass
    # everything else through to perf. Defaulting to len(args) means "no
    # command found" instead of letting next() raise StopIteration.
    split_at = next(
        (idx for idx, arg in enumerate(args) if not arg.startswith("--")),
        len(args),
    )

    opts = parser.parse_args(args[:split_at])
    cmd = args[split_at:]
    if not cmd:
        # Nothing to profile: report it as a usage error (exit status 2).
        parser.error("no command given to profile")

    perf_args = [
        "perf",
        "record",
        "--event=cycles:u",
        "--freq=max",
        "--output=%d.perf.data" % os.getpid(),
    ]
    if opts.lbr:
        perf_args += ["--branch-filter=any,u"]
    perf_args.extend(cmd)

    start_time = time.time()
    subprocess.check_call(perf_args)

    elapsed = time.time() - start_time
    print("... data collection took %.4fs" % elapsed)
    return 0
104+
105+
106+
def perf2bolt(args):
    """Convert collected perf profiles into BOLT fdata files.

    `args` is a list of strings: the path to llvm-bolt, a directory to search
    for perf.data files, the profiled binary, and optionally `--lbr`.

    For every file returned by `findFilesWithExtension(path, "perf.data")`,
    llvm-bolt is run in aggregate-only mode and writes `<file>.fdata`. Without
    `--lbr` the `-nl` flag is added. Returns 0; raises
    `subprocess.CalledProcessError` if any conversion fails.
    """
    arg_parser = argparse.ArgumentParser(
        prog="perf-helper perf2bolt",
        description="perf2bolt conversion wrapper for perf.data files",
    )
    arg_parser.add_argument("bolt", help="Path to llvm-bolt")
    arg_parser.add_argument("path", help="Path containing perf.data files")
    arg_parser.add_argument("binary", help="Input binary")
    arg_parser.add_argument(
        "--lbr", action="store_true", help="Use LBR perf2bolt mode"
    )
    parsed = arg_parser.parse_args(args)

    # "-nl" is passed only when the profile was recorded without --lbr.
    mode_flags = [] if parsed.lbr else ["-nl"]
    # "-p" must come last: the perf.data path is appended right after it.
    base_cmd = (
        [parsed.bolt, parsed.binary, "--aggregate-only", "--profile-format=yaml"]
        + mode_flags
        + ["-p"]
    )

    for perf_file in findFilesWithExtension(parsed.path, "perf.data"):
        subprocess.check_call(base_cmd + [perf_file, "-o", perf_file + ".fdata"])
    return 0
129+
130+
70131
def dtrace(args):
71132
parser = argparse.ArgumentParser(
72133
prog="perf-helper dtrace",
@@ -507,6 +568,8 @@ def genOrderFile(args):
507568
"cc1": cc1,
508569
"gen-order-file": genOrderFile,
509570
"merge-fdata": merge_fdata,
571+
"perf": perf,
572+
"perf2bolt": perf2bolt,
510573
}
511574

512575

0 commit comments

Comments
 (0)