Skip to content

Commit d23c5c2

Browse files
authored
[CGData] Global Merge Functions (#112671)
This implements a global function merging pass. Unlike traditional function merging passes that use IR comparators, this pass employs a structurally stable hash to identify similar functions while ignoring certain constant operands. These ignored constants are tracked and encoded into a stable function summary. When merging, instead of explicitly folding similar functions and their call sites, we form a merging instance by supplying different parameters via thunks. The actual size reduction occurs when identically created merging instances are folded by the linker. Currently, this pass is wired to a pre-codegen pass, enabled by the `-enable-global-merge-func` flag. In a local merging mode, the analysis and merging steps occur sequentially within a module: - `analyze`: Collects stable function hashes and tracks locations of ignored constant operands. - `finalize`: Identifies merge candidates with matching hashes and computes the set of parameters that point to different constants. - `merge`: Uses the stable function map to optimistically create a merged function. We can enable a global merging mode similar to the global function outliner (https://discourse.llvm.org/t/rfc-enhanced-machine-outliner-part-2-thinlto-nolto/78753/), which will perform the above steps separately. - `-codegen-data-generate`: During the first round of code generation, we analyze local merging instances and publish their summaries. - Offline using `llvm-cgdata` or at link-time, we can finalize all these merging summaries that are combined to determine parameters. - `-codegen-data-use`: During the second round of code generation, we optimistically create merging instances within each module, and finally, the linker folds identically created merging instances. Depends on #112664 This is a patch for https://discourse.llvm.org/t/rfc-global-function-merging/82608.
1 parent 6e614e1 commit d23c5c2

29 files changed

+1197
-11
lines changed

llvm/include/llvm/CGData/CodeGenData.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,9 @@ class CodeGenData {
145145
const OutlinedHashTree *getOutlinedHashTree() {
  // Hand back whatever raw pointer PublishedHashTree.get() yields.
  const OutlinedHashTree *Tree = PublishedHashTree.get();
  return Tree;
}
148+
/// Returns the pointer obtained from PublishedStableFunctionMap.get().
const StableFunctionMap *getStableFunctionMap() {
  const StableFunctionMap *Map = PublishedStableFunctionMap.get();
  return Map;
}
148151

149152
/// Returns true if we should write codegen data.
150153
bool emitCGData() { return EmitCGData; }
@@ -169,10 +172,18 @@ inline bool hasOutlinedHashTree() {
169172
return CodeGenData::getInstance().hasOutlinedHashTree();
170173
}
171174

175+
inline bool hasStableFunctionMap() {
176+
return CodeGenData::getInstance().hasStableFunctionMap();
177+
}
178+
172179
/// Convenience wrapper: fetches the outlined hash tree pointer from the
/// CodeGenData instance.
inline const OutlinedHashTree *getOutlinedHashTree() {
  auto &&CGD = CodeGenData::getInstance();
  return CGD.getOutlinedHashTree();
}
175182

183+
/// Convenience wrapper: fetches the stable function map pointer from the
/// CodeGenData instance.
inline const StableFunctionMap *getStableFunctionMap() {
  auto &&CGD = CodeGenData::getInstance();
  return CGD.getStableFunctionMap();
}
186+
176187
/// Convenience wrapper: forwards to emitCGData() on the CodeGenData instance.
inline bool emitCGData() {
  auto &&CGD = CodeGenData::getInstance();
  return CGD.emitCGData();
}
177188

178189
inline void

llvm/include/llvm/CGData/StableFunctionMap.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ struct StableFunctionMap {
110110
size_t size(SizeType Type = UniqueHashCount) const;
111111

112112
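/// Finalize the stable function map by trimming content. If \p SkipTrim is
/// true, the trimming of identical index pairs and the profitability filter
/// are skipped.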
113-
void finalize();
113+
void finalize(bool SkipTrim = false);
114114

115115
private:
116116
/// Insert a `StableFunctionEntry` into the function map directly. This

llvm/include/llvm/CGData/StableFunctionMapRecord.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ struct StableFunctionMapRecord {
4949
void deserializeYAML(yaml::Input &YIS);
5050

5151
/// Finalize the stable function map by trimming content.
52-
void finalize() { FunctionMap->finalize(); }
52+
void finalize(bool SkipTrim = false) {
  // Delegate to the underlying map, passing SkipTrim through unchanged.
  FunctionMap->finalize(SkipTrim);
}
5353

5454
/// Merge the stable function map into this one.
5555
void merge(const StableFunctionMapRecord &Other) {
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
//===------ GlobalMergeFunctions.h - Global merge functions -----*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This pass defines the implementation of a function merging mechanism
10+
// that utilizes a stable function hash to track differences in constants and
11+
// identify potential merge candidates. The process involves two rounds:
12+
// 1. The first round collects stable function hashes and identifies merge
13+
// candidates with matching hashes. It also computes the set of parameters
14+
// that point to different constants during the stable function merge.
15+
// 2. The second round leverages this collected global function information to
16+
// optimistically create a merged function in each module context, ensuring
17+
// correct transformation.
18+
// Similar to the global outliner, this approach uses the linker's deduplication
19+
// (ICF) to fold identical merged functions, thereby reducing the final binary
20+
// size. The work is inspired by the concepts discussed in the following paper:
21+
// https://dl.acm.org/doi/pdf/10.1145/3652032.3657575.
22+
//
23+
//===----------------------------------------------------------------------===//
24+
25+
#ifndef LLVM_CODEGEN_GLOBALMERGEFUNCTIONS_H
26+
#define LLVM_CODEGEN_GLOBALMERGEFUNCTIONS_H
27+
28+
#include "llvm/CGData/StableFunctionMap.h"
29+
#include "llvm/IR/Module.h"
30+
#include "llvm/IR/PassManager.h"
31+
#include "llvm/Pass.h"
32+
33+
/// Operating mode of the global function merger.
enum class HashFunctionMode {
  /// Default mode (GlobalMergeFunc initializes its MergerMode to this value).
  Local,
  /// Historical, misspelled enumerator name. Kept so that existing users
  /// continue to compile; prefer BuildingHashFunction in new code.
  /// NOTE(review): presumably selected for the first (data-generating) round
  /// described in the file header -- confirm against the implementation.
  BuildingHashFuncion,
  /// Correctly spelled alias of BuildingHashFuncion; has the same value.
  BuildingHashFunction = BuildingHashFuncion,
  /// NOTE(review): presumably selected for the second round that consumes the
  /// previously collected function information -- confirm against the
  /// implementation.
  UsingHashFunction,
};
38+
39+
namespace llvm {
40+
41+
// A vector of locations (the pair of (instruction, operand) indices) reachable
42+
// from a parameter.
43+
using ParamLocs = SmallVector<IndexPair, 4>;
44+
// A vector of parameters
45+
using ParamLocsVecTy = SmallVector<ParamLocs, 8>;
46+
47+
/// GlobalMergeFunc is a ModulePass that implements a function merging mechanism
48+
/// using stable function hashes. It identifies and merges functions with
49+
/// matching hashes across modules to optimize binary size.
50+
class GlobalMergeFunc {
51+
HashFunctionMode MergerMode = HashFunctionMode::Local;
52+
53+
std::unique_ptr<StableFunctionMap> LocalFunctionMap;
54+
55+
const ModuleSummaryIndex *Index;
56+
57+
public:
58+
/// The suffix used to identify the merged function that parameterizes
59+
/// the constant values. Note that the original function, without this suffix,
60+
/// becomes a thunk supplying contexts to the merged function via parameters.
61+
static constexpr const char MergingInstanceSuffix[] = ".Tgm";
62+
63+
GlobalMergeFunc(const ModuleSummaryIndex *Index) : Index(Index) {};
64+
65+
void initializeMergerMode(const Module &M);
66+
67+
bool run(Module &M);
68+
69+
/// Analyze module to create stable function into LocalFunctionMap.
70+
void analyze(Module &M);
71+
72+
/// Emit LocalFunctionMap into __llvm_merge section.
73+
void emitFunctionMap(Module &M);
74+
75+
/// Merge functions in the module using the given function map.
76+
bool merge(Module &M, const StableFunctionMap *FunctionMap);
77+
};
78+
79+
/// Global function merging pass for new pass manager.
/// Declared as a PassInfoMixin-derived struct whose only member is run().
80+
struct GlobalMergeFuncPass : public PassInfoMixin<GlobalMergeFuncPass> {
81+
/// Pass entry point: takes the module \p M and a module analysis manager
/// (left unnamed in this declaration) and returns the preserved analyses.
PreservedAnalyses run(Module &M, AnalysisManager<Module> &);
82+
};
83+
84+
} // end namespace llvm
85+
#endif // LLVM_CODEGEN_GLOBALMERGEFUNCTIONS_H

llvm/include/llvm/CodeGen/Passes.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -507,6 +507,9 @@ namespace llvm {
507507
/// This pass frees the memory occupied by the MachineFunction.
508508
FunctionPass *createFreeMachineFunctionPass();
509509

510+
/// This pass merges similar functions globally.
511+
ModulePass *createGlobalMergeFuncPass();
512+
510513
/// This pass performs outlining on machine instructions directly before
511514
/// printing assembly.
512515
ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions = true);

llvm/include/llvm/InitializePasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ void initializeGCEmptyBasicBlocksPass(PassRegistry &);
123123
void initializeGCMachineCodeAnalysisPass(PassRegistry &);
124124
void initializeGCModuleInfoPass(PassRegistry &);
125125
void initializeGVNLegacyPassPass(PassRegistry &);
126+
void initializeGlobalMergeFuncPassWrapperPass(PassRegistry &);
126127
void initializeGlobalMergePass(PassRegistry &);
127128
void initializeGlobalsAAWrapperPassPass(PassRegistry &);
128129
void initializeHardwareLoopsLegacyPass(PassRegistry &);

llvm/include/llvm/LinkAllPasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ struct ForcePassLinking {
7979
(void)llvm::createDomOnlyViewerWrapperPassPass();
8080
(void)llvm::createDomViewerWrapperPassPass();
8181
(void)llvm::createAlwaysInlinerLegacyPass();
82+
(void)llvm::createGlobalMergeFuncPass();
8283
(void)llvm::createGlobalsAAWrapperPass();
8384
(void)llvm::createInstSimplifyLegacyPass();
8485
(void)llvm::createInstructionCombiningPass();

llvm/include/llvm/Passes/CodeGenPassBuilder.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "llvm/CodeGen/FinalizeISel.h"
3636
#include "llvm/CodeGen/GCMetadata.h"
3737
#include "llvm/CodeGen/GlobalMerge.h"
38+
#include "llvm/CodeGen/GlobalMergeFunctions.h"
3839
#include "llvm/CodeGen/IndirectBrExpand.h"
3940
#include "llvm/CodeGen/InterleavedAccess.h"
4041
#include "llvm/CodeGen/InterleavedLoadCombine.h"
@@ -713,6 +714,9 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addIRPasses(
713714
// Convert conditional moves to conditional jumps when profitable.
714715
if (getOptLevel() != CodeGenOptLevel::None && !Opt.DisableSelectOptimize)
715716
addPass(SelectOptimizePass(&TM));
717+
718+
if (Opt.EnableGlobalMergeFunc)
719+
addPass(GlobalMergeFuncPass());
716720
}
717721

718722
/// Turn exception handling constructs into something the code generators can

llvm/include/llvm/Passes/MachinePassRegistry.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ MODULE_PASS("jmc-instrumenter", JMCInstrumenterPass())
2929
MODULE_PASS("lower-emutls", LowerEmuTLSPass())
3030
MODULE_PASS("pre-isel-intrinsic-lowering", PreISelIntrinsicLoweringPass())
3131
MODULE_PASS("shadow-stack-gc-lowering", ShadowStackGCLoweringPass())
32+
MODULE_PASS("global-merge-func", GlobalMergeFuncPass())
3233
#undef MODULE_PASS
3334

3435
#ifndef FUNCTION_ANALYSIS

llvm/include/llvm/Target/CGPassBuilderOption.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ struct CGPassBuilderOption {
3131
bool DisableVerify = false;
3232
bool EnableImplicitNullChecks = false;
3333
bool EnableBlockPlacementStats = false;
34+
bool EnableGlobalMergeFunc = false;
3435
bool EnableMachineFunctionSplitter = false;
3536
bool MISchedPostRA = false;
3637
bool EarlyLiveIntervals = false;

llvm/lib/CGData/StableFunctionMap.cpp

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,43 @@
1414
//===----------------------------------------------------------------------===//
1515

1616
#include "llvm/CGData/StableFunctionMap.h"
17+
#include "llvm/Support/CommandLine.h"
18+
#include "llvm/Support/Debug.h"
1719

1820
#define DEBUG_TYPE "stable-function-map"
1921

2022
using namespace llvm;
2123

24+
// Hidden command-line tunables for global function merging. Each of them is
// read by isProfitable() in this file, either as a hard gate or as a weight
// in the Benefit/Cost comparison.

// Gate: groups with fewer functions than this (default 2) are rejected.
static cl::opt<unsigned>
25+
GlobalMergingMinMerges("global-merging-min-merges",
26+
cl::desc("Minimum number of similar functions with "
27+
"the same hash required for merging."),
28+
cl::init(2), cl::Hidden);
29+
// Gate: groups whose instruction count is below this (default 1) are rejected.
static cl::opt<unsigned> GlobalMergingMinInstrs(
30+
"global-merging-min-instrs",
31+
cl::desc("The minimum instruction count required when merging functions."),
32+
cl::init(1), cl::Hidden);
33+
// Gate: groups needing more parameters than this are rejected. The default is
// the largest unsigned value, so no limit applies unless one is set.
static cl::opt<unsigned> GlobalMergingMaxParams(
34+
"global-merging-max-params",
35+
cl::desc(
36+
"The maximum number of parameters allowed when merging functions."),
37+
cl::init(std::numeric_limits<unsigned>::max()), cl::Hidden);
38+
// Cost weight: multiplied by the parameter count (default 2).
static cl::opt<unsigned> GlobalMergingParamOverhead(
39+
"global-merging-param-overhead",
40+
cl::desc("The overhead cost associated with each parameter when merging "
41+
"functions."),
42+
cl::init(2), cl::Hidden);
43+
// Cost weight: added once per function in the group (default 1).
static cl::opt<unsigned>
44+
GlobalMergingCallOverhead("global-merging-call-overhead",
45+
cl::desc("The overhead cost associated with each "
46+
"function call when merging functions."),
47+
cl::init(1), cl::Hidden);
48+
// Cost offset: added to the total cost once per group (default 0).
static cl::opt<unsigned> GlobalMergingExtraThreshold(
49+
"global-merging-extra-threshold",
50+
cl::desc("An additional cost threshold that must be exceeded for merging "
51+
"to be considered beneficial."),
52+
cl::init(0), cl::Hidden);
53+
2254
unsigned StableFunctionMap::getIdOrCreateForName(StringRef Name) {
2355
auto It = NameToId.find(Name);
2456
if (It != NameToId.end())
@@ -117,7 +149,38 @@ static void removeIdenticalIndexPair(
117149
SF->IndexOperandHashMap->erase(Pair);
118150
}
119151

120-
void StableFunctionMap::finalize() {
152+
static bool isProfitable(
153+
const SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>>
154+
&SFS) {
155+
unsigned StableFunctionCount = SFS.size();
156+
if (StableFunctionCount < GlobalMergingMinMerges)
157+
return false;
158+
159+
unsigned InstCount = SFS[0]->InstCount;
160+
if (InstCount < GlobalMergingMinInstrs)
161+
return false;
162+
163+
unsigned ParamCount = SFS[0]->IndexOperandHashMap->size();
164+
if (ParamCount > GlobalMergingMaxParams)
165+
return false;
166+
167+
unsigned Benefit = InstCount * (StableFunctionCount - 1);
168+
unsigned Cost =
169+
(GlobalMergingParamOverhead * ParamCount + GlobalMergingCallOverhead) *
170+
StableFunctionCount +
171+
GlobalMergingExtraThreshold;
172+
173+
bool Result = Benefit > Cost;
174+
LLVM_DEBUG(dbgs() << "isProfitable: Hash = " << SFS[0]->Hash << ", "
175+
<< "StableFunctionCount = " << StableFunctionCount
176+
<< ", InstCount = " << InstCount
177+
<< ", ParamCount = " << ParamCount
178+
<< ", Benefit = " << Benefit << ", Cost = " << Cost
179+
<< ", Result = " << (Result ? "true" : "false") << "\n");
180+
return Result;
181+
}
182+
183+
void StableFunctionMap::finalize(bool SkipTrim) {
121184
for (auto It = HashToFuncs.begin(); It != HashToFuncs.end(); ++It) {
122185
auto &[StableHash, SFS] = *It;
123186

@@ -158,9 +221,15 @@ void StableFunctionMap::finalize() {
158221
continue;
159222
}
160223

224+
if (SkipTrim)
225+
continue;
226+
161227
// Trim the index pair that has the same operand hash across
162228
// stable functions.
163229
removeIdenticalIndexPair(SFS);
230+
231+
if (!isProfitable(SFS))
232+
HashToFuncs.erase(It);
164233
}
165234

166235
Finalized = true;

llvm/lib/CodeGen/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ add_llvm_component_library(LLVMCodeGen
7171
GCMetadataPrinter.cpp
7272
GCRootLowering.cpp
7373
GlobalMerge.cpp
74+
GlobalMergeFunctions.cpp
7475
HardwareLoops.cpp
7576
IfConversion.cpp
7677
ImplicitNullChecks.cpp

0 commit comments

Comments
 (0)