Skip to content

[PowerPC][GlobalMerge] Reduce TOC usage by merging internal and private global data #101224

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalMerge.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ struct GlobalMergeOptions {
bool MergeConst = false;
/// Whether we should merge global variables that have external linkage.
bool MergeExternal = true;
/// Whether we should merge constant global variables.
bool MergeConstantGlobals = false;
/// Whether we should try to optimize for size only.
/// Currently, this applies a dead simple heuristic: only consider globals
/// used in minsize functions for merging.
Expand Down
3 changes: 2 additions & 1 deletion llvm/include/llvm/CodeGen/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,8 @@ namespace llvm {
///
/// \p MergeExternalByDefault and \p MergeConstantByDefault are the caller's
/// defaults for merging external-linkage globals and constant globals
/// respectively; both default to false.
Pass *createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset,
bool OnlyOptimizeForSize = false,
bool MergeExternalByDefault = false);
bool MergeExternalByDefault = false,
bool MergeConstantByDefault = false);

/// This pass splits the stack into a safe stack and an unsafe stack to
/// protect against stack-based overflow vulnerabilities.
Expand Down
23 changes: 18 additions & 5 deletions llvm/lib/CodeGen/GlobalMerge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,11 +196,13 @@ class GlobalMerge : public FunctionPass {
}

/// Construct the pass from explicit settings: copies each argument into the
/// corresponding field of Opt (MaxOffset, SizeOnly, MergeExternal,
/// MergeConstantGlobals) and registers the pass with the global PassRegistry.
explicit GlobalMerge(const TargetMachine *TM, unsigned MaximalOffset,
bool OnlyOptimizeForSize, bool MergeExternalGlobals)
bool OnlyOptimizeForSize, bool MergeExternalGlobals,
bool MergeConstantGlobals)
: FunctionPass(ID), TM(TM) {
Opt.MaxOffset = MaximalOffset;
Opt.SizeOnly = OnlyOptimizeForSize;
Opt.MergeExternal = MergeExternalGlobals;
// Later consulted by GlobalMergeImpl::run to decide whether the constant
// globals are merged at all.
Opt.MergeConstantGlobals = MergeConstantGlobals;
initializeGlobalMergePass(*PassRegistry::getPassRegistry());
}

Expand Down Expand Up @@ -475,7 +477,8 @@ bool GlobalMergeImpl::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
auto &DL = M.getDataLayout();

LLVM_DEBUG(dbgs() << " Trying to merge set, starts with #"
<< GlobalSet.find_first() << "\n");
<< GlobalSet.find_first() << ", total of " << Globals.size()
<< "\n");

bool Changed = false;
ssize_t i = GlobalSet.find_first();
Expand Down Expand Up @@ -551,6 +554,8 @@ bool GlobalMergeImpl::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
MergedGV->setAlignment(MaxAlign);
MergedGV->setSection(Globals[i]->getSection());

LLVM_DEBUG(dbgs() << "MergedGV: " << *MergedGV << "\n");

const StructLayout *MergedLayout = DL.getStructLayout(MergedTy);
for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
Expand Down Expand Up @@ -700,6 +705,11 @@ bool GlobalMergeImpl::run(Module &M) {
else
Globals[{AddressSpace, Section}].push_back(&GV);
}
LLVM_DEBUG(dbgs() << "GV "
<< ((DL.getTypeAllocSize(Ty) < Opt.MaxOffset)
? "to merge: "
: "not to merge: ")
<< GV << "\n");
}

for (auto &P : Globals)
Expand All @@ -710,7 +720,7 @@ bool GlobalMergeImpl::run(Module &M) {
if (P.second.size() > 1)
Changed |= doMerge(P.second, M, false, P.first.first);

if (EnableGlobalMergeOnConst)
if (Opt.MergeConstantGlobals)
for (auto &P : ConstGlobals)
if (P.second.size() > 1)
Changed |= doMerge(P.second, M, true, P.first.first);
Expand All @@ -720,8 +730,11 @@ bool GlobalMergeImpl::run(Module &M) {

/// Factory for the GlobalMerge pass. Combines the caller-supplied defaults
/// with the EnableGlobalMergeOnExternal / EnableGlobalMergeOnConst
/// command-line options and returns a newly allocated GlobalMerge pass.
Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset,
bool OnlyOptimizeForSize,
bool MergeExternalByDefault) {
bool MergeExternalByDefault,
bool MergeConstantByDefault) {
// Tri-state option: when it is unset the caller's default wins; otherwise the
// explicit command-line value (true or false) wins.
bool MergeExternal = (EnableGlobalMergeOnExternal == cl::BOU_UNSET) ?
MergeExternalByDefault : (EnableGlobalMergeOnExternal == cl::BOU_TRUE);
return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal);
// Constant merging is enabled if either the command-line option or the
// caller's default asks for it.
// NOTE(review): unlike MergeExternal above this is a plain OR, so when
// MergeConstantByDefault is true the command-line option cannot switch
// constant merging back off -- confirm this asymmetry is intended.
bool MergeConstant = EnableGlobalMergeOnConst || MergeConstantByDefault;
return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal,
MergeConstant);
}
13 changes: 13 additions & 0 deletions llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,15 @@ static cl::opt<bool> EnablePPCGenScalarMASSEntries(
"(scalar) entries"),
cl::Hidden);

// Hidden option -ppc-global-merge, off by default. When set,
// PPCPassConfig::addPreISel adds the GlobalMerge pass to the pipeline.
static cl::opt<bool>
EnableGlobalMerge("ppc-global-merge", cl::Hidden, cl::init(false),
cl::desc("Enable the global merge pass"));

// Hidden option -ppc-global-merge-max-offset; its value is passed as the
// offset argument of createGlobalMergePass in PPCPassConfig::addPreISel.
// NOTE(review): the 0x7fff default presumably matches the largest positive
// signed 16-bit displacement -- confirm against the PPC addressing forms.
static cl::opt<unsigned>
GlobalMergeMaxOffset("ppc-global-merge-max-offset", cl::Hidden,
cl::init(0x7fff),
cl::desc("Maximum global merge offset"));

extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() {
// Register the targets
RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target());
Expand Down Expand Up @@ -491,6 +500,10 @@ void PPCPassConfig::addIRPasses() {
}

bool PPCPassConfig::addPreISel() {
if (EnableGlobalMerge)
addPass(
createGlobalMergePass(TM, GlobalMergeMaxOffset, false, false, true));

if (MergeStringPool && getOptLevel() != CodeGenOptLevel::None)
addPass(createPPCMergeStringPoolPass());

Expand Down
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/PowerPC/merge-private.ll
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr8 \
; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s \
; RUN: -ppc-asm-full-reg-names -ppc-global-merge=true < %s | FileCheck %s \
; RUN: --check-prefix=AIX64
; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr8 \
; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s \
; RUN: -ppc-asm-full-reg-names -ppc-global-merge=true < %s | FileCheck %s \
; RUN: --check-prefix=AIX32
; RUN: llc -verify-machineinstrs -mtriple powerpc64le-unknown-linux -mcpu=pwr8 \
; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s \
; RUN: -ppc-asm-full-reg-names -ppc-global-merge=true < %s | FileCheck %s \
; RUN: --check-prefix=LINUX64LE
; RUN: llc -verify-machineinstrs -mtriple powerpc64-unknown-linux -mcpu=pwr8 \
; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s \
; RUN: -ppc-asm-full-reg-names -ppc-global-merge=true < %s | FileCheck %s \
; RUN: --check-prefix=LINUX64BE

@.str = private unnamed_addr constant [15 x i8] c"Private global\00", align 1
Expand All @@ -24,7 +24,7 @@ define dso_local void @print_func() {
; AIX64-NEXT: stdu r1, -128(r1)
; AIX64-NEXT: std r0, 144(r1)
; AIX64-NEXT: std r31, 120(r1) # 8-byte Folded Spill
; AIX64-NEXT: ld r31, L..C0(r2) # @__ModuleStringPool
; AIX64-NEXT: ld r31, L..C0(r2) # @_MergedGlobals
; AIX64-NEXT: mr r3, r31
; AIX64-NEXT: bl .puts[PR]
; AIX64-NEXT: nop
Expand All @@ -43,7 +43,7 @@ define dso_local void @print_func() {
; AIX32-NEXT: stwu r1, -64(r1)
; AIX32-NEXT: stw r0, 72(r1)
; AIX32-NEXT: stw r31, 60(r1) # 4-byte Folded Spill
; AIX32-NEXT: lwz r31, L..C0(r2) # @__ModuleStringPool
; AIX32-NEXT: lwz r31, L..C0(r2) # @_MergedGlobals
; AIX32-NEXT: mr r3, r31
; AIX32-NEXT: bl .puts[PR]
; AIX32-NEXT: nop
Expand All @@ -64,9 +64,9 @@ define dso_local void @print_func() {
; LINUX64LE-NEXT: .cfi_offset r30, -16
; LINUX64LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; LINUX64LE-NEXT: stdu r1, -48(r1)
; LINUX64LE-NEXT: addis r3, r2, .L__ModuleStringPool@toc@ha
; LINUX64LE-NEXT: addis r3, r2, .L_MergedGlobals@toc@ha
; LINUX64LE-NEXT: std r0, 64(r1)
; LINUX64LE-NEXT: addi r30, r3, .L__ModuleStringPool@toc@l
; LINUX64LE-NEXT: addi r30, r3, .L_MergedGlobals@toc@l
; LINUX64LE-NEXT: mr r3, r30
; LINUX64LE-NEXT: bl puts
; LINUX64LE-NEXT: nop
Expand All @@ -87,9 +87,9 @@ define dso_local void @print_func() {
; LINUX64BE-NEXT: .cfi_def_cfa_offset 128
; LINUX64BE-NEXT: .cfi_offset lr, 16
; LINUX64BE-NEXT: .cfi_offset r30, -16
; LINUX64BE-NEXT: addis r3, r2, .L__ModuleStringPool@toc@ha
; LINUX64BE-NEXT: addis r3, r2, .L_MergedGlobals@toc@ha
; LINUX64BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill
; LINUX64BE-NEXT: addi r30, r3, .L__ModuleStringPool@toc@l
; LINUX64BE-NEXT: addi r30, r3, .L_MergedGlobals@toc@l
; LINUX64BE-NEXT: mr r3, r30
; LINUX64BE-NEXT: bl puts
; LINUX64BE-NEXT: nop
Expand Down
Loading