Commit 61b4702

Author: Jessica Paquette

[AArch64][GlobalISel] Fold constants into G_GLOBAL_VALUE
This pretty much just ports `performGlobalAddressCombine` from AArch64ISelLowering. (AArch64 doesn't use the generic DAG combine for this.)

This adds a pre-legalize combine which looks for this pattern:

```
%g = G_GLOBAL_VALUE @x
%ptr1 = G_PTR_ADD %g, cst1
%ptr2 = G_PTR_ADD %g, cst2
...
%ptrN = G_PTR_ADD %g, cstN
```

And then, if possible, transforms it like so:

```
%g = G_GLOBAL_VALUE @x
%offset_g = G_PTR_ADD %g, -min(cst)
%ptr1 = G_PTR_ADD %offset_g, cst1
%ptr2 = G_PTR_ADD %offset_g, cst2
...
%ptrN = G_PTR_ADD %offset_g, cstN
```

Where min(cst) is the smallest of the G_PTR_ADD constants. This means we should save at least one G_PTR_ADD.

This also updates code in the legalizer and selector which assumed that G_GLOBAL_VALUE would never have an offset, and adds/updates relevant tests.

Differential Revision: https://reviews.llvm.org/D96624
Parent: 60a5533

12 files changed: +955 -50 lines
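
For context on the pattern the commit message describes: this shape of IR typically comes from code that accesses several non-zero-offset fields of the same global. The snippet below is purely illustrative (the struct, its layout, and the function are not from the patch); it sketches source that would lower to one G_GLOBAL_VALUE feeding multiple constant G_PTR_ADDs, which is the shape the new rule matches.

```
// Hypothetical example only: a global whose non-zero-offset fields are read
// in one function. Through GlobalISel, @g is materialized once with
// G_GLOBAL_VALUE and each field read adds its constant byte offset with a
// G_PTR_ADD (8 and 16 here), so the combine could fold the smaller offset (8)
// into the global reference itself.
struct Triple {
  long a; // byte offset 0
  long b; // byte offset 8
  long c; // byte offset 16
};

Triple g;

long sum_bc() { return g.b + g.c; }
```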

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 11 additions & 1 deletion
@@ -24,10 +24,20 @@ def icmp_redundant_trunc : GICombineRule<
          [{ return matchICmpRedundantTrunc(*${root}, MRI, Helper.getKnownBits(), ${matchinfo}); }]),
   (apply [{ applyICmpRedundantTrunc(*${root}, MRI, B, Observer, ${matchinfo}); }])>;
 
+// AArch64-specific offset folding for G_GLOBAL_VALUE.
+def fold_global_offset_matchdata : GIDefMatchData<"std::pair<uint64_t, uint64_t>">;
+def fold_global_offset : GICombineRule<
+  (defs root:$root, fold_global_offset_matchdata:$matchinfo),
+  (match (wip_match_opcode G_GLOBAL_VALUE):$root,
+          [{ return matchFoldGlobalOffset(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ return applyFoldGlobalOffset(*${root}, MRI, B, Observer, ${matchinfo});}])
+>;
+
 def AArch64PreLegalizerCombinerHelper: GICombinerHelper<
   "AArch64GenPreLegalizerCombinerHelper", [all_combines,
                                            fconstant_to_constant,
-                                           icmp_redundant_trunc]> {
+                                           icmp_redundant_trunc,
+                                           fold_global_offset]> {
   let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule";
   let StateClass = "AArch64PreLegalizerCombinerHelperState";
   let AdditionalArguments = [];

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 5 additions & 3 deletions
@@ -5628,8 +5628,10 @@ AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
     return None;
 
   // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
-  // TODO: Need to check GV's offset % size if doing offset folding into globals.
-  assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global");
+  auto Offset = Adrp.getOperand(1).getOffset();
+  if (Offset % Size != 0)
+    return None;
+
   auto GV = Adrp.getOperand(1).getGlobal();
   if (GV->isThreadLocal())
     return None;
@@ -5643,7 +5645,7 @@ AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
   Register AdrpReg = Adrp.getOperand(0).getReg();
   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
            [=](MachineInstrBuilder &MIB) {
-             MIB.addGlobalAddress(GV, /* Offset */ 0,
+             MIB.addGlobalAddress(GV, Offset,
                                   OpFlags | AArch64II::MO_PAGEOFF |
                                       AArch64II::MO_NC);
            }}};
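
The selector hunk above swaps the old "offset must be zero" assertion for a divisibility check before folding a global's low address bits into a load/store immediate. As a rough standalone sketch (the helper name and the explicit 12-bit bound are illustrative, not code from the patch): AArch64's scaled unsigned-immediate addressing encodes Offset / Size, so only size-aligned offsets can be represented that way.

```
// Illustrative sketch only: models why `Offset % Size != 0` has to bail out
// when folding the low part of a global address into a load/store immediate.
// The scaled unsigned-immediate addressing mode encodes Offset / Size in a
// 12-bit field, so non-multiples of Size (and very large offsets) cannot be
// encoded and the separate add must be kept.
#include <cstdint>
#include <optional>

std::optional<uint64_t> encodableScaledImmediate(uint64_t Offset, uint64_t Size) {
  if (Size == 0 || Offset % Size != 0)
    return std::nullopt; // unaligned offset: not representable
  uint64_t Scaled = Offset / Size;
  if (Scaled >= (1u << 12))
    return std::nullopt; // exceeds the 12-bit immediate field
  return Scaled;
}
```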

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 7 additions & 3 deletions
@@ -781,7 +781,8 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
   // G_ADD_LOW instructions.
   // By splitting this here, we can optimize accesses in the small code model by
   // folding in the G_ADD_LOW into the load/store offset.
-  auto GV = MI.getOperand(1).getGlobal();
+  auto &GlobalOp = MI.getOperand(1);
+  const auto* GV = GlobalOp.getGlobal();
   if (GV->isThreadLocal())
     return true; // Don't want to modify TLS vars.
 
@@ -791,9 +792,10 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
   if (OpFlags & AArch64II::MO_GOT)
     return true;
 
+  auto Offset = GlobalOp.getOffset();
   Register DstReg = MI.getOperand(0).getReg();
   auto ADRP = MIRBuilder.buildInstr(AArch64::ADRP, {LLT::pointer(0, 64)}, {})
-                  .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
+                  .addGlobalAddress(GV, Offset, OpFlags | AArch64II::MO_PAGE);
   // Set the regclass on the dest reg too.
   MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
 
@@ -811,6 +813,8 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
   // binary must also be loaded into address range [0, 2^48). Both of these
   // properties need to be ensured at runtime when using tagged addresses.
   if (OpFlags & AArch64II::MO_TAGGED) {
+    assert(!Offset &&
+           "Should not have folded in an offset for a tagged global!");
     ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP})
                .addGlobalAddress(GV, 0x100000000,
                                  AArch64II::MO_PREL | AArch64II::MO_G3)
@@ -819,7 +823,7 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
   }
 
   MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP})
-      .addGlobalAddress(GV, 0,
+      .addGlobalAddress(GV, Offset,
                         OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
   MI.eraseFromParent();
   return true;
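
On the legalizer hunk above: in the small code model the global address is rebuilt from an ADRP (page address) plus a G_ADD_LOW (:lo12:), so the folded offset must appear on both operands. A minimal sketch of that address arithmetic, assuming a plain (non-GOT, non-TLS, untagged) global; the function is illustrative only:

```
// Minimal model (assumption: plain small-code-model global) of why both the
// MO_PAGE and MO_PAGEOFF operands carry the same folded Offset: ADRP yields
// the 4 KiB page containing (@x + Offset) and the :lo12: add supplies the
// remaining low 12 bits, so splitting the offset any other way would not
// reassemble the right address.
#include <cstdint>

uint64_t reassembleAddress(uint64_t GlobalAddr, uint64_t Offset) {
  uint64_t Target = GlobalAddr + Offset;
  uint64_t Page = Target & ~uint64_t(0xFFF); // what ADRP materializes
  uint64_t Lo12 = Target & uint64_t(0xFFF);  // what G_ADD_LOW / ADD :lo12: adds
  return Page + Lo12;                        // == Target
}
```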

llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp

Lines changed: 110 additions & 0 deletions
@@ -107,6 +107,116 @@ static bool applyICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
   return true;
 }
 
+/// \returns true if it is possible to fold a constant into a G_GLOBAL_VALUE.
+///
+/// e.g.
+///
+///  %g = G_GLOBAL_VALUE @x -> %g = G_GLOBAL_VALUE @x + cst
+static bool matchFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                  std::pair<uint64_t, uint64_t> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
+  MachineFunction &MF = *MI.getMF();
+  auto &GlobalOp = MI.getOperand(1);
+  auto *GV = GlobalOp.getGlobal();
+
+  // Don't allow anything that could represent offsets etc.
+  if (MF.getSubtarget<AArch64Subtarget>().ClassifyGlobalReference(
+          GV, MF.getTarget()) != AArch64II::MO_NO_FLAG)
+    return false;
+
+  // Look for a G_GLOBAL_VALUE only used by G_PTR_ADDs against constants:
+  //
+  //  %g = G_GLOBAL_VALUE @x
+  //  %ptr1 = G_PTR_ADD %g, cst1
+  //  %ptr2 = G_PTR_ADD %g, cst2
+  //  ...
+  //  %ptrN = G_PTR_ADD %g, cstN
+  //
+  // Identify the *smallest* constant. We want to be able to form this:
+  //
+  //  %offset_g = G_GLOBAL_VALUE @x + min_cst
+  //  %g = G_PTR_ADD %offset_g, -min_cst
+  //  %ptr1 = G_PTR_ADD %g, cst1
+  //  ...
+  Register Dst = MI.getOperand(0).getReg();
+  uint64_t MinOffset = -1ull;
+  for (auto &UseInstr : MRI.use_nodbg_instructions(Dst)) {
+    if (UseInstr.getOpcode() != TargetOpcode::G_PTR_ADD)
+      return false;
+    auto Cst =
+        getConstantVRegValWithLookThrough(UseInstr.getOperand(2).getReg(), MRI);
+    if (!Cst)
+      return false;
+    MinOffset = std::min(MinOffset, Cst->Value.getZExtValue());
+  }
+
+  // Require that the new offset is larger than the existing one to avoid
+  // infinite loops.
+  uint64_t CurrOffset = GlobalOp.getOffset();
+  uint64_t NewOffset = MinOffset + CurrOffset;
+  if (NewOffset <= CurrOffset)
+    return false;
+
+  // Check whether folding this offset is legal. It must not go out of bounds of
+  // the referenced object to avoid violating the code model, and must be
+  // smaller than 2^21 because this is the largest offset expressible in all
+  // object formats.
+  //
+  // This check also prevents us from folding negative offsets, which will end
+  // up being treated in the same way as large positive ones. They could also
+  // cause code model violations, and aren't really common enough to matter.
+  if (NewOffset >= (1 << 21))
+    return false;
+
+  Type *T = GV->getValueType();
+  if (!T->isSized() ||
+      NewOffset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
+    return false;
+  MatchInfo = std::make_pair(NewOffset, MinOffset);
+  return true;
+}
+
+static bool applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                  MachineIRBuilder &B,
+                                  GISelChangeObserver &Observer,
+                                  std::pair<uint64_t, uint64_t> &MatchInfo) {
+  // Change:
+  //
+  //  %g = G_GLOBAL_VALUE @x
+  //  %ptr1 = G_PTR_ADD %g, cst1
+  //  %ptr2 = G_PTR_ADD %g, cst2
+  //  ...
+  //  %ptrN = G_PTR_ADD %g, cstN
+  //
+  // To:
+  //
+  //  %offset_g = G_GLOBAL_VALUE @x + min_cst
+  //  %g = G_PTR_ADD %offset_g, -min_cst
+  //  %ptr1 = G_PTR_ADD %g, cst1
+  //  ...
+  //  %ptrN = G_PTR_ADD %g, cstN
+  //
+  // Then, the original G_PTR_ADDs should be folded later on so that they look
+  // like this:
+  //
+  //  %ptrN = G_PTR_ADD %offset_g, cstN - min_cst
+  uint64_t Offset, MinOffset;
+  std::tie(Offset, MinOffset) = MatchInfo;
+  B.setInstrAndDebugLoc(MI);
+  Observer.changingInstr(MI);
+  auto &GlobalOp = MI.getOperand(1);
+  auto *GV = GlobalOp.getGlobal();
+  GlobalOp.ChangeToGA(GV, Offset, GlobalOp.getTargetFlags());
+  Register Dst = MI.getOperand(0).getReg();
+  Register NewGVDst = MRI.cloneVirtualRegister(Dst);
+  MI.getOperand(0).setReg(NewGVDst);
+  Observer.changedInstr(MI);
+  B.buildPtrAdd(
+      Dst, NewGVDst,
+      B.buildConstant(LLT::scalar(64), -static_cast<int64_t>(MinOffset)));
+  return true;
+}
+
 class AArch64PreLegalizerCombinerHelperState {
 protected:
   CombinerHelper &Helper;