Skip to content

Commit 44bd807

Browse files
author
Krzysztof Parzyszek
committed
[Hexagon] Adjust handling of stack with variable-size and extra alignment
Make the stack alignment register (AP) reserved in the given function. This will make it available everywhere in the function, and allow aligned access to vector register spill slots.
1 parent 13ea134 commit 44bd807

File tree

8 files changed

+70
-107
lines changed

8 files changed

+70
-107
lines changed

llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp

Lines changed: 35 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -1275,7 +1275,7 @@ HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
12751275
unsigned FrameSize = MFI.getStackSize();
12761276
Register SP = HRI.getStackRegister();
12771277
Register FP = HRI.getFrameRegister();
1278-
Register AP = HMFI.getStackAlignBasePhysReg();
1278+
Register AP = HMFI.getStackAlignBaseReg();
12791279
// It may happen that AP will be absent even when HasAlloca && HasExtraAlign
12801280
// is true. HasExtraAlign may be set because of vector spills, without
12811281
// aligned locals or aligned outgoing function arguments. Since vector
@@ -1290,9 +1290,6 @@ HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
12901290
// assume that missing AP will be replaced by FP.
12911291
// (A better fix would be to rematerialize AP from FP and always align
12921292
// vector spills.)
1293-
if (AP == 0)
1294-
AP = FP;
1295-
12961293
bool UseFP = false, UseAP = false; // Default: use SP (except at -O0).
12971294
// Use FP at -O0, except when there are objects with extra alignment.
12981295
// That additional alignment requirement may cause a pad to be inserted,
@@ -1517,65 +1514,13 @@ void HexagonFrameLowering::processFunctionBeforeFrameFinalized(
15171514
if (!HasAlloca || !NeedsAlign)
15181515
return;
15191516

1520-
SmallSet<int, 4> DealignSlots;
1521-
unsigned LFS = MFI.getLocalFrameSize();
1522-
for (int i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
1523-
if (!MFI.isSpillSlotObjectIndex(i) || MFI.isDeadObjectIndex(i))
1524-
continue;
1525-
unsigned S = MFI.getObjectSize(i);
1526-
// Reduce the alignment to at most 8. This will require unaligned vector
1527-
// stores if they happen here.
1528-
Align A = std::max(MFI.getObjectAlign(i), Align(8));
1529-
MFI.setObjectAlignment(i, Align(8));
1530-
LFS = alignTo(LFS+S, A);
1531-
MFI.mapLocalFrameObject(i, -static_cast<int64_t>(LFS));
1532-
DealignSlots.insert(i);
1533-
}
1534-
1535-
MFI.setLocalFrameSize(LFS);
1536-
Align A = MFI.getLocalFrameMaxAlign();
1537-
assert(A <= 8 && "Unexpected local frame alignment");
1538-
if (A == 1)
1539-
MFI.setLocalFrameMaxAlign(Align(8));
1540-
MFI.setUseLocalStackAllocationBlock(true);
1541-
1542-
// Go over all MachineMemOperands in the code, and change the ones that
1543-
// refer to the dealigned stack slots to reflect the new alignment.
1544-
if (!DealignSlots.empty()) {
1545-
for (MachineBasicBlock &BB : MF) {
1546-
for (MachineInstr &MI : BB) {
1547-
bool KeepOld = true;
1548-
ArrayRef<MachineMemOperand*> memops = MI.memoperands();
1549-
SmallVector<MachineMemOperand*,1> new_memops;
1550-
for (MachineMemOperand *MMO : memops) {
1551-
auto *PV = MMO->getPseudoValue();
1552-
if (auto *FS = dyn_cast_or_null<FixedStackPseudoSourceValue>(PV)) {
1553-
int FI = FS->getFrameIndex();
1554-
if (DealignSlots.count(FI)) {
1555-
auto *NewMMO = MF.getMachineMemOperand(
1556-
MMO->getPointerInfo(), MMO->getFlags(), MMO->getSize(),
1557-
MFI.getObjectAlign(FI), MMO->getAAInfo(), MMO->getRanges(),
1558-
MMO->getSyncScopeID(), MMO->getSuccessOrdering(),
1559-
MMO->getFailureOrdering());
1560-
new_memops.push_back(NewMMO);
1561-
KeepOld = false;
1562-
continue;
1563-
}
1564-
}
1565-
new_memops.push_back(MMO);
1566-
}
1567-
if (!KeepOld)
1568-
MI.setMemRefs(MF, new_memops);
1569-
}
1570-
}
1571-
}
1572-
15731517
// Set the physical aligned-stack base address register.
15741518
Register AP = 0;
15751519
if (const MachineInstr *AI = getAlignaInstr(MF))
15761520
AP = AI->getOperand(0).getReg();
15771521
auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
1578-
HMFI.setStackAlignBasePhysReg(AP);
1522+
assert(!AP.isValid() || AP.isPhysical());
1523+
HMFI.setStackAlignBaseReg(AP);
15791524
}
15801525

15811526
/// Returns true if there are no caller-saved registers available in class RC.
@@ -1637,6 +1582,26 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
16371582
// (2) For each reserved register, remove that register and all of its
16381583
// sub- and super-registers from SRegs.
16391584
BitVector Reserved = TRI->getReservedRegs(MF);
1585+
// Unreserve the stack align register: it is reserved for this function
1586+
// only, it still needs to be saved/restored.
1587+
Register AP =
1588+
MF.getInfo<HexagonMachineFunctionInfo>()->getStackAlignBaseReg();
1589+
if (AP.isValid()) {
1590+
Reserved[AP] = false;
1591+
// Unreserve super-regs if no other subregisters are reserved.
1592+
for (MCSuperRegIterator SP(AP, TRI, false); SP.isValid(); ++SP) {
1593+
bool HasResSub = false;
1594+
for (MCSubRegIterator SB(*SP, TRI, false); SB.isValid(); ++SB) {
1595+
if (!Reserved[*SB])
1596+
continue;
1597+
HasResSub = true;
1598+
break;
1599+
}
1600+
if (!HasResSub)
1601+
Reserved[*SP] = false;
1602+
}
1603+
}
1604+
16401605
for (int x = Reserved.find_first(); x >= 0; x = Reserved.find_next(x)) {
16411606
Register R = x;
16421607
for (MCSuperRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
@@ -1936,21 +1901,16 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
19361901
Register SrcHi = HRI.getSubReg(SrcR, Hexagon::vsub_hi);
19371902
bool IsKill = MI->getOperand(2).isKill();
19381903
int FI = MI->getOperand(0).getIndex();
1939-
bool NeedsAligna = needsAligna(MF);
19401904

19411905
unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass);
19421906
Align NeedAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);
19431907
Align HasAlign = MFI.getObjectAlign(FI);
19441908
unsigned StoreOpc;
19451909

1946-
auto UseAligned = [&](Align NeedAlign, Align HasAlign) {
1947-
return !NeedsAligna && (NeedAlign <= HasAlign);
1948-
};
1949-
19501910
// Store low part.
19511911
if (LPR.contains(SrcLo)) {
1952-
StoreOpc = UseAligned(NeedAlign, HasAlign) ? Hexagon::V6_vS32b_ai
1953-
: Hexagon::V6_vS32Ub_ai;
1912+
StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai
1913+
: Hexagon::V6_vS32Ub_ai;
19541914
BuildMI(B, It, DL, HII.get(StoreOpc))
19551915
.addFrameIndex(FI)
19561916
.addImm(0)
@@ -1960,8 +1920,8 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
19601920

19611921
// Store high part.
19621922
if (LPR.contains(SrcHi)) {
1963-
StoreOpc = UseAligned(NeedAlign, HasAlign) ? Hexagon::V6_vS32b_ai
1964-
: Hexagon::V6_vS32Ub_ai;
1923+
StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai
1924+
: Hexagon::V6_vS32Ub_ai;
19651925
BuildMI(B, It, DL, HII.get(StoreOpc))
19661926
.addFrameIndex(FI)
19671927
.addImm(Size)
@@ -1988,28 +1948,23 @@ bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B,
19881948
Register DstHi = HRI.getSubReg(DstR, Hexagon::vsub_hi);
19891949
Register DstLo = HRI.getSubReg(DstR, Hexagon::vsub_lo);
19901950
int FI = MI->getOperand(1).getIndex();
1991-
bool NeedsAligna = needsAligna(MF);
19921951

19931952
unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass);
19941953
Align NeedAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);
19951954
Align HasAlign = MFI.getObjectAlign(FI);
19961955
unsigned LoadOpc;
19971956

1998-
auto UseAligned = [&](Align NeedAlign, Align HasAlign) {
1999-
return !NeedsAligna && (NeedAlign <= HasAlign);
2000-
};
2001-
20021957
// Load low part.
2003-
LoadOpc = UseAligned(NeedAlign, HasAlign) ? Hexagon::V6_vL32b_ai
2004-
: Hexagon::V6_vL32Ub_ai;
1958+
LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai
1959+
: Hexagon::V6_vL32Ub_ai;
20051960
BuildMI(B, It, DL, HII.get(LoadOpc), DstLo)
20061961
.addFrameIndex(FI)
20071962
.addImm(0)
20081963
.cloneMemRefs(*MI);
20091964

20101965
// Load high part.
2011-
LoadOpc = UseAligned(NeedAlign, HasAlign) ? Hexagon::V6_vL32b_ai
2012-
: Hexagon::V6_vL32Ub_ai;
1966+
LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai
1967+
: Hexagon::V6_vL32Ub_ai;
20131968
BuildMI(B, It, DL, HII.get(LoadOpc), DstHi)
20141969
.addFrameIndex(FI)
20151970
.addImm(Size)
@@ -2028,7 +1983,6 @@ bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B,
20281983
if (!MI->getOperand(0).isFI())
20291984
return false;
20301985

2031-
bool NeedsAligna = needsAligna(MF);
20321986
auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
20331987
DebugLoc DL = MI->getDebugLoc();
20341988
Register SrcR = MI->getOperand(2).getReg();
@@ -2037,9 +1991,8 @@ bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B,
20371991

20381992
Align NeedAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);
20391993
Align HasAlign = MFI.getObjectAlign(FI);
2040-
bool UseAligned = !NeedsAligna && (NeedAlign <= HasAlign);
2041-
unsigned StoreOpc = UseAligned ? Hexagon::V6_vS32b_ai
2042-
: Hexagon::V6_vS32Ub_ai;
1994+
unsigned StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai
1995+
: Hexagon::V6_vS32Ub_ai;
20431996
BuildMI(B, It, DL, HII.get(StoreOpc))
20441997
.addFrameIndex(FI)
20451998
.addImm(0)
@@ -2059,17 +2012,15 @@ bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B,
20592012
if (!MI->getOperand(1).isFI())
20602013
return false;
20612014

2062-
bool NeedsAligna = needsAligna(MF);
20632015
auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
20642016
DebugLoc DL = MI->getDebugLoc();
20652017
Register DstR = MI->getOperand(0).getReg();
20662018
int FI = MI->getOperand(1).getIndex();
20672019

20682020
Align NeedAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);
20692021
Align HasAlign = MFI.getObjectAlign(FI);
2070-
bool UseAligned = !NeedsAligna && (NeedAlign <= HasAlign);
2071-
unsigned LoadOpc = UseAligned ? Hexagon::V6_vL32b_ai
2072-
: Hexagon::V6_vL32Ub_ai;
2022+
unsigned LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai
2023+
: Hexagon::V6_vL32Ub_ai;
20732024
BuildMI(B, It, DL, HII.get(LoadOpc), DstR)
20742025
.addFrameIndex(FI)
20752026
.addImm(0)

llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -749,7 +749,7 @@ void HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) {
749749
R = CurDAG->getMachineNode(Hexagon::PS_fi, DL, MVT::i32, FI, Zero);
750750
} else {
751751
auto &HMFI = *MF->getInfo<HexagonMachineFunctionInfo>();
752-
unsigned AR = HMFI.getStackAlignBaseVReg();
752+
Register AR = HMFI.getStackAlignBaseReg();
753753
SDValue CH = CurDAG->getEntryNode();
754754
SDValue Ops[] = { CurDAG->getCopyFromReg(CH, DL, AR, MVT::i32), FI, Zero };
755755
R = CurDAG->getMachineNode(Hexagon::PS_fia, DL, MVT::i32, Ops);
@@ -1285,11 +1285,22 @@ void HexagonDAGToDAGISel::emitFunctionEntryCode() {
12851285

12861286
MachineFrameInfo &MFI = MF->getFrameInfo();
12871287
MachineBasicBlock *EntryBB = &MF->front();
1288-
Register AR = FuncInfo->CreateReg(MVT::i32);
12891288
Align EntryMaxA = MFI.getMaxAlign();
1290-
BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::PS_aligna), AR)
1289+
1290+
// Reserve the first non-volatile register.
1291+
Register AP = 0;
1292+
auto &HRI = *HST.getRegisterInfo();
1293+
BitVector Reserved = HRI.getReservedRegs(*MF);
1294+
for (const MCPhysReg *R = HRI.getCalleeSavedRegs(MF); *R; ++R) {
1295+
if (Reserved[*R])
1296+
continue;
1297+
AP = *R;
1298+
break;
1299+
}
1300+
assert(AP.isValid() && "Couldn't reserve stack align register");
1301+
BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::PS_aligna), AP)
12911302
.addImm(EntryMaxA.value());
1292-
MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseVReg(AR);
1303+
MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseReg(AP);
12931304
}
12941305

12951306
void HexagonDAGToDAGISel::updateAligna() {

llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,7 @@ class HexagonMachineFunctionInfo : public MachineFunctionInfo {
2727
// returning the value of the returned struct in a register. This field
2828
// holds the virtual register into which the sret argument is passed.
2929
unsigned SRetReturnReg = 0;
30-
unsigned StackAlignBaseVReg = 0; // Aligned-stack base register (virtual)
31-
unsigned StackAlignBasePhysReg = 0; // (physical)
30+
Register StackAlignBaseReg = 0; // Aligned-stack base register
3231
int VarArgsFrameIndex;
3332
int RegSavedAreaStartFrameIndex;
3433
int FirstNamedArgFrameIndex;
@@ -82,11 +81,8 @@ class HexagonMachineFunctionInfo : public MachineFunctionInfo {
8281
bool hasEHReturn() const { return HasEHReturn; };
8382
void setHasEHReturn(bool H = true) { HasEHReturn = H; };
8483

85-
void setStackAlignBaseVReg(unsigned R) { StackAlignBaseVReg = R; }
86-
unsigned getStackAlignBaseVReg() const { return StackAlignBaseVReg; }
87-
88-
void setStackAlignBasePhysReg(unsigned R) { StackAlignBasePhysReg = R; }
89-
unsigned getStackAlignBasePhysReg() const { return StackAlignBasePhysReg; }
84+
void setStackAlignBaseReg(Register R) { StackAlignBaseReg = R; }
85+
Register getStackAlignBaseReg() const { return StackAlignBaseReg; }
9086
};
9187

9288
} // end namespace llvm

llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,11 @@ BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF)
194194
if (MF.getSubtarget<HexagonSubtarget>().hasReservedR19())
195195
Reserved.set(Hexagon::R19);
196196

197+
Register AP =
198+
MF.getInfo<HexagonMachineFunctionInfo>()->getStackAlignBaseReg();
199+
if (AP.isValid())
200+
Reserved.set(AP);
201+
197202
for (int x = Reserved.find_first(); x >= 0; x = Reserved.find_next(x))
198203
markSuperRegs(Reserved, x);
199204

llvm/lib/Target/Hexagon/HexagonVExtract.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) {
105105
MachineRegisterInfo &MRI = MF.getRegInfo();
106106
MachineFrameInfo &MFI = MF.getFrameInfo();
107107
Register AR =
108-
MF.getInfo<HexagonMachineFunctionInfo>()->getStackAlignBaseVReg();
108+
MF.getInfo<HexagonMachineFunctionInfo>()->getStackAlignBaseReg();
109109
std::map<unsigned, SmallVector<MachineInstr *, 4>> VExtractMap;
110110
bool Changed = false;
111111

llvm/test/CodeGen/Hexagon/spill-vector-alignment.mir

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# RUN: llc -march=hexagon -run-pass prologepilog %s -o - | FileCheck %s
22

3-
# Check that the spill of $q0 uses unaligned store instruction.
4-
# CHECK: V6_vS32Ub_ai $r30, -128, killed $v0
3+
# Check that the spill of $q0 no longer uses an unaligned store instruction.
4+
# CHECK: V6_vS32b_ai $r16, -256, killed $v0
55

66
---
77
name: test
@@ -12,5 +12,6 @@ stack:
1212
body: |
1313
bb.0:
1414
liveins: $q0
15+
$r16 = PS_aligna 128, implicit $r30
1516
PS_vstorerq_ai %stack.1, 0, $q0
1617
...

llvm/test/CodeGen/Hexagon/stack-alloca2.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
; RUN: llc -O0 -march=hexagon < %s | FileCheck %s
2-
; CHECK-DAG: r[[AP:[0-9]+]] = and(r30,#-32)
3-
; CHECK-DAG: r1 = add(r[[AP]],#-32)
4-
5-
; CHECK-DAG: sub(r29,r[[SP:[0-9]+]])
6-
; CHECK-DAG: r29 = r[[SP]]
2+
; CHECK: r[[AP:[0-9]+]] = and(r30,#-32)
3+
; CHECK: sub(r29,r[[SP:[0-9]+]])
4+
; CHECK: r29 = r[[SP]]
5+
; CHECK: r1 = r[[AP]]
6+
; CHECK: r1 = add(r1,#-32)
77

88
target triple = "hexagon-unknown-unknown"
99

llvm/test/CodeGen/Hexagon/v6-unaligned-spill.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
; RUN: llc -march=hexagon < %s | FileCheck %s
22

3-
; Test that we generate an unaligned vector store for a spill when a function
4-
; has an alloca. Also, make sure the addressing mode for unaligned store does
5-
; is not a base+offset with a non-zero offset that is not a multiple of 128.
3+
; Test that we no longer generate an unaligned vector store for a spill when
4+
; a function has an alloca.
65

7-
; CHECK: vmemu({{.*}}) =
6+
; CHECK: vmem({{.*}}) =
87

98
%s.0 = type { [5 x [4 x i8]], i32, i32, i32, i32 }
109

0 commit comments

Comments
 (0)