Skip to content

Commit 634b024

Browse files
authored
[llvm][arm] add T1 and T2 assembly options for vlldm and vlstm (#83116)
T1 allows for an optional register list; when present, the register list must be {d0-d15}. T2 defines a mandatory register list; the register list must be {d0-d31}. The requirements for T1/T2 are as follows:

|           | T1                      | T2                        |
|-----------|-------------------------|---------------------------|
| Require   | v8-M.Main, secure state | v8.1-M.Main, secure state |
| 16 D Regs | valid                   | valid                     |
| 32 D Regs | UNDEFINED               | valid                     |
| No D Regs | NOP                     | NOP                       |
1 parent 2640277 commit 634b024

File tree

15 files changed

+362
-61
lines changed

15 files changed

+362
-61
lines changed

llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp

Lines changed: 37 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1468,15 +1468,21 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV8(
14681468
if (passesFPReg)
14691469
assert(STI->hasFPRegs() && "Subtarget needs fpregs");
14701470

1471-
// Lazy store all fp registers to the stack.
1471+
// Lazy store all fp registers to the stack
14721472
// This executes as NOP in the absence of floating-point support.
1473-
MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
1474-
.addReg(ARM::SP)
1475-
.add(predOps(ARMCC::AL));
1476-
for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1,
1477-
ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7})
1478-
VLSTM.addReg(R, RegState::Implicit |
1479-
(LiveRegs.contains(R) ? 0 : RegState::Undef));
1473+
MachineInstrBuilder VLSTM =
1474+
BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
1475+
.addReg(ARM::SP)
1476+
.add(predOps(ARMCC::AL))
1477+
.addImm(0); // Represents a pseudo register list, has no effect on
1478+
// the encoding.
1479+
// Mark non-live registers as undef
1480+
for (MachineOperand &MO : VLSTM->implicit_operands()) {
1481+
if (MO.isReg() && !MO.isDef()) {
1482+
Register Reg = MO.getReg();
1483+
MO.setIsUndef(!LiveRegs.contains(Reg));
1484+
}
1485+
}
14801486

14811487
// Restore all arguments
14821488
for (const auto &Regs : ClearedFPRegs) {
@@ -1563,14 +1569,20 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
15631569
.addImm(CMSE_FP_SAVE_SIZE >> 2)
15641570
.add(predOps(ARMCC::AL));
15651571

1566-
// Lazy store all FP registers to the stack
1567-
MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
1568-
.addReg(ARM::SP)
1569-
.add(predOps(ARMCC::AL));
1570-
for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1,
1571-
ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7})
1572-
VLSTM.addReg(R, RegState::Implicit |
1573-
(LiveRegs.contains(R) ? 0 : RegState::Undef));
1572+
// Lazy store all fp registers to the stack.
1573+
MachineInstrBuilder VLSTM =
1574+
BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
1575+
.addReg(ARM::SP)
1576+
.add(predOps(ARMCC::AL))
1577+
.addImm(0); // Represents a pseudo register list, has no effect on
1578+
// the encoding.
1579+
// Mark non-live registers as undef
1580+
for (MachineOperand &MO : VLSTM->implicit_operands()) {
1581+
if (MO.isReg() && MO.isImplicit() && !MO.isDef()) {
1582+
Register Reg = MO.getReg();
1583+
MO.setIsUndef(!LiveRegs.contains(Reg));
1584+
}
1585+
}
15741586
} else {
15751587
// Push all the callee-saved registers (s16-s31).
15761588
MachineInstrBuilder VPUSH =
@@ -1673,9 +1685,12 @@ void ARMExpandPseudo::CMSERestoreFPRegsV8(
16731685

16741686
// Lazy load fp regs from stack.
16751687
// This executes as NOP in the absence of floating-point support.
1676-
MachineInstrBuilder VLLDM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
1677-
.addReg(ARM::SP)
1678-
.add(predOps(ARMCC::AL));
1688+
MachineInstrBuilder VLLDM =
1689+
BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
1690+
.addReg(ARM::SP)
1691+
.add(predOps(ARMCC::AL))
1692+
.addImm(0); // Represents a pseudo register list, has no effect on
1693+
// the encoding.
16791694

16801695
if (STI->fixCMSE_CVE_2021_35465()) {
16811696
auto Bundler = MIBundleBuilder(MBB, VLLDM);
@@ -1757,7 +1772,9 @@ void ARMExpandPseudo::CMSERestoreFPRegsV81(
17571772
// Load FP registers from stack.
17581773
BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
17591774
.addReg(ARM::SP)
1760-
.add(predOps(ARMCC::AL));
1775+
.add(predOps(ARMCC::AL))
1776+
.addImm(0); // Represents a pseudo register list, has no effect on the
1777+
// encoding.
17611778

17621779
// Pop the stack space
17631780
BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP)

llvm/lib/Target/ARM/ARMInstrFormats.td

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1749,6 +1749,37 @@ class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
17491749
let Inst{8} = 0; // Single precision
17501750
}
17511751

1752+
// Single Precision with fixed registers.
1753+
// For when the registers-to-be-stored/loaded are fixed, e.g. VLLDM and VLSTM
1754+
class AXSI4FR<string asm, bit et, bit load>
1755+
: InstARM<AddrMode4, 4, IndexModeNone, VFPLdStMulFrm, VFPDomain, "", NoItinerary> {
1756+
// Instruction operands.
1757+
bits<4> Rn;
1758+
bits<13> regs; // Does not affect encoding, for assembly/disassembly only.
1759+
list<Predicate> Predicates = [HasVFP2];
1760+
let OutOperandList = (outs);
1761+
let InOperandList = (ins GPRnopc:$Rn, pred:$p, dpr_reglist:$regs);
1762+
let AsmString = asm;
1763+
let Pattern = [];
1764+
let DecoderNamespace = "VFP";
1765+
// Encode instruction operands.
1766+
let Inst{19-16} = Rn;
1767+
let Inst{31-28} = 0b1110;
1768+
let Inst{27-25} = 0b110;
1769+
let Inst{24} = 0b0;
1770+
let Inst{23} = 0b0;
1771+
let Inst{22} = 0b0;
1772+
let Inst{21} = 0b1;
1773+
let Inst{20} = load; // Distinguishes vlldm from vlstm
1774+
let Inst{15-12} = 0b0000;
1775+
let Inst{11-9} = 0b101;
1776+
let Inst{8} = 0; // Single precision
1777+
let Inst{7} = et; // encoding type, 0 for T1 and 1 for T2.
1778+
let Inst{6-0} = 0b0000000;
1779+
let mayLoad = load;
1780+
let mayStore = !eq(load, 0);
1781+
}
1782+
17521783
// Double precision, unary
17531784
class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
17541785
bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,

llvm/lib/Target/ARM/ARMInstrVFP.td

Lines changed: 43 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -313,29 +313,51 @@ def : MnemonicAlias<"vstm", "vstmia">;
313313
//===----------------------------------------------------------------------===//
314314
// Lazy load / store multiple Instructions
315315
//
316-
def VLLDM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
317-
NoItinerary, "vlldm${p}\t$Rn", "", []>,
316+
// VLLDM and VLSTM:
317+
// 2 encoding options:
318+
// T1 (bit 7 is 0):
319+
// T1 takes an optional dpr_reglist, must be '{d0-d15}' (exactly)
320+
// T1 requires v8-M.Main, secure state, target with 16 D registers (or with no D registers - NOP)
321+
// T2 (bit 7 is 1):
322+
// T2 takes a mandatory dpr_reglist, must be '{d0-d31}' (exactly)
323+
// T2 requires v8.1-M.Main, secure state, target with 16/32 D registers (or with no D registers - NOP)
324+
// (source: Arm v8-M ARM, DDI0553B.v ID16122022)
325+
326+
def VLLDM : AXSI4FR<"vlldm${p}\t$Rn, $regs", 0, 1>,
318327
Requires<[HasV8MMainline, Has8MSecExt]> {
319-
let Inst{24-23} = 0b00;
320-
let Inst{22} = 0;
321-
let Inst{21} = 1;
322-
let Inst{20} = 1;
323-
let Inst{15-12} = 0;
324-
let Inst{7-0} = 0;
325-
let mayLoad = 1;
326-
let Defs = [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, VPR, FPSCR, FPSCR_NZCV];
327-
}
328-
329-
def VLSTM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
330-
NoItinerary, "vlstm${p}\t$Rn", "", []>,
328+
let Defs = [VPR, FPSCR, FPSCR_NZCV, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15];
329+
let DecoderMethod = "DecodeLazyLoadStoreMul";
330+
}
331+
// T1: assembly does not contain the register list.
332+
def : InstAlias<"vlldm${p}\t$Rn", (VLLDM GPRnopc:$Rn, pred:$p, 0)>,
333+
Requires<[HasV8MMainline, Has8MSecExt]>;
334+
// T2: assembly must contain the register list.
335+
// The register list has no effect on the encoding, it is for assembly/disassembly purposes only.
336+
def VLLDM_T2 : AXSI4FR<"vlldm${p}\t$Rn, $regs", 1, 1>,
337+
Requires<[HasV8_1MMainline, Has8MSecExt]> {
338+
let Defs = [VPR, FPSCR, FPSCR_NZCV, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
339+
D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31];
340+
let DecoderMethod = "DecodeLazyLoadStoreMul";
341+
}
342+
// T1: assembly contains the register list.
343+
// The register list has no effect on the encoding, it is for assembly/disassembly purposes only.
344+
def VLSTM : AXSI4FR<"vlstm${p}\t$Rn, $regs", 0, 0>,
331345
Requires<[HasV8MMainline, Has8MSecExt]> {
332-
let Inst{24-23} = 0b00;
333-
let Inst{22} = 0;
334-
let Inst{21} = 1;
335-
let Inst{20} = 0;
336-
let Inst{15-12} = 0;
337-
let Inst{7-0} = 0;
338-
let mayStore = 1;
346+
let Defs = [VPR, FPSCR, FPSCR_NZCV];
347+
let Uses = [VPR, FPSCR, FPSCR_NZCV, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15];
348+
let DecoderMethod = "DecodeLazyLoadStoreMul";
349+
}
350+
// T1: assembly does not contain the register list.
351+
def : InstAlias<"vlstm${p}\t$Rn", (VLSTM GPRnopc:$Rn, pred:$p, 0)>,
352+
Requires<[HasV8MMainline, Has8MSecExt]>;
353+
// T2: assembly must contain the register list.
354+
// The register list has no effect on the encoding, it is for assembly/disassembly purposes only.
355+
def VLSTM_T2 : AXSI4FR<"vlstm${p}\t$Rn, $regs", 1, 0>,
356+
Requires<[HasV8_1MMainline, Has8MSecExt]> {
357+
let Defs = [VPR, FPSCR, FPSCR_NZCV];
358+
let Uses = [VPR, FPSCR, FPSCR_NZCV, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
359+
D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31];
360+
let DecoderMethod = "DecodeLazyLoadStoreMul";
339361
}
340362

341363
def : InstAlias<"vpush${p} $r", (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r), 0>,

llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp

Lines changed: 67 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -450,11 +450,12 @@ class ARMAsmParser : public MCTargetAsmParser {
450450
bool validatetSTMRegList(const MCInst &Inst, const OperandVector &Operands,
451451
unsigned ListNo);
452452

453-
int tryParseRegister();
453+
int tryParseRegister(bool AllowOutofBoundReg = false);
454454
bool tryParseRegisterWithWriteBack(OperandVector &);
455455
int tryParseShiftRegister(OperandVector &);
456456
bool parseRegisterList(OperandVector &, bool EnforceOrder = true,
457-
bool AllowRAAC = false);
457+
bool AllowRAAC = false,
458+
bool AllowOutOfBoundReg = false);
458459
bool parseMemory(OperandVector &);
459460
bool parseOperand(OperandVector &, StringRef Mnemonic);
460461
bool parseImmExpr(int64_t &Out);
@@ -4072,7 +4073,7 @@ ParseStatus ARMAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
40724073
/// Try to parse a register name. The token must be an Identifier when called,
40734074
/// and if it is a register name the token is eaten and the register number is
40744075
/// returned. Otherwise return -1.
4075-
int ARMAsmParser::tryParseRegister() {
4076+
int ARMAsmParser::tryParseRegister(bool AllowOutOfBoundReg) {
40764077
MCAsmParser &Parser = getParser();
40774078
const AsmToken &Tok = Parser.getTok();
40784079
if (Tok.isNot(AsmToken::Identifier)) return -1;
@@ -4116,7 +4117,8 @@ int ARMAsmParser::tryParseRegister() {
41164117
}
41174118

41184119
// Some FPUs only have 16 D registers, so D16-D31 are invalid
4119-
if (!hasD32() && RegNum >= ARM::D16 && RegNum <= ARM::D31)
4120+
if (!AllowOutOfBoundReg && !hasD32() && RegNum >= ARM::D16 &&
4121+
RegNum <= ARM::D31)
41204122
return -1;
41214123

41224124
Parser.Lex(); // Eat identifier token.
@@ -4456,7 +4458,7 @@ insertNoDuplicates(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs,
44564458

44574459
/// Parse a register list.
44584460
bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder,
4459-
bool AllowRAAC) {
4461+
bool AllowRAAC, bool AllowOutOfBoundReg) {
44604462
MCAsmParser &Parser = getParser();
44614463
if (Parser.getTok().isNot(AsmToken::LCurly))
44624464
return TokError("Token is not a Left Curly Brace");
@@ -4510,7 +4512,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder,
45104512
return Error(RegLoc, "pseudo-register not allowed");
45114513
Parser.Lex(); // Eat the minus.
45124514
SMLoc AfterMinusLoc = Parser.getTok().getLoc();
4513-
int EndReg = tryParseRegister();
4515+
int EndReg = tryParseRegister(AllowOutOfBoundReg);
45144516
if (EndReg == -1)
45154517
return Error(AfterMinusLoc, "register expected");
45164518
if (EndReg == ARM::RA_AUTH_CODE)
@@ -4545,7 +4547,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder,
45454547
RegLoc = Parser.getTok().getLoc();
45464548
int OldReg = Reg;
45474549
const AsmToken RegTok = Parser.getTok();
4548-
Reg = tryParseRegister();
4550+
Reg = tryParseRegister(AllowOutOfBoundReg);
45494551
if (Reg == -1)
45504552
return Error(RegLoc, "register expected");
45514553
if (!AllowRAAC && Reg == ARM::RA_AUTH_CODE)
@@ -6085,8 +6087,11 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
60856087
}
60866088
case AsmToken::LBrac:
60876089
return parseMemory(Operands);
6088-
case AsmToken::LCurly:
6089-
return parseRegisterList(Operands, !Mnemonic.starts_with("clr"));
6090+
case AsmToken::LCurly: {
6091+
bool AllowOutOfBoundReg = Mnemonic == "vlldm" || Mnemonic == "vlstm";
6092+
return parseRegisterList(Operands, !Mnemonic.starts_with("clr"), false,
6093+
AllowOutOfBoundReg);
6094+
}
60906095
case AsmToken::Dollar:
60916096
case AsmToken::Hash: {
60926097
// #42 -> immediate
@@ -7596,6 +7601,33 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
75967601

75977602
const unsigned Opcode = Inst.getOpcode();
75987603
switch (Opcode) {
7604+
case ARM::VLLDM:
7605+
case ARM::VLLDM_T2:
7606+
case ARM::VLSTM:
7607+
case ARM::VLSTM_T2: {
7608+
// Since in some cases both T1 and T2 are valid, tablegen can not always
7609+
// pick the correct instruction.
7610+
if (Operands.size() == 4) { // a register list has been provided
7611+
ARMOperand &Op = static_cast<ARMOperand &>(
7612+
*Operands[3]); // the register list, a dpr_reglist
7613+
assert(Op.isDPRRegList());
7614+
auto &RegList = Op.getRegList();
7615+
// T2 requires v8.1-M.Main (cannot be handled by tablegen)
7616+
if (RegList.size() == 32 && !hasV8_1MMainline()) {
7617+
return Error(Op.getEndLoc(), "T2 version requires v8.1-M.Main");
7618+
}
7619+
// When target has 32 D registers, T1 is undefined.
7620+
if (hasD32() && RegList.size() != 32) {
7621+
return Error(Op.getEndLoc(), "operand must be exactly {d0-d31}");
7622+
}
7623+
// When target has 16 D registers, both T1 and T2 are valid.
7624+
if (!hasD32() && (RegList.size() != 16 && RegList.size() != 32)) {
7625+
return Error(Op.getEndLoc(),
7626+
"operand must be exactly {d0-d15} (T1) or {d0-d31} (T2)");
7627+
}
7628+
}
7629+
return false;
7630+
}
75997631
case ARM::t2IT: {
76007632
// Encoding is unpredictable if it ever results in a notional 'NV'
76017633
// predicate. Since we don't parse 'NV' directly this means an 'AL'
@@ -8731,6 +8763,32 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
87318763
}
87328764

87338765
switch (Inst.getOpcode()) {
8766+
case ARM::VLLDM:
8767+
case ARM::VLSTM: {
8768+
// In some cases both T1 and T2 are valid, causing tablegen to pick T1 instead
8769+
// of T2
8770+
if (Operands.size() == 4) { // a register list has been provided
8771+
ARMOperand &Op = static_cast<ARMOperand &>(
8772+
*Operands[3]); // the register list, a dpr_reglist
8773+
assert(Op.isDPRRegList());
8774+
auto &RegList = Op.getRegList();
8775+
// When the register list is {d0-d31} the instruction has to be the T2
8776+
// variant
8777+
if (RegList.size() == 32) {
8778+
const unsigned Opcode =
8779+
(Inst.getOpcode() == ARM::VLLDM) ? ARM::VLLDM_T2 : ARM::VLSTM_T2;
8780+
MCInst TmpInst;
8781+
TmpInst.setOpcode(Opcode);
8782+
TmpInst.addOperand(Inst.getOperand(0));
8783+
TmpInst.addOperand(Inst.getOperand(1));
8784+
TmpInst.addOperand(Inst.getOperand(2));
8785+
TmpInst.addOperand(Inst.getOperand(3));
8786+
Inst = TmpInst;
8787+
return true;
8788+
}
8789+
}
8790+
return false;
8791+
}
87348792
// Alias for alternate form of 'ldr{,b}t Rt, [Rn], #imm' instruction.
87358793
case ARM::LDRT_POST:
87368794
case ARM::LDRBT_POST: {

llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -700,6 +700,9 @@ DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn, uint64_t Address,
700700
static DecodeStatus DecodeT2AddSubSPImm(MCInst &Inst, unsigned Insn,
701701
uint64_t Address,
702702
const MCDisassembler *Decoder);
703+
static DecodeStatus DecodeLazyLoadStoreMul(MCInst &Inst, unsigned Insn,
704+
uint64_t Address,
705+
const MCDisassembler *Decoder);
703706

704707
#include "ARMGenDisassemblerTables.inc"
705708

@@ -7030,3 +7033,23 @@ static DecodeStatus DecodeT2AddSubSPImm(MCInst &Inst, unsigned Insn,
70307033

70317034
return DS;
70327035
}
7036+
7037+
static DecodeStatus DecodeLazyLoadStoreMul(MCInst &Inst, unsigned Insn,
7038+
uint64_t Address,
7039+
const MCDisassembler *Decoder) {
7040+
DecodeStatus S = MCDisassembler::Success;
7041+
7042+
const unsigned Rn = fieldFromInstruction(Insn, 16, 4);
7043+
// Adding Rn, holding memory location to save/load to/from, the only argument
7044+
// that is being encoded.
7045+
// '$Rn' in the assembly.
7046+
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
7047+
return MCDisassembler::Fail;
7048+
// An optional predicate, '$p' in the assembly.
7049+
DecodePredicateOperand(Inst, ARMCC::AL, Address, Decoder);
7050+
// An immediate that represents a floating-point register list. '$regs' in
7051+
// the assembly.
7052+
Inst.addOperand(MCOperand::createImm(0)); // Arbitrary value, has no effect.
7053+
7054+
return S;
7055+
}

0 commit comments

Comments
 (0)