Skip to content

[llvm][arm] add T1 and T2 assembly options for vlldm and vlstm #83116

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 37 additions & 20 deletions llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1468,15 +1468,21 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV8(
if (passesFPReg)
assert(STI->hasFPRegs() && "Subtarget needs fpregs");

// Lazy store all fp registers to the stack.
// Lazy store all fp registers to the stack
// This executes as NOP in the absence of floating-point support.
MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
.addReg(ARM::SP)
.add(predOps(ARMCC::AL));
for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1,
ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7})
VLSTM.addReg(R, RegState::Implicit |
(LiveRegs.contains(R) ? 0 : RegState::Undef));
MachineInstrBuilder VLSTM =
BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
.addReg(ARM::SP)
.add(predOps(ARMCC::AL))
.addImm(0); // Represents a pseudo register list, has no effect on
// the encoding.
// Mark non-live registers as undef
for (MachineOperand &MO : VLSTM->implicit_operands()) {
if (MO.isReg() && !MO.isDef()) {
Register Reg = MO.getReg();
MO.setIsUndef(!LiveRegs.contains(Reg));
}
}

// Restore all arguments
for (const auto &Regs : ClearedFPRegs) {
Expand Down Expand Up @@ -1563,14 +1569,20 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
.addImm(CMSE_FP_SAVE_SIZE >> 2)
.add(predOps(ARMCC::AL));

// Lazy store all FP registers to the stack
MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
.addReg(ARM::SP)
.add(predOps(ARMCC::AL));
for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1,
ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7})
VLSTM.addReg(R, RegState::Implicit |
(LiveRegs.contains(R) ? 0 : RegState::Undef));
// Lazy store all fp registers to the stack.
MachineInstrBuilder VLSTM =
BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
.addReg(ARM::SP)
.add(predOps(ARMCC::AL))
.addImm(0); // Represents a pseudo register list, has no effect on
// the encoding.
// Mark non-live registers as undef
for (MachineOperand &MO : VLSTM->implicit_operands()) {
if (MO.isReg() && MO.isImplicit() && !MO.isDef()) {
Register Reg = MO.getReg();
MO.setIsUndef(!LiveRegs.contains(Reg));
}
}
} else {
// Push all the callee-saved registers (s16-s31).
MachineInstrBuilder VPUSH =
Expand Down Expand Up @@ -1673,9 +1685,12 @@ void ARMExpandPseudo::CMSERestoreFPRegsV8(

// Lazy load fp regs from stack.
// This executes as NOP in the absence of floating-point support.
MachineInstrBuilder VLLDM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
.addReg(ARM::SP)
.add(predOps(ARMCC::AL));
MachineInstrBuilder VLLDM =
BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
.addReg(ARM::SP)
.add(predOps(ARMCC::AL))
.addImm(0); // Represents a pseudo register list, has no effect on
// the encoding.

if (STI->fixCMSE_CVE_2021_35465()) {
auto Bundler = MIBundleBuilder(MBB, VLLDM);
Expand Down Expand Up @@ -1757,7 +1772,9 @@ void ARMExpandPseudo::CMSERestoreFPRegsV81(
// Load FP registers from stack.
BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
.addReg(ARM::SP)
.add(predOps(ARMCC::AL));
.add(predOps(ARMCC::AL))
.addImm(0); // Represents a pseudo register list, has no effect on the
// encoding.

// Pop the stack space
BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP)
Expand Down
31 changes: 31 additions & 0 deletions llvm/lib/Target/ARM/ARMInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -1749,6 +1749,37 @@ class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
let Inst{8} = 0; // Single precision
}

// Single-precision VFP load/store-multiple format with a fixed register set.
// Intended for instructions whose stored/loaded registers are not selectable,
// e.g. VLLDM and VLSTM.
//   asm  - assembly string of the instruction.
//   et   - encoding type, placed in bit 7: 0 for T1, 1 for T2.
//   load - placed in bit 20: 1 for the load form (vlldm), 0 for the store
//          form (vlstm); also drives mayLoad / mayStore.
class AXSI4FR<string asm, bit et, bit load>
  : InstARM<AddrMode4, 4, IndexModeNone, VFPLdStMulFrm, VFPDomain, "", NoItinerary> {
  // Instruction operands.
  bits<4> Rn;
  // The register list is carried for assembly/disassembly only; none of its
  // bits are placed into Inst below.
  bits<13> regs;
  list<Predicate> Predicates = [HasVFP2];

  // Operand lists, assembly syntax and decoder table.
  let OutOperandList = (outs);
  let InOperandList = (ins GPRnopc:$Rn, pred:$p, dpr_reglist:$regs);
  let AsmString = asm;
  let Pattern = [];
  let DecoderNamespace = "VFP";

  // Memory behaviour follows the load/store selector.
  let mayLoad = load;
  let mayStore = !eq(load, 0);

  // Encoding, from the most significant bit down.
  let Inst{31-28} = 0b1110;
  let Inst{27-21} = 0b1100001;  // Fixed opcode bits: 27-25 = 110, 24-22 = 000, 21 = 1.
  let Inst{20}    = load;       // Distinguishes vlldm from vlstm.
  let Inst{19-16} = Rn;
  let Inst{15-12} = 0b0000;
  let Inst{11-8}  = 0b1010;     // 11-9 = 101; bit 8 = 0 selects single precision.
  let Inst{7}     = et;         // Encoding type, 0 for T1 and 1 for T2.
  let Inst{6-0}   = 0b0000000;
}

// Double precision, unary
class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
Expand Down
64 changes: 43 additions & 21 deletions llvm/lib/Target/ARM/ARMInstrVFP.td
Original file line number Diff line number Diff line change
Expand Up @@ -313,29 +313,51 @@ def : MnemonicAlias<"vstm", "vstmia">;
//===----------------------------------------------------------------------===//
// Lazy load / store multiple Instructions
//
def VLLDM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
NoItinerary, "vlldm${p}\t$Rn", "", []>,
// VLLDM and VLSTM:
// 2 encoding options:
// T1 (bit 7 is 0):
// T1 takes an optional dpr_reglist, must be '{d0-d15}' (exactly)
// T1 requires v8-M.Main, secure state, target with 16 D registers (or with no D registers - NOP)
// T2 (bit 7 is 1):
// T2 takes a mandatory dpr_reglist, must be '{d0-d31}' (exactly)
// T2 requires v8.1-M.Main, secure state, target with 16/32 D registers (or with no D registers - NOP)
// (source: Arm v8-M ARM, DDI0553B.v ID16122022)

def VLLDM : AXSI4FR<"vlldm${p}\t$Rn, $regs", 0, 1>,
Requires<[HasV8MMainline, Has8MSecExt]> {
let Inst{24-23} = 0b00;
let Inst{22} = 0;
let Inst{21} = 1;
let Inst{20} = 1;
let Inst{15-12} = 0;
let Inst{7-0} = 0;
let mayLoad = 1;
let Defs = [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, VPR, FPSCR, FPSCR_NZCV];
}

def VLSTM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
NoItinerary, "vlstm${p}\t$Rn", "", []>,
let Defs = [VPR, FPSCR, FPSCR_NZCV, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15];
let DecoderMethod = "DecodeLazyLoadStoreMul";
}
// T1: assembly does not contain the register list.
def : InstAlias<"vlldm${p}\t$Rn", (VLLDM GPRnopc:$Rn, pred:$p, 0)>,
Requires<[HasV8MMainline, Has8MSecExt]>;
// T2: assembly must contain the register list.
// The register list has no effect on the encoding, it is for assembly/disassembly purposes only.
def VLLDM_T2 : AXSI4FR<"vlldm${p}\t$Rn, $regs", 1, 1>,
Requires<[HasV8_1MMainline, Has8MSecExt]> {
let Defs = [VPR, FPSCR, FPSCR_NZCV, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31];
let DecoderMethod = "DecodeLazyLoadStoreMul";
}
// T1: assembly contains the register list.
// The register list has no effect on the encoding, it is for assembly/disassembly purposes only.
def VLSTM : AXSI4FR<"vlstm${p}\t$Rn, $regs", 0, 0>,
Requires<[HasV8MMainline, Has8MSecExt]> {
let Inst{24-23} = 0b00;
let Inst{22} = 0;
let Inst{21} = 1;
let Inst{20} = 0;
let Inst{15-12} = 0;
let Inst{7-0} = 0;
let mayStore = 1;
let Defs = [VPR, FPSCR, FPSCR_NZCV];
let Uses = [VPR, FPSCR, FPSCR_NZCV, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15];
let DecoderMethod = "DecodeLazyLoadStoreMul";
}
// T1: assembly does not contain the register list.
def : InstAlias<"vlstm${p}\t$Rn", (VLSTM GPRnopc:$Rn, pred:$p, 0)>,
Requires<[HasV8MMainline, Has8MSecExt]>;
// T2: assembly must contain the register list.
// The register list has no effect on the encoding, it is for assembly/disassembly purposes only.
def VLSTM_T2 : AXSI4FR<"vlstm${p}\t$Rn, $regs", 1, 0>,
Requires<[HasV8_1MMainline, Has8MSecExt]> {
let Defs = [VPR, FPSCR, FPSCR_NZCV];
let Uses = [VPR, FPSCR, FPSCR_NZCV, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31];
let DecoderMethod = "DecodeLazyLoadStoreMul";
}

def : InstAlias<"vpush${p} $r", (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r), 0>,
Expand Down
76 changes: 67 additions & 9 deletions llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -450,11 +450,12 @@ class ARMAsmParser : public MCTargetAsmParser {
bool validatetSTMRegList(const MCInst &Inst, const OperandVector &Operands,
unsigned ListNo);

int tryParseRegister();
int tryParseRegister(bool AllowOutofBoundReg = false);
bool tryParseRegisterWithWriteBack(OperandVector &);
int tryParseShiftRegister(OperandVector &);
bool parseRegisterList(OperandVector &, bool EnforceOrder = true,
bool AllowRAAC = false);
bool AllowRAAC = false,
bool AllowOutOfBoundReg = false);
bool parseMemory(OperandVector &);
bool parseOperand(OperandVector &, StringRef Mnemonic);
bool parseImmExpr(int64_t &Out);
Expand Down Expand Up @@ -4072,7 +4073,7 @@ ParseStatus ARMAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
/// Try to parse a register name. The token must be an Identifier when called,
/// and if it is a register name the token is eaten and the register number is
/// returned. Otherwise return -1.
int ARMAsmParser::tryParseRegister() {
int ARMAsmParser::tryParseRegister(bool AllowOutOfBoundReg) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier)) return -1;
Expand Down Expand Up @@ -4116,7 +4117,8 @@ int ARMAsmParser::tryParseRegister() {
}

// Some FPUs only have 16 D registers, so D16-D31 are invalid
if (!hasD32() && RegNum >= ARM::D16 && RegNum <= ARM::D31)
if (!AllowOutOfBoundReg && !hasD32() && RegNum >= ARM::D16 &&
RegNum <= ARM::D31)
return -1;

Parser.Lex(); // Eat identifier token.
Expand Down Expand Up @@ -4456,7 +4458,7 @@ insertNoDuplicates(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs,

/// Parse a register list.
bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder,
bool AllowRAAC) {
bool AllowRAAC, bool AllowOutOfBoundReg) {
MCAsmParser &Parser = getParser();
if (Parser.getTok().isNot(AsmToken::LCurly))
return TokError("Token is not a Left Curly Brace");
Expand Down Expand Up @@ -4510,7 +4512,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder,
return Error(RegLoc, "pseudo-register not allowed");
Parser.Lex(); // Eat the minus.
SMLoc AfterMinusLoc = Parser.getTok().getLoc();
int EndReg = tryParseRegister();
int EndReg = tryParseRegister(AllowOutOfBoundReg);
if (EndReg == -1)
return Error(AfterMinusLoc, "register expected");
if (EndReg == ARM::RA_AUTH_CODE)
Expand Down Expand Up @@ -4545,7 +4547,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder,
RegLoc = Parser.getTok().getLoc();
int OldReg = Reg;
const AsmToken RegTok = Parser.getTok();
Reg = tryParseRegister();
Reg = tryParseRegister(AllowOutOfBoundReg);
if (Reg == -1)
return Error(RegLoc, "register expected");
if (!AllowRAAC && Reg == ARM::RA_AUTH_CODE)
Expand Down Expand Up @@ -6085,8 +6087,11 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
}
case AsmToken::LBrac:
return parseMemory(Operands);
case AsmToken::LCurly:
return parseRegisterList(Operands, !Mnemonic.starts_with("clr"));
case AsmToken::LCurly: {
bool AllowOutOfBoundReg = Mnemonic == "vlldm" || Mnemonic == "vlstm";
return parseRegisterList(Operands, !Mnemonic.starts_with("clr"), false,
AllowOutOfBoundReg);
}
case AsmToken::Dollar:
case AsmToken::Hash: {
// #42 -> immediate
Expand Down Expand Up @@ -7596,6 +7601,33 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,

const unsigned Opcode = Inst.getOpcode();
switch (Opcode) {
case ARM::VLLDM:
case ARM::VLLDM_T2:
case ARM::VLSTM:
case ARM::VLSTM_T2: {
// Since in some cases both T1 and T2 are valid, tablegen can not always
// pick the correct instruction.
if (Operands.size() == 4) { // a register list has been provided
ARMOperand &Op = static_cast<ARMOperand &>(
*Operands[3]); // the register list, a dpr_reglist
assert(Op.isDPRRegList());
auto &RegList = Op.getRegList();
// T2 requires v8.1-M.Main (cannot be handled by tablegen)
if (RegList.size() == 32 && !hasV8_1MMainline()) {
return Error(Op.getEndLoc(), "T2 version requires v8.1-M.Main");
}
// When target has 32 D registers, T1 is undefined.
if (hasD32() && RegList.size() != 32) {
return Error(Op.getEndLoc(), "operand must be exactly {d0-d31}");
}
// When target has 16 D registers, both T1 and T2 are valid.
if (!hasD32() && (RegList.size() != 16 && RegList.size() != 32)) {
return Error(Op.getEndLoc(),
"operand must be exactly {d0-d15} (T1) or {d0-d31} (T2)");
}
}
return false;
}
case ARM::t2IT: {
// Encoding is unpredictable if it ever results in a notional 'NV'
// predicate. Since we don't parse 'NV' directly this means an 'AL'
Expand Down Expand Up @@ -8731,6 +8763,32 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
}

switch (Inst.getOpcode()) {
case ARM::VLLDM:
case ARM::VLSTM: {
// In some cases both T1 and T2 are valid, causing tablegen to pick T1
// instead of T2.
if (Operands.size() == 4) { // a register list has been provided
ARMOperand &Op = static_cast<ARMOperand &>(
*Operands[3]); // the register list, a dpr_reglist
assert(Op.isDPRRegList());
auto &RegList = Op.getRegList();
// When the register list is {d0-d31} the instruction has to be the T2
// variant
if (RegList.size() == 32) {
const unsigned Opcode =
(Inst.getOpcode() == ARM::VLLDM) ? ARM::VLLDM_T2 : ARM::VLSTM_T2;
MCInst TmpInst;
TmpInst.setOpcode(Opcode);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(Inst.getOperand(2));
TmpInst.addOperand(Inst.getOperand(3));
Inst = TmpInst;
return true;
}
}
return false;
}
// Alias for alternate form of 'ldr{,b}t Rt, [Rn], #imm' instruction.
case ARM::LDRT_POST:
case ARM::LDRBT_POST: {
Expand Down
23 changes: 23 additions & 0 deletions llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,9 @@ DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn, uint64_t Address,
static DecodeStatus DecodeT2AddSubSPImm(MCInst &Inst, unsigned Insn,
uint64_t Address,
const MCDisassembler *Decoder);
static DecodeStatus DecodeLazyLoadStoreMul(MCInst &Inst, unsigned Insn,
uint64_t Address,
const MCDisassembler *Decoder);

#include "ARMGenDisassemblerTables.inc"

Expand Down Expand Up @@ -7030,3 +7033,23 @@ static DecodeStatus DecodeT2AddSubSPImm(MCInst &Inst, unsigned Insn,

return DS;
}

/// Custom decoder for the lazy load/store multiple instructions (the
/// instructions that name this function as their DecoderMethod).
///
/// Rebuilds the three assembly operands in order: the base register '$Rn', the
/// predicate '$p', and a placeholder immediate for the register list '$regs'.
///
/// \param Inst     Instruction being populated with operands.
/// \param Insn     Raw instruction word; only bits 19-16 (Rn) are read here.
/// \param Address  Forwarded unchanged to the operand decoders.
/// \param Decoder  Forwarded unchanged to the operand decoders.
/// \returns MCDisassembler::Fail if the base register cannot be decoded,
///          otherwise the status accumulated while decoding it.
static DecodeStatus DecodeLazyLoadStoreMul(MCInst &Inst, unsigned Insn,
                                           uint64_t Address,
                                           const MCDisassembler *Decoder) {
  DecodeStatus Status = MCDisassembler::Success;

  // '$Rn': the register holding the memory location to save to / load from.
  // It is the only operand that is actually present in the encoding.
  const unsigned BaseReg = fieldFromInstruction(Insn, 16, 4);
  const DecodeStatus BaseRegStatus =
      DecodeGPRRegisterClass(Inst, BaseReg, Address, Decoder);
  if (!Check(Status, BaseRegStatus))
    return MCDisassembler::Fail;

  // '$p': the optional predicate, always decoded as AL. The status returned by
  // this call is not folded into Status.
  DecodePredicateOperand(Inst, ARMCC::AL, Address, Decoder);

  // '$regs': an immediate standing in for the floating-point register list.
  // The value is arbitrary and has no effect.
  Inst.addOperand(MCOperand::createImm(0));

  return Status;
}
Loading