[ARM] Fix problems with register list in vscclrm (#111825)

john-brawn-arm · web-flow · commit ad45eb4a9c74 · 2024-10-17T11:15:08.000+01:00
The register list in vscclrm is unusual in three ways:
 * The encoded size can be zero, meaning the list contains only vpr.
* Double-precision registers past d15 are permitted even when the
subtarget doesn't have them, they are instead ignored when the
instruction executes.
* The single-precision variant allows double-precision registers d16
onwards, which are encoded as a pair of single-precision registers.

Fixing this also incidentally changes a vlldm/vlstm error message: when
the first register is in the range d16-d31 we now get the "operand must
be exactly..." error instead of "register expected".
diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.td b/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -200,9 +200,9 @@ def FPEXC   : ARMReg<8,  "fpexc">;
 def FPINST  : ARMReg<9,  "fpinst">;
 def FPINST2 : ARMReg<10, "fpinst2">;
 // These encodings aren't actual instruction encodings, their encoding depends
-// on the instruction they are used in and for VPR 32 was chosen such that it
+// on the instruction they are used in and for VPR 64 was chosen such that it
 // always comes last in spr_reglist_with_vpr.
-def VPR     : ARMReg<32, "vpr">;
+def VPR     : ARMReg<64, "vpr">;
 def FPSCR_NZCVQC
             : ARMReg<2, "fpscr_nzcvqc">;
 def P0      : ARMReg<13, "p0">;
diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -446,8 +446,8 @@ class ARMAsmParser : public MCTargetAsmParser {
   int tryParseShiftRegister(OperandVector &);
   std::optional<ARM_AM::ShiftOpc> tryParseShiftToken();
   bool parseRegisterList(OperandVector &, bool EnforceOrder = true,
-                         bool AllowRAAC = false,
-                         bool AllowOutOfBoundReg = false);
+                         bool AllowRAAC = false, bool IsLazyLoadStore = false,
+                         bool IsVSCCLRM = false);
   bool parseMemory(OperandVector &);
   bool parseOperand(OperandVector &, StringRef Mnemonic);
   bool parseImmExpr(int64_t &Out);
@@ -3811,6 +3811,10 @@ class ARMOperand : public MCParsedAsmOperand {
         Kind = k_FPSRegisterListWithVPR;
       else
         Kind = k_SPRRegisterList;
+    } else if (Regs.front().second == ARM::VPR) {
+      assert(Regs.size() == 1 &&
+             "Register list starting with VPR expected to only contain VPR");
+      Kind = k_FPSRegisterListWithVPR;
     }
 
     if (Kind == k_RegisterList && Regs.back().second == ARM::APSR)
@@ -4608,7 +4612,8 @@ insertNoDuplicates(SmallVectorImpl<std::pair<unsigned, MCRegister>> &Regs,
 
 /// Parse a register list.
 bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder,
-                                     bool AllowRAAC, bool AllowOutOfBoundReg) {
+                                     bool AllowRAAC, bool IsLazyLoadStore,
+                                     bool IsVSCCLRM) {
   MCAsmParser &Parser = getParser();
   if (Parser.getTok().isNot(AsmToken::LCurly))
     return TokError("Token is not a Left Curly Brace");
@@ -4618,15 +4623,23 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder,
 
   // Check the first register in the list to see what register class
   // this is a list of.
-  MCRegister Reg = tryParseRegister();
+  bool AllowOutOfBoundReg = IsLazyLoadStore || IsVSCCLRM;
+  MCRegister Reg = tryParseRegister(AllowOutOfBoundReg);
   if (!Reg)
     return Error(RegLoc, "register expected");
   if (!AllowRAAC && Reg == ARM::RA_AUTH_CODE)
     return Error(RegLoc, "pseudo-register not allowed");
-  // The reglist instructions have at most 16 registers, so reserve
+  // The reglist instructions have at most 32 registers, so reserve
   // space for that many.
   int EReg = 0;
-  SmallVector<std::pair<unsigned, MCRegister>, 16> Registers;
+  SmallVector<std::pair<unsigned, MCRegister>, 32> Registers;
+
+  // Single-precision VSCCLRM can have double-precision registers in the
+  // register list. When VSCCLRMAdjustEncoding is true then we've switched from
+  // single-precision to double-precision and we pretend that these registers
+  // are encoded as S32 onwards, which we can do by adding 16 to the encoding
+  // value.
+  bool VSCCLRMAdjustEncoding = false;
 
   // Allow Q regs and just interpret them as the two D sub-registers.
   if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
@@ -4645,6 +4658,8 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder,
     RC = &ARMMCRegisterClasses[ARM::SPRRegClassID];
   else if (ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID].contains(Reg))
     RC = &ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID];
+  else if (Reg == ARM::VPR)
+    RC = &ARMMCRegisterClasses[ARM::FPWithVPRRegClassID];
   else
     return Error(RegLoc, "invalid register in register list");
 
@@ -4685,6 +4700,8 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder,
       while (Reg != EndReg) {
         Reg = getNextRegister(Reg);
         EReg = MRI->getEncodingValue(Reg);
+        if (VSCCLRMAdjustEncoding)
+          EReg += 16;
         if (!insertNoDuplicates(Registers, EReg, Reg)) {
           Warning(AfterMinusLoc, StringRef("duplicated register (") +
                                      ARMInstPrinter::getRegisterName(Reg) +
@@ -4696,6 +4713,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder,
     Parser.Lex(); // Eat the comma.
     RegLoc = Parser.getTok().getLoc();
     MCRegister OldReg = Reg;
+    int EOldReg = EReg;
     const AsmToken RegTok = Parser.getTok();
     Reg = tryParseRegister(AllowOutOfBoundReg);
     if (!Reg)
@@ -4727,6 +4745,12 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder,
       }
       continue;
     }
+    // VSCCLRM can switch from single-precision to double-precision only when
+    // S31 is followed by D16.
+    if (IsVSCCLRM && OldReg == ARM::S31 && Reg == ARM::D16) {
+      VSCCLRMAdjustEncoding = true;
+      RC = &ARMMCRegisterClasses[ARM::FPWithVPRRegClassID];
+    }
     // The register must be in the same register class as the first.
     if ((Reg == ARM::RA_AUTH_CODE &&
          RC != &ARMMCRegisterClasses[ARM::GPRRegClassID]) ||
@@ -4736,8 +4760,10 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder,
     // exception is CLRM, which is order-independent anyway, so
     // there's no potential for confusion if you write clrm {r2,r1}
     // instead of clrm {r1,r2}.
-    if (EnforceOrder &&
-        MRI->getEncodingValue(Reg) < MRI->getEncodingValue(OldReg)) {
+    EReg = MRI->getEncodingValue(Reg);
+    if (VSCCLRMAdjustEncoding)
+      EReg += 16;
+    if (EnforceOrder && EReg < EOldReg) {
       if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
         Warning(RegLoc, "register list not in ascending order");
       else if (!ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID].contains(Reg))
@@ -4746,9 +4772,9 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder,
     // VFP register lists must also be contiguous.
     if (RC != &ARMMCRegisterClasses[ARM::GPRRegClassID] &&
         RC != &ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID] &&
-        Reg != OldReg + 1)
+        EReg != EOldReg + 1)
       return Error(RegLoc, "non-contiguous register range");
-    EReg = MRI->getEncodingValue(Reg);
+
     if (!insertNoDuplicates(Registers, EReg, Reg)) {
       Warning(RegLoc, "duplicated register (" + RegTok.getString() +
                           ") in register list");
@@ -6336,9 +6362,10 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
   case AsmToken::LBrac:
     return parseMemory(Operands);
   case AsmToken::LCurly: {
-    bool AllowOutOfBoundReg = Mnemonic == "vlldm" || Mnemonic == "vlstm";
+    bool IsLazyLoadStore = Mnemonic == "vlldm" || Mnemonic == "vlstm";
+    bool IsVSCCLRM = Mnemonic == "vscclrm";
     return parseRegisterList(Operands, !Mnemonic.starts_with("clr"), false,
-                             AllowOutOfBoundReg);
+                             IsLazyLoadStore, IsVSCCLRM);
   }
   case AsmToken::Dollar:
   case AsmToken::Hash: {
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -1529,15 +1529,19 @@ static const uint16_t DPRDecoderTable[] = {
     ARM::D28, ARM::D29, ARM::D30, ARM::D31
 };
 
-static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
-                                           uint64_t Address,
-                                           const MCDisassembler *Decoder) {
+// Does this instruction/subtarget permit use of registers d16-d31?
+static bool PermitsD32(const MCInst &Inst, const MCDisassembler *Decoder) {
+  if (Inst.getOpcode() == ARM::VSCCLRMD || Inst.getOpcode() == ARM::VSCCLRMS)
+    return true;
   const FeatureBitset &featureBits =
     ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
+  return featureBits[ARM::FeatureD32];
+}
 
-  bool hasD32 = featureBits[ARM::FeatureD32];
-
-  if (RegNo > 31 || (!hasD32 && RegNo > 15))
+static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
+                                           uint64_t Address,
+                                           const MCDisassembler *Decoder) {
+  if (RegNo > (PermitsD32(Inst, Decoder) ? 31 : 15))
     return MCDisassembler::Fail;
 
   unsigned Register = DPRDecoderTable[RegNo];
@@ -1816,10 +1820,11 @@ static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
   unsigned regs = fieldFromInstruction(Val, 1, 7);
 
   // In case of unpredictable encoding, tweak the operands.
-  if (regs == 0 || regs > 16 || (Vd + regs) > 32) {
-    regs = Vd + regs > 32 ? 32 - Vd : regs;
+  unsigned MaxReg = PermitsD32(Inst, Decoder) ? 32 : 16;
+  if (regs == 0 || (Vd + regs) > MaxReg) {
+    regs = Vd + regs > MaxReg ? MaxReg - Vd : regs;
     regs = std::max( 1u, regs);
-    regs = std::min(16u, regs);
+    regs = std::min(MaxReg, regs);
     S = MCDisassembler::SoftFail;
   }
 
@@ -6447,20 +6452,31 @@ static DecodeStatus DecodeVSCCLRM(MCInst &Inst, unsigned Insn, uint64_t Address,
 
   Inst.addOperand(MCOperand::createImm(ARMCC::AL));
   Inst.addOperand(MCOperand::createReg(0));
-  if (Inst.getOpcode() == ARM::VSCCLRMD) {
-    unsigned reglist = (fieldFromInstruction(Insn, 1, 7) << 1) |
-                       (fieldFromInstruction(Insn, 12, 4) << 8) |
+  unsigned regs = fieldFromInstruction(Insn, 0, 8);
+  if (regs == 0) {
+    // Register list contains only VPR
+  } else if (Inst.getOpcode() == ARM::VSCCLRMD) {
+    unsigned reglist = regs | (fieldFromInstruction(Insn, 12, 4) << 8) |
                        (fieldFromInstruction(Insn, 22, 1) << 12);
     if (!Check(S, DecodeDPRRegListOperand(Inst, reglist, Address, Decoder))) {
       return MCDisassembler::Fail;
     }
   } else {
-    unsigned reglist = fieldFromInstruction(Insn, 0, 8) |
-                       (fieldFromInstruction(Insn, 22, 1) << 8) |
-                       (fieldFromInstruction(Insn, 12, 4) << 9);
-    if (!Check(S, DecodeSPRRegListOperand(Inst, reglist, Address, Decoder))) {
-      return MCDisassembler::Fail;
-    }
+    unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 1) |
+                  fieldFromInstruction(Insn, 22, 1);
+    // Registers past s31 are permitted and treated as being half of a d
+    // register, though both halves of each d register must be present.
+    unsigned max_reg = Vd + regs;
+    if (max_reg > 64 || (max_reg > 32 && (max_reg & 1)))
+      S = MCDisassembler::SoftFail;
+    unsigned max_sreg = std::min(32u, max_reg);
+    unsigned max_dreg = std::min(32u, max_reg / 2);
+    for (unsigned i = Vd; i < max_sreg; ++i)
+      if (!Check(S, DecodeSPRRegisterClass(Inst, i, Address, Decoder)))
+        return MCDisassembler::Fail;
+    for (unsigned i = 16; i < max_dreg; ++i)
+      if (!Check(S, DecodeDPRRegisterClass(Inst, i, Address, Decoder)))
+        return MCDisassembler::Fail;
   }
   Inst.addOperand(MCOperand::createReg(ARM::VPR));
 
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
@@ -851,7 +851,7 @@ void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
 void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
                                        const MCSubtargetInfo &STI,
                                        raw_ostream &O) {
-  if (MI->getOpcode() != ARM::t2CLRM) {
+  if (MI->getOpcode() != ARM::t2CLRM && MI->getOpcode() != ARM::VSCCLRMS) {
     assert(is_sorted(drop_begin(*MI, OpNum),
                      [&](const MCOperand &LHS, const MCOperand &RHS) {
                        return MRI.getEncodingValue(LHS.getReg()) <
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -1743,15 +1743,28 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op,
 
   unsigned Binary = 0;
 
-  if (SPRRegs || DPRRegs) {
+  if (SPRRegs || DPRRegs || Reg == ARM::VPR) {
     // VLDM/VSTM/VSCCLRM
     unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg);
     unsigned NumRegs = (MI.getNumOperands() - Op) & 0xff;
     Binary |= (RegNo & 0x1f) << 8;
 
-    // Ignore VPR
-    if (MI.getOpcode() == ARM::VSCCLRMD || MI.getOpcode() == ARM::VSCCLRMS)
+    if (MI.getOpcode() == ARM::VSCCLRMD)
+      // Ignore VPR
       --NumRegs;
+    else if (MI.getOpcode() == ARM::VSCCLRMS) {
+      // The register list can contain both S registers and D registers, with D
+      // registers counting as two registers. VPR doesn't count towards the
+      // number of registers.
+      NumRegs = 0;
+      for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) {
+        Reg = MI.getOperand(I).getReg();
+        if (ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg))
+          NumRegs += 1;
+        else if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg))
+          NumRegs += 2;
+      }
+    }
     if (SPRRegs)
       Binary |= NumRegs;
     else
diff --git a/llvm/test/MC/ARM/vlstm-vlldm-diag.s b/llvm/test/MC/ARM/vlstm-vlldm-diag.s
@@ -36,6 +36,14 @@ vlldm r8, {d3 - d31}
 // ERR: error: operand must be exactly {d0-d15} (T1) or {d0-d31} (T2)
 // ERR-NEXT: vlldm r8, {d3 - d31}
 
+vlstm r8, {d31}
+// ERR: error: operand must be exactly {d0-d15} (T1) or {d0-d31} (T2)
+// ERR-NEXT: vlstm r8, {d31}
+
+vlldm r8, {d31}
+// ERR: error: operand must be exactly {d0-d15} (T1) or {d0-d31} (T2)
+// ERR-NEXT: vlldm r8, {d31}
+
 vlstm r8, {d0 - d35}
 // ERR: error: register expected
 // ERR-NEXT: vlstm r8, {d0 - d35}
diff --git a/llvm/test/MC/ARM/vscclrm-asm.s b/llvm/test/MC/ARM/vscclrm-asm.s
@@ -35,11 +35,62 @@ it hi
 // CHECK: vscclrmhi          {s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr} @ encoding: [0xdf,0xec,0x1d,0x1a]
 vscclrmhi {s3-s31, vpr}
 
+// CHECK: vscclrm            {vpr} @ encoding: [0x9f,0xec,0x00,0x0a]
+vscclrm {vpr}
+
+// CHECK: vscclrm            {d0, d1, d2, d3, d4, d5, d6, d7, d8, d9, d10, d11, d12, d13, d14, d15, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31, vpr} @ encoding: [0x9f,0xec,0x40,0x0b]
+vscclrm {d0-d31, vpr}
+
+// CHECK: vscclrm            {d31, vpr} @ encoding: [0xdf,0xec,0x02,0xfb]
+vscclrm {d31, vpr}
+
+// CHECK: vscclrm            {s31, d16, vpr} @ encoding: [0xdf,0xec,0x03,0xfa]
+vscclrm {s31, d16, vpr}
+
+// CHECK: vscclrm            {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31, vpr} @ encoding: [0x9f,0xec,0x40,0x0a]
+vscclrm {s0-s31, d16-d31, vpr}
+
+// CHECK: vscclrm            {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31, vpr} @ encoding: [0x9f,0xec,0x40,0x0a]
+vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31, vpr}
+
 // ERROR: non-contiguous register range
 vscclrm {s0, s3-s4, vpr}
 
+// ERROR: non-contiguous register range
+vscclrm {s31, d16, s30, vpr}
+
 // ERROR: register expected
 vscclrm {s32, vpr}
 
+// ERROR: register expected
+vscclrm {d32, vpr}
+
+// ERROR: register expected
+vscclrm {s31-s32, vpr}
+
+// ERROR: register expected
+vscclrm {d31-d32, vpr}
+
 // ERROR: invalid operand for instruction
 vscclrm {s0-s1}
+
+// ERROR: register list not in ascending order
+vscclrm {vpr, s0}
+
+// ERROR: register list not in ascending order
+vscclrm {vpr, s31}
+
+// ERROR: register list not in ascending order
+vscclrm {vpr, d0}
+
+// ERROR: register list not in ascending order
+vscclrm {vpr, d31}
+
+// ERROR: invalid register in register list
+vscclrm {s0, d0, vpr}
+
+// ERROR: invalid register in register list
+vscclrm {s0, d1, vpr}
+
+// ERROR: invalid register in register list
+vscclrm {d16, s31, vpr}
diff --git a/llvm/test/MC/Disassembler/ARM/vscclrm.txt b/llvm/test/MC/Disassembler/ARM/vscclrm.txt