-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AArch64][FEAT_CMPBR] Codegen for Armv9.6-a compare-and-branch #116465
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64][FEAT_CMPBR] Codegen for Armv9.6-a compare-and-branch #116465
Conversation
@llvm/pr-subscribers-backend-aarch64 Author: David Tellenbach (dtellenbach) ChangesThis patch adds codegen for all Armv9.6-a compare-and-branch instructions that operate on full w or x registers. The instruction variants operating on half-words (cbh) and bytes (cbb) are added in a subsequent patch. Since CB doesn't use standard 4-bit Arm condition codes but a reduced set of conditions, encoded in 3 bits, some conditions are expressed by modifying operands, namely incrementing or decrementing immediate operands and swapping register operands. To invert a CB instruction it's therefore not enough to just modify the condition code, which doesn't play particularly well with how the backend is currently organized. We therefore introduce a number of pseudos which operate on the standard 4-bit condition codes and lower them late during codegen. Patch is 48.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116465.diff 13 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index af26fc62292377..0a403f077f23b6 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -181,6 +181,9 @@ class AArch64AsmPrinter : public AsmPrinter {
/// pseudo instructions.
bool lowerPseudoInstExpansion(const MachineInstr *MI, MCInst &Inst);
+ // Emit expansion of Compare-and-branch pseudo instructions
+ void emitCBPseudoExpansion(const MachineInstr *MI);
+
void EmitToStreamer(MCStreamer &S, const MCInst &Inst);
void EmitToStreamer(const MCInst &Inst) {
EmitToStreamer(*OutStreamer, Inst);
@@ -2427,6 +2430,151 @@ AArch64AsmPrinter::lowerBlockAddressConstant(const BlockAddress &BA) {
return BAE;
}
+void AArch64AsmPrinter::emitCBPseudoExpansion(const MachineInstr *MI) {
+ bool IsImm = false;
+ bool Is32Bit = false;
+
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("This is not a CB pseudo instruction");
+ case AArch64::CBWPrr:
+ IsImm = false;
+ Is32Bit = true;
+ break;
+ case AArch64::CBXPrr:
+ IsImm = false;
+ Is32Bit = false;
+ break;
+ case AArch64::CBWPri:
+ IsImm = true;
+ Is32Bit = true;
+ break;
+ case AArch64::CBXPri:
+ IsImm = true;
+ Is32Bit = false;
+ break;
+ }
+
+ AArch64CC::CondCode CC =
+ static_cast<AArch64CC::CondCode>(MI->getOperand(0).getImm());
+ bool NeedsRegSwap = false;
+ bool NeedsImmDec = false;
+ bool NeedsImmInc = false;
+
+
+ unsigned MCOpC;
+ switch(CC) {
+ default:
+ llvm_unreachable("Invalid CB condition code");
+ case AArch64CC::EQ:
+ MCOpC = IsImm ? (Is32Bit ? AArch64::CBEQWri : AArch64::CBEQXri)
+ : (Is32Bit ? AArch64::CBEQWrr : AArch64::CBEQXrr);
+ NeedsRegSwap = false;
+ NeedsImmDec = false;
+ NeedsImmInc = false;
+ break;
+ case AArch64CC::NE:
+ MCOpC = IsImm ? (Is32Bit ? AArch64::CBNEWri : AArch64::CBNEXri)
+ : (Is32Bit ? AArch64::CBNEWrr : AArch64::CBNEXrr);
+ NeedsRegSwap = false;
+ NeedsImmDec = false;
+ NeedsImmInc = false;
+ break;
+ case AArch64CC::HS:
+ MCOpC = IsImm ? (Is32Bit ? AArch64::CBHIWri : AArch64::CBHIXri)
+ : (Is32Bit ? AArch64::CBHSWrr : AArch64::CBHSXrr);
+ NeedsRegSwap = false;
+ NeedsImmDec = true;
+ NeedsImmInc = false;
+ break;
+ case AArch64CC::LO:
+ MCOpC = IsImm ? (Is32Bit ? AArch64::CBLOWri : AArch64::CBLOXri)
+ : (Is32Bit ? AArch64::CBHIWrr : AArch64::CBHIXrr);
+ NeedsRegSwap = true;
+ NeedsImmDec = false;
+ NeedsImmInc = false;
+ break;
+ case AArch64CC::HI:
+ MCOpC = IsImm ? (Is32Bit ? AArch64::CBHIWri : AArch64::CBHIXri)
+ : (Is32Bit ? AArch64::CBHIWrr : AArch64::CBHIXrr);
+ NeedsRegSwap = false;
+ NeedsImmDec = false;
+ NeedsImmInc = false;
+ break;
+ case AArch64CC::LS:
+ MCOpC = IsImm ? (Is32Bit ? AArch64::CBLOWri : AArch64::CBLOXri)
+ : (Is32Bit ? AArch64::CBHSWrr : AArch64::CBHSXrr);
+ NeedsRegSwap = !IsImm;
+ NeedsImmDec = false;
+ NeedsImmInc = IsImm;
+ break;
+ case AArch64CC::GE:
+ MCOpC = IsImm ? (Is32Bit ? AArch64::CBGTWri : AArch64::CBGTXri)
+ : (Is32Bit ? AArch64::CBGEWrr : AArch64::CBGEXrr);
+ NeedsRegSwap = false;
+ NeedsImmDec = IsImm;
+ NeedsImmInc = false;
+ break;
+ case AArch64CC::LT:
+ MCOpC = IsImm ? (Is32Bit ? AArch64::CBLTWri : AArch64::CBLTXri)
+ : (Is32Bit ? AArch64::CBGTWrr : AArch64::CBGTXrr);
+ NeedsRegSwap = !IsImm;
+ NeedsImmDec = false;
+ NeedsImmInc = false;
+ break;
+ case AArch64CC::GT:
+ MCOpC = IsImm ? (Is32Bit ? AArch64::CBGTWri : AArch64::CBGTXri)
+ : (Is32Bit ? AArch64::CBGTWrr : AArch64::CBGTXrr);
+ NeedsRegSwap = false;
+ NeedsImmDec = false;
+ NeedsImmInc = false;
+ break;
+ case AArch64CC::LE:
+ MCOpC = IsImm ? (Is32Bit ? AArch64::CBLTWri : AArch64::CBLTXri)
+ : (Is32Bit ? AArch64::CBGEWrr : AArch64::CBGEXrr);
+ NeedsRegSwap = !IsImm;
+ NeedsImmDec = false;
+ NeedsImmInc = IsImm;
+ break;
+ }
+
+ MCInst Inst;
+ Inst.setOpcode(MCOpC);
+
+ MCOperand Lhs, Rhs, Trgt;
+ lowerOperand(MI->getOperand(1), Lhs);
+ lowerOperand(MI->getOperand(2), Rhs);
+ lowerOperand(MI->getOperand(3), Trgt);
+
+ if (NeedsRegSwap) {
+ assert(
+ !IsImm &&
+ "Unexpected register swap for CB instruction with immediate operand");
+ assert(Lhs.isReg() && "Expected register operand for CB");
+ assert(Rhs.isReg() && "Expected register operand for CB");
+ // Swap register operands
+ Inst.addOperand(Rhs);
+ Inst.addOperand(Lhs);
+ } else if (IsImm && NeedsImmDec) {
+ assert(IsImm && "Unexpected immediate decrement for CB instruction with "
+ "reg-reg operands");
+ Rhs.setImm(Rhs.getImm() - 1);
+ Inst.addOperand(Lhs);
+ Inst.addOperand(Rhs);
+ } else if (NeedsImmInc) {
+ assert(IsImm && "Unexpected immediate increment for CB instruction with "
+ "reg-reg operands");
+ Rhs.setImm(Rhs.getImm() + 1);
+ Inst.addOperand(Lhs);
+ Inst.addOperand(Rhs);
+ } else {
+ Inst.addOperand(Lhs);
+ Inst.addOperand(Rhs);
+ }
+ Inst.addOperand(Trgt);
+ EmitToStreamer(*OutStreamer, Inst);
+}
+
// Simple pseudo-instructions have their lowering (with expansion to real
// instructions) auto-generated.
#include "AArch64GenMCPseudoLowering.inc"
@@ -2948,6 +3096,13 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
TS->emitARM64WinCFISaveAnyRegQPX(MI->getOperand(0).getImm(),
-MI->getOperand(2).getImm());
return;
+
+ case AArch64::CBWPri:
+ case AArch64::CBXPri:
+ case AArch64::CBWPrr:
+ case AArch64::CBXPrr:
+ emitCBPseudoExpansion(MI);
+ return;
}
// Finally, do the automated lowerings for everything else.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9d1c3d4eddc880..3e35a85d4fe806 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2954,6 +2954,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::CTTZ_ELTS)
MAKE_CASE(AArch64ISD::CALL_ARM64EC_TO_X64)
MAKE_CASE(AArch64ISD::URSHR_I_PRED)
+ MAKE_CASE(AArch64ISD::CBRR)
+ MAKE_CASE(AArch64ISD::CBRI)
}
#undef MAKE_CASE
return nullptr;
@@ -10396,6 +10398,28 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
}
+ // Try to emit Armv9.6 CB instructions. We prefer tb{n}z/cb{n}z due to their
+ // larger branch displacement but do prefer CB over cmp + br.
+ if (Subtarget->hasCMPBR() &&
+ AArch64CC::isValidCBCond(changeIntCCToAArch64CC(CC)) &&
+ ProduceNonFlagSettingCondBr) {
+ AArch64CC::CondCode ACC = changeIntCCToAArch64CC(CC);
+ unsigned Opc = AArch64ISD::CBRR;
+ if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(RHS)) {
+ APInt NewImm = Imm->getAPIntValue();
+ if (ACC == AArch64CC::GE || ACC == AArch64CC::HS)
+ NewImm = Imm->getAPIntValue() - 1;
+ else if (ACC == AArch64CC::LE || ACC == AArch64CC::LS)
+ NewImm = Imm->getAPIntValue() + 1;
+
+ if (NewImm.uge(0) && NewImm.ult(64))
+ Opc = AArch64ISD::CBRI;
+ }
+
+ SDValue Cond = DAG.getTargetConstant(ACC, dl, MVT::i32);
+ return DAG.getNode(Opc, dl, MVT::Other, Chain, Cond, LHS, RHS, Dest);
+ }
+
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index d11da64d3f84eb..7de5f4490e78db 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -520,6 +520,10 @@ enum NodeType : unsigned {
MOPS_MEMSET_TAGGING,
MOPS_MEMCOPY,
MOPS_MEMMOVE,
+
+ // Compare-and-branch
+ CBRR,
+ CBRI,
};
} // end namespace AArch64ISD
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 15d4e93b915c14..ca2bfae8d7e8a0 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -13065,6 +13065,7 @@ class BaseCmpBranchRegister<RegisterClass regtype, bit sf, bits<3> cc,
Sched<[WriteBr]> {
let isBranch = 1;
let isTerminator = 1;
+ let isCompare = 1;
bits<5> Rm;
bits<5> Rt;
@@ -13091,6 +13092,7 @@ class BaseCmpBranchImmediate<RegisterClass regtype, bit sf, bits<3> cc,
Sched<[WriteBr]> {
let isBranch = 1;
let isTerminator = 1;
+ let isCompare = 1;
bits<5> Rt;
bits<6> imm;
@@ -13131,6 +13133,23 @@ multiclass CmpBranchRegisterAlias<string mnemonic, string insn> {
def : InstAlias<mnemonic # "\t$Rt, $Rm, $target",
(!cast<Instruction>(insn # "Xrr") GPR64:$Rm, GPR64:$Rt, am_brcmpcond:$target), 0>;
}
+
+class CmpBranchRegisterPseudo<RegisterClass regtype>
+ : Pseudo<(outs), (ins ccode:$Cond, regtype:$Rt, regtype:$Rm, am_brcmpcond:$Target), []>,
+ Sched<[WriteBr]> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isCompare = 1;
+}
+
+class CmpBranchImmediatePseudo<RegisterClass regtype, ImmLeaf imtype>
+ : Pseudo<(outs), (ins ccode:$Cond, regtype:$Rt, imtype:$Imm, am_brcmpcond:$Target), []>,
+ Sched<[WriteBr]> {
+ let isBranch = true;
+ let isTerminator = true;
+ let isCompare = true;
+}
+
//----------------------------------------------------------------------------
// Allow the size specifier tokens to be upper case, not just lower.
def : TokenAlias<".4B", ".4b">; // Add dot product
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index a470c03efd5eb4..73cc235982c392 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -62,6 +62,10 @@ using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"
+static cl::opt<unsigned>
+ CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9),
+ cl::desc("Restrict range of CB instructions (DEBUG)"));
+
static cl::opt<unsigned> TBZDisplacementBits(
"aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
@@ -216,6 +220,17 @@ static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
Cond.push_back(LastInst->getOperand(0));
Cond.push_back(LastInst->getOperand(1));
+ break;
+ case AArch64::CBWPri:
+ case AArch64::CBXPri:
+ case AArch64::CBWPrr:
+ case AArch64::CBXPrr:
+ Target = LastInst->getOperand(3).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(-1));
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
+ Cond.push_back(LastInst->getOperand(0));
+ Cond.push_back(LastInst->getOperand(1));
+ Cond.push_back(LastInst->getOperand(2));
}
}
@@ -237,6 +252,11 @@ static unsigned getBranchDisplacementBits(unsigned Opc) {
return CBZDisplacementBits;
case AArch64::Bcc:
return BCCDisplacementBits;
+ case AArch64::CBWPri:
+ case AArch64::CBXPri:
+ case AArch64::CBWPrr:
+ case AArch64::CBXPrr:
+ return CBDisplacementBits;
}
}
@@ -266,6 +286,11 @@ AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
case AArch64::CBNZX:
case AArch64::Bcc:
return MI.getOperand(1).getMBB();
+ case AArch64::CBWPri:
+ case AArch64::CBXPri:
+ case AArch64::CBWPrr:
+ case AArch64::CBXPrr:
+ return MI.getOperand(3).getMBB();
}
}
@@ -543,6 +568,17 @@ bool AArch64InstrInfo::reverseBranchCondition(
case AArch64::TBNZX:
Cond[1].setImm(AArch64::TBZX);
break;
+
+ // Cond is { -1, Opcode, CC, Op0, Op1 }
+ case AArch64::CBWPri:
+ case AArch64::CBXPri:
+ case AArch64::CBWPrr:
+ case AArch64::CBXPrr: {
+ // Pseudos using standard 4bit Arm condition codes
+ AArch64CC::CondCode CC =
+ static_cast<AArch64CC::CondCode>(Cond[2].getImm());
+ Cond[2].setImm(AArch64CC::getInvertedCondCode(CC));
+ } break;
}
}
@@ -593,10 +629,19 @@ void AArch64InstrInfo::instantiateCondBranch(
} else {
// Folded compare-and-branch
// Note that we use addOperand instead of addReg to keep the flags.
+
+ // cbz, cbnz
const MachineInstrBuilder MIB =
BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
+
+ // tbz/tbnz
if (Cond.size() > 3)
- MIB.addImm(Cond[3].getImm());
+ MIB.add(Cond[3]);
+
+ // cb
+ if (Cond.size() > 4)
+ MIB.add(Cond[4]);
+
MIB.addMBB(TBB);
}
}
@@ -842,6 +887,51 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
break;
}
+ case 5: { // cb
+ // We must insert a cmp, that is a subs
+ // 0 1 2 3 4
+ // Cond is { -1, Opcode, CC, Op0, Op1 }
+ unsigned SUBSOpC, SUBSDestReg;
+ bool IsImm = false;
+ switch (Cond[1].getImm()) {
+ default:
+ llvm_unreachable("Unknown branch opcode in Cond");
+ case AArch64::CBWPri:
+ SUBSOpC = AArch64::SUBSWri;
+ SUBSDestReg = AArch64::WZR;
+ IsImm = true;
+ CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
+ break;
+ case AArch64::CBXPri:
+ SUBSOpC = AArch64::SUBSXri;
+ SUBSDestReg = AArch64::XZR;
+ IsImm = true;
+ CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
+ break;
+ case AArch64::CBWPrr:
+ SUBSOpC = AArch64::SUBSWrr;
+ SUBSDestReg = AArch64::WZR;
+ IsImm = false;
+ CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
+ break;
+ case AArch64::CBXPrr:
+ SUBSOpC = AArch64::SUBSXrr;
+ SUBSDestReg = AArch64::XZR;
+ IsImm = false;
+ CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
+ break;
+ }
+
+ if (IsImm)
+ BuildMI(MBB, I, DL, get(SUBSOpC), SUBSDestReg)
+ .addReg(Cond[3].getReg())
+ .addImm(Cond[4].getImm())
+ .addImm(0);
+ else
+ BuildMI(MBB, I, DL, get(SUBSOpC), SUBSDestReg)
+ .addReg(Cond[3].getReg())
+ .addReg(Cond[4].getReg());
+ }
}
unsigned Opc = 0;
@@ -8393,6 +8483,10 @@ bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
default:
llvm_unreachable("Unknown branch instruction?");
case AArch64::Bcc:
+ case AArch64::CBWPri:
+ case AArch64::CBXPri:
+ case AArch64::CBWPrr:
+ case AArch64::CBXPrr:
return false;
case AArch64::CBZW:
case AArch64::CBZX:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index e37f70f7d985de..151e397edd6195 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -693,6 +693,10 @@ static inline bool isCondBranchOpcode(int Opc) {
case AArch64::TBZX:
case AArch64::TBNZW:
case AArch64::TBNZX:
+ case AArch64::CBWPri:
+ case AArch64::CBXPri:
+ case AArch64::CBWPrr:
+ case AArch64::CBXPrr:
return true;
default:
return false;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index e4ad27d4bcfc00..67efe50bc1f5f3 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -508,6 +508,9 @@ def SDT_AArch64TBL : SDTypeProfile<1, 2, [
SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
]>;
+def SDT_AArch64cbrr : SDTypeProfile<0, 4, [SDTCisVT<0, i32>, SDTCisInt<1>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT>]>;
+def SDT_AArch64cbri : SDTypeProfile<0, 4, [SDTCisVT<0, i32>, SDTCisInt<1>, SDTCisInt<2>, SDTCisVT<3, OtherVT>]>;
+
// non-extending masked load fragment.
def nonext_masked_load :
PatFrag<(ops node:$ptr, node:$pred, node:$def),
@@ -684,6 +687,8 @@ def topbitsallzero64: PatLeaf<(i64 GPR64:$src), [{
}]>;
// Node definitions.
+def AArch64CBrr : SDNode<"AArch64ISD::CBRR", SDT_AArch64cbrr, [SDNPHasChain]>;
+def AArch64CBri : SDNode<"AArch64ISD::CBRI", SDT_AArch64cbri, [SDNPHasChain]>;
def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
@@ -10481,6 +10486,10 @@ defm : PromoteBinaryv8f16Tov4f32<any_fdiv, FDIVv4f32>;
defm : PromoteBinaryv8f16Tov4f32<any_fmul, FMULv4f32>;
defm : PromoteBinaryv8f16Tov4f32<any_fsub, FSUBv4f32>;
+//===----------------------------------------------------------------------===//
+// Compare and Branch (FEAT_CMPBR)
+//===----------------------------------------------------------------------===//
+
let Predicates = [HasCMPBR] in {
defm CBGT : CmpBranchRegister<0b000, "cbgt">;
defm CBGE : CmpBranchRegister<0b001, "cbge">;
@@ -10529,6 +10538,22 @@ let Predicates = [HasCMPBR] in {
defm : CmpBranchWRegisterAlias<"cbhlo", "CBHHI">;
defm : CmpBranchWRegisterAlias<"cbhls", "CBHHS">;
defm : CmpBranchWRegisterAlias<"cbhlt", "CBHGT">;
+
+ // Pseudos for codegen
+ def CBWPrr : CmpBranchRegisterPseudo<GPR32>;
+ def CBXPrr : CmpBranchRegisterPseudo<GPR64>;
+ def CBWPri : CmpBranchImmediatePseudo<GPR32, uimm6_32b>;
+ def CBXPri : CmpBranchImmediatePseudo<GPR64, uimm6_64b>;
+
+def : Pat<(AArch64CBrr i32:$Cond, GPR32:$Rn, GPR32:$Rt, bb:$Target),
+ (CBWPrr ccode:$Cond, GPR32:$Rn, GPR32:$Rt, am_brcmpcond:$Target)>;
+def : Pat<(AArch64CBrr i32:$Cond, GPR64:$Rn, GPR64:$Rt, bb:$Target),
+ (CBXPrr ccode:$Cond, GPR64:$Rn, GPR64:$Rt, am_brcmpcond:$Target)>;
+def : Pat<(AArch64CBri i32:$Cond, GPR32:$Rn, i32:$Imm, bb:$Target),
+ (CBWPri ccode:$Cond, GPR32:$Rn, uimm6_32b:$Imm, am_brcmpcond:$Target)>;
+def : Pat<(AArch64CBri i32:$Cond, GPR64:$Rn, i64:$Imm, bb:$Target),
+ (CBXPri ccode:$Cond, GPR64:$Rn, uimm6_64b:$Imm, am_brcmpcond:$Target)>;
+
} // HasCMPBR
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 8f34cf054fe286..417c152eebf24a 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -332,6 +332,26 @@ inline static unsigned getNZCVToSatisfyCondCode(CondCode Code) {
}
}
+/// True, if a given condition code can be used in a fused compare-and-branch
+/// instructions, false otherwise.
+inline static bool isValidCBCond(AArch64CC::CondCode Code) {
+ switch (Code) {
+ default:
+ return false;
+ case AArch64CC::EQ:
+ case AArch64CC::NE:
+ case AArch64CC::HS:
+ case AArch64CC::LO:
+ case AArch64CC::HI:
+ case AArch64CC::LS:
+ case AArch64CC::GE:
+ case AArch64CC::LT:
+ case AArch64CC::GT:
+ case AArch64CC::LE:
+ return true;
+ }
+}
+
} // end namespace AArch64CC
struct SysAlias {
diff --git a/llvm/test/CodeGen/AArch64/cmpbr-branch-relaxation.mir b/llvm/test/CodeGen/AArch64/cmpbr-branch-relaxation.mir
new file mode 100644
index 00000000000000..5fccb452e9642b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cmpbr-branch-relaxation.mir
@@ -0,0 +1,156 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple arm64-apple-ios -mattr +cmpbr -o - -aarch64-cb-offset-bits=3 \
+# RUN: -run-pass=branch-relaxation -verify-machineinstrs -simplify-mir %s | \
+# RUN: FileCheck -check-prefix=RELAX %s
+# RUN: llc -mtriple arm64-apple-ios -mattr +cmpbr -o - -aarch64-cb-offset-bits=9 \
+# RUN: -run-pass=branch-relaxation -verify-machineinstrs -simplify-mir %s | \
+# RUN: FileCheck -check-prefix=NO-RELAX %s
+---
+name: relax_cb
+registers:
+ - { id: 0, class: gpr32 }
+ - { id: 1, class: gpr32 }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+body: |
+ ; RELAX-LABEL: name: relax_cb
+ ; RELAX: bb.0:
+ ; RELAX-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; RELAX-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; RELAX-NEXT: ...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
42e7ad6
to
a61f4d6
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Suggest adding a couple more tests, but otherwise LGTM.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could this be done without introducing new pseudo instructions? They often have a higher cost than you would expect and I'm not sure it makes sense to define the instructions in the way they are currently specified.
The problem is the restricted set of conditions CB supports, which uses a very different encoding than the standard 4-bit condition codes. A number of conditions are expressed by either swapping register operands, e.g. there is no encoding for `cble` but you use `cbge` with swapped operands, or by incrementing/decrementing immediate operands. Now, if you don't use pseudos you have to be careful if you want to invert a condition, which happens in a number of places all over the backend. I think it makes sense to reason about the conditions on a more abstract level in terms of standard Arm conditions and then lower them to the real encodings. Also, I think given the number of conditions + instruction variants for CB it makes more sense to treat the conditions as operands and not as part of the instructions, just as we do with `b.<cc>`. Does this work for you? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The problem is the restricted set of conditions CB supports, which uses a very different encoding than the standard 4-bit condition codes. A number of conditions are expressed by either swapping register operands, e.g. there is no encoding for
cble
but you usecbge
with swapped operands, or by incrementing/decrementing immediate operands. Now, if you don't use pseudos you have to be careful if you want to invert a condition which happens in a number of places all over the backend. I think it makes sense to reason about the conditions on a more abstract level in terms of standard Arm conditions and then lower them to the real encodings.Also, I think given the numbers of conditions + instruction variants for CB it makes more sense to treat the conditions as operands and not as part of the instructions, just as we do with
b.<cc>
.
I agree - I would suggest changing the instructions to work more like the psuedos you have added, where the condition is an operand. That looks like a nicer way of specifying the instruction for codegen. Unfortunate I don't know if it is possible to easily have an instruction where the mnemonic is based on one of the operands, so maybe this is OK.
Yeah, I've looked into doing that a while ago, and it was more painful than helpful. I think there is no out-of-the-box functionality to treat an operand as part of the mnemonic. I think CB is especially unthankful for that as the condition codes are not only different from the standard 4-bit ones but register and immediate instruction variants use different encodings for the same conditions. That means that, e.g. during Asm parsing, you can't decode the condition code operand before you handle the later immediate or register operands. It's doable but different from what we currently do. |
ced869b
to
7a69134
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi, I've left a few comments for now and plan to provide a more complete review, but I wanted to make sure that we can tackle the issue I raised about inverting condition codes.
Cheers!
This patch adds codegen for all Armv9.6-a compare-and-branch instructions that operate on full w or x registers. The instruction variants operating on half-words (cbh) and bytes (cbb) are added in a subsequent patch. Since CB doesn't use standard 4-bit Arm condition codes but a reduced set of conditions, encoded in 3 bits, some conditions are expressed by modifying operands, namely incrementing or decrementing immediate operands and swapping register operands. To invert a CB instruction it's therefore not enough to just modify the condition code, which doesn't play particularly well with how the backend is currently organized. We therefore introduce a number of pseudos which operate on the standard 4-bit condition codes and lower them late during codegen.
7a69134
to
55a4f0f
Compare
Sorry for the horrible delay, I somehow didn't get around to finish this. Thanks for all the review comments, should all be taken care of now. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for continuing this, the work is really appreciated.
A few comments from me, though I'm not the best person keep on this as I'm less involved in this area at the moment - I'd think that @CarolineConcatto, @davemgreen et al. might want a look.
Cheers
assert(!(NeedsImmDec && NeedsImmInc) && | ||
"Cannot require increment and decrement of CB immediate operand at " | ||
"the same time"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like we can determine statically that this will never fire - so maybe it is worth removing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removed.
// Now swap, increment or decrement | ||
if (NeedsRegSwap) { | ||
assert( | ||
!IsImm && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Similarly as above around here, though don't let me stop you from keeping them if you prefer.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removed.
; CHECK-NO-CMPBR-NEXT: LBB14_2: ; %if.then | ||
; CHECK-NO-CMPBR-NEXT: brk #0x1 | ||
entry: | ||
%cmp = icmp sgt i32 %a, 63 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this (and the test below) use sge
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, thanks for catching that!
@@ -0,0 +1,94 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are these tests now not a subset of those in cmpbr-reg-imm-at-bounds.ll
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It makes sense to have these tests together, done. Thanks!
Thanks a lot, @SpencerAbson! @davemgreen mind taking another look? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks - looks sensible to me, just a question about removing one of the two ISD nodes.
@@ -2983,6 +2983,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { | |||
MAKE_CASE(AArch64ISD::CTTZ_ELTS) | |||
MAKE_CASE(AArch64ISD::CALL_ARM64EC_TO_X64) | |||
MAKE_CASE(AArch64ISD::URSHR_I_PRED) | |||
MAKE_CASE(AArch64ISD::CBRR) | |||
MAKE_CASE(AArch64ISD::CBRI) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I forget if this was asked before but can we drop AArch64ISD::CBRI and just rely on AArch64ISD::CBRR, creating a CBWPri if the operands an immediate in the right range? It just helps reduce the number of nodes a bit.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just checking if the immediate is in the right range depends on the condition code, so I'm not sure if that's really worth it because you complicate the matching patterns a lot: You'd need individual patterns for every condition code or at least check every code individually when matching the condition.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it would need WantsRoot (or WantParent?) on a ComplexPattern, so that it can check the Imm is in range whilst checking the condition on the CB.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh right, yeah, that works! Thanks!
Done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, LGTM.
template <int Bits> | ||
bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, | ||
SDValue &Imm) { | ||
ConstantSDNode *C = dyn_cast<ConstantSDNode>(P->getOperand(1)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you can assume this is always the case, so rely upon P->getConstantOperandVal(1) (it has an assert inside it).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
CN->getAPIntValue().ult(UpperBound)) { | ||
SDLoc DL(N); | ||
Imm = CurDAG->getTargetConstant(CN->getZExtValue(), DL, | ||
Bits == 32 ? MVT::i32 : MVT::i64); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This could use Op.getValueType() to get the type, I believe?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
def CmpBranchUImm6Operand_32b | ||
: Operand<i32>, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This might only need to be a ComplexPattern, not a Operand as well?
Adding "SelectCmpBranchUImm6Operand<32>", [imm]>
might help a little too, to reduce the number of times the C++ gets called (or at least communicate the intent, even if it is not used).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
|
||
; slt, sle, sgt, sge, ult, ule, ugt, uge, eq, ne | ||
|
||
define void @slt_0(i32 %a) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we add a few basic i64 bounds tests too, to make sure they have test coverage.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
This patch adds codegen for all Armv9.6-a compare-and-branch instructions that operate on full w or x registers. The instruction variants operating on half-words (cbh) and bytes (cbb) are added in a subsequent patch.
Since CB doesn't use standard 4-bit Arm condition codes but a reduced set of conditions, encoded in 3 bits, some conditions are expressed by modifying operands, namely incrementing or decrementing immediate operands and swapping register operands. To invert a CB instruction it's therefore not enough to just modify the condition code which doesn't play particularly well with how the backend is currently organized. We therefore introduce a number of pseudos which operate on the standard 4-bit condition codes and lower them late during codegen.