Skip to content

Commit 42e7ad6

Browse files
committed
[AArch64][FEAT_CMPBR] Codegen for Armv9.6-a compare-and-branch
This patch adds codegen for all Armv9.6-a compare-and-branch instructions that operate on full w or x registers. The instruction variants operating on half-words (cbh) and bytes (cbb) are added in a subsequent patch. Since CB doesn't use the standard 4-bit Arm condition codes but a reduced set of conditions encoded in 3 bits, some conditions are expressed by modifying operands, namely by incrementing or decrementing immediate operands and by swapping register operands. To invert a CB instruction, it is therefore not enough to just modify the condition code, which doesn't play particularly well with how the backend is currently organized. We therefore introduce a number of pseudos that operate on the standard 4-bit condition codes and lower them late during codegen.
1 parent 4f2651c commit 42e7ad6

13 files changed

+1490
-1
lines changed

llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,9 @@ class AArch64AsmPrinter : public AsmPrinter {
181181
/// pseudo instructions.
182182
bool lowerPseudoInstExpansion(const MachineInstr *MI, MCInst &Inst);
183183

184+
// Emit expansion of Compare-and-branch pseudo instructions
185+
void emitCBPseudoExpansion(const MachineInstr *MI);
186+
184187
void EmitToStreamer(MCStreamer &S, const MCInst &Inst);
185188
void EmitToStreamer(const MCInst &Inst) {
186189
EmitToStreamer(*OutStreamer, Inst);
@@ -2427,6 +2430,150 @@ AArch64AsmPrinter::lowerBlockAddressConstant(const BlockAddress &BA) {
24272430
return BAE;
24282431
}
24292432

2433+
void AArch64AsmPrinter::emitCBPseudoExpansion(const MachineInstr *MI) {
2434+
bool IsImm = false;
2435+
bool Is32Bit = false;
2436+
2437+
switch (MI->getOpcode()) {
2438+
default:
2439+
llvm_unreachable("This is not a CB pseudo instruction");
2440+
case AArch64::CBWPrr:
2441+
IsImm = false;
2442+
Is32Bit = true;
2443+
break;
2444+
case AArch64::CBXPrr:
2445+
IsImm = false;
2446+
Is32Bit = false;
2447+
break;
2448+
case AArch64::CBWPri:
2449+
IsImm = true;
2450+
Is32Bit = true;
2451+
break;
2452+
case AArch64::CBXPri:
2453+
IsImm = true;
2454+
Is32Bit = false;
2455+
break;
2456+
}
2457+
2458+
AArch64CC::CondCode CC =
2459+
static_cast<AArch64CC::CondCode>(MI->getOperand(0).getImm());
2460+
bool NeedsRegSwap = false;
2461+
bool NeedsImmDec = false;
2462+
bool NeedsImmInc = false;
2463+
2464+
unsigned MCOpC;
2465+
switch (CC) {
2466+
default:
2467+
llvm_unreachable("Invalid CB condition code");
2468+
case AArch64CC::EQ:
2469+
MCOpC = IsImm ? (Is32Bit ? AArch64::CBEQWri : AArch64::CBEQXri)
2470+
: (Is32Bit ? AArch64::CBEQWrr : AArch64::CBEQXrr);
2471+
NeedsRegSwap = false;
2472+
NeedsImmDec = false;
2473+
NeedsImmInc = false;
2474+
break;
2475+
case AArch64CC::NE:
2476+
MCOpC = IsImm ? (Is32Bit ? AArch64::CBNEWri : AArch64::CBNEXri)
2477+
: (Is32Bit ? AArch64::CBNEWrr : AArch64::CBNEXrr);
2478+
NeedsRegSwap = false;
2479+
NeedsImmDec = false;
2480+
NeedsImmInc = false;
2481+
break;
2482+
case AArch64CC::HS:
2483+
MCOpC = IsImm ? (Is32Bit ? AArch64::CBHIWri : AArch64::CBHIXri)
2484+
: (Is32Bit ? AArch64::CBHSWrr : AArch64::CBHSXrr);
2485+
NeedsRegSwap = false;
2486+
NeedsImmDec = true;
2487+
NeedsImmInc = false;
2488+
break;
2489+
case AArch64CC::LO:
2490+
MCOpC = IsImm ? (Is32Bit ? AArch64::CBLOWri : AArch64::CBLOXri)
2491+
: (Is32Bit ? AArch64::CBHIWrr : AArch64::CBHIXrr);
2492+
NeedsRegSwap = true;
2493+
NeedsImmDec = false;
2494+
NeedsImmInc = false;
2495+
break;
2496+
case AArch64CC::HI:
2497+
MCOpC = IsImm ? (Is32Bit ? AArch64::CBHIWri : AArch64::CBHIXri)
2498+
: (Is32Bit ? AArch64::CBHIWrr : AArch64::CBHIXrr);
2499+
NeedsRegSwap = false;
2500+
NeedsImmDec = false;
2501+
NeedsImmInc = false;
2502+
break;
2503+
case AArch64CC::LS:
2504+
MCOpC = IsImm ? (Is32Bit ? AArch64::CBLOWri : AArch64::CBLOXri)
2505+
: (Is32Bit ? AArch64::CBHSWrr : AArch64::CBHSXrr);
2506+
NeedsRegSwap = !IsImm;
2507+
NeedsImmDec = false;
2508+
NeedsImmInc = IsImm;
2509+
break;
2510+
case AArch64CC::GE:
2511+
MCOpC = IsImm ? (Is32Bit ? AArch64::CBGTWri : AArch64::CBGTXri)
2512+
: (Is32Bit ? AArch64::CBGEWrr : AArch64::CBGEXrr);
2513+
NeedsRegSwap = false;
2514+
NeedsImmDec = IsImm;
2515+
NeedsImmInc = false;
2516+
break;
2517+
case AArch64CC::LT:
2518+
MCOpC = IsImm ? (Is32Bit ? AArch64::CBLTWri : AArch64::CBLTXri)
2519+
: (Is32Bit ? AArch64::CBGTWrr : AArch64::CBGTXrr);
2520+
NeedsRegSwap = !IsImm;
2521+
NeedsImmDec = false;
2522+
NeedsImmInc = false;
2523+
break;
2524+
case AArch64CC::GT:
2525+
MCOpC = IsImm ? (Is32Bit ? AArch64::CBGTWri : AArch64::CBGTXri)
2526+
: (Is32Bit ? AArch64::CBGTWrr : AArch64::CBGTXrr);
2527+
NeedsRegSwap = false;
2528+
NeedsImmDec = false;
2529+
NeedsImmInc = false;
2530+
break;
2531+
case AArch64CC::LE:
2532+
MCOpC = IsImm ? (Is32Bit ? AArch64::CBLTWri : AArch64::CBLTXri)
2533+
: (Is32Bit ? AArch64::CBGEWrr : AArch64::CBGEXrr);
2534+
NeedsRegSwap = !IsImm;
2535+
NeedsImmDec = false;
2536+
NeedsImmInc = IsImm;
2537+
break;
2538+
}
2539+
2540+
MCInst Inst;
2541+
Inst.setOpcode(MCOpC);
2542+
2543+
MCOperand Lhs, Rhs, Trgt;
2544+
lowerOperand(MI->getOperand(1), Lhs);
2545+
lowerOperand(MI->getOperand(2), Rhs);
2546+
lowerOperand(MI->getOperand(3), Trgt);
2547+
2548+
if (NeedsRegSwap) {
2549+
assert(
2550+
!IsImm &&
2551+
"Unexpected register swap for CB instruction with immediate operand");
2552+
assert(Lhs.isReg() && "Expected register operand for CB");
2553+
assert(Rhs.isReg() && "Expected register operand for CB");
2554+
// Swap register operands
2555+
Inst.addOperand(Rhs);
2556+
Inst.addOperand(Lhs);
2557+
} else if (IsImm && NeedsImmDec) {
2558+
assert(IsImm && "Unexpected immediate decrement for CB instruction with "
2559+
"reg-reg operands");
2560+
Rhs.setImm(Rhs.getImm() - 1);
2561+
Inst.addOperand(Lhs);
2562+
Inst.addOperand(Rhs);
2563+
} else if (NeedsImmInc) {
2564+
assert(IsImm && "Unexpected immediate increment for CB instruction with "
2565+
"reg-reg operands");
2566+
Rhs.setImm(Rhs.getImm() + 1);
2567+
Inst.addOperand(Lhs);
2568+
Inst.addOperand(Rhs);
2569+
} else {
2570+
Inst.addOperand(Lhs);
2571+
Inst.addOperand(Rhs);
2572+
}
2573+
Inst.addOperand(Trgt);
2574+
EmitToStreamer(*OutStreamer, Inst);
2575+
}
2576+
24302577
// Simple pseudo-instructions have their lowering (with expansion to real
24312578
// instructions) auto-generated.
24322579
#include "AArch64GenMCPseudoLowering.inc"
@@ -2948,6 +3095,13 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
29483095
TS->emitARM64WinCFISaveAnyRegQPX(MI->getOperand(0).getImm(),
29493096
-MI->getOperand(2).getImm());
29503097
return;
3098+
3099+
case AArch64::CBWPri:
3100+
case AArch64::CBXPri:
3101+
case AArch64::CBWPrr:
3102+
case AArch64::CBXPrr:
3103+
emitCBPseudoExpansion(MI);
3104+
return;
29513105
}
29523106

29533107
// Finally, do the automated lowerings for everything else.

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2954,6 +2954,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
29542954
MAKE_CASE(AArch64ISD::CTTZ_ELTS)
29552955
MAKE_CASE(AArch64ISD::CALL_ARM64EC_TO_X64)
29562956
MAKE_CASE(AArch64ISD::URSHR_I_PRED)
2957+
MAKE_CASE(AArch64ISD::CBRR)
2958+
MAKE_CASE(AArch64ISD::CBRI)
29572959
}
29582960
#undef MAKE_CASE
29592961
return nullptr;
@@ -10396,6 +10398,28 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
1039610398
DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
1039710399
}
1039810400

10401+
// Try to emit Armv9.6 CB instructions. We prefer tb{n}z/cb{n}z due to their
10402+
// larger branch displacement but do prefer CB over cmp + br.
10403+
if (Subtarget->hasCMPBR() &&
10404+
AArch64CC::isValidCBCond(changeIntCCToAArch64CC(CC)) &&
10405+
ProduceNonFlagSettingCondBr) {
10406+
AArch64CC::CondCode ACC = changeIntCCToAArch64CC(CC);
10407+
unsigned Opc = AArch64ISD::CBRR;
10408+
if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(RHS)) {
10409+
APInt NewImm = Imm->getAPIntValue();
10410+
if (ACC == AArch64CC::GE || ACC == AArch64CC::HS)
10411+
NewImm = Imm->getAPIntValue() - 1;
10412+
else if (ACC == AArch64CC::LE || ACC == AArch64CC::LS)
10413+
NewImm = Imm->getAPIntValue() + 1;
10414+
10415+
if (NewImm.uge(0) && NewImm.ult(64))
10416+
Opc = AArch64ISD::CBRI;
10417+
}
10418+
10419+
SDValue Cond = DAG.getTargetConstant(ACC, dl, MVT::i32);
10420+
return DAG.getNode(Opc, dl, MVT::Other, Chain, Cond, LHS, RHS, Dest);
10421+
}
10422+
1039910423
SDValue CCVal;
1040010424
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
1040110425
return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,10 @@ enum NodeType : unsigned {
520520
MOPS_MEMSET_TAGGING,
521521
MOPS_MEMCOPY,
522522
MOPS_MEMMOVE,
523+
524+
// Compare-and-branch
525+
CBRR,
526+
CBRI,
523527
};
524528

525529
} // end namespace AArch64ISD

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13065,6 +13065,7 @@ class BaseCmpBranchRegister<RegisterClass regtype, bit sf, bits<3> cc,
1306513065
Sched<[WriteBr]> {
1306613066
let isBranch = 1;
1306713067
let isTerminator = 1;
13068+
let isCompare = 1;
1306813069

1306913070
bits<5> Rm;
1307013071
bits<5> Rt;
@@ -13091,6 +13092,7 @@ class BaseCmpBranchImmediate<RegisterClass regtype, bit sf, bits<3> cc,
1309113092
Sched<[WriteBr]> {
1309213093
let isBranch = 1;
1309313094
let isTerminator = 1;
13095+
let isCompare = 1;
1309413096

1309513097
bits<5> Rt;
1309613098
bits<6> imm;
@@ -13131,6 +13133,23 @@ multiclass CmpBranchRegisterAlias<string mnemonic, string insn> {
1313113133
def : InstAlias<mnemonic # "\t$Rt, $Rm, $target",
1313213134
(!cast<Instruction>(insn # "Xrr") GPR64:$Rm, GPR64:$Rt, am_brcmpcond:$target), 0>;
1313313135
}
13136+
13137+
// Compare-and-branch pseudo with two register operands. Inputs are a
// condition code ($Cond), two registers of class `regtype` ($Rt, $Rm) and a
// branch target ($Target); there are no outputs.
// NOTE(review): presumably instantiated as CBWPrr/CBXPrr and expanded in
// AArch64AsmPrinter::emitCBPseudoExpansion - confirm against the defs.
class CmpBranchRegisterPseudo<RegisterClass regtype>
    : Pseudo<(outs),
             (ins ccode:$Cond, regtype:$Rt, regtype:$Rm,
                  am_brcmpcond:$Target),
             []>,
      Sched<[WriteBr]> {
  let isBranch = 1;
  let isTerminator = 1;
  let isCompare = 1;
}
13144+
13145+
// Compare-and-branch pseudo with a register and an immediate operand. Inputs
// are a condition code ($Cond), a register of class `regtype` ($Rt), an
// immediate of leaf type `imtype` ($Imm) and a branch target ($Target);
// there are no outputs.
// NOTE(review): presumably instantiated as CBWPri/CBXPri and expanded in
// AArch64AsmPrinter::emitCBPseudoExpansion - confirm against the defs.
class CmpBranchImmediatePseudo<RegisterClass regtype, ImmLeaf imtype>
    : Pseudo<(outs),
             (ins ccode:$Cond, regtype:$Rt, imtype:$Imm,
                  am_brcmpcond:$Target),
             []>,
      Sched<[WriteBr]> {
  // Bit flags spelled `1` to match the other compare-and-branch classes in
  // this file; same value as `true`.
  let isBranch = 1;
  let isTerminator = 1;
  let isCompare = 1;
}
13152+
1313413153
//----------------------------------------------------------------------------
1313513154
// Allow the size specifier tokens to be upper case, not just lower.
1313613155
def : TokenAlias<".4B", ".4b">; // Add dot product

0 commit comments

Comments
 (0)