Skip to content

Commit 69e1312

Browse files
authored
[AArch64][GlobalISel] Select G_ICMP instruction through TableGen (#89932)
The lowering G_ICMP NE => XOR(G_ICMP EQ, -1) is moved to the Legalizer so that later passes can apply combines to the resulting instructions.
1 parent b0b6c16 commit 69e1312

File tree

6 files changed

+86
-525
lines changed

6 files changed

+86
-525
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5403,6 +5403,52 @@ def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
54035403
def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
54045404
(BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
54055405

5406+
// The following SetCC patterns are used for GlobalISel only
5407+
// Emits one selection pattern per listed vector type (v8i8, v16i8, v4i16,
// v8i16, v2i32, v4i32, v2i64). Each pattern matches the comparison fragment
// InFrag applied to ($Rn, $Rm) and selects the instruction whose name is INST
// concatenated with the type suffix, passing the operands in the same order.
// 64-bit types use the V64 register class, 128-bit types use V128.
multiclass SelectSetCC<PatFrags InFrag, string INST> {
5408+
def : Pat<(v8i8 (InFrag (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
5409+
(v8i8 (!cast<Instruction>(INST # v8i8) (v8i8 V64:$Rn), (v8i8 V64:$Rm)))>;
5410+
def : Pat<(v16i8 (InFrag (v16i8 V128:$Rn), (v16i8 V128:$Rm))),
5411+
(v16i8 (!cast<Instruction>(INST # v16i8) (v16i8 V128:$Rn), (v16i8 V128:$Rm)))>;
5412+
def : Pat<(v4i16 (InFrag (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
5413+
(v4i16 (!cast<Instruction>(INST # v4i16) (v4i16 V64:$Rn), (v4i16 V64:$Rm)))>;
5414+
def : Pat<(v8i16 (InFrag (v8i16 V128:$Rn), (v8i16 V128:$Rm))),
5415+
(v8i16 (!cast<Instruction>(INST # v8i16) (v8i16 V128:$Rn), (v8i16 V128:$Rm)))>;
5416+
def : Pat<(v2i32 (InFrag (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
5417+
(v2i32 (!cast<Instruction>(INST # v2i32) (v2i32 V64:$Rn), (v2i32 V64:$Rm)))>;
5418+
def : Pat<(v4i32 (InFrag (v4i32 V128:$Rn), (v4i32 V128:$Rm))),
5419+
(v4i32 (!cast<Instruction>(INST # v4i32) (v4i32 V128:$Rn), (v4i32 V128:$Rm)))>;
5420+
def : Pat<(v2i64 (InFrag (v2i64 V128:$Rn), (v2i64 V128:$Rm))),
5421+
(v2i64 (!cast<Instruction>(INST # v2i64) (v2i64 V128:$Rn), (v2i64 V128:$Rm)))>;
5422+
}
5423+
5424+
// Predicates selected with operands in their original order:
// eq -> CMEQ, signed gt/ge -> CMGT/CMGE, unsigned gt/ge -> CMHI/CMHS.
defm : SelectSetCC<seteq, "CMEQ">;
5425+
defm : SelectSetCC<setgt, "CMGT">;
5426+
defm : SelectSetCC<setge, "CMGE">;
5427+
defm : SelectSetCC<setugt, "CMHI">;
5428+
defm : SelectSetCC<setuge, "CMHS">;
5429+
5430+
// Emits one selection pattern per listed vector type (v8i8, v16i8, v4i16,
// v8i16, v2i32, v4i32, v2i64). Each pattern matches the comparison fragment
// InFrag applied to ($Rn, $Rm) and selects the instruction named INST
// concatenated with the type suffix, with the operands REVERSED: the output
// instruction receives ($Rm, $Rn).
multiclass SelectSetCCSwapOperands<PatFrags InFrag, string INST> {
5431+
def : Pat<(v8i8 (InFrag (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
5432+
(v8i8 (!cast<Instruction>(INST # v8i8) (v8i8 V64:$Rm), (v8i8 V64:$Rn)))>;
5433+
def : Pat<(v16i8 (InFrag (v16i8 V128:$Rn), (v16i8 V128:$Rm))),
5434+
(v16i8 (!cast<Instruction>(INST # v16i8) (v16i8 V128:$Rm), (v16i8 V128:$Rn)))>;
5435+
def : Pat<(v4i16 (InFrag (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
5436+
(v4i16 (!cast<Instruction>(INST # v4i16) (v4i16 V64:$Rm), (v4i16 V64:$Rn)))>;
5437+
def : Pat<(v8i16 (InFrag (v8i16 V128:$Rn), (v8i16 V128:$Rm))),
5438+
(v8i16 (!cast<Instruction>(INST # v8i16) (v8i16 V128:$Rm), (v8i16 V128:$Rn)))>;
5439+
def : Pat<(v2i32 (InFrag (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
5440+
(v2i32 (!cast<Instruction>(INST # v2i32) (v2i32 V64:$Rm), (v2i32 V64:$Rn)))>;
5441+
def : Pat<(v4i32 (InFrag (v4i32 V128:$Rn), (v4i32 V128:$Rm))),
5442+
(v4i32 (!cast<Instruction>(INST # v4i32) (v4i32 V128:$Rm), (v4i32 V128:$Rn)))>;
5443+
def : Pat<(v2i64 (InFrag (v2i64 V128:$Rn), (v2i64 V128:$Rm))),
5444+
(v2i64 (!cast<Instruction>(INST # v2i64) (v2i64 V128:$Rm), (v2i64 V128:$Rn)))>;
5445+
}
5446+
5447+
// "Less than" style predicates are selected as the matching "greater than"
// instruction with the operands swapped:
// slt -> CMGT, sle -> CMGE, ult -> CMHI, ule -> CMHS.
defm : SelectSetCCSwapOperands<setlt, "CMGT">;
5448+
defm : SelectSetCCSwapOperands<setle, "CMGE">;
5449+
defm : SelectSetCCSwapOperands<setult, "CMHI">;
5450+
defm : SelectSetCCSwapOperands<setule, "CMHS">;
5451+
54065452
let Predicates = [HasNEON] in {
54075453
def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
54085454
(ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 1 addition & 173 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,6 @@ class AArch64InstructionSelector : public InstructionSelector {
221221
bool selectIntrinsicWithSideEffects(MachineInstr &I,
222222
MachineRegisterInfo &MRI);
223223
bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
224-
bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
225224
bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
226225
bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
227226
bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
@@ -3403,7 +3402,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
34033402
}
34043403
case TargetOpcode::G_ICMP: {
34053404
if (Ty.isVector())
3406-
return selectVectorICmp(I, MRI);
3405+
return false;
34073406

34083407
if (Ty != LLT::scalar(32)) {
34093408
LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
@@ -3652,177 +3651,6 @@ bool AArch64InstructionSelector::selectTLSGlobalValue(
36523651
return true;
36533652
}
36543653

3655-
// Manually selects a vector G_ICMP. Every line of this function sits on the
// removed side of this diff (markers ending in '-').
//
// Operand 0 of I is the destination register, operand 1 the predicate, and
// operands 2 and 3 the two sources. The compare opcode is looked up in
// OpcTable by source element size, destination element count and predicate.
// For ult/ule/slt/sle the sources are swapped before the compare is built.
// For 'ne' the 'eq' compare is emitted and its result is inverted with a
// vector NOT. On success the original instruction is erased.
//
// Returns false when the table has no opcode for the type/predicate
// combination or when no register class can be found for the source type;
// returns true otherwise.
bool AArch64InstructionSelector::selectVectorICmp(
3656-
MachineInstr &I, MachineRegisterInfo &MRI) {
3657-
Register DstReg = I.getOperand(0).getReg();
3658-
LLT DstTy = MRI.getType(DstReg);
3659-
Register SrcReg = I.getOperand(2).getReg();
3660-
Register Src2Reg = I.getOperand(3).getReg();
3661-
LLT SrcTy = MRI.getType(SrcReg);
3662-
3663-
unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3664-
unsigned NumElts = DstTy.getNumElements();
3665-
3666-
// First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3667-
// Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3668-
// Third index is cc opcode:
3669-
// 0 == eq
3670-
// 1 == ugt
3671-
// 2 == uge
3672-
// 3 == ult
3673-
// 4 == ule
3674-
// 5 == sgt
3675-
// 6 == sge
3676-
// 7 == slt
3677-
// 8 == sle
3678-
// ne is done by negating 'eq' result.
3679-
3680-
// This table below assumes that for some comparisons the operands will be
3681-
// commuted.
3682-
// ult op == commute + ugt op
3683-
// ule op == commute + uge op
3684-
// slt op == commute + sgt op
3685-
// sle op == commute + sge op
3686-
unsigned PredIdx = 0;
3687-
bool SwapOperands = false;
3688-
CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3689-
// Translate the predicate into a table column and note whether the operands
// must be commuted. NE shares the EQ column; the inversion is applied later.
switch (Pred) {
3690-
case CmpInst::ICMP_NE:
3691-
case CmpInst::ICMP_EQ:
3692-
PredIdx = 0;
3693-
break;
3694-
case CmpInst::ICMP_UGT:
3695-
PredIdx = 1;
3696-
break;
3697-
case CmpInst::ICMP_UGE:
3698-
PredIdx = 2;
3699-
break;
3700-
case CmpInst::ICMP_ULT:
3701-
PredIdx = 3;
3702-
SwapOperands = true;
3703-
break;
3704-
case CmpInst::ICMP_ULE:
3705-
PredIdx = 4;
3706-
SwapOperands = true;
3707-
break;
3708-
case CmpInst::ICMP_SGT:
3709-
PredIdx = 5;
3710-
break;
3711-
case CmpInst::ICMP_SGE:
3712-
PredIdx = 6;
3713-
break;
3714-
case CmpInst::ICMP_SLT:
3715-
PredIdx = 7;
3716-
SwapOperands = true;
3717-
break;
3718-
case CmpInst::ICMP_SLE:
3719-
PredIdx = 8;
3720-
SwapOperands = true;
3721-
break;
3722-
default:
3723-
llvm_unreachable("Unhandled icmp predicate");
3724-
return false;
3725-
}
3726-
3727-
// This table obviously should be tablegen'd when we have our GISel native
3728-
// tablegen selector.
3729-
3730-
static const unsigned OpcTable[4][4][9] = {
3731-
{
3732-
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3733-
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3734-
0 /* invalid */},
3735-
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3736-
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3737-
0 /* invalid */},
3738-
{AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3739-
AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3740-
AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3741-
{AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3742-
AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3743-
AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3744-
},
3745-
{
3746-
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3747-
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3748-
0 /* invalid */},
3749-
{AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3750-
AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3751-
AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3752-
{AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3753-
AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3754-
AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3755-
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3756-
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3757-
0 /* invalid */}
3758-
},
3759-
{
3760-
{AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3761-
AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3762-
AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3763-
{AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3764-
AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3765-
AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3766-
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3767-
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3768-
0 /* invalid */},
3769-
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3770-
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3771-
0 /* invalid */}
3772-
},
3773-
{
3774-
{AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3775-
AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3776-
AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3777-
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3778-
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3779-
0 /* invalid */},
3780-
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3781-
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3782-
0 /* invalid */},
3783-
{0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3784-
0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3785-
0 /* invalid */}
3786-
},
3787-
};
3788-
// NOTE(review): the indices below are not range-checked against the 4x4 table;
// presumably earlier legalization restricts the types reaching here - confirm.
unsigned EltIdx = Log2_32(SrcEltSize / 8);
3789-
unsigned NumEltsIdx = Log2_32(NumElts / 2);
3790-
unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3791-
if (!Opc) {
3792-
LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3793-
return false;
3794-
}
3795-
3796-
const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3797-
const TargetRegisterClass *SrcRC =
3798-
getRegClassForTypeOnBank(SrcTy, VecRB, true);
3799-
if (!SrcRC) {
3800-
LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3801-
return false;
3802-
}
3803-
3804-
// NotOpc stays 0 unless the predicate is 'ne'. For 'ne' it is the v8i8 NOT,
// upgraded to the v16i8 NOT when the source vector is 128 bits wide.
unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3805-
if (SrcTy.getSizeInBits() == 128)
3806-
NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3807-
3808-
if (SwapOperands)
3809-
std::swap(SrcReg, Src2Reg);
3810-
3811-
auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3812-
constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3813-
3814-
// Invert if we had a 'ne' cc.
3815-
if (NotOpc) {
3816-
Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3817-
constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3818-
} else {
3819-
MIB.buildCopy(DstReg, Cmp.getReg(0));
3820-
}
3821-
RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3822-
I.eraseFromParent();
3823-
return true;
3824-
}
3825-
38263654
MachineInstr *AArch64InstructionSelector::emitScalarToVector(
38273655
unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
38283656
MachineIRBuilder &MIRBuilder) const {

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -495,17 +495,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
495495

496496
// FIXME: fix moreElementsToNextPow2
497497
getActionDefinitionsBuilder(G_ICMP)
498-
.legalFor({{s32, s32},
499-
{s32, s64},
500-
{s32, p0},
501-
{v4s32, v4s32},
502-
{v2s32, v2s32},
503-
{v2s64, v2s64},
504-
{v2s64, v2p0},
505-
{v4s16, v4s16},
506-
{v8s16, v8s16},
507-
{v8s8, v8s8},
508-
{v16s8, v16s8}})
498+
.legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
509499
.widenScalarOrEltToNextPow2(1)
510500
.clampScalar(1, s32, s64)
511501
.clampScalar(0, s32, s32)
@@ -527,7 +517,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
527517
.clampNumElements(1, v8s8, v16s8)
528518
.clampNumElements(1, v4s16, v8s16)
529519
.clampNumElements(1, v2s32, v4s32)
530-
.clampNumElements(1, v2s64, v2s64);
520+
.clampNumElements(1, v2s64, v2s64)
521+
.customIf(isVector(0));
531522

532523
getActionDefinitionsBuilder(G_FCMP)
533524
.legalFor({{s32, MinFPScalar},
@@ -1266,6 +1257,8 @@ bool AArch64LegalizerInfo::legalizeCustom(
12661257
return legalizePrefetch(MI, Helper);
12671258
case TargetOpcode::G_ABS:
12681259
return Helper.lowerAbsToCNeg(MI);
1260+
case TargetOpcode::G_ICMP:
1261+
return legalizeICMP(MI, MRI, MIRBuilder);
12691262
}
12701263

12711264
llvm_unreachable("expected switch to return");
@@ -1324,6 +1317,36 @@ bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
13241317
return true;
13251318
}
13261319

1320+
// Custom legalization for G_ICMP.
//
// Operand 0 of MI is the destination register, operand 1 the predicate, and
// operands 2 and 3 the two sources.
//
// Returns false when the destination and source types differ in scalar size
// or element count, or when the destination is neither 64 nor 128 bits wide.
// For any predicate other than NE the instruction is left untouched and true
// is returned. For NE, the instruction is replaced by an EQ compare of the
// same operands followed by a NOT of that result written to the original
// destination, and true is returned.
bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
1321+
MachineRegisterInfo &MRI,
1322+
MachineIRBuilder &MIRBuilder) const {
1323+
Register DstReg = MI.getOperand(0).getReg();
1324+
Register SrcReg1 = MI.getOperand(2).getReg();
1325+
Register SrcReg2 = MI.getOperand(3).getReg();
1326+
LLT DstTy = MRI.getType(DstReg);
1327+
LLT SrcTy = MRI.getType(SrcReg1);
1328+
1329+
// Check the vector types are legal
1330+
if (DstTy.getScalarSizeInBits() != SrcTy.getScalarSizeInBits() ||
1331+
DstTy.getNumElements() != SrcTy.getNumElements() ||
1332+
(DstTy.getSizeInBits() != 64 && DstTy.getSizeInBits() != 128))
1333+
return false;
1334+
1335+
// Lowers G_ICMP NE => G_ICMP EQ to allow better pattern matching for
1336+
// following passes
1337+
CmpInst::Predicate Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
1338+
// Every predicate other than NE is accepted as-is.
if (Pred != CmpInst::ICMP_NE)
1339+
return true;
1340+
// Build the EQ compare with the same result type and operands as MI.
Register CmpReg =
1341+
MIRBuilder
1342+
.buildICmp(CmpInst::ICMP_EQ, MRI.getType(DstReg), SrcReg1, SrcReg2)
1343+
.getReg(0);
1344+
// Invert the EQ result into the original destination register.
MIRBuilder.buildNot(DstReg, CmpReg);
1345+
1346+
// The NE compare has been fully replaced, so remove it.
MI.eraseFromParent();
1347+
return true;
1348+
}
1349+
13271350
bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI,
13281351
MachineRegisterInfo &MRI,
13291352
LegalizerHelper &Helper) const {

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ class AArch64LegalizerInfo : public LegalizerInfo {
5050
LegalizerHelper &Helper) const;
5151
bool legalizeRotate(MachineInstr &MI, MachineRegisterInfo &MRI,
5252
LegalizerHelper &Helper) const;
53+
bool legalizeICMP(MachineInstr &MI, MachineRegisterInfo &MRI,
54+
MachineIRBuilder &MIRBuilder) const;
5355
bool legalizeFunnelShift(MachineInstr &MI, MachineRegisterInfo &MRI,
5456
MachineIRBuilder &MIRBuilder,
5557
GISelChangeObserver &Observer,

llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -361,8 +361,8 @@ body: |
361361
; CHECK-NEXT: %cmp_lhs:fpr128 = COPY $q0
362362
; CHECK-NEXT: %cmp_rhs:fpr128 = COPY $q1
363363
; CHECK-NEXT: %add_lhs:fpr128 = COPY $q2
364-
; CHECK-NEXT: [[CMEQv4i32_:%[0-9]+]]:fpr128 = CMEQv4i32 %cmp_lhs, %cmp_rhs
365-
; CHECK-NEXT: %add:fpr128 = ADDv4i32 %add_lhs, [[CMEQv4i32_]]
364+
; CHECK-NEXT: %cmp:fpr128 = CMEQv4i32 %cmp_lhs, %cmp_rhs
365+
; CHECK-NEXT: %add:fpr128 = ADDv4i32 %add_lhs, %cmp
366366
; CHECK-NEXT: $q0 = COPY %add
367367
; CHECK-NEXT: RET_ReallyLR implicit $q0
368368
%cmp_lhs:fpr(<4 x s32>) = COPY $q0

0 commit comments

Comments
 (0)