Skip to content

Commit bc43ddf

Browse files
author
Jessica Paquette
committed
[AArch64][GlobalISel] NFC: Refactor G_FCMP selection code
Refactor this so it's similar to the existing integer comparison code. Also add some missing 64-bit testcases to select-fcmp.mir. Refactoring to prep for improving selection for G_FCMP-related conditional branches etc. Differential Revision: https://reviews.llvm.org/D88614
1 parent e4f50e5 commit bc43ddf

File tree

2 files changed

+139
-84
lines changed

2 files changed

+139
-84
lines changed

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 86 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,11 @@ class AArch64InstructionSelector : public InstructionSelector {
172172
emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
173173
MachineOperand &Predicate,
174174
MachineIRBuilder &MIRBuilder) const;
175+
176+
/// Emit a floating point comparison between \p LHS and \p RHS.
177+
MachineInstr *emitFPCompare(Register LHS, Register RHS,
178+
MachineIRBuilder &MIRBuilder) const;
179+
175180
MachineInstr *emitInstr(unsigned Opcode,
176181
std::initializer_list<llvm::DstOp> DstOps,
177182
std::initializer_list<llvm::SrcOp> SrcOps,
@@ -238,9 +243,16 @@ class AArch64InstructionSelector : public InstructionSelector {
238243
MachineInstr *emitFMovForFConstant(MachineInstr &MI,
239244
MachineRegisterInfo &MRI) const;
240245

241-
/// Emit a CSet for a compare.
246+
/// Emit a CSet for an integer compare.
247+
///
248+
/// \p DefReg is expected to be a 32-bit scalar register.
242249
MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
243250
MachineIRBuilder &MIRBuilder) const;
251+
/// Emit a CSet for a FP compare.
252+
///
253+
/// \p Dst is expected to be a 32-bit scalar register.
254+
MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
255+
MachineIRBuilder &MIRBuilder) const;
244256

245257
/// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
246258
/// \p IsNegative is true if the test should be "not zero".
@@ -998,20 +1010,6 @@ static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
9981010
return 0;
9991011
}
10001012

1001-
/// Helper function to select the opcode for a G_FCMP.
1002-
static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
1003-
// If this is a compare against +0.0, then we don't have to explicitly
1004-
// materialize a constant.
1005-
const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
1006-
bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
1007-
unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
1008-
if (OpSize != 32 && OpSize != 64)
1009-
return 0;
1010-
unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
1011-
{AArch64::FCMPSri, AArch64::FCMPDri}};
1012-
return CmpOpcTbl[ShouldUseImm][OpSize == 64];
1013-
}
1014-
10151013
/// Returns true if \p P is an unsigned integer comparison predicate.
10161014
static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
10171015
switch (P) {
@@ -2882,64 +2880,13 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
28822880
}
28832881

28842882
case TargetOpcode::G_FCMP: {
2885-
if (Ty != LLT::scalar(32)) {
2886-
LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
2887-
<< ", expected: " << LLT::scalar(32) << '\n');
2888-
return false;
2889-
}
2890-
2891-
unsigned CmpOpc = selectFCMPOpc(I, MRI);
2892-
if (!CmpOpc)
2883+
MachineIRBuilder MIRBuilder(I);
2884+
CmpInst::Predicate Pred =
2885+
static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
2886+
if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(),
2887+
MIRBuilder) ||
2888+
!emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder))
28932889
return false;
2894-
2895-
// FIXME: regbank
2896-
2897-
AArch64CC::CondCode CC1, CC2;
2898-
changeFCMPPredToAArch64CC(
2899-
(CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
2900-
2901-
// Partially build the compare. Decide if we need to add a use for the
2902-
// third operand based off whether or not we're comparing against 0.0.
2903-
auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
2904-
.addUse(I.getOperand(2).getReg());
2905-
2906-
// If we don't have an immediate compare, then we need to add a use of the
2907-
// register which wasn't used for the immediate.
2908-
// Note that the immediate will always be the last operand.
2909-
if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
2910-
CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
2911-
2912-
const Register DefReg = I.getOperand(0).getReg();
2913-
Register Def1Reg = DefReg;
2914-
if (CC2 != AArch64CC::AL)
2915-
Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2916-
2917-
MachineInstr &CSetMI =
2918-
*BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2919-
.addDef(Def1Reg)
2920-
.addUse(AArch64::WZR)
2921-
.addUse(AArch64::WZR)
2922-
.addImm(getInvertedCondCode(CC1));
2923-
2924-
if (CC2 != AArch64CC::AL) {
2925-
Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2926-
MachineInstr &CSet2MI =
2927-
*BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2928-
.addDef(Def2Reg)
2929-
.addUse(AArch64::WZR)
2930-
.addUse(AArch64::WZR)
2931-
.addImm(getInvertedCondCode(CC2));
2932-
MachineInstr &OrMI =
2933-
*BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
2934-
.addDef(DefReg)
2935-
.addUse(Def1Reg)
2936-
.addUse(Def2Reg);
2937-
constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
2938-
constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
2939-
}
2940-
constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
2941-
constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
2942-
29432890
I.eraseFromParent();
29442891
return true;
29452892
}
@@ -3984,6 +3931,66 @@ AArch64InstructionSelector::emitIntegerCompare(
39843931
return {&*CmpMI, P};
39853932
}
39863933

3934+
MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
3935+
Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
3936+
MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3937+
#ifndef NDEBUG
3938+
LLT Ty = MRI.getType(Dst);
3939+
assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
3940+
"Expected a 32-bit scalar register?");
3941+
#endif
3942+
const Register ZeroReg = AArch64::WZR;
3943+
auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
3944+
auto CSet =
3945+
MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
3946+
.addImm(getInvertedCondCode(CC));
3947+
constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
3948+
return &*CSet;
3949+
};
3950+
3951+
AArch64CC::CondCode CC1, CC2;
3952+
changeFCMPPredToAArch64CC(Pred, CC1, CC2);
3953+
if (CC2 == AArch64CC::AL)
3954+
return EmitCSet(Dst, CC1);
3955+
3956+
const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
3957+
Register Def1Reg = MRI.createVirtualRegister(RC);
3958+
Register Def2Reg = MRI.createVirtualRegister(RC);
3959+
EmitCSet(Def1Reg, CC1);
3960+
EmitCSet(Def2Reg, CC2);
3961+
auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
3962+
constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
3963+
return &*OrMI;
3964+
}
3965+
3966+
MachineInstr *
3967+
AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
3968+
MachineIRBuilder &MIRBuilder) const {
3969+
MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3970+
LLT Ty = MRI.getType(LHS);
3971+
if (Ty.isVector())
3972+
return nullptr;
3973+
unsigned OpSize = Ty.getSizeInBits();
3974+
if (OpSize != 32 && OpSize != 64)
3975+
return nullptr;
3976+
3977+
// If this is a compare against +0.0, then we don't have
3978+
// to explicitly materialize a constant.
3979+
const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
3980+
bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
3981+
unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
3982+
{AArch64::FCMPSri, AArch64::FCMPDri}};
3983+
unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
3984+
3985+
// Partially build the compare. Decide if we need to add a use for the
3986+
// third operand based off whether or not we're comparing against 0.0.
3987+
auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
3988+
if (!ShouldUseImm)
3989+
CmpMI.addUse(RHS);
3990+
constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3991+
return &*CmpMI;
3992+
}
3993+
39873994
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
39883995
Optional<Register> Dst, Register Op1, Register Op2,
39893996
MachineIRBuilder &MIRBuilder) const {
@@ -4169,10 +4176,10 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
41694176
CondCode = changeICMPPredToAArch64CC(Pred);
41704177
} else {
41714178
// Get the condition code for the select.
4179+
CmpInst::Predicate Pred =
4180+
static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
41724181
AArch64CC::CondCode CondCode2;
4173-
changeFCMPPredToAArch64CC(
4174-
(CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
4175-
CondCode2);
4182+
changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
41764183

41774184
// changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
41784185
// instructions to emit the comparison.
@@ -4181,16 +4188,11 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
41814188
if (CondCode2 != AArch64CC::AL)
41824189
return false;
41834190

4184-
// Make sure we'll be able to select the compare.
4185-
unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
4186-
if (!CmpOpc)
4191+
if (!emitFPCompare(CondDef->getOperand(2).getReg(),
4192+
CondDef->getOperand(3).getReg(), MIB)) {
4193+
LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
41874194
return false;
4188-
4189-
// Emit a new compare.
4190-
auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
4191-
if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
4192-
Cmp.addUse(CondDef->getOperand(3).getReg());
4193-
constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
4195+
}
41944196
}
41954197

41964198
// Emit the select.

llvm/test/CodeGen/AArch64/GlobalISel/select-fcmp.mir

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,3 +54,56 @@ body: |
5454
%3:gpr(s32) = G_FCMP floatpred(oeq), %0(s32), %2
5555
$s0 = COPY %3(s32)
5656
RET_ReallyLR implicit $s0
57+
58+
...
59+
---
60+
name: notzero_s64
61+
alignment: 4
62+
legalized: true
63+
regBankSelected: true
64+
tracksRegLiveness: true
65+
machineFunctionInfo: {}
66+
body: |
67+
bb.1:
68+
liveins: $d0, $d1
69+
70+
; CHECK-LABEL: name: notzero_s64
71+
; CHECK: liveins: $d0, $d1
72+
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
73+
; CHECK: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 112
74+
; CHECK: FCMPDrr [[COPY]], [[FMOVDi]], implicit-def $nzcv
75+
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
76+
; CHECK: $s0 = COPY [[CSINCWr]]
77+
; CHECK: RET_ReallyLR implicit $s0
78+
%0:fpr(s64) = COPY $d0
79+
%1:fpr(s64) = COPY $d1
80+
%2:fpr(s64) = G_FCONSTANT double 1.000000e+00
81+
%3:gpr(s32) = G_FCMP floatpred(oeq), %0(s64), %2
82+
$s0 = COPY %3(s32)
83+
RET_ReallyLR implicit $s0
84+
85+
86+
...
87+
---
88+
name: zero_s64
89+
alignment: 4
90+
legalized: true
91+
regBankSelected: true
92+
tracksRegLiveness: true
93+
body: |
94+
bb.1:
95+
liveins: $d0, $d1, $s0
96+
97+
; CHECK-LABEL: name: zero_s64
98+
; CHECK: liveins: $d0, $d1, $s0
99+
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
100+
; CHECK: FCMPDri [[COPY]], implicit-def $nzcv
101+
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
102+
; CHECK: $s0 = COPY [[CSINCWr]]
103+
; CHECK: RET_ReallyLR implicit $s0
104+
%0:fpr(s64) = COPY $d0
105+
%1:fpr(s64) = COPY $d1
106+
%2:fpr(s64) = G_FCONSTANT double 0.000000e+00
107+
%3:gpr(s32) = G_FCMP floatpred(oeq), %0(s64), %2
108+
$s0 = COPY %3(s32)
109+
RET_ReallyLR implicit $s0

0 commit comments

Comments
 (0)