-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV][GISel] Add support for G_FCMP with F and D extensions. #70624
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-risc-v @llvm/pr-subscribers-llvm-globalisel Author: Craig Topper (topperc) ChangesWe only have instructions for OEQ, OLT, and OGT. We need to convert other comparison codes into those. I think we'll likely want to split this up in the future to support optimizations. Maybe do some of it in the legalizer or in a new post legalizer lowering pass. So this patch is just enough to get something working without adding 11 additional patterns to tablegen for each type. Patch is 94.91 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/70624.diff 9 Files Affected:
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index bd3662c942de889..ca879a5e81c0499 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -67,6 +67,9 @@ class RISCVInstructionSelector : public InstructionSelector {
bool selectSelect(MachineInstr &MI, MachineIRBuilder &MIB,
MachineRegisterInfo &MRI) const;
+ bool emitFPCompare(CmpInst::Predicate Pred, Register DstReg, Register LHS,
+ Register RHS, MachineIRBuilder &MIB) const;
+
ComplexRendererFns selectShiftMask(MachineOperand &Root) const;
ComplexRendererFns selectAddrRegImm(MachineOperand &Root) const;
@@ -366,6 +369,18 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) {
}
case TargetOpcode::G_SELECT:
return selectSelect(MI, MIB, MRI);
+ case TargetOpcode::G_FCMP: {
+ CmpInst::Predicate Pred =
+ static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+
+ if (!emitFPCompare(Pred, MI.getOperand(0).getReg(),
+ MI.getOperand(2).getReg(), MI.getOperand(3).getReg(),
+ MIB))
+ return false;
+
+ MI.eraseFromParent();
+ return true;
+ }
default:
return false;
}
@@ -674,6 +689,123 @@ bool RISCVInstructionSelector::selectSelect(MachineInstr &MI,
return constrainSelectedInstRegOperands(*Result, TII, TRI, RBI);
}
+// Convert an FCMP predicate to one of the supported F or D instructions.
+static unsigned getFCmpOpcode(CmpInst::Predicate Pred, unsigned Size) {
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ switch (Pred) {
+ default:
+ llvm_unreachable("Unsupported predicate");
+ case CmpInst::FCMP_OLT:
+ return Size == 32 ? RISCV::FLT_S : RISCV::FLT_D;
+ case CmpInst::FCMP_OLE:
+ return Size == 32 ? RISCV::FLE_S : RISCV::FLE_D;
+ case CmpInst::FCMP_OEQ:
+ return Size == 32 ? RISCV::FEQ_S : RISCV::FEQ_D;
+ }
+}
+
+// Try legalizing an FCMP by swapping or inverting the predicate to one that
+// is supported.
+static bool legalizeFCmpPredicate(Register &LHS, Register &RHS,
+ CmpInst::Predicate &Pred, bool &NeedInvert) {
+ auto isLegalFCmpPredicate = [](CmpInst::Predicate Pred) {
+ return Pred == CmpInst::FCMP_OLT || Pred == CmpInst::FCMP_OLE ||
+ Pred == CmpInst::FCMP_OEQ;
+ };
+
+ CmpInst::Predicate InvPred = CmpInst::getSwappedPredicate(Pred);
+ if (isLegalFCmpPredicate(InvPred)) {
+ Pred = InvPred;
+ std::swap(LHS, RHS);
+ return true;
+ }
+
+ InvPred = CmpInst::getInversePredicate(Pred);
+ NeedInvert = true;
+ if (isLegalFCmpPredicate(InvPred)) {
+ Pred = InvPred;
+ return true;
+ }
+ InvPred = CmpInst::getSwappedPredicate(InvPred);
+ if (isLegalFCmpPredicate(InvPred)) {
+ Pred = InvPred;
+ std::swap(LHS, RHS);
+ return true;
+ }
+
+ return false;
+}
+
+// Emit a sequence of instructions to compare LHS and RHS using Pred. Return
+// the result in DstReg.
+// FIXME: Maybe we should expand this earlier.
+bool RISCVInstructionSelector::emitFPCompare(CmpInst::Predicate Pred,
+ Register DstReg, Register LHS,
+ Register RHS,
+ MachineIRBuilder &MIB) const {
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+
+ unsigned Size = MRI.getType(LHS).getSizeInBits();
+ assert((Size == 32 || Size == 64) && "Unexpected size");
+
+ Register TmpReg = DstReg;
+
+ bool NeedInvert = false;
+ // First try swapping operands or inverting.
+ if (legalizeFCmpPredicate(LHS, RHS, Pred, NeedInvert)) {
+ if (NeedInvert)
+ TmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ auto Cmp = MIB.buildInstr(getFCmpOpcode(Pred, Size), {TmpReg}, {LHS, RHS});
+ if (!Cmp.constrainAllUses(TII, TRI, RBI))
+ return false;
+ } else if (Pred == CmpInst::FCMP_ONE || Pred == CmpInst::FCMP_UEQ) {
+ // fcmp ogt LHS, RHS => (OR (FLT LHS, RHS), (FLT RHS, LHS))
+ NeedInvert = Pred == CmpInst::FCMP_UEQ;
+ auto Cmp1 = MIB.buildInstr(getFCmpOpcode(CmpInst::FCMP_OLT, Size),
+ {&RISCV::GPRRegClass}, {LHS, RHS});
+ if (!Cmp1.constrainAllUses(TII, TRI, RBI))
+ return false;
+ auto Cmp2 = MIB.buildInstr(getFCmpOpcode(CmpInst::FCMP_OLT, Size),
+ {&RISCV::GPRRegClass}, {RHS, LHS});
+ if (!Cmp2.constrainAllUses(TII, TRI, RBI))
+ return false;
+ if (NeedInvert)
+ TmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ auto Or =
+ MIB.buildInstr(RISCV::OR, {TmpReg}, {Cmp1.getReg(0), Cmp2.getReg(0)});
+ if (!Or.constrainAllUses(TII, TRI, RBI))
+ return false;
+ } else if (Pred == CmpInst::FCMP_ORD || Pred == CmpInst::FCMP_UNO) {
+ // fcmp ord LHS, RHS => (AND (FEQ LHS, LHS), (FEQ RHS, RHS))
+ // FIXME: If LHS and RHS are the same we can use a single FEQ.
+ NeedInvert = Pred == CmpInst::FCMP_UNO;
+ auto Cmp1 = MIB.buildInstr(getFCmpOpcode(CmpInst::FCMP_OEQ, Size),
+ {&RISCV::GPRRegClass}, {LHS, LHS});
+ if (!Cmp1.constrainAllUses(TII, TRI, RBI))
+ return false;
+ auto Cmp2 = MIB.buildInstr(getFCmpOpcode(CmpInst::FCMP_OEQ, Size),
+ {&RISCV::GPRRegClass}, {RHS, RHS});
+ if (!Cmp2.constrainAllUses(TII, TRI, RBI))
+ return false;
+ if (NeedInvert)
+ TmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ auto And =
+ MIB.buildInstr(RISCV::AND, {TmpReg}, {Cmp1.getReg(0), Cmp2.getReg(0)});
+ if (!And.constrainAllUses(TII, TRI, RBI))
+ return false;
+ } else
+ llvm_unreachable("Unhandled predicate");
+
+ // Emit an XORI to invert the result if needed.
+ if (NeedInvert) {
+ auto Xor = MIB.buildInstr(RISCV::XORI, {DstReg}, {TmpReg}).addImm(1);
+ if (!Xor.constrainAllUses(TII, TRI, RBI))
+ return false;
+ }
+
+ return true;
+}
+
namespace llvm {
InstructionSelector *
createRISCVInstructionSelector(const RISCVTargetMachine &TM,
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index e80d620936e893b..7e15d39016a3403 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -208,6 +208,14 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) {
typeIs(1, s32)(Query));
});
+ getActionDefinitionsBuilder(G_FCMP)
+ .legalIf([=, &ST](const LegalityQuery &Query) -> bool {
+ return typeIs(0, sXLen)(Query) &&
+ ((ST.hasStdExtF() && typeIs(1, s32)(Query)) ||
+ (ST.hasStdExtD() && typeIs(1, s64)(Query)));
+ })
+ .clampScalar(0, sXLen, sXLen);
+
getLegacyLegalizerInfo().computeTables();
}
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
index f005948d2094445..aa5311957351382 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
@@ -228,6 +228,18 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
&RISCV::ValueMappings[RISCV::FPR64Idx]});
break;
}
+ case TargetOpcode::G_FCMP: {
+ LLT Ty = MRI.getType(MI.getOperand(2).getReg());
+
+ unsigned Size = Ty.getSizeInBits();
+ assert((Size == 32 || Size == 64) && "Unsupported size for G_FCMP");
+
+ auto *FPRValueMapping = Size == 32 ? &RISCV::ValueMappings[RISCV::FPR32Idx]
+ : &RISCV::ValueMappings[RISCV::FPR64Idx];
+ OperandsMapping = getOperandsMapping(
+ {GPRValueMapping, nullptr, FPRValueMapping, FPRValueMapping});
+ break;
+ }
default:
return getInvalidInstructionMapping();
}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/fcmp-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/fcmp-rv32.mir
new file mode 100644
index 000000000000000..285a35f989f9d86
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/fcmp-rv32.mir
@@ -0,0 +1,706 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+d -run-pass=instruction-select \
+# RUN: -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: fcmp_oeq_f32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $f10_f, $f11_f
+
+ ; CHECK-LABEL: name: fcmp_oeq_f32
+ ; CHECK: liveins: $f10_f, $f11_f
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $f10_f
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY $f11_f
+ ; CHECK-NEXT: [[FEQ_S:%[0-9]+]]:gpr = nofpexcept FEQ_S [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $x10 = COPY [[FEQ_S]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:fprb(s32) = COPY $f10_f
+ %1:fprb(s32) = COPY $f11_f
+ %4:gprb(s32) = G_FCMP floatpred(oeq), %0(s32), %1
+ $x10 = COPY %4(s32)
+ PseudoRET implicit $x10
+
+...
+---
+name: fcmp_ogt_f32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $f10_f, $f11_f
+
+ ; CHECK-LABEL: name: fcmp_ogt_f32
+ ; CHECK: liveins: $f10_f, $f11_f
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $f10_f
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY $f11_f
+ ; CHECK-NEXT: [[FLT_S:%[0-9]+]]:gpr = FLT_S [[COPY1]], [[COPY]]
+ ; CHECK-NEXT: $x10 = COPY [[FLT_S]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:fprb(s32) = COPY $f10_f
+ %1:fprb(s32) = COPY $f11_f
+ %4:gprb(s32) = G_FCMP floatpred(ogt), %0(s32), %1
+ $x10 = COPY %4(s32)
+ PseudoRET implicit $x10
+
+...
+---
+name: fcmp_oge_f32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $f10_f, $f11_f
+
+ ; CHECK-LABEL: name: fcmp_oge_f32
+ ; CHECK: liveins: $f10_f, $f11_f
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $f10_f
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY $f11_f
+ ; CHECK-NEXT: [[FLE_S:%[0-9]+]]:gpr = FLE_S [[COPY1]], [[COPY]]
+ ; CHECK-NEXT: $x10 = COPY [[FLE_S]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:fprb(s32) = COPY $f10_f
+ %1:fprb(s32) = COPY $f11_f
+ %4:gprb(s32) = G_FCMP floatpred(oge), %0(s32), %1
+ $x10 = COPY %4(s32)
+ PseudoRET implicit $x10
+
+...
+---
+name: fcmp_olt_f32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $f10_f, $f11_f
+
+ ; CHECK-LABEL: name: fcmp_olt_f32
+ ; CHECK: liveins: $f10_f, $f11_f
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $f10_f
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY $f11_f
+ ; CHECK-NEXT: [[FLT_S:%[0-9]+]]:gpr = nofpexcept FLT_S [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $x10 = COPY [[FLT_S]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:fprb(s32) = COPY $f10_f
+ %1:fprb(s32) = COPY $f11_f
+ %4:gprb(s32) = G_FCMP floatpred(olt), %0(s32), %1
+ $x10 = COPY %4(s32)
+ PseudoRET implicit $x10
+
+...
+---
+name: fcmp_ole_f32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $f10_f, $f11_f
+
+ ; CHECK-LABEL: name: fcmp_ole_f32
+ ; CHECK: liveins: $f10_f, $f11_f
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $f10_f
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY $f11_f
+ ; CHECK-NEXT: [[FLE_S:%[0-9]+]]:gpr = nofpexcept FLE_S [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $x10 = COPY [[FLE_S]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:fprb(s32) = COPY $f10_f
+ %1:fprb(s32) = COPY $f11_f
+ %4:gprb(s32) = G_FCMP floatpred(ole), %0(s32), %1
+ $x10 = COPY %4(s32)
+ PseudoRET implicit $x10
+
+...
+---
+name: fcmp_one_f32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $f10_f, $f11_f
+
+ ; CHECK-LABEL: name: fcmp_one_f32
+ ; CHECK: liveins: $f10_f, $f11_f
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $f10_f
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY $f11_f
+ ; CHECK-NEXT: [[FLT_S:%[0-9]+]]:gpr = FLT_S [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[FLT_S1:%[0-9]+]]:gpr = FLT_S [[COPY1]], [[COPY]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:gpr = OR [[FLT_S]], [[FLT_S1]]
+ ; CHECK-NEXT: $x10 = COPY [[OR]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:fprb(s32) = COPY $f10_f
+ %1:fprb(s32) = COPY $f11_f
+ %4:gprb(s32) = G_FCMP floatpred(one), %0(s32), %1
+ $x10 = COPY %4(s32)
+ PseudoRET implicit $x10
+
+...
+---
+name: fcmp_ord_f32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $f10_f, $f11_f
+
+ ; CHECK-LABEL: name: fcmp_ord_f32
+ ; CHECK: liveins: $f10_f, $f11_f
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $f10_f
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY $f11_f
+ ; CHECK-NEXT: [[FEQ_S:%[0-9]+]]:gpr = FEQ_S [[COPY]], [[COPY]]
+ ; CHECK-NEXT: [[FEQ_S1:%[0-9]+]]:gpr = FEQ_S [[COPY1]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:gpr = AND [[FEQ_S]], [[FEQ_S1]]
+ ; CHECK-NEXT: $x10 = COPY [[AND]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:fprb(s32) = COPY $f10_f
+ %1:fprb(s32) = COPY $f11_f
+ %4:gprb(s32) = G_FCMP floatpred(ord), %0(s32), %1
+ $x10 = COPY %4(s32)
+ PseudoRET implicit $x10
+
+...
+---
+name: fcmp_ueq_f32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $f10_f, $f11_f
+
+ ; CHECK-LABEL: name: fcmp_ueq_f32
+ ; CHECK: liveins: $f10_f, $f11_f
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $f10_f
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY $f11_f
+ ; CHECK-NEXT: [[FLT_S:%[0-9]+]]:gpr = FLT_S [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[FLT_S1:%[0-9]+]]:gpr = FLT_S [[COPY1]], [[COPY]]
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:gpr = OR [[FLT_S]], [[FLT_S1]]
+ ; CHECK-NEXT: [[XORI:%[0-9]+]]:gpr = XORI [[OR]], 1
+ ; CHECK-NEXT: $x10 = COPY [[XORI]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:fprb(s32) = COPY $f10_f
+ %1:fprb(s32) = COPY $f11_f
+ %4:gprb(s32) = G_FCMP floatpred(ueq), %0(s32), %1
+ $x10 = COPY %4(s32)
+ PseudoRET implicit $x10
+
+...
+---
+name: fcmp_ugt_f32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $f10_f, $f11_f
+
+ ; CHECK-LABEL: name: fcmp_ugt_f32
+ ; CHECK: liveins: $f10_f, $f11_f
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $f10_f
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY $f11_f
+ ; CHECK-NEXT: [[FLE_S:%[0-9]+]]:gpr = FLE_S [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[XORI:%[0-9]+]]:gpr = XORI [[FLE_S]], 1
+ ; CHECK-NEXT: $x10 = COPY [[XORI]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:fprb(s32) = COPY $f10_f
+ %1:fprb(s32) = COPY $f11_f
+ %4:gprb(s32) = G_FCMP floatpred(ugt), %0(s32), %1
+ $x10 = COPY %4(s32)
+ PseudoRET implicit $x10
+
+...
+---
+name: fcmp_uge_f32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $f10_f, $f11_f
+
+ ; CHECK-LABEL: name: fcmp_uge_f32
+ ; CHECK: liveins: $f10_f, $f11_f
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $f10_f
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY $f11_f
+ ; CHECK-NEXT: [[FLT_S:%[0-9]+]]:gpr = FLT_S [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[XORI:%[0-9]+]]:gpr = XORI [[FLT_S]], 1
+ ; CHECK-NEXT: $x10 = COPY [[XORI]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:fprb(s32) = COPY $f10_f
+ %1:fprb(s32) = COPY $f11_f
+ %4:gprb(s32) = G_FCMP floatpred(uge), %0(s32), %1
+ $x10 = COPY %4(s32)
+ PseudoRET implicit $x10
+
+...
+---
+name: fcmp_ult_f32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $f10_f, $f11_f
+
+ ; CHECK-LABEL: name: fcmp_ult_f32
+ ; CHECK: liveins: $f10_f, $f11_f
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $f10_f
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY $f11_f
+ ; CHECK-NEXT: [[FLE_S:%[0-9]+]]:gpr = FLE_S [[COPY1]], [[COPY]]
+ ; CHECK-NEXT: [[XORI:%[0-9]+]]:gpr = XORI [[FLE_S]], 1
+ ; CHECK-NEXT: $x10 = COPY [[XORI]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:fprb(s32) = COPY $f10_f
+ %1:fprb(s32) = COPY $f11_f
+ %4:gprb(s32) = G_FCMP floatpred(ult), %0(s32), %1
+ $x10 = COPY %4(s32)
+ PseudoRET implicit $x10
+
+...
+---
+name: fcmp_ule_f32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $f10_f, $f11_f
+
+ ; CHECK-LABEL: name: fcmp_ule_f32
+ ; CHECK: liveins: $f10_f, $f11_f
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $f10_f
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY $f11_f
+ ; CHECK-NEXT: [[FLT_S:%[0-9]+]]:gpr = FLT_S [[COPY1]], [[COPY]]
+ ; CHECK-NEXT: [[XORI:%[0-9]+]]:gpr = XORI [[FLT_S]], 1
+ ; CHECK-NEXT: $x10 = COPY [[XORI]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:fprb(s32) = COPY $f10_f
+ %1:fprb(s32) = COPY $f11_f
+ %4:gprb(s32) = G_FCMP floatpred(ule), %0(s32), %1
+ $x10 = COPY %4(s32)
+ PseudoRET implicit $x10
+
+...
+---
+name: fcmp_une_f32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $f10_f, $f11_f
+
+ ; CHECK-LABEL: name: fcmp_une_f32
+ ; CHECK: liveins: $f10_f, $f11_f
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $f10_f
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY $f11_f
+ ; CHECK-NEXT: [[FEQ_S:%[0-9]+]]:gpr = FEQ_S [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[XORI:%[0-9]+]]:gpr = XORI [[FEQ_S]], 1
+ ; CHECK-NEXT: $x10 = COPY [[XORI]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:fprb(s32) = COPY $f10_f
+ %1:fprb(s32) = COPY $f11_f
+ %4:gprb(s32) = G_FCMP floatpred(une), %0(s32), %1
+ $x10 = COPY %4(s32)
+ PseudoRET implicit $x10
+
+...
+---
+name: fcmp_uno_f32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $f10_f, $f11_f
+
+ ; CHECK-LABEL: name: fcmp_uno_f32
+ ; CHECK: liveins: $f10_f, $f11_f
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $f10_f
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY $f11_f
+ ; CHECK-NEXT: [[FEQ_S:%[0-9]+]]:gpr = FEQ_S [[COPY]], [[COPY]]
+ ; CHECK-NEXT: [[FEQ_S1:%[0-9]+]]:gpr = FEQ_S [[COPY1]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:gpr = AND [[FEQ_S]], [[FEQ_S1]]
+ ; CHECK-NEXT: [[XORI:%[0-9]+]]:gpr = XORI [[AND]], 1
+ ; CHECK-NEXT: $x10 = COPY [[XORI]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:fprb(s32) = COPY $f10_f
+ %1:fprb(s32) = COPY $f11_f
+ %4:gprb(s32) = G_FCMP floatpred(uno), %0(s32), %1
+ $x10 = COPY %4(s32)
+ PseudoRET implicit $x10
+
+...
+---
+name: fcmp_oeq_f64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $f10_d, $f11_d
+
+ ; CHECK-LABEL: name: fcmp_oeq_f64
+ ; CHECK: liveins: $f10_d, $f11_d
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $f10_d
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $f11_d
+ ; CHECK-NEXT: [[FEQ_D:%[0-9]+]]:gpr = nofpexcept FEQ_D [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $x10 = COPY [[FEQ_D]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:fprb(s64) = COPY $f10_d
+ %1:fprb(s64) = COPY $f11_d
+ %4:gprb(s32) = G_FCMP floatpred(oeq), %0(s64), %1
+ $x10 = COPY %4(s32)
+ PseudoRET implicit $x10
+
+...
+---
+name: fcmp_ogt_f64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $f10_d, $f11_d
+
+ ; CHECK-LABEL: name: fcmp_ogt...
[truncated]
|
|
||
// Try legalizing an FCMP by swapping or inverting the predicate to one that | ||
// is supported. | ||
static bool legalizeFCmpPredicate(Register &LHS, Register &RHS, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This kind of stuff should go in LegalizerHelper. We should be able to add the cond code to the legalizer queries for G_ICMP/G_FCMP and replicate the DAG's setCondCodeAction
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It should be fine to claim support for all condition codes in the legalizer. Do target independent combines and later down the pass pipeline, they can do RISCV-V specific optimizations.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My immediate goal was just to get -O0 working. I will start looking at how to put this in LegalizerHelper, but I don't want that to block this patch if possible.
@@ -67,6 +67,9 @@ class RISCVInstructionSelector : public InstructionSelector { | |||
bool selectSelect(MachineInstr &MI, MachineIRBuilder &MIB, | |||
MachineRegisterInfo &MRI) const; | |||
|
|||
bool emitFPCompare(CmpInst::Predicate Pred, Register DstReg, Register LHS, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this function be renamed to selectFPCompare
? It looks like we are using it to select a G_FCMP.
MIB)) | ||
return false; | ||
|
||
MI.eraseFromParent(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we move the eraseFromParent
into emitFPCompare
like the other select functions do?
auto isLegalFCmpPredicate = [](CmpInst::Predicate Pred) { | ||
return Pred == CmpInst::FCMP_OLT || Pred == CmpInst::FCMP_OLE || | ||
Pred == CmpInst::FCMP_OEQ; | ||
}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you need to check the case where Pred
is already legal: isLegalFCmpPredicate(Pred)
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see that olt
is getting selected correctly, but I don't see it getting handled by any of the code in this patch. How is that code getting selected? selectImpl
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. tablegen has patterns for oeq, olt, and ole so selectImpl gets it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Got it. If that’s the case, then it’s up to you if you’d like to have the assert here. I made my initial comment thinking we had to handle it with custom code.
CmpInst::Predicate Pred = | ||
static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); | ||
|
||
Register DstReg = MI.getOperand(0).getReg(); | ||
Register LHS = MI.getOperand(2).getReg(); | ||
Register RHS = MI.getOperand(3).getReg(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We have the GICmp/GFCmp
wrapper classes that you can use. They provide things like getCond()
, getLHSReg()
. To use it just cast the MachineInstr
: auto &Cmp = cast<GFCmp>(MI);
0d555f8
to
8f57641
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM w/ minor comments.
I'm fine with keeping legalizeFCmpPredicate
for now, as long as it will eventually be replaced by LegalizeHelper.
} else | ||
llvm_unreachable("Unhandled predicate"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
style: add braces here to keep it uniform with prior if blocks
if (!Cmp.constrainAllUses(TII, TRI, RBI)) | ||
return false; | ||
} else if (Pred == CmpInst::FCMP_ONE || Pred == CmpInst::FCMP_UEQ) { | ||
// fcmp ogt LHS, RHS => (OR (FLT LHS, RHS), (FLT RHS, LHS)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fcmp uno
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fcmp one
?
We only have instructions for OEQ, OLT, and OGT. We need to convert other comparison codes into those. I think we'll likely want to split this up in the future to support optimizations. Maybe do some of it in the legalizer or in a new post legalizer lowering pass. So this patch is just enough to get something working without adding 11 additional patterns to tablegen for each type.
8f57641
to
cf0e8bb
Compare
…70624) We only have instructions for OEQ, OLT, and OLE. We need to convert other comparison codes into those. I think we'll likely want to split this up in the future to support optimizations. Maybe do some of it in the legalizer or in a new post legalizer lowering pass. So this patch is just enough to get something working without adding 11 additional patterns to tablegen for each type.
We only have instructions for OEQ, OLT, and OGT. We need to convert other comparison codes into those.
I think we'll likely want to split this up in the future to support optimizations. Maybe do some of it in the legalizer or in a new post legalizer lowering pass. So this patch is just enough to get something working without adding 11 additional patterns to tablegen for each type.