Skip to content

DAG: Assert fcmp uno runtime calls are boolean values #142898

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14933,6 +14933,25 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) {
}
}

// If we have (AssertZext (and (AssertSext X, iX), M), iY) and Y is smaller
// than X, and the And doesn't change the lower iX bits, we can move the
// AssertZext in front of the And and drop the AssertSext.
if (Opcode == ISD::AssertZext && N0.getOpcode() == ISD::AND &&
N0.hasOneUse() && N0.getOperand(0).getOpcode() == ISD::AssertSext &&
isa<ConstantSDNode>(N0.getOperand(1))) {
SDValue BigA = N0.getOperand(0);
EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
const APInt &Mask = N0.getConstantOperandAPInt(1);
if (AssertVT.bitsLT(BigA_AssertVT) &&
Mask.countr_one() >= BigA_AssertVT.getScalarSizeInBits()) {
SDLoc DL(N);
SDValue NewAssert =
DAG.getNode(Opcode, DL, N->getValueType(0), BigA.getOperand(0), N1);
return DAG.getNode(ISD::AND, DL, N->getValueType(0), NewAssert,
N0.getOperand(1));
}
}

return SDValue();
}

Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -429,8 +429,16 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
// Update Chain.
Chain = Call.second;
} else {
assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
"unordered call should be simple boolean");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're adding an assertion that CCCode should be SETEQ or SETNE... then you're changing the condition code for RTLIB::UO_F32 to SETCC_INVALID, which then goes through here? I must be missing something...

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, wait, I see, we don't call setCmpLibcallCC in that case, so the ARMISelLowering change is just a no-op.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it was setting the default value so this never did anything


EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
if (getBooleanContents(RetVT) == ZeroOrOneBooleanContent) {
NewLHS = DAG.getNode(ISD::AssertZext, dl, RetVT, Call.first,
DAG.getValueType(MVT::i1));
}

SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
CCCode = getCmpLibcallCC(LC2);
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
{ RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

// Single-precision floating-point arithmetic helper functions
// RTABI chapter 4.1.2, Table 4
Expand All @@ -630,7 +630,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
{ RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },

// Floating-point to integer conversions.
// RTABI chapter 4.1.2, Table 6
Expand Down
13 changes: 7 additions & 6 deletions llvm/test/CodeGen/ARM/fpcmp_ueq.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@ entry:
}

; CHECK-ARMv4-LABEL: f7:
; CHECK-ARMv4-DAG: bl ___eqsf2
; CHECK-ARMv4-DAG: bl ___unordsf2
; CHECK-ARMv4: cmp r0, #0
; CHECK-ARMv4: movne r0, #1
; CHECK-ARMv4: orrs r0, r0,
; CHECK-ARMv4: moveq r0, #42
; CHECK-ARMv4: bl ___eqsf2
; CHECK-ARMv4-NEXT: rsbs r1, r0, #0
; CHECK-ARMv4-NEXT: adc r6, r0, r1

; CHECK-ARMv4: bl ___unordsf2
; CHECK-ARMv4-NEXT: orrs r0, r0, r6
; CHECK-ARMv4-NEXT: mov r0, #154

; CHECK-ARMv7-LABEL: f7:
; CHECK-ARMv7: vcmp.f32
Expand Down
40 changes: 20 additions & 20 deletions llvm/test/CodeGen/RISCV/double-fcmp-strict.ll
Original file line number Diff line number Diff line change
Expand Up @@ -471,15 +471,15 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind strictfp {
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: call __eqdf2
; RV32I-NEXT: seqz s4, a0
; RV32I-NEXT: call __unorddf2
; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: mv a0, s3
; RV32I-NEXT: mv a1, s2
; RV32I-NEXT: mv a2, s1
; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: call __unorddf2
; RV32I-NEXT: snez a0, a0
; RV32I-NEXT: or a0, a0, s4
; RV32I-NEXT: call __eqdf2
; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: or a0, s4, a0
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
Expand All @@ -498,13 +498,13 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind strictfp {
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a1
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: call __eqdf2
; RV64I-NEXT: seqz s2, a0
; RV64I-NEXT: call __unorddf2
; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unorddf2
; RV64I-NEXT: snez a0, a0
; RV64I-NEXT: or a0, a0, s2
; RV64I-NEXT: call __eqdf2
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: or a0, s2, a0
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
Expand Down Expand Up @@ -1199,15 +1199,15 @@ define i32 @fcmps_ueq(double %a, double %b) nounwind strictfp {
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: call __eqdf2
; RV32I-NEXT: seqz s4, a0
; RV32I-NEXT: call __unorddf2
; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: mv a0, s3
; RV32I-NEXT: mv a1, s2
; RV32I-NEXT: mv a2, s1
; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: call __unorddf2
; RV32I-NEXT: snez a0, a0
; RV32I-NEXT: or a0, a0, s4
; RV32I-NEXT: call __eqdf2
; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: or a0, s4, a0
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
Expand All @@ -1226,13 +1226,13 @@ define i32 @fcmps_ueq(double %a, double %b) nounwind strictfp {
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a1
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: call __eqdf2
; RV64I-NEXT: seqz s2, a0
; RV64I-NEXT: call __unorddf2
; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unorddf2
; RV64I-NEXT: snez a0, a0
; RV64I-NEXT: or a0, a0, s2
; RV64I-NEXT: call __eqdf2
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: or a0, s2, a0
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/RISCV/double-fcmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -403,15 +403,15 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind {
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: call __eqdf2
; RV32I-NEXT: seqz s4, a0
; RV32I-NEXT: call __unorddf2
; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: mv a0, s3
; RV32I-NEXT: mv a1, s2
; RV32I-NEXT: mv a2, s1
; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: call __unorddf2
; RV32I-NEXT: snez a0, a0
; RV32I-NEXT: or a0, a0, s4
; RV32I-NEXT: call __eqdf2
; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: or a0, s4, a0
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
Expand All @@ -430,13 +430,13 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind {
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a1
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: call __eqdf2
; RV64I-NEXT: seqz s2, a0
; RV64I-NEXT: call __unorddf2
; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unorddf2
; RV64I-NEXT: snez a0, a0
; RV64I-NEXT: or a0, a0, s2
; RV64I-NEXT: call __eqdf2
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: or a0, s2, a0
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
Expand Down
40 changes: 20 additions & 20 deletions llvm/test/CodeGen/RISCV/float-fcmp-strict.ll
Original file line number Diff line number Diff line change
Expand Up @@ -382,13 +382,13 @@ define i32 @fcmp_ueq(float %a, float %b) nounwind strictfp {
; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a1
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: call __eqsf2
; RV32I-NEXT: seqz s2, a0
; RV32I-NEXT: call __unordsf2
; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: call __unordsf2
; RV32I-NEXT: snez a0, a0
; RV32I-NEXT: or a0, a0, s2
; RV32I-NEXT: call __eqsf2
; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: or a0, s2, a0
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
Expand All @@ -405,13 +405,13 @@ define i32 @fcmp_ueq(float %a, float %b) nounwind strictfp {
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a1
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: call __eqsf2
; RV64I-NEXT: seqz s2, a0
; RV64I-NEXT: call __unordsf2
; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2
; RV64I-NEXT: snez a0, a0
; RV64I-NEXT: or a0, a0, s2
; RV64I-NEXT: call __eqsf2
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: or a0, s2, a0
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
Expand Down Expand Up @@ -991,13 +991,13 @@ define i32 @fcmps_ueq(float %a, float %b) nounwind strictfp {
; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a1
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: call __eqsf2
; RV32I-NEXT: seqz s2, a0
; RV32I-NEXT: call __unordsf2
; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: call __unordsf2
; RV32I-NEXT: snez a0, a0
; RV32I-NEXT: or a0, a0, s2
; RV32I-NEXT: call __eqsf2
; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: or a0, s2, a0
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
Expand All @@ -1014,13 +1014,13 @@ define i32 @fcmps_ueq(float %a, float %b) nounwind strictfp {
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a1
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: call __eqsf2
; RV64I-NEXT: seqz s2, a0
; RV64I-NEXT: call __unordsf2
; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2
; RV64I-NEXT: snez a0, a0
; RV64I-NEXT: or a0, a0, s2
; RV64I-NEXT: call __eqsf2
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: or a0, s2, a0
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/RISCV/float-fcmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -344,13 +344,13 @@ define i32 @fcmp_ueq(float %a, float %b) nounwind {
; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a1
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: call __eqsf2
; RV32I-NEXT: seqz s2, a0
; RV32I-NEXT: call __unordsf2
; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: call __unordsf2
; RV32I-NEXT: snez a0, a0
; RV32I-NEXT: or a0, a0, s2
; RV32I-NEXT: call __eqsf2
; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: or a0, s2, a0
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
Expand All @@ -367,13 +367,13 @@ define i32 @fcmp_ueq(float %a, float %b) nounwind {
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a1
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: call __eqsf2
; RV64I-NEXT: seqz s2, a0
; RV64I-NEXT: call __unordsf2
; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: mv a1, s0
; RV64I-NEXT: call __unordsf2
; RV64I-NEXT: snez a0, a0
; RV64I-NEXT: or a0, a0, s2
; RV64I-NEXT: call __eqsf2
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: or a0, s2, a0
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
Expand Down
7 changes: 6 additions & 1 deletion llvm/test/CodeGen/Thumb2/float-cmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,13 @@ define i1 @cmp_d_one(double %a, double %b) {
; CHECK-LABEL: cmp_d_one:
; NONE: bl __aeabi_dcmpeq
; NONE: bl __aeabi_dcmpun
; SP: bl __aeabi_dcmpeq
; SP: bl __aeabi_dcmpun
; SP: eor r8, r0, #1
; SP: bl __aeabi_dcmpeq
; SP-NEXT: clz r0, r0
; SP-NEXT: lsrs r0, r0, #5
; SP-NEXT: ands.w r0, r0, r8

; DP: vcmp.f64
; DP: movmi r0, #1
; DP: movgt r0, #1
Expand Down
4 changes: 0 additions & 4 deletions llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3443,8 +3443,6 @@ define i64 @cmp_ueq_q(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 {
; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll __unordtf2
; X86-NEXT: addl $32, %esp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: setne %al
; X86-NEXT: orb %bl, %al
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
Expand Down Expand Up @@ -3526,8 +3524,6 @@ define i64 @cmp_ueq_q(i64 %a, i64 %b, fp128 %x, fp128 %y) #0 {
; WIN-X86-NEXT: pushl {{[0-9]+}}(%esp)
; WIN-X86-NEXT: calll ___unordtf2
; WIN-X86-NEXT: addl $32, %esp
; WIN-X86-NEXT: testl %eax, %eax
; WIN-X86-NEXT: setne %al
; WIN-X86-NEXT: orb %bl, %al
; WIN-X86-NEXT: jne LBB39_1
; WIN-X86-NEXT: # %bb.2:
Expand Down
2 changes: 0 additions & 2 deletions llvm/test/CodeGen/X86/fpcmp-soft-fp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,6 @@ entry:
; CHECK: calll __eqdf2
; CHECK: sete
; CHECK: calll __unorddf2
; CHECK: setne
; CHECK: or
; CHECK: retl

define i1 @test11(double %d) #0 {
Expand Down
Loading