-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[Legalizer] Expand fmaximum and fminimum #67301
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
aba6ae1
b50ffdc
604fd6e
4c4a890
8ca3196
ddbc848
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8366,6 +8366,64 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, | |
return SDValue(); | ||
} | ||
|
||
SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N, | ||
SelectionDAG &DAG) const { | ||
SDLoc DL(N); | ||
SDValue LHS = N->getOperand(0); | ||
SDValue RHS = N->getOperand(1); | ||
unsigned Opc = N->getOpcode(); | ||
EVT VT = N->getValueType(0); | ||
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); | ||
bool IsMax = Opc == ISD::FMAXIMUM; | ||
|
||
if (VT.isVector() && | ||
isOperationLegalOrCustomOrPromote(Opc, VT.getScalarType())) | ||
return SDValue(); | ||
|
||
// First, implement comparison not propagating NaN. If no native fmin or fmax | ||
// available, use plain select with setcc instead. | ||
SDValue MinMax; | ||
unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE; | ||
unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM; | ||
if (isOperationLegalOrCustom(CompOpcIeee, VT)) { | ||
MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS); | ||
} else if (isOperationLegalOrCustom(CompOpc, VT)) { | ||
MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS); | ||
} else { | ||
// NaN (if exists) will be propagated later, so orderness doesn't matter. | ||
SDValue Compare = | ||
DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SETGT/SETLT leave it up to the target whether it returns true or false for NaN. Is that intentional? If so it's probably worth a comment. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes here it assumes no NaN exists (if either operand is NaN, it will be propagated in following code) |
||
MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS); | ||
} | ||
|
||
// Propagate any NaN of both operands | ||
if (!N->getFlags().hasNoNaNs() && | ||
(!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) { | ||
ConstantFP *FPNaN = ConstantFP::get( | ||
*DAG.getContext(), APFloat::getNaN(DAG.EVTToAPFloatSemantics(VT))); | ||
MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO), | ||
DAG.getConstantFP(*FPNaN, DL, VT), MinMax); | ||
} | ||
|
||
// fminimum/fmaximum requires -0.0 less than +0.0 | ||
if (!N->getFlags().hasNoSignedZeros() && !DAG.isKnownNeverZeroFloat(RHS) && | ||
!DAG.isKnownNeverZeroFloat(LHS)) { | ||
SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax, | ||
DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ); | ||
SDValue TestZero = | ||
DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32); | ||
SDValue LCmp = DAG.getSelect( | ||
DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS, | ||
MinMax); | ||
SDValue RCmp = DAG.getSelect( | ||
DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, | ||
LCmp); | ||
MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. random thought - can this be done better with FCOPYSIGN? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We still need isfpclass to know which is negative/positive zero and whether both are zeros, I think. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should try to avoid using is_fpclass here. Additionally, I think we have under-defined the internally used IEEE nodes. As currently defined, minnum_ieee/maxnum_ieee have unspecified signed 0 order. However for AMDGPU at least, the actual hardware instructions have always appropriately ordered 0s. We could either refine the _IEEE node definitions to be IEEE -2019 and require ordered 0 behavior which doesn't require this fixup. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. PowerPC implements While LoongArch (using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I opened #85195 to just fix the definitions here. If LoongArch doesn't behave correctly, we can lower it to preserve the sign there There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. #85195 is landed, so we should just assume the _IEEE flavors preserve the sign |
||
} | ||
|
||
return MinMax; | ||
} | ||
|
||
/// Returns a true value if this FPClassTest can be performed with an ordered | ||
/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns | ||
/// std::nullopt if it cannot be performed as a compare with 0. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 | ||
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s | ||
|
||
; Exercises the llvm.minimum intrinsic on two fp128 operands; the assertions
; inside the function are the autogenerated expected llc output for it.
define fp128 @f128_minimum(fp128 %a, fp128 %b) { | ||
; CHECK-LABEL: f128_minimum: | ||
; CHECK: # %bb.0: # %entry | ||
; CHECK-NEXT: xscmpuqp 0, 2, 3 | ||
; CHECK-NEXT: vmr 4, 2 | ||
; CHECK-NEXT: bge 0, .LBB0_8 | ||
; CHECK-NEXT: # %bb.1: # %entry | ||
; CHECK-NEXT: bun 0, .LBB0_9 | ||
; CHECK-NEXT: .LBB0_2: # %entry | ||
; CHECK-NEXT: xststdcqp 0, 2, 4 | ||
; CHECK-NEXT: bc 4, 2, .LBB0_10 | ||
; CHECK-NEXT: .LBB0_3: # %entry | ||
; CHECK-NEXT: xststdcqp 0, 3, 4 | ||
; CHECK-NEXT: bc 12, 2, .LBB0_5 | ||
; CHECK-NEXT: .LBB0_4: # %entry | ||
; CHECK-NEXT: vmr 3, 2 | ||
; CHECK-NEXT: .LBB0_5: # %entry | ||
; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha | ||
; CHECK-NEXT: addi 3, 3, .LCPI0_1@toc@l | ||
; CHECK-NEXT: lxv 34, 0(3) | ||
; CHECK-NEXT: xscmpuqp 0, 4, 2 | ||
; CHECK-NEXT: beq 0, .LBB0_7 | ||
; CHECK-NEXT: # %bb.6: # %entry | ||
; CHECK-NEXT: vmr 3, 4 | ||
; CHECK-NEXT: .LBB0_7: # %entry | ||
; CHECK-NEXT: vmr 2, 3 | ||
; CHECK-NEXT: blr | ||
; CHECK-NEXT: .LBB0_8: # %entry | ||
; CHECK-NEXT: vmr 4, 3 | ||
; CHECK-NEXT: bnu 0, .LBB0_2 | ||
; CHECK-NEXT: .LBB0_9: | ||
; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha | ||
; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l | ||
; CHECK-NEXT: lxv 36, 0(3) | ||
; CHECK-NEXT: xststdcqp 0, 2, 4 | ||
; CHECK-NEXT: bc 12, 2, .LBB0_3 | ||
; CHECK-NEXT: .LBB0_10: # %entry | ||
; CHECK-NEXT: vmr 2, 4 | ||
; CHECK-NEXT: xststdcqp 0, 3, 4 | ||
; CHECK-NEXT: bc 4, 2, .LBB0_4 | ||
; CHECK-NEXT: b .LBB0_5 | ||
entry: | ||
%m = call fp128 @llvm.minimum.f128(fp128 %a, fp128 %b) | ||
ret fp128 %m | ||
} | ||
|
||
; Exercises the llvm.maximum intrinsic on two fp128 operands; the assertions
; inside the function are the autogenerated expected llc output for it.
define fp128 @f128_maximum(fp128 %a, fp128 %b) { | ||
; CHECK-LABEL: f128_maximum: | ||
; CHECK: # %bb.0: # %entry | ||
; CHECK-NEXT: xscmpuqp 0, 2, 3 | ||
; CHECK-NEXT: vmr 4, 2 | ||
; CHECK-NEXT: ble 0, .LBB1_8 | ||
; CHECK-NEXT: # %bb.1: # %entry | ||
; CHECK-NEXT: bun 0, .LBB1_9 | ||
; CHECK-NEXT: .LBB1_2: # %entry | ||
; CHECK-NEXT: xststdcqp 0, 2, 8 | ||
; CHECK-NEXT: bc 4, 2, .LBB1_10 | ||
; CHECK-NEXT: .LBB1_3: # %entry | ||
; CHECK-NEXT: xststdcqp 0, 3, 8 | ||
; CHECK-NEXT: bc 12, 2, .LBB1_5 | ||
; CHECK-NEXT: .LBB1_4: # %entry | ||
; CHECK-NEXT: vmr 3, 2 | ||
; CHECK-NEXT: .LBB1_5: # %entry | ||
; CHECK-NEXT: addis 3, 2, .LCPI1_1@toc@ha | ||
; CHECK-NEXT: addi 3, 3, .LCPI1_1@toc@l | ||
; CHECK-NEXT: lxv 34, 0(3) | ||
; CHECK-NEXT: xscmpuqp 0, 4, 2 | ||
; CHECK-NEXT: beq 0, .LBB1_7 | ||
; CHECK-NEXT: # %bb.6: # %entry | ||
; CHECK-NEXT: vmr 3, 4 | ||
; CHECK-NEXT: .LBB1_7: # %entry | ||
; CHECK-NEXT: vmr 2, 3 | ||
; CHECK-NEXT: blr | ||
; CHECK-NEXT: .LBB1_8: # %entry | ||
; CHECK-NEXT: vmr 4, 3 | ||
; CHECK-NEXT: bnu 0, .LBB1_2 | ||
; CHECK-NEXT: .LBB1_9: | ||
; CHECK-NEXT: addis 3, 2, .LCPI1_0@toc@ha | ||
; CHECK-NEXT: addi 3, 3, .LCPI1_0@toc@l | ||
; CHECK-NEXT: lxv 36, 0(3) | ||
; CHECK-NEXT: xststdcqp 0, 2, 8 | ||
; CHECK-NEXT: bc 12, 2, .LBB1_3 | ||
; CHECK-NEXT: .LBB1_10: # %entry | ||
; CHECK-NEXT: vmr 2, 4 | ||
; CHECK-NEXT: xststdcqp 0, 3, 8 | ||
; CHECK-NEXT: bc 4, 2, .LBB1_4 | ||
; CHECK-NEXT: b .LBB1_5 | ||
entry: | ||
%m = call fp128 @llvm.maximum.f128(fp128 %a, fp128 %b) | ||
ret fp128 %m | ||
} | ||
|
||
declare fp128 @llvm.minimum.f128(fp128, fp128) | ||
declare fp128 @llvm.maximum.f128(fp128, fp128) |
Uh oh!
There was an error while loading. Please reload this page.