-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[DAG] Add generic expansion for ISD::FCANONICALIZE nodes #142105
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
da0f89e
c1ee0e1
07f5741
cd13388
510da45
badd5c1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3356,6 +3356,28 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { | |
Results.push_back(Op); | ||
break; | ||
} | ||
case ISD::FCANONICALIZE: { | ||
// This implements llvm.canonicalize.f* by multiplication with 1.0, as | ||
// suggested in | ||
// https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic. | ||
// It uses strict_fp operations even outside a strict_fp context in order | ||
// to guarantee that the canonicalization is not optimized away by later | ||
// passes. | ||
|
||
// Get operand x. | ||
SDValue Operand = Node->getOperand(0); | ||
// Get fp value type used. | ||
EVT VT = Operand.getValueType(); | ||
// Produce appropriately-typed 1.0 constant. | ||
SDValue One = DAG.getConstantFP(1.0, dl, VT); | ||
// Produce multiplication node x * 1.0. | ||
SDValue Chain = DAG.getEntryNode(); | ||
SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other}, | ||
{Chain, Operand, One}); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This loses the fast-math flags. You can propagate the existing flags and additionally add NoFPExcept. |
||
|
||
Results.push_back(Mul); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ignoring the new result chain feels wrong, but it's probably correct in this context. |
||
break; | ||
} | ||
case ISD::SIGN_EXTEND_INREG: { | ||
EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); | ||
EVT VT = Node->getValueType(0); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -870,6 +870,9 @@ void TargetLoweringBase::initActions() { | |
ISD::FATAN2}, | ||
{MVT::f32, MVT::f64, MVT::f128}, Expand); | ||
|
||
// Insert custom handling default for llvm.canonicalize.*. | ||
setOperationAction(ISD::FCANONICALIZE, {MVT::f32, MVT::f64}, Expand); | ||
|
||
Comment on lines
+873
to
+875
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If we're going to expand by default, it should be done for all types, not just f32/f64. |
||
// FIXME: Query RuntimeLibCalls to make the decision. | ||
setOperationAction({ISD::LRINT, ISD::LLRINT, ISD::LROUND, ISD::LLROUND}, | ||
{MVT::f32, MVT::f64, MVT::f128}, LibCall); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march s390x-unknown-linux-gnu --version 5 | ||
; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z16 < %s | FileCheck %s -check-prefixes=Z16 | ||
|
||
define float @canonicalize_fp32(float %a) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No half tests? Also, are we missing tests for the other touched targets? |
||
; Z16-LABEL: canonicalize_fp32: | ||
; Z16: # %bb.0: | ||
; Z16-NEXT: vgmf %v1, 2, 8 | ||
; Z16-NEXT: meebr %f0, %f1 | ||
; Z16-NEXT: br %r14 | ||
%canonicalized = call float @llvm.canonicalize.f32(float %a) | ||
ret float %canonicalized | ||
} | ||
|
||
define double @canonicalize_fp64(double %a) { | ||
; Z16-LABEL: canonicalize_fp64: | ||
; Z16: # %bb.0: | ||
; Z16-NEXT: vgmg %v1, 2, 11 | ||
; Z16-NEXT: mdbr %f0, %f1 | ||
; Z16-NEXT: br %r14 | ||
%canonicalized = call double @llvm.canonicalize.f64(double %a) | ||
ret double %canonicalized | ||
} | ||
|
||
define void @canonicalize_ptr_f32(float * %out) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The test should be updated to use opaque pointers. |
||
; Z16-LABEL: canonicalize_ptr_f32: | ||
; Z16: # %bb.0: | ||
; Z16-NEXT: vgmf %v0, 2, 8 | ||
; Z16-NEXT: meeb %f0, 0(%r2) | ||
; Z16-NEXT: ste %f0, 0(%r2) | ||
; Z16-NEXT: br %r14 | ||
%val = load float, float * %out | ||
%canonicalized = call float @llvm.canonicalize.f32(float %val) | ||
store float %canonicalized, float * %out | ||
ret void | ||
} | ||
|
||
define void @canonicalize_ptr_f64(double * %out) { | ||
; Z16-LABEL: canonicalize_ptr_f64: | ||
; Z16: # %bb.0: | ||
; Z16-NEXT: vgmg %v0, 2, 11 | ||
; Z16-NEXT: mdb %f0, 0(%r2) | ||
; Z16-NEXT: std %f0, 0(%r2) | ||
; Z16-NEXT: br %r14 | ||
%val = load double, double * %out | ||
%canonicalized = call double @llvm.canonicalize.f64(double %val) | ||
store double %canonicalized, double * %out | ||
ret void | ||
} | ||
|
||
define <4 x float> @canonicalize_v4f32(<4 x float> %a) { | ||
; Z16-LABEL: canonicalize_v4f32: | ||
; Z16: # %bb.0: | ||
; Z16-NEXT: vrepf %v0, %v24, 3 | ||
; Z16-NEXT: vgmf %v1, 2, 8 | ||
; Z16-NEXT: vrepf %v2, %v24, 2 | ||
; Z16-NEXT: meebr %f0, %f1 | ||
; Z16-NEXT: meebr %f2, %f1 | ||
; Z16-NEXT: vrepf %v3, %v24, 1 | ||
; Z16-NEXT: vmrhf %v0, %v2, %v0 | ||
; Z16-NEXT: wfmsb %f2, %v24, %f1 | ||
; Z16-NEXT: wfmsb %f1, %f3, %f1 | ||
; Z16-NEXT: vmrhf %v1, %v2, %v1 | ||
; Z16-NEXT: vmrhg %v24, %v1, %v0 | ||
; Z16-NEXT: br %r14 | ||
%canonicalized = call <4 x float> @llvm.canonicalize.v4f32(<4 x float> %a) | ||
ret <4 x float> %canonicalized | ||
} | ||
|
||
define <4 x double> @canonicalize_v4f64(<4 x double> %a) { | ||
; Z16-LABEL: canonicalize_v4f64: | ||
; Z16: # %bb.0: | ||
; Z16-NEXT: vgmg %v0, 2, 11 | ||
; Z16-NEXT: vrepg %v2, %v24, 1 | ||
; Z16-NEXT: wfmdb %f1, %v24, %f0 | ||
; Z16-NEXT: mdbr %f2, %f0 | ||
; Z16-NEXT: vmrhg %v24, %v1, %v2 | ||
; Z16-NEXT: vrepg %v2, %v26, 1 | ||
; Z16-NEXT: wfmdb %f1, %v26, %f0 | ||
; Z16-NEXT: wfmdb %f0, %f2, %f0 | ||
; Z16-NEXT: vmrhg %v26, %v1, %v0 | ||
; Z16-NEXT: br %r14 | ||
%canonicalized = call <4 x double> @llvm.canonicalize.v4f64(<4 x double> %a) | ||
ret <4 x double> %canonicalized | ||
} | ||
|
||
define void @canonicalize_ptr_v4f32(<4 x float> * %out) { | ||
; Z16-LABEL: canonicalize_ptr_v4f32: | ||
; Z16: # %bb.0: | ||
; Z16-NEXT: vl %v0, 0(%r2), 3 | ||
; Z16-NEXT: vrepf %v1, %v0, 3 | ||
; Z16-NEXT: vgmf %v2, 2, 8 | ||
; Z16-NEXT: vrepf %v3, %v0, 2 | ||
; Z16-NEXT: meebr %f1, %f2 | ||
; Z16-NEXT: meebr %f3, %f2 | ||
; Z16-NEXT: vmrhf %v1, %v3, %v1 | ||
; Z16-NEXT: wfmsb %f3, %f0, %f2 | ||
; Z16-NEXT: vrepf %v0, %v0, 1 | ||
; Z16-NEXT: meebr %f0, %f2 | ||
; Z16-NEXT: vmrhf %v0, %v3, %v0 | ||
; Z16-NEXT: vmrhg %v0, %v0, %v1 | ||
; Z16-NEXT: vst %v0, 0(%r2), 3 | ||
; Z16-NEXT: br %r14 | ||
%val = load <4 x float>, <4 x float> * %out | ||
%canonicalized = call <4 x float> @llvm.canonicalize.v4f32(<4 x float> %val) | ||
store <4 x float> %canonicalized, <4 x float> * %out | ||
ret void | ||
} | ||
|
||
define void @canonicalize_ptr_v4f64(<4 x double> * %out) { | ||
; Z16-LABEL: canonicalize_ptr_v4f64: | ||
; Z16: # %bb.0: | ||
; Z16-NEXT: vl %v1, 16(%r2), 4 | ||
; Z16-NEXT: vgmg %v2, 2, 11 | ||
; Z16-NEXT: wfmdb %f3, %f1, %f2 | ||
; Z16-NEXT: vrepg %v1, %v1, 1 | ||
; Z16-NEXT: mdbr %f1, %f2 | ||
; Z16-NEXT: vl %v0, 0(%r2), 4 | ||
; Z16-NEXT: vmrhg %v1, %v3, %v1 | ||
; Z16-NEXT: wfmdb %f3, %f0, %f2 | ||
; Z16-NEXT: vrepg %v0, %v0, 1 | ||
; Z16-NEXT: mdbr %f0, %f2 | ||
; Z16-NEXT: vmrhg %v0, %v3, %v0 | ||
; Z16-NEXT: vst %v0, 0(%r2), 4 | ||
; Z16-NEXT: vst %v1, 16(%r2), 4 | ||
; Z16-NEXT: br %r14 | ||
%val = load <4 x double>, <4 x double> * %out | ||
%canonicalized = call <4 x double> @llvm.canonicalize.v4f64(<4 x double> %val) | ||
store <4 x double> %canonicalized, <4 x double> * %out | ||
ret void | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These individual comments look superfluous?