Skip to content

Commit 5f19028

Browse files
committed
Fix vector split
1 parent b8f89e2 commit 5f19028

File tree

3 files changed

+133
-32
lines changed

3 files changed

+133
-32
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1353,8 +1353,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
13531353
// And the same for FMAXNUM_IEEE and FMINNUM_IEEE.
13541354
for (auto Op :
13551355
{ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
1356-
ISD::FROUND, ISD::FROUNDEVEN, ISD::STRICT_FFLOOR, ISD::FMAXNUM_IEEE,
1357-
ISD::FMINNUM_IEEE, ISD::STRICT_FNEARBYINT, ISD::STRICT_FCEIL,
1356+
ISD::FROUND, ISD::FROUNDEVEN, ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE,
1357+
ISD::STRICT_FFLOOR, ISD::STRICT_FNEARBYINT, ISD::STRICT_FCEIL,
13581358
ISD::STRICT_FRINT, ISD::STRICT_FTRUNC, ISD::STRICT_FROUND,
13591359
ISD::STRICT_FROUNDEVEN}) {
13601360
for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
@@ -1364,6 +1364,16 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
13641364
setOperationAction(Op, Ty, Legal);
13651365
}
13661366

1367+
// TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM already handles these ops,
1368+
// but narrowInsertExtractVectorBinOp expects them to be LegalOrCustom, so
1369+
// mark them Custom.
1370+
for (auto Op : {ISD::FMAXIMUMNUM, ISD::FMINIMUMNUM}) {
1371+
for (MVT Ty : {MVT::v4f32, MVT::v2f64})
1372+
setOperationAction(Op, Ty, Custom);
1373+
if (Subtarget->hasFullFP16())
1374+
setOperationAction(Op, MVT::v8f16, Custom);
1375+
}
1376+
13671377
// LRINT and LLRINT.
13681378
for (auto Op : {ISD::LRINT, ISD::LLRINT}) {
13691379
for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
@@ -7208,6 +7218,9 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
72087218
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAX_PRED);
72097219
case ISD::FMAXNUM:
72107220
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED);
7221+
case ISD::FMAXIMUMNUM:
7222+
case ISD::FMINIMUMNUM:
7223+
return LowerFMINIMUMNUM_FMAXIMUMNUM(Op, DAG);
72117224
case ISD::FMINIMUM:
72127225
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMIN_PRED);
72137226
case ISD::FMINNUM:
@@ -10236,6 +10249,28 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
1023610249
return BitCast(VT, BSP, DAG);
1023710250
}
1023810251

10252+
// Custom lowering for ISD::FMINIMUMNUM / ISD::FMAXIMUMNUM.
//
// Rewrites the node as ISD::FMINNUM_IEEE (for FMINIMUMNUM) or
// ISD::FMAXNUM_IEEE (for any other opcode routed here), keeping the result
// type and node flags. Unless the node carries the no-NaNs flag, each operand
// that is not known to be free of signaling NaNs is first wrapped in an
// ISD::FCANONICALIZE node.
//
// \param Op  The node being lowered; operands 0 and 1 are the two inputs.
// \param DAG The SelectionDAG used to query operands and build new nodes.
// \returns   The replacement *_IEEE min/max node.
SDValue
10253+
AArch64TargetLowering::LowerFMINIMUMNUM_FMAXIMUMNUM(SDValue Op,
10254+
SelectionDAG &DAG) const {
10255+
SDValue LHS = Op.getOperand(0);
10256+
SDValue RHS = Op.getOperand(1);
10257+
unsigned Opc = Op.getOpcode();
10258+
SDLoc DL(Op);
10259+
EVT VT = Op->getValueType(0);
10260+
// Pick the IEEE-style min/max opcode that matches the incoming opcode.
unsigned NewOp =
10261+
Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
10262+
SDNodeFlags Flags = Op->getFlags();
10263+
10264+
// With the no-NaNs flag set the operands are used as-is; otherwise operands
// that may be signaling NaNs are canonicalized first.
// NOTE(review): presumably this quiets sNaNs so the *_IEEE node only ever
// sees quiet NaNs -- confirm against the ISD opcode documentation.
if (!Flags.hasNoNaNs()) {
10265+
if (!DAG.isKnownNeverSNaN(LHS)) {
10266+
LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
10267+
}
10268+
if (!DAG.isKnownNeverSNaN(RHS)) {
10269+
RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
10270+
}
10271+
}
10272+
// Emit the replacement node with the original type and flags.
return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
10273+
}
1023910274
SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op,
1024010275
SelectionDAG &DAG) const {
1024110276
if (DAG.getMachineFunction().getFunction().hasFnAttribute(

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1203,6 +1203,7 @@ class AArch64TargetLowering : public TargetLowering {
12031203
SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
12041204
SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
12051205
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
1206+
SDValue LowerFMINIMUMNUM_FMAXIMUMNUM(SDValue Op, SelectionDAG &DAG) const;
12061207
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
12071208
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
12081209
SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;

llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll

Lines changed: 95 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc --mtriple=aarch64 --mattr=+fullfp16 < %s | FileCheck %s --check-prefix=AARCH64
33

4-
define <2 x double> @max_v2f64(<2 x double> %a, <2 x double> %b) {
5-
; AARCH64-LABEL: max_v2f64:
4+
define <2 x double> @max_nnan_v2f64(<2 x double> %a, <2 x double> %b) {
5+
; AARCH64-LABEL: max_nnan_v2f64:
66
; AARCH64: // %bb.0: // %entry
77
; AARCH64-NEXT: fmaxnm v0.2d, v0.2d, v1.2d
88
; AARCH64-NEXT: ret
@@ -11,29 +11,62 @@ entry:
1111
ret <2 x double> %c
1212
}
1313

14-
define <4 x float> @max_v4f32(<4 x float> %a, <4 x float> %b) {
15-
; AARCH64-LABEL: max_v4f32:
14+
define <4 x float> @max_nnan_v4f32(<4 x float> %a, <4 x float> %b) {
15+
; AARCH64-LABEL: max_nnan_v4f32:
1616
; AARCH64: // %bb.0: // %entry
1717
; AARCH64-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
1818
; AARCH64-NEXT: ret
1919
entry:
20-
%c = call nnan <4 x float> @llvm.maximumnum.v2f64(<4 x float> %a, <4 x float> %b)
20+
%c = call nnan <4 x float> @llvm.maximumnum.v4f32(<4 x float> %a, <4 x float> %b)
2121
ret <4 x float> %c
2222
}
2323

24-
25-
define <8 x half> @max_v8f16(<8 x half> %a, <8 x half> %b) {
26-
; AARCH64-LABEL: max_v8f16:
24+
define <8 x half> @max_nnan_v8f16(<8 x half> %a, <8 x half> %b) {
25+
; AARCH64-LABEL: max_nnan_v8f16:
2726
; AARCH64: // %bb.0: // %entry
2827
; AARCH64-NEXT: fmaxnm v0.8h, v0.8h, v1.8h
2928
; AARCH64-NEXT: ret
3029
entry:
31-
%c = call nnan <8 x half> @llvm.maximumnum.v4f16(<8 x half> %a, <8 x half> %b)
30+
%c = call nnan <8 x half> @llvm.maximumnum.v8f16(<8 x half> %a, <8 x half> %b)
3231
ret <8 x half> %c
3332
}
3433

35-
define double @max_f64(double %a, double %b) {
36-
; AARCH64-LABEL: max_f64:
34+
define <4 x double> @max_nnan_v4f64(<4 x double> %a, <4 x double> %b) {
35+
; AARCH64-LABEL: max_nnan_v4f64:
36+
; AARCH64: // %bb.0: // %entry
37+
; AARCH64-NEXT: fmaxnm v1.2d, v1.2d, v3.2d
38+
; AARCH64-NEXT: fmaxnm v0.2d, v0.2d, v2.2d
39+
; AARCH64-NEXT: ret
40+
entry:
41+
%c = call nnan <4 x double> @llvm.maximumnum.v4f64(<4 x double> %a, <4 x double> %b)
42+
ret <4 x double> %c
43+
}
44+
45+
define <8 x float> @max_nnan_v8f32(<8 x float> %a, <8 x float> %b) {
46+
; AARCH64-LABEL: max_nnan_v8f32:
47+
; AARCH64: // %bb.0: // %entry
48+
; AARCH64-NEXT: fmaxnm v1.4s, v1.4s, v3.4s
49+
; AARCH64-NEXT: fmaxnm v0.4s, v0.4s, v2.4s
50+
; AARCH64-NEXT: ret
51+
entry:
52+
%c = call nnan <8 x float> @llvm.maximumnum.v8f32(<8 x float> %a, <8 x float> %b)
53+
ret <8 x float> %c
54+
}
55+
56+
define <16 x half> @max_nnan_v16f16(<16 x half> %a, <16 x half> %b) {
57+
; AARCH64-LABEL: max_nnan_v16f16:
58+
; AARCH64: // %bb.0: // %entry
59+
; AARCH64-NEXT: fmaxnm v1.8h, v1.8h, v3.8h
60+
; AARCH64-NEXT: fmaxnm v0.8h, v0.8h, v2.8h
61+
; AARCH64-NEXT: ret
62+
entry:
63+
%c = call nnan <16 x half> @llvm.maximumnum.v16f16(<16 x half> %a, <16 x half> %b)
64+
ret <16 x half> %c
65+
}
66+
67+
68+
define double @max_nnan_f64(double %a, double %b) {
69+
; AARCH64-LABEL: max_nnan_f64:
3770
; AARCH64: // %bb.0: // %entry
3871
; AARCH64-NEXT: fmaxnm d0, d0, d1
3972
; AARCH64-NEXT: ret
@@ -42,8 +75,8 @@ entry:
4275
ret double %c
4376
}
4477

45-
define float @max_f32(float %a, float %b) {
46-
; AARCH64-LABEL: max_f32:
78+
define float @max_nnan_f32(float %a, float %b) {
79+
; AARCH64-LABEL: max_nnan_f32:
4780
; AARCH64: // %bb.0: // %entry
4881
; AARCH64-NEXT: fmaxnm s0, s0, s1
4982
; AARCH64-NEXT: ret
@@ -52,8 +85,8 @@ entry:
5285
ret float %c
5386
}
5487

55-
define half @max_f16(half %a, half %b) {
56-
; AARCH64-LABEL: max_f16:
88+
define half @max_nnan_f16(half %a, half %b) {
89+
; AARCH64-LABEL: max_nnan_f16:
5790
; AARCH64: // %bb.0: // %entry
5891
; AARCH64-NEXT: fmaxnm h0, h0, h1
5992
; AARCH64-NEXT: ret
@@ -62,8 +95,8 @@ entry:
6295
ret half %c
6396
}
6497

65-
define <2 x double> @min_v2f64(<2 x double> %a, <2 x double> %b) {
66-
; AARCH64-LABEL: min_v2f64:
98+
define <2 x double> @min_nnan_v2f64(<2 x double> %a, <2 x double> %b) {
99+
; AARCH64-LABEL: min_nnan_v2f64:
67100
; AARCH64: // %bb.0: // %entry
68101
; AARCH64-NEXT: fminnm v0.2d, v0.2d, v1.2d
69102
; AARCH64-NEXT: ret
@@ -72,29 +105,61 @@ entry:
72105
ret <2 x double> %c
73106
}
74107

75-
define <4 x float> @min_v4f32(<4 x float> %a, <4 x float> %b) {
76-
; AARCH64-LABEL: min_v4f32:
108+
define <4 x float> @min_nnan_v4f32(<4 x float> %a, <4 x float> %b) {
109+
; AARCH64-LABEL: min_nnan_v4f32:
77110
; AARCH64: // %bb.0: // %entry
78111
; AARCH64-NEXT: fminnm v0.4s, v0.4s, v1.4s
79112
; AARCH64-NEXT: ret
80113
entry:
81-
%c = call nnan <4 x float> @llvm.minimumnum.v2f64(<4 x float> %a, <4 x float> %b)
114+
%c = call nnan <4 x float> @llvm.minimumnum.v4f32(<4 x float> %a, <4 x float> %b)
82115
ret <4 x float> %c
83116
}
84117

85-
86-
define <8 x half> @min_v8f16(<8 x half> %a, <8 x half> %b) {
87-
; AARCH64-LABEL: min_v8f16:
118+
define <8 x half> @min_nnan_v8f16(<8 x half> %a, <8 x half> %b) {
119+
; AARCH64-LABEL: min_nnan_v8f16:
88120
; AARCH64: // %bb.0: // %entry
89121
; AARCH64-NEXT: fminnm v0.8h, v0.8h, v1.8h
90122
; AARCH64-NEXT: ret
91123
entry:
92-
%c = call nnan <8 x half> @llvm.minimumnum.v4f16(<8 x half> %a, <8 x half> %b)
124+
%c = call nnan <8 x half> @llvm.minimumnum.v8f16(<8 x half> %a, <8 x half> %b)
93125
ret <8 x half> %c
94126
}
95127

96-
define double @min_f64(double %a, double %b) {
97-
; AARCH64-LABEL: min_f64:
128+
define <4 x double> @min_nnan_v4f64(<4 x double> %a, <4 x double> %b) {
129+
; AARCH64-LABEL: min_nnan_v4f64:
130+
; AARCH64: // %bb.0: // %entry
131+
; AARCH64-NEXT: fminnm v1.2d, v1.2d, v3.2d
132+
; AARCH64-NEXT: fminnm v0.2d, v0.2d, v2.2d
133+
; AARCH64-NEXT: ret
134+
entry:
135+
%c = call nnan <4 x double> @llvm.minimumnum.v4f64(<4 x double> %a, <4 x double> %b)
136+
ret <4 x double> %c
137+
}
138+
139+
define <8 x float> @min_nnan_v8f32(<8 x float> %a, <8 x float> %b) {
140+
; AARCH64-LABEL: min_nnan_v8f32:
141+
; AARCH64: // %bb.0: // %entry
142+
; AARCH64-NEXT: fminnm v1.4s, v1.4s, v3.4s
143+
; AARCH64-NEXT: fminnm v0.4s, v0.4s, v2.4s
144+
; AARCH64-NEXT: ret
145+
entry:
146+
%c = call nnan <8 x float> @llvm.minimumnum.v8f32(<8 x float> %a, <8 x float> %b)
147+
ret <8 x float> %c
148+
}
149+
150+
define <16 x half> @min_nnan_v16f16(<16 x half> %a, <16 x half> %b) {
151+
; AARCH64-LABEL: min_nnan_v16f16:
152+
; AARCH64: // %bb.0: // %entry
153+
; AARCH64-NEXT: fminnm v1.8h, v1.8h, v3.8h
154+
; AARCH64-NEXT: fminnm v0.8h, v0.8h, v2.8h
155+
; AARCH64-NEXT: ret
156+
entry:
157+
%c = call nnan <16 x half> @llvm.minimumnum.v16f16(<16 x half> %a, <16 x half> %b)
158+
ret <16 x half> %c
159+
}
160+
161+
define double @min_nnan_f64(double %a, double %b) {
162+
; AARCH64-LABEL: min_nnan_f64:
98163
; AARCH64: // %bb.0: // %entry
99164
; AARCH64-NEXT: fminnm d0, d0, d1
100165
; AARCH64-NEXT: ret
@@ -103,8 +168,8 @@ entry:
103168
ret double %c
104169
}
105170

106-
define float @min_f32(float %a, float %b) {
107-
; AARCH64-LABEL: min_f32:
171+
define float @min_nnan_f32(float %a, float %b) {
172+
; AARCH64-LABEL: min_nnan_f32:
108173
; AARCH64: // %bb.0: // %entry
109174
; AARCH64-NEXT: fminnm s0, s0, s1
110175
; AARCH64-NEXT: ret
@@ -113,8 +178,8 @@ entry:
113178
ret float %c
114179
}
115180

116-
define half @min_f16(half %a, half %b) {
117-
; AARCH64-LABEL: min_f16:
181+
define half @min_nnan_f16(half %a, half %b) {
182+
; AARCH64-LABEL: min_nnan_f16:
118183
; AARCH64: // %bb.0: // %entry
119184
; AARCH64-NEXT: fminnm h0, h0, h1
120185
; AARCH64-NEXT: ret

0 commit comments

Comments
 (0)