[AArch64] Fix vector splitting of FMAXIMUMNUM/FMINIMUMNUM

wzssyqa · wzssyqa · commit 5f1902814cb5 · 2024-09-21T15:08:36.000+08:00
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1353,8 +1353,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     // And the same for FMAXNUM_IEEE and FMINNUM_IEEE.
     for (auto Op :
          {ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
-          ISD::FROUND, ISD::FROUNDEVEN, ISD::STRICT_FFLOOR, ISD::FMAXNUM_IEEE,
-          ISD::FMINNUM_IEEE, ISD::STRICT_FNEARBYINT, ISD::STRICT_FCEIL,
+          ISD::FROUND, ISD::FROUNDEVEN, ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE,
+          ISD::STRICT_FFLOOR, ISD::STRICT_FNEARBYINT, ISD::STRICT_FCEIL,
           ISD::STRICT_FRINT, ISD::STRICT_FTRUNC, ISD::STRICT_FROUND,
           ISD::STRICT_FROUNDEVEN}) {
       for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
@@ -1364,6 +1364,16 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
           setOperationAction(Op, Ty, Legal);
     }
 
+    // TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM already handles these
+    // operations, but narrowInsertExtractVectorBinOp expects them to be
+    // LegalOrCustom, so mark them Custom here.
+    for (auto Op : {ISD::FMAXIMUMNUM, ISD::FMINIMUMNUM}) {
+      for (MVT Ty : {MVT::v4f32, MVT::v2f64})
+        setOperationAction(Op, Ty, Custom);
+      if (Subtarget->hasFullFP16())
+        setOperationAction(Op, MVT::v8f16, Custom);
+    }
+
     // LRINT and LLRINT.
     for (auto Op : {ISD::LRINT, ISD::LLRINT}) {
       for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
@@ -7208,6 +7218,9 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAX_PRED);
   case ISD::FMAXNUM:
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED);
+  case ISD::FMAXIMUMNUM:
+  case ISD::FMINIMUMNUM:
+    return LowerFMINIMUMNUM_FMAXIMUMNUM(Op, DAG);
   case ISD::FMINIMUM:
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMIN_PRED);
   case ISD::FMINNUM:
@@ -10236,6 +10249,28 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
   return BitCast(VT, BSP, DAG);
 }
 
+// Lower FMINIMUMNUM/FMAXIMUMNUM to FMINNUM_IEEE/FMAXNUM_IEEE. Unless the node
+// has the nnan flag, operands that may be signaling NaNs are canonicalized.
+SDValue
+AArch64TargetLowering::LowerFMINIMUMNUM_FMAXIMUMNUM(SDValue Op,
+                                                    SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  EVT VT = Op->getValueType(0);
+  SDNodeFlags Flags = Op->getFlags();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  bool IsMin = Op.getOpcode() == ISD::FMINIMUMNUM;
+  unsigned NewOp = IsMin ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
+
+  if (!Flags.hasNoNaNs()) {
+    if (!DAG.isKnownNeverSNaN(LHS))
+      LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
+    if (!DAG.isKnownNeverSNaN(RHS))
+      RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
+  }
+  return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
+}
+
 SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op,
                                                  SelectionDAG &DAG) const {
   if (DAG.getMachineFunction().getFunction().hasFnAttribute(
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1203,6 +1203,7 @@ class AArch64TargetLowering : public TargetLowering {
   SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerFMINIMUMNUM_FMAXIMUMNUM(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll b/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc --mtriple=aarch64 --mattr=+fullfp16 < %s | FileCheck %s --check-prefix=AARCH64
 
-define <2 x double> @max_v2f64(<2 x double> %a, <2 x double> %b) {
-; AARCH64-LABEL: max_v2f64:
+define <2 x double> @max_nnan_v2f64(<2 x double> %a, <2 x double> %b) {
+; AARCH64-LABEL: max_nnan_v2f64:
 ; AARCH64:       // %bb.0: // %entry
 ; AARCH64-NEXT:    fmaxnm v0.2d, v0.2d, v1.2d
 ; AARCH64-NEXT:    ret
@@ -11,29 +11,62 @@ entry:
   ret <2 x double> %c
 }
 
-define <4 x float> @max_v4f32(<4 x float> %a, <4 x float> %b) {
-; AARCH64-LABEL: max_v4f32:
+define <4 x float> @max_nnan_v4f32(<4 x float> %a, <4 x float> %b) {
+; AARCH64-LABEL: max_nnan_v4f32:
 ; AARCH64:       // %bb.0: // %entry
 ; AARCH64-NEXT:    fmaxnm v0.4s, v0.4s, v1.4s
 ; AARCH64-NEXT:    ret
 entry:
-  %c = call nnan <4 x float> @llvm.maximumnum.v2f64(<4 x float> %a, <4 x float> %b)
+  %c = call nnan <4 x float> @llvm.maximumnum.v4f32(<4 x float> %a, <4 x float> %b)
   ret <4 x float> %c
 }
 
-
-define <8 x half> @max_v8f16(<8 x half> %a, <8 x half> %b) {
-; AARCH64-LABEL: max_v8f16:
+define <8 x half> @max_nnan_v8f16(<8 x half> %a, <8 x half> %b) {
+; AARCH64-LABEL: max_nnan_v8f16:
 ; AARCH64:       // %bb.0: // %entry
 ; AARCH64-NEXT:    fmaxnm v0.8h, v0.8h, v1.8h
 ; AARCH64-NEXT:    ret
 entry:
-  %c = call nnan <8 x half> @llvm.maximumnum.v4f16(<8 x half> %a, <8 x half> %b)
+  %c = call nnan <8 x half> @llvm.maximumnum.v8f16(<8 x half> %a, <8 x half> %b)
   ret <8 x half> %c
 }
 
-define double @max_f64(double %a, double %b) {
-; AARCH64-LABEL: max_f64:
+define <4 x double> @max_nnan_v4f64(<4 x double> %a, <4 x double> %b) {
+; AARCH64-LABEL: max_nnan_v4f64:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fmaxnm v1.2d, v1.2d, v3.2d
+; AARCH64-NEXT:    fmaxnm v0.2d, v0.2d, v2.2d
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <4 x double> @llvm.maximumnum.v4f64(<4 x double> %a, <4 x double> %b)
+  ret <4 x double> %c
+}
+
+define <8 x float> @max_nnan_v8f32(<8 x float> %a, <8 x float> %b) {
+; AARCH64-LABEL: max_nnan_v8f32:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fmaxnm v1.4s, v1.4s, v3.4s
+; AARCH64-NEXT:    fmaxnm v0.4s, v0.4s, v2.4s
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <8 x float> @llvm.maximumnum.v8f32(<8 x float> %a, <8 x float> %b)
+  ret <8 x float> %c
+}
+
+define <16 x half> @max_nnan_v16f16(<16 x half> %a, <16 x half> %b) {
+; AARCH64-LABEL: max_nnan_v16f16:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fmaxnm v1.8h, v1.8h, v3.8h
+; AARCH64-NEXT:    fmaxnm v0.8h, v0.8h, v2.8h
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <16 x half> @llvm.maximumnum.v16f16(<16 x half> %a, <16 x half> %b)
+  ret <16 x half> %c
+}
+
+
+define double @max_nnan_f64(double %a, double %b) {
+; AARCH64-LABEL: max_nnan_f64:
 ; AARCH64:       // %bb.0: // %entry
 ; AARCH64-NEXT:    fmaxnm d0, d0, d1
 ; AARCH64-NEXT:    ret
@@ -42,8 +75,8 @@ entry:
   ret double %c
 }
 
-define float @max_f32(float %a, float %b) {
-; AARCH64-LABEL: max_f32:
+define float @max_nnan_f32(float %a, float %b) {
+; AARCH64-LABEL: max_nnan_f32:
 ; AARCH64:       // %bb.0: // %entry
 ; AARCH64-NEXT:    fmaxnm s0, s0, s1
 ; AARCH64-NEXT:    ret
@@ -52,8 +85,8 @@ entry:
   ret float %c
 }
 
-define half @max_f16(half %a, half %b) {
-; AARCH64-LABEL: max_f16:
+define half @max_nnan_f16(half %a, half %b) {
+; AARCH64-LABEL: max_nnan_f16:
 ; AARCH64:       // %bb.0: // %entry
 ; AARCH64-NEXT:    fmaxnm h0, h0, h1
 ; AARCH64-NEXT:    ret
@@ -62,8 +95,8 @@ entry:
   ret half %c
 }
 
-define <2 x double> @min_v2f64(<2 x double> %a, <2 x double> %b) {
-; AARCH64-LABEL: min_v2f64:
+define <2 x double> @min_nnan_v2f64(<2 x double> %a, <2 x double> %b) {
+; AARCH64-LABEL: min_nnan_v2f64:
 ; AARCH64:       // %bb.0: // %entry
 ; AARCH64-NEXT:    fminnm v0.2d, v0.2d, v1.2d
 ; AARCH64-NEXT:    ret
@@ -72,29 +105,61 @@ entry:
   ret <2 x double> %c
 }
 
-define <4 x float> @min_v4f32(<4 x float> %a, <4 x float> %b) {
-; AARCH64-LABEL: min_v4f32:
+define <4 x float> @min_nnan_v4f32(<4 x float> %a, <4 x float> %b) {
+; AARCH64-LABEL: min_nnan_v4f32:
 ; AARCH64:       // %bb.0: // %entry
 ; AARCH64-NEXT:    fminnm v0.4s, v0.4s, v1.4s
 ; AARCH64-NEXT:    ret
 entry:
-  %c = call nnan <4 x float> @llvm.minimumnum.v2f64(<4 x float> %a, <4 x float> %b)
+  %c = call nnan <4 x float> @llvm.minimumnum.v4f32(<4 x float> %a, <4 x float> %b)
   ret <4 x float> %c
 }
 
-
-define <8 x half> @min_v8f16(<8 x half> %a, <8 x half> %b) {
-; AARCH64-LABEL: min_v8f16:
+define <8 x half> @min_nnan_v8f16(<8 x half> %a, <8 x half> %b) {
+; AARCH64-LABEL: min_nnan_v8f16:
 ; AARCH64:       // %bb.0: // %entry
 ; AARCH64-NEXT:    fminnm v0.8h, v0.8h, v1.8h
 ; AARCH64-NEXT:    ret
 entry:
-  %c = call nnan <8 x half> @llvm.minimumnum.v4f16(<8 x half> %a, <8 x half> %b)
+  %c = call nnan <8 x half> @llvm.minimumnum.v8f16(<8 x half> %a, <8 x half> %b)
   ret <8 x half> %c
 }
 
-define double @min_f64(double %a, double %b) {
-; AARCH64-LABEL: min_f64:
+define <4 x double> @min_nnan_v4f64(<4 x double> %a, <4 x double> %b) {
+; AARCH64-LABEL: min_nnan_v4f64:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fminnm v1.2d, v1.2d, v3.2d
+; AARCH64-NEXT:    fminnm v0.2d, v0.2d, v2.2d
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <4 x double> @llvm.minimumnum.v4f64(<4 x double> %a, <4 x double> %b)
+  ret <4 x double> %c
+}
+
+define <8 x float> @min_nnan_v8f32(<8 x float> %a, <8 x float> %b) {
+; AARCH64-LABEL: min_nnan_v8f32:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fminnm v1.4s, v1.4s, v3.4s
+; AARCH64-NEXT:    fminnm v0.4s, v0.4s, v2.4s
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <8 x float> @llvm.minimumnum.v8f32(<8 x float> %a, <8 x float> %b)
+  ret <8 x float> %c
+}
+
+define <16 x half> @min_nnan_v16f16(<16 x half> %a, <16 x half> %b) {
+; AARCH64-LABEL: min_nnan_v16f16:
+; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    fminnm v1.8h, v1.8h, v3.8h
+; AARCH64-NEXT:    fminnm v0.8h, v0.8h, v2.8h
+; AARCH64-NEXT:    ret
+entry:
+  %c = call nnan <16 x half> @llvm.minimumnum.v16f16(<16 x half> %a, <16 x half> %b)
+  ret <16 x half> %c
+}
+
+define double @min_nnan_f64(double %a, double %b) {
+; AARCH64-LABEL: min_nnan_f64:
 ; AARCH64:       // %bb.0: // %entry
 ; AARCH64-NEXT:    fminnm d0, d0, d1
 ; AARCH64-NEXT:    ret
@@ -103,8 +168,8 @@ entry:
   ret double %c
 }
 
-define float @min_f32(float %a, float %b) {
-; AARCH64-LABEL: min_f32:
+define float @min_nnan_f32(float %a, float %b) {
+; AARCH64-LABEL: min_nnan_f32:
 ; AARCH64:       // %bb.0: // %entry
 ; AARCH64-NEXT:    fminnm s0, s0, s1
 ; AARCH64-NEXT:    ret
@@ -113,8 +178,8 @@ entry:
   ret float %c
 }
 
-define half @min_f16(half %a, half %b) {
-; AARCH64-LABEL: min_f16:
+define half @min_nnan_f16(half %a, half %b) {
+; AARCH64-LABEL: min_nnan_f16:
 ; AARCH64:       // %bb.0: // %entry
 ; AARCH64-NEXT:    fminnm h0, h0, h1
 ; AARCH64-NEXT:    ret