llvm
diff --git a/‎llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Lines changed: 56 additions & 27 deletions b/‎llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Lines changed: 56 additions & 27 deletions
diff --git a/‎llvm/lib/Target/AArch64/AArch64InstrInfo.td
Lines changed: 36 additions & 3 deletions b/‎llvm/lib/Target/AArch64/AArch64InstrInfo.td
Lines changed: 36 additions & 3 deletions
diff --git a/‎llvm/test/CodeGen/AArch64/f16-instructions.ll
Lines changed: 12 additions & 3 deletions b/‎llvm/test/CodeGen/AArch64/f16-instructions.ll
Lines changed: 12 additions & 3 deletions
@@ -676,11 +676,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::FPOW, MVT::f64, Expand);
   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
-  if (Subtarget->hasFullFP16())
+  if (Subtarget->hasFullFP16()) {
     setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
-  else
+    setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Custom);
+  } else {
     setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
-  setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Promote);
+    setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Promote);
+  }
 
   for (auto Op : {ISD::FREM,        ISD::FPOW,         ISD::FPOWI,
                   ISD::FCOS,        ISD::FSIN,         ISD::FSINCOS,
@@ -699,23 +701,48 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   }
 
   auto LegalizeNarrowFP = [this](MVT ScalarVT) {
-    for (auto Op :
-         {ISD::SETCC,          ISD::SELECT_CC,
-          ISD::BR_CC,          ISD::FADD,           ISD::FSUB,
-          ISD::FMUL,           ISD::FDIV,           ISD::FMA,
-          ISD::FNEG,           ISD::FABS,           ISD::FCEIL,
-          ISD::FSQRT,          ISD::FFLOOR,         ISD::FNEARBYINT,
-          ISD::FRINT,          ISD::FROUND,         ISD::FROUNDEVEN,
-          ISD::FTRUNC,         ISD::FMINNUM,        ISD::FMAXNUM,
-          ISD::FMINIMUM,       ISD::FMAXIMUM,       ISD::STRICT_FADD,
-          ISD::STRICT_FSUB,    ISD::STRICT_FMUL,    ISD::STRICT_FDIV,
-          ISD::STRICT_FMA,     ISD::STRICT_FCEIL,   ISD::STRICT_FFLOOR,
-          ISD::STRICT_FSQRT,   ISD::STRICT_FRINT,   ISD::STRICT_FNEARBYINT,
-          ISD::STRICT_FROUND,  ISD::STRICT_FTRUNC,  ISD::STRICT_FROUNDEVEN,
-          ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, ISD::STRICT_FMINIMUM,
-          ISD::STRICT_FMAXIMUM})
+    for (auto Op : {ISD::SETCC,
+                    ISD::SELECT_CC,
+                    ISD::BR_CC,
+                    ISD::FADD,
+                    ISD::FSUB,
+                    ISD::FMUL,
+                    ISD::FDIV,
+                    ISD::FMA,
+                    ISD::FCEIL,
+                    ISD::FSQRT,
+                    ISD::FFLOOR,
+                    ISD::FNEARBYINT,
+                    ISD::FRINT,
+                    ISD::FROUND,
+                    ISD::FROUNDEVEN,
+                    ISD::FTRUNC,
+                    ISD::FMINNUM,
+                    ISD::FMAXNUM,
+                    ISD::FMINIMUM,
+                    ISD::FMAXIMUM,
+                    ISD::STRICT_FADD,
+                    ISD::STRICT_FSUB,
+                    ISD::STRICT_FMUL,
+                    ISD::STRICT_FDIV,
+                    ISD::STRICT_FMA,
+                    ISD::STRICT_FCEIL,
+                    ISD::STRICT_FFLOOR,
+                    ISD::STRICT_FSQRT,
+                    ISD::STRICT_FRINT,
+                    ISD::STRICT_FNEARBYINT,
+                    ISD::STRICT_FROUND,
+                    ISD::STRICT_FTRUNC,
+                    ISD::STRICT_FROUNDEVEN,
+                    ISD::STRICT_FMINNUM,
+                    ISD::STRICT_FMAXNUM,
+                    ISD::STRICT_FMINIMUM,
+                    ISD::STRICT_FMAXIMUM})
       setOperationAction(Op, ScalarVT, Promote);
 
+    for (auto Op : {ISD::FNEG, ISD::FABS})
+      setOperationAction(Op, ScalarVT, Legal);
+
     // Round-to-integer need custom lowering for fp16, as Promote doesn't work
     // because the result type is integer.
     for (auto Op : {ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
@@ -730,8 +757,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     setOperationPromotedToType(ISD::FMUL, V4Narrow, MVT::v4f32);
     setOperationPromotedToType(ISD::FDIV, V4Narrow, MVT::v4f32);
 
-    setOperationAction(ISD::FABS,        V4Narrow, Expand);
-    setOperationAction(ISD::FNEG,        V4Narrow, Expand);
+    setOperationAction(ISD::FABS, V4Narrow, Legal);
+    setOperationAction(ISD::FNEG, V4Narrow, Legal);
     setOperationAction(ISD::FROUND,      V4Narrow, Expand);
     setOperationAction(ISD::FROUNDEVEN,  V4Narrow, Expand);
     setOperationAction(ISD::FMA,         V4Narrow, Expand);
@@ -740,24 +767,24 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::SELECT,      V4Narrow, Expand);
     setOperationAction(ISD::SELECT_CC,   V4Narrow, Expand);
     setOperationAction(ISD::FTRUNC,      V4Narrow, Expand);
-    setOperationAction(ISD::FCOPYSIGN,   V4Narrow, Expand);
+    setOperationAction(ISD::FCOPYSIGN, V4Narrow, Custom);
     setOperationAction(ISD::FFLOOR,      V4Narrow, Expand);
     setOperationAction(ISD::FCEIL,       V4Narrow, Expand);
     setOperationAction(ISD::FRINT,       V4Narrow, Expand);
     setOperationAction(ISD::FNEARBYINT,  V4Narrow, Expand);
     setOperationAction(ISD::FSQRT,       V4Narrow, Expand);
 
     auto V8Narrow = MVT::getVectorVT(ScalarVT, 8);
-    setOperationAction(ISD::FABS,        V8Narrow, Expand);
+    setOperationAction(ISD::FABS, V8Narrow, Legal);
     setOperationAction(ISD::FADD,        V8Narrow, Expand);
     setOperationAction(ISD::FCEIL,       V8Narrow, Expand);
-    setOperationAction(ISD::FCOPYSIGN,   V8Narrow, Expand);
+    setOperationAction(ISD::FCOPYSIGN, V8Narrow, Custom);
     setOperationAction(ISD::FDIV,        V8Narrow, Expand);
     setOperationAction(ISD::FFLOOR,      V8Narrow, Expand);
     setOperationAction(ISD::FMA,         V8Narrow, Expand);
     setOperationAction(ISD::FMUL,        V8Narrow, Expand);
     setOperationAction(ISD::FNEARBYINT,  V8Narrow, Expand);
-    setOperationAction(ISD::FNEG,        V8Narrow, Expand);
+    setOperationAction(ISD::FNEG, V8Narrow, Legal);
     setOperationAction(ISD::FROUND,      V8Narrow, Expand);
     setOperationAction(ISD::FROUNDEVEN,  V8Narrow, Expand);
     setOperationAction(ISD::FRINT,       V8Narrow, Expand);
@@ -1745,7 +1772,9 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
 
   // But we do support custom-lowering for FCOPYSIGN.
   if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
-      ((VT == MVT::v4f16 || VT == MVT::v8f16) && Subtarget->hasFullFP16()))
+      ((VT == MVT::v4bf16 || VT == MVT::v8bf16 || VT == MVT::v4f16 ||
+        VT == MVT::v8f16) &&
+       Subtarget->hasFullFP16()))
     setOperationAction(ISD::FCOPYSIGN, VT, Custom);
 
   setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
@@ -9208,7 +9237,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
   } else if (VT == MVT::f32) {
     VecVT = MVT::v4i32;
     SetVecVal(AArch64::ssub);
-  } else if (VT == MVT::f16) {
+  } else if (VT == MVT::f16 || VT == MVT::bf16) {
     VecVT = MVT::v8i16;
     SetVecVal(AArch64::hsub);
   } else {
@@ -9230,7 +9259,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
 
   SDValue BSP =
       DAG.getNode(AArch64ISD::BSP, DL, VecVT, SignMaskV, VecVal1, VecVal2);
-  if (VT == MVT::f16)
+  if (VT == MVT::f16 || VT == MVT::bf16)
     return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, BSP);
   if (VT == MVT::f32)
     return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, BSP);
 
@@ -5028,9 +5028,6 @@ defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu
 defm FCVTN  : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">;
 def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
           (FCVTNv4i16 V128:$Rn)>;
-//def : Pat<(concat_vectors V64:$Rd,
-//                          (v4bf16 (any_fpround (v4f32 V128:$Rn)))),
-//          (FCVTNv8bf16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
 def : Pat<(concat_vectors V64:$Rd,
                           (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
           (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
@@ -7813,6 +7810,42 @@ def : InstAlias<"uxtl2 $dst.2d, $src1.4s",
                 (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
 }
 
+def abs_f16 :
+  OutPatFrag<(ops node:$Rn),
+             (EXTRACT_SUBREG (f32 (COPY_TO_REGCLASS
+	         (i32 (ANDWri
+		   (i32 (COPY_TO_REGCLASS (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
+		                           node:$Rn, hsub), GPR32)),
+	           (i32 (logical_imm32_XFORM(i32 0x7fff))))),
+	         FPR32)), hsub)>;
+
+def : Pat<(f16 (fabs (f16 FPR16:$Rn))), (f16 (abs_f16 (f16 FPR16:$Rn)))>;
+def : Pat<(bf16 (fabs (bf16 FPR16:$Rn))), (bf16 (abs_f16 (bf16 FPR16:$Rn)))>;
+
+def neg_f16 :
+  OutPatFrag<(ops node:$Rn),
+             (EXTRACT_SUBREG (f32 (COPY_TO_REGCLASS
+	         (i32 (EORWri
+		   (i32 (COPY_TO_REGCLASS (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
+		                           node:$Rn, hsub), GPR32)),
+	           (i32 (logical_imm32_XFORM(i32 0x8000))))),
+	         FPR32)), hsub)>;
+
+def : Pat<(f16 (fneg (f16 FPR16:$Rn))), (f16 (neg_f16 (f16 FPR16:$Rn)))>;
+def : Pat<(bf16 (fneg (bf16 FPR16:$Rn))), (bf16 (neg_f16 (bf16 FPR16:$Rn)))>;
+
+let Predicates = [HasNEON] in {
+def : Pat<(v4f16 (fabs (v4f16 V64:$Rn))), (v4f16 (BICv4i16 (v4f16 V64:$Rn), (i32 128), (i32 8)))>;
+def : Pat<(v4bf16 (fabs (v4bf16 V64:$Rn))), (v4bf16 (BICv4i16 (v4bf16 V64:$Rn), (i32 128), (i32 8)))>;
+def : Pat<(v8f16 (fabs (v8f16 V128:$Rn))), (v8f16 (BICv8i16 (v8f16 V128:$Rn), (i32 128), (i32 8)))>;
+def : Pat<(v8bf16 (fabs (v8bf16 V128:$Rn))), (v8bf16 (BICv8i16 (v8bf16 V128:$Rn), (i32 128), (i32 8)))>;
+
+def : Pat<(v4f16 (fneg (v4f16 V64:$Rn))), (v4f16 (EORv8i8 (v4f16 V64:$Rn), (MOVIv4i16 (i32 128), (i32 8))))>;
+def : Pat<(v4bf16 (fneg (v4bf16 V64:$Rn))), (v4bf16 (EORv8i8 (v4bf16 V64:$Rn), (v4i16 (MOVIv4i16 (i32 0x80), (i32 8)))))>;
+def : Pat<(v8f16 (fneg (v8f16 V128:$Rn))), (v8f16 (EORv16i8 (v8f16 V128:$Rn), (MOVIv8i16 (i32 128), (i32 8))))>;
+def : Pat<(v8bf16 (fneg (v8bf16 V128:$Rn))), (v8bf16 (EORv16i8 (v8bf16 V128:$Rn), (v8i16 (MOVIv8i16 (i32 0x80), (i32 8)))))>;
+}
+
 // If an integer is about to be converted to a floating point value,
 // just load it on the floating point unit.
 // These patterns are more complex because floating point loads do not
 
@@ -1027,9 +1027,9 @@ define half @test_fma(half %a, half %b, half %c) #0 {
 }
 
 ; CHECK-CVT-LABEL: test_fabs:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fabs s0, s0
-; CHECK-CVT-NEXT: fcvt h0, s0
+; CHECK-CVT-NEXT: fmov w8, s0
+; CHECK-CVT-NEXT: and w8, w8, #0x7fff
+; CHECK-CVT-NEXT: fmov s0, w8
 ; CHECK-CVT-NEXT: ret
 
 ; CHECK-FP16-LABEL: test_fabs:
@@ -1338,3 +1338,12 @@ define half @test_fmuladd(half %a, half %b, half %c) #0 {
 }
 
 attributes #0 = { nounwind }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-COMMON: {{.*}}
+; CHECK-CVT: {{.*}}
+; CHECK-FP16: {{.*}}
+; FALLBACK: {{.*}}
+; FALLBACK-FP16: {{.*}}
+; GISEL: {{.*}}
+; GISEL-CVT: {{.*}}
+; GISEL-FP16: {{.*}}