Skip to content

[AArch64][GlobalISel] Lower fp16 abs and neg without fullfp16. #110096

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into llvm:main from gh-gi-fp16negabs
Sep 27, 2024

Conversation

davemgreen
Copy link
Collaborator

This changes the legalization of fp16 fabs and fneg, when fullfp16 is not available, from promoting to lowering, so that they can be implemented with normal integer operations. A minor change to the fneg lowering code was needed so that it handles vectors.

@llvmbot
Copy link
Member

llvmbot commented Sep 26, 2024

@llvm/pr-subscribers-backend-aarch64

Author: David Green (davemgreen)

Changes

This changes the legalization of fp16 fabs and fneg, when fullfp16 is not available, from promoting to lowering, so that they can be implemented with normal integer operations. A minor change to the fneg lowering code was needed so that it handles vectors.


Full diff: https://github.com/llvm/llvm-project/pull/110096.diff

5 Files Affected:

  • (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+2-6)
  • (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+2-2)
  • (modified) llvm/test/CodeGen/AArch64/f16-instructions.ll (+8-15)
  • (modified) llvm/test/CodeGen/AArch64/fabs.ll (+14-40)
  • (modified) llvm/test/CodeGen/AArch64/fneg.ll (+14-40)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index c3b6b3033cf5c4..2fb2d104f1ce34 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4051,12 +4051,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
     auto [Res, SubByReg] = MI.getFirst2Regs();
     LLT Ty = MRI.getType(Res);
 
-    // TODO: Handle vector types once we are able to
-    // represent them.
-    if (Ty.isVector())
-      return UnableToLegalize;
-    auto SignMask =
-        MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
+    auto SignMask = MIRBuilder.buildConstant(
+        Ty, APInt::getSignMask(Ty.getScalarSizeInBits()));
     MIRBuilder.buildXor(Res, SubByReg, SignMask);
     MI.eraseFromParent();
     return Legalized;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 6cb181011f8f67..51aeee023f2e34 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -268,11 +268,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       })
       .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
       .lowerIf(scalarOrEltWiderThan(0, 64))
-      .minScalarOrElt(0, MinFPScalar)
       .clampNumElements(0, v4s16, v8s16)
       .clampNumElements(0, v2s32, v4s32)
       .clampNumElements(0, v2s64, v2s64)
-      .moreElementsToNextPow2(0);
+      .moreElementsToNextPow2(0)
+      .lowerFor({s16, v4s16, v8s16});
 
   getActionDefinitionsBuilder(G_FREM)
       .libcallFor({s32, s64})
diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
index d8a17b40587106..e058c83f274f14 100644
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -1392,26 +1392,19 @@ define half @test_fma(half %a, half %b, half %c) #0 {
 }
 
 define half @test_fabs(half %a) #0 {
-; CHECK-CVT-SD-LABEL: test_fabs:
-; CHECK-CVT-SD:       // %bb.0:
-; CHECK-CVT-SD-NEXT:    // kill: def $h0 killed $h0 def $s0
-; CHECK-CVT-SD-NEXT:    fmov w8, s0
-; CHECK-CVT-SD-NEXT:    and w8, w8, #0x7fff
-; CHECK-CVT-SD-NEXT:    fmov s0, w8
-; CHECK-CVT-SD-NEXT:    // kill: def $h0 killed $h0 killed $s0
-; CHECK-CVT-SD-NEXT:    ret
+; CHECK-CVT-LABEL: test_fabs:
+; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-CVT-NEXT:    fmov w8, s0
+; CHECK-CVT-NEXT:    and w8, w8, #0x7fff
+; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
+; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_fabs:
 ; CHECK-FP16:       // %bb.0:
 ; CHECK-FP16-NEXT:    fabs h0, h0
 ; CHECK-FP16-NEXT:    ret
-;
-; CHECK-CVT-GI-LABEL: test_fabs:
-; CHECK-CVT-GI:       // %bb.0:
-; CHECK-CVT-GI-NEXT:    fcvt s0, h0
-; CHECK-CVT-GI-NEXT:    fabs s0, s0
-; CHECK-CVT-GI-NEXT:    fcvt h0, s0
-; CHECK-CVT-GI-NEXT:    ret
   %r = call half @llvm.fabs.f16(half %a)
   ret half %r
 }
diff --git a/llvm/test/CodeGen/AArch64/fabs.ll b/llvm/test/CodeGen/AArch64/fabs.ll
index e19e2ead11f4d0..43e90070736345 100644
--- a/llvm/test/CodeGen/AArch64/fabs.ll
+++ b/llvm/test/CodeGen/AArch64/fabs.ll
@@ -41,9 +41,11 @@ define half @fabs_f16(half %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fabs_f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT:    fabs s0, s0
-; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-GI-NOFP16-NEXT:    fmov w8, s0
+; CHECK-GI-NOFP16-NEXT:    and w8, w8, #0x7fff
+; CHECK-GI-NOFP16-NEXT:    fmov s0, w8
+; CHECK-GI-NOFP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fabs_f16:
@@ -160,22 +162,8 @@ define <7 x half> @fabs_v7f16(<7 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fabs_v7f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT:    mov v2.h[0], v0.h[4]
-; CHECK-GI-NOFP16-NEXT:    fabs v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT:    mov v2.h[1], v0.h[5]
-; CHECK-GI-NOFP16-NEXT:    fcvtn v1.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT:    mov v2.h[2], v0.h[6]
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[0], v1.h[0]
-; CHECK-GI-NOFP16-NEXT:    fcvtl v2.4s, v2.4h
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[1]
-; CHECK-GI-NOFP16-NEXT:    fabs v2.4s, v2.4s
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v1.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvtn v2.4h, v2.4s
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[3], v1.h[3]
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[4], v2.h[0]
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[5], v2.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[6], v2.h[2]
+; CHECK-GI-NOFP16-NEXT:    mvni v1.8h, #128, lsl #8
+; CHECK-GI-NOFP16-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fabs_v7f16:
@@ -200,9 +188,8 @@ define <4 x half> @fabs_v4f16(<4 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fabs_v4f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT:    fabs v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v0.4s
+; CHECK-GI-NOFP16-NEXT:    mvni v1.4h, #128, lsl #8
+; CHECK-GI-NOFP16-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fabs_v4f16:
@@ -227,12 +214,8 @@ define <8 x half> @fabs_v8f16(<8 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fabs_v8f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT:    fabs v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT:    fabs v2.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.8h, v2.4s
+; CHECK-GI-NOFP16-NEXT:    mvni v1.8h, #128, lsl #8
+; CHECK-GI-NOFP16-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fabs_v8f16:
@@ -259,18 +242,9 @@ define <16 x half> @fabs_v16f16(<16 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fabs_v16f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl v2.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v3.4s, v1.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
-; CHECK-GI-NOFP16-NEXT:    fabs v2.4s, v2.4s
-; CHECK-GI-NOFP16-NEXT:    fabs v3.4s, v3.4s
-; CHECK-GI-NOFP16-NEXT:    fabs v4.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT:    fabs v5.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v2.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn v1.4h, v3.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.8h, v4.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v1.8h, v5.4s
+; CHECK-GI-NOFP16-NEXT:    mvni v2.8h, #128, lsl #8
+; CHECK-GI-NOFP16-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NOFP16-NEXT:    and v1.16b, v1.16b, v2.16b
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fabs_v16f16:
diff --git a/llvm/test/CodeGen/AArch64/fneg.ll b/llvm/test/CodeGen/AArch64/fneg.ll
index a0e9edff733e09..de2671afe60ab7 100644
--- a/llvm/test/CodeGen/AArch64/fneg.ll
+++ b/llvm/test/CodeGen/AArch64/fneg.ll
@@ -41,9 +41,11 @@ define half @fabs_f16(half %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fabs_f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT:    fneg s0, s0
-; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-GI-NOFP16-NEXT:    fmov w8, s0
+; CHECK-GI-NOFP16-NEXT:    eor w8, w8, #0xffff8000
+; CHECK-GI-NOFP16-NEXT:    fmov s0, w8
+; CHECK-GI-NOFP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fabs_f16:
@@ -161,22 +163,8 @@ define <7 x half> @fabs_v7f16(<7 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fabs_v7f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT:    mov v2.h[0], v0.h[4]
-; CHECK-GI-NOFP16-NEXT:    fneg v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT:    mov v2.h[1], v0.h[5]
-; CHECK-GI-NOFP16-NEXT:    fcvtn v1.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT:    mov v2.h[2], v0.h[6]
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[0], v1.h[0]
-; CHECK-GI-NOFP16-NEXT:    fcvtl v2.4s, v2.4h
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[1]
-; CHECK-GI-NOFP16-NEXT:    fneg v2.4s, v2.4s
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v1.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvtn v2.4h, v2.4s
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[3], v1.h[3]
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[4], v2.h[0]
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[5], v2.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[6], v2.h[2]
+; CHECK-GI-NOFP16-NEXT:    movi v1.8h, #128, lsl #8
+; CHECK-GI-NOFP16-NEXT:    eor v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fabs_v7f16:
@@ -202,9 +190,8 @@ define <4 x half> @fabs_v4f16(<4 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fabs_v4f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT:    fneg v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v0.4s
+; CHECK-GI-NOFP16-NEXT:    movi v1.4h, #128, lsl #8
+; CHECK-GI-NOFP16-NEXT:    eor v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fabs_v4f16:
@@ -230,12 +217,8 @@ define <8 x half> @fabs_v8f16(<8 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fabs_v8f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT:    fneg v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT:    fneg v2.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.8h, v2.4s
+; CHECK-GI-NOFP16-NEXT:    movi v1.8h, #128, lsl #8
+; CHECK-GI-NOFP16-NEXT:    eor v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fabs_v8f16:
@@ -263,18 +246,9 @@ define <16 x half> @fabs_v16f16(<16 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fabs_v16f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl v2.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v3.4s, v1.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
-; CHECK-GI-NOFP16-NEXT:    fneg v2.4s, v2.4s
-; CHECK-GI-NOFP16-NEXT:    fneg v3.4s, v3.4s
-; CHECK-GI-NOFP16-NEXT:    fneg v4.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT:    fneg v5.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v2.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn v1.4h, v3.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.8h, v4.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v1.8h, v5.4s
+; CHECK-GI-NOFP16-NEXT:    movi v2.8h, #128, lsl #8
+; CHECK-GI-NOFP16-NEXT:    eor v0.16b, v0.16b, v2.16b
+; CHECK-GI-NOFP16-NEXT:    eor v1.16b, v1.16b, v2.16b
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fabs_v16f16:

@llvmbot
Copy link
Member

llvmbot commented Sep 26, 2024

@llvm/pr-subscribers-llvm-globalisel

Author: David Green (davemgreen)

Changes

This changes the legalization of fp16 fabs and fneg, when fullfp16 is not available, from promoting to lowering, so that they can be implemented with normal integer operations. A minor change to the fneg lowering code was needed so that it handles vectors.


Full diff: https://github.com/llvm/llvm-project/pull/110096.diff

5 Files Affected:

  • (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+2-6)
  • (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+2-2)
  • (modified) llvm/test/CodeGen/AArch64/f16-instructions.ll (+8-15)
  • (modified) llvm/test/CodeGen/AArch64/fabs.ll (+14-40)
  • (modified) llvm/test/CodeGen/AArch64/fneg.ll (+14-40)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index c3b6b3033cf5c4..2fb2d104f1ce34 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4051,12 +4051,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
     auto [Res, SubByReg] = MI.getFirst2Regs();
     LLT Ty = MRI.getType(Res);
 
-    // TODO: Handle vector types once we are able to
-    // represent them.
-    if (Ty.isVector())
-      return UnableToLegalize;
-    auto SignMask =
-        MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
+    auto SignMask = MIRBuilder.buildConstant(
+        Ty, APInt::getSignMask(Ty.getScalarSizeInBits()));
     MIRBuilder.buildXor(Res, SubByReg, SignMask);
     MI.eraseFromParent();
     return Legalized;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 6cb181011f8f67..51aeee023f2e34 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -268,11 +268,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       })
       .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
       .lowerIf(scalarOrEltWiderThan(0, 64))
-      .minScalarOrElt(0, MinFPScalar)
       .clampNumElements(0, v4s16, v8s16)
       .clampNumElements(0, v2s32, v4s32)
       .clampNumElements(0, v2s64, v2s64)
-      .moreElementsToNextPow2(0);
+      .moreElementsToNextPow2(0)
+      .lowerFor({s16, v4s16, v8s16});
 
   getActionDefinitionsBuilder(G_FREM)
       .libcallFor({s32, s64})
diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
index d8a17b40587106..e058c83f274f14 100644
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -1392,26 +1392,19 @@ define half @test_fma(half %a, half %b, half %c) #0 {
 }
 
 define half @test_fabs(half %a) #0 {
-; CHECK-CVT-SD-LABEL: test_fabs:
-; CHECK-CVT-SD:       // %bb.0:
-; CHECK-CVT-SD-NEXT:    // kill: def $h0 killed $h0 def $s0
-; CHECK-CVT-SD-NEXT:    fmov w8, s0
-; CHECK-CVT-SD-NEXT:    and w8, w8, #0x7fff
-; CHECK-CVT-SD-NEXT:    fmov s0, w8
-; CHECK-CVT-SD-NEXT:    // kill: def $h0 killed $h0 killed $s0
-; CHECK-CVT-SD-NEXT:    ret
+; CHECK-CVT-LABEL: test_fabs:
+; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-CVT-NEXT:    fmov w8, s0
+; CHECK-CVT-NEXT:    and w8, w8, #0x7fff
+; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
+; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_fabs:
 ; CHECK-FP16:       // %bb.0:
 ; CHECK-FP16-NEXT:    fabs h0, h0
 ; CHECK-FP16-NEXT:    ret
-;
-; CHECK-CVT-GI-LABEL: test_fabs:
-; CHECK-CVT-GI:       // %bb.0:
-; CHECK-CVT-GI-NEXT:    fcvt s0, h0
-; CHECK-CVT-GI-NEXT:    fabs s0, s0
-; CHECK-CVT-GI-NEXT:    fcvt h0, s0
-; CHECK-CVT-GI-NEXT:    ret
   %r = call half @llvm.fabs.f16(half %a)
   ret half %r
 }
diff --git a/llvm/test/CodeGen/AArch64/fabs.ll b/llvm/test/CodeGen/AArch64/fabs.ll
index e19e2ead11f4d0..43e90070736345 100644
--- a/llvm/test/CodeGen/AArch64/fabs.ll
+++ b/llvm/test/CodeGen/AArch64/fabs.ll
@@ -41,9 +41,11 @@ define half @fabs_f16(half %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fabs_f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT:    fabs s0, s0
-; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-GI-NOFP16-NEXT:    fmov w8, s0
+; CHECK-GI-NOFP16-NEXT:    and w8, w8, #0x7fff
+; CHECK-GI-NOFP16-NEXT:    fmov s0, w8
+; CHECK-GI-NOFP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fabs_f16:
@@ -160,22 +162,8 @@ define <7 x half> @fabs_v7f16(<7 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fabs_v7f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT:    mov v2.h[0], v0.h[4]
-; CHECK-GI-NOFP16-NEXT:    fabs v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT:    mov v2.h[1], v0.h[5]
-; CHECK-GI-NOFP16-NEXT:    fcvtn v1.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT:    mov v2.h[2], v0.h[6]
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[0], v1.h[0]
-; CHECK-GI-NOFP16-NEXT:    fcvtl v2.4s, v2.4h
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[1]
-; CHECK-GI-NOFP16-NEXT:    fabs v2.4s, v2.4s
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v1.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvtn v2.4h, v2.4s
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[3], v1.h[3]
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[4], v2.h[0]
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[5], v2.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[6], v2.h[2]
+; CHECK-GI-NOFP16-NEXT:    mvni v1.8h, #128, lsl #8
+; CHECK-GI-NOFP16-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fabs_v7f16:
@@ -200,9 +188,8 @@ define <4 x half> @fabs_v4f16(<4 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fabs_v4f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT:    fabs v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v0.4s
+; CHECK-GI-NOFP16-NEXT:    mvni v1.4h, #128, lsl #8
+; CHECK-GI-NOFP16-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fabs_v4f16:
@@ -227,12 +214,8 @@ define <8 x half> @fabs_v8f16(<8 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fabs_v8f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT:    fabs v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT:    fabs v2.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.8h, v2.4s
+; CHECK-GI-NOFP16-NEXT:    mvni v1.8h, #128, lsl #8
+; CHECK-GI-NOFP16-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fabs_v8f16:
@@ -259,18 +242,9 @@ define <16 x half> @fabs_v16f16(<16 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fabs_v16f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl v2.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v3.4s, v1.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
-; CHECK-GI-NOFP16-NEXT:    fabs v2.4s, v2.4s
-; CHECK-GI-NOFP16-NEXT:    fabs v3.4s, v3.4s
-; CHECK-GI-NOFP16-NEXT:    fabs v4.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT:    fabs v5.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v2.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn v1.4h, v3.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.8h, v4.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v1.8h, v5.4s
+; CHECK-GI-NOFP16-NEXT:    mvni v2.8h, #128, lsl #8
+; CHECK-GI-NOFP16-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NOFP16-NEXT:    and v1.16b, v1.16b, v2.16b
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fabs_v16f16:
diff --git a/llvm/test/CodeGen/AArch64/fneg.ll b/llvm/test/CodeGen/AArch64/fneg.ll
index a0e9edff733e09..de2671afe60ab7 100644
--- a/llvm/test/CodeGen/AArch64/fneg.ll
+++ b/llvm/test/CodeGen/AArch64/fneg.ll
@@ -41,9 +41,11 @@ define half @fabs_f16(half %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fabs_f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT:    fneg s0, s0
-; CHECK-GI-NOFP16-NEXT:    fcvt h0, s0
+; CHECK-GI-NOFP16-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-GI-NOFP16-NEXT:    fmov w8, s0
+; CHECK-GI-NOFP16-NEXT:    eor w8, w8, #0xffff8000
+; CHECK-GI-NOFP16-NEXT:    fmov s0, w8
+; CHECK-GI-NOFP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fabs_f16:
@@ -161,22 +163,8 @@ define <7 x half> @fabs_v7f16(<7 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fabs_v7f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT:    mov v2.h[0], v0.h[4]
-; CHECK-GI-NOFP16-NEXT:    fneg v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT:    mov v2.h[1], v0.h[5]
-; CHECK-GI-NOFP16-NEXT:    fcvtn v1.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT:    mov v2.h[2], v0.h[6]
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[0], v1.h[0]
-; CHECK-GI-NOFP16-NEXT:    fcvtl v2.4s, v2.4h
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[1]
-; CHECK-GI-NOFP16-NEXT:    fneg v2.4s, v2.4s
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v1.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvtn v2.4h, v2.4s
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[3], v1.h[3]
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[4], v2.h[0]
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[5], v2.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov v0.h[6], v2.h[2]
+; CHECK-GI-NOFP16-NEXT:    movi v1.8h, #128, lsl #8
+; CHECK-GI-NOFP16-NEXT:    eor v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fabs_v7f16:
@@ -202,9 +190,8 @@ define <4 x half> @fabs_v4f16(<4 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fabs_v4f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT:    fneg v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v0.4s
+; CHECK-GI-NOFP16-NEXT:    movi v1.4h, #128, lsl #8
+; CHECK-GI-NOFP16-NEXT:    eor v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fabs_v4f16:
@@ -230,12 +217,8 @@ define <8 x half> @fabs_v8f16(<8 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fabs_v8f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl v1.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT:    fneg v1.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT:    fneg v2.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v1.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.8h, v2.4s
+; CHECK-GI-NOFP16-NEXT:    movi v1.8h, #128, lsl #8
+; CHECK-GI-NOFP16-NEXT:    eor v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fabs_v8f16:
@@ -263,18 +246,9 @@ define <16 x half> @fabs_v16f16(<16 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fabs_v16f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fcvtl v2.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl v3.4s, v1.4h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
-; CHECK-GI-NOFP16-NEXT:    fneg v2.4s, v2.4s
-; CHECK-GI-NOFP16-NEXT:    fneg v3.4s, v3.4s
-; CHECK-GI-NOFP16-NEXT:    fneg v4.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT:    fneg v5.4s, v1.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn v0.4h, v2.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn v1.4h, v3.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v0.8h, v4.4s
-; CHECK-GI-NOFP16-NEXT:    fcvtn2 v1.8h, v5.4s
+; CHECK-GI-NOFP16-NEXT:    movi v2.8h, #128, lsl #8
+; CHECK-GI-NOFP16-NEXT:    eor v0.16b, v0.16b, v2.16b
+; CHECK-GI-NOFP16-NEXT:    eor v1.16b, v1.16b, v2.16b
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fabs_v16f16:

This changes the legalization of fp16 fabs and fneg, when fullfp16 is not
available, from promoting to lowering, so that they can be implemented with
normal integer operations. A minor change to the fneg lowering code was
needed so that it handles vectors.
@davemgreen davemgreen merged commit 9f255d8 into llvm:main Sep 27, 2024
8 checks passed
@davemgreen davemgreen deleted the gh-gi-fp16negabs branch September 27, 2024 06:51
Sterling-Augustine pushed a commit to Sterling-Augustine/llvm-project that referenced this pull request Sep 27, 2024
…110096)

This changes the legalization of fp16 fabs and fneg, when fullfp16 is not
available, from promoting to lowering, so that they can be implemented
with normal integer operations. A minor change to the fneg lowering code
was needed so that it handles vectors.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants