Skip to content

[AArch64] Avoid using NEON BSL for streaming[-compatible] functions #95803

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
sdesmalen-arm merged 2 commits into llvm:main on Jun 19, 2024

Conversation

sdesmalen-arm
Copy link
Collaborator

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Jun 17, 2024

@llvm/pr-subscribers-backend-aarch64

Author: Sander de Smalen (sdesmalen-arm)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/95803.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+6-3)
  • (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll (+66-33)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c5c3ef02115ec..9ecd93f4b8cdb 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18147,9 +18147,12 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
   if (!VT.isVector())
     return SDValue();
 
-  // The combining code works for NEON, SVE2 and SME.
-  if (TLI.useSVEForFixedLengthVectorVT(VT, !Subtarget.isNeonAvailable()) ||
-      (VT.isScalableVector() && !Subtarget.hasSVE2()))
+  if (VT.isScalableVector() && !Subtarget.hasSVE2())
+    return SDValue();
+
+  if (VT.isFixedLengthVector() &&
+      (!Subtarget.isNeonAvailable() ||
+       TLI.useSVEForFixedLengthVectorVT(VT, !Subtarget.isNeonAvailable())))
     return SDValue();
 
   SDValue N0 = N->getOperand(0);
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
index b908dd61f2401..d65e87d5b9756 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
@@ -34,39 +34,72 @@ define <8 x i32> @fixed_bitselect_v8i32(ptr %pre_cond_ptr, ptr %left_ptr, ptr %r
 ;
 ; NONEON-NOSVE-LABEL: fixed_bitselect_v8i32:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
-; NONEON-NOSVE-NEXT:    ldp q3, q2, [x1]
-; NONEON-NOSVE-NEXT:    ldp q5, q4, [x2]
-; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-64]!
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    neg w8, w8
-; NONEON-NOSVE-NEXT:    str w8, [sp, #60]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    neg w8, w8
-; NONEON-NOSVE-NEXT:    str w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    neg w8, w8
-; NONEON-NOSVE-NEXT:    str w8, [sp, #52]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    neg w8, w8
-; NONEON-NOSVE-NEXT:    str w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #12]
-; NONEON-NOSVE-NEXT:    neg w8, w8
-; NONEON-NOSVE-NEXT:    str w8, [sp, #44]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #8]
-; NONEON-NOSVE-NEXT:    neg w8, w8
-; NONEON-NOSVE-NEXT:    str w8, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #4]
-; NONEON-NOSVE-NEXT:    neg w8, w8
-; NONEON-NOSVE-NEXT:    str w8, [sp, #36]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp]
-; NONEON-NOSVE-NEXT:    neg w8, w8
-; NONEON-NOSVE-NEXT:    str w8, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
-; NONEON-NOSVE-NEXT:    bsl v0.16b, v3.16b, v5.16b
-; NONEON-NOSVE-NEXT:    bsl v1.16b, v2.16b, v4.16b
-; NONEON-NOSVE-NEXT:    add sp, sp, #64
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT:    ldp q2, q3, [x1]
+; NONEON-NOSVE-NEXT:    ldp q4, q5, [x2]
+; NONEON-NOSVE-NEXT:    stp q0, q2, [sp, #-128]!
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 128
+; NONEON-NOSVE-NEXT:    stp q1, q3, [sp, #48]
+; NONEON-NOSVE-NEXT:    ldp w8, w14, [sp, #48]
+; NONEON-NOSVE-NEXT:    ldp w9, w4, [sp, #64]
+; NONEON-NOSVE-NEXT:    ldp w13, w11, [sp, #56]
+; NONEON-NOSVE-NEXT:    neg w3, w8
+; NONEON-NOSVE-NEXT:    neg w15, w14
+; NONEON-NOSVE-NEXT:    str q4, [sp, #32]
+; NONEON-NOSVE-NEXT:    and w9, w3, w9
+; NONEON-NOSVE-NEXT:    and w15, w15, w4
+; NONEON-NOSVE-NEXT:    str q5, [sp, #80]
+; NONEON-NOSVE-NEXT:    ldp w5, w3, [sp, #72]
+; NONEON-NOSVE-NEXT:    ldp w16, w12, [sp]
+; NONEON-NOSVE-NEXT:    neg w4, w11
+; NONEON-NOSVE-NEXT:    neg w2, w13
+; NONEON-NOSVE-NEXT:    sub w11, w11, #1
+; NONEON-NOSVE-NEXT:    and w3, w4, w3
+; NONEON-NOSVE-NEXT:    and w2, w2, w5
+; NONEON-NOSVE-NEXT:    sub w13, w13, #1
+; NONEON-NOSVE-NEXT:    ldp w6, w4, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldp w10, w17, [sp, #8]
+; NONEON-NOSVE-NEXT:    neg w1, w16
+; NONEON-NOSVE-NEXT:    neg w0, w12
+; NONEON-NOSVE-NEXT:    sub w16, w16, #1
+; NONEON-NOSVE-NEXT:    and w1, w1, w6
+; NONEON-NOSVE-NEXT:    and w0, w0, w4
+; NONEON-NOSVE-NEXT:    sub w12, w12, #1
+; NONEON-NOSVE-NEXT:    ldp w5, w6, [sp, #24]
+; NONEON-NOSVE-NEXT:    neg w18, w17
+; NONEON-NOSVE-NEXT:    neg w4, w10
+; NONEON-NOSVE-NEXT:    sub w17, w17, #1
+; NONEON-NOSVE-NEXT:    sub w10, w10, #1
+; NONEON-NOSVE-NEXT:    sub w14, w14, #1
+; NONEON-NOSVE-NEXT:    sub w8, w8, #1
+; NONEON-NOSVE-NEXT:    and w4, w4, w5
+; NONEON-NOSVE-NEXT:    and w18, w18, w6
+; NONEON-NOSVE-NEXT:    ldp w5, w6, [sp, #32]
+; NONEON-NOSVE-NEXT:    and w16, w16, w5
+; NONEON-NOSVE-NEXT:    and w12, w12, w6
+; NONEON-NOSVE-NEXT:    ldp w5, w6, [sp, #40]
+; NONEON-NOSVE-NEXT:    and w10, w10, w5
+; NONEON-NOSVE-NEXT:    and w17, w17, w6
+; NONEON-NOSVE-NEXT:    orr w17, w17, w18
+; NONEON-NOSVE-NEXT:    orr w10, w10, w4
+; NONEON-NOSVE-NEXT:    ldp w18, w4, [sp, #88]
+; NONEON-NOSVE-NEXT:    ldp w5, w6, [sp, #80]
+; NONEON-NOSVE-NEXT:    stp w10, w17, [sp, #104]
+; NONEON-NOSVE-NEXT:    orr w10, w12, w0
+; NONEON-NOSVE-NEXT:    orr w12, w16, w1
+; NONEON-NOSVE-NEXT:    and w11, w11, w4
+; NONEON-NOSVE-NEXT:    stp w12, w10, [sp, #96]
+; NONEON-NOSVE-NEXT:    and w10, w13, w18
+; NONEON-NOSVE-NEXT:    orr w11, w11, w3
+; NONEON-NOSVE-NEXT:    and w12, w14, w6
+; NONEON-NOSVE-NEXT:    orr w10, w10, w2
+; NONEON-NOSVE-NEXT:    and w8, w8, w5
+; NONEON-NOSVE-NEXT:    stp w10, w11, [sp, #120]
+; NONEON-NOSVE-NEXT:    orr w10, w12, w15
+; NONEON-NOSVE-NEXT:    orr w8, w8, w9
+; NONEON-NOSVE-NEXT:    stp w8, w10, [sp, #112]
+; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #96]
+; NONEON-NOSVE-NEXT:    add sp, sp, #128
 ; NONEON-NOSVE-NEXT:    ret
   %pre_cond = load <8 x i32>, ptr %pre_cond_ptr
   %left = load <8 x i32>, ptr %left_ptr

Comment on lines 18154 to 18155
(!Subtarget.isNeonAvailable() ||
TLI.useSVEForFixedLengthVectorVT(VT, !Subtarget.isNeonAvailable())))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can be simplified to:

(!Subtarget.isNeonAvailable() || TLI.useSVEForFixedLengthVectorVT(VT))

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, done!

@sdesmalen-arm sdesmalen-arm merged commit ca423a2 into llvm:main Jun 19, 2024
5 of 6 checks passed
AlexisPerry pushed a commit to llvm-project-tlp/llvm-project that referenced this pull request Jul 9, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants