-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AArch64] Avoid using NEON BSL for streaming[-compatible] functions #95803
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-aarch64
Author: Sander de Smalen (sdesmalen-arm)
Changes
Full diff: https://github.com/llvm/llvm-project/pull/95803.diff
2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c5c3ef02115ec..9ecd93f4b8cdb 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18147,9 +18147,12 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
if (!VT.isVector())
return SDValue();
- // The combining code works for NEON, SVE2 and SME.
- if (TLI.useSVEForFixedLengthVectorVT(VT, !Subtarget.isNeonAvailable()) ||
- (VT.isScalableVector() && !Subtarget.hasSVE2()))
+ if (VT.isScalableVector() && !Subtarget.hasSVE2())
+ return SDValue();
+
+ if (VT.isFixedLengthVector() &&
+ (!Subtarget.isNeonAvailable() ||
+ TLI.useSVEForFixedLengthVectorVT(VT, !Subtarget.isNeonAvailable())))
return SDValue();
SDValue N0 = N->getOperand(0);
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
index b908dd61f2401..d65e87d5b9756 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
@@ -34,39 +34,72 @@ define <8 x i32> @fixed_bitselect_v8i32(ptr %pre_cond_ptr, ptr %left_ptr, ptr %r
;
; NONEON-NOSVE-LABEL: fixed_bitselect_v8i32:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
-; NONEON-NOSVE-NEXT: ldp q3, q2, [x1]
-; NONEON-NOSVE-NEXT: ldp q5, q4, [x2]
-; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #28]
-; NONEON-NOSVE-NEXT: neg w8, w8
-; NONEON-NOSVE-NEXT: str w8, [sp, #60]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #24]
-; NONEON-NOSVE-NEXT: neg w8, w8
-; NONEON-NOSVE-NEXT: str w8, [sp, #56]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #20]
-; NONEON-NOSVE-NEXT: neg w8, w8
-; NONEON-NOSVE-NEXT: str w8, [sp, #52]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #16]
-; NONEON-NOSVE-NEXT: neg w8, w8
-; NONEON-NOSVE-NEXT: str w8, [sp, #48]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #12]
-; NONEON-NOSVE-NEXT: neg w8, w8
-; NONEON-NOSVE-NEXT: str w8, [sp, #44]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #8]
-; NONEON-NOSVE-NEXT: neg w8, w8
-; NONEON-NOSVE-NEXT: str w8, [sp, #40]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #4]
-; NONEON-NOSVE-NEXT: neg w8, w8
-; NONEON-NOSVE-NEXT: str w8, [sp, #36]
-; NONEON-NOSVE-NEXT: ldr w8, [sp]
-; NONEON-NOSVE-NEXT: neg w8, w8
-; NONEON-NOSVE-NEXT: str w8, [sp, #32]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
-; NONEON-NOSVE-NEXT: bsl v0.16b, v3.16b, v5.16b
-; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v4.16b
-; NONEON-NOSVE-NEXT: add sp, sp, #64
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q4, q5, [x2]
+; NONEON-NOSVE-NEXT: stp q0, q2, [sp, #-128]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128
+; NONEON-NOSVE-NEXT: stp q1, q3, [sp, #48]
+; NONEON-NOSVE-NEXT: ldp w8, w14, [sp, #48]
+; NONEON-NOSVE-NEXT: ldp w9, w4, [sp, #64]
+; NONEON-NOSVE-NEXT: ldp w13, w11, [sp, #56]
+; NONEON-NOSVE-NEXT: neg w3, w8
+; NONEON-NOSVE-NEXT: neg w15, w14
+; NONEON-NOSVE-NEXT: str q4, [sp, #32]
+; NONEON-NOSVE-NEXT: and w9, w3, w9
+; NONEON-NOSVE-NEXT: and w15, w15, w4
+; NONEON-NOSVE-NEXT: str q5, [sp, #80]
+; NONEON-NOSVE-NEXT: ldp w5, w3, [sp, #72]
+; NONEON-NOSVE-NEXT: ldp w16, w12, [sp]
+; NONEON-NOSVE-NEXT: neg w4, w11
+; NONEON-NOSVE-NEXT: neg w2, w13
+; NONEON-NOSVE-NEXT: sub w11, w11, #1
+; NONEON-NOSVE-NEXT: and w3, w4, w3
+; NONEON-NOSVE-NEXT: and w2, w2, w5
+; NONEON-NOSVE-NEXT: sub w13, w13, #1
+; NONEON-NOSVE-NEXT: ldp w6, w4, [sp, #16]
+; NONEON-NOSVE-NEXT: ldp w10, w17, [sp, #8]
+; NONEON-NOSVE-NEXT: neg w1, w16
+; NONEON-NOSVE-NEXT: neg w0, w12
+; NONEON-NOSVE-NEXT: sub w16, w16, #1
+; NONEON-NOSVE-NEXT: and w1, w1, w6
+; NONEON-NOSVE-NEXT: and w0, w0, w4
+; NONEON-NOSVE-NEXT: sub w12, w12, #1
+; NONEON-NOSVE-NEXT: ldp w5, w6, [sp, #24]
+; NONEON-NOSVE-NEXT: neg w18, w17
+; NONEON-NOSVE-NEXT: neg w4, w10
+; NONEON-NOSVE-NEXT: sub w17, w17, #1
+; NONEON-NOSVE-NEXT: sub w10, w10, #1
+; NONEON-NOSVE-NEXT: sub w14, w14, #1
+; NONEON-NOSVE-NEXT: sub w8, w8, #1
+; NONEON-NOSVE-NEXT: and w4, w4, w5
+; NONEON-NOSVE-NEXT: and w18, w18, w6
+; NONEON-NOSVE-NEXT: ldp w5, w6, [sp, #32]
+; NONEON-NOSVE-NEXT: and w16, w16, w5
+; NONEON-NOSVE-NEXT: and w12, w12, w6
+; NONEON-NOSVE-NEXT: ldp w5, w6, [sp, #40]
+; NONEON-NOSVE-NEXT: and w10, w10, w5
+; NONEON-NOSVE-NEXT: and w17, w17, w6
+; NONEON-NOSVE-NEXT: orr w17, w17, w18
+; NONEON-NOSVE-NEXT: orr w10, w10, w4
+; NONEON-NOSVE-NEXT: ldp w18, w4, [sp, #88]
+; NONEON-NOSVE-NEXT: ldp w5, w6, [sp, #80]
+; NONEON-NOSVE-NEXT: stp w10, w17, [sp, #104]
+; NONEON-NOSVE-NEXT: orr w10, w12, w0
+; NONEON-NOSVE-NEXT: orr w12, w16, w1
+; NONEON-NOSVE-NEXT: and w11, w11, w4
+; NONEON-NOSVE-NEXT: stp w12, w10, [sp, #96]
+; NONEON-NOSVE-NEXT: and w10, w13, w18
+; NONEON-NOSVE-NEXT: orr w11, w11, w3
+; NONEON-NOSVE-NEXT: and w12, w14, w6
+; NONEON-NOSVE-NEXT: orr w10, w10, w2
+; NONEON-NOSVE-NEXT: and w8, w8, w5
+; NONEON-NOSVE-NEXT: stp w10, w11, [sp, #120]
+; NONEON-NOSVE-NEXT: orr w10, w12, w15
+; NONEON-NOSVE-NEXT: orr w8, w8, w9
+; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #112]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96]
+; NONEON-NOSVE-NEXT: add sp, sp, #128
; NONEON-NOSVE-NEXT: ret
%pre_cond = load <8 x i32>, ptr %pre_cond_ptr
%left = load <8 x i32>, ptr %left_ptr
(!Subtarget.isNeonAvailable() ||
TLI.useSVEForFixedLengthVectorVT(VT, !Subtarget.isNeonAvailable())))
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This can be simplified to:
(!Subtarget.isNeonAvailable() || TLI.useSVEForFixedLengthVectorVT(VT))
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch, done!
No description provided.