Skip to content

[AArch64] Avoid using NEON BSL for streaming[-compatible] functions #95803

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18147,9 +18147,11 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
   if (!VT.isVector())
     return SDValue();
 
-  // The combining code works for NEON, SVE2 and SME.
-  if (TLI.useSVEForFixedLengthVectorVT(VT, !Subtarget.isNeonAvailable()) ||
-      (VT.isScalableVector() && !Subtarget.hasSVE2()))
+  if (VT.isScalableVector() && !Subtarget.hasSVE2())
+    return SDValue();
+
+  if (VT.isFixedLengthVector() &&
+      (!Subtarget.isNeonAvailable() || TLI.useSVEForFixedLengthVectorVT(VT)))
     return SDValue();
 
   SDValue N0 = N->getOperand(0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,39 +34,72 @@ define <8 x i32> @fixed_bitselect_v8i32(ptr %pre_cond_ptr, ptr %left_ptr, ptr %r
 ;
 ; NONEON-NOSVE-LABEL: fixed_bitselect_v8i32:
 ; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
-; NONEON-NOSVE-NEXT: ldp q3, q2, [x1]
-; NONEON-NOSVE-NEXT: ldp q5, q4, [x2]
-; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #28]
-; NONEON-NOSVE-NEXT: neg w8, w8
-; NONEON-NOSVE-NEXT: str w8, [sp, #60]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #24]
-; NONEON-NOSVE-NEXT: neg w8, w8
-; NONEON-NOSVE-NEXT: str w8, [sp, #56]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #20]
-; NONEON-NOSVE-NEXT: neg w8, w8
-; NONEON-NOSVE-NEXT: str w8, [sp, #52]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #16]
-; NONEON-NOSVE-NEXT: neg w8, w8
-; NONEON-NOSVE-NEXT: str w8, [sp, #48]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #12]
-; NONEON-NOSVE-NEXT: neg w8, w8
-; NONEON-NOSVE-NEXT: str w8, [sp, #44]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #8]
-; NONEON-NOSVE-NEXT: neg w8, w8
-; NONEON-NOSVE-NEXT: str w8, [sp, #40]
-; NONEON-NOSVE-NEXT: ldr w8, [sp, #4]
-; NONEON-NOSVE-NEXT: neg w8, w8
-; NONEON-NOSVE-NEXT: str w8, [sp, #36]
-; NONEON-NOSVE-NEXT: ldr w8, [sp]
-; NONEON-NOSVE-NEXT: neg w8, w8
-; NONEON-NOSVE-NEXT: str w8, [sp, #32]
-; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
-; NONEON-NOSVE-NEXT: bsl v0.16b, v3.16b, v5.16b
-; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v4.16b
-; NONEON-NOSVE-NEXT: add sp, sp, #64
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q4, q5, [x2]
+; NONEON-NOSVE-NEXT: stp q0, q2, [sp, #-128]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128
+; NONEON-NOSVE-NEXT: stp q1, q3, [sp, #48]
+; NONEON-NOSVE-NEXT: ldp w8, w14, [sp, #48]
+; NONEON-NOSVE-NEXT: ldp w9, w4, [sp, #64]
+; NONEON-NOSVE-NEXT: ldp w13, w11, [sp, #56]
+; NONEON-NOSVE-NEXT: neg w3, w8
+; NONEON-NOSVE-NEXT: neg w15, w14
+; NONEON-NOSVE-NEXT: str q4, [sp, #32]
+; NONEON-NOSVE-NEXT: and w9, w3, w9
+; NONEON-NOSVE-NEXT: and w15, w15, w4
+; NONEON-NOSVE-NEXT: str q5, [sp, #80]
+; NONEON-NOSVE-NEXT: ldp w5, w3, [sp, #72]
+; NONEON-NOSVE-NEXT: ldp w16, w12, [sp]
+; NONEON-NOSVE-NEXT: neg w4, w11
+; NONEON-NOSVE-NEXT: neg w2, w13
+; NONEON-NOSVE-NEXT: sub w11, w11, #1
+; NONEON-NOSVE-NEXT: and w3, w4, w3
+; NONEON-NOSVE-NEXT: and w2, w2, w5
+; NONEON-NOSVE-NEXT: sub w13, w13, #1
+; NONEON-NOSVE-NEXT: ldp w6, w4, [sp, #16]
+; NONEON-NOSVE-NEXT: ldp w10, w17, [sp, #8]
+; NONEON-NOSVE-NEXT: neg w1, w16
+; NONEON-NOSVE-NEXT: neg w0, w12
+; NONEON-NOSVE-NEXT: sub w16, w16, #1
+; NONEON-NOSVE-NEXT: and w1, w1, w6
+; NONEON-NOSVE-NEXT: and w0, w0, w4
+; NONEON-NOSVE-NEXT: sub w12, w12, #1
+; NONEON-NOSVE-NEXT: ldp w5, w6, [sp, #24]
+; NONEON-NOSVE-NEXT: neg w18, w17
+; NONEON-NOSVE-NEXT: neg w4, w10
+; NONEON-NOSVE-NEXT: sub w17, w17, #1
+; NONEON-NOSVE-NEXT: sub w10, w10, #1
+; NONEON-NOSVE-NEXT: sub w14, w14, #1
+; NONEON-NOSVE-NEXT: sub w8, w8, #1
+; NONEON-NOSVE-NEXT: and w4, w4, w5
+; NONEON-NOSVE-NEXT: and w18, w18, w6
+; NONEON-NOSVE-NEXT: ldp w5, w6, [sp, #32]
+; NONEON-NOSVE-NEXT: and w16, w16, w5
+; NONEON-NOSVE-NEXT: and w12, w12, w6
+; NONEON-NOSVE-NEXT: ldp w5, w6, [sp, #40]
+; NONEON-NOSVE-NEXT: and w10, w10, w5
+; NONEON-NOSVE-NEXT: and w17, w17, w6
+; NONEON-NOSVE-NEXT: orr w17, w17, w18
+; NONEON-NOSVE-NEXT: orr w10, w10, w4
+; NONEON-NOSVE-NEXT: ldp w18, w4, [sp, #88]
+; NONEON-NOSVE-NEXT: ldp w5, w6, [sp, #80]
+; NONEON-NOSVE-NEXT: stp w10, w17, [sp, #104]
+; NONEON-NOSVE-NEXT: orr w10, w12, w0
+; NONEON-NOSVE-NEXT: orr w12, w16, w1
+; NONEON-NOSVE-NEXT: and w11, w11, w4
+; NONEON-NOSVE-NEXT: stp w12, w10, [sp, #96]
+; NONEON-NOSVE-NEXT: and w10, w13, w18
+; NONEON-NOSVE-NEXT: orr w11, w11, w3
+; NONEON-NOSVE-NEXT: and w12, w14, w6
+; NONEON-NOSVE-NEXT: orr w10, w10, w2
+; NONEON-NOSVE-NEXT: and w8, w8, w5
+; NONEON-NOSVE-NEXT: stp w10, w11, [sp, #120]
+; NONEON-NOSVE-NEXT: orr w10, w12, w15
+; NONEON-NOSVE-NEXT: orr w8, w8, w9
+; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #112]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96]
+; NONEON-NOSVE-NEXT: add sp, sp, #128
 ; NONEON-NOSVE-NEXT: ret
   %pre_cond = load <8 x i32>, ptr %pre_cond_ptr
   %left = load <8 x i32>, ptr %left_ptr
Expand Down
Loading