Skip to content

Commit ca423a2

Browse files
[AArch64] Avoid using NEON BSL for streaming[-compatible] functions (llvm#95803)
1 parent 93831c7 commit ca423a2

File tree

2 files changed

+71
-36
lines changed

2 files changed

+71
-36
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -18244,9 +18244,11 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
1824418244
if (!VT.isVector())
1824518245
return SDValue();
1824618246

18247-
// The combining code works for NEON, SVE2 and SME.
18248-
if (TLI.useSVEForFixedLengthVectorVT(VT, !Subtarget.isNeonAvailable()) ||
18249-
(VT.isScalableVector() && !Subtarget.hasSVE2()))
18247+
if (VT.isScalableVector() && !Subtarget.hasSVE2())
18248+
return SDValue();
18249+
18250+
if (VT.isFixedLengthVector() &&
18251+
(!Subtarget.isNeonAvailable() || TLI.useSVEForFixedLengthVectorVT(VT)))
1825018252
return SDValue();
1825118253

1825218254
SDValue N0 = N->getOperand(0);

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll

Lines changed: 66 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -34,39 +34,72 @@ define <8 x i32> @fixed_bitselect_v8i32(ptr %pre_cond_ptr, ptr %left_ptr, ptr %r
3434
;
3535
; NONEON-NOSVE-LABEL: fixed_bitselect_v8i32:
3636
; NONEON-NOSVE: // %bb.0:
37-
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
38-
; NONEON-NOSVE-NEXT: ldp q3, q2, [x1]
39-
; NONEON-NOSVE-NEXT: ldp q5, q4, [x2]
40-
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
41-
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
42-
; NONEON-NOSVE-NEXT: ldr w8, [sp, #28]
43-
; NONEON-NOSVE-NEXT: neg w8, w8
44-
; NONEON-NOSVE-NEXT: str w8, [sp, #60]
45-
; NONEON-NOSVE-NEXT: ldr w8, [sp, #24]
46-
; NONEON-NOSVE-NEXT: neg w8, w8
47-
; NONEON-NOSVE-NEXT: str w8, [sp, #56]
48-
; NONEON-NOSVE-NEXT: ldr w8, [sp, #20]
49-
; NONEON-NOSVE-NEXT: neg w8, w8
50-
; NONEON-NOSVE-NEXT: str w8, [sp, #52]
51-
; NONEON-NOSVE-NEXT: ldr w8, [sp, #16]
52-
; NONEON-NOSVE-NEXT: neg w8, w8
53-
; NONEON-NOSVE-NEXT: str w8, [sp, #48]
54-
; NONEON-NOSVE-NEXT: ldr w8, [sp, #12]
55-
; NONEON-NOSVE-NEXT: neg w8, w8
56-
; NONEON-NOSVE-NEXT: str w8, [sp, #44]
57-
; NONEON-NOSVE-NEXT: ldr w8, [sp, #8]
58-
; NONEON-NOSVE-NEXT: neg w8, w8
59-
; NONEON-NOSVE-NEXT: str w8, [sp, #40]
60-
; NONEON-NOSVE-NEXT: ldr w8, [sp, #4]
61-
; NONEON-NOSVE-NEXT: neg w8, w8
62-
; NONEON-NOSVE-NEXT: str w8, [sp, #36]
63-
; NONEON-NOSVE-NEXT: ldr w8, [sp]
64-
; NONEON-NOSVE-NEXT: neg w8, w8
65-
; NONEON-NOSVE-NEXT: str w8, [sp, #32]
66-
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
67-
; NONEON-NOSVE-NEXT: bsl v0.16b, v3.16b, v5.16b
68-
; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v4.16b
69-
; NONEON-NOSVE-NEXT: add sp, sp, #64
37+
; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
38+
; NONEON-NOSVE-NEXT: ldp q2, q3, [x1]
39+
; NONEON-NOSVE-NEXT: ldp q4, q5, [x2]
40+
; NONEON-NOSVE-NEXT: stp q0, q2, [sp, #-128]!
41+
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128
42+
; NONEON-NOSVE-NEXT: stp q1, q3, [sp, #48]
43+
; NONEON-NOSVE-NEXT: ldp w8, w14, [sp, #48]
44+
; NONEON-NOSVE-NEXT: ldp w9, w4, [sp, #64]
45+
; NONEON-NOSVE-NEXT: ldp w13, w11, [sp, #56]
46+
; NONEON-NOSVE-NEXT: neg w3, w8
47+
; NONEON-NOSVE-NEXT: neg w15, w14
48+
; NONEON-NOSVE-NEXT: str q4, [sp, #32]
49+
; NONEON-NOSVE-NEXT: and w9, w3, w9
50+
; NONEON-NOSVE-NEXT: and w15, w15, w4
51+
; NONEON-NOSVE-NEXT: str q5, [sp, #80]
52+
; NONEON-NOSVE-NEXT: ldp w5, w3, [sp, #72]
53+
; NONEON-NOSVE-NEXT: ldp w16, w12, [sp]
54+
; NONEON-NOSVE-NEXT: neg w4, w11
55+
; NONEON-NOSVE-NEXT: neg w2, w13
56+
; NONEON-NOSVE-NEXT: sub w11, w11, #1
57+
; NONEON-NOSVE-NEXT: and w3, w4, w3
58+
; NONEON-NOSVE-NEXT: and w2, w2, w5
59+
; NONEON-NOSVE-NEXT: sub w13, w13, #1
60+
; NONEON-NOSVE-NEXT: ldp w6, w4, [sp, #16]
61+
; NONEON-NOSVE-NEXT: ldp w10, w17, [sp, #8]
62+
; NONEON-NOSVE-NEXT: neg w1, w16
63+
; NONEON-NOSVE-NEXT: neg w0, w12
64+
; NONEON-NOSVE-NEXT: sub w16, w16, #1
65+
; NONEON-NOSVE-NEXT: and w1, w1, w6
66+
; NONEON-NOSVE-NEXT: and w0, w0, w4
67+
; NONEON-NOSVE-NEXT: sub w12, w12, #1
68+
; NONEON-NOSVE-NEXT: ldp w5, w6, [sp, #24]
69+
; NONEON-NOSVE-NEXT: neg w18, w17
70+
; NONEON-NOSVE-NEXT: neg w4, w10
71+
; NONEON-NOSVE-NEXT: sub w17, w17, #1
72+
; NONEON-NOSVE-NEXT: sub w10, w10, #1
73+
; NONEON-NOSVE-NEXT: sub w14, w14, #1
74+
; NONEON-NOSVE-NEXT: sub w8, w8, #1
75+
; NONEON-NOSVE-NEXT: and w4, w4, w5
76+
; NONEON-NOSVE-NEXT: and w18, w18, w6
77+
; NONEON-NOSVE-NEXT: ldp w5, w6, [sp, #32]
78+
; NONEON-NOSVE-NEXT: and w16, w16, w5
79+
; NONEON-NOSVE-NEXT: and w12, w12, w6
80+
; NONEON-NOSVE-NEXT: ldp w5, w6, [sp, #40]
81+
; NONEON-NOSVE-NEXT: and w10, w10, w5
82+
; NONEON-NOSVE-NEXT: and w17, w17, w6
83+
; NONEON-NOSVE-NEXT: orr w17, w17, w18
84+
; NONEON-NOSVE-NEXT: orr w10, w10, w4
85+
; NONEON-NOSVE-NEXT: ldp w18, w4, [sp, #88]
86+
; NONEON-NOSVE-NEXT: ldp w5, w6, [sp, #80]
87+
; NONEON-NOSVE-NEXT: stp w10, w17, [sp, #104]
88+
; NONEON-NOSVE-NEXT: orr w10, w12, w0
89+
; NONEON-NOSVE-NEXT: orr w12, w16, w1
90+
; NONEON-NOSVE-NEXT: and w11, w11, w4
91+
; NONEON-NOSVE-NEXT: stp w12, w10, [sp, #96]
92+
; NONEON-NOSVE-NEXT: and w10, w13, w18
93+
; NONEON-NOSVE-NEXT: orr w11, w11, w3
94+
; NONEON-NOSVE-NEXT: and w12, w14, w6
95+
; NONEON-NOSVE-NEXT: orr w10, w10, w2
96+
; NONEON-NOSVE-NEXT: and w8, w8, w5
97+
; NONEON-NOSVE-NEXT: stp w10, w11, [sp, #120]
98+
; NONEON-NOSVE-NEXT: orr w10, w12, w15
99+
; NONEON-NOSVE-NEXT: orr w8, w8, w9
100+
; NONEON-NOSVE-NEXT: stp w8, w10, [sp, #112]
101+
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96]
102+
; NONEON-NOSVE-NEXT: add sp, sp, #128
70103
; NONEON-NOSVE-NEXT: ret
71104
%pre_cond = load <8 x i32>, ptr %pre_cond_ptr
72105
%left = load <8 x i32>, ptr %left_ptr

0 commit comments

Comments
 (0)