Skip to content

Commit b0e3b9d

Browse files
committed
Tidy up tests
1 parent 8e1da8e commit b0e3b9d

File tree

1 file changed

+26
-67
lines changed

1 file changed

+26
-67
lines changed
Lines changed: 26 additions & 67 deletions
Original file line number | Diff line number | Diff line change
@@ -1,44 +1,33 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve -O3 < %s -o - | FileCheck %s --check-prefixes=CHECK
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme -O3 < %s -o - | FileCheck %s --check-prefixes=CHECK
33

44
; Tests consecutive stores of @llvm.aarch64.sve.faddv. Within SDAG faddv is
55
; lowered as a FADDV + EXTRACT_VECTOR_ELT (of lane 0). Stores of extracts can
66
; be matched by DAGCombiner::mergeConsecutiveStores(), which we want to avoid in
77
; some cases as it can lead to worse codegen.
88

9-
define void @consecutive_stores_pair(ptr noalias %dest0, ptr noalias %src0) {
9+
; TODO: A single `stp s0, s1, [x0]` may be preferred here.
10+
define void @consecutive_stores_pair(ptr %dest0, <vscale x 4 x float> %vec0, <vscale x 4 x float> %vec1) {
1011
; CHECK-LABEL: consecutive_stores_pair:
1112
; CHECK: // %bb.0:
1213
; CHECK-NEXT: ptrue p0.s
13-
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x1]
14-
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1, #1, mul vl]
1514
; CHECK-NEXT: faddv s0, p0, z0.s
1615
; CHECK-NEXT: faddv s1, p0, z1.s
1716
; CHECK-NEXT: mov v0.s[1], v1.s[0]
1817
; CHECK-NEXT: str d0, [x0]
1918
; CHECK-NEXT: ret
20-
%ptrue = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
21-
%vscale = call i64 @llvm.vscale.i64()
22-
%c4_vscale = shl i64 %vscale, 2
23-
%src1 = getelementptr inbounds float, ptr %src0, i64 %c4_vscale
2419
%dest1 = getelementptr inbounds i8, ptr %dest0, i64 4
25-
%vec0 = load <vscale x 4 x float>, ptr %src0, align 4
26-
%vec1 = load <vscale x 4 x float>, ptr %src1, align 4
27-
%reduce0 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> %ptrue, <vscale x 4 x float> %vec0)
28-
%reduce1 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> %ptrue, <vscale x 4 x float> %vec1)
20+
%reduce0 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> splat(i1 true), <vscale x 4 x float> %vec0)
21+
%reduce1 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> splat(i1 true), <vscale x 4 x float> %vec1)
2922
store float %reduce0, ptr %dest0, align 4
3023
store float %reduce1, ptr %dest1, align 4
3124
ret void
3225
}
3326

34-
define void @consecutive_stores_quadruple(ptr noalias %dest0, ptr noalias %src0) {
27+
define void @consecutive_stores_quadruple(ptr %dest0,
3528
; CHECK-LABEL: consecutive_stores_quadruple:
3629
; CHECK: // %bb.0:
3730
; CHECK-NEXT: ptrue p0.s
38-
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x1]
39-
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1, #1, mul vl]
40-
; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1, #2, mul vl]
41-
; CHECK-NEXT: ld1w { z3.s }, p0/z, [x1, #3, mul vl]
4231
; CHECK-NEXT: faddv s0, p0, z0.s
4332
; CHECK-NEXT: faddv s1, p0, z1.s
4433
; CHECK-NEXT: faddv s2, p0, z2.s
@@ -47,102 +36,72 @@ define void @consecutive_stores_quadruple(ptr noalias %dest0, ptr noalias %src0)
4736
; CHECK-NEXT: mov v2.s[1], v3.s[0]
4837
; CHECK-NEXT: stp d0, d2, [x0]
4938
; CHECK-NEXT: ret
50-
%ptrue = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
51-
%vscale = call i64 @llvm.vscale.i64()
52-
%c4_vscale = shl i64 %vscale, 2
39+
<vscale x 4 x float> %vec0, <vscale x 4 x float> %vec1, <vscale x 4 x float> %vec2, <vscale x 4 x float> %vec3)
40+
{
5341
%dest1 = getelementptr inbounds i8, ptr %dest0, i64 4
5442
%dest2 = getelementptr inbounds i8, ptr %dest1, i64 4
5543
%dest3 = getelementptr inbounds i8, ptr %dest2, i64 4
56-
%src1 = getelementptr inbounds float, ptr %src0, i64 %c4_vscale
57-
%src2 = getelementptr inbounds float, ptr %src1, i64 %c4_vscale
58-
%src3 = getelementptr inbounds float, ptr %src2, i64 %c4_vscale
59-
%vec0 = load <vscale x 4 x float>, ptr %src0, align 4
60-
%vec1 = load <vscale x 4 x float>, ptr %src1, align 4
61-
%vec2 = load <vscale x 4 x float>, ptr %src2, align 4
62-
%vec3 = load <vscale x 4 x float>, ptr %src3, align 4
63-
%reduce0 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> %ptrue, <vscale x 4 x float> %vec0)
64-
%reduce1 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> %ptrue, <vscale x 4 x float> %vec1)
65-
%reduce2 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> %ptrue, <vscale x 4 x float> %vec2)
66-
%reduce3 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> %ptrue, <vscale x 4 x float> %vec3)
44+
%reduce0 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> splat(i1 true), <vscale x 4 x float> %vec0)
45+
%reduce1 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> splat(i1 true), <vscale x 4 x float> %vec1)
46+
%reduce2 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> splat(i1 true), <vscale x 4 x float> %vec2)
47+
%reduce3 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> splat(i1 true), <vscale x 4 x float> %vec3)
6748
store float %reduce0, ptr %dest0, align 4
6849
store float %reduce1, ptr %dest1, align 4
6950
store float %reduce2, ptr %dest2, align 4
7051
store float %reduce3, ptr %dest3, align 4
7152
ret void
7253
}
7354

74-
define void @consecutive_stores_pair_streaming_function(ptr noalias %dest0, ptr noalias %src0) #0 "aarch64_pstate_sm_enabled" {
55+
define void @consecutive_stores_pair_streaming_function(ptr %dest0, <vscale x 4 x float> %vec0, <vscale x 4 x float> %vec1) "aarch64_pstate_sm_enabled" {
7556
; CHECK-LABEL: consecutive_stores_pair_streaming_function:
7657
; CHECK: // %bb.0:
7758
; CHECK-NEXT: sub sp, sp, #16
7859
; CHECK-NEXT: .cfi_def_cfa_offset 16
7960
; CHECK-NEXT: ptrue p0.s
80-
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x1, #1, mul vl]
81-
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
82-
; CHECK-NEXT: faddv s0, p0, z0.s
8361
; CHECK-NEXT: faddv s1, p0, z1.s
84-
; CHECK-NEXT: stp s1, s0, [sp, #8]
62+
; CHECK-NEXT: faddv s0, p0, z0.s
63+
; CHECK-NEXT: stp s0, s1, [sp, #8]
8564
; CHECK-NEXT: ldr d0, [sp, #8]
8665
; CHECK-NEXT: str d0, [x0]
8766
; CHECK-NEXT: add sp, sp, #16
8867
; CHECK-NEXT: ret
89-
%ptrue = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
90-
%vscale = call i64 @llvm.vscale.i64()
91-
%c4_vscale = shl i64 %vscale, 2
92-
%src1 = getelementptr inbounds float, ptr %src0, i64 %c4_vscale
9368
%dest1 = getelementptr inbounds i8, ptr %dest0, i64 4
94-
%vec0 = load <vscale x 4 x float>, ptr %src0, align 4
95-
%vec1 = load <vscale x 4 x float>, ptr %src1, align 4
96-
%reduce0 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> %ptrue, <vscale x 4 x float> %vec0)
97-
%reduce1 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> %ptrue, <vscale x 4 x float> %vec1)
69+
%reduce0 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> splat(i1 true), <vscale x 4 x float> %vec0)
70+
%reduce1 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> splat(i1 true), <vscale x 4 x float> %vec1)
9871
store float %reduce0, ptr %dest0, align 4
9972
store float %reduce1, ptr %dest1, align 4
10073
ret void
10174
}
10275

103-
define void @consecutive_stores_quadruple_streaming_function(ptr noalias %dest0, ptr noalias %src0) #0 "aarch64_pstate_sm_enabled" {
76+
define void @consecutive_stores_quadruple_streaming_function(ptr %dest0,
10477
; CHECK-LABEL: consecutive_stores_quadruple_streaming_function:
10578
; CHECK: // %bb.0:
10679
; CHECK-NEXT: ptrue p0.s
107-
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x1]
108-
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1, #1, mul vl]
109-
; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1, #3, mul vl]
110-
; CHECK-NEXT: ld1w { z3.s }, p0/z, [x1, #2, mul vl]
11180
; CHECK-NEXT: faddv s0, p0, z0.s
11281
; CHECK-NEXT: faddv s1, p0, z1.s
113-
; CHECK-NEXT: faddv s2, p0, z2.s
11482
; CHECK-NEXT: faddv s3, p0, z3.s
83+
; CHECK-NEXT: faddv s2, p0, z2.s
11584
; CHECK-NEXT: stp s0, s1, [sp, #-16]!
11685
; CHECK-NEXT: .cfi_def_cfa_offset 16
11786
; CHECK-NEXT: ldr d0, [sp]
11887
; CHECK-NEXT: str d0, [x0]
119-
; CHECK-NEXT: stp s3, s2, [sp, #8]
88+
; CHECK-NEXT: stp s2, s3, [sp, #8]
12089
; CHECK-NEXT: ldr d0, [sp, #8]
12190
; CHECK-NEXT: str d0, [x0, #8]
12291
; CHECK-NEXT: add sp, sp, #16
12392
; CHECK-NEXT: ret
124-
%ptrue = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
125-
%vscale = call i64 @llvm.vscale.i64()
126-
%c4_vscale = shl i64 %vscale, 2
93+
<vscale x 4 x float> %vec0, <vscale x 4 x float> %vec1, <vscale x 4 x float> %vec2, <vscale x 4 x float> %vec3) "aarch64_pstate_sm_enabled"
94+
{
12795
%dest1 = getelementptr inbounds i8, ptr %dest0, i64 4
12896
%dest2 = getelementptr inbounds i8, ptr %dest1, i64 4
12997
%dest3 = getelementptr inbounds i8, ptr %dest2, i64 4
130-
%src1 = getelementptr inbounds float, ptr %src0, i64 %c4_vscale
131-
%src2 = getelementptr inbounds float, ptr %src1, i64 %c4_vscale
132-
%src3 = getelementptr inbounds float, ptr %src2, i64 %c4_vscale
133-
%vec0 = load <vscale x 4 x float>, ptr %src0, align 4
134-
%vec1 = load <vscale x 4 x float>, ptr %src1, align 4
135-
%vec2 = load <vscale x 4 x float>, ptr %src2, align 4
136-
%vec3 = load <vscale x 4 x float>, ptr %src3, align 4
137-
%reduce0 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> %ptrue, <vscale x 4 x float> %vec0)
138-
%reduce1 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> %ptrue, <vscale x 4 x float> %vec1)
139-
%reduce2 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> %ptrue, <vscale x 4 x float> %vec2)
140-
%reduce3 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> %ptrue, <vscale x 4 x float> %vec3)
98+
%reduce0 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> splat(i1 true), <vscale x 4 x float> %vec0)
99+
%reduce1 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> splat(i1 true), <vscale x 4 x float> %vec1)
100+
%reduce2 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> splat(i1 true), <vscale x 4 x float> %vec2)
101+
%reduce3 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> splat(i1 true), <vscale x 4 x float> %vec3)
141102
store float %reduce0, ptr %dest0, align 4
142103
store float %reduce1, ptr %dest1, align 4
143104
store float %reduce2, ptr %dest2, align 4
144105
store float %reduce3, ptr %dest3, align 4
145106
ret void
146107
}
147-
148-
attributes #0 = { vscale_range(1, 16) "target-features"="+sve,+sme" }

0 commit comments

Comments (0)