Skip to content

Commit 322c7c7

Browse files
committed
[X86] slow-unaligned-mem.ll - improve checks
We can't easily convert this test to use the update scripts, but we can manually improve the checks so that they verify the right number of stores.
1 parent a65363d commit 322c7c7

File tree

1 file changed

+98
-67
lines changed

1 file changed

+98
-67
lines changed

llvm/test/CodeGen/X86/slow-unaligned-mem.ll

Lines changed: 98 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,96 +1,127 @@
11
; Intel chips with slow unaligned memory accesses
22

3-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3 2>&1 | FileCheck %s --check-prefix=SLOW
4-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3m 2>&1 | FileCheck %s --check-prefix=SLOW
5-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium-m 2>&1 | FileCheck %s --check-prefix=SLOW
6-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4 2>&1 | FileCheck %s --check-prefix=SLOW
7-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4m 2>&1 | FileCheck %s --check-prefix=SLOW
8-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=yonah 2>&1 | FileCheck %s --check-prefix=SLOW
9-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=prescott 2>&1 | FileCheck %s --check-prefix=SLOW
10-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefix=SLOW
11-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=core2 2>&1 | FileCheck %s --check-prefix=SLOW
12-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=penryn 2>&1 | FileCheck %s --check-prefix=SLOW
13-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bonnell 2>&1 | FileCheck %s --check-prefix=SLOW
3+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3 2>&1 | FileCheck %s --check-prefixes=SLOW
4+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3m 2>&1 | FileCheck %s --check-prefixes=SLOW
5+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium-m 2>&1 | FileCheck %s --check-prefixes=SLOW
6+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4 2>&1 | FileCheck %s --check-prefixes=SLOW
7+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4m 2>&1 | FileCheck %s --check-prefixes=SLOW
8+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=yonah 2>&1 | FileCheck %s --check-prefixes=SLOW
9+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=prescott 2>&1 | FileCheck %s --check-prefixes=SLOW
10+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefixes=SLOW
11+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=core2 2>&1 | FileCheck %s --check-prefixes=SLOW
12+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=penryn 2>&1 | FileCheck %s --check-prefixes=SLOW
13+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bonnell 2>&1 | FileCheck %s --check-prefixes=SLOW
1414

1515
; Intel chips with fast unaligned memory accesses
1616

17-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont 2>&1 | FileCheck %s --check-prefix=FAST
18-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nehalem 2>&1 | FileCheck %s --check-prefix=FAST
19-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=westmere 2>&1 | FileCheck %s --check-prefix=FAST
20-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=sandybridge 2>&1 | FileCheck %s --check-prefix=FAST
21-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=ivybridge 2>&1 | FileCheck %s --check-prefix=FAST
22-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=haswell 2>&1 | FileCheck %s --check-prefix=FAST
23-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=broadwell 2>&1 | FileCheck %s --check-prefix=FAST
24-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=knl 2>&1 | FileCheck %s --check-prefix=FAST
25-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=skylake-avx512 2>&1 | FileCheck %s --check-prefix=FAST
17+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-SSE
18+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nehalem 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-SSE
19+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=westmere 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-SSE
20+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=sandybridge 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX128
21+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=ivybridge 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX128
22+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=haswell 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
23+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=broadwell 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
24+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=knl 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX512
25+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=skylake-avx512 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
2626

2727
; AMD chips with slow unaligned memory accesses
2828

29-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-4 2>&1 | FileCheck %s --check-prefix=SLOW
30-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-xp 2>&1 | FileCheck %s --check-prefix=SLOW
31-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=k8 2>&1 | FileCheck %s --check-prefix=SLOW
32-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=opteron 2>&1 | FileCheck %s --check-prefix=SLOW
33-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon64 2>&1 | FileCheck %s --check-prefix=SLOW
34-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-fx 2>&1 | FileCheck %s --check-prefix=SLOW
35-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=k8-sse3 2>&1 | FileCheck %s --check-prefix=SLOW
36-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=opteron-sse3 2>&1 | FileCheck %s --check-prefix=SLOW
37-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon64-sse3 2>&1 | FileCheck %s --check-prefix=SLOW
29+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-4 2>&1 | FileCheck %s --check-prefixes=SLOW
30+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-xp 2>&1 | FileCheck %s --check-prefixes=SLOW
31+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=k8 2>&1 | FileCheck %s --check-prefixes=SLOW
32+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=opteron 2>&1 | FileCheck %s --check-prefixes=SLOW
33+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon64 2>&1 | FileCheck %s --check-prefixes=SLOW
34+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-fx 2>&1 | FileCheck %s --check-prefixes=SLOW
35+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=k8-sse3 2>&1 | FileCheck %s --check-prefixes=SLOW
36+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=opteron-sse3 2>&1 | FileCheck %s --check-prefixes=SLOW
37+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon64-sse3 2>&1 | FileCheck %s --check-prefixes=SLOW
3838

3939
; AMD chips with fast unaligned memory accesses
4040

41-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=amdfam10 2>&1 | FileCheck %s --check-prefix=FAST
42-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=barcelona 2>&1 | FileCheck %s --check-prefix=FAST
43-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=FAST
44-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefix=FAST
45-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver1 2>&1 | FileCheck %s --check-prefix=FAST
46-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver2 2>&1 | FileCheck %s --check-prefix=FAST
47-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3 2>&1 | FileCheck %s --check-prefix=FAST
48-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=FAST
49-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=FAST
50-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=FAST
51-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver3 2>&1 | FileCheck %s --check-prefix=FAST
52-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver4 2>&1 | FileCheck %s --check-prefix=FAST
41+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=amdfam10 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-SSE
42+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=barcelona 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-SSE
43+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-SSE
44+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
45+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver1 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
46+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver2 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
47+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
48+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
49+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
50+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
51+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver3 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
52+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver4 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX512
5353

5454
; Other chips with slow unaligned memory accesses
5555

56-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=c3-2 2>&1 | FileCheck %s --check-prefix=SLOW
56+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=c3-2 2>&1 | FileCheck %s --check-prefixes=SLOW
5757

5858
; Verify that the slow/fast unaligned memory attribute is set correctly for each CPU model.
5959
; Slow chips use 4-byte stores. Fast chips with SSE or later use wider unaligned vector stores (xmm, ymm, or zmm) instead of 4-byte stores.
6060
; Chips that don't have SSE use 4-byte stores either way, so they're not tested.
6161

6262
; Also verify that either SSE4.2 or SSE4a implies fast unaligned accesses.
6363

64-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=sse4.2 2>&1 | FileCheck %s --check-prefix=FAST
65-
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=sse4a 2>&1 | FileCheck %s --check-prefix=FAST
64+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=sse4.2 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-SSE
65+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=sse4a 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-SSE
6666

67-
define void @store_zeros(ptr %a) {
6867
; SLOW-NOT: not a recognized processor
68+
; FAST-NOT: not a recognized processor
69+
define void @store_zeros(ptr %a) {
6970
; SLOW-LABEL: store_zeros:
7071
; SLOW: # %bb.0:
71-
; SLOW-NEXT: movl
72-
; SLOW-NEXT: movl
73-
; SLOW-NEXT: movl
74-
; SLOW-NEXT: movl
75-
; SLOW-NEXT: movl
76-
; SLOW-NEXT: movl
77-
; SLOW-NEXT: movl
78-
; SLOW-NEXT: movl
79-
; SLOW-NEXT: movl
80-
; SLOW-NEXT: movl
81-
; SLOW-NEXT: movl
82-
; SLOW-NEXT: movl
83-
; SLOW-NEXT: movl
84-
; SLOW-NEXT: movl
85-
; SLOW-NEXT: movl
86-
; SLOW-NEXT: movl
87-
; SLOW-NEXT: movl
72+
; SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
73+
; SLOW-NEXT: movl $0
74+
; SLOW-NEXT: movl $0
75+
; SLOW-NEXT: movl $0
76+
; SLOW-NEXT: movl $0
77+
; SLOW-NEXT: movl $0
78+
; SLOW-NEXT: movl $0
79+
; SLOW-NEXT: movl $0
80+
; SLOW-NEXT: movl $0
81+
; SLOW-NEXT: movl $0
82+
; SLOW-NEXT: movl $0
83+
; SLOW-NEXT: movl $0
84+
; SLOW-NEXT: movl $0
85+
; SLOW-NEXT: movl $0
86+
; SLOW-NEXT: movl $0
87+
; SLOW-NEXT: movl $0
88+
; SLOW-NEXT: movl $0
89+
; SLOW-NOT: movl
8890
;
89-
; FAST-NOT: not a recognized processor
90-
; FAST-LABEL: store_zeros:
91-
; FAST: # %bb.0:
92-
; FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
93-
; FAST-NOT: movl
91+
; FAST-SSE-LABEL: store_zeros:
92+
; FAST-SSE: # %bb.0:
93+
; FAST-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
94+
; FAST-SSE-NEXT: xorps %xmm0, %xmm0
95+
; FAST-SSE-NEXT: movups %xmm0
96+
; FAST-SSE-NEXT: movups %xmm0
97+
; FAST-SSE-NEXT: movups %xmm0
98+
; FAST-SSE-NEXT: movups %xmm0
99+
; FAST-SSE-NOT: movups
100+
;
101+
; FAST-AVX128-LABEL: store_zeros:
102+
; FAST-AVX128: # %bb.0:
103+
; FAST-AVX128-NEXT: movl {{[0-9]+}}(%esp), %eax
104+
; FAST-AVX128-NEXT: vxorps %xmm0, %xmm0, %xmm0
105+
; FAST-AVX128-NEXT: vmovups %xmm0
106+
; FAST-AVX128-NEXT: vmovups %xmm0
107+
; FAST-AVX128-NEXT: vmovups %xmm0
108+
; FAST-AVX128-NEXT: vmovups %xmm0
109+
; FAST-AVX128-NOT: vmovups
110+
;
111+
; FAST-AVX256-LABEL: store_zeros:
112+
; FAST-AVX256: # %bb.0:
113+
; FAST-AVX256-NEXT: movl {{[0-9]+}}(%esp), %eax
114+
; FAST-AVX256-NEXT: vxorps %xmm0, %xmm0, %xmm0
115+
; FAST-AVX256-NEXT: vmovups %ymm0
116+
; FAST-AVX256-NEXT: vmovups %ymm0
117+
; FAST-AVX256-NOT: vmovups
118+
;
119+
; FAST-AVX512-LABEL: store_zeros:
120+
; FAST-AVX512: # %bb.0:
121+
; FAST-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
122+
; FAST-AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
123+
; FAST-AVX512-NEXT: vmovups %zmm0, (%eax)
124+
; FAST-AVX512-NOT: vmovups
94125
call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 64, i1 false)
95126
ret void
96127
}

0 commit comments

Comments
 (0)