|
1 | 1 | ; Intel chips with slow unaligned memory accesses
|
2 | 2 |
|
3 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3 2>&1 | FileCheck %s --check-prefix=SLOW |
4 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3m 2>&1 | FileCheck %s --check-prefix=SLOW |
5 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium-m 2>&1 | FileCheck %s --check-prefix=SLOW |
6 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4 2>&1 | FileCheck %s --check-prefix=SLOW |
7 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4m 2>&1 | FileCheck %s --check-prefix=SLOW |
8 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=yonah 2>&1 | FileCheck %s --check-prefix=SLOW |
9 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=prescott 2>&1 | FileCheck %s --check-prefix=SLOW |
10 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefix=SLOW |
11 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=core2 2>&1 | FileCheck %s --check-prefix=SLOW |
12 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=penryn 2>&1 | FileCheck %s --check-prefix=SLOW |
13 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bonnell 2>&1 | FileCheck %s --check-prefix=SLOW |
| 3 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3 2>&1 | FileCheck %s --check-prefixes=SLOW |
| 4 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3m 2>&1 | FileCheck %s --check-prefixes=SLOW |
| 5 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium-m 2>&1 | FileCheck %s --check-prefixes=SLOW |
| 6 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4 2>&1 | FileCheck %s --check-prefixes=SLOW |
| 7 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4m 2>&1 | FileCheck %s --check-prefixes=SLOW |
| 8 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=yonah 2>&1 | FileCheck %s --check-prefixes=SLOW |
| 9 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=prescott 2>&1 | FileCheck %s --check-prefixes=SLOW |
| 10 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefixes=SLOW |
| 11 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=core2 2>&1 | FileCheck %s --check-prefixes=SLOW |
| 12 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=penryn 2>&1 | FileCheck %s --check-prefixes=SLOW |
| 13 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bonnell 2>&1 | FileCheck %s --check-prefixes=SLOW |
14 | 14 |
|
15 | 15 | ; Intel chips with fast unaligned memory accesses
|
16 | 16 |
|
17 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont 2>&1 | FileCheck %s --check-prefix=FAST |
18 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nehalem 2>&1 | FileCheck %s --check-prefix=FAST |
19 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=westmere 2>&1 | FileCheck %s --check-prefix=FAST |
20 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=sandybridge 2>&1 | FileCheck %s --check-prefix=FAST |
21 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=ivybridge 2>&1 | FileCheck %s --check-prefix=FAST |
22 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=haswell 2>&1 | FileCheck %s --check-prefix=FAST |
23 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=broadwell 2>&1 | FileCheck %s --check-prefix=FAST |
24 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=knl 2>&1 | FileCheck %s --check-prefix=FAST |
25 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=skylake-avx512 2>&1 | FileCheck %s --check-prefix=FAST |
| 17 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-SSE |
| 18 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nehalem 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-SSE |
| 19 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=westmere 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-SSE |
| 20 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=sandybridge 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX128 |
| 21 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=ivybridge 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX128 |
| 22 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=haswell 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256 |
| 23 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=broadwell 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256 |
| 24 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=knl 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX512 |
| 25 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=skylake-avx512 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256 |
26 | 26 |
|
27 | 27 | ; AMD chips with slow unaligned memory accesses
|
28 | 28 |
|
29 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-4 2>&1 | FileCheck %s --check-prefix=SLOW |
30 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-xp 2>&1 | FileCheck %s --check-prefix=SLOW |
31 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=k8 2>&1 | FileCheck %s --check-prefix=SLOW |
32 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=opteron 2>&1 | FileCheck %s --check-prefix=SLOW |
33 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon64 2>&1 | FileCheck %s --check-prefix=SLOW |
34 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-fx 2>&1 | FileCheck %s --check-prefix=SLOW |
35 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=k8-sse3 2>&1 | FileCheck %s --check-prefix=SLOW |
36 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=opteron-sse3 2>&1 | FileCheck %s --check-prefix=SLOW |
37 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon64-sse3 2>&1 | FileCheck %s --check-prefix=SLOW |
| 29 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-4 2>&1 | FileCheck %s --check-prefixes=SLOW |
| 30 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-xp 2>&1 | FileCheck %s --check-prefixes=SLOW |
| 31 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=k8 2>&1 | FileCheck %s --check-prefixes=SLOW |
| 32 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=opteron 2>&1 | FileCheck %s --check-prefixes=SLOW |
| 33 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon64 2>&1 | FileCheck %s --check-prefixes=SLOW |
| 34 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-fx 2>&1 | FileCheck %s --check-prefixes=SLOW |
| 35 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=k8-sse3 2>&1 | FileCheck %s --check-prefixes=SLOW |
| 36 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=opteron-sse3 2>&1 | FileCheck %s --check-prefixes=SLOW |
| 37 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon64-sse3 2>&1 | FileCheck %s --check-prefixes=SLOW |
38 | 38 |
|
39 | 39 | ; AMD chips with fast unaligned memory accesses
|
40 | 40 |
|
41 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=amdfam10 2>&1 | FileCheck %s --check-prefix=FAST |
42 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=barcelona 2>&1 | FileCheck %s --check-prefix=FAST |
43 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=FAST |
44 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefix=FAST |
45 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver1 2>&1 | FileCheck %s --check-prefix=FAST |
46 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver2 2>&1 | FileCheck %s --check-prefix=FAST |
47 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3 2>&1 | FileCheck %s --check-prefix=FAST |
48 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=FAST |
49 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=FAST |
50 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=FAST |
51 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver3 2>&1 | FileCheck %s --check-prefix=FAST |
52 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver4 2>&1 | FileCheck %s --check-prefix=FAST |
| 41 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=amdfam10 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-SSE |
| 42 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=barcelona 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-SSE |
| 43 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-SSE |
| 44 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256 |
| 45 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver1 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256 |
| 46 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver2 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256 |
| 47 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256 |
| 48 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256 |
| 49 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256 |
| 50 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256 |
| 51 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver3 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256 |
| 52 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver4 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX512 |
53 | 53 |
|
54 | 54 | ; Other chips with slow unaligned memory accesses
|
55 | 55 |
|
56 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=c3-2 2>&1 | FileCheck %s --check-prefix=SLOW |
| 56 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=c3-2 2>&1 | FileCheck %s --check-prefixes=SLOW |
57 | 57 |
|
58 | 58 | ; Verify that the slow/fast unaligned memory attribute is set correctly for each CPU model.
|
59 | 59 | ; Slow chips use 4-byte stores. Fast chips with SSE or later use wider vector stores (16, 32, or 64 bytes) instead of 4-byte stores.
|
60 | 60 | ; Chips that don't have SSE use 4-byte stores either way, so they're not tested.
|
61 | 61 |
|
62 | 62 | ; Also verify that enabling SSE4.2 or SSE4a alone (via -mattr, with no -mcpu) implies fast unaligned accesses.
|
63 | 63 |
|
64 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=sse4.2 2>&1 | FileCheck %s --check-prefix=FAST |
65 |
| -; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=sse4a 2>&1 | FileCheck %s --check-prefix=FAST |
| 64 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=sse4.2 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-SSE |
| 65 | +; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=sse4a 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-SSE |
66 | 66 |
|
67 |
| -define void @store_zeros(ptr %a) { |
68 | 67 | ; SLOW-NOT: not a recognized processor
|
| 68 | +; FAST-NOT: not a recognized processor |
| 69 | +define void @store_zeros(ptr %a) { |
69 | 70 | ; SLOW-LABEL: store_zeros:
|
70 | 71 | ; SLOW: # %bb.0:
|
71 |
| -; SLOW-NEXT: movl |
72 |
| -; SLOW-NEXT: movl |
73 |
| -; SLOW-NEXT: movl |
74 |
| -; SLOW-NEXT: movl |
75 |
| -; SLOW-NEXT: movl |
76 |
| -; SLOW-NEXT: movl |
77 |
| -; SLOW-NEXT: movl |
78 |
| -; SLOW-NEXT: movl |
79 |
| -; SLOW-NEXT: movl |
80 |
| -; SLOW-NEXT: movl |
81 |
| -; SLOW-NEXT: movl |
82 |
| -; SLOW-NEXT: movl |
83 |
| -; SLOW-NEXT: movl |
84 |
| -; SLOW-NEXT: movl |
85 |
| -; SLOW-NEXT: movl |
86 |
| -; SLOW-NEXT: movl |
87 |
| -; SLOW-NEXT: movl |
| 72 | +; SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax |
| 73 | +; SLOW-NEXT: movl $0 |
| 74 | +; SLOW-NEXT: movl $0 |
| 75 | +; SLOW-NEXT: movl $0 |
| 76 | +; SLOW-NEXT: movl $0 |
| 77 | +; SLOW-NEXT: movl $0 |
| 78 | +; SLOW-NEXT: movl $0 |
| 79 | +; SLOW-NEXT: movl $0 |
| 80 | +; SLOW-NEXT: movl $0 |
| 81 | +; SLOW-NEXT: movl $0 |
| 82 | +; SLOW-NEXT: movl $0 |
| 83 | +; SLOW-NEXT: movl $0 |
| 84 | +; SLOW-NEXT: movl $0 |
| 85 | +; SLOW-NEXT: movl $0 |
| 86 | +; SLOW-NEXT: movl $0 |
| 87 | +; SLOW-NEXT: movl $0 |
| 88 | +; SLOW-NEXT: movl $0 |
| 89 | +; SLOW-NOT: movl |
88 | 90 | ;
|
89 |
| -; FAST-NOT: not a recognized processor |
90 |
| -; FAST-LABEL: store_zeros: |
91 |
| -; FAST: # %bb.0: |
92 |
| -; FAST-NEXT: movl {{[0-9]+}}(%esp), %eax |
93 |
| -; FAST-NOT: movl |
| 91 | +; FAST-SSE-LABEL: store_zeros: |
| 92 | +; FAST-SSE: # %bb.0: |
| 93 | +; FAST-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax |
| 94 | +; FAST-SSE-NEXT: xorps %xmm0, %xmm0 |
| 95 | +; FAST-SSE-NEXT: movups %xmm0 |
| 96 | +; FAST-SSE-NEXT: movups %xmm0 |
| 97 | +; FAST-SSE-NEXT: movups %xmm0 |
| 98 | +; FAST-SSE-NEXT: movups %xmm0 |
| 99 | +; FAST-SSE-NOT: movups |
| 100 | +; |
| 101 | +; FAST-AVX128-LABEL: store_zeros: |
| 102 | +; FAST-AVX128: # %bb.0: |
| 103 | +; FAST-AVX128-NEXT: movl {{[0-9]+}}(%esp), %eax |
| 104 | +; FAST-AVX128-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| 105 | +; FAST-AVX128-NEXT: vmovups %xmm0 |
| 106 | +; FAST-AVX128-NEXT: vmovups %xmm0 |
| 107 | +; FAST-AVX128-NEXT: vmovups %xmm0 |
| 108 | +; FAST-AVX128-NEXT: vmovups %xmm0 |
| 109 | +; FAST-AVX128-NOT: vmovups |
| 110 | +; |
| 111 | +; FAST-AVX256-LABEL: store_zeros: |
| 112 | +; FAST-AVX256: # %bb.0: |
| 113 | +; FAST-AVX256-NEXT: movl {{[0-9]+}}(%esp), %eax |
| 114 | +; FAST-AVX256-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| 115 | +; FAST-AVX256-NEXT: vmovups %ymm0 |
| 116 | +; FAST-AVX256-NEXT: vmovups %ymm0 |
| 117 | +; FAST-AVX256-NOT: vmovups |
| 118 | +; |
| 119 | +; FAST-AVX512-LABEL: store_zeros: |
| 120 | +; FAST-AVX512: # %bb.0: |
| 121 | +; FAST-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax |
| 122 | +; FAST-AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| 123 | +; FAST-AVX512-NEXT: vmovups %zmm0, (%eax) |
| 124 | +; FAST-AVX512-NOT: vmovups |
94 | 125 | call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 64, i1 false)
|
95 | 126 | ret void
|
96 | 127 | }
|
|
0 commit comments