|
1 |
| -; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE |
2 |
| -; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK |
3 |
| -; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK |
4 |
| -; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 |
5 |
| -; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 |
6 |
| -; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 |
7 |
| -; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 |
8 |
| -; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 |
9 |
| -; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 |
10 |
| -; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 |
11 |
| -; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 |
12 |
| -; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 |
13 |
| -; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 |
14 |
| -; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 |
15 |
| -; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024 |
16 |
| -; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048 |
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| 2 | +; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256 |
| 3 | +; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 |
| 4 | +; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512 |
17 | 5 |
|
18 | 6 | target triple = "aarch64-unknown-linux-gnu"
|
19 | 7 |
|
20 |
| -; Don't use SVE when its registers are no bigger than NEON. |
21 |
| -; NO_SVE-NOT: ptrue |
22 |
| - |
23 | 8 | ; Don't use SVE for 64-bit vectors.
|
; bitcast_v4i16: volatile-loads <4 x i16> from %a, bitcasts it to <4 x half>
; and volatile-stores the result to %b. The added define carries
; vscale_range(2,0); the added check lines expect a plain ldr/str of d0
; followed by ret.
24 |
| -define void @bitcast_v4i16(<4 x i16> *%a, <4 x half>* %b) #0 { |
| 9 | +define void @bitcast_v4i16(<4 x i16> *%a, <4 x half>* %b) vscale_range(2,0) #0 { |
25 | 10 | ; CHECK-LABEL: bitcast_v4i16:
|
26 |
| -; CHECK: ldr d0, [x0] |
27 |
| -; CHECK-NEXT: str d0, [x1] |
28 |
| -; CHECK-NEXT: ret |
| 11 | +; CHECK: // %bb.0: |
| 12 | +; CHECK-NEXT: ldr d0, [x0] |
| 13 | +; CHECK-NEXT: str d0, [x1] |
| 14 | +; CHECK-NEXT: ret |
29 | 15 | %load = load volatile <4 x i16>, <4 x i16>* %a
|
30 | 16 | %cast = bitcast <4 x i16> %load to <4 x half>
|
31 | 17 | store volatile <4 x half> %cast, <4 x half>* %b
|
32 | 18 | ret void
|
33 | 19 | }
|
34 | 20 |
|
35 | 21 | ; Don't use SVE for 128-bit vectors.
|
; bitcast_v8i16: volatile-loads <8 x i16> from %a, bitcasts it to <8 x half>
; and volatile-stores the result to %b. The added define carries
; vscale_range(2,0); the added check lines expect a plain ldr/str of q0
; followed by ret.
36 |
| -define void @bitcast_v8i16(<8 x i16> *%a, <8 x half>* %b) #0 { |
| 22 | +define void @bitcast_v8i16(<8 x i16> *%a, <8 x half>* %b) vscale_range(2,0) #0 { |
37 | 23 | ; CHECK-LABEL: bitcast_v8i16:
|
38 |
| -; CHECK: ldr q0, [x0] |
39 |
| -; CHECK-NEXT: str q0, [x1] |
40 |
| -; CHECK-NEXT: ret |
| 24 | +; CHECK: // %bb.0: |
| 25 | +; CHECK-NEXT: ldr q0, [x0] |
| 26 | +; CHECK-NEXT: str q0, [x1] |
| 27 | +; CHECK-NEXT: ret |
41 | 28 | %load = load volatile <8 x i16>, <8 x i16>* %a
|
42 | 29 | %cast = bitcast <8 x i16> %load to <8 x half>
|
43 | 30 | store volatile <8 x half> %cast, <8 x half>* %b
|
44 | 31 | ret void
|
45 | 32 | }
|
46 | 33 |
|
; bitcast_v16i16: volatile-loads <16 x i16> from %a, bitcasts it to
; <16 x half> and volatile-stores the result to %b. The added define carries
; vscale_range(2,0); the added check lines expect ptrue p0.h, vl16 followed by
; one ld1h/st1h pair, with literal register names instead of the regex
; captures used by the removed lines.
47 |
| -define void @bitcast_v16i16(<16 x i16> *%a, <16 x half>* %b) #0 { |
| 34 | +define void @bitcast_v16i16(<16 x i16> *%a, <16 x half>* %b) vscale_range(2,0) #0 { |
48 | 35 | ; CHECK-LABEL: bitcast_v16i16:
|
49 |
| -; CHECK: ptrue [[PG:p[0-9]+]].h, vl16 |
50 |
| -; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] |
51 |
| -; CHECK-NEXT: st1h { [[OP]].h }, [[PG]], [x1] |
52 |
| -; CHECK-NEXT: ret |
| 36 | +; CHECK: // %bb.0: |
| 37 | +; CHECK-NEXT: ptrue p0.h, vl16 |
| 38 | +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] |
| 39 | +; CHECK-NEXT: st1h { z0.h }, p0, [x1] |
| 40 | +; CHECK-NEXT: ret |
53 | 41 | %load = load volatile <16 x i16>, <16 x i16>* %a
|
54 | 42 | %cast = bitcast <16 x i16> %load to <16 x half>
|
55 | 43 | store volatile <16 x half> %cast, <16 x half>* %b
|
56 | 44 | ret void
|
57 | 45 | }
|
58 | 46 |
|
; bitcast_v32i16: volatile-loads <32 x i16> from %a, bitcasts it to
; <32 x half> and volatile-stores the result to %b. This define is unchanged
; and has no vscale_range attribute; the added check lines are split by
; prefix. The VBITS_GE_256 prefix expects two vl16 ld1h/st1h pairs, one of
; them indexed by x8 = 16 (lsl #1); the VBITS_GE_512 prefix expects a single
; vl32 pair.
59 | 47 | define void @bitcast_v32i16(<32 x i16> *%a, <32 x half>* %b) #0 {
|
60 |
| -; CHECK-LABEL: bitcast_v32i16: |
61 |
| -; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32 |
62 |
| -; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] |
63 |
| -; VBITS_GE_512-NEXT: st1h { [[OP]].h }, [[PG]], [x1] |
64 |
| -; VBITS_GE_512-NEXT: ret |
| 48 | +; VBITS_GE_256-LABEL: bitcast_v32i16: |
| 49 | +; VBITS_GE_256: // %bb.0: |
| 50 | +; VBITS_GE_256-NEXT: mov x8, #16 |
| 51 | +; VBITS_GE_256-NEXT: ptrue p0.h, vl16 |
| 52 | +; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] |
| 53 | +; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] |
| 54 | +; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x8, lsl #1] |
| 55 | +; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1] |
| 56 | +; VBITS_GE_256-NEXT: ret |
| 57 | +; |
| 58 | +; VBITS_GE_512-LABEL: bitcast_v32i16: |
| 59 | +; VBITS_GE_512: // %bb.0: |
| 60 | +; VBITS_GE_512-NEXT: ptrue p0.h, vl32 |
| 61 | +; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0] |
| 62 | +; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1] |
| 63 | +; VBITS_GE_512-NEXT: ret |
65 | 64 | %load = load volatile <32 x i16>, <32 x i16>* %a
|
66 | 65 | %cast = bitcast <32 x i16> %load to <32 x half>
|
67 | 66 | store volatile <32 x half> %cast, <32 x half>* %b
|
68 | 67 | ret void
|
69 | 68 | }
|
70 | 69 |
|
; bitcast_v64i16: volatile-loads <64 x i16> from %a, bitcasts it to
; <64 x half> and volatile-stores the result to %b. The added define carries
; vscale_range(8,0), and the added check lines use the common prefix (the
; removed ones used VBITS_GE_1024) to expect ptrue p0.h, vl64 with one
; ld1h/st1h pair.
71 |
| -define void @bitcast_v64i16(<64 x i16> *%a, <64 x half>* %b) #0 { |
| 70 | +define void @bitcast_v64i16(<64 x i16> *%a, <64 x half>* %b) vscale_range(8,0) #0 { |
72 | 71 | ; CHECK-LABEL: bitcast_v64i16:
|
73 |
| -; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64 |
74 |
| -; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] |
75 |
| -; VBITS_GE_1024-NEXT: st1h { [[OP]].h }, [[PG]], [x1] |
76 |
| -; VBITS_GE_1024-NEXT: ret |
| 72 | +; CHECK: // %bb.0: |
| 73 | +; CHECK-NEXT: ptrue p0.h, vl64 |
| 74 | +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] |
| 75 | +; CHECK-NEXT: st1h { z0.h }, p0, [x1] |
| 76 | +; CHECK-NEXT: ret |
77 | 77 | %load = load volatile <64 x i16>, <64 x i16>* %a
|
78 | 78 | %cast = bitcast <64 x i16> %load to <64 x half>
|
79 | 79 | store volatile <64 x half> %cast, <64 x half>* %b
|
80 | 80 | ret void
|
81 | 81 | }
|
82 | 82 |
|
; bitcast_v128i16: volatile-loads <128 x i16> from %a, bitcasts it to
; <128 x half> and volatile-stores the result to %b. The added define carries
; vscale_range(16,0), and the added check lines use the common prefix (the
; removed ones used VBITS_GE_2048) to expect ptrue p0.h, vl128 with one
; ld1h/st1h pair.
83 |
| -define void @bitcast_v128i16(<128 x i16> *%a, <128 x half>* %b) #0 { |
| 83 | +define void @bitcast_v128i16(<128 x i16> *%a, <128 x half>* %b) vscale_range(16,0) #0 { |
84 | 84 | ; CHECK-LABEL: bitcast_v128i16:
|
85 |
| -; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128 |
86 |
| -; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0] |
87 |
| -; VBITS_GE_2048-NEXT: st1h { [[OP]].h }, [[PG]], [x1] |
88 |
| -; VBITS_GE_2048-NEXT: ret |
| 85 | +; CHECK: // %bb.0: |
| 86 | +; CHECK-NEXT: ptrue p0.h, vl128 |
| 87 | +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] |
| 88 | +; CHECK-NEXT: st1h { z0.h }, p0, [x1] |
| 89 | +; CHECK-NEXT: ret |
89 | 90 | %load = load volatile <128 x i16>, <128 x i16>* %a
|
90 | 91 | %cast = bitcast <128 x i16> %load to <128 x half>
|
91 | 92 | store volatile <128 x half> %cast, <128 x half>* %b
|
92 | 93 | ret void
|
93 | 94 | }
|
94 | 95 |
|
95 | 96 | ; Don't use SVE for 64-bit vectors.
|
; bitcast_v2i32: volatile-loads <2 x i32> from %a, bitcasts it to <2 x float>
; and volatile-stores the result to %b. The added define carries
; vscale_range(2,0); the added check lines expect a plain ldr/str of d0
; followed by ret.
96 |
| -define void @bitcast_v2i32(<2 x i32> *%a, <2 x float>* %b) #0 { |
| 97 | +define void @bitcast_v2i32(<2 x i32> *%a, <2 x float>* %b) vscale_range(2,0) #0 { |
97 | 98 | ; CHECK-LABEL: bitcast_v2i32:
|
98 |
| -; CHECK: ldr d0, [x0] |
99 |
| -; CHECK-NEXT: str d0, [x1] |
100 |
| -; CHECK-NEXT: ret |
| 99 | +; CHECK: // %bb.0: |
| 100 | +; CHECK-NEXT: ldr d0, [x0] |
| 101 | +; CHECK-NEXT: str d0, [x1] |
| 102 | +; CHECK-NEXT: ret |
101 | 103 | %load = load volatile <2 x i32>, <2 x i32>* %a
|
102 | 104 | %cast = bitcast <2 x i32> %load to <2 x float>
|
103 | 105 | store volatile <2 x float> %cast, <2 x float>* %b
|
104 | 106 | ret void
|
105 | 107 | }
|
106 | 108 |
|
107 | 109 | ; Don't use SVE for 128-bit vectors.
|
; bitcast_v4i32: volatile-loads <4 x i32> from %a, bitcasts it to <4 x float>
; and volatile-stores the result to %b. The added define carries
; vscale_range(2,0); the added check lines expect a plain ldr/str of q0
; followed by ret.
108 |
| -define void @bitcast_v4i32(<4 x i32> *%a, <4 x float>* %b) #0 { |
| 110 | +define void @bitcast_v4i32(<4 x i32> *%a, <4 x float>* %b) vscale_range(2,0) #0 { |
109 | 111 | ; CHECK-LABEL: bitcast_v4i32:
|
110 |
| -; CHECK: ldr q0, [x0] |
111 |
| -; CHECK-NEXT: str q0, [x1] |
112 |
| -; CHECK-NEXT: ret |
| 112 | +; CHECK: // %bb.0: |
| 113 | +; CHECK-NEXT: ldr q0, [x0] |
| 114 | +; CHECK-NEXT: str q0, [x1] |
| 115 | +; CHECK-NEXT: ret |
113 | 116 | %load = load volatile <4 x i32>, <4 x i32>* %a
|
114 | 117 | %cast = bitcast <4 x i32> %load to <4 x float>
|
115 | 118 | store volatile <4 x float> %cast, <4 x float>* %b
|
116 | 119 | ret void
|
117 | 120 | }
|
118 | 121 |
|
; bitcast_v8i32: volatile-loads <8 x i32> from %a, bitcasts it to <8 x float>
; and volatile-stores the result to %b. The added define carries
; vscale_range(2,0); the added check lines expect ptrue p0.s, vl8 followed by
; one ld1w/st1w pair, with literal register names instead of the regex
; captures used by the removed lines.
119 |
| -define void @bitcast_v8i32(<8 x i32> *%a, <8 x float>* %b) #0 { |
| 122 | +define void @bitcast_v8i32(<8 x i32> *%a, <8 x float>* %b) vscale_range(2,0) #0 { |
120 | 123 | ; CHECK-LABEL: bitcast_v8i32:
|
121 |
| -; CHECK: ptrue [[PG:p[0-9]+]].s, vl8 |
122 |
| -; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] |
123 |
| -; CHECK-NEXT: st1w { [[OP]].s }, [[PG]], [x1] |
124 |
| -; CHECK-NEXT: ret |
| 124 | +; CHECK: // %bb.0: |
| 125 | +; CHECK-NEXT: ptrue p0.s, vl8 |
| 126 | +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] |
| 127 | +; CHECK-NEXT: st1w { z0.s }, p0, [x1] |
| 128 | +; CHECK-NEXT: ret |
125 | 129 | %load = load volatile <8 x i32>, <8 x i32>* %a
|
126 | 130 | %cast = bitcast <8 x i32> %load to <8 x float>
|
127 | 131 | store volatile <8 x float> %cast, <8 x float>* %b
|
128 | 132 | ret void
|
129 | 133 | }
|
130 | 134 |
|
; bitcast_v16i32: volatile-loads <16 x i32> from %a, bitcasts it to
; <16 x float> and volatile-stores the result to %b. This define is unchanged
; and has no vscale_range attribute; the added check lines are split by
; prefix. The VBITS_GE_256 prefix expects two vl8 ld1w/st1w pairs, one of
; them indexed by x8 = 8 (lsl #2); the VBITS_GE_512 prefix expects a single
; vl16 pair.
131 | 135 | define void @bitcast_v16i32(<16 x i32> *%a, <16 x float>* %b) #0 {
|
132 |
| -; CHECK-LABEL: bitcast_v16i32: |
133 |
| -; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16 |
134 |
| -; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] |
135 |
| -; VBITS_GE_512-NEXT: st1w { [[OP]].s }, [[PG]], [x1] |
136 |
| -; VBITS_GE_512-NEXT: ret |
| 136 | +; VBITS_GE_256-LABEL: bitcast_v16i32: |
| 137 | +; VBITS_GE_256: // %bb.0: |
| 138 | +; VBITS_GE_256-NEXT: mov x8, #8 |
| 139 | +; VBITS_GE_256-NEXT: ptrue p0.s, vl8 |
| 140 | +; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] |
| 141 | +; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] |
| 142 | +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2] |
| 143 | +; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1] |
| 144 | +; VBITS_GE_256-NEXT: ret |
| 145 | +; |
| 146 | +; VBITS_GE_512-LABEL: bitcast_v16i32: |
| 147 | +; VBITS_GE_512: // %bb.0: |
| 148 | +; VBITS_GE_512-NEXT: ptrue p0.s, vl16 |
| 149 | +; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0] |
| 150 | +; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1] |
| 151 | +; VBITS_GE_512-NEXT: ret |
137 | 152 | %load = load volatile <16 x i32>, <16 x i32>* %a
|
138 | 153 | %cast = bitcast <16 x i32> %load to <16 x float>
|
139 | 154 | store volatile <16 x float> %cast, <16 x float>* %b
|
140 | 155 | ret void
|
141 | 156 | }
|
142 | 157 |
|
; bitcast_v32i32: volatile-loads <32 x i32> from %a, bitcasts it to
; <32 x float> and volatile-stores the result to %b. The added define carries
; vscale_range(8,0), and the added check lines use the common prefix (the
; removed ones used VBITS_GE_1024) to expect ptrue p0.s, vl32 with one
; ld1w/st1w pair.
143 |
| -define void @bitcast_v32i32(<32 x i32> *%a, <32 x float>* %b) #0 { |
| 158 | +define void @bitcast_v32i32(<32 x i32> *%a, <32 x float>* %b) vscale_range(8,0) #0 { |
144 | 159 | ; CHECK-LABEL: bitcast_v32i32:
|
145 |
| -; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32 |
146 |
| -; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] |
147 |
| -; VBITS_GE_1024-NEXT: st1w { [[OP]].s }, [[PG]], [x1] |
148 |
| -; VBITS_GE_1024-NEXT: ret |
| 160 | +; CHECK: // %bb.0: |
| 161 | +; CHECK-NEXT: ptrue p0.s, vl32 |
| 162 | +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] |
| 163 | +; CHECK-NEXT: st1w { z0.s }, p0, [x1] |
| 164 | +; CHECK-NEXT: ret |
149 | 165 | %load = load volatile <32 x i32>, <32 x i32>* %a
|
150 | 166 | %cast = bitcast <32 x i32> %load to <32 x float>
|
151 | 167 | store volatile <32 x float> %cast, <32 x float>* %b
|
152 | 168 | ret void
|
153 | 169 | }
|
154 | 170 |
|
; bitcast_v64i32: volatile-loads <64 x i32> from %a, bitcasts it to
; <64 x float> and volatile-stores the result to %b. The added define carries
; vscale_range(16,0), and the added check lines use the common prefix (the
; removed ones used VBITS_GE_2048) to expect ptrue p0.s, vl64 with one
; ld1w/st1w pair.
155 |
| -define void @bitcast_v64i32(<64 x i32> *%a, <64 x float>* %b) #0 { |
| 171 | +define void @bitcast_v64i32(<64 x i32> *%a, <64 x float>* %b) vscale_range(16,0) #0 { |
156 | 172 | ; CHECK-LABEL: bitcast_v64i32:
|
157 |
| -; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64 |
158 |
| -; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0] |
159 |
| -; VBITS_GE_2048-NEXT: st1w { [[OP]].s }, [[PG]], [x1] |
160 |
| -; VBITS_GE_2048-NEXT: ret |
| 173 | +; CHECK: // %bb.0: |
| 174 | +; CHECK-NEXT: ptrue p0.s, vl64 |
| 175 | +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] |
| 176 | +; CHECK-NEXT: st1w { z0.s }, p0, [x1] |
| 177 | +; CHECK-NEXT: ret |
161 | 178 | %load = load volatile <64 x i32>, <64 x i32>* %a
|
162 | 179 | %cast = bitcast <64 x i32> %load to <64 x float>
|
163 | 180 | store volatile <64 x float> %cast, <64 x float>* %b
|
164 | 181 | ret void
|
165 | 182 | }
|
166 | 183 |
|
167 | 184 | ; Don't use SVE for 64-bit vectors.
|
; bitcast_v1i64: volatile-loads <1 x i64> from %a, bitcasts it to
; <1 x double> and volatile-stores the result to %b. The added define carries
; vscale_range(2,0); the added check lines expect a plain ldr/str of d0
; followed by ret.
168 |
| -define void @bitcast_v1i64(<1 x i64> *%a, <1 x double>* %b) #0 { |
| 185 | +define void @bitcast_v1i64(<1 x i64> *%a, <1 x double>* %b) vscale_range(2,0) #0 { |
169 | 186 | ; CHECK-LABEL: bitcast_v1i64:
|
170 |
| -; CHECK: ldr d0, [x0] |
171 |
| -; CHECK-NEXT: str d0, [x1] |
172 |
| -; CHECK-NEXT: ret |
| 187 | +; CHECK: // %bb.0: |
| 188 | +; CHECK-NEXT: ldr d0, [x0] |
| 189 | +; CHECK-NEXT: str d0, [x1] |
| 190 | +; CHECK-NEXT: ret |
173 | 191 | %load = load volatile <1 x i64>, <1 x i64>* %a
|
174 | 192 | %cast = bitcast <1 x i64> %load to <1 x double>
|
175 | 193 | store volatile <1 x double> %cast, <1 x double>* %b
|
176 | 194 | ret void
|
177 | 195 | }
|
178 | 196 |
|
179 | 197 | ; Don't use SVE for 128-bit vectors.
|
; bitcast_v2i64: volatile-loads <2 x i64> from %a, bitcasts it to
; <2 x double> and volatile-stores the result to %b. The added define carries
; vscale_range(2,0); the added check lines expect a plain ldr/str of q0
; followed by ret.
180 |
| -define void @bitcast_v2i64(<2 x i64> *%a, <2 x double>* %b) #0 { |
| 198 | +define void @bitcast_v2i64(<2 x i64> *%a, <2 x double>* %b) vscale_range(2,0) #0 { |
181 | 199 | ; CHECK-LABEL: bitcast_v2i64:
|
182 |
| -; CHECK: ldr q0, [x0] |
183 |
| -; CHECK-NEXT: str q0, [x1] |
184 |
| -; CHECK-NEXT: ret |
| 200 | +; CHECK: // %bb.0: |
| 201 | +; CHECK-NEXT: ldr q0, [x0] |
| 202 | +; CHECK-NEXT: str q0, [x1] |
| 203 | +; CHECK-NEXT: ret |
185 | 204 | %load = load volatile <2 x i64>, <2 x i64>* %a
|
186 | 205 | %cast = bitcast <2 x i64> %load to <2 x double>
|
187 | 206 | store volatile <2 x double> %cast, <2 x double>* %b
|
188 | 207 | ret void
|
189 | 208 | }
|
190 | 209 |
|
; bitcast_v4i64: volatile-loads <4 x i64> from %a, bitcasts it to
; <4 x double> and volatile-stores the result to %b. The added define carries
; vscale_range(2,0); the added check lines expect ptrue p0.d, vl4 followed by
; one ld1d/st1d pair, with literal register names instead of the regex
; captures used by the removed lines.
191 |
| -define void @bitcast_v4i64(<4 x i64> *%a, <4 x double>* %b) #0 { |
| 210 | +define void @bitcast_v4i64(<4 x i64> *%a, <4 x double>* %b) vscale_range(2,0) #0 { |
192 | 211 | ; CHECK-LABEL: bitcast_v4i64:
|
193 |
| -; CHECK: ptrue [[PG:p[0-9]+]].d, vl4 |
194 |
| -; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] |
195 |
| -; CHECK-NEXT: st1d { [[OP]].d }, [[PG]], [x1] |
196 |
| -; CHECK-NEXT: ret |
| 212 | +; CHECK: // %bb.0: |
| 213 | +; CHECK-NEXT: ptrue p0.d, vl4 |
| 214 | +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] |
| 215 | +; CHECK-NEXT: st1d { z0.d }, p0, [x1] |
| 216 | +; CHECK-NEXT: ret |
197 | 217 | %load = load volatile <4 x i64>, <4 x i64>* %a
|
198 | 218 | %cast = bitcast <4 x i64> %load to <4 x double>
|
199 | 219 | store volatile <4 x double> %cast, <4 x double>* %b
|
200 | 220 | ret void
|
201 | 221 | }
|
202 | 222 |
|
; bitcast_v8i64: volatile-loads <8 x i64> from %a, bitcasts it to
; <8 x double> and volatile-stores the result to %b. This define is unchanged
; and has no vscale_range attribute; the added check lines are split by
; prefix. The VBITS_GE_256 prefix expects two vl4 ld1d/st1d pairs, one of
; them indexed by x8 = 4 (lsl #3); the VBITS_GE_512 prefix expects a single
; vl8 pair.
203 | 223 | define void @bitcast_v8i64(<8 x i64> *%a, <8 x double>* %b) #0 {
|
204 |
| -; CHECK-LABEL: bitcast_v8i64: |
205 |
| -; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8 |
206 |
| -; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] |
207 |
| -; VBITS_GE_512-NEXT: st1d { [[OP]].d }, [[PG]], [x1] |
208 |
| -; VBITS_GE_512-NEXT: ret |
| 224 | +; VBITS_GE_256-LABEL: bitcast_v8i64: |
| 225 | +; VBITS_GE_256: // %bb.0: |
| 226 | +; VBITS_GE_256-NEXT: mov x8, #4 |
| 227 | +; VBITS_GE_256-NEXT: ptrue p0.d, vl4 |
| 228 | +; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] |
| 229 | +; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] |
| 230 | +; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3] |
| 231 | +; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1] |
| 232 | +; VBITS_GE_256-NEXT: ret |
| 233 | +; |
| 234 | +; VBITS_GE_512-LABEL: bitcast_v8i64: |
| 235 | +; VBITS_GE_512: // %bb.0: |
| 236 | +; VBITS_GE_512-NEXT: ptrue p0.d, vl8 |
| 237 | +; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0] |
| 238 | +; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1] |
| 239 | +; VBITS_GE_512-NEXT: ret |
209 | 240 | %load = load volatile <8 x i64>, <8 x i64>* %a
|
210 | 241 | %cast = bitcast <8 x i64> %load to <8 x double>
|
211 | 242 | store volatile <8 x double> %cast, <8 x double>* %b
|
212 | 243 | ret void
|
213 | 244 | }
|
214 | 245 |
|
; bitcast_v16i64: volatile-loads <16 x i64> from %a, bitcasts it to
; <16 x double> and volatile-stores the result to %b. The added define carries
; vscale_range(8,0), and the added check lines use the common prefix (the
; removed ones used VBITS_GE_1024) to expect ptrue p0.d, vl16 with one
; ld1d/st1d pair.
215 |
| -define void @bitcast_v16i64(<16 x i64> *%a, <16 x double>* %b) #0 { |
| 246 | +define void @bitcast_v16i64(<16 x i64> *%a, <16 x double>* %b) vscale_range(8,0) #0 { |
216 | 247 | ; CHECK-LABEL: bitcast_v16i64:
|
217 |
| -; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16 |
218 |
| -; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] |
219 |
| -; VBITS_GE_1024-NEXT: st1d { [[OP]].d }, [[PG]], [x1] |
220 |
| -; VBITS_GE_1024-NEXT: ret |
| 248 | +; CHECK: // %bb.0: |
| 249 | +; CHECK-NEXT: ptrue p0.d, vl16 |
| 250 | +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] |
| 251 | +; CHECK-NEXT: st1d { z0.d }, p0, [x1] |
| 252 | +; CHECK-NEXT: ret |
221 | 253 | %load = load volatile <16 x i64>, <16 x i64>* %a
|
222 | 254 | %cast = bitcast <16 x i64> %load to <16 x double>
|
223 | 255 | store volatile <16 x double> %cast, <16 x double>* %b
|
224 | 256 | ret void
|
225 | 257 | }
|
226 | 258 |
|
227 |
| -define void @bitcast_v32i64(<32 x i64> *%a, <32 x double>* %b) #0 { |
| 259 | +define void @bitcast_v32i64(<32 x i64> *%a, <32 x double>* %b) vscale_range(16,0) #0 { |
228 | 260 | ; CHECK-LABEL: bitcast_v32i64:
|
229 |
| -; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32 |
230 |
| -; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0] |
231 |
| -; VBITS_GE_2048-NEXT: st1d { [[OP]].d }, [[PG]], [x1] |
232 |
| -; VBITS_GE_2048-NEXT: ret |
| 261 | +; CHECK: // %bb.0: |
| 262 | +; CHECK-NEXT: ptrue p0.d, vl32 |
| 263 | +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] |
| 264 | +; CHECK-NEXT: st1d { z0.d }, p0, [x1] |
| 265 | +; CHECK-NEXT: ret |
233 | 266 | %load = load volatile <32 x i64>, <32 x i64>* %a
|
234 | 267 | %cast = bitcast <32 x i64> %load to <32 x double>
|
235 | 268 | store volatile <32 x double> %cast, <32 x double>* %b
|
|
0 commit comments