|
6 | 6 | ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v2i8
|
7 | 7 | ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v2i8
|
8 | 8 | ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_str_v2i8
|
9 |
| -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_str_v3i8 |
10 |
| -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_str_v4i8 |
11 |
| -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_str_v8i8 |
12 |
| -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_str_v16i8 |
13 |
| -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_str_v32i8 |
14 | 9 |
|
15 | 10 | define <2 x i8> @dup_v2i8(i8 %a) {
|
16 | 11 | ; CHECK-LABEL: dup_v2i8:
|
@@ -127,14 +122,25 @@ entry:
|
127 | 122 | }
|
128 | 123 |
|
129 | 124 | define <3 x i8> @loaddup_str_v3i8(ptr %p) {
|
130 |
| -; CHECK-LABEL: loaddup_str_v3i8: |
131 |
| -; CHECK: // %bb.0: // %entry |
132 |
| -; CHECK-NEXT: mov x8, x0 |
133 |
| -; CHECK-NEXT: ldrb w0, [x0] |
134 |
| -; CHECK-NEXT: strb wzr, [x8] |
135 |
| -; CHECK-NEXT: mov w1, w0 |
136 |
| -; CHECK-NEXT: mov w2, w0 |
137 |
| -; CHECK-NEXT: ret |
| 125 | +; CHECK-SD-LABEL: loaddup_str_v3i8: |
| 126 | +; CHECK-SD: // %bb.0: // %entry |
| 127 | +; CHECK-SD-NEXT: mov x8, x0 |
| 128 | +; CHECK-SD-NEXT: ldrb w0, [x0] |
| 129 | +; CHECK-SD-NEXT: strb wzr, [x8] |
| 130 | +; CHECK-SD-NEXT: mov w1, w0 |
| 131 | +; CHECK-SD-NEXT: mov w2, w0 |
| 132 | +; CHECK-SD-NEXT: ret |
| 133 | +; |
| 134 | +; CHECK-GI-LABEL: loaddup_str_v3i8: |
| 135 | +; CHECK-GI: // %bb.0: // %entry |
| 136 | +; CHECK-GI-NEXT: ldr b0, [x0] |
| 137 | +; CHECK-GI-NEXT: mov x8, x0 |
| 138 | +; CHECK-GI-NEXT: strb wzr, [x8] |
| 139 | +; CHECK-GI-NEXT: dup v0.8b, v0.b[0] |
| 140 | +; CHECK-GI-NEXT: umov w0, v0.b[0] |
| 141 | +; CHECK-GI-NEXT: umov w1, v0.b[1] |
| 142 | +; CHECK-GI-NEXT: umov w2, v0.b[2] |
| 143 | +; CHECK-GI-NEXT: ret |
138 | 144 | entry:
|
139 | 145 | %a = load i8, ptr %p
|
140 | 146 | %b = insertelement <3 x i8> poison, i8 %a, i64 0
|
@@ -201,12 +207,21 @@ entry:
|
201 | 207 | }
|
202 | 208 |
|
203 | 209 | define <4 x i8> @loaddup_str_v4i8(ptr %p) {
|
204 |
| -; CHECK-LABEL: loaddup_str_v4i8: |
205 |
| -; CHECK: // %bb.0: // %entry |
206 |
| -; CHECK-NEXT: ldrb w8, [x0] |
207 |
| -; CHECK-NEXT: strb wzr, [x0] |
208 |
| -; CHECK-NEXT: dup v0.4h, w8 |
209 |
| -; CHECK-NEXT: ret |
| 210 | +; CHECK-SD-LABEL: loaddup_str_v4i8: |
| 211 | +; CHECK-SD: // %bb.0: // %entry |
| 212 | +; CHECK-SD-NEXT: ldrb w8, [x0] |
| 213 | +; CHECK-SD-NEXT: strb wzr, [x0] |
| 214 | +; CHECK-SD-NEXT: dup v0.4h, w8 |
| 215 | +; CHECK-SD-NEXT: ret |
| 216 | +; |
| 217 | +; CHECK-GI-LABEL: loaddup_str_v4i8: |
| 218 | +; CHECK-GI: // %bb.0: // %entry |
| 219 | +; CHECK-GI-NEXT: ldr b0, [x0] |
| 220 | +; CHECK-GI-NEXT: strb wzr, [x0] |
| 221 | +; CHECK-GI-NEXT: dup v0.8b, v0.b[0] |
| 222 | +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 |
| 223 | +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 |
| 224 | +; CHECK-GI-NEXT: ret |
210 | 225 | entry:
|
211 | 226 | %a = load i8, ptr %p
|
212 | 227 | %b = insertelement <4 x i8> poison, i8 %a, i64 0
|
@@ -250,11 +265,18 @@ entry:
|
250 | 265 | }
|
251 | 266 |
|
252 | 267 | define <8 x i8> @loaddup_str_v8i8(ptr %p) {
|
253 |
| -; CHECK-LABEL: loaddup_str_v8i8: |
254 |
| -; CHECK: // %bb.0: // %entry |
255 |
| -; CHECK-NEXT: ld1r { v0.8b }, [x0] |
256 |
| -; CHECK-NEXT: strb wzr, [x0] |
257 |
| -; CHECK-NEXT: ret |
| 268 | +; CHECK-SD-LABEL: loaddup_str_v8i8: |
| 269 | +; CHECK-SD: // %bb.0: // %entry |
| 270 | +; CHECK-SD-NEXT: ld1r { v0.8b }, [x0] |
| 271 | +; CHECK-SD-NEXT: strb wzr, [x0] |
| 272 | +; CHECK-SD-NEXT: ret |
| 273 | +; |
| 274 | +; CHECK-GI-LABEL: loaddup_str_v8i8: |
| 275 | +; CHECK-GI: // %bb.0: // %entry |
| 276 | +; CHECK-GI-NEXT: ldr b0, [x0] |
| 277 | +; CHECK-GI-NEXT: strb wzr, [x0] |
| 278 | +; CHECK-GI-NEXT: dup v0.8b, v0.b[0] |
| 279 | +; CHECK-GI-NEXT: ret |
258 | 280 | entry:
|
259 | 281 | %a = load i8, ptr %p
|
260 | 282 | %b = insertelement <8 x i8> poison, i8 %a, i64 0
|
@@ -297,11 +319,18 @@ entry:
|
297 | 319 | }
|
298 | 320 |
|
299 | 321 | define <16 x i8> @loaddup_str_v16i8(ptr %p) {
|
300 |
| -; CHECK-LABEL: loaddup_str_v16i8: |
301 |
| -; CHECK: // %bb.0: // %entry |
302 |
| -; CHECK-NEXT: ld1r { v0.16b }, [x0] |
303 |
| -; CHECK-NEXT: strb wzr, [x0] |
304 |
| -; CHECK-NEXT: ret |
| 322 | +; CHECK-SD-LABEL: loaddup_str_v16i8: |
| 323 | +; CHECK-SD: // %bb.0: // %entry |
| 324 | +; CHECK-SD-NEXT: ld1r { v0.16b }, [x0] |
| 325 | +; CHECK-SD-NEXT: strb wzr, [x0] |
| 326 | +; CHECK-SD-NEXT: ret |
| 327 | +; |
| 328 | +; CHECK-GI-LABEL: loaddup_str_v16i8: |
| 329 | +; CHECK-GI: // %bb.0: // %entry |
| 330 | +; CHECK-GI-NEXT: ldr b0, [x0] |
| 331 | +; CHECK-GI-NEXT: strb wzr, [x0] |
| 332 | +; CHECK-GI-NEXT: dup v0.16b, v0.b[0] |
| 333 | +; CHECK-GI-NEXT: ret |
305 | 334 | entry:
|
306 | 335 | %a = load i8, ptr %p
|
307 | 336 | %b = insertelement <16 x i8> poison, i8 %a, i64 0
|
@@ -353,12 +382,20 @@ entry:
|
353 | 382 | }
|
354 | 383 |
|
355 | 384 | define <32 x i8> @loaddup_str_v32i8(ptr %p) {
|
356 |
| -; CHECK-LABEL: loaddup_str_v32i8: |
357 |
| -; CHECK: // %bb.0: // %entry |
358 |
| -; CHECK-NEXT: ld1r { v0.16b }, [x0] |
359 |
| -; CHECK-NEXT: strb wzr, [x0] |
360 |
| -; CHECK-NEXT: mov v1.16b, v0.16b |
361 |
| -; CHECK-NEXT: ret |
| 385 | +; CHECK-SD-LABEL: loaddup_str_v32i8: |
| 386 | +; CHECK-SD: // %bb.0: // %entry |
| 387 | +; CHECK-SD-NEXT: ld1r { v0.16b }, [x0] |
| 388 | +; CHECK-SD-NEXT: strb wzr, [x0] |
| 389 | +; CHECK-SD-NEXT: mov v1.16b, v0.16b |
| 390 | +; CHECK-SD-NEXT: ret |
| 391 | +; |
| 392 | +; CHECK-GI-LABEL: loaddup_str_v32i8: |
| 393 | +; CHECK-GI: // %bb.0: // %entry |
| 394 | +; CHECK-GI-NEXT: ldr b1, [x0] |
| 395 | +; CHECK-GI-NEXT: strb wzr, [x0] |
| 396 | +; CHECK-GI-NEXT: dup v0.16b, v1.b[0] |
| 397 | +; CHECK-GI-NEXT: dup v1.16b, v1.b[0] |
| 398 | +; CHECK-GI-NEXT: ret |
362 | 399 | entry:
|
363 | 400 | %a = load i8, ptr %p
|
364 | 401 | %b = insertelement <32 x i8> poison, i8 %a, i64 0
|
|
0 commit comments