|
2 | 2 |
|
3 | 3 | define void @test_stnp_v4i64(<4 x i64>* %p, <4 x i64> %v) #0 {
|
4 | 4 | ; CHECK-LABEL: test_stnp_v4i64:
|
5 |
| -; CHECK-NEXT: mov d[[HI1:[0-9]+]], v1[1] |
6 |
| -; CHECK-NEXT: mov d[[HI0:[0-9]+]], v0[1] |
7 |
| -; CHECK-NEXT: stnp d1, d[[HI1]], [x0, #16] |
8 |
| -; CHECK-NEXT: stnp d0, d[[HI0]], [x0] |
| 5 | +; CHECK-NEXT: stnp q0, q1, [x0] |
9 | 6 | ; CHECK-NEXT: ret
|
10 | 7 | store <4 x i64> %v, <4 x i64>* %p, align 1, !nontemporal !0
|
11 | 8 | ret void
|
@@ -334,6 +331,149 @@ define void @test_stnp_v4f32_offset_alloca_2(<4 x float> %v) #0 {
|
334 | 331 | ret void
|
335 | 332 | }
|
336 | 333 |
|
| 334 | +define void @test_stnp_v32i8(<32 x i8> %v, <32 x i8>* %ptr) { |
| 335 | +; CHECK-LABEL: _test_stnp_v32i8: |
| 336 | +; CHECK-NEXT: .cfi_startproc |
| 337 | +; CHECK-NEXT: stnp q0, q1, [x0] |
| 338 | +; CHECK-NEXT: ret |
| 339 | + |
| 340 | +entry: |
| 341 | + store <32 x i8> %v, <32 x i8>* %ptr, align 4, !nontemporal !0 |
| 342 | + ret void |
| 343 | +} |
| 344 | + |
| 345 | +define void @test_stnp_v32i16(<32 x i16> %v, <32 x i16>* %ptr) { |
| 346 | +; CHECK-LABEL: _test_stnp_v32i16: |
| 347 | +; CHECK-NEXT: .cfi_startproc |
| 348 | +; CHECK-NEXT: stnp q2, q3, [x0, #32] |
| 349 | +; CHECK-NEXT: stnp q0, q1, [x0] |
| 350 | +; CHECK-NEXT: ret |
| 351 | + |
| 352 | +entry: |
| 353 | + store <32 x i16> %v, <32 x i16>* %ptr, align 4, !nontemporal !0 |
| 354 | + ret void |
| 355 | +} |
| 356 | + |
| 357 | +define void @test_stnp_v32f16(<32 x half> %v, <32 x half>* %ptr) { |
| 358 | +; CHECK-LABEL: _test_stnp_v32f16: |
| 359 | +; CHECK-NEXT: .cfi_startproc |
| 360 | +; CHECK-NEXT: stnp q2, q3, [x0, #32] |
| 361 | +; CHECK-NEXT: stnp q0, q1, [x0] |
| 362 | +; CHECK-NEXT: ret |
| 363 | + |
| 364 | +entry: |
| 365 | + store <32 x half> %v, <32 x half>* %ptr, align 4, !nontemporal !0 |
| 366 | + ret void |
| 367 | +} |
| 368 | + |
| 369 | +define void @test_stnp_v16i32(<16 x i32> %v, <16 x i32>* %ptr) { |
| 370 | +; CHECK-LABEL: _test_stnp_v16i32: |
| 371 | +; CHECK-NEXT: .cfi_startproc |
| 372 | +; CHECK-NEXT: stnp q2, q3, [x0, #32] |
| 373 | +; CHECK-NEXT: stnp q0, q1, [x0] |
| 374 | +; CHECK-NEXT: ret |
| 375 | + |
| 376 | +entry: |
| 377 | + store <16 x i32> %v, <16 x i32>* %ptr, align 4, !nontemporal !0 |
| 378 | + ret void |
| 379 | +} |
| 380 | + |
| 381 | +define void @test_stnp_v16f32(<16 x float> %v, <16 x float>* %ptr) { |
| 382 | +; CHECK-LABEL: _test_stnp_v16f32: |
| 383 | +; CHECK-NEXT: .cfi_startproc |
| 384 | +; CHECK-NEXT: stnp q2, q3, [x0, #32] |
| 385 | +; CHECK-NEXT: stnp q0, q1, [x0] |
| 386 | +; CHECK-NEXT: ret |
| 387 | + |
| 388 | +entry: |
| 389 | + store <16 x float> %v, <16 x float>* %ptr, align 4, !nontemporal !0 |
| 390 | + ret void |
| 391 | +} |
| 392 | + |
| 393 | +define void @test_stnp_v17f32(<17 x float> %v, <17 x float>* %ptr) { |
| 394 | +; CHECK-LABEL: _test_stnp_v17f32: |
| 395 | +; CHECK-NEXT: .cfi_startproc |
| 396 | +; CHECK-NEXT: ldr s16, [sp, #16] |
| 397 | +; CHECK-NEXT: mov.s v0[1], v1[0] |
| 398 | +; CHECK-NEXT: mov.s v4[1], v5[0] |
| 399 | +; CHECK-NEXT: ldr s1, [sp] |
| 400 | +; CHECK-NEXT: add x8, sp, #20 |
| 401 | +; CHECK-NEXT: ld1.s { v16 }[1], [x8] |
| 402 | +; CHECK-NEXT: add x8, sp, #4 |
| 403 | +; CHECK-NEXT: ld1.s { v1 }[1], [x8] |
| 404 | +; CHECK-NEXT: add x8, sp, #24 |
| 405 | +; CHECK-NEXT: ld1.s { v16 }[2], [x8] |
| 406 | +; CHECK-NEXT: add x8, sp, #8 |
| 407 | +; CHECK-NEXT: ld1.s { v1 }[2], [x8] |
| 408 | +; CHECK-NEXT: add x8, sp, #28 |
| 409 | +; CHECK-NEXT: ld1.s { v16 }[3], [x8] |
| 410 | +; CHECK-NEXT: add x8, sp, #12 |
| 411 | +; CHECK-NEXT: mov.s v0[2], v2[0] |
| 412 | +; CHECK-NEXT: ldr s2, [sp, #32] |
| 413 | +; CHECK-NEXT: mov.s v4[2], v6[0] |
| 414 | +; CHECK-NEXT: mov.s v0[3], v3[0] |
| 415 | +; CHECK-NEXT: mov.s v4[3], v7[0] |
| 416 | +; CHECK-NEXT: mov d3, v4[1] |
| 417 | +; CHECK-NEXT: mov d5, v0[1] |
| 418 | +; CHECK-NEXT: ld1.s { v1 }[3], [x8] |
| 419 | +; CHECK-NEXT: stnp d4, d3, [x0, #16] |
| 420 | +; CHECK-NEXT: stnp d0, d5, [x0] |
| 421 | +; CHECK-NEXT: mov d0, v16[1] |
| 422 | +; CHECK-NEXT: mov d3, v1[1] |
| 423 | +; CHECK-NEXT: stnp d16, d0, [x0, #48] |
| 424 | +; CHECK-NEXT: stnp d1, d3, [x0, #32] |
| 425 | +; CHECK-NEXT: str s2, [x0, #64] |
| 426 | +; CHECK-NEXT: ret |
| 427 | + |
| 428 | +entry: |
| 429 | + store <17 x float> %v, <17 x float>* %ptr, align 4, !nontemporal !0 |
| 430 | + ret void |
| 431 | +} |
| 432 | +define void @test_stnp_v16i32_invalid_offset(<16 x i32> %v, <16 x i32>* %ptr) { |
| 433 | +; CHECK-LABEL: _test_stnp_v16i32_invalid_offset: |
| 434 | +; CHECK-NEXT: .cfi_startproc |
| 435 | +; CHECK-NEXT: mov w8, #32000 |
| 436 | +; CHECK-NEXT: mov w9, #32032 |
| 437 | +; CHECK-NEXT: add x8, x0, x8 |
| 438 | +; CHECK-NEXT: add x9, x0, x9 |
| 439 | +; CHECK-NEXT: stnp q2, q3, [x9] |
| 440 | +; CHECK-NEXT: stnp q0, q1, [x8] |
| 441 | +; CHECK-NEXT: ret |
| 442 | + |
| 443 | +entry: |
| 444 | + %gep = getelementptr <16 x i32>, <16 x i32>* %ptr, i32 500 |
| 445 | + store <16 x i32> %v, <16 x i32>* %gep, align 4, !nontemporal !0 |
| 446 | + ret void |
| 447 | +} |
| 448 | + |
| 449 | +define void @test_stnp_v16f64(<16 x double> %v, <16 x double>* %ptr) { |
| 450 | +; CHECK-LABEL: _test_stnp_v16f64: |
| 451 | +; CHECK-NEXT: .cfi_startproc |
| 452 | +; CHECK-NEXT: stnp q6, q7, [x0, #96] |
| 453 | +; CHECK-NEXT: stnp q4, q5, [x0, #64] |
| 454 | +; CHECK-NEXT: stnp q2, q3, [x0, #32] |
| 455 | +; CHECK-NEXT: stnp q0, q1, [x0] |
| 456 | +; CHECK-NEXT: ret |
| 457 | + |
| 458 | +entry: |
| 459 | + store <16 x double> %v, <16 x double>* %ptr, align 4, !nontemporal !0 |
| 460 | + ret void |
| 461 | +} |
| 462 | + |
| 463 | +define void @test_stnp_v16i64(<16 x i64> %v, <16 x i64>* %ptr) { |
| 464 | +; CHECK-LABEL: _test_stnp_v16i64: |
| 465 | +; CHECK-NEXT: .cfi_startproc |
| 466 | +; CHECK-NEXT: stnp q6, q7, [x0, #96] |
| 467 | +; CHECK-NEXT: stnp q4, q5, [x0, #64] |
| 468 | +; CHECK-NEXT: stnp q2, q3, [x0, #32] |
| 469 | +; CHECK-NEXT: stnp q0, q1, [x0] |
| 470 | +; CHECK-NEXT: ret |
| 471 | + |
| 472 | +entry: |
| 473 | + store <16 x i64> %v, <16 x i64>* %ptr, align 4, !nontemporal !0 |
| 474 | + ret void |
| 475 | +} |
| 476 | + |
337 | 477 | !0 = !{ i32 1 }
|
338 | 478 |
|
339 | 479 | attributes #0 = { nounwind }
|
0 commit comments