|
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s --check-prefixes=CHECK,V
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh | FileCheck %s --check-prefixes=CHECK,ZVFH
 
 declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
 declare void @g()
@@ -327,3 +328,168 @@ define void @v16i8_v32i8(ptr %p, ptr %q) {
   store <16 x i8> %x1, ptr %q1
   ret void
 }
+
+; TODO: We fail to merge these, which would be profitable.
+define void @two_half(ptr %p, ptr %q) {
+; V-LABEL: two_half:
+; V: # %bb.0:
+; V-NEXT: addi sp, sp, -32
+; V-NEXT: .cfi_def_cfa_offset 32
+; V-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; V-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; V-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; V-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; V-NEXT: .cfi_offset ra, -8
+; V-NEXT: .cfi_offset s0, -16
+; V-NEXT: .cfi_offset s1, -24
+; V-NEXT: .cfi_offset s2, -32
+; V-NEXT: lh s1, 0(a0)
+; V-NEXT: lh s2, 2(a0)
+; V-NEXT: mv s0, a1
+; V-NEXT: call g
+; V-NEXT: sh s1, 0(s0)
+; V-NEXT: sh s2, 2(s0)
+; V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; V-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; V-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; V-NEXT: .cfi_restore ra
+; V-NEXT: .cfi_restore s0
+; V-NEXT: .cfi_restore s1
+; V-NEXT: .cfi_restore s2
+; V-NEXT: addi sp, sp, 32
+; V-NEXT: .cfi_def_cfa_offset 0
+; V-NEXT: ret
+;
+; ZVFH-LABEL: two_half:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: addi sp, sp, -32
+; ZVFH-NEXT: .cfi_def_cfa_offset 32
+; ZVFH-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: .cfi_offset ra, -8
+; ZVFH-NEXT: .cfi_offset s0, -16
+; ZVFH-NEXT: csrr a2, vlenb
+; ZVFH-NEXT: sub sp, sp, a2
+; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: addi a0, sp, 16
+; ZVFH-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFH-NEXT: mv s0, a1
+; ZVFH-NEXT: call g
+; ZVFH-NEXT: addi a0, sp, 16
+; ZVFH-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vse16.v v8, (s0)
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: add sp, sp, a0
+; ZVFH-NEXT: .cfi_def_cfa sp, 32
+; ZVFH-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: .cfi_restore ra
+; ZVFH-NEXT: .cfi_restore s0
+; ZVFH-NEXT: addi sp, sp, 32
+; ZVFH-NEXT: .cfi_def_cfa_offset 0
+; ZVFH-NEXT: ret
+  %p0 = getelementptr i8, ptr %p, i64 0
+  %p1 = getelementptr i8, ptr %p, i64 2
+  %x0 = load half, ptr %p0
+  %x1 = load half, ptr %p1
+  call void @g()
+  %q0 = getelementptr i8, ptr %q, i64 0
+  %q1 = getelementptr i8, ptr %q, i64 2
+  store half %x0, ptr %q0
+  store half %x1, ptr %q1
+  ret void
+}
+
+; TODO: This one is currently a vector, which is unprofitable; we should
+; use i64 instead.
+define void @two_float(ptr %p, ptr %q) {
+; CHECK-LABEL: two_float:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: mv s0, a1
+; CHECK-NEXT: call g
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vse32.v v8, (s0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: .cfi_def_cfa sp, 32
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: .cfi_restore ra
+; CHECK-NEXT: .cfi_restore s0
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: ret
+  %p0 = getelementptr i8, ptr %p, i64 0
+  %p1 = getelementptr i8, ptr %p, i64 4
+  %x0 = load float, ptr %p0
+  %x1 = load float, ptr %p1
+  call void @g()
+  %q0 = getelementptr i8, ptr %q, i64 0
+  %q1 = getelementptr i8, ptr %q, i64 4
+  store float %x0, ptr %q0
+  store float %x1, ptr %q1
+  ret void
+}
+
+define void @two_double(ptr %p, ptr %q) {
+; CHECK-LABEL: two_double:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: mv s0, a1
+; CHECK-NEXT: call g
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vse64.v v8, (s0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: .cfi_def_cfa sp, 32
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: .cfi_restore ra
+; CHECK-NEXT: .cfi_restore s0
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: ret
+  %p0 = getelementptr i8, ptr %p, i64 0
+  %p1 = getelementptr i8, ptr %p, i64 8
+  %x0 = load double, ptr %p0
+  %x1 = load double, ptr %p1
+  call void @g()
+  %q0 = getelementptr i8, ptr %q, i64 0
+  %q1 = getelementptr i8, ptr %q, i64 8
+  store double %x0, ptr %q0
+  store double %x1, ptr %q1
+  ret void
+}