; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh | FileCheck %s
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh | FileCheck %s
-; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvbb,+zfh,+zvfh | FileCheck %s --check-prefix=ZVBB
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvbb,+zfh,+zvfh | FileCheck %s --check-prefix=ZVBB
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+zvfbfmin | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zvfbfmin | FileCheck %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfhmin,+zvfbfmin | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zvfbfmin | FileCheck %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvbb,+zfh,+zvfh,+zvfbfmin | FileCheck %s --check-prefix=ZVBB
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvbb,+zfh,+zvfh,+zvfbfmin | FileCheck %s --check-prefix=ZVBB

; Integers

@@ -364,6 +366,62 @@ declare <vscale x 16 x i64> @llvm.vector.interleave2.nxv16i64(<vscale x 8 x i64>

; Floats

+define <vscale x 4 x bfloat> @vector_interleave_nxv4bf16_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
+; CHECK-LABEL: vector_interleave_nxv4bf16_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vwaddu.vv v10, v8, v9
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: vwmaccu.vx v10, a0, v9
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v10, a0
+; CHECK-NEXT: add a1, a0, a0
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v10, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+;
+; ZVBB-LABEL: vector_interleave_nxv4bf16_nxv2bf16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vwsll.vi v10, v9, 16
+; ZVBB-NEXT: vwaddu.wv v10, v10, v8
+; ZVBB-NEXT: csrr a0, vlenb
+; ZVBB-NEXT: srli a0, a0, 2
+; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVBB-NEXT: vslidedown.vx v8, v10, a0
+; ZVBB-NEXT: add a1, a0, a0
+; ZVBB-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; ZVBB-NEXT: vslideup.vx v10, v8, a0
+; ZVBB-NEXT: vmv.v.v v8, v10
+; ZVBB-NEXT: ret
+  %res = call <vscale x 4 x bfloat> @llvm.vector.interleave2.nxv4bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
+  ret <vscale x 4 x bfloat> %res
+}
+
+define <vscale x 8 x bfloat> @vector_interleave_nxv8bf16_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
+; CHECK-LABEL: vector_interleave_nxv8bf16_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vwaddu.vv v10, v8, v9
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: vwmaccu.vx v10, a0, v9
+; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: ret
+;
+; ZVBB-LABEL: vector_interleave_nxv8bf16_nxv4bf16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVBB-NEXT: vwsll.vi v10, v9, 16
+; ZVBB-NEXT: vwaddu.wv v10, v10, v8
+; ZVBB-NEXT: vmv2r.v v8, v10
+; ZVBB-NEXT: ret
+  %res = call <vscale x 8 x bfloat> @llvm.vector.interleave2.nxv8bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
+  ret <vscale x 8 x bfloat> %res
+}
+
define <vscale x 4 x half> @vector_interleave_nxv4f16_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: vector_interleave_nxv4f16_nxv2f16:
; CHECK: # %bb.0:
@@ -442,6 +500,27 @@ define <vscale x 4 x float> @vector_interleave_nxv4f32_nxv2f32(<vscale x 2 x flo
  ret <vscale x 4 x float> %res
}

+define <vscale x 16 x bfloat> @vector_interleave_nxv16bf16_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
+; CHECK-LABEL: vector_interleave_nxv16bf16_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vwaddu.vv v12, v8, v10
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: vwmaccu.vx v12, a0, v10
+; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: ret
+;
+; ZVBB-LABEL: vector_interleave_nxv16bf16_nxv8bf16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVBB-NEXT: vwsll.vi v12, v10, 16
+; ZVBB-NEXT: vwaddu.wv v12, v12, v8
+; ZVBB-NEXT: vmv4r.v v8, v12
+; ZVBB-NEXT: ret
+  %res = call <vscale x 16 x bfloat> @llvm.vector.interleave2.nxv16bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
+  ret <vscale x 16 x bfloat> %res
+}
+
define <vscale x 16 x half> @vector_interleave_nxv16f16_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: vector_interleave_nxv16f16_nxv8f16:
; CHECK: # %bb.0:
@@ -527,6 +606,33 @@ declare <vscale x 16 x half> @llvm.vector.interleave2.nxv16f16(<vscale x 8 x hal
declare <vscale x 8 x float> @llvm.vector.interleave2.nxv8f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double>, <vscale x 2 x double>)

+define <vscale x 64 x bfloat> @vector_interleave_nxv64bf16_nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) {
+; CHECK-LABEL: vector_interleave_nxv64bf16_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv8r.v v24, v8
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vwaddu.vv v8, v24, v16
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: vwmaccu.vx v8, a0, v16
+; CHECK-NEXT: vwaddu.vv v0, v28, v20
+; CHECK-NEXT: vwmaccu.vx v0, a0, v20
+; CHECK-NEXT: vmv8r.v v16, v0
+; CHECK-NEXT: ret
+;
+; ZVBB-LABEL: vector_interleave_nxv64bf16_nxv32bf16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vmv8r.v v24, v8
+; ZVBB-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVBB-NEXT: vwsll.vi v8, v16, 16
+; ZVBB-NEXT: vwaddu.wv v8, v8, v24
+; ZVBB-NEXT: vwsll.vi v0, v20, 16
+; ZVBB-NEXT: vwaddu.wv v0, v0, v28
+; ZVBB-NEXT: vmv8r.v v16, v0
+; ZVBB-NEXT: ret
+  %res = call <vscale x 64 x bfloat> @llvm.vector.interleave2.nxv64bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
+  ret <vscale x 64 x bfloat> %res
+}
+
define <vscale x 64 x half> @vector_interleave_nxv64f16_nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b) {
; CHECK-LABEL: vector_interleave_nxv64f16_nxv32f16:
; CHECK: # %bb.0: