[RISCV] Lower interleave + deinterleave for zvfhmin and zvfbfmin #108404

Merged: 1 commit into llvm:main, Sep 12, 2024

Conversation

@lukel97 (Contributor) commented on Sep 12, 2024

Fortunately, f16 and bf16 are always narrower than the largest EEW, so we can always lower via widening or narrowing. This means we don't need to add patterns for vrgather_vv_vl just yet.
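
As a sketch of the narrowing half of that claim (my illustration, not code from this patch): a SEW=16 vector can be reinterpreted at SEW=32, and the two deinterleaved halves then fall out of a pair of narrowing shifts, which is what the vnsrl.wi 0 / vnsrl.wi 16 pairs in the tests below implement. In LLVM IR terms, using i16 lanes as a stand-in for the f16/bf16 bit patterns (the shifts are integer ops) and hypothetical names:

; Hand-written sketch: deinterleave2 on a SEW=16 vector via narrowing.
define {<vscale x 2 x i16>, <vscale x 2 x i16>} @deinterleave_sketch(<vscale x 4 x i16> %v) {
  %wide = bitcast <vscale x 4 x i16> %v to <vscale x 2 x i32>
  ; even lanes sit in the low 16 bits of each 32-bit element
  ; (vnsrl.wi vd, vs, 0)
  %even = trunc <vscale x 2 x i32> %wide to <vscale x 2 x i16>
  ; odd lanes sit in the high 16 bits, so shift them down first
  ; (vnsrl.wi vd, vs, 16)
  %shift = lshr <vscale x 2 x i32> %wide, splat (i32 16)
  %odd = trunc <vscale x 2 x i32> %shift to <vscale x 2 x i16>
  %r0 = insertvalue {<vscale x 2 x i16>, <vscale x 2 x i16>} poison, <vscale x 2 x i16> %even, 0
  %r1 = insertvalue {<vscale x 2 x i16>, <vscale x 2 x i16>} %r0, <vscale x 2 x i16> %odd, 1
  ret {<vscale x 2 x i16>, <vscale x 2 x i16>} %r1
}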

@llvmbot (Member) commented on Sep 12, 2024

@llvm/pr-subscribers-backend-risc-v

Author: Luke Lau (lukel97)

Changes

Fortunately, f16 and bf16 are always narrower than the largest EEW, so we can always lower via widening or narrowing. This means we don't need to add patterns for vrgather_vv_vl just yet.


Full diff: https://github.com/llvm/llvm-project/pull/108404.diff

3 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+4-2)
  • (modified) llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll (+57-2)
  • (modified) llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll (+110-4)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ab52f977c344dd..ccc74ca9965899 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1078,7 +1078,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                             ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
                            VT, Custom);
         setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
-                            ISD::EXTRACT_SUBVECTOR},
+                            ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_INTERLEAVE,
+                            ISD::VECTOR_DEINTERLEAVE},
                            VT, Custom);
         if (Subtarget.hasStdExtZfhmin())
           setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
@@ -1117,7 +1118,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                            Custom);
         setOperationAction(ISD::SELECT_CC, VT, Expand);
         setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
-                            ISD::EXTRACT_SUBVECTOR},
+                            ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_INTERLEAVE,
+                            ISD::VECTOR_DEINTERLEAVE},
                            VT, Custom);
         if (Subtarget.hasStdExtZfbfmin())
           setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
index efc17b70923aa0..28f7eb4329e3b9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh | FileCheck %s
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh | FileCheck %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh,+zvfbfmin | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh,+zvfbfmin | FileCheck %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfhmin,+zvfbfmin | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfhmin,+zvfbfmin | FileCheck %s
 
 ; Integers
 
@@ -255,6 +257,18 @@ declare {<vscale x 8 x i64>, <vscale x 8 x i64>} @llvm.vector.deinterleave2.nxv1
 
 ; Floats
 
+define {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} @vector_deinterleave_nxv2bf16_nxv4bf16(<vscale x 4 x bfloat> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv2bf16_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wi v9, v8, 16
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+%retval = call {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} @llvm.vector.deinterleave2.nxv4bf16(<vscale x 4 x bfloat> %vec)
+ret {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} %retval
+}
+
 define {<vscale x 2 x half>, <vscale x 2 x half>} @vector_deinterleave_nxv2f16_nxv4f16(<vscale x 4 x half> %vec) {
 ; CHECK-LABEL: vector_deinterleave_nxv2f16_nxv4f16:
 ; CHECK:       # %bb.0:
@@ -267,6 +281,19 @@ define {<vscale x 2 x half>, <vscale x 2 x half>} @vector_deinterleave_nxv2f16_n
 ret {<vscale x 2 x half>, <vscale x 2 x half>} %retval
 }
 
+define {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} @vector_deinterleave_nxv4bf16_nxv8bf16(<vscale x 8 x bfloat> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv4bf16_nxv8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wi v11, v8, 16
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vmv.v.v v9, v11
+; CHECK-NEXT:    ret
+%retval = call {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} @llvm.vector.deinterleave2.nxv8bf16(<vscale x 8 x bfloat> %vec)
+ret {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} %retval
+}
+
 define {<vscale x 4 x half>, <vscale x 4 x half>} @vector_deinterleave_nxv4f16_nxv8f16(<vscale x 8 x half> %vec) {
 ; CHECK-LABEL: vector_deinterleave_nxv4f16_nxv8f16:
 ; CHECK:       # %bb.0:
@@ -294,6 +321,19 @@ define {<vscale x 2 x float>, <vscale x 2 x float>} @vector_deinterleave_nxv2f32
 ret {<vscale x 2 x float>, <vscale x 2 x float>} %retval
 }
 
+define {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @vector_deinterleave_nxv8bf16_nxv16bf16(<vscale x 16 x bfloat> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv8bf16_nxv16bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vnsrl.wi v14, v8, 16
+; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    vmv.v.v v10, v14
+; CHECK-NEXT:    ret
+%retval = call {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.vector.deinterleave2.nxv16bf16(<vscale x 16 x bfloat> %vec)
+ret {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} %retval
+}
+
 define {<vscale x 8 x half>, <vscale x 8 x half>} @vector_deinterleave_nxv8f16_nxv16f16(<vscale x 16 x half> %vec) {
 ; CHECK-LABEL: vector_deinterleave_nxv8f16_nxv16f16:
 ; CHECK:       # %bb.0:
@@ -344,6 +384,21 @@ declare {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.vector.deinterleave2.nx
 declare {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float>)
 declare {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double>)
 
+define {<vscale x 32 x bfloat>, <vscale x 32 x bfloat>} @vector_deinterleave_nxv32bf16_nxv64bf16(<vscale x 64 x bfloat> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv32bf16_nxv64bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv8r.v v24, v8
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v24, 0
+; CHECK-NEXT:    vnsrl.wi v12, v16, 0
+; CHECK-NEXT:    vnsrl.wi v0, v24, 16
+; CHECK-NEXT:    vnsrl.wi v4, v16, 16
+; CHECK-NEXT:    vmv8r.v v16, v0
+; CHECK-NEXT:    ret
+%retval = call {<vscale x 32 x bfloat>, <vscale x 32 x bfloat>} @llvm.vector.deinterleave2.nxv64bf16(<vscale x 64 x bfloat> %vec)
+ret {<vscale x 32 x bfloat>, <vscale x 32 x bfloat>} %retval
+}
+
 define {<vscale x 32 x half>, <vscale x 32 x half>} @vector_deinterleave_nxv32f16_nxv64f16(<vscale x 64 x half> %vec) {
 ; CHECK-LABEL: vector_deinterleave_nxv32f16_nxv64f16:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
index 2e9f62e2f552c1..83c235d8e87ab7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh | FileCheck %s
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh | FileCheck %s
-; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvbb,+zfh,+zvfh | FileCheck %s --check-prefix=ZVBB
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvbb,+zfh,+zvfh | FileCheck %s --check-prefix=ZVBB
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+zvfbfmin | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zvfbfmin | FileCheck %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfhmin,+zvfbfmin | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zvfbfmin | FileCheck %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvbb,+zfh,+zvfh,+zvfbfmin | FileCheck %s --check-prefix=ZVBB
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvbb,+zfh,+zvfh,+zvfbfmin | FileCheck %s --check-prefix=ZVBB
 
 ; Integers
 
@@ -364,6 +366,62 @@ declare <vscale x 16 x i64> @llvm.vector.interleave2.nxv16i64(<vscale x 8 x i64>
 
 ; Floats
 
+define <vscale x 4 x bfloat> @vector_interleave_nxv4bf16_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
+; CHECK-LABEL: vector_interleave_nxv4bf16_nxv2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vwaddu.vv v10, v8, v9
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vx v8, v10, a0
+; CHECK-NEXT:    add a1, a0, a0
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vslideup.vx v10, v8, a0
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
+;
+; ZVBB-LABEL: vector_interleave_nxv4bf16_nxv2bf16:
+; ZVBB:       # %bb.0:
+; ZVBB-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; ZVBB-NEXT:    vwsll.vi v10, v9, 16
+; ZVBB-NEXT:    vwaddu.wv v10, v10, v8
+; ZVBB-NEXT:    csrr a0, vlenb
+; ZVBB-NEXT:    srli a0, a0, 2
+; ZVBB-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; ZVBB-NEXT:    vslidedown.vx v8, v10, a0
+; ZVBB-NEXT:    add a1, a0, a0
+; ZVBB-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; ZVBB-NEXT:    vslideup.vx v10, v8, a0
+; ZVBB-NEXT:    vmv.v.v v8, v10
+; ZVBB-NEXT:    ret
+  %res = call <vscale x 4 x bfloat> @llvm.vector.interleave2.nxv4bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
+  ret <vscale x 4 x bfloat> %res
+}
+
+define <vscale x 8 x bfloat> @vector_interleave_nxv8bf16_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
+; CHECK-LABEL: vector_interleave_nxv8bf16_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vwaddu.vv v10, v8, v9
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+;
+; ZVBB-LABEL: vector_interleave_nxv8bf16_nxv4bf16:
+; ZVBB:       # %bb.0:
+; ZVBB-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVBB-NEXT:    vwsll.vi v10, v9, 16
+; ZVBB-NEXT:    vwaddu.wv v10, v10, v8
+; ZVBB-NEXT:    vmv2r.v v8, v10
+; ZVBB-NEXT:    ret
+  %res = call <vscale x 8 x bfloat> @llvm.vector.interleave2.nxv8bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
+  ret <vscale x 8 x bfloat> %res
+}
+
 define <vscale x 4 x half> @vector_interleave_nxv4f16_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
 ; CHECK-LABEL: vector_interleave_nxv4f16_nxv2f16:
 ; CHECK:       # %bb.0:
@@ -442,6 +500,27 @@ define <vscale x 4 x float> @vector_interleave_nxv4f32_nxv2f32(<vscale x 2 x flo
   ret <vscale x 4 x float> %res
 }
 
+define <vscale x 16 x bfloat> @vector_interleave_nxv16bf16_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
+; CHECK-LABEL: vector_interleave_nxv16bf16_nxv8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vwaddu.vv v12, v8, v10
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vwmaccu.vx v12, a0, v10
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+;
+; ZVBB-LABEL: vector_interleave_nxv16bf16_nxv8bf16:
+; ZVBB:       # %bb.0:
+; ZVBB-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; ZVBB-NEXT:    vwsll.vi v12, v10, 16
+; ZVBB-NEXT:    vwaddu.wv v12, v12, v8
+; ZVBB-NEXT:    vmv4r.v v8, v12
+; ZVBB-NEXT:    ret
+  %res = call <vscale x 16 x bfloat> @llvm.vector.interleave2.nxv16bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
+  ret <vscale x 16 x bfloat> %res
+}
+
 define <vscale x 16 x half> @vector_interleave_nxv16f16_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: vector_interleave_nxv16f16_nxv8f16:
 ; CHECK:       # %bb.0:
@@ -527,6 +606,33 @@ declare <vscale x 16 x half> @llvm.vector.interleave2.nxv16f16(<vscale x 8 x hal
 declare <vscale x 8 x float> @llvm.vector.interleave2.nxv8f32(<vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double>, <vscale x 2 x double>)
 
+define <vscale x 64 x bfloat> @vector_interleave_nxv64bf16_nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) {
+; CHECK-LABEL: vector_interleave_nxv64bf16_nxv32bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv8r.v v24, v8
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vwaddu.vv v8, v24, v16
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vwmaccu.vx v8, a0, v16
+; CHECK-NEXT:    vwaddu.vv v0, v28, v20
+; CHECK-NEXT:    vwmaccu.vx v0, a0, v20
+; CHECK-NEXT:    vmv8r.v v16, v0
+; CHECK-NEXT:    ret
+;
+; ZVBB-LABEL: vector_interleave_nxv64bf16_nxv32bf16:
+; ZVBB:       # %bb.0:
+; ZVBB-NEXT:    vmv8r.v v24, v8
+; ZVBB-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; ZVBB-NEXT:    vwsll.vi v8, v16, 16
+; ZVBB-NEXT:    vwaddu.wv v8, v8, v24
+; ZVBB-NEXT:    vwsll.vi v0, v20, 16
+; ZVBB-NEXT:    vwaddu.wv v0, v0, v28
+; ZVBB-NEXT:    vmv8r.v v16, v0
+; ZVBB-NEXT:    ret
+  %res = call <vscale x 64 x bfloat> @llvm.vector.interleave2.nxv64bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
+  ret <vscale x 64 x bfloat> %res
+}
+
 define <vscale x 64 x half> @vector_interleave_nxv64f16_nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b) {
 ; CHECK-LABEL: vector_interleave_nxv64f16_nxv32f16:
 ; CHECK:       # %bb.0:
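
A note on the non-ZVBB vwaddu.vv + vwmaccu.vx sequences in vector-interleave.ll above (my explanation, not part of the patch): per lane at 2*SEW, the pair computes

  vwaddu.vv  vd, a, b    ; vd[i]  = zext(a[i]) + zext(b[i])
  vwmaccu.vx vd, a0, b   ; vd[i] += (2^16 - 1) * zext(b[i])   with a0 = -1, read as 2^16 - 1 at SEW=16
                         ; vd[i]  = zext(a[i]) + 2^16 * zext(b[i])

so each 32-bit element holds b[i] in its high half and a[i] in its low half, which reinterpreted as SEW=16 elements is exactly interleave2(a, b). With ZVBB the same result comes more directly: vwsll.vi shifts b into the high half while widening, and vwaddu.wv then merges in a.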

@topperc (Collaborator) left a comment


LGTM

@lukel97 merged commit c6ca13d into llvm:main on Sep 12, 2024
10 checks passed