[RISCV] Lower interleave + deinterleave for zvfhmin and zvfbfmin #108404
Fortunately f16 and bf16 are always smaller than the largest supported EEW, so we can always lower via widening or narrowing. This means we don't need to add patterns for vrgather_vv_vl just yet.
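As a quick illustration (not part of the patch), here is a minimal standalone IR sketch of the narrowing path; the function name @demo and the llc flags in the comment are only assumptions modeled on the RUN lines in the tests below:

; Sketch only: with 16-bit bf16 elements, deinterleave can be lowered by
; narrowing, i.e. treating pairs of lanes as 32-bit elements and using
; vnsrl.wi with shift 0 for the even lanes and shift 16 for the odd lanes.
; Assumed invocation: llc -mtriple=riscv64 -mattr=+v,+zvfbfmin < demo.ll
declare {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} @llvm.vector.deinterleave2.nxv8bf16(<vscale x 8 x bfloat>)

define {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} @demo(<vscale x 8 x bfloat> %vec) {
  %res = call {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} @llvm.vector.deinterleave2.nxv8bf16(<vscale x 8 x bfloat> %vec)
  ret {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} %res
}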
@llvm/pr-subscribers-backend-risc-v

Author: Luke Lau (lukel97)

Changes

Fortunately f16 and bf16 are always smaller than the largest supported EEW, so we can always lower via widening or narrowing. This means we don't need to add patterns for vrgather_vv_vl just yet.

Full diff: https://github.com/llvm/llvm-project/pull/108404.diff

3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ab52f977c344dd..ccc74ca9965899 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1078,7 +1078,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
VT, Custom);
setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
- ISD::EXTRACT_SUBVECTOR},
+ ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_INTERLEAVE,
+ ISD::VECTOR_DEINTERLEAVE},
VT, Custom);
if (Subtarget.hasStdExtZfhmin())
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
@@ -1117,7 +1118,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
- ISD::EXTRACT_SUBVECTOR},
+ ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_INTERLEAVE,
+ ISD::VECTOR_DEINTERLEAVE},
VT, Custom);
if (Subtarget.hasStdExtZfbfmin())
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
index efc17b70923aa0..28f7eb4329e3b9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh | FileCheck %s
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh | FileCheck %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh,+zvfbfmin | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh,+zvfbfmin | FileCheck %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfhmin,+zvfbfmin | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfhmin,+zvfbfmin | FileCheck %s
; Integers
@@ -255,6 +257,18 @@ declare {<vscale x 8 x i64>, <vscale x 8 x i64>} @llvm.vector.deinterleave2.nxv1
; Floats
+define {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} @vector_deinterleave_nxv2bf16_nxv4bf16(<vscale x 4 x bfloat> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv2bf16_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnsrl.wi v9, v8, 16
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+%retval = call {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} @llvm.vector.deinterleave2.nxv4bf16(<vscale x 4 x bfloat> %vec)
+ret {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} %retval
+}
+
define {<vscale x 2 x half>, <vscale x 2 x half>} @vector_deinterleave_nxv2f16_nxv4f16(<vscale x 4 x half> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv2f16_nxv4f16:
; CHECK: # %bb.0:
@@ -267,6 +281,19 @@ define {<vscale x 2 x half>, <vscale x 2 x half>} @vector_deinterleave_nxv2f16_n
ret {<vscale x 2 x half>, <vscale x 2 x half>} %retval
}
+define {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} @vector_deinterleave_nxv4bf16_nxv8bf16(<vscale x 8 x bfloat> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv4bf16_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnsrl.wi v11, v8, 16
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv.v.v v9, v11
+; CHECK-NEXT: ret
+%retval = call {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} @llvm.vector.deinterleave2.nxv8bf16(<vscale x 8 x bfloat> %vec)
+ret {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} %retval
+}
+
define {<vscale x 4 x half>, <vscale x 4 x half>} @vector_deinterleave_nxv4f16_nxv8f16(<vscale x 8 x half> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv4f16_nxv8f16:
; CHECK: # %bb.0:
@@ -294,6 +321,19 @@ define {<vscale x 2 x float>, <vscale x 2 x float>} @vector_deinterleave_nxv2f32
ret {<vscale x 2 x float>, <vscale x 2 x float>} %retval
}
+define {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @vector_deinterleave_nxv8bf16_nxv16bf16(<vscale x 16 x bfloat> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv8bf16_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: vnsrl.wi v14, v8, 16
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv.v.v v10, v14
+; CHECK-NEXT: ret
+%retval = call {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.vector.deinterleave2.nxv16bf16(<vscale x 16 x bfloat> %vec)
+ret {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} %retval
+}
+
define {<vscale x 8 x half>, <vscale x 8 x half>} @vector_deinterleave_nxv8f16_nxv16f16(<vscale x 16 x half> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv8f16_nxv16f16:
; CHECK: # %bb.0:
@@ -344,6 +384,21 @@ declare {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.vector.deinterleave2.nx
declare {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float>)
declare {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double>)
+define {<vscale x 32 x bfloat>, <vscale x 32 x bfloat>} @vector_deinterleave_nxv32bf16_nxv64bf16(<vscale x 64 x bfloat> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv32bf16_nxv64bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv8r.v v24, v8
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v24, 0
+; CHECK-NEXT: vnsrl.wi v12, v16, 0
+; CHECK-NEXT: vnsrl.wi v0, v24, 16
+; CHECK-NEXT: vnsrl.wi v4, v16, 16
+; CHECK-NEXT: vmv8r.v v16, v0
+; CHECK-NEXT: ret
+%retval = call {<vscale x 32 x bfloat>, <vscale x 32 x bfloat>} @llvm.vector.deinterleave2.nxv64bf16(<vscale x 64 x bfloat> %vec)
+ret {<vscale x 32 x bfloat>, <vscale x 32 x bfloat>} %retval
+}
+
define {<vscale x 32 x half>, <vscale x 32 x half>} @vector_deinterleave_nxv32f16_nxv64f16(<vscale x 64 x half> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv32f16_nxv64f16:
; CHECK: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
index 2e9f62e2f552c1..83c235d8e87ab7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -1,8 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh | FileCheck %s
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh | FileCheck %s
-; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvbb,+zfh,+zvfh | FileCheck %s --check-prefix=ZVBB
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvbb,+zfh,+zvfh | FileCheck %s --check-prefix=ZVBB
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+zvfbfmin | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zvfbfmin | FileCheck %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfhmin,+zvfbfmin | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zvfbfmin | FileCheck %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvbb,+zfh,+zvfh,+zvfbfmin | FileCheck %s --check-prefix=ZVBB
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvbb,+zfh,+zvfh,+zvfbfmin | FileCheck %s --check-prefix=ZVBB
; Integers
@@ -364,6 +366,62 @@ declare <vscale x 16 x i64> @llvm.vector.interleave2.nxv16i64(<vscale x 8 x i64>
; Floats
+define <vscale x 4 x bfloat> @vector_interleave_nxv4bf16_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
+; CHECK-LABEL: vector_interleave_nxv4bf16_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vwaddu.vv v10, v8, v9
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: vwmaccu.vx v10, a0, v9
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v10, a0
+; CHECK-NEXT: add a1, a0, a0
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v10, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+;
+; ZVBB-LABEL: vector_interleave_nxv4bf16_nxv2bf16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vwsll.vi v10, v9, 16
+; ZVBB-NEXT: vwaddu.wv v10, v10, v8
+; ZVBB-NEXT: csrr a0, vlenb
+; ZVBB-NEXT: srli a0, a0, 2
+; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVBB-NEXT: vslidedown.vx v8, v10, a0
+; ZVBB-NEXT: add a1, a0, a0
+; ZVBB-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; ZVBB-NEXT: vslideup.vx v10, v8, a0
+; ZVBB-NEXT: vmv.v.v v8, v10
+; ZVBB-NEXT: ret
+  %res = call <vscale x 4 x bfloat> @llvm.vector.interleave2.nxv4bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
+  ret <vscale x 4 x bfloat> %res
+}
+
+define <vscale x 8 x bfloat> @vector_interleave_nxv8bf16_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
+; CHECK-LABEL: vector_interleave_nxv8bf16_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vwaddu.vv v10, v8, v9
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: vwmaccu.vx v10, a0, v9
+; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: ret
+;
+; ZVBB-LABEL: vector_interleave_nxv8bf16_nxv4bf16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVBB-NEXT: vwsll.vi v10, v9, 16
+; ZVBB-NEXT: vwaddu.wv v10, v10, v8
+; ZVBB-NEXT: vmv2r.v v8, v10
+; ZVBB-NEXT: ret
+  %res = call <vscale x 8 x bfloat> @llvm.vector.interleave2.nxv8bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
+  ret <vscale x 8 x bfloat> %res
+}
+
define <vscale x 4 x half> @vector_interleave_nxv4f16_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: vector_interleave_nxv4f16_nxv2f16:
; CHECK: # %bb.0:
@@ -442,6 +500,27 @@ define <vscale x 4 x float> @vector_interleave_nxv4f32_nxv2f32(<vscale x 2 x flo
ret <vscale x 4 x float> %res
}
+define <vscale x 16 x bfloat> @vector_interleave_nxv16bf16_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
+; CHECK-LABEL: vector_interleave_nxv16bf16_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vwaddu.vv v12, v8, v10
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: vwmaccu.vx v12, a0, v10
+; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: ret
+;
+; ZVBB-LABEL: vector_interleave_nxv16bf16_nxv8bf16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVBB-NEXT: vwsll.vi v12, v10, 16
+; ZVBB-NEXT: vwaddu.wv v12, v12, v8
+; ZVBB-NEXT: vmv4r.v v8, v12
+; ZVBB-NEXT: ret
+  %res = call <vscale x 16 x bfloat> @llvm.vector.interleave2.nxv16bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
+  ret <vscale x 16 x bfloat> %res
+}
+
define <vscale x 16 x half> @vector_interleave_nxv16f16_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: vector_interleave_nxv16f16_nxv8f16:
; CHECK: # %bb.0:
@@ -527,6 +606,33 @@ declare <vscale x 16 x half> @llvm.vector.interleave2.nxv16f16(<vscale x 8 x hal
declare <vscale x 8 x float> @llvm.vector.interleave2.nxv8f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 64 x bfloat> @vector_interleave_nxv64bf16_nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) {
+; CHECK-LABEL: vector_interleave_nxv64bf16_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv8r.v v24, v8
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vwaddu.vv v8, v24, v16
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: vwmaccu.vx v8, a0, v16
+; CHECK-NEXT: vwaddu.vv v0, v28, v20
+; CHECK-NEXT: vwmaccu.vx v0, a0, v20
+; CHECK-NEXT: vmv8r.v v16, v0
+; CHECK-NEXT: ret
+;
+; ZVBB-LABEL: vector_interleave_nxv64bf16_nxv32bf16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vmv8r.v v24, v8
+; ZVBB-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVBB-NEXT: vwsll.vi v8, v16, 16
+; ZVBB-NEXT: vwaddu.wv v8, v8, v24
+; ZVBB-NEXT: vwsll.vi v0, v20, 16
+; ZVBB-NEXT: vwaddu.wv v0, v0, v28
+; ZVBB-NEXT: vmv8r.v v16, v0
+; ZVBB-NEXT: ret
+  %res = call <vscale x 64 x bfloat> @llvm.vector.interleave2.nxv64bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
+  ret <vscale x 64 x bfloat> %res
+}
+
define <vscale x 64 x half> @vector_interleave_nxv64f16_nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b) {
; CHECK-LABEL: vector_interleave_nxv64f16_nxv32f16:
; CHECK: # %bb.0:
LGTM