Commit a7d1d38

[RISCV] Use integer VTypeInfo predicate for vmv_v_v_vl pattern (#114915)
When lowering fixed-length f16 insert_subvector nodes at index 0, we crashed with zvfhmin because we couldn't select vmv_v_v_vl. This was due to the pattern's predicates requiring full zvfh, even though we only need zve32x. Use the integer VTypeInfo instead, similarly to VPatSlideVL_VX_VI. The extract_subvector tests aren't related, but were added for consistency with the insert_subvector tests.
1 parent 2f13fbf commit a7d1d38
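For context, a minimal reproducer in the spirit of the new tests (the function name is illustrative, not from the patch): before this change, running llc -mtriple=riscv64 -mattr=+m,+v,+zvfhmin on IR like the following crashed in instruction selection, because inserting a fixed subvector at index 0 lowers to a tail-undisturbed vmv.v.v whose pattern demanded full zvfh.

; Hedged sketch of the previously-crashing input, assuming only zvfhmin.
define <vscale x 8 x half> @repro(<vscale x 8 x half> %vec, ptr %svp) {
  %sv = load <2 x half>, ptr %svp
  ; Insert at index 0: selects PseudoVMV_V_V once this patch is applied.
  %v = call <vscale x 8 x half> @llvm.vector.insert.v2f16.nxv8f16(<vscale x 8 x half> %vec, <2 x half> %sv, i64 0)
  ret <vscale x 8 x half> %v
}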

File tree

3 files changed: +170 -13 lines changed

llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td

Lines changed: 2 additions & 1 deletion
@@ -2305,7 +2305,8 @@ foreach vti = AllIntegerVectors in {
 
 // 11.16. Vector Integer Move Instructions
 foreach vti = AllVectors in {
-  let Predicates = GetVTypePredicates<vti>.Predicates in {
+  defvar ivti = GetIntVTypeInfo<vti>.Vti;
+  let Predicates = GetVTypePredicates<ivti>.Predicates in {
     def : Pat<(vti.Vector (riscv_vmv_v_v_vl vti.RegClass:$passthru,
                                             vti.RegClass:$rs2, VLOpFrag)),
               (!cast<Instruction>("PseudoVMV_V_V_"#vti.LMul.MX)

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll

Lines changed: 72 additions & 6 deletions
@@ -1,12 +1,18 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,VLA
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,VLA
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,VLA
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,VLA
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,VLA
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,VLA
 
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,VLA
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,VLA
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zvfhmin,+zvfbfmin -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,VLA
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zvfhmin,+zvfbfmin -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,VLA
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zvfh,+zvfbfmin -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,VLA
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zvfh,+zvfbfmin -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,VLA
 
-; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck -check-prefixes=CHECK,VLS %s
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m,v -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck -check-prefixes=CHECK,VLS %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+zvfhmin,+zvfbfmin -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck -check-prefixes=CHECK,VLS %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+zvfhmin,+zvfbfmin -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck -check-prefixes=CHECK,VLS %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+zvfh,+zvfbfmin -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck -check-prefixes=CHECK,VLS %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+zvfh,+zvfbfmin -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck -check-prefixes=CHECK,VLS %s
 
 define void @extract_v2i8_v4i8_0(ptr %x, ptr %y) {
 ; CHECK-LABEL: extract_v2i8_v4i8_0:
@@ -866,6 +872,66 @@ define <1 x i64> @extract_v1i64_v2i64_1(<2 x i64> %x) {
   ret <1 x i64> %v
 }
 
+define void @extract_v2bf16_v4bf16_0(ptr %x, ptr %y) {
+; CHECK-LABEL: extract_v2bf16_v4bf16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a1)
+; CHECK-NEXT:    ret
+  %a = load <4 x bfloat>, ptr %x
+  %c = call <2 x bfloat> @llvm.vector.extract.v2bf16.v4bf16(<4 x bfloat> %a, i64 0)
+  store <2 x bfloat> %c, ptr %y
+  ret void
+}
+
+define void @extract_v2bf16_v4bf16_2(ptr %x, ptr %y) {
+; CHECK-LABEL: extract_v2bf16_v4bf16_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v8, 2
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a1)
+; CHECK-NEXT:    ret
+  %a = load <4 x bfloat>, ptr %x
+  %c = call <2 x bfloat> @llvm.vector.extract.v2bf16.v4bf16(<4 x bfloat> %a, i64 2)
+  store <2 x bfloat> %c, ptr %y
+  ret void
+}
+
+define void @extract_v2f16_v4f16_0(ptr %x, ptr %y) {
+; CHECK-LABEL: extract_v2f16_v4f16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a1)
+; CHECK-NEXT:    ret
+  %a = load <4 x half>, ptr %x
+  %c = call <2 x half> @llvm.vector.extract.v2f16.v4f16(<4 x half> %a, i64 0)
+  store <2 x half> %c, ptr %y
+  ret void
+}
+
+define void @extract_v2f16_v4f16_2(ptr %x, ptr %y) {
+; CHECK-LABEL: extract_v2f16_v4f16_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v8, 2
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a1)
+; CHECK-NEXT:    ret
+  %a = load <4 x half>, ptr %x
+  %c = call <2 x half> @llvm.vector.extract.v2f16.v4f16(<4 x half> %a, i64 2)
+  store <2 x half> %c, ptr %y
+  ret void
+}
+
 declare <2 x i1> @llvm.vector.extract.v2i1.v64i1(<64 x i1> %vec, i64 %idx)
 declare <8 x i1> @llvm.vector.extract.v8i1.v64i1(<64 x i1> %vec, i64 %idx)

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll

Lines changed: 96 additions & 6 deletions
@@ -1,12 +1,18 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,VLA,RV32VLA
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,VLA,RV64VLA
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,VLA,RV32VLA
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,VLA,RV64VLA
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,VLA,RV32VLA
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,VLA,RV64VLA
 
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,VLA,RV32VLA
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,VLA,RV64VLA
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zvfhmin,+zvfbfmin -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,VLA,RV32VLA
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zvfhmin,+zvfbfmin -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,VLA,RV64VLA
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zvfh,+zvfbfmin -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,VLA,RV32VLA
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zvfh,+zvfbfmin -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,VLA,RV64VLA
 
-; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck -check-prefixes=CHECK,VLS,RV32VLS %s
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m,v -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck -check-prefixes=CHECK,VLS,RV64VLS %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+zvfhmin,+zvfbfmin -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck -check-prefixes=CHECK,VLS,RV32VLS %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+zvfhmin,+zvfbfmin -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck -check-prefixes=CHECK,VLS,RV64VLS %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+zvfh,+zvfbfmin -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck -check-prefixes=CHECK,VLS,RV32VLS %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+zvfh,+zvfbfmin -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck -check-prefixes=CHECK,VLS,RV64VLS %s
 
 define <vscale x 8 x i32> @insert_nxv8i32_v2i32_0(<vscale x 8 x i32> %vec, ptr %svp) {
 ; VLA-LABEL: insert_nxv8i32_v2i32_0:
@@ -860,6 +866,90 @@ define void @insert_v2i64_nxv16i64_hi(ptr %psv, ptr %out) {
   ret void
 }
 
+define <vscale x 8 x bfloat> @insert_nxv8bf16_v2bf16_0(<vscale x 8 x bfloat> %vec, ptr %svp) {
+; VLA-LABEL: insert_nxv8bf16_v2bf16_0:
+; VLA:       # %bb.0:
+; VLA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; VLA-NEXT:    vle16.v v10, (a0)
+; VLA-NEXT:    vsetivli zero, 2, e16, m2, tu, ma
+; VLA-NEXT:    vmv.v.v v8, v10
+; VLA-NEXT:    ret
+;
+; VLS-LABEL: insert_nxv8bf16_v2bf16_0:
+; VLS:       # %bb.0:
+; VLS-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; VLS-NEXT:    vle16.v v10, (a0)
+; VLS-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; VLS-NEXT:    vmv.v.v v8, v10
+; VLS-NEXT:    ret
+  %sv = load <2 x bfloat>, ptr %svp
+  %v = call <vscale x 8 x bfloat> @llvm.vector.insert.v2bf16.nxv8bf16(<vscale x 8 x bfloat> %vec, <2 x bfloat> %sv, i64 0)
+  ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @insert_nxv8bf16_v2bf16_2(<vscale x 8 x bfloat> %vec, ptr %svp) {
+; VLA-LABEL: insert_nxv8bf16_v2bf16_2:
+; VLA:       # %bb.0:
+; VLA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; VLA-NEXT:    vle16.v v10, (a0)
+; VLA-NEXT:    vsetivli zero, 4, e16, m2, tu, ma
+; VLA-NEXT:    vslideup.vi v8, v10, 2
+; VLA-NEXT:    ret
+;
+; VLS-LABEL: insert_nxv8bf16_v2bf16_2:
+; VLS:       # %bb.0:
+; VLS-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; VLS-NEXT:    vle16.v v10, (a0)
+; VLS-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; VLS-NEXT:    vslideup.vi v8, v10, 2
+; VLS-NEXT:    ret
+  %sv = load <2 x bfloat>, ptr %svp
+  %v = call <vscale x 8 x bfloat> @llvm.vector.insert.v2bf16.nxv8bf16(<vscale x 8 x bfloat> %vec, <2 x bfloat> %sv, i64 2)
+  ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x half> @insert_nxv8f16_v2f16_0(<vscale x 8 x half> %vec, ptr %svp) {
+; VLA-LABEL: insert_nxv8f16_v2f16_0:
+; VLA:       # %bb.0:
+; VLA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; VLA-NEXT:    vle16.v v10, (a0)
+; VLA-NEXT:    vsetivli zero, 2, e16, m2, tu, ma
+; VLA-NEXT:    vmv.v.v v8, v10
+; VLA-NEXT:    ret
+;
+; VLS-LABEL: insert_nxv8f16_v2f16_0:
+; VLS:       # %bb.0:
+; VLS-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; VLS-NEXT:    vle16.v v10, (a0)
+; VLS-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; VLS-NEXT:    vmv.v.v v8, v10
+; VLS-NEXT:    ret
+  %sv = load <2 x half>, ptr %svp
+  %v = call <vscale x 8 x half> @llvm.vector.insert.v2f16.nxv8f16(<vscale x 8 x half> %vec, <2 x half> %sv, i64 0)
+  ret <vscale x 8 x half> %v
+}
+
+define <vscale x 8 x half> @insert_nxv8f16_v2f16_2(<vscale x 8 x half> %vec, ptr %svp) {
+; VLA-LABEL: insert_nxv8f16_v2f16_2:
+; VLA:       # %bb.0:
+; VLA-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; VLA-NEXT:    vle16.v v10, (a0)
+; VLA-NEXT:    vsetivli zero, 4, e16, m2, tu, ma
+; VLA-NEXT:    vslideup.vi v8, v10, 2
+; VLA-NEXT:    ret
+;
+; VLS-LABEL: insert_nxv8f16_v2f16_2:
+; VLS:       # %bb.0:
+; VLS-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; VLS-NEXT:    vle16.v v10, (a0)
+; VLS-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; VLS-NEXT:    vslideup.vi v8, v10, 2
+; VLS-NEXT:    ret
+  %sv = load <2 x half>, ptr %svp
+  %v = call <vscale x 8 x half> @llvm.vector.insert.v2f16.nxv8f16(<vscale x 8 x half> %vec, <2 x half> %sv, i64 2)
+  ret <vscale x 8 x half> %v
+}
+
 declare <8 x i1> @llvm.vector.insert.v4i1.v8i1(<8 x i1>, <4 x i1>, i64)
 declare <32 x i1> @llvm.vector.insert.v8i1.v32i1(<32 x i1>, <8 x i1>, i64)
