[RISCV] Fold PseudoVMV_V_V with undef passthru #106840
If a vmv.v.v has an undef passthru then we can just replace it with its input operand, since the tail is completely undefined. The intrinsic tests in vmv.v.v.ll were updated to pass a non-undef passthru operand to prevent them from all being folded away.
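To make the fold concrete, here is a minimal MIR sketch (register names, AVL and SEW are illustrative, not taken from the patch):

; A PseudoVMV_V_V whose passthru is $noreg (undef) copies the first AVL
; elements of its input and leaves the tail undefined, so the instruction
; itself is redundant:
%x:vr = PseudoVMV_V_V_M1 $noreg, %v, %avl, 5 /* e32 */, 0 /* tu, mu */
; The peephole erases it and rewrites every use of %x to use %v directly.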
@llvm/pr-subscribers-backend-risc-v

Author: Luke Lau (lukel97)

Changes: If a vmv.v.v has an undef passthru then we can just replace it with its input operand, since the tail is completely undefined. The intrinsic tests in vmv.v.v.ll were updated to pass a non-undef passthru operand to prevent them from all being folded away.

Patch is 39.73 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/106840.diff

4 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index 412fd790061a37..2c421c3ca105ab 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -66,6 +66,7 @@ class RISCVVectorPeephole : public MachineFunctionPass {
bool convertToWholeRegister(MachineInstr &MI) const;
bool convertToUnmasked(MachineInstr &MI) const;
bool convertVMergeToVMv(MachineInstr &MI) const;
+ bool foldUndefPassthruVMV_V_V(MachineInstr &MI);
bool foldVMV_V_V(MachineInstr &MI);
bool isAllOnesMask(const MachineInstr *MaskDef) const;
@@ -472,6 +473,20 @@ bool RISCVVectorPeephole::ensureDominates(const MachineOperand &MO,
return true;
}
+/// If a PseudoVMV_V_V's passthru is undef then we can replace it with its input
+bool RISCVVectorPeephole::foldUndefPassthruVMV_V_V(MachineInstr &MI) {
+ if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VMV_V_V)
+ return false;
+
+ if (MI.getOperand(1).getReg() != RISCV::NoRegister)
+ return false;
+
+ MRI->replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(2).getReg());
+ MI.eraseFromParent();
+ V0Defs.erase(&MI);
+ return true;
+}
+
/// If a PseudoVMV_V_V is the only user of its input, fold its passthru and VL
/// into it.
///
@@ -581,6 +596,7 @@ bool RISCVVectorPeephole::runOnMachineFunction(MachineFunction &MF) {
Changed |= convertToUnmasked(MI);
Changed |= convertToWholeRegister(MI);
Changed |= convertVMergeToVMv(MI);
+ Changed |= foldUndefPassthruVMV_V_V(MI);
Changed |= foldVMV_V_V(MI);
}
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir
index 1419eede6ca9d1..19a918148e6eb8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir
@@ -15,7 +15,6 @@ body: |
; CHECK-NEXT: %avl:gprnox0 = COPY $x1
; CHECK-NEXT: %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5 /* e32 */
; CHECK-NEXT: $v0 = COPY %mask
- ; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 $noreg, %true, %avl, 5 /* e32 */, 0 /* tu, mu */
%false:vr = COPY $v8
%true:vr = COPY $v9
%avl:gprnox0 = COPY $x1
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
index 3952e48c5c28fc..ef827e1839c939 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
@@ -194,3 +194,11 @@ define <vscale x 2 x i32> @unfoldable_mismatched_sew(<vscale x 2 x i32> %passthr
%b = call <vscale x 2 x i32> @llvm.riscv.vmv.v.v.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a.bitcast, iXLen %avl)
ret <vscale x 2 x i32> %b
}
+
+define <vscale x 1 x i64> @undef_passthru(<vscale x 1 x i64> %v, iXLen %avl) {
+; CHECK-LABEL: undef_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %x = call <vscale x 1 x i64> @llvm.riscv.vmv.v.v.vnxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %v, iXLen %avl)
+ ret <vscale x 1 x i64> %x
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v.ll
index 7217c2cfafca29..784b807a6a2e54 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v.ll
@@ -9,17 +9,17 @@ declare <vscale x 1 x i8> @llvm.riscv.vmv.v.v.nxv1i8(
<vscale x 1 x i8>,
iXLen);
-define <vscale x 1 x i8> @intrinsic_vmv.v.v_v_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, iXLen %1) nounwind {
+define <vscale x 1 x i8> @intrinsic_vmv.v.v_v_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv1i8_nxv1i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.v v8, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i8> @llvm.riscv.vmv.v.v.nxv1i8(
- <vscale x 1 x i8> undef,
<vscale x 1 x i8> %0,
- iXLen %1)
+ <vscale x 1 x i8> %1,
+ iXLen %2)
ret <vscale x 1 x i8> %a
}
@@ -29,17 +29,17 @@ declare <vscale x 2 x i8> @llvm.riscv.vmv.v.v.nxv2i8(
<vscale x 2 x i8>,
iXLen);
-define <vscale x 2 x i8> @intrinsic_vmv.v.v_v_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, iXLen %1) nounwind {
+define <vscale x 2 x i8> @intrinsic_vmv.v.v_v_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i8> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv2i8_nxv2i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.v v8, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i8> @llvm.riscv.vmv.v.v.nxv2i8(
- <vscale x 2 x i8> undef,
<vscale x 2 x i8> %0,
- iXLen %1)
+ <vscale x 2 x i8> %1,
+ iXLen %2)
ret <vscale x 2 x i8> %a
}
@@ -49,17 +49,17 @@ declare <vscale x 4 x i8> @llvm.riscv.vmv.v.v.nxv4i8(
<vscale x 4 x i8>,
iXLen);
-define <vscale x 4 x i8> @intrinsic_vmv.v.v_v_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, iXLen %1) nounwind {
+define <vscale x 4 x i8> @intrinsic_vmv.v.v_v_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i8> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv4i8_nxv4i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.v v8, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i8> @llvm.riscv.vmv.v.v.nxv4i8(
- <vscale x 4 x i8> undef,
<vscale x 4 x i8> %0,
- iXLen %1)
+ <vscale x 4 x i8> %1,
+ iXLen %2)
ret <vscale x 4 x i8> %a
}
@@ -69,17 +69,17 @@ declare <vscale x 8 x i8> @llvm.riscv.vmv.v.v.nxv8i8(
<vscale x 8 x i8>,
iXLen);
-define <vscale x 8 x i8> @intrinsic_vmv.v.v_v_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, iXLen %1) nounwind {
+define <vscale x 8 x i8> @intrinsic_vmv.v.v_v_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i8> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.v v8, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i8> @llvm.riscv.vmv.v.v.nxv8i8(
- <vscale x 8 x i8> undef,
<vscale x 8 x i8> %0,
- iXLen %1)
+ <vscale x 8 x i8> %1,
+ iXLen %2)
ret <vscale x 8 x i8> %a
}
@@ -89,17 +89,17 @@ declare <vscale x 16 x i8> @llvm.riscv.vmv.v.v.nxv16i8(
<vscale x 16 x i8>,
iXLen);
-define <vscale x 16 x i8> @intrinsic_vmv.v.v_v_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, iXLen %1) nounwind {
+define <vscale x 16 x i8> @intrinsic_vmv.v.v_v_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT: vmv.v.v v8, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i8> @llvm.riscv.vmv.v.v.nxv16i8(
- <vscale x 16 x i8> undef,
<vscale x 16 x i8> %0,
- iXLen %1)
+ <vscale x 16 x i8> %1,
+ iXLen %2)
ret <vscale x 16 x i8> %a
}
@@ -109,17 +109,17 @@ declare <vscale x 32 x i8> @llvm.riscv.vmv.v.v.nxv32i8(
<vscale x 32 x i8>,
iXLen);
-define <vscale x 32 x i8> @intrinsic_vmv.v.v_v_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, iXLen %1) nounwind {
+define <vscale x 32 x i8> @intrinsic_vmv.v.v_v_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i8> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT: vmv.v.v v8, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i8> @llvm.riscv.vmv.v.v.nxv32i8(
- <vscale x 32 x i8> undef,
<vscale x 32 x i8> %0,
- iXLen %1)
+ <vscale x 32 x i8> %1,
+ iXLen %2)
ret <vscale x 32 x i8> %a
}
@@ -129,17 +129,17 @@ declare <vscale x 64 x i8> @llvm.riscv.vmv.v.v.nxv64i8(
<vscale x 64 x i8>,
iXLen);
-define <vscale x 64 x i8> @intrinsic_vmv.v.v_v_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, iXLen %1) nounwind {
+define <vscale x 64 x i8> @intrinsic_vmv.v.v_v_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv64i8_nxv64i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
-; CHECK-NEXT: vmv.v.v v8, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 64 x i8> @llvm.riscv.vmv.v.v.nxv64i8(
- <vscale x 64 x i8> undef,
<vscale x 64 x i8> %0,
- iXLen %1)
+ <vscale x 64 x i8> %1,
+ iXLen %2)
ret <vscale x 64 x i8> %a
}
@@ -149,17 +149,17 @@ declare <vscale x 1 x i16> @llvm.riscv.vmv.v.v.nxv1i16(
<vscale x 1 x i16>,
iXLen);
-define <vscale x 1 x i16> @intrinsic_vmv.v.v_v_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, iXLen %1) nounwind {
+define <vscale x 1 x i16> @intrinsic_vmv.v.v_v_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv1i16_nxv1i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.v.v v8, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i16> @llvm.riscv.vmv.v.v.nxv1i16(
- <vscale x 1 x i16> undef,
<vscale x 1 x i16> %0,
- iXLen %1)
+ <vscale x 1 x i16> %1,
+ iXLen %2)
ret <vscale x 1 x i16> %a
}
@@ -169,17 +169,17 @@ declare <vscale x 2 x i16> @llvm.riscv.vmv.v.v.nxv2i16(
<vscale x 2 x i16>,
iXLen);
-define <vscale x 2 x i16> @intrinsic_vmv.v.v_v_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, iXLen %1) nounwind {
+define <vscale x 2 x i16> @intrinsic_vmv.v.v_v_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i16> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv2i16_nxv2i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.v.v v8, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i16> @llvm.riscv.vmv.v.v.nxv2i16(
- <vscale x 2 x i16> undef,
<vscale x 2 x i16> %0,
- iXLen %1)
+ <vscale x 2 x i16> %1,
+ iXLen %2)
ret <vscale x 2 x i16> %a
}
@@ -189,17 +189,17 @@ declare <vscale x 4 x i16> @llvm.riscv.vmv.v.v.nxv4i16(
<vscale x 4 x i16>,
iXLen);
-define <vscale x 4 x i16> @intrinsic_vmv.v.v_v_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, iXLen %1) nounwind {
+define <vscale x 4 x i16> @intrinsic_vmv.v.v_v_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i16> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.v v8, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i16> @llvm.riscv.vmv.v.v.nxv4i16(
- <vscale x 4 x i16> undef,
<vscale x 4 x i16> %0,
- iXLen %1)
+ <vscale x 4 x i16> %1,
+ iXLen %2)
ret <vscale x 4 x i16> %a
}
@@ -209,17 +209,17 @@ declare <vscale x 8 x i16> @llvm.riscv.vmv.v.v.nxv8i16(
<vscale x 8 x i16>,
iXLen);
-define <vscale x 8 x i16> @intrinsic_vmv.v.v_v_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, iXLen %1) nounwind {
+define <vscale x 8 x i16> @intrinsic_vmv.v.v_v_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vmv.v.v v8, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vmv.v.v.nxv8i16(
- <vscale x 8 x i16> undef,
<vscale x 8 x i16> %0,
- iXLen %1)
+ <vscale x 8 x i16> %1,
+ iXLen %2)
ret <vscale x 8 x i16> %a
}
@@ -229,17 +229,17 @@ declare <vscale x 16 x i16> @llvm.riscv.vmv.v.v.nxv16i16(
<vscale x 16 x i16>,
iXLen);
-define <vscale x 16 x i16> @intrinsic_vmv.v.v_v_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, iXLen %1) nounwind {
+define <vscale x 16 x i16> @intrinsic_vmv.v.v_v_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, <vscale x 16 x i16> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vmv.v.v v8, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vmv.v.v.nxv16i16(
- <vscale x 16 x i16> undef,
<vscale x 16 x i16> %0,
- iXLen %1)
+ <vscale x 16 x i16> %1,
+ iXLen %2)
ret <vscale x 16 x i16> %a
}
@@ -249,17 +249,17 @@ declare <vscale x 32 x i16> @llvm.riscv.vmv.v.v.nxv32i16(
<vscale x 32 x i16>,
iXLen);
-define <vscale x 32 x i16> @intrinsic_vmv.v.v_v_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, iXLen %1) nounwind {
+define <vscale x 32 x i16> @intrinsic_vmv.v.v_v_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv32i16_nxv32i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vmv.v.v v8, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vmv.v.v.nxv32i16(
- <vscale x 32 x i16> undef,
<vscale x 32 x i16> %0,
- iXLen %1)
+ <vscale x 32 x i16> %1,
+ iXLen %2)
ret <vscale x 32 x i16> %a
}
@@ -269,17 +269,17 @@ declare <vscale x 1 x i32> @llvm.riscv.vmv.v.v.nxv1i32(
<vscale x 1 x i32>,
iXLen);
-define <vscale x 1 x i32> @intrinsic_vmv.v.v_v_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, iXLen %1) nounwind {
+define <vscale x 1 x i32> @intrinsic_vmv.v.v_v_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i32> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv1i32_nxv1i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT: vmv.v.v v8, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i32> @llvm.riscv.vmv.v.v.nxv1i32(
- <vscale x 1 x i32> undef,
<vscale x 1 x i32> %0,
- iXLen %1)
+ <vscale x 1 x i32> %1,
+ iXLen %2)
ret <vscale x 1 x i32> %a
}
@@ -289,17 +289,17 @@ declare <vscale x 2 x i32> @llvm.riscv.vmv.v.v.nxv2i32(
<vscale x 2 x i32>,
iXLen);
-define <vscale x 2 x i32> @intrinsic_vmv.v.v_v_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, iXLen %1) nounwind {
+define <vscale x 2 x i32> @intrinsic_vmv.v.v_v_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i32> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.v v8, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i32> @llvm.riscv.vmv.v.v.nxv2i32(
- <vscale x 2 x i32> undef,
<vscale x 2 x i32> %0,
- iXLen %1)
+ <vscale x 2 x i32> %1,
+ iXLen %2)
ret <vscale x 2 x i32> %a
}
@@ -309,17 +309,17 @@ declare <vscale x 4 x i32> @llvm.riscv.vmv.v.v.nxv4i32(
<vscale x 4 x i32>,
iXLen);
-define <vscale x 4 x i32> @intrinsic_vmv.v.v_v_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, iXLen %1) nounwind {
+define <vscale x 4 x i32> @intrinsic_vmv.v.v_v_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vmv.v.v v8, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vmv.v.v.nxv4i32(
- <vscale x 4 x i32> undef,
<vscale x 4 x i32> %0,
- iXLen %1)
+ <vscale x 4 x i32> %1,
+ iXLen %2)
ret <vscale x 4 x i32> %a
}
@@ -329,17 +329,17 @@ declare <vscale x 8 x i32> @llvm.riscv.vmv.v.v.nxv8i32(
<vscale x 8 x i32>,
iXLen);
-define <vscale x 8 x i32> @intrinsic_vmv.v.v_v_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, iXLen %1) nounwind {
+define <vscale x 8 x i32> @intrinsic_vmv.v.v_v_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i32> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vmv.v.v v8, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vmv.v.v.nxv8i32(
- <vscale x 8 x i32> undef,
<vscale x 8 x i32> %0,
- iXLen %1)
+ <vscale x 8 x i32> %1,
+ iXLen %2)
ret <vscale x 8 x i32> %a
}
@@ -349,17 +349,17 @@ declare <vscale x 16 x i32> @llvm.riscv.vmv.v.v.nxv16i32(
<vscale x 16 x i32>,
iXLen);
-define <vscale x 16 x i32> @intrinsic_vmv.v.v_v_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, iXLen %1) nounwind {
+define <vscale x 16 x i32> @intrinsic_vmv.v.v_v_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv16i32_nxv16i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT: vmv.v.v v8, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vmv.v.v.nxv16i32(
- <vscale x 16 x i32> undef,
<vscale x 16 x i32> %0,
- iXLen %1)
+ <vscale x 16 x i32> %1,
+ iXLen %2)
ret <vscale x 16 x i32> %a
}
@@ -369,17 +369,17 @@ declare <vscale x 1 x i64> @llvm.riscv.vmv.v.v.nxv1i64(
<vscale x 1 x i64>,
iXLen);
-define <vscale x 1 x i64> @intrinsic_vmv.v.v_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, iXLen %1) nounwind {
+define <vscale x 1 x i64> @intrinsic_vmv.v.v_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv1i64_nxv1i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vmv.v.v v8, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vmv.v.v.nxv1i64(
- <vscale x 1 x i64> undef,
<vscale x 1 x i64> %0,
- iXLen %1)
+ <vscale x 1 x i64> %1,
+ iXLen %2)
ret <vscale x 1 x i64> %a
}
@@ -389,17 +389,17 @@ declare <vscale x 2 x i64> @llvm.riscv.vmv.v.v.nxv2i64(
<vscale x 2 x i64>,
iXLen);
-define <vscale x 2 x i64> @intrinsic_vmv.v.v_v_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, iXLen %1) nounwind {
+define <vscale x 2 x i64> @intrinsic_vmv.v.v_v_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, iXLen %2) nounwind {
; CHECK-LABEL: intrinsic_vmv.v.v_v_nxv2i64_nxv2i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vmv.v.v v8, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vmv.v.v.nxv2i64(
- <vscale x 2 x i64> undef,
<vscale x 2 x i64> %0,
- iXLen %1)
+ <vscale x 2 x i64> %1,
+ iXLen %2)
ret <vscale x 2 x i64> %a
}
@@ -409,17 +409,17 @@ declare <vscale x 4 x i64> @llvm.riscv.vmv.v.v.nxv4i64(
<vscale x 4 x i64>,
iXLen);
-d...
[truncated]
After thinking about this a bit, the main difference between this and foldVMV_V_V is that this handles the case where a vmv.v.v has an input that's not another vector pseudo. I'm not sure how often this happens in practice, so I'm closing for now.
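For reference, a hedged MIR sketch of that case, where the input of the vmv.v.v is a plain COPY rather than a vector pseudo (names are illustrative):

%true:vr = COPY $v9
%x:vr = PseudoVMV_V_V_M1 $noreg, %true, %avl, 5 /* e32 */, 0 /* tu, mu */
; foldVMV_V_V has no producing vector pseudo to fold the passthru and VL
; into, but with an undef passthru the move is still redundant, and uses
; of %x can simply be replaced with %true.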
If a vmv.v.v has an undef passthru then we can just replace it with its input operand, since the tail is completely undefined. This is a reattempt of llvm#106840, but also checks to see if the input was a pseudo where we can relax its tail policy to undef. This also means we don't need to check for undef passthrus in foldVMV_V_V anymore because they will be handled by foldUndefPassthruVMV_V_V.
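The tail-policy relaxation described in the reattempt could look roughly like this in MIR (a sketch assuming the input pseudo has a single use and a VL no smaller than the vmv.v.v's; the opcode and registers are illustrative):

; Before: the producer is tail-undisturbed and feeds only the vmv.v.v,
; whose passthru is undef.
%x:vr = PseudoVADD_VV_M1 %pt, %a, %b, %avl, 5 /* e32 */, 0 /* tu, mu */
%y:vr = PseudoVMV_V_V_M1 $noreg, %x, %avl, 5 /* e32 */, 0 /* tu, mu */
; After: uses of %y are rewritten to %x, and since %y's tail was undef the
; producer's tail policy can be relaxed to tail-agnostic.
%x:vr = PseudoVADD_VV_M1 %pt, %a, %b, %avl, 5 /* e32 */, 1 /* ta, mu */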