[RISCV] Teach fillUpExtensionSupportForSplat to handle nxvXi64 VMV_V_X_VL on RV32. #99251
An nxvXi64 VMV_V_X_VL on RV32 sign-extends its 32-bit input to 64 bits. If that input is positive, the sign extend can also be considered a zero extend.

@llvm/pr-subscribers-backend-risc-v
Author: Craig Topper (topperc)
Patch is 87.08 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/99251.diff
16 Files Affected:
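A minimal standalone illustration of the scalar fact the patch relies on (a hypothetical example, not code from the patch): sign-extending a 32-bit value whose sign bit is clear yields the same 64-bit pattern as zero-extending it.

```cpp
#include <cassert>
#include <cstdint>

int main() {
  int32_t Splat = 190;                          // sign bit clear, as in the ctlz tests below
  int64_t SExt = Splat;                         // what vmv.v.x does per element on RV32
  uint64_t ZExt = static_cast<uint32_t>(Splat); // what a zero extend would produce
  assert(static_cast<uint64_t>(SExt) == ZExt);  // identical bits, so either view is legal
  return 0;
}
```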
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8b5e56bff4097..1e2d25109204a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14477,10 +14477,22 @@ struct NodeExtensionHelper {
// widening opcode by splatting to smaller element size.
unsigned EltBits = VT.getScalarSizeInBits();
unsigned ScalarBits = Op.getValueSizeInBits();
- // Make sure we're getting all element bits from the scalar register.
- // FIXME: Support implicit sign extension of vmv.v.x?
- if (ScalarBits < EltBits)
+ // Make sure we're not getting all bits from the element, we need special
+ // handling.
+ if (ScalarBits < EltBits) {
+ // This should only occur on RV32.
+ assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
+ !Subtarget.is64Bit() && "Unexpected splat");
+ // vmv.v.x sign extends narrow inputs.
+ SupportsSExt = true;
+
+ // If the input is positive, then sign extend is also zero extend.
+ if (DAG.SignBitIsZero(Op))
+ SupportsZExt = true;
+
+ EnforceOneUse = false;
return;
+ }
unsigned NarrowSize = EltBits / 2;
// If the narrow type cannot be expressed with a legal VMV,
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
index 6e538f3dfb38e..d51f5eacd7d91 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
@@ -1229,36 +1229,20 @@ define <vscale x 1 x i64> @ctlz_nxv1i64(<vscale x 1 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; RV32F-LABEL: ctlz_nxv1i64:
-; RV32F: # %bb.0:
-; RV32F-NEXT: li a0, 190
-; RV32F-NEXT: vsetvli a1, zero, e64, m1, ta, ma
-; RV32F-NEXT: vmv.v.x v9, a0
-; RV32F-NEXT: fsrmi a0, 1
-; RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV32F-NEXT: vfncvt.f.xu.w v10, v8
-; RV32F-NEXT: vsrl.vi v8, v10, 23
-; RV32F-NEXT: vwsubu.wv v9, v9, v8
-; RV32F-NEXT: li a1, 64
-; RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; RV32F-NEXT: vminu.vx v8, v9, a1
-; RV32F-NEXT: fsrm a0
-; RV32F-NEXT: ret
-;
-; RV64F-LABEL: ctlz_nxv1i64:
-; RV64F: # %bb.0:
-; RV64F-NEXT: li a0, 190
-; RV64F-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV64F-NEXT: vmv.v.x v9, a0
-; RV64F-NEXT: fsrmi a0, 1
-; RV64F-NEXT: vfncvt.f.xu.w v10, v8
-; RV64F-NEXT: vsrl.vi v8, v10, 23
-; RV64F-NEXT: vwsubu.vv v10, v9, v8
-; RV64F-NEXT: li a1, 64
-; RV64F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; RV64F-NEXT: vminu.vx v8, v10, a1
-; RV64F-NEXT: fsrm a0
-; RV64F-NEXT: ret
+; CHECK-F-LABEL: ctlz_nxv1i64:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: li a0, 190
+; CHECK-F-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-F-NEXT: vmv.v.x v9, a0
+; CHECK-F-NEXT: fsrmi a0, 1
+; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8
+; CHECK-F-NEXT: vsrl.vi v8, v10, 23
+; CHECK-F-NEXT: vwsubu.vv v10, v9, v8
+; CHECK-F-NEXT: li a1, 64
+; CHECK-F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-F-NEXT: vminu.vx v8, v10, a1
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_nxv1i64:
; CHECK-D: # %bb.0:
@@ -1385,36 +1369,20 @@ define <vscale x 2 x i64> @ctlz_nxv2i64(<vscale x 2 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; RV32F-LABEL: ctlz_nxv2i64:
-; RV32F: # %bb.0:
-; RV32F-NEXT: li a0, 190
-; RV32F-NEXT: vsetvli a1, zero, e64, m2, ta, ma
-; RV32F-NEXT: vmv.v.x v10, a0
-; RV32F-NEXT: fsrmi a0, 1
-; RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32F-NEXT: vfncvt.f.xu.w v12, v8
-; RV32F-NEXT: vsrl.vi v8, v12, 23
-; RV32F-NEXT: vwsubu.wv v10, v10, v8
-; RV32F-NEXT: li a1, 64
-; RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV32F-NEXT: vminu.vx v8, v10, a1
-; RV32F-NEXT: fsrm a0
-; RV32F-NEXT: ret
-;
-; RV64F-LABEL: ctlz_nxv2i64:
-; RV64F: # %bb.0:
-; RV64F-NEXT: li a0, 190
-; RV64F-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV64F-NEXT: vmv.v.x v10, a0
-; RV64F-NEXT: fsrmi a0, 1
-; RV64F-NEXT: vfncvt.f.xu.w v11, v8
-; RV64F-NEXT: vsrl.vi v8, v11, 23
-; RV64F-NEXT: vwsubu.vv v12, v10, v8
-; RV64F-NEXT: li a1, 64
-; RV64F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64F-NEXT: vminu.vx v8, v12, a1
-; RV64F-NEXT: fsrm a0
-; RV64F-NEXT: ret
+; CHECK-F-LABEL: ctlz_nxv2i64:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: li a0, 190
+; CHECK-F-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-F-NEXT: vmv.v.x v10, a0
+; CHECK-F-NEXT: fsrmi a0, 1
+; CHECK-F-NEXT: vfncvt.f.xu.w v11, v8
+; CHECK-F-NEXT: vsrl.vi v8, v11, 23
+; CHECK-F-NEXT: vwsubu.vv v12, v10, v8
+; CHECK-F-NEXT: li a1, 64
+; CHECK-F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-F-NEXT: vminu.vx v8, v12, a1
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_nxv2i64:
; CHECK-D: # %bb.0:
@@ -1541,36 +1509,20 @@ define <vscale x 4 x i64> @ctlz_nxv4i64(<vscale x 4 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; RV32F-LABEL: ctlz_nxv4i64:
-; RV32F: # %bb.0:
-; RV32F-NEXT: li a0, 190
-; RV32F-NEXT: vsetvli a1, zero, e64, m4, ta, ma
-; RV32F-NEXT: vmv.v.x v12, a0
-; RV32F-NEXT: fsrmi a0, 1
-; RV32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32F-NEXT: vfncvt.f.xu.w v16, v8
-; RV32F-NEXT: vsrl.vi v8, v16, 23
-; RV32F-NEXT: vwsubu.wv v12, v12, v8
-; RV32F-NEXT: li a1, 64
-; RV32F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV32F-NEXT: vminu.vx v8, v12, a1
-; RV32F-NEXT: fsrm a0
-; RV32F-NEXT: ret
-;
-; RV64F-LABEL: ctlz_nxv4i64:
-; RV64F: # %bb.0:
-; RV64F-NEXT: li a0, 190
-; RV64F-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64F-NEXT: vmv.v.x v12, a0
-; RV64F-NEXT: fsrmi a0, 1
-; RV64F-NEXT: vfncvt.f.xu.w v14, v8
-; RV64F-NEXT: vsrl.vi v8, v14, 23
-; RV64F-NEXT: vwsubu.vv v16, v12, v8
-; RV64F-NEXT: li a1, 64
-; RV64F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64F-NEXT: vminu.vx v8, v16, a1
-; RV64F-NEXT: fsrm a0
-; RV64F-NEXT: ret
+; CHECK-F-LABEL: ctlz_nxv4i64:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: li a0, 190
+; CHECK-F-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-F-NEXT: vmv.v.x v12, a0
+; CHECK-F-NEXT: fsrmi a0, 1
+; CHECK-F-NEXT: vfncvt.f.xu.w v14, v8
+; CHECK-F-NEXT: vsrl.vi v8, v14, 23
+; CHECK-F-NEXT: vwsubu.vv v16, v12, v8
+; CHECK-F-NEXT: li a1, 64
+; CHECK-F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-F-NEXT: vminu.vx v8, v16, a1
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_nxv4i64:
; CHECK-D: # %bb.0:
@@ -1697,36 +1649,20 @@ define <vscale x 8 x i64> @ctlz_nxv8i64(<vscale x 8 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; RV32F-LABEL: ctlz_nxv8i64:
-; RV32F: # %bb.0:
-; RV32F-NEXT: li a0, 190
-; RV32F-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; RV32F-NEXT: vmv.v.x v16, a0
-; RV32F-NEXT: fsrmi a0, 1
-; RV32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32F-NEXT: vfncvt.f.xu.w v24, v8
-; RV32F-NEXT: vsrl.vi v8, v24, 23
-; RV32F-NEXT: vwsubu.wv v16, v16, v8
-; RV32F-NEXT: li a1, 64
-; RV32F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV32F-NEXT: vminu.vx v8, v16, a1
-; RV32F-NEXT: fsrm a0
-; RV32F-NEXT: ret
-;
-; RV64F-LABEL: ctlz_nxv8i64:
-; RV64F: # %bb.0:
-; RV64F-NEXT: li a0, 190
-; RV64F-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV64F-NEXT: vmv.v.x v16, a0
-; RV64F-NEXT: fsrmi a0, 1
-; RV64F-NEXT: vfncvt.f.xu.w v20, v8
-; RV64F-NEXT: vsrl.vi v8, v20, 23
-; RV64F-NEXT: vwsubu.vv v24, v16, v8
-; RV64F-NEXT: li a1, 64
-; RV64F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64F-NEXT: vminu.vx v8, v24, a1
-; RV64F-NEXT: fsrm a0
-; RV64F-NEXT: ret
+; CHECK-F-LABEL: ctlz_nxv8i64:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: li a0, 190
+; CHECK-F-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-F-NEXT: vmv.v.x v16, a0
+; CHECK-F-NEXT: fsrmi a0, 1
+; CHECK-F-NEXT: vfncvt.f.xu.w v20, v8
+; CHECK-F-NEXT: vsrl.vi v8, v20, 23
+; CHECK-F-NEXT: vwsubu.vv v24, v16, v8
+; CHECK-F-NEXT: li a1, 64
+; CHECK-F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; CHECK-F-NEXT: vminu.vx v8, v24, a1
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_nxv8i64:
; CHECK-D: # %bb.0:
@@ -2895,31 +2831,17 @@ define <vscale x 1 x i64> @ctlz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; RV32F-LABEL: ctlz_zero_undef_nxv1i64:
-; RV32F: # %bb.0:
-; RV32F-NEXT: li a0, 190
-; RV32F-NEXT: vsetvli a1, zero, e64, m1, ta, ma
-; RV32F-NEXT: vmv.v.x v9, a0
-; RV32F-NEXT: fsrmi a0, 1
-; RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV32F-NEXT: vfncvt.f.xu.w v10, v8
-; RV32F-NEXT: vsrl.vi v8, v10, 23
-; RV32F-NEXT: vwsubu.wv v9, v9, v8
-; RV32F-NEXT: fsrm a0
-; RV32F-NEXT: vmv1r.v v8, v9
-; RV32F-NEXT: ret
-;
-; RV64F-LABEL: ctlz_zero_undef_nxv1i64:
-; RV64F: # %bb.0:
-; RV64F-NEXT: li a0, 190
-; RV64F-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV64F-NEXT: vmv.v.x v9, a0
-; RV64F-NEXT: fsrmi a0, 1
-; RV64F-NEXT: vfncvt.f.xu.w v10, v8
-; RV64F-NEXT: vsrl.vi v10, v10, 23
-; RV64F-NEXT: vwsubu.vv v8, v9, v10
-; RV64F-NEXT: fsrm a0
-; RV64F-NEXT: ret
+; CHECK-F-LABEL: ctlz_zero_undef_nxv1i64:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: li a0, 190
+; CHECK-F-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-F-NEXT: vmv.v.x v9, a0
+; CHECK-F-NEXT: fsrmi a0, 1
+; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8
+; CHECK-F-NEXT: vsrl.vi v10, v10, 23
+; CHECK-F-NEXT: vwsubu.vv v8, v9, v10
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv1i64:
; CHECK-D: # %bb.0:
@@ -3043,31 +2965,17 @@ define <vscale x 2 x i64> @ctlz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; RV32F-LABEL: ctlz_zero_undef_nxv2i64:
-; RV32F: # %bb.0:
-; RV32F-NEXT: li a0, 190
-; RV32F-NEXT: vsetvli a1, zero, e64, m2, ta, ma
-; RV32F-NEXT: vmv.v.x v10, a0
-; RV32F-NEXT: fsrmi a0, 1
-; RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32F-NEXT: vfncvt.f.xu.w v12, v8
-; RV32F-NEXT: vsrl.vi v8, v12, 23
-; RV32F-NEXT: vwsubu.wv v10, v10, v8
-; RV32F-NEXT: fsrm a0
-; RV32F-NEXT: vmv2r.v v8, v10
-; RV32F-NEXT: ret
-;
-; RV64F-LABEL: ctlz_zero_undef_nxv2i64:
-; RV64F: # %bb.0:
-; RV64F-NEXT: li a0, 190
-; RV64F-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV64F-NEXT: vmv.v.x v10, a0
-; RV64F-NEXT: fsrmi a0, 1
-; RV64F-NEXT: vfncvt.f.xu.w v11, v8
-; RV64F-NEXT: vsrl.vi v11, v11, 23
-; RV64F-NEXT: vwsubu.vv v8, v10, v11
-; RV64F-NEXT: fsrm a0
-; RV64F-NEXT: ret
+; CHECK-F-LABEL: ctlz_zero_undef_nxv2i64:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: li a0, 190
+; CHECK-F-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-F-NEXT: vmv.v.x v10, a0
+; CHECK-F-NEXT: fsrmi a0, 1
+; CHECK-F-NEXT: vfncvt.f.xu.w v11, v8
+; CHECK-F-NEXT: vsrl.vi v11, v11, 23
+; CHECK-F-NEXT: vwsubu.vv v8, v10, v11
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv2i64:
; CHECK-D: # %bb.0:
@@ -3191,31 +3099,17 @@ define <vscale x 4 x i64> @ctlz_zero_undef_nxv4i64(<vscale x 4 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; RV32F-LABEL: ctlz_zero_undef_nxv4i64:
-; RV32F: # %bb.0:
-; RV32F-NEXT: li a0, 190
-; RV32F-NEXT: vsetvli a1, zero, e64, m4, ta, ma
-; RV32F-NEXT: vmv.v.x v12, a0
-; RV32F-NEXT: fsrmi a0, 1
-; RV32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32F-NEXT: vfncvt.f.xu.w v16, v8
-; RV32F-NEXT: vsrl.vi v8, v16, 23
-; RV32F-NEXT: vwsubu.wv v12, v12, v8
-; RV32F-NEXT: fsrm a0
-; RV32F-NEXT: vmv4r.v v8, v12
-; RV32F-NEXT: ret
-;
-; RV64F-LABEL: ctlz_zero_undef_nxv4i64:
-; RV64F: # %bb.0:
-; RV64F-NEXT: li a0, 190
-; RV64F-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64F-NEXT: vmv.v.x v12, a0
-; RV64F-NEXT: fsrmi a0, 1
-; RV64F-NEXT: vfncvt.f.xu.w v14, v8
-; RV64F-NEXT: vsrl.vi v14, v14, 23
-; RV64F-NEXT: vwsubu.vv v8, v12, v14
-; RV64F-NEXT: fsrm a0
-; RV64F-NEXT: ret
+; CHECK-F-LABEL: ctlz_zero_undef_nxv4i64:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: li a0, 190
+; CHECK-F-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-F-NEXT: vmv.v.x v12, a0
+; CHECK-F-NEXT: fsrmi a0, 1
+; CHECK-F-NEXT: vfncvt.f.xu.w v14, v8
+; CHECK-F-NEXT: vsrl.vi v14, v14, 23
+; CHECK-F-NEXT: vwsubu.vv v8, v12, v14
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv4i64:
; CHECK-D: # %bb.0:
@@ -3339,31 +3233,17 @@ define <vscale x 8 x i64> @ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; RV32F-LABEL: ctlz_zero_undef_nxv8i64:
-; RV32F: # %bb.0:
-; RV32F-NEXT: li a0, 190
-; RV32F-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; RV32F-NEXT: vmv.v.x v16, a0
-; RV32F-NEXT: fsrmi a0, 1
-; RV32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32F-NEXT: vfncvt.f.xu.w v24, v8
-; RV32F-NEXT: vsrl.vi v8, v24, 23
-; RV32F-NEXT: vwsubu.wv v16, v16, v8
-; RV32F-NEXT: fsrm a0
-; RV32F-NEXT: vmv8r.v v8, v16
-; RV32F-NEXT: ret
-;
-; RV64F-LABEL: ctlz_zero_undef_nxv8i64:
-; RV64F: # %bb.0:
-; RV64F-NEXT: li a0, 190
-; RV64F-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV64F-NEXT: vmv.v.x v16, a0
-; RV64F-NEXT: fsrmi a0, 1
-; RV64F-NEXT: vfncvt.f.xu.w v20, v8
-; RV64F-NEXT: vsrl.vi v20, v20, 23
-; RV64F-NEXT: vwsubu.vv v8, v16, v20
-; RV64F-NEXT: fsrm a0
-; RV64F-NEXT: ret
+; CHECK-F-LABEL: ctlz_zero_undef_nxv8i64:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: li a0, 190
+; CHECK-F-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-F-NEXT: vmv.v.x v16, a0
+; CHECK-F-NEXT: fsrmi a0, 1
+; CHECK-F-NEXT: vfncvt.f.xu.w v20, v8
+; CHECK-F-NEXT: vsrl.vi v20, v20, 23
+; CHECK-F-NEXT: vwsubu.vv v8, v16, v20
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv8i64:
; CHECK-D: # %bb.0:
@@ -3387,4 +3267,6 @@ define <vscale x 8 x i64> @ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32: {{.*}}
+; RV32F: {{.*}}
; RV64: {{.*}}
+; RV64F: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
index 479664c6f5f62..9737107974075 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
@@ -1223,12 +1223,11 @@ define <vscale x 1 x i64> @cttz_nxv1i64(<vscale x 1 x i64> %va) {
; RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV32F-NEXT: vfncvt.f.xu.w v9, v8
; RV32F-NEXT: vsrl.vi v8, v9, 23
-; RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; RV32F-NEXT: vzext.vf2 v9, v8
; RV32F-NEXT: li a1, 127
-; RV32F-NEXT: vsub.vx v8, v9, a1
+; RV32F-NEXT: vwsubu.vx v9, v8, a1
; RV32F-NEXT: li a1, 64
-; RV32F-NEXT: vmerge.vxm v8, v8, a1, v0
+; RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; RV32F-NEXT: vmerge.vxm v8, v9, a1, v0
; RV32F-NEXT: fsrm a0
; RV32F-NEXT: ret
;
@@ -1385,12 +1384,11 @@ define <vscale x 2 x i64> @cttz_nxv2i64(<vscale x 2 x i64> %va) {
; RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32F-NEXT: vfncvt.f.xu.w v10, v8
; RV32F-NEXT: vsrl.vi v8, v10, 23
-; RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV32F-NEXT: vzext.vf2 v10, v8
; RV32F-NEXT: li a1, 127
-; RV32F-NEXT: vsub.vx v8, v10, a1
+; RV32F-NEXT: vwsubu.vx v10, v8, a1
; RV32F-NEXT: li a1, 64
-; RV32F-NEXT: vmerge.vxm v8, v8, a1, v0
+; RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV32F-NEXT: vmerge.vxm v8, v10, a1, v0
; RV32F-NEXT: fsrm a0
; RV32F-NEXT: ret
;
@@ -1547,12 +1545,11 @@ define <vscale x 4 x i64> @cttz_nxv4i64(<vscale x 4 x i64> %va) {
; RV32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32F-NEXT: vfncvt.f.xu.w v12, v8
; RV32F-NEXT: vsrl.vi v8, v12, 23
-; RV32F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV32F-NEXT: vzext.vf2 v12, v8
; RV32F-NEXT: li a1, 127
-; RV32F-NEXT: vsub.vx v8, v12, a1
+; RV32F-NEXT: vwsubu.vx v12, v8, a1
; RV32F-NEXT: li a1, 64
-; RV32F-NEXT: vmerge.vxm v8, v8, a1, v0
+; RV32F-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV32F-NEXT: vmerge.vxm v8, v12, a1, v0
; RV32F-NEXT: fsrm a0
; RV32F-NEXT: ret
;
@@ -1709,12 +1706,11 @@ define <vscale x 8 x i64> @cttz_nxv8i64(<vscale x 8 x i64> %va) {
; RV32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32F-NEXT: vfncvt.f.xu.w v16, v8
; RV32F-NEXT: vsrl.vi v8, v16, 23
-; RV32F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV32F-NEXT: vzext.vf2 v16, v8
; RV32F-NEXT: li a1, 127
-; RV32F-NEXT: vsub.vx v8, v16, a1
+; RV32F-NEXT: vwsubu.vx v16, v8, a1
; RV32F-NEXT: li a1, 64
-; RV32F-NEXT: vmerge.vxm v8, v8, a1, v0
+; RV32F-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV32F-NEXT: vmerge.vxm v8, v16, a1, v0
; RV32F-NEXT: fsrm a0
; RV32F-NEXT: ret
;
@@ -2887,35 +2883,19 @@ define <vscale x 1 x i64> @cttz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; RV32F-LABEL: cttz_zero_undef_nxv1i64:
-; RV32F: # %bb.0:
-; RV32F-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV32F-NEXT: vrsub.vi v9, v8, 0
-; RV32F-NEXT: vand.vv v8, v8, v9
-; RV32F-NEXT: fsrmi a0, 1
-; RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV32F-NEXT: vfncvt.f.xu.w v9, v8
-; RV32F-NEXT: vsrl.vi v8, v9, 23
-; RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; RV32F-NEXT: vzext.vf2 v9, v8
-; RV32F-NEXT: li a1, 127
-; RV32F-NEXT: vsub.vx v8, v9, a1
-; RV32F-NEXT: fsrm a0
-; RV32F-NEXT: ret
-;
-; RV64F-LABEL: cttz_zero_undef_nxv1i64:
-; RV64F: # %bb.0:
-; RV64F-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64F-NEXT: vrsub.vi v9, v8, 0
-; RV64F-NEXT: vand.vv v8, v8, v9
-; RV64F-NEXT: fsrmi a0, 1
-; RV64F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV64F-NEXT: vfncvt.f.xu.w v9, v8
-; RV64F-NEXT: vsrl.vi v9, v9, 23
-; RV64F-NEXT: li a1, 127
-; RV64F-NEXT: vwsubu.vx v8, v9, a1
-; RV64F-NEXT: fsrm a0
-; RV64F-NEXT: ret
+; CHECK-F-LABEL: cttz_zero_undef_nxv1i64:
+; CHECK-F: # %bb.0:
+; CHECK-F-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-F-NEXT: vrsub.vi v9, v8, 0
+; CHECK-F-NEXT: vand.vv v8, v8, v9
+; CHECK-F-NEXT: fsrmi a0, 1
+; CHECK-F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-F-NEXT: vfncvt.f.xu.w v9, v8
+; CHECK-F-NEXT: vsrl.vi v9, v9, 23
+; CHECK-F-NEXT: li a1, 127
+; CHECK-F-NEXT: vwsubu.vx v8, v9, a1
+; CHECK-F-NEXT: fsrm a0
+; CHECK-F-NEXT: ret
;
; CHECK-D-LABEL: cttz_zero_undef_nxv1i64:
; CHECK-D: # %bb.0:
@@ -3021,35 +3001,19 @@ define <vscale x 2 x i64> @cttz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) {
; RV64I-NEXT: vsrl.vx v8, v8, a0
; RV64I-NEXT: ret
;
-; RV32F-LABEL: cttz_zero_undef_nxv2i64:
-; RV32F: # %bb.0:
-; RV32F-NEXT: vsetvli a0, zero, e64, m2, ta, ma
-; RV32F-NEXT: vrsub.vi v10, v8, 0
-; RV32F-NEXT: vand.vv v8, v8, v10
-; RV32F-NEXT: fsrmi a0, 1
-; RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32F-NEXT: vfncvt.f.xu.w v10, v8
-; RV32F-NEXT: vsrl.vi v8, v10, 23
-; RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV32F-NEXT: vzext.vf2 v10, v8
-; RV32F-NEXT: li a1, 127
-; RV32F-NEXT: vsub.vx v8, v10, a1
-; RV32F-NEXT: fsrm a0
-; RV32F-NEXT: ret
-;
-; RV64F-LABEL: cttz_zero_undef_nxv2i64:
-; RV64F: # %bb.0:
-; RV64F-NEXT: vsetvli a0, zero, e64, m2, ta, ma
-; RV64F-NEXT: vrsub.vi v10, v8, 0
-; RV64F-NEXT: vand.vv v8, v8, v10
-; RV64F-NEXT: fsrmi a0, 1
-; RV64F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64F-NEXT: vfncvt.f.xu.w v10, v8
-; RV64F-NEXT: vsrl.vi v10, v10, 23
-; RV64F-NEXT: li a1, 127...
[truncated]
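The cttz diffs above show the payoff on RV32: because the splatted constant 127 is positive, its implicit sign extension counts as a zero extension, so the old vzext.vf2 + vsub.vx pair folds into a single widening vwsubu. A small model of the equivalence (hypothetical helpers, with scalars standing in for vector elements):

```cpp
#include <cassert>
#include <cstdint>

// Old RV32 sequence: zero-extend the e32 result to e64, then subtract the
// sign-extended splat of 127 at e64.
uint64_t zextThenSub(uint32_t X) {
  int64_t Splat = 127; // vmv.v.x sign-extends; 127 is positive
  return static_cast<uint64_t>(X) - static_cast<uint64_t>(Splat);
}

// New sequence: vwsubu.vx computes zext(X) - zext(127) in one widening op.
uint64_t wideningSubu(uint32_t X) {
  return static_cast<uint64_t>(X) - 127u;
}

int main() {
  for (uint32_t X : {150u, 127u, 0u})
    assert(zextThenSub(X) == wideningSubu(X));
  return 0;
}
```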
; CHECK-NEXT: li a0, 63
; CHECK-NEXT: vand.vx v16, v12, a0
; CHECK-NEXT: vsrl.vv v16, v8, v16
; CHECK-NEXT: vrsub.vi v12, v12, 0
Looks like this causes us to miss the vrsub.vi pattern. But I think it + the subsequent vand.vx could all be done in the scalar domain?
I think so. I think it all happens very late and we end up going to target nodes immediately so no generic transforms fire.
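A sketch of that suggestion (a hypothetical follow-up, not part of this patch): for a splatted shift amount, the negate and the mask by 63 commute with the broadcast, so both could run once on the scalar GPR before the splat.

```cpp
#include <cstdint>

// Vector form today (per element): vrsub.vi t, x, 0  then  vand.vx t, t, 63.
// Scalar-domain form (once, before splatting): same value for every element.
uint32_t negMaskScalar(uint32_t ShAmt) {
  return (0u - ShAmt) & 63u; // (-x) & 63 in well-defined unsigned arithmetic
}
```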
// Make sure we're not getting all bits from the element, we need special
// handling.
If we're not getting all bits?
LGTM