[RISCV] Convert vsub.vx to vadd.vi if possible #130669
Conversation
We'd already had this transform for the intrinsics, but hadn't added it for either fixed-length or scalable vectors coming from normal IR. For the record, the fact we have three different sets of patterns here really is quite ugly.
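As a rough illustration, here is the kind of plain IR the new patterns target, written in the style of the test files below. This is a hypothetical example (the function name and expected-output comments are mine, not taken from the patch), and the exact vsetvli and register choices in real output may differ:

define <vscale x 1 x i8> @sub_splat_one(<vscale x 1 x i8> %va) {
; Before this patch (materialize the scalar, then vsub.vx):
;   li      a0, 1
;   vsetvli a1, zero, e8, mf8, ta, ma
;   vsub.vx v8, v8, a0
; After this patch (immediate negated and folded into vadd.vi):
;   vsetvli a0, zero, e8, mf8, ta, ma
;   vadd.vi v8, v8, -1
  %v = sub <vscale x 1 x i8> %va, splat (i8 1)
  ret <vscale x 1 x i8> %v
}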
@llvm/pr-subscribers-backend-risc-v

Author: Philip Reames (preames)

Changes

We'd already had this transform for the intrinsics, but hadn't added it for either fixed-length or scalable vectors coming from normal IR. For the record, the fact we have three different sets of patterns here really is quite ugly.

Patch is 292.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/130669.diff

14 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index f3cce950ed7b5..8aa684c56bde0 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3573,6 +3573,13 @@ bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
/*Decrement=*/true);
}
+bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal) {
+ return selectVSplatImmHelper(
+ N, SplatVal, *CurDAG, *Subtarget,
+ [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; },
+ /*Decrement=*/false);
+}
+
bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
SDValue &SplatVal) {
return selectVSplatImmHelper(
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 5048a80fdd18f..db09ad146b655 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -137,6 +137,7 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
return selectVSplatUimm(N, Bits, Val);
}
bool selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal);
+ bool selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal);
bool selectVSplatSimm5Plus1NonZero(SDValue N, SDValue &SplatVal);
// Matches the splat of a value which can be extended or truncated, such that
// only the bottom 8 bits are preserved.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 2bd61883760e5..b2c5261ae6c2d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -877,9 +877,9 @@ foreach mti = AllMasks in
// 11.1. Vector Single-Width Integer Add and Subtract
defm : VPatBinarySDNode_VV_VX_VI<add, "PseudoVADD">;
defm : VPatBinarySDNode_VV_VX<sub, "PseudoVSUB">;
-// Handle VRSUB specially since it's the only integer binary op with reversed
-// pattern operands
foreach vti = AllIntegerVectors in {
+ // Handle VRSUB specially since it's the only integer binary op with reversed
+ // pattern operands
// FIXME: The AddedComplexity here is covering up a missing matcher for
// widening vwsub.vx which can recognize a extended folded into the
// scalar of the splat.
@@ -896,6 +896,15 @@ foreach vti = AllIntegerVectors in {
(vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1,
simm5:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
}
+
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ // Match VSUB with a small immediate to vadd.vi by negating the immediate.
+ def : Pat<(sub (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (SplatPat_simm5_plus1_nodec simm5_plus1:$rs2))),
+ (!cast<Instruction>("PseudoVADD_VI_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1,
+ (NegImm simm5_plus1:$rs2), vti.AVL, vti.Log2SEW, TA_MA)>;
+ }
}
// 11.2. Vector Widening Integer Add and Subtract
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 43cfc9d1e77ca..5d98ffedcbb9a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -598,6 +598,8 @@ def SplatPat_uimm5 : ComplexPattern<vAny, 1, "selectVSplatUimmBits<5>", [], [],
def SplatPat_uimm6 : ComplexPattern<vAny, 1, "selectVSplatUimmBits<6>", [], [], 3>;
def SplatPat_simm5_plus1
: ComplexPattern<vAny, 1, "selectVSplatSimm5Plus1", [], [], 3>;
+def SplatPat_simm5_plus1_nodec
+ : ComplexPattern<vAny, 1, "selectVSplatSimm5Plus1NoDec", [], [], 3>;
def SplatPat_simm5_plus1_nonzero
: ComplexPattern<vAny, 1, "selectVSplatSimm5Plus1NonZero", [], [], 3>;
@@ -1992,10 +1994,10 @@ multiclass VPatAVGADDVL_VV_VX_RM<SDNode vop, int vxrm, string suffix = ""> {
// 11.1. Vector Single-Width Integer Add and Subtract
defm : VPatBinaryVL_VV_VX_VI<riscv_add_vl, "PseudoVADD">;
defm : VPatBinaryVL_VV_VX<riscv_sub_vl, "PseudoVSUB">;
-// Handle VRSUB specially since it's the only integer binary op with reversed
-// pattern operands
foreach vti = AllIntegerVectors in {
let Predicates = GetVTypePredicates<vti>.Predicates in {
+ // Handle VRSUB specially since it's the only integer binary op with
+ // reversed pattern operands
def : Pat<(riscv_sub_vl (vti.Vector (SplatPat (XLenVT GPR:$rs2))),
(vti.Vector vti.RegClass:$rs1),
vti.RegClass:$passthru, (vti.Mask VMV0:$vm), VLOpFrag),
@@ -2008,6 +2010,15 @@ foreach vti = AllIntegerVectors in {
(!cast<Instruction>("PseudoVRSUB_VI_"# vti.LMul.MX#"_MASK")
vti.RegClass:$passthru, vti.RegClass:$rs1, simm5:$rs2,
(vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+
+ // Match VSUB with a small immediate to vadd.vi by negating the immediate.
+ def : Pat<(riscv_sub_vl (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (SplatPat_simm5_plus1_nodec simm5_plus1:$rs2)),
+ vti.RegClass:$passthru, (vti.Mask VMV0:$vm), VLOpFrag),
+ (!cast<Instruction>("PseudoVADD_VI_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$passthru, vti.RegClass:$rs1,
+ (NegImm simm5_plus1:$rs2), (vti.Mask VMV0:$vm),
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
index 6f515996677ee..ceca813782461 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
@@ -2585,8 +2585,7 @@ define <vscale x 1 x i9> @vp_ctlz_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1
; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
-; CHECK-NEXT: li a0, 7
-; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vadd.vi v8, v8, -7, v0.t
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i9:
@@ -2595,8 +2594,7 @@ define <vscale x 1 x i9> @vp_ctlz_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-ZVBB-NEXT: vand.vx v8, v8, a1, v0.t
; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t
-; CHECK-ZVBB-NEXT: li a0, 7
-; CHECK-ZVBB-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-ZVBB-NEXT: vadd.vi v8, v8, -7, v0.t
; CHECK-ZVBB-NEXT: ret
%v = call <vscale x 1 x i9> @llvm.vp.ctlz.nxv1i9(<vscale x 1 x i9> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x i9> %v
@@ -2744,13 +2742,12 @@ define <vscale x 1 x i9> @vp_ctlo_zero_nxv1i9_unpredicated_ctlz_with_vp_xor(<vsc
; CHECK-NEXT: li a0, 142
; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
; CHECK-NEXT: vand.vx v8, v8, a1
-; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vfwcvt.f.xu.v v9, v8
; CHECK-NEXT: vnsrl.wi v8, v9, 23
; CHECK-NEXT: vrsub.vx v8, v8, a0
-; CHECK-NEXT: vminu.vx v8, v8, a1
-; CHECK-NEXT: li a0, 7
-; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: li a0, 16
+; CHECK-NEXT: vminu.vx v8, v8, a0
+; CHECK-NEXT: vadd.vi v8, v8, -7
; CHECK-NEXT: ret
;
; CHECK-ZVBB-LABEL: vp_ctlo_zero_nxv1i9_unpredicated_ctlz_with_vp_xor:
@@ -2761,8 +2758,7 @@ define <vscale x 1 x i9> @vp_ctlo_zero_nxv1i9_unpredicated_ctlz_with_vp_xor(<vsc
; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-ZVBB-NEXT: vand.vx v8, v8, a1
; CHECK-ZVBB-NEXT: vclz.v v8, v8
-; CHECK-ZVBB-NEXT: li a0, 7
-; CHECK-ZVBB-NEXT: vsub.vx v8, v8, a0
+; CHECK-ZVBB-NEXT: vadd.vi v8, v8, -7
; CHECK-ZVBB-NEXT: ret
%va.not = call <vscale x 1 x i9> @llvm.vp.xor.nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i9> splat (i9 -1), <vscale x 1 x i1> %m, i32 %evl)
%v = call <vscale x 1 x i9> @llvm.ctlz(<vscale x 1 x i9> %va.not, i1 false)
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
index 5761ae0926eae..bd7a20f9ef590 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
@@ -11,12 +11,11 @@
define <vscale x 1 x i8> @cttz_nxv1i8(<vscale x 1 x i8> %va) {
; CHECK-ZVE64X-LABEL: cttz_nxv1i8:
; CHECK-ZVE64X: # %bb.0:
-; CHECK-ZVE64X-NEXT: li a0, 1
-; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; CHECK-ZVE64X-NEXT: vnot.v v9, v8
-; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
; CHECK-ZVE64X-NEXT: li a0, 85
-; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT: li a0, 51
@@ -77,12 +76,11 @@ declare <vscale x 1 x i8> @llvm.cttz.nxv1i8(<vscale x 1 x i8>, i1)
define <vscale x 2 x i8> @cttz_nxv2i8(<vscale x 2 x i8> %va) {
; CHECK-ZVE64X-LABEL: cttz_nxv2i8:
; CHECK-ZVE64X: # %bb.0:
-; CHECK-ZVE64X-NEXT: li a0, 1
-; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; CHECK-ZVE64X-NEXT: vnot.v v9, v8
-; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
; CHECK-ZVE64X-NEXT: li a0, 85
-; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT: li a0, 51
@@ -143,12 +141,11 @@ declare <vscale x 2 x i8> @llvm.cttz.nxv2i8(<vscale x 2 x i8>, i1)
define <vscale x 4 x i8> @cttz_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-ZVE64X-LABEL: cttz_nxv4i8:
; CHECK-ZVE64X: # %bb.0:
-; CHECK-ZVE64X-NEXT: li a0, 1
-; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-ZVE64X-NEXT: vnot.v v9, v8
-; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
; CHECK-ZVE64X-NEXT: li a0, 85
-; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT: li a0, 51
@@ -209,12 +206,11 @@ declare <vscale x 4 x i8> @llvm.cttz.nxv4i8(<vscale x 4 x i8>, i1)
define <vscale x 8 x i8> @cttz_nxv8i8(<vscale x 8 x i8> %va) {
; CHECK-ZVE64X-LABEL: cttz_nxv8i8:
; CHECK-ZVE64X: # %bb.0:
-; CHECK-ZVE64X-NEXT: li a0, 1
-; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-ZVE64X-NEXT: vnot.v v9, v8
-; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
; CHECK-ZVE64X-NEXT: li a0, 85
-; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT: li a0, 51
@@ -275,12 +271,11 @@ declare <vscale x 8 x i8> @llvm.cttz.nxv8i8(<vscale x 8 x i8>, i1)
define <vscale x 16 x i8> @cttz_nxv16i8(<vscale x 16 x i8> %va) {
; CHECK-ZVE64X-LABEL: cttz_nxv16i8:
; CHECK-ZVE64X: # %bb.0:
-; CHECK-ZVE64X-NEXT: li a0, 1
-; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, m2, ta, ma
-; CHECK-ZVE64X-NEXT: vnot.v v10, v8
-; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; CHECK-ZVE64X-NEXT: vadd.vi v10, v8, -1
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
; CHECK-ZVE64X-NEXT: li a0, 85
-; CHECK-ZVE64X-NEXT: vand.vv v8, v10, v8
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10
; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1
; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0
; CHECK-ZVE64X-NEXT: li a0, 51
@@ -341,12 +336,11 @@ declare <vscale x 16 x i8> @llvm.cttz.nxv16i8(<vscale x 16 x i8>, i1)
define <vscale x 32 x i8> @cttz_nxv32i8(<vscale x 32 x i8> %va) {
; CHECK-LABEL: cttz_nxv32i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 1
-; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
-; CHECK-NEXT: vnot.v v12, v8
-; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-NEXT: vadd.vi v12, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vv v8, v12, v8
+; CHECK-NEXT: vand.vv v8, v8, v12
; CHECK-NEXT: vsrl.vi v12, v8, 1
; CHECK-NEXT: vand.vx v12, v12, a0
; CHECK-NEXT: li a0, 51
@@ -373,12 +367,11 @@ declare <vscale x 32 x i8> @llvm.cttz.nxv32i8(<vscale x 32 x i8>, i1)
define <vscale x 64 x i8> @cttz_nxv64i8(<vscale x 64 x i8> %va) {
; CHECK-LABEL: cttz_nxv64i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 1
-; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma
-; CHECK-NEXT: vnot.v v16, v8
-; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NEXT: vadd.vi v16, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vand.vv v8, v16, v8
+; CHECK-NEXT: vand.vv v8, v8, v16
; CHECK-NEXT: vsrl.vi v16, v8, 1
; CHECK-NEXT: vand.vx v16, v16, a0
; CHECK-NEXT: li a0, 51
@@ -405,13 +398,12 @@ declare <vscale x 64 x i8> @llvm.cttz.nxv64i8(<vscale x 64 x i8>, i1)
define <vscale x 1 x i16> @cttz_nxv1i16(<vscale x 1 x i16> %va) {
; CHECK-ZVE64X-LABEL: cttz_nxv1i16:
; CHECK-ZVE64X: # %bb.0:
-; CHECK-ZVE64X-NEXT: li a0, 1
-; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-ZVE64X-NEXT: vnot.v v9, v8
-; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
; CHECK-ZVE64X-NEXT: lui a0, 5
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
-; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8
; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT: lui a0, 3
@@ -472,13 +464,12 @@ declare <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16>, i1)
define <vscale x 2 x i16> @cttz_nxv2i16(<vscale x 2 x i16> %va) {
; CHECK-ZVE64X-LABEL: cttz_nxv2i16:
; CHECK-ZVE64X: # %bb.0:
-; CHECK-ZVE64X-NEXT: li a0, 1
-; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-ZVE64X-NEXT: vnot.v v9, v8
-; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
; CHECK-ZVE64X-NEXT: lui a0, 5
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
-; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8
; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT: lui a0, 3
@@ -539,13 +530,12 @@ declare <vscale x 2 x i16> @llvm.cttz.nxv2i16(<vscale x 2 x i16>, i1)
define <vscale x 4 x i16> @cttz_nxv4i16(<vscale x 4 x i16> %va) {
; CHECK-ZVE64X-LABEL: cttz_nxv4i16:
; CHECK-ZVE64X: # %bb.0:
-; CHECK-ZVE64X-NEXT: li a0, 1
-; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-ZVE64X-NEXT: vnot.v v9, v8
-; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
; CHECK-ZVE64X-NEXT: lui a0, 5
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
-; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8
; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT: lui a0, 3
@@ -606,13 +596,12 @@ declare <vscale x 4 x i16> @llvm.cttz.nxv4i16(<vscale x 4 x i16>, i1)
define <vscale x 8 x i16> @cttz_nxv8i16(<vscale x 8 x i16> %va) {
; CHECK-ZVE64X-LABEL: cttz_nxv8i16:
; CHECK-ZVE64X: # %bb.0:
-; CHECK-ZVE64X-NEXT: li a0, 1
-; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-ZVE64X-NEXT: vnot.v v10, v8
-; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-ZVE64X-NEXT: vadd.vi v10, v8, -1
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
; CHECK-ZVE64X-NEXT: lui a0, 5
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10
; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
-; CHECK-ZVE64X-NEXT: vand.vv v8, v10, v8
; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1
; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0
; CHECK-ZVE64X-NEXT: lui a0, 3
@@ -673,13 +662,12 @@ declare <vscale x 8 x i16> @llvm.cttz.nxv8i16(<vscale x 8 x i16>, i1)
define <vscale x 16 x i16> @cttz_nxv16i16(<vscale x 16 x i16> %va) {
; CHECK-ZVE64X-LABEL: cttz_nxv16i16:
; CHECK-ZVE64X: # %bb.0:
-; CHECK-ZVE64X-NEXT: li a0, 1
-; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-ZVE64X-NEXT: vnot.v v12, v8
-; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-ZVE64X-NEXT: vadd.vi v12, v8, -1
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
; CHECK-ZVE64X-NEXT: lui a0, 5
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v12
; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
-; CHECK-ZVE64X-NEXT: vand.vv v8, v12, v8
; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1
; CHECK-ZVE64X-NEXT: vand.vx v12, v12, a0
; CHECK-ZVE64X-NEXT: lui a0, 3
@@ -740,13 +728,12 @@ declare <vscale x 16 x i16> @llvm.cttz.nxv16i16(<vscale x 16 x i16>, i1)
define <vscale x 32 x i16> @cttz_nxv32i16(<vscale x 32 x i16> %va) {
; CHECK-LABEL: cttz_nxv32i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 1
-; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; CHECK-NEXT: vnot.v v16, v8
-; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT: vadd.vi v16, v8, -1
+; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: lui a0, 5
+; CHECK-NEXT: vand.vv v8, v8, v16
; CHECK-NEXT: addi a0, a0, 1365
-; CHECK-NEXT: vand.vv v8, v16, v8
; CHECK-NEXT: vsrl.vi v16, v8, 1
; CHECK-NEXT: vand.vx v16, v16, a0
; CHECK-NEXT: lui a0, 3
@@ -779,13 +766,12 @@ declare <vscale x 32 x i16> @llvm.cttz.nxv32i16(<vscale x 32 x i16>, i1)
define <vscale x 1 x i32> @cttz_nxv1i32(<vscale x 1 x i32> %va) {
; CHECK-ZVE64X-LABEL: cttz_nxv1i32:
; CHECK-ZVE64X: # %bb.0:
-; CHECK-ZVE64X-NEXT: li a0, 1
-; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; CHECK-ZVE64X-NEXT: vnot.v v9, v8
-; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
; CHECK-ZVE64X-NEXT: lui a0, 349525
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
-; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8
; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT: lui a0, 209715
@@ -850,13 +836,12 @@ declare <vscale x 1 x i32> @llvm.cttz.nxv1i32(<vscale x 1 x i32>, i1)
define <vscale x 2 x i32> @cttz_nxv2i32(<vscale x 2 x i32> %va) {
; CHECK-ZVE64X-LABEL: cttz_nxv2i32:
; CHECK-ZVE64X: # %bb.0:
-; CHECK-ZVE64X-NEXT: li a0, 1
-; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-ZVE64X-NEXT: vnot.v v9, v8
-; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0
+; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1
+; CHECK-ZVE64X-NEXT: vnot.v v8, v8
; CHECK-ZVE64X-NEXT: lui a0, 349525
+; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
; CHECK-ZVE64X-NEXT: addi a0, a0, 1365
-; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8
; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT: lui a0, 209715
@@ -921,13 +906,12 @@ declare <vscale x 2 x i32> @llvm.cttz.nxv2i32(<vscale x 2 x i32>, i1)
define <vscale x 4 x i32> @cttz_nxv4i32(<vscale x 4 x i32> %va) {
; CHECK-ZVE64X-LABEL: cttz_nxv4i32:
; CHECK-ZVE64X: # %bb.0:
-; CHECK-ZVE64X-NEXT: li a0, 1
-; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-ZVE64X-...
[truncated]
LGTM
let Predicates = GetVTypePredicates<vti>.Predicates in {
  // Match VSUB with a small immediate to vadd.vi by negating the immediate.
  def : Pat<(sub (vti.Vector vti.RegClass:$rs1),
I'm a bit surprised that DAGCombiner doesn't canonicalize this, but I checked the code and it's only canonicalizing scalars.
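For context, a hypothetical scalar counterpart (not from the patch's tests; the output comment is a sketch of typical rv64 codegen): the scalar sub-by-constant is already emitted as an add of the negated immediate, since RISC-V has no subi, while the vector-splat form was left as a subtract until these ISel patterns.

define i64 @sub_scalar(i64 %x) {
; Expected rv64 output, roughly:
;   addi a0, a0, -7
;   ret
  %r = sub i64 %x, 7
  ret i64 %r
}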