[LLVM][SelectionDAG] Remove scalable vector restriction from poison analysis. (#102504)

paulwalker-arm · web-flow · commit 4197386dbde3 · 2024-08-13T12:53:20.000+01:00
The following functions have an early exit for scalable vectors:
  SelectionDAG::canCreateUndefOrPoison
  SelectionDAG::isGuaranteedNotToBeUndefOrPoison
    
The implementations of these don't look to be sensitive to the
vector type other than some uses of demanded elts analysis that
doesn't fully support scalable types.  That said, the initial
calculation demands all elements, and so I've followed the same
scheme as used by TargetLowering::SimplifyDemandedBits (scalable
vectors are given a single-bit DemandedElts mask).
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5140,12 +5140,8 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly,
   if (Op.getOpcode() == ISD::FREEZE)
     return true;
 
-  // TODO: Assume we don't know anything for now.
   EVT VT = Op.getValueType();
-  if (VT.isScalableVector())
-    return false;
-
-  APInt DemandedElts = VT.isVector()
+  APInt DemandedElts = VT.isFixedLengthVector()
                            ? APInt::getAllOnes(VT.getVectorNumElements())
                            : APInt(1, 1);
   return isGuaranteedNotToBeUndefOrPoison(Op, DemandedElts, PoisonOnly, Depth);
@@ -5190,6 +5186,10 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
     }
     return true;
 
+  case ISD::SPLAT_VECTOR:
+    return isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), PoisonOnly,
+                                            Depth + 1);
+
   case ISD::VECTOR_SHUFFLE: {
     APInt DemandedLHS, DemandedRHS;
     auto *SVN = cast<ShuffleVectorSDNode>(Op);
@@ -5236,12 +5236,8 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
 bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, bool PoisonOnly,
                                           bool ConsiderFlags,
                                           unsigned Depth) const {
-  // TODO: Assume we don't know anything for now.
   EVT VT = Op.getValueType();
-  if (VT.isScalableVector())
-    return true;
-
-  APInt DemandedElts = VT.isVector()
+  APInt DemandedElts = VT.isFixedLengthVector()
                            ? APInt::getAllOnes(VT.getVectorNumElements())
                            : APInt(1, 1);
   return canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly, ConsiderFlags,
@@ -5251,11 +5247,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, bool PoisonOnly,
 bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
                                           bool PoisonOnly, bool ConsiderFlags,
                                           unsigned Depth) const {
-  // TODO: Assume we don't know anything for now.
-  EVT VT = Op.getValueType();
-  if (VT.isScalableVector())
-    return true;
-
   if (ConsiderFlags && Op->hasPoisonGeneratingFlags())
     return true;
 
@@ -5292,6 +5283,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
   case ISD::BITCAST:
   case ISD::BUILD_VECTOR:
   case ISD::BUILD_PAIR:
+  case ISD::SPLAT_VECTOR:
     return false;
 
   case ISD::SELECT_CC:
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
@@ -229,8 +229,8 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
 ; CHECK-NEXT:    mov z6.d, z1.d
 ; CHECK-NEXT:    mov z7.d, z0.d
 ; CHECK-NEXT:    add x2, x2, x11
-; CHECK-NEXT:    cmpne p2.d, p0/z, z2.d, #0
-; CHECK-NEXT:    and p1.b, p1/z, p1.b, p2.b
+; CHECK-NEXT:    and z2.d, z2.d, #0xffffffff
+; CHECK-NEXT:    cmpne p1.d, p1/z, z2.d, #0
 ; CHECK-NEXT:    zip2 p2.d, p1.d, p1.d
 ; CHECK-NEXT:    zip1 p1.d, p1.d, p1.d
 ; CHECK-NEXT:    ld1d { z2.d }, p2/z, [x0, #1, mul vl]
diff --git a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
@@ -319,9 +319,8 @@ define i32 @ctz_nxv16i1_poison(<vscale x 16 x i1> %a) {
 define i32 @ctz_and_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: ctz_and_nxv16i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, z1.b
 ; CHECK-NEXT:    ptrue p1.b
-; CHECK-NEXT:    cmpne p2.b, p1/z, z0.b, z1.b
-; CHECK-NEXT:    and p0.b, p0/z, p0.b, p2.b
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.b
 ; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
diff --git a/llvm/test/CodeGen/AArch64/sve-fcmp.ll b/llvm/test/CodeGen/AArch64/sve-fcmp.ll
@@ -544,3 +544,119 @@ define %svboolx2 @and_of_multiuse_fcmp_olt_zero(<vscale x 4 x i1> %pg, <vscale x
   %ins.2 = insertvalue %svboolx2 %ins.1, <vscale x 4 x i1> %cmp, 1
   ret %svboolx2 %ins.2
 }
+
+define <vscale x 8 x i1> @logical_and_oeq_zero_pred(<vscale x 8 x i1> %pg, <vscale x 8 x half> %x) {
+; CHECK-LABEL: logical_and_oeq_zero_pred:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmeq p0.h, p0/z, z0.h, #0.0
+; CHECK-NEXT:    ret
+  %y = fcmp oeq <vscale x 8 x half> %x, zeroinitializer
+  %z = select <vscale x 8 x i1> %pg, <vscale x 8 x i1> %y, <vscale x 8 x i1> zeroinitializer
+ ret <vscale x 8 x i1> %z
+}
+
+define <vscale x 4 x i1> @logical_and_ogt_zero_pred(<vscale x 4 x i1> %pg, <vscale x 4 x half> %x) {
+; CHECK-LABEL: logical_and_ogt_zero_pred:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmgt p0.h, p0/z, z0.h, #0.0
+; CHECK-NEXT:    ret
+  %y = fcmp ogt <vscale x 4 x half> %x, zeroinitializer
+  %z = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %y, <vscale x 4 x i1> zeroinitializer
+  ret <vscale x 4 x i1> %z
+}
+
+define <vscale x 2 x i1> @logical_and_oge_zero_pred(<vscale x 2 x i1> %pg, <vscale x 2 x half> %x) {
+; CHECK-LABEL: logical_and_oge_zero_pred:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmge p0.h, p0/z, z0.h, #0.0
+; CHECK-NEXT:    ret
+  %y = fcmp oge <vscale x 2 x half> %x, zeroinitializer
+  %z = select <vscale x 2 x i1> %pg, <vscale x 2 x i1> %y, <vscale x 2 x i1> zeroinitializer
+  ret <vscale x 2 x i1> %z
+}
+
+define <vscale x 4 x i1> @logical_and_olt_zero_pred(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x) {
+; CHECK-LABEL: logical_and_olt_zero_pred:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmlt p0.s, p0/z, z0.s, #0.0
+; CHECK-NEXT:    ret
+  %y = fcmp olt <vscale x 4 x float> %x, zeroinitializer
+  %z = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %y, <vscale x 4 x i1> zeroinitializer
+  ret <vscale x 4 x i1> %z
+}
+
+define <vscale x 2 x i1> @logical_and_ole_zero_pred(<vscale x 2 x i1> %pg, <vscale x 2 x float> %x) {
+; CHECK-LABEL: logical_and_ole_zero_pred:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmle p0.s, p0/z, z0.s, #0.0
+; CHECK-NEXT:    ret
+  %y = fcmp ole <vscale x 2 x float> %x, zeroinitializer
+  %z = select <vscale x 2 x i1> %pg, <vscale x 2 x i1> %y, <vscale x 2 x i1> zeroinitializer
+  ret <vscale x 2 x i1> %z
+}
+
+define <vscale x 2 x i1> @logical_and_une_zero_pred(<vscale x 2 x i1> %pg, <vscale x 2 x double> %x) {
+; CHECK-LABEL: logical_and_une_zero_pred:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmne p0.d, p0/z, z0.d, #0.0
+; CHECK-NEXT:    ret
+  %y = fcmp une <vscale x 2 x double> %x, zeroinitializer
+  %z = select <vscale x 2 x i1> %pg, <vscale x 2 x i1> %y, <vscale x 2 x i1> zeroinitializer
+  ret <vscale x 2 x i1> %z
+}
+
+define %svboolx2 @logical_and_of_multiuse_fcmp_ogt(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: logical_and_of_multiuse_fcmp_ogt:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.s
+; CHECK-NEXT:    fcmgt p1.s, p1/z, z0.s, z1.s
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT:    ret
+  %cmp = fcmp ogt <vscale x 4 x float> %x, %y
+  %and = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %cmp, <vscale x 4 x i1> zeroinitializer
+  %ins.1 = insertvalue %svboolx2 poison, <vscale x 4 x i1> %and, 0
+  %ins.2 = insertvalue %svboolx2 %ins.1, <vscale x 4 x i1> %cmp, 1
+  ret %svboolx2 %ins.2
+}
+
+define %svboolx2 @logical_and_of_multiuse_fcmp_ogt_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x) {
+; CHECK-LABEL: logical_and_of_multiuse_fcmp_ogt_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.s
+; CHECK-NEXT:    fcmgt p1.s, p1/z, z0.s, #0.0
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT:    ret
+  %cmp = fcmp ogt <vscale x 4 x float> %x, zeroinitializer
+  %and = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %cmp, <vscale x 4 x i1> zeroinitializer
+  %ins.1 = insertvalue %svboolx2 poison, <vscale x 4 x i1> %and, 0
+  %ins.2 = insertvalue %svboolx2 %ins.1, <vscale x 4 x i1> %cmp, 1
+  ret %svboolx2 %ins.2
+}
+
+define %svboolx2 @logical_and_of_multiuse_fcmp_olt(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: logical_and_of_multiuse_fcmp_olt:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.s
+; CHECK-NEXT:    fcmgt p1.s, p1/z, z1.s, z0.s
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT:    ret
+  %cmp = fcmp olt <vscale x 4 x float> %x, %y
+  %and = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %cmp, <vscale x 4 x i1> zeroinitializer
+  %ins.1 = insertvalue %svboolx2 poison, <vscale x 4 x i1> %and, 0
+  %ins.2 = insertvalue %svboolx2 %ins.1, <vscale x 4 x i1> %cmp, 1
+  ret %svboolx2 %ins.2
+}
+
+define %svboolx2 @logical_and_of_multiuse_fcmp_olt_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x) {
+; CHECK-LABEL: logical_and_of_multiuse_fcmp_olt_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.s
+; CHECK-NEXT:    fcmlt p1.s, p1/z, z0.s, #0.0
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT:    ret
+  %cmp = fcmp olt <vscale x 4 x float> %x, zeroinitializer
+  %and = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %cmp, <vscale x 4 x i1> zeroinitializer
+  %ins.1 = insertvalue %svboolx2 poison, <vscale x 4 x i1> %and, 0
+  %ins.2 = insertvalue %svboolx2 %ins.1, <vscale x 4 x i1> %cmp, 1
+  ret %svboolx2 %ins.2
+}
diff --git a/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll b/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll
@@ -24,8 +24,7 @@ define i64 @scalable_int_min_max(ptr %arg, ptr %arg1, <vscale x 2 x ptr> %i37, <
 ; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z4.s
 ; CHECK-NEXT:    fcmge p2.s, p0/z, z0.s, z3.s
 ; CHECK-NEXT:    add z0.d, z2.d, z1.d
-; CHECK-NEXT:    not p2.b, p0/z, p2.b
-; CHECK-NEXT:    and p2.b, p1/z, p1.b, p2.b
+; CHECK-NEXT:    bic p2.b, p1/z, p1.b, p2.b
 ; CHECK-NEXT:    mov z0.d, p2/m, z2.d
 ; CHECK-NEXT:    sel z0.d, p1, z0.d, z2.d
 ; CHECK-NEXT:    uaddv d0, p0, z0.d
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
@@ -1501,18 +1501,23 @@ define <vscale x 8 x i32> @vwadd_vx_splat_zext_i1(<vscale x 8 x i1> %va, i16 %b)
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    slli a0, a0, 16
 ; RV32-NEXT:    srli a0, a0, 16
-; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vmv.v.x v8, a0
-; RV32-NEXT:    vadd.vi v8, v8, 1, v0.t
+; RV32-NEXT:    addi a0, a0, 1
+; RV32-NEXT:    vmerge.vxm v8, v8, a0, v0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vwadd_vx_splat_zext_i1:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    slli a0, a0, 48
 ; RV64-NEXT:    srli a0, a0, 48
-; RV64-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; RV64-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; RV64-NEXT:    vmv.v.x v12, a0
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vmv.v.x v8, a0
-; RV64-NEXT:    vadd.vi v8, v8, 1, v0.t
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; RV64-NEXT:    vwaddu.vx v8, v12, a0, v0.t
 ; RV64-NEXT:    ret
   %zb = zext i16 %b to i32
   %head = insertelement <vscale x 8 x i32> poison, i32 %zb, i32 0
@@ -1570,20 +1575,23 @@ define <vscale x 8 x i32> @vwadd_vx_splat_sext_i1(<vscale x 8 x i1> %va, i16 %b)
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    slli a0, a0, 16
 ; RV32-NEXT:    srai a0, a0, 16
-; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vmv.v.x v8, a0
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    vsub.vx v8, v8, a0, v0.t
+; RV32-NEXT:    addi a0, a0, -1
+; RV32-NEXT:    vmerge.vxm v8, v8, a0, v0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vwadd_vx_splat_sext_i1:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    slli a0, a0, 48
 ; RV64-NEXT:    srai a0, a0, 48
-; RV64-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; RV64-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
+; RV64-NEXT:    vmv.v.x v12, a0
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vmv.v.x v8, a0
 ; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    vsub.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
+; RV64-NEXT:    vwsub.vx v8, v12, a0, v0.t
 ; RV64-NEXT:    ret
   %sb = sext i16 %b to i32
   %head = insertelement <vscale x 8 x i32> poison, i32 %sb, i32 0