Skip to content

Commit d4e9e6b

Browse files
[LLVM][SelectionDAG] Remove scalable vector restriction from poison analysis.
The following functions have an early exit for scalable vectors: SelectionDAG::canCreateUndefOrPoison and SelectionDAG::isGuaranteedNotToBeUndefOrPoison. The implementations of these don't look to be sensitive to the vector types, other than some uses of demanded elts analysis that don't fully support scalable types. That said, the initial calculation demands all elements, and so I've followed the same scheme as used by TargetLowering::SimplifyDemandedBits.
1 parent 9e6f2b4 commit d4e9e6b

File tree

6 files changed

+43
-52
lines changed

6 files changed

+43
-52
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5140,14 +5140,11 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly,
51405140
if (Op.getOpcode() == ISD::FREEZE)
51415141
return true;
51425142

5143-
// TODO: Assume we don't know anything for now.
51445143
EVT VT = Op.getValueType();
5145-
if (VT.isScalableVector())
5146-
return false;
5147-
5148-
APInt DemandedElts = VT.isVector()
5149-
? APInt::getAllOnes(VT.getVectorNumElements())
5150-
: APInt(1, 1);
5144+
APInt DemandedElts =
5145+
VT.isVector() ? APInt::getAllOnes(
5146+
VT.isScalableVector() ? 1 : VT.getVectorNumElements())
5147+
: APInt(1, 1);
51515148
return isGuaranteedNotToBeUndefOrPoison(Op, DemandedElts, PoisonOnly, Depth);
51525149
}
51535150

@@ -5190,6 +5187,10 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
51905187
}
51915188
return true;
51925189

5190+
case ISD::SPLAT_VECTOR:
5191+
return isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), PoisonOnly,
5192+
Depth + 1);
5193+
51935194
case ISD::VECTOR_SHUFFLE: {
51945195
APInt DemandedLHS, DemandedRHS;
51955196
auto *SVN = cast<ShuffleVectorSDNode>(Op);
@@ -5236,26 +5237,21 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
52365237
bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, bool PoisonOnly,
52375238
bool ConsiderFlags,
52385239
unsigned Depth) const {
5239-
// TODO: Assume we don't know anything for now.
5240+
// Since the number of lanes in a scalable vector is unknown at compile time,
5241+
// we track one bit which is implicitly broadcast to all lanes. This means
5242+
// that all lanes in a scalable vector are considered demanded.
52405243
EVT VT = Op.getValueType();
5241-
if (VT.isScalableVector())
5242-
return true;
5243-
5244-
APInt DemandedElts = VT.isVector()
5245-
? APInt::getAllOnes(VT.getVectorNumElements())
5246-
: APInt(1, 1);
5244+
APInt DemandedElts =
5245+
VT.isVector() ? APInt::getAllOnes(
5246+
VT.isScalableVector() ? 1 : VT.getVectorNumElements())
5247+
: APInt(1, 1);
52475248
return canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly, ConsiderFlags,
52485249
Depth);
52495250
}
52505251

52515252
bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
52525253
bool PoisonOnly, bool ConsiderFlags,
52535254
unsigned Depth) const {
5254-
// TODO: Assume we don't know anything for now.
5255-
EVT VT = Op.getValueType();
5256-
if (VT.isScalableVector())
5257-
return true;
5258-
52595255
if (ConsiderFlags && Op->hasPoisonGeneratingFlags())
52605256
return true;
52615257

@@ -5292,6 +5288,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
52925288
case ISD::BITCAST:
52935289
case ISD::BUILD_VECTOR:
52945290
case ISD::BUILD_PAIR:
5291+
case ISD::SPLAT_VECTOR:
52955292
return false;
52965293

52975294
case ISD::SELECT_CC:

llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -229,8 +229,8 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
229229
; CHECK-NEXT: mov z6.d, z1.d
230230
; CHECK-NEXT: mov z7.d, z0.d
231231
; CHECK-NEXT: add x2, x2, x11
232-
; CHECK-NEXT: cmpne p2.d, p0/z, z2.d, #0
233-
; CHECK-NEXT: and p1.b, p1/z, p1.b, p2.b
232+
; CHECK-NEXT: and z2.d, z2.d, #0xffffffff
233+
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
234234
; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
235235
; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
236236
; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]

llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -319,9 +319,8 @@ define i32 @ctz_nxv16i1_poison(<vscale x 16 x i1> %a) {
319319
define i32 @ctz_and_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
320320
; CHECK-LABEL: ctz_and_nxv16i1:
321321
; CHECK: // %bb.0:
322+
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z1.b
322323
; CHECK-NEXT: ptrue p1.b
323-
; CHECK-NEXT: cmpne p2.b, p1/z, z0.b, z1.b
324-
; CHECK-NEXT: and p0.b, p0/z, p0.b, p2.b
325324
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
326325
; CHECK-NEXT: cntp x0, p0, p0.b
327326
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0

llvm/test/CodeGen/AArch64/sve-fcmp.ll

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -548,9 +548,7 @@ define %svboolx2 @and_of_multiuse_fcmp_olt_zero(<vscale x 4 x i1> %pg, <vscale x
548548
define <vscale x 8 x i1> @logical_and_oeq_zero_pred(<vscale x 8 x i1> %pg, <vscale x 8 x half> %x) {
549549
; CHECK-LABEL: logical_and_oeq_zero_pred:
550550
; CHECK: // %bb.0:
551-
; CHECK-NEXT: ptrue p1.h
552-
; CHECK-NEXT: fcmeq p1.h, p1/z, z0.h, #0.0
553-
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
551+
; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0
554552
; CHECK-NEXT: ret
555553
%y = fcmp oeq <vscale x 8 x half> %x, zeroinitializer
556554
%z = select <vscale x 8 x i1> %pg, <vscale x 8 x i1> %y, <vscale x 8 x i1> zeroinitializer
@@ -560,9 +558,7 @@ define <vscale x 8 x i1> @logical_and_oeq_zero_pred(<vscale x 8 x i1> %pg, <vsca
560558
define <vscale x 4 x i1> @logical_and_ogt_zero_pred(<vscale x 4 x i1> %pg, <vscale x 4 x half> %x) {
561559
; CHECK-LABEL: logical_and_ogt_zero_pred:
562560
; CHECK: // %bb.0:
563-
; CHECK-NEXT: ptrue p1.s
564-
; CHECK-NEXT: fcmgt p1.h, p1/z, z0.h, #0.0
565-
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
561+
; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, #0.0
566562
; CHECK-NEXT: ret
567563
%y = fcmp ogt <vscale x 4 x half> %x, zeroinitializer
568564
%z = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %y, <vscale x 4 x i1> zeroinitializer
@@ -572,9 +568,7 @@ define <vscale x 4 x i1> @logical_and_ogt_zero_pred(<vscale x 4 x i1> %pg, <vsca
572568
define <vscale x 2 x i1> @logical_and_oge_zero_pred(<vscale x 2 x i1> %pg, <vscale x 2 x half> %x) {
573569
; CHECK-LABEL: logical_and_oge_zero_pred:
574570
; CHECK: // %bb.0:
575-
; CHECK-NEXT: ptrue p1.d
576-
; CHECK-NEXT: fcmge p1.h, p1/z, z0.h, #0.0
577-
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
571+
; CHECK-NEXT: fcmge p0.h, p0/z, z0.h, #0.0
578572
; CHECK-NEXT: ret
579573
%y = fcmp oge <vscale x 2 x half> %x, zeroinitializer
580574
%z = select <vscale x 2 x i1> %pg, <vscale x 2 x i1> %y, <vscale x 2 x i1> zeroinitializer
@@ -584,9 +578,7 @@ define <vscale x 2 x i1> @logical_and_oge_zero_pred(<vscale x 2 x i1> %pg, <vsca
584578
define <vscale x 4 x i1> @logical_and_olt_zero_pred(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x) {
585579
; CHECK-LABEL: logical_and_olt_zero_pred:
586580
; CHECK: // %bb.0:
587-
; CHECK-NEXT: ptrue p1.s
588-
; CHECK-NEXT: fcmlt p1.s, p1/z, z0.s, #0.0
589-
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
581+
; CHECK-NEXT: fcmlt p0.s, p0/z, z0.s, #0.0
590582
; CHECK-NEXT: ret
591583
%y = fcmp olt <vscale x 4 x float> %x, zeroinitializer
592584
%z = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %y, <vscale x 4 x i1> zeroinitializer
@@ -596,9 +588,7 @@ define <vscale x 4 x i1> @logical_and_olt_zero_pred(<vscale x 4 x i1> %pg, <vsca
596588
define <vscale x 2 x i1> @logical_and_ole_zero_pred(<vscale x 2 x i1> %pg, <vscale x 2 x float> %x) {
597589
; CHECK-LABEL: logical_and_ole_zero_pred:
598590
; CHECK: // %bb.0:
599-
; CHECK-NEXT: ptrue p1.d
600-
; CHECK-NEXT: fcmle p1.s, p1/z, z0.s, #0.0
601-
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
591+
; CHECK-NEXT: fcmle p0.s, p0/z, z0.s, #0.0
602592
; CHECK-NEXT: ret
603593
%y = fcmp ole <vscale x 2 x float> %x, zeroinitializer
604594
%z = select <vscale x 2 x i1> %pg, <vscale x 2 x i1> %y, <vscale x 2 x i1> zeroinitializer
@@ -608,9 +598,7 @@ define <vscale x 2 x i1> @logical_and_ole_zero_pred(<vscale x 2 x i1> %pg, <vsca
608598
define <vscale x 2 x i1> @logical_and_une_zero_pred(<vscale x 2 x i1> %pg, <vscale x 2 x double> %x) {
609599
; CHECK-LABEL: logical_and_une_zero_pred:
610600
; CHECK: // %bb.0:
611-
; CHECK-NEXT: ptrue p1.d
612-
; CHECK-NEXT: fcmne p1.d, p1/z, z0.d, #0.0
613-
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
601+
; CHECK-NEXT: fcmne p0.d, p0/z, z0.d, #0.0
614602
; CHECK-NEXT: ret
615603
%y = fcmp une <vscale x 2 x double> %x, zeroinitializer
616604
%z = select <vscale x 2 x i1> %pg, <vscale x 2 x i1> %y, <vscale x 2 x i1> zeroinitializer

llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,7 @@ define i64 @scalable_int_min_max(ptr %arg, ptr %arg1, <vscale x 2 x ptr> %i37, <
2424
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z4.s
2525
; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z3.s
2626
; CHECK-NEXT: add z0.d, z2.d, z1.d
27-
; CHECK-NEXT: not p2.b, p0/z, p2.b
28-
; CHECK-NEXT: and p2.b, p1/z, p1.b, p2.b
27+
; CHECK-NEXT: bic p2.b, p1/z, p1.b, p2.b
2928
; CHECK-NEXT: mov z0.d, p2/m, z2.d
3029
; CHECK-NEXT: sel z0.d, p1, z0.d, z2.d
3130
; CHECK-NEXT: uaddv d0, p0, z0.d

llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1501,18 +1501,23 @@ define <vscale x 8 x i32> @vwadd_vx_splat_zext_i1(<vscale x 8 x i1> %va, i16 %b)
15011501
; RV32: # %bb.0:
15021502
; RV32-NEXT: slli a0, a0, 16
15031503
; RV32-NEXT: srli a0, a0, 16
1504-
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
1504+
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
15051505
; RV32-NEXT: vmv.v.x v8, a0
1506-
; RV32-NEXT: vadd.vi v8, v8, 1, v0.t
1506+
; RV32-NEXT: addi a0, a0, 1
1507+
; RV32-NEXT: vmerge.vxm v8, v8, a0, v0
15071508
; RV32-NEXT: ret
15081509
;
15091510
; RV64-LABEL: vwadd_vx_splat_zext_i1:
15101511
; RV64: # %bb.0:
15111512
; RV64-NEXT: slli a0, a0, 48
15121513
; RV64-NEXT: srli a0, a0, 48
1513-
; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, mu
1514+
; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma
1515+
; RV64-NEXT: vmv.v.x v12, a0
1516+
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
15141517
; RV64-NEXT: vmv.v.x v8, a0
1515-
; RV64-NEXT: vadd.vi v8, v8, 1, v0.t
1518+
; RV64-NEXT: li a0, 1
1519+
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu
1520+
; RV64-NEXT: vwaddu.vx v8, v12, a0, v0.t
15161521
; RV64-NEXT: ret
15171522
%zb = zext i16 %b to i32
15181523
%head = insertelement <vscale x 8 x i32> poison, i32 %zb, i32 0
@@ -1570,20 +1575,23 @@ define <vscale x 8 x i32> @vwadd_vx_splat_sext_i1(<vscale x 8 x i1> %va, i16 %b)
15701575
; RV32: # %bb.0:
15711576
; RV32-NEXT: slli a0, a0, 16
15721577
; RV32-NEXT: srai a0, a0, 16
1573-
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
1578+
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
15741579
; RV32-NEXT: vmv.v.x v8, a0
1575-
; RV32-NEXT: li a0, 1
1576-
; RV32-NEXT: vsub.vx v8, v8, a0, v0.t
1580+
; RV32-NEXT: addi a0, a0, -1
1581+
; RV32-NEXT: vmerge.vxm v8, v8, a0, v0
15771582
; RV32-NEXT: ret
15781583
;
15791584
; RV64-LABEL: vwadd_vx_splat_sext_i1:
15801585
; RV64: # %bb.0:
15811586
; RV64-NEXT: slli a0, a0, 48
15821587
; RV64-NEXT: srai a0, a0, 48
1583-
; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, mu
1588+
; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma
1589+
; RV64-NEXT: vmv.v.x v12, a0
1590+
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
15841591
; RV64-NEXT: vmv.v.x v8, a0
15851592
; RV64-NEXT: li a0, 1
1586-
; RV64-NEXT: vsub.vx v8, v8, a0, v0.t
1593+
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu
1594+
; RV64-NEXT: vwsub.vx v8, v12, a0, v0.t
15871595
; RV64-NEXT: ret
15881596
%sb = sext i16 %b to i32
15891597
%head = insertelement <vscale x 8 x i32> poison, i32 %sb, i32 0

0 commit comments

Comments
 (0)