Skip to content

Commit 4197386

Browse files
[LLVM][SelectionDAG] Remove scalable vector restriction from poison analysis. (#102504)
The following functions have an early exit for scalable vectors: SelectionDAG::canCreateUndefOrPoison and SelectionDAG::isGuaranteedNotToBeUndefOrPoison. The implementations of these don't look to be sensitive to the vector type, other than some uses of demanded elts analysis that doesn't fully support scalable types. That said, the initial calculation demands all elements, and so I've followed the same scheme as used by TargetLowering::SimplifyDemandedBits.
1 parent ed7ad0a commit 4197386

File tree

6 files changed

+144
-30
lines changed

6 files changed

+144
-30
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5140,12 +5140,8 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly,
51405140
if (Op.getOpcode() == ISD::FREEZE)
51415141
return true;
51425142

5143-
// TODO: Assume we don't know anything for now.
51445143
EVT VT = Op.getValueType();
5145-
if (VT.isScalableVector())
5146-
return false;
5147-
5148-
APInt DemandedElts = VT.isVector()
5144+
APInt DemandedElts = VT.isFixedLengthVector()
51495145
? APInt::getAllOnes(VT.getVectorNumElements())
51505146
: APInt(1, 1);
51515147
return isGuaranteedNotToBeUndefOrPoison(Op, DemandedElts, PoisonOnly, Depth);
@@ -5190,6 +5186,10 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
51905186
}
51915187
return true;
51925188

5189+
case ISD::SPLAT_VECTOR:
5190+
return isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), PoisonOnly,
5191+
Depth + 1);
5192+
51935193
case ISD::VECTOR_SHUFFLE: {
51945194
APInt DemandedLHS, DemandedRHS;
51955195
auto *SVN = cast<ShuffleVectorSDNode>(Op);
@@ -5236,12 +5236,8 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
52365236
bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, bool PoisonOnly,
52375237
bool ConsiderFlags,
52385238
unsigned Depth) const {
5239-
// TODO: Assume we don't know anything for now.
52405239
EVT VT = Op.getValueType();
5241-
if (VT.isScalableVector())
5242-
return true;
5243-
5244-
APInt DemandedElts = VT.isVector()
5240+
APInt DemandedElts = VT.isFixedLengthVector()
52455241
? APInt::getAllOnes(VT.getVectorNumElements())
52465242
: APInt(1, 1);
52475243
return canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly, ConsiderFlags,
@@ -5251,11 +5247,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, bool PoisonOnly,
52515247
bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
52525248
bool PoisonOnly, bool ConsiderFlags,
52535249
unsigned Depth) const {
5254-
// TODO: Assume we don't know anything for now.
5255-
EVT VT = Op.getValueType();
5256-
if (VT.isScalableVector())
5257-
return true;
5258-
52595250
if (ConsiderFlags && Op->hasPoisonGeneratingFlags())
52605251
return true;
52615252

@@ -5292,6 +5283,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
52925283
case ISD::BITCAST:
52935284
case ISD::BUILD_VECTOR:
52945285
case ISD::BUILD_PAIR:
5286+
case ISD::SPLAT_VECTOR:
52955287
return false;
52965288

52975289
case ISD::SELECT_CC:

llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -229,8 +229,8 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
229229
; CHECK-NEXT: mov z6.d, z1.d
230230
; CHECK-NEXT: mov z7.d, z0.d
231231
; CHECK-NEXT: add x2, x2, x11
232-
; CHECK-NEXT: cmpne p2.d, p0/z, z2.d, #0
233-
; CHECK-NEXT: and p1.b, p1/z, p1.b, p2.b
232+
; CHECK-NEXT: and z2.d, z2.d, #0xffffffff
233+
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
234234
; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
235235
; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
236236
; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]

llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -319,9 +319,8 @@ define i32 @ctz_nxv16i1_poison(<vscale x 16 x i1> %a) {
319319
define i32 @ctz_and_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
320320
; CHECK-LABEL: ctz_and_nxv16i1:
321321
; CHECK: // %bb.0:
322+
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z1.b
322323
; CHECK-NEXT: ptrue p1.b
323-
; CHECK-NEXT: cmpne p2.b, p1/z, z0.b, z1.b
324-
; CHECK-NEXT: and p0.b, p0/z, p0.b, p2.b
325324
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
326325
; CHECK-NEXT: cntp x0, p0, p0.b
327326
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0

llvm/test/CodeGen/AArch64/sve-fcmp.ll

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,3 +544,119 @@ define %svboolx2 @and_of_multiuse_fcmp_olt_zero(<vscale x 4 x i1> %pg, <vscale x
544544
%ins.2 = insertvalue %svboolx2 %ins.1, <vscale x 4 x i1> %cmp, 1
545545
ret %svboolx2 %ins.2
546546
}
547+
548+
define <vscale x 8 x i1> @logical_and_oeq_zero_pred(<vscale x 8 x i1> %pg, <vscale x 8 x half> %x) {
549+
; CHECK-LABEL: logical_and_oeq_zero_pred:
550+
; CHECK: // %bb.0:
551+
; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0
552+
; CHECK-NEXT: ret
553+
%y = fcmp oeq <vscale x 8 x half> %x, zeroinitializer
554+
%z = select <vscale x 8 x i1> %pg, <vscale x 8 x i1> %y, <vscale x 8 x i1> zeroinitializer
555+
ret <vscale x 8 x i1> %z
556+
}
557+
558+
define <vscale x 4 x i1> @logical_and_ogt_zero_pred(<vscale x 4 x i1> %pg, <vscale x 4 x half> %x) {
559+
; CHECK-LABEL: logical_and_ogt_zero_pred:
560+
; CHECK: // %bb.0:
561+
; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, #0.0
562+
; CHECK-NEXT: ret
563+
%y = fcmp ogt <vscale x 4 x half> %x, zeroinitializer
564+
%z = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %y, <vscale x 4 x i1> zeroinitializer
565+
ret <vscale x 4 x i1> %z
566+
}
567+
568+
define <vscale x 2 x i1> @logical_and_oge_zero_pred(<vscale x 2 x i1> %pg, <vscale x 2 x half> %x) {
569+
; CHECK-LABEL: logical_and_oge_zero_pred:
570+
; CHECK: // %bb.0:
571+
; CHECK-NEXT: fcmge p0.h, p0/z, z0.h, #0.0
572+
; CHECK-NEXT: ret
573+
%y = fcmp oge <vscale x 2 x half> %x, zeroinitializer
574+
%z = select <vscale x 2 x i1> %pg, <vscale x 2 x i1> %y, <vscale x 2 x i1> zeroinitializer
575+
ret <vscale x 2 x i1> %z
576+
}
577+
578+
define <vscale x 4 x i1> @logical_and_olt_zero_pred(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x) {
579+
; CHECK-LABEL: logical_and_olt_zero_pred:
580+
; CHECK: // %bb.0:
581+
; CHECK-NEXT: fcmlt p0.s, p0/z, z0.s, #0.0
582+
; CHECK-NEXT: ret
583+
%y = fcmp olt <vscale x 4 x float> %x, zeroinitializer
584+
%z = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %y, <vscale x 4 x i1> zeroinitializer
585+
ret <vscale x 4 x i1> %z
586+
}
587+
588+
define <vscale x 2 x i1> @logical_and_ole_zero_pred(<vscale x 2 x i1> %pg, <vscale x 2 x float> %x) {
589+
; CHECK-LABEL: logical_and_ole_zero_pred:
590+
; CHECK: // %bb.0:
591+
; CHECK-NEXT: fcmle p0.s, p0/z, z0.s, #0.0
592+
; CHECK-NEXT: ret
593+
%y = fcmp ole <vscale x 2 x float> %x, zeroinitializer
594+
%z = select <vscale x 2 x i1> %pg, <vscale x 2 x i1> %y, <vscale x 2 x i1> zeroinitializer
595+
ret <vscale x 2 x i1> %z
596+
}
597+
598+
define <vscale x 2 x i1> @logical_and_une_zero_pred(<vscale x 2 x i1> %pg, <vscale x 2 x double> %x) {
599+
; CHECK-LABEL: logical_and_une_zero_pred:
600+
; CHECK: // %bb.0:
601+
; CHECK-NEXT: fcmne p0.d, p0/z, z0.d, #0.0
602+
; CHECK-NEXT: ret
603+
%y = fcmp une <vscale x 2 x double> %x, zeroinitializer
604+
%z = select <vscale x 2 x i1> %pg, <vscale x 2 x i1> %y, <vscale x 2 x i1> zeroinitializer
605+
ret <vscale x 2 x i1> %z
606+
}
607+
608+
define %svboolx2 @logical_and_of_multiuse_fcmp_ogt(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x, <vscale x 4 x float> %y) {
609+
; CHECK-LABEL: logical_and_of_multiuse_fcmp_ogt:
610+
; CHECK: // %bb.0:
611+
; CHECK-NEXT: ptrue p1.s
612+
; CHECK-NEXT: fcmgt p1.s, p1/z, z0.s, z1.s
613+
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
614+
; CHECK-NEXT: ret
615+
%cmp = fcmp ogt <vscale x 4 x float> %x, %y
616+
%and = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %cmp, <vscale x 4 x i1> zeroinitializer
617+
%ins.1 = insertvalue %svboolx2 poison, <vscale x 4 x i1> %and, 0
618+
%ins.2 = insertvalue %svboolx2 %ins.1, <vscale x 4 x i1> %cmp, 1
619+
ret %svboolx2 %ins.2
620+
}
621+
622+
define %svboolx2 @logical_and_of_multiuse_fcmp_ogt_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x) {
623+
; CHECK-LABEL: logical_and_of_multiuse_fcmp_ogt_zero:
624+
; CHECK: // %bb.0:
625+
; CHECK-NEXT: ptrue p1.s
626+
; CHECK-NEXT: fcmgt p1.s, p1/z, z0.s, #0.0
627+
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
628+
; CHECK-NEXT: ret
629+
%cmp = fcmp ogt <vscale x 4 x float> %x, zeroinitializer
630+
%and = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %cmp, <vscale x 4 x i1> zeroinitializer
631+
%ins.1 = insertvalue %svboolx2 poison, <vscale x 4 x i1> %and, 0
632+
%ins.2 = insertvalue %svboolx2 %ins.1, <vscale x 4 x i1> %cmp, 1
633+
ret %svboolx2 %ins.2
634+
}
635+
636+
define %svboolx2 @logical_and_of_multiuse_fcmp_olt(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x, <vscale x 4 x float> %y) {
637+
; CHECK-LABEL: logical_and_of_multiuse_fcmp_olt:
638+
; CHECK: // %bb.0:
639+
; CHECK-NEXT: ptrue p1.s
640+
; CHECK-NEXT: fcmgt p1.s, p1/z, z1.s, z0.s
641+
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
642+
; CHECK-NEXT: ret
643+
%cmp = fcmp olt <vscale x 4 x float> %x, %y
644+
%and = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %cmp, <vscale x 4 x i1> zeroinitializer
645+
%ins.1 = insertvalue %svboolx2 poison, <vscale x 4 x i1> %and, 0
646+
%ins.2 = insertvalue %svboolx2 %ins.1, <vscale x 4 x i1> %cmp, 1
647+
ret %svboolx2 %ins.2
648+
}
649+
650+
define %svboolx2 @logical_and_of_multiuse_fcmp_olt_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x) {
651+
; CHECK-LABEL: logical_and_of_multiuse_fcmp_olt_zero:
652+
; CHECK: // %bb.0:
653+
; CHECK-NEXT: ptrue p1.s
654+
; CHECK-NEXT: fcmlt p1.s, p1/z, z0.s, #0.0
655+
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
656+
; CHECK-NEXT: ret
657+
%cmp = fcmp olt <vscale x 4 x float> %x, zeroinitializer
658+
%and = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %cmp, <vscale x 4 x i1> zeroinitializer
659+
%ins.1 = insertvalue %svboolx2 poison, <vscale x 4 x i1> %and, 0
660+
%ins.2 = insertvalue %svboolx2 %ins.1, <vscale x 4 x i1> %cmp, 1
661+
ret %svboolx2 %ins.2
662+
}

llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,7 @@ define i64 @scalable_int_min_max(ptr %arg, ptr %arg1, <vscale x 2 x ptr> %i37, <
2424
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z4.s
2525
; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z3.s
2626
; CHECK-NEXT: add z0.d, z2.d, z1.d
27-
; CHECK-NEXT: not p2.b, p0/z, p2.b
28-
; CHECK-NEXT: and p2.b, p1/z, p1.b, p2.b
27+
; CHECK-NEXT: bic p2.b, p1/z, p1.b, p2.b
2928
; CHECK-NEXT: mov z0.d, p2/m, z2.d
3029
; CHECK-NEXT: sel z0.d, p1, z0.d, z2.d
3130
; CHECK-NEXT: uaddv d0, p0, z0.d

llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1501,18 +1501,23 @@ define <vscale x 8 x i32> @vwadd_vx_splat_zext_i1(<vscale x 8 x i1> %va, i16 %b)
15011501
; RV32: # %bb.0:
15021502
; RV32-NEXT: slli a0, a0, 16
15031503
; RV32-NEXT: srli a0, a0, 16
1504-
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
1504+
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
15051505
; RV32-NEXT: vmv.v.x v8, a0
1506-
; RV32-NEXT: vadd.vi v8, v8, 1, v0.t
1506+
; RV32-NEXT: addi a0, a0, 1
1507+
; RV32-NEXT: vmerge.vxm v8, v8, a0, v0
15071508
; RV32-NEXT: ret
15081509
;
15091510
; RV64-LABEL: vwadd_vx_splat_zext_i1:
15101511
; RV64: # %bb.0:
15111512
; RV64-NEXT: slli a0, a0, 48
15121513
; RV64-NEXT: srli a0, a0, 48
1513-
; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, mu
1514+
; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma
1515+
; RV64-NEXT: vmv.v.x v12, a0
1516+
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
15141517
; RV64-NEXT: vmv.v.x v8, a0
1515-
; RV64-NEXT: vadd.vi v8, v8, 1, v0.t
1518+
; RV64-NEXT: li a0, 1
1519+
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu
1520+
; RV64-NEXT: vwaddu.vx v8, v12, a0, v0.t
15161521
; RV64-NEXT: ret
15171522
%zb = zext i16 %b to i32
15181523
%head = insertelement <vscale x 8 x i32> poison, i32 %zb, i32 0
@@ -1570,20 +1575,23 @@ define <vscale x 8 x i32> @vwadd_vx_splat_sext_i1(<vscale x 8 x i1> %va, i16 %b)
15701575
; RV32: # %bb.0:
15711576
; RV32-NEXT: slli a0, a0, 16
15721577
; RV32-NEXT: srai a0, a0, 16
1573-
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
1578+
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
15741579
; RV32-NEXT: vmv.v.x v8, a0
1575-
; RV32-NEXT: li a0, 1
1576-
; RV32-NEXT: vsub.vx v8, v8, a0, v0.t
1580+
; RV32-NEXT: addi a0, a0, -1
1581+
; RV32-NEXT: vmerge.vxm v8, v8, a0, v0
15771582
; RV32-NEXT: ret
15781583
;
15791584
; RV64-LABEL: vwadd_vx_splat_sext_i1:
15801585
; RV64: # %bb.0:
15811586
; RV64-NEXT: slli a0, a0, 48
15821587
; RV64-NEXT: srai a0, a0, 48
1583-
; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, mu
1588+
; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma
1589+
; RV64-NEXT: vmv.v.x v12, a0
1590+
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
15841591
; RV64-NEXT: vmv.v.x v8, a0
15851592
; RV64-NEXT: li a0, 1
1586-
; RV64-NEXT: vsub.vx v8, v8, a0, v0.t
1593+
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu
1594+
; RV64-NEXT: vwsub.vx v8, v12, a0, v0.t
15871595
; RV64-NEXT: ret
15881596
%sb = sext i16 %b to i32
15891597
%head = insertelement <vscale x 8 x i32> poison, i32 %sb, i32 0

0 commit comments

Comments
 (0)