Skip to content

[LLVM][SelectionDAG] Remove scalable vector restriction from poison analysis. #102504

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 7 additions & 15 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5140,12 +5140,8 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly,
if (Op.getOpcode() == ISD::FREEZE)
return true;

// TODO: Assume we don't know anything for now.
EVT VT = Op.getValueType();
if (VT.isScalableVector())
return false;

APInt DemandedElts = VT.isVector()
APInt DemandedElts = VT.isFixedLengthVector()
? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return isGuaranteedNotToBeUndefOrPoison(Op, DemandedElts, PoisonOnly, Depth);
Expand Down Expand Up @@ -5190,6 +5186,10 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
}
return true;

case ISD::SPLAT_VECTOR:
return isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), PoisonOnly,
Depth + 1);

case ISD::VECTOR_SHUFFLE: {
APInt DemandedLHS, DemandedRHS;
auto *SVN = cast<ShuffleVectorSDNode>(Op);
Expand Down Expand Up @@ -5236,12 +5236,8 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, bool PoisonOnly,
bool ConsiderFlags,
unsigned Depth) const {
// TODO: Assume we don't know anything for now.
EVT VT = Op.getValueType();
if (VT.isScalableVector())
return true;

APInt DemandedElts = VT.isVector()
APInt DemandedElts = VT.isFixedLengthVector()
? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly, ConsiderFlags,
Expand All @@ -5251,11 +5247,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, bool PoisonOnly,
bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
bool PoisonOnly, bool ConsiderFlags,
unsigned Depth) const {
// TODO: Assume we don't know anything for now.
EVT VT = Op.getValueType();
if (VT.isScalableVector())
return true;

if (ConsiderFlags && Op->hasPoisonGeneratingFlags())
return true;

Expand Down Expand Up @@ -5292,6 +5283,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::BITCAST:
case ISD::BUILD_VECTOR:
case ISD::BUILD_PAIR:
case ISD::SPLAT_VECTOR:
return false;

case ISD::SELECT_CC:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -229,8 +229,8 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
; CHECK-NEXT: mov z6.d, z1.d
; CHECK-NEXT: mov z7.d, z0.d
; CHECK-NEXT: add x2, x2, x11
; CHECK-NEXT: cmpne p2.d, p0/z, z2.d, #0
; CHECK-NEXT: and p1.b, p1/z, p1.b, p2.b
; CHECK-NEXT: and z2.d, z2.d, #0xffffffff
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
Original file line number Diff line number Diff line change
Expand Up @@ -319,9 +319,8 @@ define i32 @ctz_nxv16i1_poison(<vscale x 16 x i1> %a) {
define i32 @ctz_and_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: ctz_and_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z1.b
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: cmpne p2.b, p1/z, z0.b, z1.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p2.b
; CHECK-NEXT: brkb p0.b, p1/z, p0.b
; CHECK-NEXT: cntp x0, p0, p0.b
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
Expand Down
116 changes: 116 additions & 0 deletions llvm/test/CodeGen/AArch64/sve-fcmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -544,3 +544,119 @@ define %svboolx2 @and_of_multiuse_fcmp_olt_zero(<vscale x 4 x i1> %pg, <vscale x
%ins.2 = insertvalue %svboolx2 %ins.1, <vscale x 4 x i1> %cmp, 1
ret %svboolx2 %ins.2
}

define <vscale x 8 x i1> @logical_and_oeq_zero_pred(<vscale x 8 x i1> %pg, <vscale x 8 x half> %x) {
; CHECK-LABEL: logical_and_oeq_zero_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0
; CHECK-NEXT: ret
%y = fcmp oeq <vscale x 8 x half> %x, zeroinitializer
%z = select <vscale x 8 x i1> %pg, <vscale x 8 x i1> %y, <vscale x 8 x i1> zeroinitializer
ret <vscale x 8 x i1> %z
}

define <vscale x 4 x i1> @logical_and_ogt_zero_pred(<vscale x 4 x i1> %pg, <vscale x 4 x half> %x) {
; CHECK-LABEL: logical_and_ogt_zero_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, #0.0
; CHECK-NEXT: ret
%y = fcmp ogt <vscale x 4 x half> %x, zeroinitializer
%z = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %y, <vscale x 4 x i1> zeroinitializer
ret <vscale x 4 x i1> %z
}

define <vscale x 2 x i1> @logical_and_oge_zero_pred(<vscale x 2 x i1> %pg, <vscale x 2 x half> %x) {
; CHECK-LABEL: logical_and_oge_zero_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: fcmge p0.h, p0/z, z0.h, #0.0
; CHECK-NEXT: ret
%y = fcmp oge <vscale x 2 x half> %x, zeroinitializer
%z = select <vscale x 2 x i1> %pg, <vscale x 2 x i1> %y, <vscale x 2 x i1> zeroinitializer
ret <vscale x 2 x i1> %z
}

define <vscale x 4 x i1> @logical_and_olt_zero_pred(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x) {
; CHECK-LABEL: logical_and_olt_zero_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: fcmlt p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: ret
%y = fcmp olt <vscale x 4 x float> %x, zeroinitializer
%z = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %y, <vscale x 4 x i1> zeroinitializer
ret <vscale x 4 x i1> %z
}

define <vscale x 2 x i1> @logical_and_ole_zero_pred(<vscale x 2 x i1> %pg, <vscale x 2 x float> %x) {
; CHECK-LABEL: logical_and_ole_zero_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: fcmle p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: ret
%y = fcmp ole <vscale x 2 x float> %x, zeroinitializer
%z = select <vscale x 2 x i1> %pg, <vscale x 2 x i1> %y, <vscale x 2 x i1> zeroinitializer
ret <vscale x 2 x i1> %z
}

define <vscale x 2 x i1> @logical_and_une_zero_pred(<vscale x 2 x i1> %pg, <vscale x 2 x double> %x) {
; CHECK-LABEL: logical_and_une_zero_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: fcmne p0.d, p0/z, z0.d, #0.0
; CHECK-NEXT: ret
%y = fcmp une <vscale x 2 x double> %x, zeroinitializer
%z = select <vscale x 2 x i1> %pg, <vscale x 2 x i1> %y, <vscale x 2 x i1> zeroinitializer
ret <vscale x 2 x i1> %z
}

define %svboolx2 @logical_and_of_multiuse_fcmp_ogt(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x, <vscale x 4 x float> %y) {
; CHECK-LABEL: logical_and_of_multiuse_fcmp_ogt:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: fcmgt p1.s, p1/z, z0.s, z1.s
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = fcmp ogt <vscale x 4 x float> %x, %y
%and = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %cmp, <vscale x 4 x i1> zeroinitializer
%ins.1 = insertvalue %svboolx2 poison, <vscale x 4 x i1> %and, 0
%ins.2 = insertvalue %svboolx2 %ins.1, <vscale x 4 x i1> %cmp, 1
ret %svboolx2 %ins.2
}

define %svboolx2 @logical_and_of_multiuse_fcmp_ogt_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x) {
; CHECK-LABEL: logical_and_of_multiuse_fcmp_ogt_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: fcmgt p1.s, p1/z, z0.s, #0.0
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = fcmp ogt <vscale x 4 x float> %x, zeroinitializer
%and = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %cmp, <vscale x 4 x i1> zeroinitializer
%ins.1 = insertvalue %svboolx2 poison, <vscale x 4 x i1> %and, 0
%ins.2 = insertvalue %svboolx2 %ins.1, <vscale x 4 x i1> %cmp, 1
ret %svboolx2 %ins.2
}

define %svboolx2 @logical_and_of_multiuse_fcmp_olt(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x, <vscale x 4 x float> %y) {
; CHECK-LABEL: logical_and_of_multiuse_fcmp_olt:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: fcmgt p1.s, p1/z, z1.s, z0.s
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = fcmp olt <vscale x 4 x float> %x, %y
%and = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %cmp, <vscale x 4 x i1> zeroinitializer
%ins.1 = insertvalue %svboolx2 poison, <vscale x 4 x i1> %and, 0
%ins.2 = insertvalue %svboolx2 %ins.1, <vscale x 4 x i1> %cmp, 1
ret %svboolx2 %ins.2
}

define %svboolx2 @logical_and_of_multiuse_fcmp_olt_zero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x) {
; CHECK-LABEL: logical_and_of_multiuse_fcmp_olt_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: fcmlt p1.s, p1/z, z0.s, #0.0
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%cmp = fcmp olt <vscale x 4 x float> %x, zeroinitializer
%and = select <vscale x 4 x i1> %pg, <vscale x 4 x i1> %cmp, <vscale x 4 x i1> zeroinitializer
%ins.1 = insertvalue %svboolx2 poison, <vscale x 4 x i1> %and, 0
%ins.2 = insertvalue %svboolx2 %ins.1, <vscale x 4 x i1> %cmp, 1
ret %svboolx2 %ins.2
}
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ define i64 @scalable_int_min_max(ptr %arg, ptr %arg1, <vscale x 2 x ptr> %i37, <
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z4.s
; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z3.s
; CHECK-NEXT: add z0.d, z2.d, z1.d
; CHECK-NEXT: not p2.b, p0/z, p2.b
; CHECK-NEXT: and p2.b, p1/z, p1.b, p2.b
; CHECK-NEXT: bic p2.b, p1/z, p1.b, p2.b
; CHECK-NEXT: mov z0.d, p2/m, z2.d
; CHECK-NEXT: sel z0.d, p1, z0.d, z2.d
; CHECK-NEXT: uaddv d0, p0, z0.d
Expand Down
26 changes: 17 additions & 9 deletions llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1501,18 +1501,23 @@ define <vscale x 8 x i32> @vwadd_vx_splat_zext_i1(<vscale x 8 x i1> %va, i16 %b)
; RV32: # %bb.0:
; RV32-NEXT: slli a0, a0, 16
; RV32-NEXT: srli a0, a0, 16
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vadd.vi v8, v8, 1, v0.t
; RV32-NEXT: addi a0, a0, 1
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(not blocking discussion for this review)

@topperc What's your feeling on this change? It looks to me like the before and after are about equal — do you agree?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed it looks about equal

; RV32-NEXT: vmerge.vxm v8, v8, a0, v0
; RV32-NEXT: ret
;
; RV64-LABEL: vwadd_vx_splat_zext_i1:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 48
; RV64-NEXT: srli a0, a0, 48
; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, mu
; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; RV64-NEXT: vmv.v.x v12, a0
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vmv.v.x v8, a0
; RV64-NEXT: vadd.vi v8, v8, 1, v0.t
; RV64-NEXT: li a0, 1
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT: vwaddu.vx v8, v12, a0, v0.t
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(non blocking for this review, continuing from previous)

This one looks a bit more questionable. It looks like maybe we need a guard in the vwadd combine for the case where the RHS is a legal immediate? It'd be really useful here if we had a vwadd.vi form, but we don't. Ignoring the passthru issue, which form do you think is likely better — vwaddu.vx with the immediate in a register, or vzext.vf + vadd.vi?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vzext.vf + vadd.vi seems like more work in the vector ALU. So I think vwaddu.vx w/immediate is better.

Hopefully we can find a way to fix this to match the RV32 codegen.

; RV64-NEXT: ret
%zb = zext i16 %b to i32
%head = insertelement <vscale x 8 x i32> poison, i32 %zb, i32 0
Expand Down Expand Up @@ -1570,20 +1575,23 @@ define <vscale x 8 x i32> @vwadd_vx_splat_sext_i1(<vscale x 8 x i1> %va, i16 %b)
; RV32: # %bb.0:
; RV32-NEXT: slli a0, a0, 16
; RV32-NEXT: srai a0, a0, 16
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: li a0, 1
; RV32-NEXT: vsub.vx v8, v8, a0, v0.t
; RV32-NEXT: addi a0, a0, -1
; RV32-NEXT: vmerge.vxm v8, v8, a0, v0
; RV32-NEXT: ret
;
; RV64-LABEL: vwadd_vx_splat_sext_i1:
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 48
; RV64-NEXT: srai a0, a0, 48
; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, mu
; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; RV64-NEXT: vmv.v.x v12, a0
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vmv.v.x v8, a0
; RV64-NEXT: li a0, 1
; RV64-NEXT: vsub.vx v8, v8, a0, v0.t
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT: vwsub.vx v8, v12, a0, v0.t
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(non blocking for this review, continuing from previous)

In this case, the rv32 codegen looks clearly better than the rv64.

; RV64-NEXT: ret
%sb = sext i16 %b to i32
%head = insertelement <vscale x 8 x i32> poison, i32 %sb, i32 0
Expand Down
Loading