Skip to content

[DAG] SimplifyDemandedBits - ensure we demand the high bits for shl nsw/nuw ops #70041

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 21 additions & 10 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1785,14 +1785,22 @@ bool TargetLowering::SimplifyDemandedBits(
}

APInt InDemandedMask = DemandedBits.lshr(ShAmt);

// If the shift is NUW/NSW, then it does demand the high bits.
if (Op->getFlags().hasNoSignedWrap())
InDemandedMask.setHighBits(ShAmt + 1);
else if (Op->getFlags().hasNoUnsignedWrap())
InDemandedMask.setHighBits(ShAmt);

if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero <<= ShAmt;
Known.One <<= ShAmt;
// low bits known zero.
Known.Zero.setLowBits(ShAmt);

Known =
KnownBits::shl(Known, KnownBits::makeConstant(APInt(BitWidth, ShAmt)),
/* NUW */ Op->getFlags().hasNoUnsignedWrap(),
/* NSW */ Op->getFlags().hasNoSignedWrap());

// Attempt to avoid multi-use ops if we don't need anything from them.
if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
Expand Down Expand Up @@ -2255,13 +2263,16 @@ bool TargetLowering::SimplifyDemandedBits(
break;
}
case ISD::CTPOP: {
// If only 1 bit is demanded, replace with PARITY as long as we're before
// op legalization.
// If only bit0 of 'active bits' is demanded, replace with PARITY as long as
// we're before op legalization.
// FIXME: Limit to scalars for now.
if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
Op.getOperand(0)));

if (!TLO.LegalOps && !VT.isVector()) {
APInt NonZeroMask =
APInt::getLowBitsSet(BitWidth, llvm::bit_width(BitWidth));
if ((DemandedBits & NonZeroMask).isOne())
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::PARITY, dl, VT, Op.getOperand(0)));
}
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
break;
}
Expand Down
5 changes: 3 additions & 2 deletions llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,9 @@ entry:
define i32 @extendedLeftShiftshortTointBy16(i16 signext %a) nounwind readnone ssp {
; CHECK-LABEL: extendedLeftShiftshortTointBy16:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: lsl w8, w0, #16
; CHECK-NEXT: add w0, w8, #16, lsl #12 ; =65536
; CHECK-NEXT: add w8, w0, #1
; CHECK-NEXT: and w8, w8, #0xffff
; CHECK-NEXT: lsl w0, w8, #16
; CHECK-NEXT: ret
entry:
%inc = add i16 %a, 1
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/load-combine.ll
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,7 @@ define void @short_vector_to_i32_unused_low_i8(ptr %in, ptr %out, ptr %p) {
; CHECK-NEXT: umov w10, v0.h[3]
; CHECK-NEXT: lsl w8, w8, #16
; CHECK-NEXT: bfi w8, w9, #8, #8
; CHECK-NEXT: orr w8, w8, w10, lsl #24
; CHECK-NEXT: bfi w8, w10, #24, #8
; CHECK-NEXT: str w8, [x1]
; CHECK-NEXT: ret
%ld = load <4 x i8>, ptr %in, align 4
Expand Down Expand Up @@ -609,8 +609,8 @@ define void @short_vector_to_i32_unused_high_i8(ptr %in, ptr %out, ptr %p) {
; CHECK-NEXT: ldrh w9, [x0]
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: umov w8, v0.h[2]
; CHECK-NEXT: orr w8, w9, w8, lsl #16
; CHECK-NEXT: str w8, [x1]
; CHECK-NEXT: bfi w9, w8, #16, #8
; CHECK-NEXT: str w9, [x1]
; CHECK-NEXT: ret
%ld = load <4 x i8>, ptr %in, align 4

Expand Down Expand Up @@ -640,7 +640,7 @@ define void @short_vector_to_i32_unused_low_i16(ptr %in, ptr %out, ptr %p) {
; CHECK-NEXT: umov w8, v0.h[3]
; CHECK-NEXT: umov w9, v0.h[2]
; CHECK-NEXT: lsl w8, w8, #24
; CHECK-NEXT: orr w8, w8, w9, lsl #16
; CHECK-NEXT: bfi w8, w9, #16, #8
; CHECK-NEXT: str w8, [x1]
; CHECK-NEXT: ret
%ld = load <4 x i8>, ptr %in, align 4
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/shl.ll
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,7 @@ define amdgpu_kernel void @shl_i16_i_s(ptr addrspace(1) %out, i16 zeroext %a) {
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_and_b32 s4, s4, 15
; VI-NEXT: s_lshl_b32 s4, s4, 12
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
Expand Down
10 changes: 2 additions & 8 deletions llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,7 @@ define amdgpu_gfx void @strict_wwm_no_cfg(ptr addrspace(8) inreg %tmp14) {
; GFX9-O0-NEXT: s_mov_b64 exec, s[40:41]
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0
; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[40:41], v3, v4
; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[40:41]
; GFX9-O0-NEXT: s_mov_b32 s35, 1
; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s35, v3
; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[40:41]
; GFX9-O0-NEXT: s_mov_b32 s35, 2
; GFX9-O0-NEXT: v_and_b32_e64 v3, v3, s35
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[36:39], s34 offset:4
Expand Down Expand Up @@ -101,7 +99,6 @@ define amdgpu_gfx void @strict_wwm_no_cfg(ptr addrspace(8) inreg %tmp14) {
; GFX9-O3-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5
; GFX9-O3-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; GFX9-O3-NEXT: v_lshlrev_b32_e32 v4, 1, v4
; GFX9-O3-NEXT: v_and_b32_e32 v4, 2, v4
; GFX9-O3-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:4
; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1
; GFX9-O3-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
Expand Down Expand Up @@ -235,9 +232,7 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg)
; GFX9-O0-NEXT: v_readlane_b32 s35, v0, 3
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[36:37], v3, v4
; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[36:37]
; GFX9-O0-NEXT: s_mov_b32 s36, 1
; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s36, v3
; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[36:37]
; GFX9-O0-NEXT: s_mov_b32 s36, 2
; GFX9-O0-NEXT: v_and_b32_e64 v3, v3, s36
; GFX9-O0-NEXT: s_mov_b32 s40, s35
Expand Down Expand Up @@ -302,7 +297,6 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg)
; GFX9-O3-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
; GFX9-O3-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9-O3-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX9-O3-NEXT: v_and_b32_e32 v0, 2, v0
; GFX9-O3-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:4
; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1
; GFX9-O3-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
Expand Down
56 changes: 23 additions & 33 deletions llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,16 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9LE-LABEL: test64:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: add 5, 3, 4
; P9LE-NEXT: lfdx 0, 3, 4
; P9LE-NEXT: lxsdx 2, 3, 4
; P9LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; P9LE-NEXT: xxlxor 2, 2, 2
; P9LE-NEXT: xxlxor 1, 1, 1
; P9LE-NEXT: vspltisw 4, 8
; P9LE-NEXT: lxsd 3, 4(5)
; P9LE-NEXT: addi 3, 3, .LCPI0_0@toc@l
; P9LE-NEXT: vadduwm 4, 4, 4
; P9LE-NEXT: lxv 1, 0(3)
; P9LE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
; P9LE-NEXT: addi 3, 3, .LCPI0_1@toc@l
; P9LE-NEXT: xxperm 2, 0, 1
; P9LE-NEXT: lxv 0, 0(3)
; P9LE-NEXT: xxperm 3, 3, 0
; P9LE-NEXT: xxperm 3, 1, 0
; P9LE-NEXT: xxperm 2, 1, 0
; P9LE-NEXT: vnegw 3, 3
; P9LE-NEXT: vslw 3, 3, 4
; P9LE-NEXT: vsubuwm 2, 3, 2
Expand All @@ -50,11 +47,8 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-NEXT: addi 3, 3, .LCPI0_0@toc@l
; P9BE-NEXT: vadduwm 4, 4, 4
; P9BE-NEXT: lxv 0, 0(3)
; P9BE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
; P9BE-NEXT: addi 3, 3, .LCPI0_1@toc@l
; P9BE-NEXT: xxperm 3, 1, 0
; P9BE-NEXT: xxperm 2, 1, 0
; P9BE-NEXT: lxv 0, 0(3)
; P9BE-NEXT: xxperm 3, 3, 0
; P9BE-NEXT: vnegw 3, 3
; P9BE-NEXT: vslw 3, 3, 4
; P9BE-NEXT: vsubuwm 2, 3, 2
Expand All @@ -71,11 +65,9 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-AIX-NEXT: vspltisw 4, 8
; P9BE-AIX-NEXT: lxsd 3, 4(5)
; P9BE-AIX-NEXT: lxv 0, 0(3)
; P9BE-AIX-NEXT: ld 3, L..C1(2) # %const.1
; P9BE-AIX-NEXT: vadduwm 4, 4, 4
; P9BE-AIX-NEXT: xxperm 3, 1, 0
; P9BE-AIX-NEXT: xxperm 2, 1, 0
; P9BE-AIX-NEXT: lxv 0, 0(3)
; P9BE-AIX-NEXT: xxperm 3, 3, 0
; P9BE-AIX-NEXT: vnegw 3, 3
; P9BE-AIX-NEXT: vslw 3, 3, 4
; P9BE-AIX-NEXT: vsubuwm 2, 3, 2
Expand All @@ -86,25 +78,23 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-AIX32-LABEL: test64:
; P9BE-AIX32: # %bb.0: # %entry
; P9BE-AIX32-NEXT: lwzux 4, 3, 4
; P9BE-AIX32-NEXT: xxlxor 2, 2, 2
; P9BE-AIX32-NEXT: vspltisw 4, 8
; P9BE-AIX32-NEXT: stw 4, -48(1)
; P9BE-AIX32-NEXT: vadduwm 4, 4, 4
; P9BE-AIX32-NEXT: stw 4, -48(1)
; P9BE-AIX32-NEXT: lwz 4, 4(3)
; P9BE-AIX32-NEXT: lxv 0, -48(1)
; P9BE-AIX32-NEXT: stw 4, -32(1)
; P9BE-AIX32-NEXT: lwz 4, L..C0(2) # %const.0
; P9BE-AIX32-NEXT: lxv 1, -32(1)
; P9BE-AIX32-NEXT: lwz 3, 8(3)
; P9BE-AIX32-NEXT: lxv 1, -32(1)
; P9BE-AIX32-NEXT: stw 3, -16(1)
; P9BE-AIX32-NEXT: lwz 3, L..C1(2) # %const.1
; P9BE-AIX32-NEXT: lxv 2, 0(4)
; P9BE-AIX32-NEXT: lxv 3, -16(1)
; P9BE-AIX32-NEXT: xxmrghw 2, 0, 1
; P9BE-AIX32-NEXT: lxv 0, 0(4)
; P9BE-AIX32-NEXT: xxperm 2, 2, 0
; P9BE-AIX32-NEXT: lxv 0, -16(1)
; P9BE-AIX32-NEXT: xxmrghw 3, 1, 0
; P9BE-AIX32-NEXT: lxv 0, 0(3)
; P9BE-AIX32-NEXT: xxperm 3, 3, 0
; P9BE-AIX32-NEXT: xxlxor 0, 0, 0
; P9BE-AIX32-NEXT: xxperm 2, 0, 2
; P9BE-AIX32-NEXT: xxmrghw 3, 1, 3
; P9BE-AIX32-NEXT: xxperm 3, 0, 2
; P9BE-AIX32-NEXT: vnegw 3, 3
; P9BE-AIX32-NEXT: vslw 3, 3, 4
; P9BE-AIX32-NEXT: vsubuwm 2, 3, 2
Expand Down Expand Up @@ -180,7 +170,7 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-AIX: # %bb.0: # %entry
; P9BE-AIX-NEXT: add 5, 3, 4
; P9BE-AIX-NEXT: lxsiwzx 2, 3, 4
; P9BE-AIX-NEXT: ld 3, L..C2(2) # %const.0
; P9BE-AIX-NEXT: ld 3, L..C1(2) # %const.0
; P9BE-AIX-NEXT: xxlxor 0, 0, 0
; P9BE-AIX-NEXT: vspltisw 4, 8
; P9BE-AIX-NEXT: lxv 1, 0(3)
Expand All @@ -200,7 +190,7 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
; P9BE-AIX32: # %bb.0: # %entry
; P9BE-AIX32-NEXT: add 5, 3, 4
; P9BE-AIX32-NEXT: lxsiwzx 2, 3, 4
; P9BE-AIX32-NEXT: lwz 3, L..C2(2) # %const.0
; P9BE-AIX32-NEXT: lwz 3, L..C1(2) # %const.0
; P9BE-AIX32-NEXT: xxlxor 0, 0, 0
; P9BE-AIX32-NEXT: vspltisw 4, 8
; P9BE-AIX32-NEXT: lxv 1, 0(3)
Expand Down Expand Up @@ -297,9 +287,9 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
; P9BE-AIX-NEXT: li 7, 16
; P9BE-AIX-NEXT: add 6, 3, 4
; P9BE-AIX-NEXT: lxsihzx 1, 3, 4
; P9BE-AIX-NEXT: ld 3, L..C3(2) # %const.1
; P9BE-AIX-NEXT: ld 3, L..C2(2) # %const.1
; P9BE-AIX-NEXT: lxsihzx 2, 6, 7
; P9BE-AIX-NEXT: ld 6, L..C4(2) # %const.0
; P9BE-AIX-NEXT: ld 6, L..C3(2) # %const.0
; P9BE-AIX-NEXT: lxv 0, 0(6)
; P9BE-AIX-NEXT: li 6, 0
; P9BE-AIX-NEXT: mtvsrwz 3, 6
Expand Down Expand Up @@ -328,7 +318,7 @@ define void @test16(ptr nocapture readonly %sums, i32 signext %delta, i32 signex
; P9BE-AIX32-NEXT: sth 4, -48(1)
; P9BE-AIX32-NEXT: lxv 4, -48(1)
; P9BE-AIX32-NEXT: sth 3, -32(1)
; P9BE-AIX32-NEXT: lwz 3, L..C3(2) # %const.0
; P9BE-AIX32-NEXT: lwz 3, L..C2(2) # %const.0
; P9BE-AIX32-NEXT: lxv 3, -32(1)
; P9BE-AIX32-NEXT: vmrghh 4, 2, 4
; P9BE-AIX32-NEXT: lxv 0, 0(3)
Expand Down Expand Up @@ -437,9 +427,9 @@ define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext
; P9BE-AIX-NEXT: add 6, 3, 4
; P9BE-AIX-NEXT: li 7, 8
; P9BE-AIX-NEXT: lxsibzx 3, 3, 4
; P9BE-AIX-NEXT: ld 3, L..C5(2) # %const.1
; P9BE-AIX-NEXT: ld 3, L..C4(2) # %const.1
; P9BE-AIX-NEXT: lxsibzx 0, 6, 7
; P9BE-AIX-NEXT: ld 6, L..C6(2) # %const.0
; P9BE-AIX-NEXT: ld 6, L..C5(2) # %const.0
; P9BE-AIX-NEXT: lxv 1, 0(6)
; P9BE-AIX-NEXT: li 6, 0
; P9BE-AIX-NEXT: mtvsrwz 2, 6
Expand All @@ -464,9 +454,9 @@ define void @test8(ptr nocapture readonly %sums, i32 signext %delta, i32 signext
; P9BE-AIX32-NEXT: add 6, 3, 4
; P9BE-AIX32-NEXT: li 7, 8
; P9BE-AIX32-NEXT: lxsibzx 3, 3, 4
; P9BE-AIX32-NEXT: lwz 3, L..C4(2) # %const.1
; P9BE-AIX32-NEXT: lwz 3, L..C3(2) # %const.1
; P9BE-AIX32-NEXT: lxsibzx 0, 6, 7
; P9BE-AIX32-NEXT: lwz 6, L..C5(2) # %const.0
; P9BE-AIX32-NEXT: lwz 6, L..C4(2) # %const.0
; P9BE-AIX32-NEXT: lxv 1, 0(6)
; P9BE-AIX32-NEXT: li 6, 0
; P9BE-AIX32-NEXT: mtvsrwz 2, 6
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/RISCV/rv64i-complex-float.ll
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ define i64 @complex_float_add(i64 %a.coerce, i64 %b.coerce) nounwind {
; CHECK-NEXT: mv a0, s0
; CHECK-NEXT: mv a1, s1
; CHECK-NEXT: call __addsf3@plt
; CHECK-NEXT: andi a0, a0, -1
; CHECK-NEXT: slli a0, a0, 32
; CHECK-NEXT: slli s2, s2, 32
; CHECK-NEXT: srli a1, s2, 32
Expand Down
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/pr61561.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@ define <vscale x 4 x i8> @foo(ptr %p) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: vl1re16.v v8, (a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vsll.vi v8, v8, 3
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
; CHECK-NEXT: vsll.vi v8, v10, 3
; CHECK-NEXT: li a0, 248
; CHECK-NEXT: vand.vx v8, v10, a0
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: lui a0, 4
; CHECK-NEXT: vmv.v.x v10, a0
; CHECK-NEXT: lui a0, 1
Expand Down
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/RISCV/split-store.ll
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ define void @int32_int32_pair(i32 %tmp1, i32 %tmp2, ptr %ref.tmp) {
;
; RV64-LABEL: int32_int32_pair:
; RV64: # %bb.0:
; RV64-NEXT: andi a1, a1, -1
; RV64-NEXT: slli a1, a1, 32
; RV64-NEXT: slli a0, a0, 32
; RV64-NEXT: srli a0, a0, 32
Expand All @@ -138,6 +139,7 @@ define void @int32_int32_pair(i32 %tmp1, i32 %tmp2, ptr %ref.tmp) {
;
; RV64D-LABEL: int32_int32_pair:
; RV64D: # %bb.0:
; RV64D-NEXT: andi a1, a1, -1
; RV64D-NEXT: slli a1, a1, 32
; RV64D-NEXT: slli a0, a0, 32
; RV64D-NEXT: srli a0, a0, 32
Expand Down
Loading