Skip to content

Commit 7a8c04e

Browse files
committed
[DAG] Attempt shl narrowing in SimplifyDemandedBits
If a shl node leaves the upper half of the bits zero or undemanded, then see if we can profitably perform this with a half-width shl and a free trunc/zext. Follow-up to D146121. Differential Revision: https://reviews.llvm.org/D155472
1 parent b30765c commit 7a8c04e

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

41 files changed

+3468
-3895
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1784,6 +1784,38 @@ bool TargetLowering::SimplifyDemandedBits(
17841784
}
17851785
}
17861786

1787+
// Narrow shift to lower half - similar to ShrinkDemandedOp.
1788+
// (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1789+
unsigned HalfWidth = BitWidth / 2;
1790+
if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth) {
1791+
EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1792+
if (isNarrowingProfitable(VT, HalfVT) &&
1793+
isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1794+
isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1795+
(!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT))) {
1796+
// Unless we aren't demanding the upper bits at all, we must ensure
1797+
// that the upper bits of the shift result are known to be zero,
1798+
// which is equivalent to the narrow shift being NUW.
1799+
KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, Depth + 1);
1800+
bool IsNUW = Known0.countMinLeadingZeros() >= (ShAmt + HalfWidth);
1801+
if (IsNUW || DemandedBits.countLeadingZeros() >= HalfWidth) {
1802+
unsigned NumSignBits = TLO.DAG.ComputeNumSignBits(Op0, Depth + 1);
1803+
bool IsNSW = NumSignBits > (ShAmt + HalfWidth);
1804+
SDNodeFlags Flags;
1805+
Flags.setNoSignedWrap(IsNSW);
1806+
Flags.setNoUnsignedWrap(IsNUW);
1807+
SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1808+
SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
1809+
ShAmt, HalfVT, dl, TLO.LegalTypes());
1810+
SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1811+
NewShiftAmt, Flags);
1812+
SDValue NewExt =
1813+
TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1814+
return TLO.CombineTo(Op, NewExt);
1815+
}
1816+
}
1817+
}
1818+
17871819
APInt InDemandedMask = DemandedBits.lshr(ShAmt);
17881820
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
17891821
Depth + 1))

llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ define amdgpu_cs void @test_load_zext(i32 inreg %0, i32 inreg %1, i32 inreg %res
3232
; the base may be the RHS operand of the load in SDAG.
3333
; GCN-LABEL: name: test_complex_reg_offset
3434
; GCN-DAG: %[[BASE:.*]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @0 + 4,
35-
; GCN-DAG: %[[OFFSET:.*]]:sreg_32 = S_LSHL_B32
35+
; SDAG-DAG: %[[OFFSET:.*]]:sreg_32 = nuw nsw S_LSHL_B32
36+
; GISEL-DAG: %[[OFFSET:.*]]:sreg_32 = S_LSHL_B32
3637
; SDAG: S_LOAD_DWORD_SGPR_IMM killed %[[BASE]], killed %[[OFFSET]], 0, 0
3738
; GISEL: S_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[OFFSET]], 0, 0
3839
define amdgpu_ps void @test_complex_reg_offset(ptr addrspace(1) %out) {

0 commit comments

Comments (0)