Skip to content

[DAGCombiner] Add basic support for trunc nsw/nuw #113808

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 35 additions & 25 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2330,6 +2330,8 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
if (N->getOpcode() == ISD::TRUNCATE) {
Op = N->getOperand(0);
Known = DAG.computeKnownBits(Op);
if (N->getFlags().hasNoUnsignedWrap())
Known.Zero.setBitsFrom(N.getScalarValueSizeInBits());
return true;
}

Expand Down Expand Up @@ -13889,23 +13891,27 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
unsigned OpBits = Op.getScalarValueSizeInBits();
unsigned MidBits = N0.getScalarValueSizeInBits();
unsigned DestBits = VT.getScalarSizeInBits();
unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

if (OpBits == DestBits) {
// Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
// bits, it is already ready.
if (NumSignBits > DestBits-MidBits)
if (N0->getFlags().hasNoSignedWrap() ||
DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
if (OpBits == DestBits) {
// Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
// bits, it is already ready.
return Op;
} else if (OpBits < DestBits) {
// Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
// bits, just sext from i32.
if (NumSignBits > OpBits-MidBits)
}

if (OpBits < DestBits) {
// Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
// bits, just sext from i32.
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
} else {
}

// Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
// bits, just truncate to i32.
if (NumSignBits > OpBits-MidBits)
return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
SDNodeFlags Flags;
Flags.setNoSignedWrap(true);
Flags.setNoUnsignedWrap(N0->getFlags().hasNoUnsignedWrap());
return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
}

// fold (sext (truncate x)) -> (sextinreg x).
Expand Down Expand Up @@ -14176,24 +14182,28 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
unsigned OpBits = SrcVT.getScalarSizeInBits();
unsigned MidBits = MinVT.getScalarSizeInBits();
unsigned DestBits = VT.getScalarSizeInBits();
unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

if (OpBits == DestBits) {
// Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
// bits, it is already ready.
if (NumSignBits > DestBits - MidBits)
if (N0->getFlags().hasNoSignedWrap() ||
DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
if (OpBits == DestBits) {
// Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
// bits, it is already ready.
return Op;
} else if (OpBits < DestBits) {
// Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
// bits, just sext from i32.
// FIXME: This can probably be ZERO_EXTEND nneg?
if (NumSignBits > OpBits - MidBits)
}

if (OpBits < DestBits) {
// Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
// bits, just sext from i32.
// FIXME: This can probably be ZERO_EXTEND nneg?
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
} else {
}

// Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
// bits, just truncate to i32.
if (NumSignBits > OpBits - MidBits)
return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
SDNodeFlags Flags;
Flags.setNoSignedWrap(true);
Flags.setNoUnsignedWrap(true);
return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
}
}

Expand Down
8 changes: 7 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3826,7 +3826,13 @@ void SelectionDAGBuilder::visitTrunc(const User &I) {
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
SDNodeFlags Flags;
if (auto *Trunc = dyn_cast<TruncInst>(&I)) {
Flags.setNoSignedWrap(Trunc->hasNoSignedWrap());
Flags.setNoUnsignedWrap(Trunc->hasNoUnsignedWrap());
}

setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N, Flags));
}

void SelectionDAGBuilder::visitZExt(const User &I) {
Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2604,8 +2604,12 @@ bool TargetLowering::SimplifyDemandedBits(
unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
APInt TruncMask = DemandedBits.zext(OperandBitWidth);
if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
Depth + 1))
Depth + 1)) {
// Disable the nsw and nuw flags. We can no longer guarantee that we
// won't wrap after simplification.
Op->dropFlags(SDNodeFlags::NoWrap);
return true;
}
Known = Known.trunc(BitWidth);

// Attempt to avoid multi-use ops if we don't need anything from them.
Expand Down
60 changes: 60 additions & 0 deletions llvm/test/CodeGen/AArch64/trunc-nsw-nuw.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s

; Truncates the i64 argument to i32 with both the nuw and nsw flags set and
; returns the unsigned remainder of that value divided by 10000.
; The CHECK lines expect a multiply by a materialized constant, a logical
; right shift by 45 and an msub by 10000; no division instruction appears.
define zeroext i32 @trunc_nuw_nsw_urem(i64 %x) nounwind {
; CHECK-LABEL: trunc_nuw_nsw_urem:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #5977 // =0x1759
; CHECK-NEXT: mov w9, #10000 // =0x2710
; CHECK-NEXT: movk w8, #53687, lsl #16
; CHECK-NEXT: mul x8, x0, x8
; CHECK-NEXT: lsr x8, x8, #45
; CHECK-NEXT: msub w0, w8, w9, w0
; CHECK-NEXT: ret
entry:
%trunc = trunc nuw nsw i64 %x to i32
%rem = urem i32 %trunc, 10000
ret i32 %rem
}

; Truncates the i64 argument to i16 with the nuw flag, divides it (unsigned)
; by 160 and widens the quotient back to i64 with zext nneg.
; The CHECK lines expect only a 32-bit multiply by 0xcccd and a logical right
; shift by 23; no separate masking or extension instruction appears.
define i64 @zext_nneg_udiv_trunc_nuw(i64 %x) nounwind {
; CHECK-LABEL: zext_nneg_udiv_trunc_nuw:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #52429 // =0xcccd
; CHECK-NEXT: mul w8, w0, w8
; CHECK-NEXT: lsr w0, w8, #23
; CHECK-NEXT: ret
entry:
%trunc = trunc nuw i64 %x to i16
%div = udiv i16 %trunc, 160
%ext = zext nneg i16 %div to i64
ret i64 %ext
}

; Truncates the i64 argument to i16 with the nuw flag, divides it (unsigned)
; by 160 and sign-extends the quotient back to i64.
; The CHECK lines expect the same multiply-by-0xcccd and shift-by-23 sequence
; as zext_nneg_udiv_trunc_nuw, with no explicit sign-extension instruction.
define i64 @sext_udiv_trunc_nuw(i64 %x) nounwind {
; CHECK-LABEL: sext_udiv_trunc_nuw:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, #52429 // =0xcccd
; CHECK-NEXT: mul w8, w0, w8
; CHECK-NEXT: lsr w0, w8, #23
; CHECK-NEXT: ret
entry:
%trunc = trunc nuw i64 %x to i16
%div = udiv i16 %trunc, 160
%ext = sext i16 %div to i64
ret i64 %ext
}

; Truncates the i64 index to i32 (nuw nsw), adds 5 (nuw nsw), widens the sum
; back to i64 with zext nneg and uses it to index a float array through a
; nusw getelementptr.
; The CHECK lines expect a 32-bit add of 5 followed by a single add that
; applies a uxtw #2 extended operand to the base pointer.
define ptr @gep_nusw_zext_nneg_add_trunc_nuw_nsw(ptr %p, i64 %x) nounwind {
; CHECK-LABEL: gep_nusw_zext_nneg_add_trunc_nuw_nsw:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: add w8, w1, #5
; CHECK-NEXT: add x0, x0, w8, uxtw #2
; CHECK-NEXT: ret
entry:
%trunc = trunc nuw nsw i64 %x to i32
%add = add nuw nsw i32 %trunc, 5
%offset = zext nneg i32 %add to i64
%gep = getelementptr nusw float, ptr %p, i64 %offset
ret ptr %gep
}
78 changes: 78 additions & 0 deletions llvm/test/CodeGen/RISCV/trunc-nsw-nuw.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s

; Adds 1 (nsw) to the sign-extended i32 argument and truncates the sum to i8
; with the nsw flag; the i8 result is returned as signext.
; The CHECK lines expect a single addiw with no additional instruction to
; sign-extend the i8 return value.
define signext i8 @trunc_nsw_add(i32 signext %x) nounwind {
; CHECK-LABEL: trunc_nsw_add:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addiw a0, a0, 1
; CHECK-NEXT: ret
entry:
%add = add nsw i32 %x, 1
%trunc = trunc nsw i32 %add to i8
ret i8 %trunc
}

; Truncates the i64 argument to i32 with both the nuw and nsw flags set and
; returns the unsigned remainder of that value divided by 10000 as a signext
; i32.
; The CHECK lines expect a multiply by a materialized constant, a logical
; right shift by 45, a multiply by 10000 (lui 2 + addi 1808) and a subw; no
; division or remainder instruction appears.
define signext i32 @trunc_nuw_nsw_urem(i64 %x) nounwind {
; CHECK-LABEL: trunc_nuw_nsw_urem:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a1, 210
; CHECK-NEXT: addiw a1, a1, -1167
; CHECK-NEXT: slli a1, a1, 12
; CHECK-NEXT: addi a1, a1, 1881
; CHECK-NEXT: mul a1, a0, a1
; CHECK-NEXT: srli a1, a1, 45
; CHECK-NEXT: lui a2, 2
; CHECK-NEXT: addi a2, a2, 1808
; CHECK-NEXT: mul a1, a1, a2
; CHECK-NEXT: subw a0, a0, a1
; CHECK-NEXT: ret
entry:
%trunc = trunc nuw nsw i64 %x to i32
%rem = urem i32 %trunc, 10000
ret i32 %rem
}

; Truncates the i64 argument to i16 with the nuw flag, divides it (unsigned)
; by 160 and widens the quotient back to i64 with zext nneg.
; The CHECK lines expect a multiply by 52429 (lui 13 + addi -819) and a
; srliw by 23; no separate masking or extension instruction appears.
define i64 @zext_nneg_udiv_trunc_nuw(i64 %x) nounwind {
; CHECK-LABEL: zext_nneg_udiv_trunc_nuw:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a1, 13
; CHECK-NEXT: addi a1, a1, -819
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: srliw a0, a0, 23
; CHECK-NEXT: ret
entry:
%trunc = trunc nuw i64 %x to i16
%div = udiv i16 %trunc, 160
%ext = zext nneg i16 %div to i64
ret i64 %ext
}

; Truncates the i64 argument to i16 with the nuw flag, divides it (unsigned)
; by 160 and sign-extends the quotient back to i64.
; The CHECK lines expect the same multiply and srliw-by-23 sequence as
; zext_nneg_udiv_trunc_nuw, with no explicit sign-extension instruction.
define i64 @sext_udiv_trunc_nuw(i64 %x) nounwind {
; CHECK-LABEL: sext_udiv_trunc_nuw:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a1, 13
; CHECK-NEXT: addi a1, a1, -819
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: srliw a0, a0, 23
; CHECK-NEXT: ret
entry:
%trunc = trunc nuw i64 %x to i16
%div = udiv i16 %trunc, 160
%ext = sext i16 %div to i64
ret i64 %ext
}

; Truncates the i64 index to i32 (nuw nsw), adds 5 (nuw nsw), widens the sum
; back to i64 with zext nneg and uses it to index a float array through a
; nusw getelementptr.
; The CHECK lines expect the 64-bit index shifted left by 2 and added to the
; base pointer, followed by an addi of 20; no truncation or extension
; instruction appears.
define ptr @gep_nusw_zext_nneg_add_trunc_nuw_nsw(ptr %p, i64 %x) nounwind {
; CHECK-LABEL: gep_nusw_zext_nneg_add_trunc_nuw_nsw:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: add a0, a1, a0
; CHECK-NEXT: addi a0, a0, 20
; CHECK-NEXT: ret
entry:
%trunc = trunc nuw nsw i64 %x to i32
%add = add nuw nsw i32 %trunc, 5
%offset = zext nneg i32 %add to i64
%gep = getelementptr nusw float, ptr %p, i64 %offset
ret ptr %gep
}
83 changes: 83 additions & 0 deletions llvm/test/CodeGen/X86/trunc-nsw-nuw.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=x86_64 | FileCheck %s

; Truncates the i64 argument to i32 with both the nuw and nsw flags set and
; returns the unsigned remainder of that value divided by 10000.
; The CHECK lines expect a 64-bit multiply by 0xD1B71759, a logical right
; shift by 45, a multiply by 10000 and a 32-bit subtract; no div instruction
; appears.
define zeroext i32 @trunc_nuw_nsw_urem(i64 %x) nounwind {
; CHECK-LABEL: trunc_nuw_nsw_urem:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: movl $3518437209, %ecx # imm = 0xD1B71759
; CHECK-NEXT: imulq %rdi, %rcx
; CHECK-NEXT: shrq $45, %rcx
; CHECK-NEXT: imull $10000, %ecx, %ecx # imm = 0x2710
; CHECK-NEXT: subl %ecx, %eax
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-NEXT: retq
entry:
%trunc = trunc nuw nsw i64 %x to i32
%rem = urem i32 %trunc, 10000
ret i32 %rem
}

; Truncates the i64 argument to i16 with the nuw flag, divides it (unsigned)
; by 160 and widens the quotient back to i64 with zext nneg.
; The CHECK lines expect only a 32-bit multiply by 0xCCCD and a logical right
; shift by 23; no separate masking or extension instruction appears.
define i64 @zext_nneg_udiv_trunc_nuw(i64 %x) nounwind {
; CHECK-LABEL: zext_nneg_udiv_trunc_nuw:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: imull $52429, %edi, %eax # imm = 0xCCCD
; CHECK-NEXT: shrl $23, %eax
; CHECK-NEXT: retq
entry:
%trunc = trunc nuw i64 %x to i16
%div = udiv i16 %trunc, 160
%ext = zext nneg i16 %div to i64
ret i64 %ext
}

; Truncates the i64 argument to i16 with the nuw flag, divides it (unsigned)
; by 160 and sign-extends the quotient back to i64.
; The CHECK lines expect the same imull/shrl sequence as
; zext_nneg_udiv_trunc_nuw, with no explicit sign-extension instruction.
define i64 @sext_udiv_trunc_nuw(i64 %x) nounwind {
; CHECK-LABEL: sext_udiv_trunc_nuw:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: imull $52429, %edi, %eax # imm = 0xCCCD
; CHECK-NEXT: shrl $23, %eax
; CHECK-NEXT: retq
entry:
%trunc = trunc nuw i64 %x to i16
%div = udiv i16 %trunc, 160
%ext = sext i16 %div to i64
ret i64 %ext
}

; Truncates the i64 index to i32 (nuw nsw), adds 5 (nuw nsw), widens the sum
; back to i64 with zext nneg and uses it to index a float array through a
; nusw getelementptr.
; The CHECK lines expect the whole address computation to be a single leaq
; with base %rdi, index %rsi scaled by 4 and displacement 20.
define ptr @gep_nusw_zext_nneg_add_trunc_nuw_nsw(ptr %p, i64 %x) nounwind {
; CHECK-LABEL: gep_nusw_zext_nneg_add_trunc_nuw_nsw:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: leaq 20(%rdi,%rsi,4), %rax
; CHECK-NEXT: retq
entry:
%trunc = trunc nuw nsw i64 %x to i32
%add = add nuw nsw i32 %trunc, 5
%offset = zext nneg i32 %add to i64
%gep = getelementptr nusw float, ptr %p, i64 %offset
ret ptr %gep
}

; Make sure nsw flag is dropped after we simplify the operand of TRUNCATE.
; The IR selects 4 or 0 from %y, sign-extends the i1 %x to i64, xors the two,
; truncates the result to i32 with nsw and divides it (signed) by -10765.
; The CHECK lines expect the xor to be computed in 32-bit registers and then
; explicitly sign-extended with movslq before the multiply-based division
; sequence.

define i32 @simplify_demanded_bits_drop_flag(i1 zeroext %x, i1 zeroext %y) nounwind {
; CHECK-LABEL: simplify_demanded_bits_drop_flag:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: negl %edi
; CHECK-NEXT: shll $2, %esi
; CHECK-NEXT: xorl %edi, %esi
; CHECK-NEXT: movslq %esi, %rax
; CHECK-NEXT: imulq $-1634202141, %rax, %rax # imm = 0x9E980DE3
; CHECK-NEXT: movq %rax, %rcx
; CHECK-NEXT: shrq $63, %rcx
; CHECK-NEXT: sarq $44, %rax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-NEXT: retq
entry:
%sel = select i1 %y, i64 4, i64 0
%conv0 = sext i1 %x to i64
%xor = xor i64 %sel, %conv0
%conv1 = trunc nsw i64 %xor to i32
%div = sdiv i32 %conv1, -10765
ret i32 %div
}
Loading