Skip to content

Commit f74aed7

Browse files
authored
[DAGCombiner] Add basic support for trunc nsw/nuw (#113808)
This patch adds basic support for `trunc nsw/nuw` in SelectionDAG (SDAG). This allows DAGCombiner to eliminate more in-register `zext`/`sext` instructions.
1 parent 3aa2f63 commit f74aed7

File tree

6 files changed

+268
-27
lines changed

6 files changed

+268
-27
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 35 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2330,6 +2330,8 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
23302330
if (N->getOpcode() == ISD::TRUNCATE) {
23312331
Op = N->getOperand(0);
23322332
Known = DAG.computeKnownBits(Op);
2333+
if (N->getFlags().hasNoUnsignedWrap())
2334+
Known.Zero.setBitsFrom(N.getScalarValueSizeInBits());
23332335
return true;
23342336
}
23352337

@@ -13889,23 +13891,27 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
1388913891
unsigned OpBits = Op.getScalarValueSizeInBits();
1389013892
unsigned MidBits = N0.getScalarValueSizeInBits();
1389113893
unsigned DestBits = VT.getScalarSizeInBits();
13892-
unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
1389313894

13894-
if (OpBits == DestBits) {
13895-
// Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13896-
// bits, it is already ready.
13897-
if (NumSignBits > DestBits-MidBits)
13895+
if (N0->getFlags().hasNoSignedWrap() ||
13896+
DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
13897+
if (OpBits == DestBits) {
13898+
// Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13899+
// bits, it is already ready.
1389813900
return Op;
13899-
} else if (OpBits < DestBits) {
13900-
// Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13901-
// bits, just sext from i32.
13902-
if (NumSignBits > OpBits-MidBits)
13901+
}
13902+
13903+
if (OpBits < DestBits) {
13904+
// Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13905+
// bits, just sext from i32.
1390313906
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13904-
} else {
13907+
}
13908+
1390513909
// Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
1390613910
// bits, just truncate to i32.
13907-
if (NumSignBits > OpBits-MidBits)
13908-
return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
13911+
SDNodeFlags Flags;
13912+
Flags.setNoSignedWrap(true);
13913+
Flags.setNoUnsignedWrap(N0->getFlags().hasNoUnsignedWrap());
13914+
return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
1390913915
}
1391013916

1391113917
// fold (sext (truncate x)) -> (sextinreg x).
@@ -14176,24 +14182,28 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
1417614182
unsigned OpBits = SrcVT.getScalarSizeInBits();
1417714183
unsigned MidBits = MinVT.getScalarSizeInBits();
1417814184
unsigned DestBits = VT.getScalarSizeInBits();
14179-
unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
1418014185

14181-
if (OpBits == DestBits) {
14182-
// Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
14183-
// bits, it is already ready.
14184-
if (NumSignBits > DestBits - MidBits)
14186+
if (N0->getFlags().hasNoSignedWrap() ||
14187+
DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
14188+
if (OpBits == DestBits) {
14189+
// Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
14190+
// bits, it is already ready.
1418514191
return Op;
14186-
} else if (OpBits < DestBits) {
14187-
// Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
14188-
// bits, just sext from i32.
14189-
// FIXME: This can probably be ZERO_EXTEND nneg?
14190-
if (NumSignBits > OpBits - MidBits)
14192+
}
14193+
14194+
if (OpBits < DestBits) {
14195+
// Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
14196+
// bits, just sext from i32.
14197+
// FIXME: This can probably be ZERO_EXTEND nneg?
1419114198
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
14192-
} else {
14199+
}
14200+
1419314201
// Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
1419414202
// bits, just truncate to i32.
14195-
if (NumSignBits > OpBits - MidBits)
14196-
return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
14203+
SDNodeFlags Flags;
14204+
Flags.setNoSignedWrap(true);
14205+
Flags.setNoUnsignedWrap(true);
14206+
return DAG.getNode(ISD::TRUNCATE, DL, VT, Op, Flags);
1419714207
}
1419814208
}
1419914209

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3826,7 +3826,13 @@ void SelectionDAGBuilder::visitTrunc(const User &I) {
38263826
SDValue N = getValue(I.getOperand(0));
38273827
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
38283828
I.getType());
3829-
setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
3829+
SDNodeFlags Flags;
3830+
if (auto *Trunc = dyn_cast<TruncInst>(&I)) {
3831+
Flags.setNoSignedWrap(Trunc->hasNoSignedWrap());
3832+
Flags.setNoUnsignedWrap(Trunc->hasNoUnsignedWrap());
3833+
}
3834+
3835+
setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N, Flags));
38303836
}
38313837

38323838
void SelectionDAGBuilder::visitZExt(const User &I) {

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2604,8 +2604,12 @@ bool TargetLowering::SimplifyDemandedBits(
26042604
unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
26052605
APInt TruncMask = DemandedBits.zext(OperandBitWidth);
26062606
if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2607-
Depth + 1))
2607+
Depth + 1)) {
2608+
// Disable the nsw and nuw flags. We can no longer guarantee that we
2609+
// won't wrap after simplification.
2610+
Op->dropFlags(SDNodeFlags::NoWrap);
26082611
return true;
2612+
}
26092613
Known = Known.trunc(BitWidth);
26102614

26112615
// Attempt to avoid multi-use ops if we don't need anything from them.
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
3+
4+
define zeroext i32 @trunc_nuw_nsw_urem(i64 %x) nounwind {
5+
; CHECK-LABEL: trunc_nuw_nsw_urem:
6+
; CHECK: // %bb.0: // %entry
7+
; CHECK-NEXT: mov w8, #5977 // =0x1759
8+
; CHECK-NEXT: mov w9, #10000 // =0x2710
9+
; CHECK-NEXT: movk w8, #53687, lsl #16
10+
; CHECK-NEXT: mul x8, x0, x8
11+
; CHECK-NEXT: lsr x8, x8, #45
12+
; CHECK-NEXT: msub w0, w8, w9, w0
13+
; CHECK-NEXT: ret
14+
entry:
15+
%trunc = trunc nuw nsw i64 %x to i32
16+
%rem = urem i32 %trunc, 10000
17+
ret i32 %rem
18+
}
19+
20+
define i64 @zext_nneg_udiv_trunc_nuw(i64 %x) nounwind {
21+
; CHECK-LABEL: zext_nneg_udiv_trunc_nuw:
22+
; CHECK: // %bb.0: // %entry
23+
; CHECK-NEXT: mov w8, #52429 // =0xcccd
24+
; CHECK-NEXT: mul w8, w0, w8
25+
; CHECK-NEXT: lsr w0, w8, #23
26+
; CHECK-NEXT: ret
27+
entry:
28+
%trunc = trunc nuw i64 %x to i16
29+
%div = udiv i16 %trunc, 160
30+
%ext = zext nneg i16 %div to i64
31+
ret i64 %ext
32+
}
33+
34+
define i64 @sext_udiv_trunc_nuw(i64 %x) nounwind {
35+
; CHECK-LABEL: sext_udiv_trunc_nuw:
36+
; CHECK: // %bb.0: // %entry
37+
; CHECK-NEXT: mov w8, #52429 // =0xcccd
38+
; CHECK-NEXT: mul w8, w0, w8
39+
; CHECK-NEXT: lsr w0, w8, #23
40+
; CHECK-NEXT: ret
41+
entry:
42+
%trunc = trunc nuw i64 %x to i16
43+
%div = udiv i16 %trunc, 160
44+
%ext = sext i16 %div to i64
45+
ret i64 %ext
46+
}
47+
48+
define ptr @gep_nusw_zext_nneg_add_trunc_nuw_nsw(ptr %p, i64 %x) nounwind {
49+
; CHECK-LABEL: gep_nusw_zext_nneg_add_trunc_nuw_nsw:
50+
; CHECK: // %bb.0: // %entry
51+
; CHECK-NEXT: add w8, w1, #5
52+
; CHECK-NEXT: add x0, x0, w8, uxtw #2
53+
; CHECK-NEXT: ret
54+
entry:
55+
%trunc = trunc nuw nsw i64 %x to i32
56+
%add = add nuw nsw i32 %trunc, 5
57+
%offset = zext nneg i32 %add to i64
58+
%gep = getelementptr nusw float, ptr %p, i64 %offset
59+
ret ptr %gep
60+
}
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s
3+
4+
define signext i8 @trunc_nsw_add(i32 signext %x) nounwind {
5+
; CHECK-LABEL: trunc_nsw_add:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: addiw a0, a0, 1
8+
; CHECK-NEXT: ret
9+
entry:
10+
%add = add nsw i32 %x, 1
11+
%trunc = trunc nsw i32 %add to i8
12+
ret i8 %trunc
13+
}
14+
15+
define signext i32 @trunc_nuw_nsw_urem(i64 %x) nounwind {
16+
; CHECK-LABEL: trunc_nuw_nsw_urem:
17+
; CHECK: # %bb.0: # %entry
18+
; CHECK-NEXT: lui a1, 210
19+
; CHECK-NEXT: addiw a1, a1, -1167
20+
; CHECK-NEXT: slli a1, a1, 12
21+
; CHECK-NEXT: addi a1, a1, 1881
22+
; CHECK-NEXT: mul a1, a0, a1
23+
; CHECK-NEXT: srli a1, a1, 45
24+
; CHECK-NEXT: lui a2, 2
25+
; CHECK-NEXT: addi a2, a2, 1808
26+
; CHECK-NEXT: mul a1, a1, a2
27+
; CHECK-NEXT: subw a0, a0, a1
28+
; CHECK-NEXT: ret
29+
entry:
30+
%trunc = trunc nuw nsw i64 %x to i32
31+
%rem = urem i32 %trunc, 10000
32+
ret i32 %rem
33+
}
34+
35+
define i64 @zext_nneg_udiv_trunc_nuw(i64 %x) nounwind {
36+
; CHECK-LABEL: zext_nneg_udiv_trunc_nuw:
37+
; CHECK: # %bb.0: # %entry
38+
; CHECK-NEXT: lui a1, 13
39+
; CHECK-NEXT: addi a1, a1, -819
40+
; CHECK-NEXT: mul a0, a0, a1
41+
; CHECK-NEXT: srliw a0, a0, 23
42+
; CHECK-NEXT: ret
43+
entry:
44+
%trunc = trunc nuw i64 %x to i16
45+
%div = udiv i16 %trunc, 160
46+
%ext = zext nneg i16 %div to i64
47+
ret i64 %ext
48+
}
49+
50+
define i64 @sext_udiv_trunc_nuw(i64 %x) nounwind {
51+
; CHECK-LABEL: sext_udiv_trunc_nuw:
52+
; CHECK: # %bb.0: # %entry
53+
; CHECK-NEXT: lui a1, 13
54+
; CHECK-NEXT: addi a1, a1, -819
55+
; CHECK-NEXT: mul a0, a0, a1
56+
; CHECK-NEXT: srliw a0, a0, 23
57+
; CHECK-NEXT: ret
58+
entry:
59+
%trunc = trunc nuw i64 %x to i16
60+
%div = udiv i16 %trunc, 160
61+
%ext = sext i16 %div to i64
62+
ret i64 %ext
63+
}
64+
65+
define ptr @gep_nusw_zext_nneg_add_trunc_nuw_nsw(ptr %p, i64 %x) nounwind {
66+
; CHECK-LABEL: gep_nusw_zext_nneg_add_trunc_nuw_nsw:
67+
; CHECK: # %bb.0: # %entry
68+
; CHECK-NEXT: slli a1, a1, 2
69+
; CHECK-NEXT: add a0, a1, a0
70+
; CHECK-NEXT: addi a0, a0, 20
71+
; CHECK-NEXT: ret
72+
entry:
73+
%trunc = trunc nuw nsw i64 %x to i32
74+
%add = add nuw nsw i32 %trunc, 5
75+
%offset = zext nneg i32 %add to i64
76+
%gep = getelementptr nusw float, ptr %p, i64 %offset
77+
ret ptr %gep
78+
}
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=x86_64 | FileCheck %s
3+
4+
define zeroext i32 @trunc_nuw_nsw_urem(i64 %x) nounwind {
5+
; CHECK-LABEL: trunc_nuw_nsw_urem:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: movq %rdi, %rax
8+
; CHECK-NEXT: movl $3518437209, %ecx # imm = 0xD1B71759
9+
; CHECK-NEXT: imulq %rdi, %rcx
10+
; CHECK-NEXT: shrq $45, %rcx
11+
; CHECK-NEXT: imull $10000, %ecx, %ecx # imm = 0x2710
12+
; CHECK-NEXT: subl %ecx, %eax
13+
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
14+
; CHECK-NEXT: retq
15+
entry:
16+
%trunc = trunc nuw nsw i64 %x to i32
17+
%rem = urem i32 %trunc, 10000
18+
ret i32 %rem
19+
}
20+
21+
define i64 @zext_nneg_udiv_trunc_nuw(i64 %x) nounwind {
22+
; CHECK-LABEL: zext_nneg_udiv_trunc_nuw:
23+
; CHECK: # %bb.0: # %entry
24+
; CHECK-NEXT: imull $52429, %edi, %eax # imm = 0xCCCD
25+
; CHECK-NEXT: shrl $23, %eax
26+
; CHECK-NEXT: retq
27+
entry:
28+
%trunc = trunc nuw i64 %x to i16
29+
%div = udiv i16 %trunc, 160
30+
%ext = zext nneg i16 %div to i64
31+
ret i64 %ext
32+
}
33+
34+
define i64 @sext_udiv_trunc_nuw(i64 %x) nounwind {
35+
; CHECK-LABEL: sext_udiv_trunc_nuw:
36+
; CHECK: # %bb.0: # %entry
37+
; CHECK-NEXT: imull $52429, %edi, %eax # imm = 0xCCCD
38+
; CHECK-NEXT: shrl $23, %eax
39+
; CHECK-NEXT: retq
40+
entry:
41+
%trunc = trunc nuw i64 %x to i16
42+
%div = udiv i16 %trunc, 160
43+
%ext = sext i16 %div to i64
44+
ret i64 %ext
45+
}
46+
47+
define ptr @gep_nusw_zext_nneg_add_trunc_nuw_nsw(ptr %p, i64 %x) nounwind {
48+
; CHECK-LABEL: gep_nusw_zext_nneg_add_trunc_nuw_nsw:
49+
; CHECK: # %bb.0: # %entry
50+
; CHECK-NEXT: leaq 20(%rdi,%rsi,4), %rax
51+
; CHECK-NEXT: retq
52+
entry:
53+
%trunc = trunc nuw nsw i64 %x to i32
54+
%add = add nuw nsw i32 %trunc, 5
55+
%offset = zext nneg i32 %add to i64
56+
%gep = getelementptr nusw float, ptr %p, i64 %offset
57+
ret ptr %gep
58+
}
59+
60+
; Make sure the nsw flag is dropped after we simplify the operand of TRUNCATE.
61+
62+
define i32 @simplify_demanded_bits_drop_flag(i1 zeroext %x, i1 zeroext %y) nounwind {
63+
; CHECK-LABEL: simplify_demanded_bits_drop_flag:
64+
; CHECK: # %bb.0: # %entry
65+
; CHECK-NEXT: negl %edi
66+
; CHECK-NEXT: shll $2, %esi
67+
; CHECK-NEXT: xorl %edi, %esi
68+
; CHECK-NEXT: movslq %esi, %rax
69+
; CHECK-NEXT: imulq $-1634202141, %rax, %rax # imm = 0x9E980DE3
70+
; CHECK-NEXT: movq %rax, %rcx
71+
; CHECK-NEXT: shrq $63, %rcx
72+
; CHECK-NEXT: sarq $44, %rax
73+
; CHECK-NEXT: addl %ecx, %eax
74+
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
75+
; CHECK-NEXT: retq
76+
entry:
77+
%sel = select i1 %y, i64 4, i64 0
78+
%conv0 = sext i1 %x to i64
79+
%xor = xor i64 %sel, %conv0
80+
%conv1 = trunc nsw i64 %xor to i32
81+
%div = sdiv i32 %conv1, -10765
82+
ret i32 %div
83+
}

0 commit comments

Comments
 (0)