-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[DAGCombiner] Add basic support for trunc nsw/nuw
#113808
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
trunc nsw/nuw
trunc nsw/nuw
trunc nsw/nuw
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-aarch64 Author: Yingwei Zheng (dtcxzyw) Changes: This patch adds basic support for `trunc nsw/nuw` in SDAG. It will allow DAGCombiner to further eliminate in-reg `zext`/`sext` instructions. Full diff: https://github.com/llvm/llvm-project/pull/113808.diff 5 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ad2d2ede302af8..0b249f2f7267bd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2329,6 +2329,8 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
if (N->getOpcode() == ISD::TRUNCATE) {
Op = N->getOperand(0);
Known = DAG.computeKnownBits(Op);
+ if (N->getFlags().hasNoUnsignedWrap())
+ Known.Zero.setBitsFrom(N.getScalarValueSizeInBits());
return true;
}
@@ -13793,23 +13795,22 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
unsigned OpBits = Op.getScalarValueSizeInBits();
unsigned MidBits = N0.getScalarValueSizeInBits();
unsigned DestBits = VT.getScalarSizeInBits();
- unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
- if (OpBits == DestBits) {
- // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
- // bits, it is already ready.
- if (NumSignBits > DestBits-MidBits)
+ if (N0->getFlags().hasNoSignedWrap() ||
+ DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
+ if (OpBits == DestBits) {
+ // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
+ // bits, it is already ready.
return Op;
- } else if (OpBits < DestBits) {
- // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
- // bits, just sext from i32.
- if (NumSignBits > OpBits-MidBits)
+ } else if (OpBits < DestBits) {
+ // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
+ // bits, just sext from i32.
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
- } else {
- // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
- // bits, just truncate to i32.
- if (NumSignBits > OpBits-MidBits)
+ } else {
+ // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
+ // bits, just truncate to i32.
return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
+ }
}
// fold (sext (truncate x)) -> (sextinreg x).
@@ -14083,24 +14084,23 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
unsigned OpBits = SrcVT.getScalarSizeInBits();
unsigned MidBits = MinVT.getScalarSizeInBits();
unsigned DestBits = VT.getScalarSizeInBits();
- unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
- if (OpBits == DestBits) {
- // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
- // bits, it is already ready.
- if (NumSignBits > DestBits - MidBits)
+ if (N0->getFlags().hasNoSignedWrap() ||
+ DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
+ if (OpBits == DestBits) {
+ // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
+ // bits, it is already ready.
return Op;
- } else if (OpBits < DestBits) {
- // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
- // bits, just sext from i32.
- // FIXME: This can probably be ZERO_EXTEND nneg?
- if (NumSignBits > OpBits - MidBits)
+ } else if (OpBits < DestBits) {
+ // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
+ // bits, just sext from i32.
+ // FIXME: This can probably be ZERO_EXTEND nneg?
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
- } else {
- // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
- // bits, just truncate to i32.
- if (NumSignBits > OpBits - MidBits)
+ } else {
+ // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
+ // bits, just truncate to i32.
return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
+ }
}
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8450553743074c..e1e4db79627ef6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3823,7 +3823,13 @@ void SelectionDAGBuilder::visitTrunc(const User &I) {
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
- setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
+ SDNodeFlags Flags;
+ if (auto *Trunc = dyn_cast<TruncInst>(&I)) {
+ Flags.setNoSignedWrap(Trunc->hasNoSignedWrap());
+ Flags.setNoUnsignedWrap(Trunc->hasNoUnsignedWrap());
+ }
+
+ setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N, Flags));
}
void SelectionDAGBuilder::visitZExt(const User &I) {
diff --git a/llvm/test/CodeGen/AArch64/trunc-nsw-nuw.ll b/llvm/test/CodeGen/AArch64/trunc-nsw-nuw.ll
new file mode 100644
index 00000000000000..6041db74639f32
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/trunc-nsw-nuw.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
+
+define zeroext i32 @trunc_nuw_nsw_urem(i64 %x) nounwind {
+; CHECK-LABEL: trunc_nuw_nsw_urem:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #5977 // =0x1759
+; CHECK-NEXT: mov w9, #10000 // =0x2710
+; CHECK-NEXT: movk w8, #53687, lsl #16
+; CHECK-NEXT: mul x8, x0, x8
+; CHECK-NEXT: lsr x8, x8, #45
+; CHECK-NEXT: msub w0, w8, w9, w0
+; CHECK-NEXT: ret
+entry:
+ %trunc = trunc nuw nsw i64 %x to i32
+ %rem = urem i32 %trunc, 10000
+ ret i32 %rem
+}
+
+define i64 @zext_nneg_udiv_trunc_nuw(i64 %x) nounwind {
+; CHECK-LABEL: zext_nneg_udiv_trunc_nuw:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #52429 // =0xcccd
+; CHECK-NEXT: mul w8, w0, w8
+; CHECK-NEXT: lsr w0, w8, #23
+; CHECK-NEXT: ret
+entry:
+ %trunc = trunc nuw i64 %x to i16
+ %div = udiv i16 %trunc, 160
+ %ext = zext nneg i16 %div to i64
+ ret i64 %ext
+}
+
+define i64 @sext_udiv_trunc_nuw(i64 %x) nounwind {
+; CHECK-LABEL: sext_udiv_trunc_nuw:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #52429 // =0xcccd
+; CHECK-NEXT: mul w8, w0, w8
+; CHECK-NEXT: lsr w0, w8, #23
+; CHECK-NEXT: ret
+entry:
+ %trunc = trunc nuw i64 %x to i16
+ %div = udiv i16 %trunc, 160
+ %ext = sext i16 %div to i64
+ ret i64 %ext
+}
+
+define ptr @gep_nusw_zext_nneg_add_trunc_nuw_nsw(ptr %p, i64 %x) nounwind {
+; CHECK-LABEL: gep_nusw_zext_nneg_add_trunc_nuw_nsw:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add w8, w1, #5
+; CHECK-NEXT: add x0, x0, w8, uxtw #2
+; CHECK-NEXT: ret
+entry:
+ %trunc = trunc nuw nsw i64 %x to i32
+ %add = add nuw nsw i32 %trunc, 5
+ %offset = zext nneg i32 %add to i64
+ %gep = getelementptr nusw float, ptr %p, i64 %offset
+ ret ptr %gep
+}
diff --git a/llvm/test/CodeGen/RISCV/trunc-nsw-nuw.ll b/llvm/test/CodeGen/RISCV/trunc-nsw-nuw.ll
new file mode 100644
index 00000000000000..f270775adcc155
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/trunc-nsw-nuw.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s
+
+define signext i8 @trunc_nsw_add(i32 signext %x) nounwind {
+; CHECK-LABEL: trunc_nsw_add:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addiw a0, a0, 1
+; CHECK-NEXT: ret
+entry:
+ %add = add nsw i32 %x, 1
+ %trunc = trunc nsw i32 %add to i8
+ ret i8 %trunc
+}
+
+define signext i32 @trunc_nuw_nsw_urem(i64 %x) nounwind {
+; CHECK-LABEL: trunc_nuw_nsw_urem:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a1, 210
+; CHECK-NEXT: addiw a1, a1, -1167
+; CHECK-NEXT: slli a1, a1, 12
+; CHECK-NEXT: addi a1, a1, 1881
+; CHECK-NEXT: mul a1, a0, a1
+; CHECK-NEXT: srli a1, a1, 45
+; CHECK-NEXT: lui a2, 2
+; CHECK-NEXT: addi a2, a2, 1808
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: subw a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %trunc = trunc nuw nsw i64 %x to i32
+ %rem = urem i32 %trunc, 10000
+ ret i32 %rem
+}
+
+define i64 @zext_nneg_udiv_trunc_nuw(i64 %x) nounwind {
+; CHECK-LABEL: zext_nneg_udiv_trunc_nuw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a1, 13
+; CHECK-NEXT: addi a1, a1, -819
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: srliw a0, a0, 23
+; CHECK-NEXT: ret
+entry:
+ %trunc = trunc nuw i64 %x to i16
+ %div = udiv i16 %trunc, 160
+ %ext = zext nneg i16 %div to i64
+ ret i64 %ext
+}
+
+define i64 @sext_udiv_trunc_nuw(i64 %x) nounwind {
+; CHECK-LABEL: sext_udiv_trunc_nuw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a1, 13
+; CHECK-NEXT: addi a1, a1, -819
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: srliw a0, a0, 23
+; CHECK-NEXT: ret
+entry:
+ %trunc = trunc nuw i64 %x to i16
+ %div = udiv i16 %trunc, 160
+ %ext = sext i16 %div to i64
+ ret i64 %ext
+}
+
+define ptr @gep_nusw_zext_nneg_add_trunc_nuw_nsw(ptr %p, i64 %x) nounwind {
+; CHECK-LABEL: gep_nusw_zext_nneg_add_trunc_nuw_nsw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: addi a0, a0, 20
+; CHECK-NEXT: ret
+entry:
+ %trunc = trunc nuw nsw i64 %x to i32
+ %add = add nuw nsw i32 %trunc, 5
+ %offset = zext nneg i32 %add to i64
+ %gep = getelementptr nusw float, ptr %p, i64 %offset
+ ret ptr %gep
+}
diff --git a/llvm/test/CodeGen/X86/trunc-nsw-nuw.ll b/llvm/test/CodeGen/X86/trunc-nsw-nuw.ll
new file mode 100644
index 00000000000000..40b48bec8fffd7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/trunc-nsw-nuw.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64 | FileCheck %s
+
+define zeroext i32 @trunc_nuw_nsw_urem(i64 %x) nounwind {
+; CHECK-LABEL: trunc_nuw_nsw_urem:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: movl $3518437209, %ecx # imm = 0xD1B71759
+; CHECK-NEXT: imulq %rdi, %rcx
+; CHECK-NEXT: shrq $45, %rcx
+; CHECK-NEXT: imull $10000, %ecx, %ecx # imm = 0x2710
+; CHECK-NEXT: subl %ecx, %eax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: retq
+entry:
+ %trunc = trunc nuw nsw i64 %x to i32
+ %rem = urem i32 %trunc, 10000
+ ret i32 %rem
+}
+
+define i64 @zext_nneg_udiv_trunc_nuw(i64 %x) nounwind {
+; CHECK-LABEL: zext_nneg_udiv_trunc_nuw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: imull $52429, %edi, %eax # imm = 0xCCCD
+; CHECK-NEXT: shrl $23, %eax
+; CHECK-NEXT: retq
+entry:
+ %trunc = trunc nuw i64 %x to i16
+ %div = udiv i16 %trunc, 160
+ %ext = zext nneg i16 %div to i64
+ ret i64 %ext
+}
+
+define i64 @sext_udiv_trunc_nuw(i64 %x) nounwind {
+; CHECK-LABEL: sext_udiv_trunc_nuw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: imull $52429, %edi, %eax # imm = 0xCCCD
+; CHECK-NEXT: shrl $23, %eax
+; CHECK-NEXT: retq
+entry:
+ %trunc = trunc nuw i64 %x to i16
+ %div = udiv i16 %trunc, 160
+ %ext = sext i16 %div to i64
+ ret i64 %ext
+}
+
+define ptr @gep_nusw_zext_nneg_add_trunc_nuw_nsw(ptr %p, i64 %x) nounwind {
+; CHECK-LABEL: gep_nusw_zext_nneg_add_trunc_nuw_nsw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: leaq 20(%rdi,%rsi,4), %rax
+; CHECK-NEXT: retq
+entry:
+ %trunc = trunc nuw nsw i64 %x to i32
+ %add = add nuw nsw i32 %trunc, 5
+ %offset = zext nneg i32 %add to i64
+ %gep = getelementptr nusw float, ptr %p, i64 %offset
+ ret ptr %gep
+}
|
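@llvm/pr-subscribers-backend-x86 Author: Yingwei Zheng (dtcxzyw) Changes: This patch adds basic support for `trunc nsw/nuw` in SDAG. It will allow DAGCombiner to further eliminate in-reg `zext`/`sext` instructions. Full diff: https://github.com/llvm/llvm-project/pull/113808.diff 5 Files Affected: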
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ad2d2ede302af8..0b249f2f7267bd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2329,6 +2329,8 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
if (N->getOpcode() == ISD::TRUNCATE) {
Op = N->getOperand(0);
Known = DAG.computeKnownBits(Op);
+ if (N->getFlags().hasNoUnsignedWrap())
+ Known.Zero.setBitsFrom(N.getScalarValueSizeInBits());
return true;
}
@@ -13793,23 +13795,22 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
unsigned OpBits = Op.getScalarValueSizeInBits();
unsigned MidBits = N0.getScalarValueSizeInBits();
unsigned DestBits = VT.getScalarSizeInBits();
- unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
- if (OpBits == DestBits) {
- // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
- // bits, it is already ready.
- if (NumSignBits > DestBits-MidBits)
+ if (N0->getFlags().hasNoSignedWrap() ||
+ DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
+ if (OpBits == DestBits) {
+ // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
+ // bits, it is already ready.
return Op;
- } else if (OpBits < DestBits) {
- // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
- // bits, just sext from i32.
- if (NumSignBits > OpBits-MidBits)
+ } else if (OpBits < DestBits) {
+ // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
+ // bits, just sext from i32.
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
- } else {
- // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
- // bits, just truncate to i32.
- if (NumSignBits > OpBits-MidBits)
+ } else {
+ // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
+ // bits, just truncate to i32.
return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
+ }
}
// fold (sext (truncate x)) -> (sextinreg x).
@@ -14083,24 +14084,23 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
unsigned OpBits = SrcVT.getScalarSizeInBits();
unsigned MidBits = MinVT.getScalarSizeInBits();
unsigned DestBits = VT.getScalarSizeInBits();
- unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
- if (OpBits == DestBits) {
- // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
- // bits, it is already ready.
- if (NumSignBits > DestBits - MidBits)
+ if (N0->getFlags().hasNoSignedWrap() ||
+ DAG.ComputeNumSignBits(Op) > OpBits - MidBits) {
+ if (OpBits == DestBits) {
+ // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
+ // bits, it is already ready.
return Op;
- } else if (OpBits < DestBits) {
- // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
- // bits, just sext from i32.
- // FIXME: This can probably be ZERO_EXTEND nneg?
- if (NumSignBits > OpBits - MidBits)
+ } else if (OpBits < DestBits) {
+ // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
+ // bits, just sext from i32.
+ // FIXME: This can probably be ZERO_EXTEND nneg?
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
- } else {
- // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
- // bits, just truncate to i32.
- if (NumSignBits > OpBits - MidBits)
+ } else {
+ // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
+ // bits, just truncate to i32.
return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
+ }
}
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8450553743074c..e1e4db79627ef6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3823,7 +3823,13 @@ void SelectionDAGBuilder::visitTrunc(const User &I) {
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
- setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
+ SDNodeFlags Flags;
+ if (auto *Trunc = dyn_cast<TruncInst>(&I)) {
+ Flags.setNoSignedWrap(Trunc->hasNoSignedWrap());
+ Flags.setNoUnsignedWrap(Trunc->hasNoUnsignedWrap());
+ }
+
+ setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N, Flags));
}
void SelectionDAGBuilder::visitZExt(const User &I) {
diff --git a/llvm/test/CodeGen/AArch64/trunc-nsw-nuw.ll b/llvm/test/CodeGen/AArch64/trunc-nsw-nuw.ll
new file mode 100644
index 00000000000000..6041db74639f32
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/trunc-nsw-nuw.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
+
+define zeroext i32 @trunc_nuw_nsw_urem(i64 %x) nounwind {
+; CHECK-LABEL: trunc_nuw_nsw_urem:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #5977 // =0x1759
+; CHECK-NEXT: mov w9, #10000 // =0x2710
+; CHECK-NEXT: movk w8, #53687, lsl #16
+; CHECK-NEXT: mul x8, x0, x8
+; CHECK-NEXT: lsr x8, x8, #45
+; CHECK-NEXT: msub w0, w8, w9, w0
+; CHECK-NEXT: ret
+entry:
+ %trunc = trunc nuw nsw i64 %x to i32
+ %rem = urem i32 %trunc, 10000
+ ret i32 %rem
+}
+
+define i64 @zext_nneg_udiv_trunc_nuw(i64 %x) nounwind {
+; CHECK-LABEL: zext_nneg_udiv_trunc_nuw:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #52429 // =0xcccd
+; CHECK-NEXT: mul w8, w0, w8
+; CHECK-NEXT: lsr w0, w8, #23
+; CHECK-NEXT: ret
+entry:
+ %trunc = trunc nuw i64 %x to i16
+ %div = udiv i16 %trunc, 160
+ %ext = zext nneg i16 %div to i64
+ ret i64 %ext
+}
+
+define i64 @sext_udiv_trunc_nuw(i64 %x) nounwind {
+; CHECK-LABEL: sext_udiv_trunc_nuw:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #52429 // =0xcccd
+; CHECK-NEXT: mul w8, w0, w8
+; CHECK-NEXT: lsr w0, w8, #23
+; CHECK-NEXT: ret
+entry:
+ %trunc = trunc nuw i64 %x to i16
+ %div = udiv i16 %trunc, 160
+ %ext = sext i16 %div to i64
+ ret i64 %ext
+}
+
+define ptr @gep_nusw_zext_nneg_add_trunc_nuw_nsw(ptr %p, i64 %x) nounwind {
+; CHECK-LABEL: gep_nusw_zext_nneg_add_trunc_nuw_nsw:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add w8, w1, #5
+; CHECK-NEXT: add x0, x0, w8, uxtw #2
+; CHECK-NEXT: ret
+entry:
+ %trunc = trunc nuw nsw i64 %x to i32
+ %add = add nuw nsw i32 %trunc, 5
+ %offset = zext nneg i32 %add to i64
+ %gep = getelementptr nusw float, ptr %p, i64 %offset
+ ret ptr %gep
+}
diff --git a/llvm/test/CodeGen/RISCV/trunc-nsw-nuw.ll b/llvm/test/CodeGen/RISCV/trunc-nsw-nuw.ll
new file mode 100644
index 00000000000000..f270775adcc155
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/trunc-nsw-nuw.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s
+
+define signext i8 @trunc_nsw_add(i32 signext %x) nounwind {
+; CHECK-LABEL: trunc_nsw_add:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addiw a0, a0, 1
+; CHECK-NEXT: ret
+entry:
+ %add = add nsw i32 %x, 1
+ %trunc = trunc nsw i32 %add to i8
+ ret i8 %trunc
+}
+
+define signext i32 @trunc_nuw_nsw_urem(i64 %x) nounwind {
+; CHECK-LABEL: trunc_nuw_nsw_urem:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a1, 210
+; CHECK-NEXT: addiw a1, a1, -1167
+; CHECK-NEXT: slli a1, a1, 12
+; CHECK-NEXT: addi a1, a1, 1881
+; CHECK-NEXT: mul a1, a0, a1
+; CHECK-NEXT: srli a1, a1, 45
+; CHECK-NEXT: lui a2, 2
+; CHECK-NEXT: addi a2, a2, 1808
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: subw a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %trunc = trunc nuw nsw i64 %x to i32
+ %rem = urem i32 %trunc, 10000
+ ret i32 %rem
+}
+
+define i64 @zext_nneg_udiv_trunc_nuw(i64 %x) nounwind {
+; CHECK-LABEL: zext_nneg_udiv_trunc_nuw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a1, 13
+; CHECK-NEXT: addi a1, a1, -819
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: srliw a0, a0, 23
+; CHECK-NEXT: ret
+entry:
+ %trunc = trunc nuw i64 %x to i16
+ %div = udiv i16 %trunc, 160
+ %ext = zext nneg i16 %div to i64
+ ret i64 %ext
+}
+
+define i64 @sext_udiv_trunc_nuw(i64 %x) nounwind {
+; CHECK-LABEL: sext_udiv_trunc_nuw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a1, 13
+; CHECK-NEXT: addi a1, a1, -819
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: srliw a0, a0, 23
+; CHECK-NEXT: ret
+entry:
+ %trunc = trunc nuw i64 %x to i16
+ %div = udiv i16 %trunc, 160
+ %ext = sext i16 %div to i64
+ ret i64 %ext
+}
+
+define ptr @gep_nusw_zext_nneg_add_trunc_nuw_nsw(ptr %p, i64 %x) nounwind {
+; CHECK-LABEL: gep_nusw_zext_nneg_add_trunc_nuw_nsw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: addi a0, a0, 20
+; CHECK-NEXT: ret
+entry:
+ %trunc = trunc nuw nsw i64 %x to i32
+ %add = add nuw nsw i32 %trunc, 5
+ %offset = zext nneg i32 %add to i64
+ %gep = getelementptr nusw float, ptr %p, i64 %offset
+ ret ptr %gep
+}
diff --git a/llvm/test/CodeGen/X86/trunc-nsw-nuw.ll b/llvm/test/CodeGen/X86/trunc-nsw-nuw.ll
new file mode 100644
index 00000000000000..40b48bec8fffd7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/trunc-nsw-nuw.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64 | FileCheck %s
+
+define zeroext i32 @trunc_nuw_nsw_urem(i64 %x) nounwind {
+; CHECK-LABEL: trunc_nuw_nsw_urem:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: movl $3518437209, %ecx # imm = 0xD1B71759
+; CHECK-NEXT: imulq %rdi, %rcx
+; CHECK-NEXT: shrq $45, %rcx
+; CHECK-NEXT: imull $10000, %ecx, %ecx # imm = 0x2710
+; CHECK-NEXT: subl %ecx, %eax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: retq
+entry:
+ %trunc = trunc nuw nsw i64 %x to i32
+ %rem = urem i32 %trunc, 10000
+ ret i32 %rem
+}
+
+define i64 @zext_nneg_udiv_trunc_nuw(i64 %x) nounwind {
+; CHECK-LABEL: zext_nneg_udiv_trunc_nuw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: imull $52429, %edi, %eax # imm = 0xCCCD
+; CHECK-NEXT: shrl $23, %eax
+; CHECK-NEXT: retq
+entry:
+ %trunc = trunc nuw i64 %x to i16
+ %div = udiv i16 %trunc, 160
+ %ext = zext nneg i16 %div to i64
+ ret i64 %ext
+}
+
+define i64 @sext_udiv_trunc_nuw(i64 %x) nounwind {
+; CHECK-LABEL: sext_udiv_trunc_nuw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: imull $52429, %edi, %eax # imm = 0xCCCD
+; CHECK-NEXT: shrl $23, %eax
+; CHECK-NEXT: retq
+entry:
+ %trunc = trunc nuw i64 %x to i16
+ %div = udiv i16 %trunc, 160
+ %ext = sext i16 %div to i64
+ ret i64 %ext
+}
+
+define ptr @gep_nusw_zext_nneg_add_trunc_nuw_nsw(ptr %p, i64 %x) nounwind {
+; CHECK-LABEL: gep_nusw_zext_nneg_add_trunc_nuw_nsw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: leaq 20(%rdi,%rsi,4), %rax
+; CHECK-NEXT: retq
+entry:
+ %trunc = trunc nuw nsw i64 %x to i32
+ %add = add nuw nsw i32 %trunc, 5
+ %offset = zext nneg i32 %add to i64
+ %gep = getelementptr nusw float, ptr %p, i64 %offset
+ ret ptr %gep
+}
|
// bits, it is already ready. | ||
if (NumSignBits > DestBits - MidBits) | ||
if (N0->getFlags().hasNoSignedWrap() || | ||
DAG.ComputeNumSignBits(Op) > OpBits - MidBits) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should probably just set `nsw` in `visitTrunc` if `computeNumSignBits >= OldSize - NewSize`. Likewise `nuw` if high bits are zero.
Miscompilation:
Before:
After:
|
These flags should be cleared in |
This patch allows using enumeration values directly and simplifies the implementation with bitwise logic. It addresses the comment in #113808 (comment).
fced38f
to
9137580
Compare
// bits, it is already ready. | ||
if (NumSignBits > DestBits-MidBits) | ||
if (N0->getFlags().hasNoSignedWrap() || | ||
DAG.ComputeNumSignBits(Op) > OpBits - MidBits) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Still think the way to do this is just set `nsw`/`nuw` in `visitTRUNC` and drop the compute...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unlike in InstCombine, we don't infer poison generating flags in SDAG. Without recursive analysis we may miss some optimization opportunities.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess you're right. Is there a good reason for that?
This patch allows using enumeration values directly and simplifies the implementation with bitwise logic. It addresses the comment in llvm#113808 (comment).
This patch allows using enumeration values directly and simplifies the implementation with bitwise logic. It addresses the comment in llvm#113808 (comment).
Ping. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
c6cfcbf
to
4b2274a
Compare
This patch adds basic support for `trunc nsw/nuw` in SDAG. It will allow DAGCombiner to further eliminate in-reg `zext`/`sext` instructions.