-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[DAGCombiner][RISCV] Don't propagate the exact flag from udiv/sdiv to urem/srem. #145387
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
… urem/srem. If we simplify a udiv/sdiv using the exact flag we shouldn't propagate that simplification to any urem/srem that happens to use the same operands. If the exact flag is wrong, the udiv/sdiv will produce poison, but that doesn't mean we can make the urem/srem simplify to 0. Fixes llvm#145360.
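@llvm/pr-subscribers-llvm-selectiondag Author: Craig Topper (topperc) Changes: If we simplify a udiv/sdiv using the exact flag we shouldn't propagate that simplification to any urem/srem that happens to use the same operands. If the exact flag is wrong, the udiv/sdiv will produce poison, but that doesn't mean we can make the urem/srem simplify to 0. Fixes #145360. Full diff: https://github.com/llvm/llvm-project/pull/145387.diff 2 Files Affected: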
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 56a5643e13442..74a31e7a1fe7a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4961,11 +4961,15 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
// (Dividend - (Quotient * Divisor).
if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
{ N0, N1 })) {
- SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
- SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
- AddToWorklist(Mul.getNode());
- AddToWorklist(Sub.getNode());
- CombineTo(RemNode, Sub);
+ // If the sdiv has the exact flag we shouldn't propagate it to the
+ // remainder node.
+ if (!N->getFlags().hasExact()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
+ AddToWorklist(Mul.getNode());
+ AddToWorklist(Sub.getNode());
+ CombineTo(RemNode, Sub);
+ }
}
return V;
}
@@ -5101,11 +5105,15 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
// (Dividend - (Quotient * Divisor).
if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
{ N0, N1 })) {
- SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
- SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
- AddToWorklist(Mul.getNode());
- AddToWorklist(Sub.getNode());
- CombineTo(RemNode, Sub);
+ // If the udiv has the exact flag we shouldn't propagate it to the
+ // remainder node.
+ if (!N->getFlags().hasExact()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
+ AddToWorklist(Mul.getNode());
+ AddToWorklist(Sub.getNode());
+ CombineTo(RemNode, Sub);
+ }
}
return V;
}
diff --git a/llvm/test/CodeGen/RISCV/pr145360.ll b/llvm/test/CodeGen/RISCV/pr145360.ll
new file mode 100644
index 0000000000000..4251ac60c8bf6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/pr145360.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s
+
+define i32 @signed(i32 %0, ptr %1) {
+; CHECK-LABEL: signed:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sraiw a2, a0, 31
+; CHECK-NEXT: srliw a2, a2, 24
+; CHECK-NEXT: add a2, a0, a2
+; CHECK-NEXT: andi a2, a2, -256
+; CHECK-NEXT: subw a2, a0, a2
+; CHECK-NEXT: sraiw a0, a0, 8
+; CHECK-NEXT: sw a2, 0(a1)
+; CHECK-NEXT: ret
+ %rem = srem i32 %0, 256
+ store i32 %rem, ptr %1, align 4
+ %div = sdiv exact i32 %0, 256
+ ret i32 %div
+}
+
+define i32 @unsigned(i32 %0, ptr %1) {
+; CHECK-LABEL: unsigned:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a2, a0, 32
+; CHECK-NEXT: lui a3, 699051
+; CHECK-NEXT: addi a3, a3, -1365
+; CHECK-NEXT: slli a4, a3, 32
+; CHECK-NEXT: mulhu a2, a2, a4
+; CHECK-NEXT: srli a2, a2, 36
+; CHECK-NEXT: slli a4, a2, 5
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: subw a2, a2, a4
+; CHECK-NEXT: srliw a4, a0, 3
+; CHECK-NEXT: add a2, a0, a2
+; CHECK-NEXT: mulw a0, a4, a3
+; CHECK-NEXT: sw a2, 0(a1)
+; CHECK-NEXT: ret
+ %rem = urem i32 %0, 24
+ store i32 %rem, ptr %1, align 4
+ %div = udiv exact i32 %0, 24
+ ret i32 %div
+}
+
+define i32 @signed_div_first(i32 %0, ptr %1) {
+; CHECK-LABEL: signed_div_first:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sraiw a2, a0, 31
+; CHECK-NEXT: srliw a2, a2, 24
+; CHECK-NEXT: add a3, a0, a2
+; CHECK-NEXT: sraiw a2, a3, 8
+; CHECK-NEXT: andi a3, a3, -256
+; CHECK-NEXT: subw a0, a0, a3
+; CHECK-NEXT: sw a0, 0(a1)
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: ret
+ %div = sdiv exact i32 %0, 256
+ %rem = srem i32 %0, 256
+ store i32 %rem, ptr %1, align 4
+ ret i32 %div
+}
+
+define i32 @unsigned_div_first(i32 %0, ptr %1) {
+; CHECK-LABEL: unsigned_div_first:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli a2, a0, 32
+; CHECK-NEXT: lui a3, 699051
+; CHECK-NEXT: addi a3, a3, -1365
+; CHECK-NEXT: slli a3, a3, 32
+; CHECK-NEXT: mulhu a2, a2, a3
+; CHECK-NEXT: srli a2, a2, 36
+; CHECK-NEXT: slli a3, a2, 5
+; CHECK-NEXT: slli a4, a2, 3
+; CHECK-NEXT: subw a4, a4, a3
+; CHECK-NEXT: add a0, a0, a4
+; CHECK-NEXT: sw a0, 0(a1)
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: ret
+ %div = udiv exact i32 %0, 24
+ %rem = urem i32 %0, 24
+ store i32 %rem, ptr %1, align 4
+ ret i32 %div
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with one minor
… urem/srem. (llvm#145387) If we simplify a udiv/sdiv using the exact flag we shouldn't propagate that simplification to any urem/srem that happens to use the same operands. If the exact flag is wrong, the udiv/sdiv will produce poison, but that doesn't mean we can make the urem/srem simplify to 0. Fixes llvm#145360.
… urem/srem. (llvm#145387) If we simplify a udiv/sdiv using the exact flag we shouldn't propagate that simplification to any urem/srem that happens to use the same operands. If the exact flag is wrong, the udiv/sdiv will produce poison, but that doesn't mean we can make the urem/srem simplify to 0. Fixes llvm#145360.
If we simplify a udiv/sdiv using the exact flag we shouldn't
propagate that simplification to any urem/srem that happens to
use the same operands. If the exact flag is wrong, the udiv/sdiv
will produce poison, but that doesn't mean we can make the urem/srem
simplify to 0.
Fixes #145360.