[SelectionDAG][RISCV] Preserve nneg flag when folding (trunc (zext X))->(zext X). #144807
Conversation
If X is known non-negative, that's still true if we fold the truncate to create a smaller zext.

In the i128 tests, SelectionDAGBuilder aggressively truncates the zext nneg to i64 to match getShiftAmountTy. If we don't preserve the nneg, we can't see that the shift amount argument being signext means we don't need to do any extension.
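A minimal standalone sketch (not part of the patch, and assuming a compiler with __int128 such as Clang or GCC) of the identity this fold relies on: for non-negative X, trunc.i64(zext.i128(X)) equals zext.i64(X), and the narrower result is still non-negative, so the nneg flag can be carried over.

#include <cassert>
#include <cstdint>

int main() {
  int32_t x = 12345;                                 // known non-negative (nneg)
  unsigned __int128 wide = static_cast<uint32_t>(x); // zext nneg i32 -> i128
  uint64_t narrow = static_cast<uint64_t>(wide);     // trunc i128 -> i64
  // The fold: truncating the wide zext equals the direct narrower zext...
  assert(narrow == static_cast<uint64_t>(static_cast<uint32_t>(x)));
  // ...and the narrower value's sign bit is still clear, so nneg remains valid.
  assert(static_cast<int64_t>(narrow) >= 0);
}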
@llvm/pr-subscribers-llvm-selectiondag

Author: Craig Topper (topperc)

Full diff: https://github.com/llvm/llvm-project/pull/144807.diff

3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0e078f9dd88b4..a6b9cc81edde6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15740,8 +15740,12 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
N0.getOpcode() == ISD::SIGN_EXTEND ||
N0.getOpcode() == ISD::ANY_EXTEND) {
// if the source is smaller than the dest, we still need an extend.
- if (N0.getOperand(0).getValueType().bitsLT(VT))
- return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
+ if (N0.getOperand(0).getValueType().bitsLT(VT)) {
+ SDNodeFlags Flags;
+ if (N0.getOpcode() == ISD::ZERO_EXTEND)
+ Flags.setNonNeg(N0->getFlags().hasNonNeg());
+ return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
+ }
// if the source is larger than the dest, than we just need the truncate.
if (N0.getOperand(0).getValueType().bitsGT(VT))
return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b0e3f534e2aaa..5d8db8be9731f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6474,8 +6474,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
OpOpcode == ISD::ANY_EXTEND) {
// If the source is smaller than the dest, we still need an extend.
if (N1.getOperand(0).getValueType().getScalarType().bitsLT(
- VT.getScalarType()))
- return getNode(OpOpcode, DL, VT, N1.getOperand(0));
+ VT.getScalarType())) {
+ SDNodeFlags Flags;
+ if (OpOpcode == ISD::ZERO_EXTEND)
+ Flags.setNonNeg(N1->getFlags().hasNonNeg());
+ return getNode(OpOpcode, DL, VT, N1.getOperand(0), Flags);
+ }
if (N1.getOperand(0).getValueType().bitsGT(VT))
return getNode(ISD::TRUNCATE, DL, VT, N1.getOperand(0));
return N1.getOperand(0);
diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll
index 249dabba0cc28..32a037918a5a7 100644
--- a/llvm/test/CodeGen/RISCV/shifts.ll
+++ b/llvm/test/CodeGen/RISCV/shifts.ll
@@ -484,3 +484,298 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind {
%res = tail call i128 @llvm.fshr.i128(i128 %a, i128 %a, i128 %b)
ret i128 %res
}
+
+define i64 @lshr64_shamt32(i64 %a, i32 signext %b) nounwind {
+; RV32I-LABEL: lshr64_shamt32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi a4, a2, -32
+; RV32I-NEXT: srl a3, a1, a2
+; RV32I-NEXT: bltz a4, .LBB11_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: mv a0, a3
+; RV32I-NEXT: j .LBB11_3
+; RV32I-NEXT: .LBB11_2:
+; RV32I-NEXT: srl a0, a0, a2
+; RV32I-NEXT: not a2, a2
+; RV32I-NEXT: slli a1, a1, 1
+; RV32I-NEXT: sll a1, a1, a2
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: .LBB11_3:
+; RV32I-NEXT: srai a1, a4, 31
+; RV32I-NEXT: and a1, a1, a3
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: lshr64_shamt32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srl a0, a0, a1
+; RV64I-NEXT: ret
+ %zext = zext nneg i32 %b to i64
+ %1 = lshr i64 %a, %zext
+ ret i64 %1
+}
+
+define i64 @ashr64_shamt32(i64 %a, i32 signext %b) nounwind {
+; RV32I-LABEL: ashr64_shamt32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: mv a3, a1
+; RV32I-NEXT: addi a4, a2, -32
+; RV32I-NEXT: sra a1, a1, a2
+; RV32I-NEXT: bltz a4, .LBB12_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: srai a3, a3, 31
+; RV32I-NEXT: mv a0, a1
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB12_2:
+; RV32I-NEXT: srl a0, a0, a2
+; RV32I-NEXT: not a2, a2
+; RV32I-NEXT: slli a3, a3, 1
+; RV32I-NEXT: sll a2, a3, a2
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: ashr64_shamt32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sra a0, a0, a1
+; RV64I-NEXT: ret
+ %zext = zext nneg i32 %b to i64
+ %1 = ashr i64 %a, %zext
+ ret i64 %1
+}
+
+define i64 @shl64_shamt32(i64 %a, i32 signext %b) nounwind {
+; RV32I-LABEL: shl64_shamt32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi a4, a2, -32
+; RV32I-NEXT: sll a3, a0, a2
+; RV32I-NEXT: bltz a4, .LBB13_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: j .LBB13_3
+; RV32I-NEXT: .LBB13_2:
+; RV32I-NEXT: sll a1, a1, a2
+; RV32I-NEXT: not a2, a2
+; RV32I-NEXT: srli a0, a0, 1
+; RV32I-NEXT: srl a0, a0, a2
+; RV32I-NEXT: or a1, a1, a0
+; RV32I-NEXT: .LBB13_3:
+; RV32I-NEXT: srai a0, a4, 31
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: shl64_shamt32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: sll a0, a0, a1
+; RV64I-NEXT: ret
+ %zext = zext nneg i32 %b to i64
+ %1 = shl i64 %a, %zext
+ ret i64 %1
+}
+
+define i128 @lshr128_shamt32(i128 %a, i32 signext %b) nounwind {
+; RV32I-LABEL: lshr128_shamt32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: lw a3, 0(a1)
+; RV32I-NEXT: lw a4, 4(a1)
+; RV32I-NEXT: lw a5, 8(a1)
+; RV32I-NEXT: lw a1, 12(a1)
+; RV32I-NEXT: sw zero, 16(sp)
+; RV32I-NEXT: sw zero, 20(sp)
+; RV32I-NEXT: sw zero, 24(sp)
+; RV32I-NEXT: sw zero, 28(sp)
+; RV32I-NEXT: srli a6, a2, 3
+; RV32I-NEXT: mv a7, sp
+; RV32I-NEXT: andi t0, a2, 31
+; RV32I-NEXT: andi a6, a6, 12
+; RV32I-NEXT: xori t0, t0, 31
+; RV32I-NEXT: add a6, a7, a6
+; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a1, 12(sp)
+; RV32I-NEXT: lw a1, 0(a6)
+; RV32I-NEXT: lw a3, 4(a6)
+; RV32I-NEXT: lw a4, 8(a6)
+; RV32I-NEXT: lw a5, 12(a6)
+; RV32I-NEXT: srl a1, a1, a2
+; RV32I-NEXT: slli a6, a3, 1
+; RV32I-NEXT: srl a3, a3, a2
+; RV32I-NEXT: slli a7, a4, 1
+; RV32I-NEXT: srl a4, a4, a2
+; RV32I-NEXT: srl a2, a5, a2
+; RV32I-NEXT: slli a5, a5, 1
+; RV32I-NEXT: sll a6, a6, t0
+; RV32I-NEXT: sll a7, a7, t0
+; RV32I-NEXT: sll a5, a5, t0
+; RV32I-NEXT: or a1, a1, a6
+; RV32I-NEXT: or a3, a3, a7
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: sw a1, 0(a0)
+; RV32I-NEXT: sw a3, 4(a0)
+; RV32I-NEXT: sw a4, 8(a0)
+; RV32I-NEXT: sw a2, 12(a0)
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: lshr128_shamt32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi a4, a2, -64
+; RV64I-NEXT: srl a3, a1, a2
+; RV64I-NEXT: bltz a4, .LBB14_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a0, a3
+; RV64I-NEXT: j .LBB14_3
+; RV64I-NEXT: .LBB14_2:
+; RV64I-NEXT: srl a0, a0, a2
+; RV64I-NEXT: not a2, a2
+; RV64I-NEXT: slli a1, a1, 1
+; RV64I-NEXT: sll a1, a1, a2
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: .LBB14_3:
+; RV64I-NEXT: srai a1, a4, 63
+; RV64I-NEXT: and a1, a1, a3
+; RV64I-NEXT: ret
+ %zext = zext nneg i32 %b to i128
+ %1 = lshr i128 %a, %zext
+ ret i128 %1
+}
+
+define i128 @ashr128_shamt32(i128 %a, i32 signext %b) nounwind {
+; RV32I-LABEL: ashr128_shamt32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: lw a3, 0(a1)
+; RV32I-NEXT: lw a4, 4(a1)
+; RV32I-NEXT: lw a5, 8(a1)
+; RV32I-NEXT: lw a1, 12(a1)
+; RV32I-NEXT: srli a6, a2, 3
+; RV32I-NEXT: mv a7, sp
+; RV32I-NEXT: andi t0, a2, 31
+; RV32I-NEXT: andi a6, a6, 12
+; RV32I-NEXT: xori t0, t0, 31
+; RV32I-NEXT: add a6, a7, a6
+; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a1, 12(sp)
+; RV32I-NEXT: srai a1, a1, 31
+; RV32I-NEXT: sw a1, 16(sp)
+; RV32I-NEXT: sw a1, 20(sp)
+; RV32I-NEXT: sw a1, 24(sp)
+; RV32I-NEXT: sw a1, 28(sp)
+; RV32I-NEXT: lw a1, 0(a6)
+; RV32I-NEXT: lw a3, 4(a6)
+; RV32I-NEXT: lw a4, 8(a6)
+; RV32I-NEXT: lw a5, 12(a6)
+; RV32I-NEXT: srl a1, a1, a2
+; RV32I-NEXT: slli a6, a3, 1
+; RV32I-NEXT: srl a3, a3, a2
+; RV32I-NEXT: slli a7, a4, 1
+; RV32I-NEXT: srl a4, a4, a2
+; RV32I-NEXT: sra a2, a5, a2
+; RV32I-NEXT: slli a5, a5, 1
+; RV32I-NEXT: sll a6, a6, t0
+; RV32I-NEXT: sll a7, a7, t0
+; RV32I-NEXT: sll a5, a5, t0
+; RV32I-NEXT: or a1, a1, a6
+; RV32I-NEXT: or a3, a3, a7
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: sw a1, 0(a0)
+; RV32I-NEXT: sw a3, 4(a0)
+; RV32I-NEXT: sw a4, 8(a0)
+; RV32I-NEXT: sw a2, 12(a0)
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: ashr128_shamt32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: mv a3, a1
+; RV64I-NEXT: addi a4, a2, -64
+; RV64I-NEXT: sra a1, a1, a2
+; RV64I-NEXT: bltz a4, .LBB15_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: srai a3, a3, 63
+; RV64I-NEXT: mv a0, a1
+; RV64I-NEXT: mv a1, a3
+; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB15_2:
+; RV64I-NEXT: srl a0, a0, a2
+; RV64I-NEXT: not a2, a2
+; RV64I-NEXT: slli a3, a3, 1
+; RV64I-NEXT: sll a2, a3, a2
+; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: ret
+ %zext = zext nneg i32 %b to i128
+ %1 = ashr i128 %a, %zext
+ ret i128 %1
+}
+
+define i128 @shl128_shamt32(i128 %a, i32 signext %b) nounwind {
+; RV32I-LABEL: shl128_shamt32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: lw a3, 0(a1)
+; RV32I-NEXT: lw a4, 4(a1)
+; RV32I-NEXT: lw a5, 8(a1)
+; RV32I-NEXT: lw a1, 12(a1)
+; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: sw zero, 4(sp)
+; RV32I-NEXT: sw zero, 8(sp)
+; RV32I-NEXT: sw zero, 12(sp)
+; RV32I-NEXT: srli a6, a2, 3
+; RV32I-NEXT: addi a7, sp, 16
+; RV32I-NEXT: andi t0, a2, 31
+; RV32I-NEXT: andi a6, a6, 12
+; RV32I-NEXT: sub a6, a7, a6
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a4, 20(sp)
+; RV32I-NEXT: sw a5, 24(sp)
+; RV32I-NEXT: sw a1, 28(sp)
+; RV32I-NEXT: lw a1, 0(a6)
+; RV32I-NEXT: lw a3, 4(a6)
+; RV32I-NEXT: lw a4, 8(a6)
+; RV32I-NEXT: lw a5, 12(a6)
+; RV32I-NEXT: xori a6, t0, 31
+; RV32I-NEXT: sll a7, a3, a2
+; RV32I-NEXT: srli t0, a1, 1
+; RV32I-NEXT: sll a5, a5, a2
+; RV32I-NEXT: sll a1, a1, a2
+; RV32I-NEXT: sll a2, a4, a2
+; RV32I-NEXT: srli a3, a3, 1
+; RV32I-NEXT: srli a4, a4, 1
+; RV32I-NEXT: srl t0, t0, a6
+; RV32I-NEXT: srl a3, a3, a6
+; RV32I-NEXT: srl a4, a4, a6
+; RV32I-NEXT: or a6, a7, t0
+; RV32I-NEXT: or a2, a2, a3
+; RV32I-NEXT: or a4, a5, a4
+; RV32I-NEXT: sw a1, 0(a0)
+; RV32I-NEXT: sw a6, 4(a0)
+; RV32I-NEXT: sw a2, 8(a0)
+; RV32I-NEXT: sw a4, 12(a0)
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: shl128_shamt32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi a4, a2, -64
+; RV64I-NEXT: sll a3, a0, a2
+; RV64I-NEXT: bltz a4, .LBB16_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a1, a3
+; RV64I-NEXT: j .LBB16_3
+; RV64I-NEXT: .LBB16_2:
+; RV64I-NEXT: sll a1, a1, a2
+; RV64I-NEXT: not a2, a2
+; RV64I-NEXT: srli a0, a0, 1
+; RV64I-NEXT: srl a0, a0, a2
+; RV64I-NEXT: or a1, a1, a0
+; RV64I-NEXT: .LBB16_3:
+; RV64I-NEXT: srai a0, a4, 63
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: ret
+ %zext = zext nneg i32 %b to i128
+ %1 = shl i128 %a, %zext
+ ret i128 %1
+}
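As a rough C++ analogue (illustrative, not taken from the PR) of the new lshr64_shamt32 test: the i32 shift amount arrives sign-extended per the RV64 calling convention, and with the nneg flag preserved through the combine the backend can see that no extension instruction is needed, leaving a bare srl in the RV64I output above.

#include <cstdint>

// 'b' mirrors the 'i32 signext %b' parameter; the cast pair mirrors
// 'zext nneg i32 %b to i64'. Since b is assumed non-negative, the zext
// is a no-op on RV64 and the shift can use the register directly.
uint64_t lshr64_shamt32(uint64_t a, int32_t b) {
  return a >> static_cast<uint64_t>(static_cast<uint32_t>(b));
}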
LGTM
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/138/builds/14825