Skip to content

Commit f668a08

Browse files
authored
[DAGCombiner][RISCV] Optimize (zext nneg (truncate X)) if X has known sign bits. (#82227)
This treats the zext nneg as sext if X is known to have sufficient sign bits to allow the zext or truncate or both to be removed. This code is taken from the same optimization for sext.
1 parent 0ca74c3 commit f668a08

File tree

3 files changed

+34
-25
lines changed

3 files changed

+34
-25
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13758,6 +13758,32 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
1375813758
EVT SrcVT = N0.getOperand(0).getValueType();
1375913759
EVT MinVT = N0.getValueType();
1376013760

13761+
if (N->getFlags().hasNonNeg()) {
13762+
SDValue Op = N0.getOperand(0);
13763+
unsigned OpBits = SrcVT.getScalarSizeInBits();
13764+
unsigned MidBits = MinVT.getScalarSizeInBits();
13765+
unsigned DestBits = VT.getScalarSizeInBits();
13766+
unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
13767+
13768+
if (OpBits == DestBits) {
13769+
// Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13770+
// bits, it is already ready.
13771+
if (NumSignBits > DestBits - MidBits)
13772+
return Op;
13773+
} else if (OpBits < DestBits) {
13774+
// Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13775+
// bits, just sext from i32.
13776+
// FIXME: This can probably be ZERO_EXTEND nneg?
13777+
if (NumSignBits > OpBits - MidBits)
13778+
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13779+
} else {
13780+
// Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
13781+
// bits, just truncate to i32.
13782+
if (NumSignBits > OpBits - MidBits)
13783+
return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
13784+
}
13785+
}
13786+
1376113787
// Try to mask before the extension to avoid having to generate a larger mask,
1376213788
// possibly over several sub-vectors.
1376313789
if (SrcVT.bitsLT(VT) && VT.isVector()) {

llvm/test/CodeGen/RISCV/sext-zext-trunc.ll

Lines changed: 8 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -756,31 +756,18 @@ define void @zext_nneg_dominating_icmp_i64(i16 signext %0) {
756756
; RV32I: # %bb.0:
757757
; RV32I-NEXT: bltz a0, .LBB46_2
758758
; RV32I-NEXT: # %bb.1:
759-
; RV32I-NEXT: slli a0, a0, 16
760-
; RV32I-NEXT: srli a0, a0, 16
761-
; RV32I-NEXT: li a1, 0
759+
; RV32I-NEXT: srai a1, a0, 31
762760
; RV32I-NEXT: tail bar_i64
763761
; RV32I-NEXT: .LBB46_2:
764762
; RV32I-NEXT: ret
765763
;
766-
; RV64I-LABEL: zext_nneg_dominating_icmp_i64:
767-
; RV64I: # %bb.0:
768-
; RV64I-NEXT: bltz a0, .LBB46_2
769-
; RV64I-NEXT: # %bb.1:
770-
; RV64I-NEXT: slli a0, a0, 48
771-
; RV64I-NEXT: srli a0, a0, 48
772-
; RV64I-NEXT: tail bar_i64
773-
; RV64I-NEXT: .LBB46_2:
774-
; RV64I-NEXT: ret
775-
;
776-
; RV64ZBB-LABEL: zext_nneg_dominating_icmp_i64:
777-
; RV64ZBB: # %bb.0:
778-
; RV64ZBB-NEXT: bltz a0, .LBB46_2
779-
; RV64ZBB-NEXT: # %bb.1:
780-
; RV64ZBB-NEXT: zext.h a0, a0
781-
; RV64ZBB-NEXT: tail bar_i64
782-
; RV64ZBB-NEXT: .LBB46_2:
783-
; RV64ZBB-NEXT: ret
764+
; RV64-LABEL: zext_nneg_dominating_icmp_i64:
765+
; RV64: # %bb.0:
766+
; RV64-NEXT: bltz a0, .LBB46_2
767+
; RV64-NEXT: # %bb.1:
768+
; RV64-NEXT: tail bar_i64
769+
; RV64-NEXT: .LBB46_2:
770+
; RV64-NEXT: ret
784771
%2 = icmp sgt i16 %0, -1
785772
br i1 %2, label %3, label %5
786773

@@ -800,8 +787,6 @@ define void @zext_nneg_dominating_icmp_i32(i16 signext %0) {
800787
; RV32I: # %bb.0:
801788
; RV32I-NEXT: bltz a0, .LBB47_2
802789
; RV32I-NEXT: # %bb.1:
803-
; RV32I-NEXT: slli a0, a0, 16
804-
; RV32I-NEXT: srli a0, a0, 16
805790
; RV32I-NEXT: tail bar_i32
806791
; RV32I-NEXT: .LBB47_2:
807792
; RV32I-NEXT: ret

llvm/test/CodeGen/VE/Scalar/ctlz.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ define signext i32 @func32s(i32 signext %p) {
3434
; CHECK-NEXT: and %s0, %s0, (32)0
3535
; CHECK-NEXT: ldz %s0, %s0
3636
; CHECK-NEXT: lea %s0, -32(, %s0)
37-
; CHECK-NEXT: and %s0, %s0, (32)0
3837
; CHECK-NEXT: b.l.t (, %s10)
3938
%r = tail call i32 @llvm.ctlz.i32(i32 %p, i1 true)
4039
ret i32 %r
@@ -202,7 +201,6 @@ define signext i32 @func32sx(i32 signext %p) {
202201
; CHECK-NEXT: and %s0, %s0, (32)0
203202
; CHECK-NEXT: ldz %s0, %s0
204203
; CHECK-NEXT: lea %s0, -32(, %s0)
205-
; CHECK-NEXT: and %s0, %s0, (32)0
206204
; CHECK-NEXT: b.l.t (, %s10)
207205
%r = tail call i32 @llvm.ctlz.i32(i32 %p, i1 false)
208206
ret i32 %r

0 commit comments

Comments
 (0)