[RISCV] Use addiw for or_is_add when or input is sign extended. #128635
Conversation
We prefer to emit addi instead of ori because it's more compressible, but this can pessimize the sext.w removal pass. If the input to the OR is known to be a sign-extended 32-bit value, we can use addiw instead of addi, which gives more power to the sext.w removal pass: addiw is known to produce a sign-extended value and to consume only the lower 32 bits of its input. Fixes llvm#128468.
@llvm/pr-subscribers-backend-risc-v
Author: Craig Topper (topperc)
Full diff: https://github.com/llvm/llvm-project/pull/128635.diff — 4 Files Affected:
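For context on the new pattern: as I read the existing RISC-V TableGen predicates (this gloss is mine, not text from the PR), or_is_add matches an or whose operands are known to share no set bits, so the or behaves as an add, and 33signbits_node requires its operand to have at least 33 known sign bits, i.e. to be a sign-extended 32-bit value. A minimal sketch of the effect, grounded in the test1 change in the diff below; the C++ wrapper is for exposition only:

```cpp
// (x << 1) clears bit 0, so `| 1` sets a bit the left operand can
// never carry into: the or is really an add of the immediate.
int test1(int x) { return (x << 1) | 1; }
// RV64 before:  slliw a0, a0, 1     RV64 after:  slli  a0, a0, 1
//               addi  a0, a0, 1                  addiw a0, a0, 1
// addiw reads only the low 32 bits of its operand, so the shift no
// longer has to be the W form, and addiw's result is always sign
// extended, letting RISCVOptWInstrs delete later sext.w instructions.
```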
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index a962e64581797..c775561dd2d2d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -2033,6 +2033,12 @@ def : PatGprImm<binop_allwusers<and>, ANDI, u32simm12>;
def : PatGprImm<binop_allwusers<or>, ORI, u32simm12>;
def : PatGprImm<binop_allwusers<xor>, XORI, u32simm12>;
+// Select 'or' as ADDIW if the immediate bits are known to be 0 in $rs1 and
+// $rs1 is sign extended. This can improve compressibility. Using ADDIW gives
+// more power to RISCVOptWInstrs.
+def : Pat<(or_is_add 33signbits_node:$rs1, simm12:$imm),
+ (ADDIW $rs1, simm12:$imm)>;
+
/// Loads
def : LdPat<sextloadi32, LW, i64>;
diff --git a/llvm/test/CodeGen/RISCV/or-is-add.ll b/llvm/test/CodeGen/RISCV/or-is-add.ll
index 73561675b17ec..ab20312d2c4f6 100644
--- a/llvm/test/CodeGen/RISCV/or-is-add.ll
+++ b/llvm/test/CodeGen/RISCV/or-is-add.ll
@@ -11,8 +11,8 @@ define signext i32 @test1(i32 signext %x) {
;
; RV64-LABEL: test1:
; RV64: # %bb.0:
-; RV64-NEXT: slliw a0, a0, 1
-; RV64-NEXT: addi a0, a0, 1
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: addiw a0, a0, 1
; RV64-NEXT: ret
%a = shl i32 %x, 1
%b = or i32 %a, 1
@@ -45,8 +45,8 @@ define signext i32 @test3(i32 signext %x) {
;
; RV64-LABEL: test3:
; RV64: # %bb.0:
-; RV64-NEXT: slliw a0, a0, 3
-; RV64-NEXT: addi a0, a0, 6
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: addiw a0, a0, 6
; RV64-NEXT: ret
%a = shl i32 %x, 3
%b = add i32 %a, 6
@@ -83,7 +83,7 @@ define signext i32 @test5(i32 signext %x) {
; RV64-LABEL: test5:
; RV64: # %bb.0:
; RV64-NEXT: srliw a0, a0, 24
-; RV64-NEXT: addi a0, a0, 256
+; RV64-NEXT: addiw a0, a0, 256
; RV64-NEXT: ret
%a = lshr i32 %x, 24
%b = xor i32 %a, 256
@@ -101,7 +101,7 @@ define i64 @test6(i64 %x) {
; RV64-LABEL: test6:
; RV64: # %bb.0:
; RV64-NEXT: srli a0, a0, 54
-; RV64-NEXT: addi a0, a0, 1024
+; RV64-NEXT: addiw a0, a0, 1024
; RV64-NEXT: ret
%a = lshr i64 %x, 54
%b = xor i64 %a, 1024
@@ -121,3 +121,105 @@ define signext i32 @test7(i32 signext %x) {
%a = or disjoint i32 %x, 1
ret i32 %a
}
+
+define void @pr128468(ptr %0, i32 signext %1, i32 signext %2) {
+; RV32-LABEL: pr128468:
+; RV32: # %bb.0:
+; RV32-NEXT: slli a3, a1, 3
+; RV32-NEXT: add a3, a0, a3
+; RV32-NEXT: lw a2, 4(a3)
+; RV32-NEXT: bgez a2, .LBB7_6
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: addi a2, a2, 1
+; RV32-NEXT: beq a2, a1, .LBB7_6
+; RV32-NEXT: # %bb.2: # %.preheader
+; RV32-NEXT: addi a3, a3, 4
+; RV32-NEXT: j .LBB7_4
+; RV32-NEXT: .LBB7_3: # in Loop: Header=BB7_4 Depth=1
+; RV32-NEXT: mv a2, a1
+; RV32-NEXT: addi a3, a3, 4
+; RV32-NEXT: beq a1, a1, .LBB7_6
+; RV32-NEXT: .LBB7_4: # =>This Inner Loop Header: Depth=1
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, a0, a1
+; RV32-NEXT: lw a4, 0(a1)
+; RV32-NEXT: mv a1, a2
+; RV32-NEXT: sw a4, 0(a3)
+; RV32-NEXT: slli a3, a2, 3
+; RV32-NEXT: add a3, a0, a3
+; RV32-NEXT: lw a2, 4(a3)
+; RV32-NEXT: bgez a2, .LBB7_3
+; RV32-NEXT: # %bb.5: # in Loop: Header=BB7_4 Depth=1
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: addi a2, a2, 1
+; RV32-NEXT: addi a3, a3, 4
+; RV32-NEXT: bne a2, a1, .LBB7_4
+; RV32-NEXT: .LBB7_6:
+; RV32-NEXT: ret
+;
+; RV64-LABEL: pr128468:
+; RV64: # %bb.0:
+; RV64-NEXT: slliw a2, a1, 1
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a3, a0, a3
+; RV64-NEXT: lw a4, 4(a3)
+; RV64-NEXT: bgez a4, .LBB7_6
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: addiw a2, a2, 1
+; RV64-NEXT: beq a2, a1, .LBB7_6
+; RV64-NEXT: # %bb.2: # %.preheader
+; RV64-NEXT: addi a3, a3, 4
+; RV64-NEXT: j .LBB7_4
+; RV64-NEXT: .LBB7_3: # in Loop: Header=BB7_4 Depth=1
+; RV64-NEXT: mv a2, a1
+; RV64-NEXT: addi a3, a3, 4
+; RV64-NEXT: beq a1, a1, .LBB7_6
+; RV64-NEXT: .LBB7_4: # =>This Inner Loop Header: Depth=1
+; RV64-NEXT: slli a1, a1, 2
+; RV64-NEXT: add a1, a0, a1
+; RV64-NEXT: lw a4, 0(a1)
+; RV64-NEXT: mv a1, a2
+; RV64-NEXT: slliw a2, a2, 1
+; RV64-NEXT: sw a4, 0(a3)
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a3, a0, a3
+; RV64-NEXT: lw a4, 4(a3)
+; RV64-NEXT: bgez a4, .LBB7_3
+; RV64-NEXT: # %bb.5: # in Loop: Header=BB7_4 Depth=1
+; RV64-NEXT: addiw a2, a2, 1
+; RV64-NEXT: addi a3, a3, 4
+; RV64-NEXT: bne a2, a1, .LBB7_4
+; RV64-NEXT: .LBB7_6:
+; RV64-NEXT: ret
+ %4 = shl nsw i32 %1, 1
+ %5 = or disjoint i32 %4, 1
+ %6 = sext i32 %5 to i64
+ %7 = getelementptr inbounds i32, ptr %0, i64 %6
+ %8 = load i32, ptr %7, align 4
+ %9 = icmp sgt i32 %8, -1
+ %10 = icmp eq i32 %5, %1
+ %11 = or i1 %9, %10
+ br i1 %11, label %27, label %12
+
+12: ; preds = %3, %12
+ %13 = phi i32 [ %25, %12 ], [ %5, %3 ]
+ %14 = phi ptr [ %22, %12 ], [ %7, %3 ]
+ %15 = phi i32 [ %13, %12 ], [ %1, %3 ]
+ %16 = sext i32 %15 to i64
+ %17 = getelementptr inbounds i32, ptr %0, i64 %16
+ %18 = load i32, ptr %17, align 4
+ store i32 %18, ptr %14, align 4
+ %19 = shl nsw i32 %13, 1
+ %20 = or disjoint i32 %19, 1
+ %21 = sext i32 %20 to i64
+ %22 = getelementptr inbounds i32, ptr %0, i64 %21
+ %23 = load i32, ptr %22, align 4
+ %24 = icmp slt i32 %23, 0
+ %25 = select i1 %24, i32 %20, i32 %13
+ %26 = icmp eq i32 %25, %13
+ br i1 %26, label %27, label %12
+
+27: ; preds = %12, %3
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/select-const.ll b/llvm/test/CodeGen/RISCV/select-const.ll
index 90a81c535cef2..4538572e90cac 100644
--- a/llvm/test/CodeGen/RISCV/select-const.ll
+++ b/llvm/test/CodeGen/RISCV/select-const.ll
@@ -114,7 +114,7 @@ define signext i32 @select_const_int_harder(i1 zeroext %a) nounwind {
; RV64ZICOND: # %bb.0:
; RV64ZICOND-NEXT: li a1, 32
; RV64ZICOND-NEXT: czero.nez a0, a1, a0
-; RV64ZICOND-NEXT: addi a0, a0, 6
+; RV64ZICOND-NEXT: addiw a0, a0, 6
; RV64ZICOND-NEXT: ret
%1 = select i1 %a, i32 6, i32 38
ret i32 %1
diff --git a/llvm/test/CodeGen/RISCV/select.ll b/llvm/test/CodeGen/RISCV/select.ll
index 4405cc3f5e163..303c4ac23b313 100644
--- a/llvm/test/CodeGen/RISCV/select.ll
+++ b/llvm/test/CodeGen/RISCV/select.ll
@@ -1884,15 +1884,22 @@ define i32 @select_cst_diff2(i1 zeroext %cond) {
; RV64IMXVTCONDOPS: # %bb.0:
; RV64IMXVTCONDOPS-NEXT: li a1, 2
; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
-; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 120
+; RV64IMXVTCONDOPS-NEXT: addiw a0, a0, 120
; RV64IMXVTCONDOPS-NEXT: ret
;
-; CHECKZICOND-LABEL: select_cst_diff2:
-; CHECKZICOND: # %bb.0:
-; CHECKZICOND-NEXT: li a1, 2
-; CHECKZICOND-NEXT: czero.nez a0, a1, a0
-; CHECKZICOND-NEXT: addi a0, a0, 120
-; CHECKZICOND-NEXT: ret
+; RV32IMZICOND-LABEL: select_cst_diff2:
+; RV32IMZICOND: # %bb.0:
+; RV32IMZICOND-NEXT: li a1, 2
+; RV32IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV32IMZICOND-NEXT: addi a0, a0, 120
+; RV32IMZICOND-NEXT: ret
+;
+; RV64IMZICOND-LABEL: select_cst_diff2:
+; RV64IMZICOND: # %bb.0:
+; RV64IMZICOND-NEXT: li a1, 2
+; RV64IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV64IMZICOND-NEXT: addiw a0, a0, 120
+; RV64IMZICOND-NEXT: ret
%ret = select i1 %cond, i32 120, i32 122
ret i32 %ret
}
@@ -2074,15 +2081,22 @@ define i32 @select_cst_diff8_invert(i1 zeroext %cond) {
; RV64IMXVTCONDOPS: # %bb.0:
; RV64IMXVTCONDOPS-NEXT: li a1, 8
; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
-; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 6
+; RV64IMXVTCONDOPS-NEXT: addiw a0, a0, 6
; RV64IMXVTCONDOPS-NEXT: ret
;
-; CHECKZICOND-LABEL: select_cst_diff8_invert:
-; CHECKZICOND: # %bb.0:
-; CHECKZICOND-NEXT: li a1, 8
-; CHECKZICOND-NEXT: czero.nez a0, a1, a0
-; CHECKZICOND-NEXT: addi a0, a0, 6
-; CHECKZICOND-NEXT: ret
+; RV32IMZICOND-LABEL: select_cst_diff8_invert:
+; RV32IMZICOND: # %bb.0:
+; RV32IMZICOND-NEXT: li a1, 8
+; RV32IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV32IMZICOND-NEXT: addi a0, a0, 6
+; RV32IMZICOND-NEXT: ret
+;
+; RV64IMZICOND-LABEL: select_cst_diff8_invert:
+; RV64IMZICOND: # %bb.0:
+; RV64IMZICOND-NEXT: li a1, 8
+; RV64IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV64IMZICOND-NEXT: addiw a0, a0, 6
+; RV64IMZICOND-NEXT: ret
%ret = select i1 %cond, i32 6, i32 14
ret i32 %ret
}
@@ -2151,15 +2165,22 @@ define i32 @select_cst_diff1024_invert(i1 zeroext %cond) {
; RV64IMXVTCONDOPS: # %bb.0:
; RV64IMXVTCONDOPS-NEXT: li a1, 1024
; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
-; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 6
+; RV64IMXVTCONDOPS-NEXT: addiw a0, a0, 6
; RV64IMXVTCONDOPS-NEXT: ret
;
-; CHECKZICOND-LABEL: select_cst_diff1024_invert:
-; CHECKZICOND: # %bb.0:
-; CHECKZICOND-NEXT: li a1, 1024
-; CHECKZICOND-NEXT: czero.nez a0, a1, a0
-; CHECKZICOND-NEXT: addi a0, a0, 6
-; CHECKZICOND-NEXT: ret
+; RV32IMZICOND-LABEL: select_cst_diff1024_invert:
+; RV32IMZICOND: # %bb.0:
+; RV32IMZICOND-NEXT: li a1, 1024
+; RV32IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV32IMZICOND-NEXT: addi a0, a0, 6
+; RV32IMZICOND-NEXT: ret
+;
+; RV64IMZICOND-LABEL: select_cst_diff1024_invert:
+; RV64IMZICOND: # %bb.0:
+; RV64IMZICOND-NEXT: li a1, 1024
+; RV64IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV64IMZICOND-NEXT: addiw a0, a0, 6
+; RV64IMZICOND-NEXT: ret
%ret = select i1 %cond, i32 6, i32 1030
ret i32 %ret
}
LGTM.
This feels a bit like fixing a symptom instead of the true cause. A worthwhile thing to do, but should we go further and extend the reasoning in the sext.w removal pass too?
Probably, but that's a big change to the pass. We'll need to implement the equivalent of computeKnownBits for every instruction inside of it.
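To make the scale of that concrete, here is a toy, self-contained model of the per-opcode reasoning such an analysis would need. The names, the enum, and the mini instruction set are invented for illustration; this is not RISCVOptWInstrs code or LLVM's actual ComputeNumSignBits API:

```cpp
#include <algorithm>

// Toy transfer functions (illustrative only): how many leading bits
// of a 64-bit result are known to be copies of the sign bit.
enum class Op { AddiW, SlliW, Slli, Srai };

unsigned numSignBits(Op op, unsigned shamt, unsigned inSignBits) {
  switch (op) {
  case Op::AddiW:
  case Op::SlliW:
    // W-instructions sign-extend their 32-bit result, so bits 63..31
    // all equal bit 31: 33 known sign bits.
    return 33;
  case Op::Srai:
    // Arithmetic right shift duplicates the sign bit shamt more times.
    return std::min(64u, inSignBits + shamt);
  case Op::Slli:
    // Left shift discards shamt of the known sign-bit copies.
    return inSignBits > shamt ? inSignBits - shamt : 1;
  }
  return 1; // Fallback: only the sign bit itself is known.
}
```

Every additional instruction the pass wants to see through needs its own case along these lines, which is what makes it a big change.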