Skip to content

Commit 4f18f3f

Browse files
authored
[RISCV] Use addiw for or_is_add when or input is sign extended. (#128635)
We prefer to emit addi instead of ori because it's more compressible, but this can pessimize the sext.w removal pass. If the input to the OR is known to be a sign-extended 32-bit value, we can use addiw instead of addi, which gives more power to the sext.w removal pass because addiw is known to produce a sign-extended value and to consume only the lower 32 bits of its input. Fixes #128468.
1 parent 48db4e8 commit 4f18f3f

File tree

4 files changed

+157
-28
lines changed

4 files changed

+157
-28
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2033,6 +2033,12 @@ def : PatGprImm<binop_allwusers<and>, ANDI, u32simm12>;
20332033
def : PatGprImm<binop_allwusers<or>, ORI, u32simm12>;
20342034
def : PatGprImm<binop_allwusers<xor>, XORI, u32simm12>;
20352035

2036+
// Select 'or' as ADDIW if the immediate bits are known to be 0 in $rs1 and
2037+
// $rs1 is sign extended. This can improve compressibility. Using ADDIW gives
2038+
// more power to RISCVOptWInstrs.
2039+
def : Pat<(or_is_add 33signbits_node:$rs1, simm12:$imm),
2040+
(ADDIW $rs1, simm12:$imm)>;
2041+
20362042
/// Loads
20372043

20382044
def : LdPat<sextloadi32, LW, i64>;

llvm/test/CodeGen/RISCV/or-is-add.ll

Lines changed: 108 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ define signext i32 @test1(i32 signext %x) {
1111
;
1212
; RV64-LABEL: test1:
1313
; RV64: # %bb.0:
14-
; RV64-NEXT: slliw a0, a0, 1
15-
; RV64-NEXT: addi a0, a0, 1
14+
; RV64-NEXT: slli a0, a0, 1
15+
; RV64-NEXT: addiw a0, a0, 1
1616
; RV64-NEXT: ret
1717
%a = shl i32 %x, 1
1818
%b = or i32 %a, 1
@@ -45,8 +45,8 @@ define signext i32 @test3(i32 signext %x) {
4545
;
4646
; RV64-LABEL: test3:
4747
; RV64: # %bb.0:
48-
; RV64-NEXT: slliw a0, a0, 3
49-
; RV64-NEXT: addi a0, a0, 6
48+
; RV64-NEXT: slli a0, a0, 3
49+
; RV64-NEXT: addiw a0, a0, 6
5050
; RV64-NEXT: ret
5151
%a = shl i32 %x, 3
5252
%b = add i32 %a, 6
@@ -83,7 +83,7 @@ define signext i32 @test5(i32 signext %x) {
8383
; RV64-LABEL: test5:
8484
; RV64: # %bb.0:
8585
; RV64-NEXT: srliw a0, a0, 24
86-
; RV64-NEXT: addi a0, a0, 256
86+
; RV64-NEXT: addiw a0, a0, 256
8787
; RV64-NEXT: ret
8888
%a = lshr i32 %x, 24
8989
%b = xor i32 %a, 256
@@ -101,7 +101,7 @@ define i64 @test6(i64 %x) {
101101
; RV64-LABEL: test6:
102102
; RV64: # %bb.0:
103103
; RV64-NEXT: srli a0, a0, 54
104-
; RV64-NEXT: addi a0, a0, 1024
104+
; RV64-NEXT: addiw a0, a0, 1024
105105
; RV64-NEXT: ret
106106
%a = lshr i64 %x, 54
107107
%b = xor i64 %a, 1024
@@ -121,3 +121,105 @@ define signext i32 @test7(i32 signext %x) {
121121
%a = or disjoint i32 %x, 1
122122
ret i32 %a
123123
}
124+
125+
define void @pr128468(ptr %0, i32 signext %1, i32 signext %2) {
126+
; RV32-LABEL: pr128468:
127+
; RV32: # %bb.0:
128+
; RV32-NEXT: slli a3, a1, 3
129+
; RV32-NEXT: add a3, a0, a3
130+
; RV32-NEXT: lw a2, 4(a3)
131+
; RV32-NEXT: bgez a2, .LBB7_6
132+
; RV32-NEXT: # %bb.1:
133+
; RV32-NEXT: slli a2, a1, 1
134+
; RV32-NEXT: addi a2, a2, 1
135+
; RV32-NEXT: beq a2, a1, .LBB7_6
136+
; RV32-NEXT: # %bb.2: # %.preheader
137+
; RV32-NEXT: addi a3, a3, 4
138+
; RV32-NEXT: j .LBB7_4
139+
; RV32-NEXT: .LBB7_3: # in Loop: Header=BB7_4 Depth=1
140+
; RV32-NEXT: mv a2, a1
141+
; RV32-NEXT: addi a3, a3, 4
142+
; RV32-NEXT: beq a1, a1, .LBB7_6
143+
; RV32-NEXT: .LBB7_4: # =>This Inner Loop Header: Depth=1
144+
; RV32-NEXT: slli a1, a1, 2
145+
; RV32-NEXT: add a1, a0, a1
146+
; RV32-NEXT: lw a4, 0(a1)
147+
; RV32-NEXT: mv a1, a2
148+
; RV32-NEXT: sw a4, 0(a3)
149+
; RV32-NEXT: slli a3, a2, 3
150+
; RV32-NEXT: add a3, a0, a3
151+
; RV32-NEXT: lw a2, 4(a3)
152+
; RV32-NEXT: bgez a2, .LBB7_3
153+
; RV32-NEXT: # %bb.5: # in Loop: Header=BB7_4 Depth=1
154+
; RV32-NEXT: slli a2, a1, 1
155+
; RV32-NEXT: addi a2, a2, 1
156+
; RV32-NEXT: addi a3, a3, 4
157+
; RV32-NEXT: bne a2, a1, .LBB7_4
158+
; RV32-NEXT: .LBB7_6:
159+
; RV32-NEXT: ret
160+
;
161+
; RV64-LABEL: pr128468:
162+
; RV64: # %bb.0:
163+
; RV64-NEXT: slliw a2, a1, 1
164+
; RV64-NEXT: slli a3, a2, 2
165+
; RV64-NEXT: add a3, a0, a3
166+
; RV64-NEXT: lw a4, 4(a3)
167+
; RV64-NEXT: bgez a4, .LBB7_6
168+
; RV64-NEXT: # %bb.1:
169+
; RV64-NEXT: addiw a2, a2, 1
170+
; RV64-NEXT: beq a2, a1, .LBB7_6
171+
; RV64-NEXT: # %bb.2: # %.preheader
172+
; RV64-NEXT: addi a3, a3, 4
173+
; RV64-NEXT: j .LBB7_4
174+
; RV64-NEXT: .LBB7_3: # in Loop: Header=BB7_4 Depth=1
175+
; RV64-NEXT: mv a2, a1
176+
; RV64-NEXT: addi a3, a3, 4
177+
; RV64-NEXT: beq a1, a1, .LBB7_6
178+
; RV64-NEXT: .LBB7_4: # =>This Inner Loop Header: Depth=1
179+
; RV64-NEXT: slli a1, a1, 2
180+
; RV64-NEXT: add a1, a0, a1
181+
; RV64-NEXT: lw a4, 0(a1)
182+
; RV64-NEXT: mv a1, a2
183+
; RV64-NEXT: slliw a2, a2, 1
184+
; RV64-NEXT: sw a4, 0(a3)
185+
; RV64-NEXT: slli a3, a2, 2
186+
; RV64-NEXT: add a3, a0, a3
187+
; RV64-NEXT: lw a4, 4(a3)
188+
; RV64-NEXT: bgez a4, .LBB7_3
189+
; RV64-NEXT: # %bb.5: # in Loop: Header=BB7_4 Depth=1
190+
; RV64-NEXT: addiw a2, a2, 1
191+
; RV64-NEXT: addi a3, a3, 4
192+
; RV64-NEXT: bne a2, a1, .LBB7_4
193+
; RV64-NEXT: .LBB7_6:
194+
; RV64-NEXT: ret
195+
%4 = shl nsw i32 %1, 1
196+
%5 = or disjoint i32 %4, 1
197+
%6 = sext i32 %5 to i64
198+
%7 = getelementptr inbounds i32, ptr %0, i64 %6
199+
%8 = load i32, ptr %7, align 4
200+
%9 = icmp sgt i32 %8, -1
201+
%10 = icmp eq i32 %5, %1
202+
%11 = or i1 %9, %10
203+
br i1 %11, label %27, label %12
204+
205+
12: ; preds = %3, %12
206+
%13 = phi i32 [ %25, %12 ], [ %5, %3 ]
207+
%14 = phi ptr [ %22, %12 ], [ %7, %3 ]
208+
%15 = phi i32 [ %13, %12 ], [ %1, %3 ]
209+
%16 = sext i32 %15 to i64
210+
%17 = getelementptr inbounds i32, ptr %0, i64 %16
211+
%18 = load i32, ptr %17, align 4
212+
store i32 %18, ptr %14, align 4
213+
%19 = shl nsw i32 %13, 1
214+
%20 = or disjoint i32 %19, 1
215+
%21 = sext i32 %20 to i64
216+
%22 = getelementptr inbounds i32, ptr %0, i64 %21
217+
%23 = load i32, ptr %22, align 4
218+
%24 = icmp slt i32 %23, 0
219+
%25 = select i1 %24, i32 %20, i32 %13
220+
%26 = icmp eq i32 %25, %13
221+
br i1 %26, label %27, label %12
222+
223+
27: ; preds = %12, %3
224+
ret void
225+
}

llvm/test/CodeGen/RISCV/select-const.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ define signext i32 @select_const_int_harder(i1 zeroext %a) nounwind {
114114
; RV64ZICOND: # %bb.0:
115115
; RV64ZICOND-NEXT: li a1, 32
116116
; RV64ZICOND-NEXT: czero.nez a0, a1, a0
117-
; RV64ZICOND-NEXT: addi a0, a0, 6
117+
; RV64ZICOND-NEXT: addiw a0, a0, 6
118118
; RV64ZICOND-NEXT: ret
119119
%1 = select i1 %a, i32 6, i32 38
120120
ret i32 %1

llvm/test/CodeGen/RISCV/select.ll

Lines changed: 42 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1884,15 +1884,22 @@ define i32 @select_cst_diff2(i1 zeroext %cond) {
18841884
; RV64IMXVTCONDOPS: # %bb.0:
18851885
; RV64IMXVTCONDOPS-NEXT: li a1, 2
18861886
; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
1887-
; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 120
1887+
; RV64IMXVTCONDOPS-NEXT: addiw a0, a0, 120
18881888
; RV64IMXVTCONDOPS-NEXT: ret
18891889
;
1890-
; CHECKZICOND-LABEL: select_cst_diff2:
1891-
; CHECKZICOND: # %bb.0:
1892-
; CHECKZICOND-NEXT: li a1, 2
1893-
; CHECKZICOND-NEXT: czero.nez a0, a1, a0
1894-
; CHECKZICOND-NEXT: addi a0, a0, 120
1895-
; CHECKZICOND-NEXT: ret
1890+
; RV32IMZICOND-LABEL: select_cst_diff2:
1891+
; RV32IMZICOND: # %bb.0:
1892+
; RV32IMZICOND-NEXT: li a1, 2
1893+
; RV32IMZICOND-NEXT: czero.nez a0, a1, a0
1894+
; RV32IMZICOND-NEXT: addi a0, a0, 120
1895+
; RV32IMZICOND-NEXT: ret
1896+
;
1897+
; RV64IMZICOND-LABEL: select_cst_diff2:
1898+
; RV64IMZICOND: # %bb.0:
1899+
; RV64IMZICOND-NEXT: li a1, 2
1900+
; RV64IMZICOND-NEXT: czero.nez a0, a1, a0
1901+
; RV64IMZICOND-NEXT: addiw a0, a0, 120
1902+
; RV64IMZICOND-NEXT: ret
18961903
%ret = select i1 %cond, i32 120, i32 122
18971904
ret i32 %ret
18981905
}
@@ -2074,15 +2081,22 @@ define i32 @select_cst_diff8_invert(i1 zeroext %cond) {
20742081
; RV64IMXVTCONDOPS: # %bb.0:
20752082
; RV64IMXVTCONDOPS-NEXT: li a1, 8
20762083
; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
2077-
; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 6
2084+
; RV64IMXVTCONDOPS-NEXT: addiw a0, a0, 6
20782085
; RV64IMXVTCONDOPS-NEXT: ret
20792086
;
2080-
; CHECKZICOND-LABEL: select_cst_diff8_invert:
2081-
; CHECKZICOND: # %bb.0:
2082-
; CHECKZICOND-NEXT: li a1, 8
2083-
; CHECKZICOND-NEXT: czero.nez a0, a1, a0
2084-
; CHECKZICOND-NEXT: addi a0, a0, 6
2085-
; CHECKZICOND-NEXT: ret
2087+
; RV32IMZICOND-LABEL: select_cst_diff8_invert:
2088+
; RV32IMZICOND: # %bb.0:
2089+
; RV32IMZICOND-NEXT: li a1, 8
2090+
; RV32IMZICOND-NEXT: czero.nez a0, a1, a0
2091+
; RV32IMZICOND-NEXT: addi a0, a0, 6
2092+
; RV32IMZICOND-NEXT: ret
2093+
;
2094+
; RV64IMZICOND-LABEL: select_cst_diff8_invert:
2095+
; RV64IMZICOND: # %bb.0:
2096+
; RV64IMZICOND-NEXT: li a1, 8
2097+
; RV64IMZICOND-NEXT: czero.nez a0, a1, a0
2098+
; RV64IMZICOND-NEXT: addiw a0, a0, 6
2099+
; RV64IMZICOND-NEXT: ret
20862100
%ret = select i1 %cond, i32 6, i32 14
20872101
ret i32 %ret
20882102
}
@@ -2151,15 +2165,22 @@ define i32 @select_cst_diff1024_invert(i1 zeroext %cond) {
21512165
; RV64IMXVTCONDOPS: # %bb.0:
21522166
; RV64IMXVTCONDOPS-NEXT: li a1, 1024
21532167
; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
2154-
; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 6
2168+
; RV64IMXVTCONDOPS-NEXT: addiw a0, a0, 6
21552169
; RV64IMXVTCONDOPS-NEXT: ret
21562170
;
2157-
; CHECKZICOND-LABEL: select_cst_diff1024_invert:
2158-
; CHECKZICOND: # %bb.0:
2159-
; CHECKZICOND-NEXT: li a1, 1024
2160-
; CHECKZICOND-NEXT: czero.nez a0, a1, a0
2161-
; CHECKZICOND-NEXT: addi a0, a0, 6
2162-
; CHECKZICOND-NEXT: ret
2171+
; RV32IMZICOND-LABEL: select_cst_diff1024_invert:
2172+
; RV32IMZICOND: # %bb.0:
2173+
; RV32IMZICOND-NEXT: li a1, 1024
2174+
; RV32IMZICOND-NEXT: czero.nez a0, a1, a0
2175+
; RV32IMZICOND-NEXT: addi a0, a0, 6
2176+
; RV32IMZICOND-NEXT: ret
2177+
;
2178+
; RV64IMZICOND-LABEL: select_cst_diff1024_invert:
2179+
; RV64IMZICOND: # %bb.0:
2180+
; RV64IMZICOND-NEXT: li a1, 1024
2181+
; RV64IMZICOND-NEXT: czero.nez a0, a1, a0
2182+
; RV64IMZICOND-NEXT: addiw a0, a0, 6
2183+
; RV64IMZICOND-NEXT: ret
21632184
%ret = select i1 %cond, i32 6, i32 1030
21642185
ret i32 %ret
21652186
}

0 commit comments

Comments
 (0)