Skip to content

Commit 1bf4bbc

Browse files
committed
[LegalizeTypes][RISCV][WebAssembly] Expand ABS in PromoteIntRes_ABS if it will expand to sra+xor+sub later.
If we promote the ABS and then Expand in LegalizeDAG, then both the sra and the xor will have their inputs sign extended. This generates extra code on RISCV which lacks an i8 or i16 sign extend instruction. If we expand during type legalization, then only the sra will get its input sign extended. RISCV is able to combine this with the sra by doing a shift left followed by an sra. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D121664
1 parent ad94dfb commit 1bf4bbc

File tree

3 files changed

+78
-89
lines changed

3 files changed

+78
-89
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1432,6 +1432,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO_CARRY(SDNode *N,
14321432
}
14331433

14341434
SDValue DAGTypeLegalizer::PromoteIntRes_ABS(SDNode *N) {
1435+
EVT OVT = N->getValueType(0);
1436+
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
1437+
1438+
// If a larger ABS or SMAX isn't supported by the target, try to expand now.
1439+
// If we expand later we'll end up sign extending more than just the sra input
1440+
// in sra+xor+sub expansion.
1441+
if (!OVT.isVector() &&
1442+
!TLI.isOperationLegalOrCustomOrPromote(ISD::ABS, NVT) &&
1443+
!TLI.isOperationLegal(ISD::SMAX, NVT)) {
1444+
if (SDValue Res = TLI.expandABS(N, DAG))
1445+
return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Res);
1446+
}
1447+
14351448
SDValue Op0 = SExtPromotedInteger(N->getOperand(0));
14361449
return DAG.getNode(ISD::ABS, SDLoc(N), Op0.getValueType(), Op0);
14371450
}

llvm/test/CodeGen/RISCV/iabs.ll

Lines changed: 64 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,13 @@ declare i32 @llvm.abs.i32(i32, i1 immarg)
1818
declare i64 @llvm.abs.i64(i64, i1 immarg)
1919
declare i128 @llvm.abs.i128(i128, i1 immarg)
2020

21-
; FIXME: Sign extending the input to the input to the xor isn't needed and
22-
; causes an extra srai.
2321
define i8 @abs8(i8 %x) {
2422
; RV32I-LABEL: abs8:
2523
; RV32I: # %bb.0:
26-
; RV32I-NEXT: slli a0, a0, 24
27-
; RV32I-NEXT: srai a1, a0, 24
28-
; RV32I-NEXT: srai a0, a0, 31
29-
; RV32I-NEXT: xor a1, a1, a0
30-
; RV32I-NEXT: sub a0, a1, a0
24+
; RV32I-NEXT: slli a1, a0, 24
25+
; RV32I-NEXT: srai a1, a1, 31
26+
; RV32I-NEXT: xor a0, a0, a1
27+
; RV32I-NEXT: sub a0, a0, a1
3128
; RV32I-NEXT: ret
3229
;
3330
; RV32ZBB-LABEL: abs8:
@@ -39,20 +36,18 @@ define i8 @abs8(i8 %x) {
3936
;
4037
; RV32ZBT-LABEL: abs8:
4138
; RV32ZBT: # %bb.0:
42-
; RV32ZBT-NEXT: slli a0, a0, 24
43-
; RV32ZBT-NEXT: srai a1, a0, 24
44-
; RV32ZBT-NEXT: srai a0, a0, 31
45-
; RV32ZBT-NEXT: xor a1, a1, a0
46-
; RV32ZBT-NEXT: sub a0, a1, a0
39+
; RV32ZBT-NEXT: slli a1, a0, 24
40+
; RV32ZBT-NEXT: srai a1, a1, 31
41+
; RV32ZBT-NEXT: xor a0, a0, a1
42+
; RV32ZBT-NEXT: sub a0, a0, a1
4743
; RV32ZBT-NEXT: ret
4844
;
4945
; RV64I-LABEL: abs8:
5046
; RV64I: # %bb.0:
51-
; RV64I-NEXT: slli a0, a0, 56
52-
; RV64I-NEXT: srai a1, a0, 56
53-
; RV64I-NEXT: srai a0, a0, 63
54-
; RV64I-NEXT: xor a1, a1, a0
55-
; RV64I-NEXT: sub a0, a1, a0
47+
; RV64I-NEXT: slli a1, a0, 56
48+
; RV64I-NEXT: srai a1, a1, 63
49+
; RV64I-NEXT: xor a0, a0, a1
50+
; RV64I-NEXT: sub a0, a0, a1
5651
; RV64I-NEXT: ret
5752
;
5853
; RV64ZBB-LABEL: abs8:
@@ -64,26 +59,22 @@ define i8 @abs8(i8 %x) {
6459
;
6560
; RV64ZBT-LABEL: abs8:
6661
; RV64ZBT: # %bb.0:
67-
; RV64ZBT-NEXT: slli a0, a0, 56
68-
; RV64ZBT-NEXT: srai a1, a0, 56
69-
; RV64ZBT-NEXT: srai a0, a0, 63
70-
; RV64ZBT-NEXT: xor a1, a1, a0
71-
; RV64ZBT-NEXT: sub a0, a1, a0
62+
; RV64ZBT-NEXT: slli a1, a0, 56
63+
; RV64ZBT-NEXT: srai a1, a1, 63
64+
; RV64ZBT-NEXT: xor a0, a0, a1
65+
; RV64ZBT-NEXT: sub a0, a0, a1
7266
; RV64ZBT-NEXT: ret
7367
%abs = tail call i8 @llvm.abs.i8(i8 %x, i1 true)
7468
ret i8 %abs
7569
}
7670

77-
; FIXME: Sign extending the input to the input to the xor isn't needed and
78-
; causes an extra srai.
7971
define i8 @select_abs8(i8 %x) {
8072
; RV32I-LABEL: select_abs8:
8173
; RV32I: # %bb.0:
82-
; RV32I-NEXT: slli a0, a0, 24
83-
; RV32I-NEXT: srai a1, a0, 24
84-
; RV32I-NEXT: srai a0, a0, 31
85-
; RV32I-NEXT: xor a1, a1, a0
86-
; RV32I-NEXT: sub a0, a1, a0
74+
; RV32I-NEXT: slli a1, a0, 24
75+
; RV32I-NEXT: srai a1, a1, 31
76+
; RV32I-NEXT: xor a0, a0, a1
77+
; RV32I-NEXT: sub a0, a0, a1
8778
; RV32I-NEXT: ret
8879
;
8980
; RV32ZBB-LABEL: select_abs8:
@@ -95,20 +86,18 @@ define i8 @select_abs8(i8 %x) {
9586
;
9687
; RV32ZBT-LABEL: select_abs8:
9788
; RV32ZBT: # %bb.0:
98-
; RV32ZBT-NEXT: slli a0, a0, 24
99-
; RV32ZBT-NEXT: srai a1, a0, 24
100-
; RV32ZBT-NEXT: srai a0, a0, 31
101-
; RV32ZBT-NEXT: xor a1, a1, a0
102-
; RV32ZBT-NEXT: sub a0, a1, a0
89+
; RV32ZBT-NEXT: slli a1, a0, 24
90+
; RV32ZBT-NEXT: srai a1, a1, 31
91+
; RV32ZBT-NEXT: xor a0, a0, a1
92+
; RV32ZBT-NEXT: sub a0, a0, a1
10393
; RV32ZBT-NEXT: ret
10494
;
10595
; RV64I-LABEL: select_abs8:
10696
; RV64I: # %bb.0:
107-
; RV64I-NEXT: slli a0, a0, 56
108-
; RV64I-NEXT: srai a1, a0, 56
109-
; RV64I-NEXT: srai a0, a0, 63
110-
; RV64I-NEXT: xor a1, a1, a0
111-
; RV64I-NEXT: sub a0, a1, a0
97+
; RV64I-NEXT: slli a1, a0, 56
98+
; RV64I-NEXT: srai a1, a1, 63
99+
; RV64I-NEXT: xor a0, a0, a1
100+
; RV64I-NEXT: sub a0, a0, a1
112101
; RV64I-NEXT: ret
113102
;
114103
; RV64ZBB-LABEL: select_abs8:
@@ -120,28 +109,24 @@ define i8 @select_abs8(i8 %x) {
120109
;
121110
; RV64ZBT-LABEL: select_abs8:
122111
; RV64ZBT: # %bb.0:
123-
; RV64ZBT-NEXT: slli a0, a0, 56
124-
; RV64ZBT-NEXT: srai a1, a0, 56
125-
; RV64ZBT-NEXT: srai a0, a0, 63
126-
; RV64ZBT-NEXT: xor a1, a1, a0
127-
; RV64ZBT-NEXT: sub a0, a1, a0
112+
; RV64ZBT-NEXT: slli a1, a0, 56
113+
; RV64ZBT-NEXT: srai a1, a1, 63
114+
; RV64ZBT-NEXT: xor a0, a0, a1
115+
; RV64ZBT-NEXT: sub a0, a0, a1
128116
; RV64ZBT-NEXT: ret
129117
%1 = icmp slt i8 %x, 0
130118
%2 = sub nsw i8 0, %x
131119
%3 = select i1 %1, i8 %2, i8 %x
132120
ret i8 %3
133121
}
134122

135-
; FIXME: Sign extending the input to the input to the xor isn't needed and
136-
; causes an extra srai.
137123
define i16 @abs16(i16 %x) {
138124
; RV32I-LABEL: abs16:
139125
; RV32I: # %bb.0:
140-
; RV32I-NEXT: slli a0, a0, 16
141-
; RV32I-NEXT: srai a1, a0, 16
142-
; RV32I-NEXT: srai a0, a0, 31
143-
; RV32I-NEXT: xor a1, a1, a0
144-
; RV32I-NEXT: sub a0, a1, a0
126+
; RV32I-NEXT: slli a1, a0, 16
127+
; RV32I-NEXT: srai a1, a1, 31
128+
; RV32I-NEXT: xor a0, a0, a1
129+
; RV32I-NEXT: sub a0, a0, a1
145130
; RV32I-NEXT: ret
146131
;
147132
; RV32ZBB-LABEL: abs16:
@@ -153,20 +138,18 @@ define i16 @abs16(i16 %x) {
153138
;
154139
; RV32ZBT-LABEL: abs16:
155140
; RV32ZBT: # %bb.0:
156-
; RV32ZBT-NEXT: slli a0, a0, 16
157-
; RV32ZBT-NEXT: srai a1, a0, 16
158-
; RV32ZBT-NEXT: srai a0, a0, 31
159-
; RV32ZBT-NEXT: xor a1, a1, a0
160-
; RV32ZBT-NEXT: sub a0, a1, a0
141+
; RV32ZBT-NEXT: slli a1, a0, 16
142+
; RV32ZBT-NEXT: srai a1, a1, 31
143+
; RV32ZBT-NEXT: xor a0, a0, a1
144+
; RV32ZBT-NEXT: sub a0, a0, a1
161145
; RV32ZBT-NEXT: ret
162146
;
163147
; RV64I-LABEL: abs16:
164148
; RV64I: # %bb.0:
165-
; RV64I-NEXT: slli a0, a0, 48
166-
; RV64I-NEXT: srai a1, a0, 48
167-
; RV64I-NEXT: srai a0, a0, 63
168-
; RV64I-NEXT: xor a1, a1, a0
169-
; RV64I-NEXT: sub a0, a1, a0
149+
; RV64I-NEXT: slli a1, a0, 48
150+
; RV64I-NEXT: srai a1, a1, 63
151+
; RV64I-NEXT: xor a0, a0, a1
152+
; RV64I-NEXT: sub a0, a0, a1
170153
; RV64I-NEXT: ret
171154
;
172155
; RV64ZBB-LABEL: abs16:
@@ -178,26 +161,22 @@ define i16 @abs16(i16 %x) {
178161
;
179162
; RV64ZBT-LABEL: abs16:
180163
; RV64ZBT: # %bb.0:
181-
; RV64ZBT-NEXT: slli a0, a0, 48
182-
; RV64ZBT-NEXT: srai a1, a0, 48
183-
; RV64ZBT-NEXT: srai a0, a0, 63
184-
; RV64ZBT-NEXT: xor a1, a1, a0
185-
; RV64ZBT-NEXT: sub a0, a1, a0
164+
; RV64ZBT-NEXT: slli a1, a0, 48
165+
; RV64ZBT-NEXT: srai a1, a1, 63
166+
; RV64ZBT-NEXT: xor a0, a0, a1
167+
; RV64ZBT-NEXT: sub a0, a0, a1
186168
; RV64ZBT-NEXT: ret
187169
%abs = tail call i16 @llvm.abs.i16(i16 %x, i1 true)
188170
ret i16 %abs
189171
}
190172

191-
; FIXME: Sign extending the input to the input to the xor isn't needed and
192-
; causes an extra srai.
193173
define i16 @select_abs16(i16 %x) {
194174
; RV32I-LABEL: select_abs16:
195175
; RV32I: # %bb.0:
196-
; RV32I-NEXT: slli a0, a0, 16
197-
; RV32I-NEXT: srai a1, a0, 16
198-
; RV32I-NEXT: srai a0, a0, 31
199-
; RV32I-NEXT: xor a1, a1, a0
200-
; RV32I-NEXT: sub a0, a1, a0
176+
; RV32I-NEXT: slli a1, a0, 16
177+
; RV32I-NEXT: srai a1, a1, 31
178+
; RV32I-NEXT: xor a0, a0, a1
179+
; RV32I-NEXT: sub a0, a0, a1
201180
; RV32I-NEXT: ret
202181
;
203182
; RV32ZBB-LABEL: select_abs16:
@@ -209,20 +188,18 @@ define i16 @select_abs16(i16 %x) {
209188
;
210189
; RV32ZBT-LABEL: select_abs16:
211190
; RV32ZBT: # %bb.0:
212-
; RV32ZBT-NEXT: slli a0, a0, 16
213-
; RV32ZBT-NEXT: srai a1, a0, 16
214-
; RV32ZBT-NEXT: srai a0, a0, 31
215-
; RV32ZBT-NEXT: xor a1, a1, a0
216-
; RV32ZBT-NEXT: sub a0, a1, a0
191+
; RV32ZBT-NEXT: slli a1, a0, 16
192+
; RV32ZBT-NEXT: srai a1, a1, 31
193+
; RV32ZBT-NEXT: xor a0, a0, a1
194+
; RV32ZBT-NEXT: sub a0, a0, a1
217195
; RV32ZBT-NEXT: ret
218196
;
219197
; RV64I-LABEL: select_abs16:
220198
; RV64I: # %bb.0:
221-
; RV64I-NEXT: slli a0, a0, 48
222-
; RV64I-NEXT: srai a1, a0, 48
223-
; RV64I-NEXT: srai a0, a0, 63
224-
; RV64I-NEXT: xor a1, a1, a0
225-
; RV64I-NEXT: sub a0, a1, a0
199+
; RV64I-NEXT: slli a1, a0, 48
200+
; RV64I-NEXT: srai a1, a1, 63
201+
; RV64I-NEXT: xor a0, a0, a1
202+
; RV64I-NEXT: sub a0, a0, a1
226203
; RV64I-NEXT: ret
227204
;
228205
; RV64ZBB-LABEL: select_abs16:
@@ -234,11 +211,10 @@ define i16 @select_abs16(i16 %x) {
234211
;
235212
; RV64ZBT-LABEL: select_abs16:
236213
; RV64ZBT: # %bb.0:
237-
; RV64ZBT-NEXT: slli a0, a0, 48
238-
; RV64ZBT-NEXT: srai a1, a0, 48
239-
; RV64ZBT-NEXT: srai a0, a0, 63
240-
; RV64ZBT-NEXT: xor a1, a1, a0
241-
; RV64ZBT-NEXT: sub a0, a1, a0
214+
; RV64ZBT-NEXT: slli a1, a0, 48
215+
; RV64ZBT-NEXT: srai a1, a1, 63
216+
; RV64ZBT-NEXT: xor a0, a0, a1
217+
; RV64ZBT-NEXT: sub a0, a0, a1
242218
; RV64ZBT-NEXT: ret
243219
%1 = icmp slt i16 %x, 0
244220
%2 = sub nsw i16 0, %x

llvm/test/CodeGen/WebAssembly/PR41149.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ define void @mod() {
1010
; CHECK-NEXT: i32.load8_s 0
1111
; CHECK-NEXT: local.tee 0
1212
; CHECK-NEXT: local.get 0
13-
; CHECK-NEXT: i32.const 31
13+
; CHECK-NEXT: i32.const 7
1414
; CHECK-NEXT: i32.shr_s
1515
; CHECK-NEXT: local.tee 0
1616
; CHECK-NEXT: i32.xor

0 commit comments

Comments
 (0)