Skip to content

Commit 3885879

Browse files
committed
[DAGCombine] Add simple folds for SSHLSAT/USHLSAT
Do "simplifyShift" and "FoldConstantArithmetic" folds for the SSHLSAT and USHLSAT DAG nodes.

This includes folds such as:
  (shlsat undef/poison, x) -> 0
  (shlsat x, undef/poison) -> undef
  (shlsat x, too_large_shamt) -> undef
  (shlsat 0, x) -> 0
  (shlsat x, 0) -> x
  (shlsat c1, c2) -> c3

Differential Revision: https://reviews.llvm.org/D118603
1 parent 06105f2 commit 3885879

File tree

4 files changed

+38
-140
lines changed

4 files changed

+38
-140
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,7 @@ namespace {
440440
SDValue visitSRA(SDNode *N);
441441
SDValue visitSRL(SDNode *N);
442442
SDValue visitFunnelShift(SDNode *N);
443+
SDValue visitSHLSAT(SDNode *N);
443444
SDValue visitRotate(SDNode *N);
444445
SDValue visitABS(SDNode *N);
445446
SDValue visitBSWAP(SDNode *N);
@@ -1652,6 +1653,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
16521653
case ISD::ROTL: return visitRotate(N);
16531654
case ISD::FSHL:
16541655
case ISD::FSHR: return visitFunnelShift(N);
1656+
case ISD::SSHLSAT:
1657+
case ISD::USHLSAT: return visitSHLSAT(N);
16551658
case ISD::ABS: return visitABS(N);
16561659
case ISD::BSWAP: return visitBSWAP(N);
16571660
case ISD::BITREVERSE: return visitBITREVERSE(N);
@@ -9346,6 +9349,22 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
93469349
return SDValue();
93479350
}
93489351

9352+
SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
9353+
SDValue N0 = N->getOperand(0);
9354+
SDValue N1 = N->getOperand(1);
9355+
if (SDValue V = DAG.simplifyShift(N0, N1))
9356+
return V;
9357+
9358+
EVT VT = N0.getValueType();
9359+
9360+
// fold (*shlsat c1, c2) -> c1<<c2
9361+
if (SDValue C =
9362+
DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1}))
9363+
return C;
9364+
9365+
return SDValue();
9366+
}
9367+
93499368
// Given a ABS node, detect the following pattern:
93509369
// (ABS (SUB (EXTEND a), (EXTEND b))).
93519370
// Generates UABD/SABD instruction.

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5242,6 +5242,8 @@ static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
52425242
case ISD::UADDSAT: return C1.uadd_sat(C2);
52435243
case ISD::SSUBSAT: return C1.ssub_sat(C2);
52445244
case ISD::USUBSAT: return C1.usub_sat(C2);
5245+
case ISD::SSHLSAT: return C1.sshl_sat(C2);
5246+
case ISD::USHLSAT: return C1.ushl_sat(C2);
52455247
case ISD::UDIV:
52465248
if (!C2.getBoolValue())
52475249
break;

llvm/test/CodeGen/AArch64/sshl_sat.ll

Lines changed: 10 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,7 @@ declare <4 x i16> @llvm.sshl.sat.v4i16(<4 x i16>, <4 x i16>)
88
define i16 @combine_shl_undef(i16 %x, i16 %y) nounwind {
99
; CHECK-LABEL: combine_shl_undef:
1010
; CHECK: // %bb.0:
11-
; CHECK-NEXT: mov w8, wzr
12-
; CHECK-NEXT: mov w9, #-2147483648
13-
; CHECK-NEXT: cmp w8, #0
14-
; CHECK-NEXT: cinv w9, w9, ge
15-
; CHECK-NEXT: csel w8, w9, w8, ne
16-
; CHECK-NEXT: asr w0, w8, #16
11+
; CHECK-NEXT: mov w0, wzr
1712
; CHECK-NEXT: ret
1813
%tmp = call i16 @llvm.sshl.sat.i16(i16 undef, i16 %y)
1914
ret i16 %tmp
@@ -23,13 +18,6 @@ define i16 @combine_shl_undef(i16 %x, i16 %y) nounwind {
2318
define i16 @combine_shl_by_undef(i16 %x, i16 %y) nounwind {
2419
; CHECK-LABEL: combine_shl_by_undef:
2520
; CHECK: // %bb.0:
26-
; CHECK-NEXT: lsl w9, w0, #16
27-
; CHECK-NEXT: mov w8, #-2147483648
28-
; CHECK-NEXT: cmp w9, #0
29-
; CHECK-NEXT: cinv w8, w8, ge
30-
; CHECK-NEXT: cmp w9, w9
31-
; CHECK-NEXT: csel w8, w8, w9, ne
32-
; CHECK-NEXT: asr w0, w8, #16
3321
; CHECK-NEXT: ret
3422
%tmp = call i16 @llvm.sshl.sat.i16(i16 %x, i16 undef)
3523
ret i16 %tmp
@@ -39,12 +27,7 @@ define i16 @combine_shl_by_undef(i16 %x, i16 %y) nounwind {
3927
define i16 @combine_shl_poison(i16 %x, i16 %y) nounwind {
4028
; CHECK-LABEL: combine_shl_poison:
4129
; CHECK: // %bb.0:
42-
; CHECK-NEXT: mov w8, wzr
43-
; CHECK-NEXT: mov w9, #-2147483648
44-
; CHECK-NEXT: cmp w8, #0
45-
; CHECK-NEXT: cinv w9, w9, ge
46-
; CHECK-NEXT: csel w8, w9, w8, ne
47-
; CHECK-NEXT: asr w0, w8, #16
30+
; CHECK-NEXT: mov w0, wzr
4831
; CHECK-NEXT: ret
4932
%tmp = call i16 @llvm.sshl.sat.i16(i16 poison, i16 %y)
5033
ret i16 %tmp
@@ -54,13 +37,6 @@ define i16 @combine_shl_poison(i16 %x, i16 %y) nounwind {
5437
define i16 @combine_shl_by_poison(i16 %x, i16 %y) nounwind {
5538
; CHECK-LABEL: combine_shl_by_poison:
5639
; CHECK: // %bb.0:
57-
; CHECK-NEXT: lsl w9, w0, #16
58-
; CHECK-NEXT: mov w8, #-2147483648
59-
; CHECK-NEXT: cmp w9, #0
60-
; CHECK-NEXT: cinv w8, w8, ge
61-
; CHECK-NEXT: cmp w9, w9
62-
; CHECK-NEXT: csel w8, w8, w9, ne
63-
; CHECK-NEXT: asr w0, w8, #16
6440
; CHECK-NEXT: ret
6541
%tmp = call i16 @llvm.sshl.sat.i16(i16 %x, i16 poison)
6642
ret i16 %tmp
@@ -70,12 +46,6 @@ define i16 @combine_shl_by_poison(i16 %x, i16 %y) nounwind {
7046
define i16 @combine_shl_by_bitwidth(i16 %x, i16 %y) nounwind {
7147
; CHECK-LABEL: combine_shl_by_bitwidth:
7248
; CHECK: // %bb.0:
73-
; CHECK-NEXT: lsl w9, w0, #16
74-
; CHECK-NEXT: mov w8, #-2147483648
75-
; CHECK-NEXT: cmp w9, #0
76-
; CHECK-NEXT: cinv w8, w8, ge
77-
; CHECK-NEXT: csel w8, w8, wzr, ne
78-
; CHECK-NEXT: asr w0, w8, #16
7949
; CHECK-NEXT: ret
8050
%tmp = call i16 @llvm.sshl.sat.i16(i16 %x, i16 16)
8151
ret i16 %tmp
@@ -85,12 +55,7 @@ define i16 @combine_shl_by_bitwidth(i16 %x, i16 %y) nounwind {
8555
define i16 @combine_shl_zero(i16 %x, i16 %y) nounwind {
8656
; CHECK-LABEL: combine_shl_zero:
8757
; CHECK: // %bb.0:
88-
; CHECK-NEXT: mov w8, wzr
89-
; CHECK-NEXT: mov w9, #-2147483648
90-
; CHECK-NEXT: cmp w8, #0
91-
; CHECK-NEXT: cinv w9, w9, ge
92-
; CHECK-NEXT: csel w8, w9, w8, ne
93-
; CHECK-NEXT: asr w0, w8, #16
58+
; CHECK-NEXT: mov w0, wzr
9459
; CHECK-NEXT: ret
9560
%tmp = call i16 @llvm.sshl.sat.i16(i16 0, i16 %y)
9661
ret i16 %tmp
@@ -100,13 +65,6 @@ define i16 @combine_shl_zero(i16 %x, i16 %y) nounwind {
10065
define i16 @combine_shlsat_by_zero(i16 %x, i16 %y) nounwind {
10166
; CHECK-LABEL: combine_shlsat_by_zero:
10267
; CHECK: // %bb.0:
103-
; CHECK-NEXT: lsl w9, w0, #16
104-
; CHECK-NEXT: mov w8, #-2147483648
105-
; CHECK-NEXT: cmp w9, #0
106-
; CHECK-NEXT: cinv w8, w8, ge
107-
; CHECK-NEXT: cmp w9, w9
108-
; CHECK-NEXT: csel w8, w8, w9, ne
109-
; CHECK-NEXT: asr w0, w8, #16
11068
; CHECK-NEXT: ret
11169
%tmp = call i16 @llvm.sshl.sat.i16(i16 %x, i16 0)
11270
ret i16 %tmp
@@ -116,14 +74,7 @@ define i16 @combine_shlsat_by_zero(i16 %x, i16 %y) nounwind {
11674
define i16 @combine_shlsat_constfold(i16 %x, i16 %y) nounwind {
11775
; CHECK-LABEL: combine_shlsat_constfold:
11876
; CHECK: // %bb.0:
119-
; CHECK-NEXT: mov w8, #524288
120-
; CHECK-NEXT: mov w9, #-2147483648
121-
; CHECK-NEXT: cmp w8, #0
122-
; CHECK-NEXT: cinv w9, w9, ge
123-
; CHECK-NEXT: cmp w8, #128, lsl #12 // =524288
124-
; CHECK-NEXT: mov w8, #2097152
125-
; CHECK-NEXT: csel w8, w9, w8, ne
126-
; CHECK-NEXT: asr w0, w8, #16
77+
; CHECK-NEXT: mov w0, #32
12778
; CHECK-NEXT: ret
12879
%tmp = call i16 @llvm.sshl.sat.i16(i16 8, i16 2)
12980
ret i16 %tmp
@@ -133,12 +84,7 @@ define i16 @combine_shlsat_constfold(i16 %x, i16 %y) nounwind {
13384
define i16 @combine_shlsat_satmax(i16 %x, i16 %y) nounwind {
13485
; CHECK-LABEL: combine_shlsat_satmax:
13586
; CHECK: // %bb.0:
136-
; CHECK-NEXT: mov w8, #524288
137-
; CHECK-NEXT: cmp w8, #0
138-
; CHECK-NEXT: mov w8, #-2147483648
139-
; CHECK-NEXT: cinv w8, w8, ge
140-
; CHECK-NEXT: csel w8, w8, wzr, ne
141-
; CHECK-NEXT: asr w0, w8, #16
87+
; CHECK-NEXT: mov w0, #32767
14288
; CHECK-NEXT: ret
14389
%tmp = call i16 @llvm.sshl.sat.i16(i16 8, i16 15)
14490
ret i16 %tmp
@@ -148,12 +94,7 @@ define i16 @combine_shlsat_satmax(i16 %x, i16 %y) nounwind {
14894
define i16 @combine_shlsat_satmin(i16 %x, i16 %y) nounwind {
14995
; CHECK-LABEL: combine_shlsat_satmin:
15096
; CHECK: // %bb.0:
151-
; CHECK-NEXT: mov w8, #-524288
152-
; CHECK-NEXT: cmp w8, #0
153-
; CHECK-NEXT: mov w8, #-2147483648
154-
; CHECK-NEXT: cinv w8, w8, ge
155-
; CHECK-NEXT: csel w8, w8, wzr, ne
156-
; CHECK-NEXT: asr w0, w8, #16
97+
; CHECK-NEXT: mov w0, #32768
15798
; CHECK-NEXT: ret
15899
%tmp = call i16 @llvm.sshl.sat.i16(i16 -8, i16 15)
159100
ret i16 %tmp
@@ -166,33 +107,10 @@ define void @combine_shlsat_vector() nounwind {
166107
; CHECK-LABEL: combine_shlsat_vector:
167108
; CHECK: // %bb.0:
168109
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
169-
; CHECK-NEXT: mov w8, #524288
170-
; CHECK-NEXT: mov w9, #-2147483648
171-
; CHECK-NEXT: cmp w8, #0
172-
; CHECK-NEXT: cinv w10, w9, ge
173-
; CHECK-NEXT: csel w11, w10, wzr, ne
174-
; CHECK-NEXT: cmp w8, #128, lsl #12 // =524288
175-
; CHECK-NEXT: mov w8, #2097152
176-
; CHECK-NEXT: asr w11, w11, #16
177-
; CHECK-NEXT: csel w8, w10, w8, ne
178-
; CHECK-NEXT: mov w10, #-524288
179-
; CHECK-NEXT: asr w8, w8, #16
180-
; CHECK-NEXT: cmp w10, #0
181-
; CHECK-NEXT: cinv w9, w9, ge
182-
; CHECK-NEXT: fmov s0, w8
183-
; CHECK-NEXT: csel w8, w9, wzr, ne
184-
; CHECK-NEXT: cmn w10, #128, lsl #12 // =524288
185-
; CHECK-NEXT: mov w10, #-2097152
186-
; CHECK-NEXT: csel w9, w9, w10, ne
187-
; CHECK-NEXT: asr w8, w8, #16
188-
; CHECK-NEXT: mov v0.h[1], w11
189-
; CHECK-NEXT: asr w9, w9, #16
190-
; CHECK-NEXT: mov v0.h[2], w9
191-
; CHECK-NEXT: mov v0.h[3], w8
192-
; CHECK-NEXT: umov w0, v0.h[0]
193-
; CHECK-NEXT: umov w1, v0.h[1]
194-
; CHECK-NEXT: umov w2, v0.h[2]
195-
; CHECK-NEXT: umov w3, v0.h[3]
110+
; CHECK-NEXT: mov w0, #32
111+
; CHECK-NEXT: mov w1, #32767
112+
; CHECK-NEXT: mov w2, #65504
113+
; CHECK-NEXT: mov w3, #32768
196114
; CHECK-NEXT: bl sink4xi16
197115
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
198116
; CHECK-NEXT: ret

llvm/test/CodeGen/AArch64/ushl_sat.ll

Lines changed: 7 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,7 @@ declare <2 x i16> @llvm.ushl.sat.v2i16(<2 x i16>, <2 x i16>)
88
define i16 @combine_shl_undef(i16 %x, i16 %y) nounwind {
99
; CHECK-LABEL: combine_shl_undef:
1010
; CHECK: // %bb.0:
11-
; CHECK-NEXT: mov w8, wzr
12-
; CHECK-NEXT: cmp w8, #0
13-
; CHECK-NEXT: csetm w8, ne
14-
; CHECK-NEXT: lsr w0, w8, #16
11+
; CHECK-NEXT: mov w0, wzr
1512
; CHECK-NEXT: ret
1613
%tmp = call i16 @llvm.ushl.sat.i16(i16 undef, i16 %y)
1714
ret i16 %tmp
@@ -21,10 +18,6 @@ define i16 @combine_shl_undef(i16 %x, i16 %y) nounwind {
2118
define i16 @combine_shl_by_undef(i16 %x, i16 %y) nounwind {
2219
; CHECK-LABEL: combine_shl_by_undef:
2320
; CHECK: // %bb.0:
24-
; CHECK-NEXT: lsl w8, w0, #16
25-
; CHECK-NEXT: cmp w8, w8
26-
; CHECK-NEXT: csinv w8, w8, wzr, eq
27-
; CHECK-NEXT: lsr w0, w8, #16
2821
; CHECK-NEXT: ret
2922
%tmp = call i16 @llvm.ushl.sat.i16(i16 %x, i16 undef)
3023
ret i16 %tmp
@@ -34,10 +27,7 @@ define i16 @combine_shl_by_undef(i16 %x, i16 %y) nounwind {
3427
define i16 @combine_shl_poison(i16 %x, i16 %y) nounwind {
3528
; CHECK-LABEL: combine_shl_poison:
3629
; CHECK: // %bb.0:
37-
; CHECK-NEXT: mov w8, wzr
38-
; CHECK-NEXT: cmp w8, #0
39-
; CHECK-NEXT: csetm w8, ne
40-
; CHECK-NEXT: lsr w0, w8, #16
30+
; CHECK-NEXT: mov w0, wzr
4131
; CHECK-NEXT: ret
4232
%tmp = call i16 @llvm.ushl.sat.i16(i16 poison, i16 %y)
4333
ret i16 %tmp
@@ -47,10 +37,6 @@ define i16 @combine_shl_poison(i16 %x, i16 %y) nounwind {
4737
define i16 @combine_shl_by_poison(i16 %x, i16 %y) nounwind {
4838
; CHECK-LABEL: combine_shl_by_poison:
4939
; CHECK: // %bb.0:
50-
; CHECK-NEXT: lsl w8, w0, #16
51-
; CHECK-NEXT: cmp w8, w8
52-
; CHECK-NEXT: csinv w8, w8, wzr, eq
53-
; CHECK-NEXT: lsr w0, w8, #16
5440
; CHECK-NEXT: ret
5541
%tmp = call i16 @llvm.ushl.sat.i16(i16 %x, i16 poison)
5642
ret i16 %tmp
@@ -60,10 +46,6 @@ define i16 @combine_shl_by_poison(i16 %x, i16 %y) nounwind {
6046
define i16 @combine_shl_by_bitwidth(i16 %x, i16 %y) nounwind {
6147
; CHECK-LABEL: combine_shl_by_bitwidth:
6248
; CHECK: // %bb.0:
63-
; CHECK-NEXT: lsl w8, w0, #16
64-
; CHECK-NEXT: cmp w8, #0
65-
; CHECK-NEXT: csetm w8, ne
66-
; CHECK-NEXT: lsr w0, w8, #16
6749
; CHECK-NEXT: ret
6850
%tmp = call i16 @llvm.ushl.sat.i16(i16 %x, i16 16)
6951
ret i16 %tmp
@@ -73,10 +55,7 @@ define i16 @combine_shl_by_bitwidth(i16 %x, i16 %y) nounwind {
7355
define i16 @combine_shl_zero(i16 %x, i16 %y) nounwind {
7456
; CHECK-LABEL: combine_shl_zero:
7557
; CHECK: // %bb.0:
76-
; CHECK-NEXT: mov w8, wzr
77-
; CHECK-NEXT: cmp w8, #0
78-
; CHECK-NEXT: csetm w8, ne
79-
; CHECK-NEXT: lsr w0, w8, #16
58+
; CHECK-NEXT: mov w0, wzr
8059
; CHECK-NEXT: ret
8160
%tmp = call i16 @llvm.ushl.sat.i16(i16 0, i16 %y)
8261
ret i16 %tmp
@@ -86,10 +65,6 @@ define i16 @combine_shl_zero(i16 %x, i16 %y) nounwind {
8665
define i16 @combine_shlsat_by_zero(i16 %x, i16 %y) nounwind {
8766
; CHECK-LABEL: combine_shlsat_by_zero:
8867
; CHECK: // %bb.0:
89-
; CHECK-NEXT: lsl w8, w0, #16
90-
; CHECK-NEXT: cmp w8, w8
91-
; CHECK-NEXT: csinv w8, w8, wzr, eq
92-
; CHECK-NEXT: lsr w0, w8, #16
9368
; CHECK-NEXT: ret
9469
%tmp = call i16 @llvm.ushl.sat.i16(i16 %x, i16 0)
9570
ret i16 %tmp
@@ -99,11 +74,7 @@ define i16 @combine_shlsat_by_zero(i16 %x, i16 %y) nounwind {
9974
define i16 @combine_shlsat_constfold(i16 %x, i16 %y) nounwind {
10075
; CHECK-LABEL: combine_shlsat_constfold:
10176
; CHECK: // %bb.0:
102-
; CHECK-NEXT: mov w8, #524288
103-
; CHECK-NEXT: cmp w8, #128, lsl #12 // =524288
104-
; CHECK-NEXT: mov w8, #2097152
105-
; CHECK-NEXT: csinv w8, w8, wzr, eq
106-
; CHECK-NEXT: lsr w0, w8, #16
77+
; CHECK-NEXT: mov w0, #32
10778
; CHECK-NEXT: ret
10879
%tmp = call i16 @llvm.ushl.sat.i16(i16 8, i16 2)
10980
ret i16 %tmp
@@ -113,10 +84,7 @@ define i16 @combine_shlsat_constfold(i16 %x, i16 %y) nounwind {
11384
define i16 @combine_shlsat_satmax(i16 %x, i16 %y) nounwind {
11485
; CHECK-LABEL: combine_shlsat_satmax:
11586
; CHECK: // %bb.0:
116-
; CHECK-NEXT: mov w8, #524288
117-
; CHECK-NEXT: cmp w8, #0
118-
; CHECK-NEXT: csetm w8, ne
119-
; CHECK-NEXT: lsr w0, w8, #16
87+
; CHECK-NEXT: mov w0, #65535
12088
; CHECK-NEXT: ret
12189
%tmp = call i16 @llvm.ushl.sat.i16(i16 8, i16 15)
12290
ret i16 %tmp
@@ -130,17 +98,8 @@ define void @combine_shlsat_vector() nounwind {
13098
; CHECK-LABEL: combine_shlsat_vector:
13199
; CHECK: // %bb.0:
132100
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
133-
; CHECK-NEXT: mov w8, #524288
134-
; CHECK-NEXT: mov w9, #2097152
135-
; CHECK-NEXT: cmp w8, #128, lsl #12 // =524288
136-
; CHECK-NEXT: csinv w9, w9, wzr, eq
137-
; CHECK-NEXT: cmp w8, #0
138-
; CHECK-NEXT: csetm w8, ne
139-
; CHECK-NEXT: fmov s0, w9
140-
; CHECK-NEXT: mov v0.s[1], w8
141-
; CHECK-NEXT: ushr v0.2s, v0.2s, #16
142-
; CHECK-NEXT: mov w1, v0.s[1]
143-
; CHECK-NEXT: fmov w0, s0
101+
; CHECK-NEXT: mov w0, #32
102+
; CHECK-NEXT: mov w1, #65535
144103
; CHECK-NEXT: bl sink2xi16
145104
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
146105
; CHECK-NEXT: ret

0 commit comments

Comments
 (0)