Skip to content

Commit 88b8076

Browse files
author
Cameron McInally
committed
[AArch64] Combine SELECT_CC patterns that match smin(a,0) and smax(a,0)
After a previous patch canonicalized select pattern flavors (SPF) to min/max intrinsics (a266af7), we saw a performance regression on the AArch64 backend. This patch recovers from that regression by combining the SELECT_CC patterns that match smin(a,0) and smax(a,0) during AArch64ISelLowering. GitHub Issue: #61767. Reviewed By: dmgreen. Differential Revision: https://reviews.llvm.org/D148249
1 parent 411dd47 commit 88b8076

File tree

5 files changed

+428
-243
lines changed

5 files changed

+428
-243
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8937,6 +8937,24 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
89378937
return DAG.getNode(ISD::OR, dl, VT, Shift, DAG.getConstant(1, dl, VT));
89388938
}
89398939

8940+
// Check for SMAX(lhs, 0) and SMIN(lhs, 0) patterns.
8941+
// (SELECT_CC setgt, lhs, 0, lhs, 0) -> (BIC lhs, (SRA lhs, typesize-1))
8942+
// (SELECT_CC setlt, lhs, 0, lhs, 0) -> (AND lhs, (SRA lhs, typesize-1))
8943+
// Both require fewer instructions than a compare and conditional select.
8944+
if ((CC == ISD::SETGT || CC == ISD::SETLT) && LHS == TVal &&
8945+
RHSC && RHSC->isZero() && CFVal && CFVal->isZero() &&
8946+
LHS.getValueType() == RHS.getValueType()) {
8947+
EVT VT = LHS.getValueType();
8948+
SDValue Shift =
8949+
DAG.getNode(ISD::SRA, dl, VT, LHS,
8950+
DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
8951+
8952+
if (CC == ISD::SETGT)
8953+
Shift = DAG.getNOT(dl, Shift, VT);
8954+
8955+
return DAG.getNode(ISD::AND, dl, VT, LHS, Shift);
8956+
}
8957+
89408958
unsigned Opcode = AArch64ISD::CSEL;
89418959

89428960
// If both the TVal and the FVal are constants, see if we can swap them in

llvm/test/CodeGen/AArch64/fpclamptosat.ll

Lines changed: 36 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -157,10 +157,10 @@ define i16 @stest_f64i16(double %x) {
157157
; CHECK-LABEL: stest_f64i16:
158158
; CHECK: // %bb.0: // %entry
159159
; CHECK-NEXT: fcvtzs w8, d0
160-
; CHECK-NEXT: mov w9, #32767
160+
; CHECK-NEXT: mov w9, #32767 // =0x7fff
161161
; CHECK-NEXT: cmp w8, w9
162162
; CHECK-NEXT: csel w8, w8, w9, lt
163-
; CHECK-NEXT: mov w9, #-32768
163+
; CHECK-NEXT: mov w9, #-32768 // =0xffff8000
164164
; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768
165165
; CHECK-NEXT: csel w0, w8, w9, gt
166166
; CHECK-NEXT: ret
@@ -178,7 +178,7 @@ define i16 @utest_f64i16(double %x) {
178178
; CHECK-LABEL: utest_f64i16:
179179
; CHECK: // %bb.0: // %entry
180180
; CHECK-NEXT: fcvtzu w8, d0
181-
; CHECK-NEXT: mov w9, #65535
181+
; CHECK-NEXT: mov w9, #65535 // =0xffff
182182
; CHECK-NEXT: cmp w8, w9
183183
; CHECK-NEXT: csel w0, w8, w9, lo
184184
; CHECK-NEXT: ret
@@ -194,7 +194,7 @@ define i16 @ustest_f64i16(double %x) {
194194
; CHECK-LABEL: ustest_f64i16:
195195
; CHECK: // %bb.0: // %entry
196196
; CHECK-NEXT: fcvtzs w8, d0
197-
; CHECK-NEXT: mov w9, #65535
197+
; CHECK-NEXT: mov w9, #65535 // =0xffff
198198
; CHECK-NEXT: cmp w8, w9
199199
; CHECK-NEXT: csel w8, w8, w9, lt
200200
; CHECK-NEXT: bic w0, w8, w8, asr #31
@@ -213,10 +213,10 @@ define i16 @stest_f32i16(float %x) {
213213
; CHECK-LABEL: stest_f32i16:
214214
; CHECK: // %bb.0: // %entry
215215
; CHECK-NEXT: fcvtzs w8, s0
216-
; CHECK-NEXT: mov w9, #32767
216+
; CHECK-NEXT: mov w9, #32767 // =0x7fff
217217
; CHECK-NEXT: cmp w8, w9
218218
; CHECK-NEXT: csel w8, w8, w9, lt
219-
; CHECK-NEXT: mov w9, #-32768
219+
; CHECK-NEXT: mov w9, #-32768 // =0xffff8000
220220
; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768
221221
; CHECK-NEXT: csel w0, w8, w9, gt
222222
; CHECK-NEXT: ret
@@ -234,7 +234,7 @@ define i16 @utest_f32i16(float %x) {
234234
; CHECK-LABEL: utest_f32i16:
235235
; CHECK: // %bb.0: // %entry
236236
; CHECK-NEXT: fcvtzu w8, s0
237-
; CHECK-NEXT: mov w9, #65535
237+
; CHECK-NEXT: mov w9, #65535 // =0xffff
238238
; CHECK-NEXT: cmp w8, w9
239239
; CHECK-NEXT: csel w0, w8, w9, lo
240240
; CHECK-NEXT: ret
@@ -250,7 +250,7 @@ define i16 @ustest_f32i16(float %x) {
250250
; CHECK-LABEL: ustest_f32i16:
251251
; CHECK: // %bb.0: // %entry
252252
; CHECK-NEXT: fcvtzs w8, s0
253-
; CHECK-NEXT: mov w9, #65535
253+
; CHECK-NEXT: mov w9, #65535 // =0xffff
254254
; CHECK-NEXT: cmp w8, w9
255255
; CHECK-NEXT: csel w8, w8, w9, lt
256256
; CHECK-NEXT: bic w0, w8, w8, asr #31
@@ -269,22 +269,22 @@ define i16 @stest_f16i16(half %x) {
269269
; CHECK-CVT-LABEL: stest_f16i16:
270270
; CHECK-CVT: // %bb.0: // %entry
271271
; CHECK-CVT-NEXT: fcvt s0, h0
272-
; CHECK-CVT-NEXT: mov w9, #32767
272+
; CHECK-CVT-NEXT: mov w9, #32767 // =0x7fff
273273
; CHECK-CVT-NEXT: fcvtzs w8, s0
274274
; CHECK-CVT-NEXT: cmp w8, w9
275275
; CHECK-CVT-NEXT: csel w8, w8, w9, lt
276-
; CHECK-CVT-NEXT: mov w9, #-32768
276+
; CHECK-CVT-NEXT: mov w9, #-32768 // =0xffff8000
277277
; CHECK-CVT-NEXT: cmn w8, #8, lsl #12 // =32768
278278
; CHECK-CVT-NEXT: csel w0, w8, w9, gt
279279
; CHECK-CVT-NEXT: ret
280280
;
281281
; CHECK-FP16-LABEL: stest_f16i16:
282282
; CHECK-FP16: // %bb.0: // %entry
283283
; CHECK-FP16-NEXT: fcvtzs w8, h0
284-
; CHECK-FP16-NEXT: mov w9, #32767
284+
; CHECK-FP16-NEXT: mov w9, #32767 // =0x7fff
285285
; CHECK-FP16-NEXT: cmp w8, w9
286286
; CHECK-FP16-NEXT: csel w8, w8, w9, lt
287-
; CHECK-FP16-NEXT: mov w9, #-32768
287+
; CHECK-FP16-NEXT: mov w9, #-32768 // =0xffff8000
288288
; CHECK-FP16-NEXT: cmn w8, #8, lsl #12 // =32768
289289
; CHECK-FP16-NEXT: csel w0, w8, w9, gt
290290
; CHECK-FP16-NEXT: ret
@@ -302,7 +302,7 @@ define i16 @utesth_f16i16(half %x) {
302302
; CHECK-CVT-LABEL: utesth_f16i16:
303303
; CHECK-CVT: // %bb.0: // %entry
304304
; CHECK-CVT-NEXT: fcvt s0, h0
305-
; CHECK-CVT-NEXT: mov w9, #65535
305+
; CHECK-CVT-NEXT: mov w9, #65535 // =0xffff
306306
; CHECK-CVT-NEXT: fcvtzu w8, s0
307307
; CHECK-CVT-NEXT: cmp w8, w9
308308
; CHECK-CVT-NEXT: csel w0, w8, w9, lo
@@ -311,7 +311,7 @@ define i16 @utesth_f16i16(half %x) {
311311
; CHECK-FP16-LABEL: utesth_f16i16:
312312
; CHECK-FP16: // %bb.0: // %entry
313313
; CHECK-FP16-NEXT: fcvtzu w8, h0
314-
; CHECK-FP16-NEXT: mov w9, #65535
314+
; CHECK-FP16-NEXT: mov w9, #65535 // =0xffff
315315
; CHECK-FP16-NEXT: cmp w8, w9
316316
; CHECK-FP16-NEXT: csel w0, w8, w9, lo
317317
; CHECK-FP16-NEXT: ret
@@ -327,7 +327,7 @@ define i16 @ustest_f16i16(half %x) {
327327
; CHECK-CVT-LABEL: ustest_f16i16:
328328
; CHECK-CVT: // %bb.0: // %entry
329329
; CHECK-CVT-NEXT: fcvt s0, h0
330-
; CHECK-CVT-NEXT: mov w9, #65535
330+
; CHECK-CVT-NEXT: mov w9, #65535 // =0xffff
331331
; CHECK-CVT-NEXT: fcvtzs w8, s0
332332
; CHECK-CVT-NEXT: cmp w8, w9
333333
; CHECK-CVT-NEXT: csel w8, w8, w9, lt
@@ -337,7 +337,7 @@ define i16 @ustest_f16i16(half %x) {
337337
; CHECK-FP16-LABEL: ustest_f16i16:
338338
; CHECK-FP16: // %bb.0: // %entry
339339
; CHECK-FP16-NEXT: fcvtzs w8, h0
340-
; CHECK-FP16-NEXT: mov w9, #65535
340+
; CHECK-FP16-NEXT: mov w9, #65535 // =0xffff
341341
; CHECK-FP16-NEXT: cmp w8, w9
342342
; CHECK-FP16-NEXT: csel w8, w8, w9, lt
343343
; CHECK-FP16-NEXT: bic w0, w8, w8, asr #31
@@ -679,10 +679,10 @@ define i16 @stest_f64i16_mm(double %x) {
679679
; CHECK-LABEL: stest_f64i16_mm:
680680
; CHECK: // %bb.0: // %entry
681681
; CHECK-NEXT: fcvtzs w8, d0
682-
; CHECK-NEXT: mov w9, #32767
682+
; CHECK-NEXT: mov w9, #32767 // =0x7fff
683683
; CHECK-NEXT: cmp w8, w9
684684
; CHECK-NEXT: csel w8, w8, w9, lt
685-
; CHECK-NEXT: mov w9, #-32768
685+
; CHECK-NEXT: mov w9, #-32768 // =0xffff8000
686686
; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768
687687
; CHECK-NEXT: csel w0, w8, w9, gt
688688
; CHECK-NEXT: ret
@@ -698,7 +698,7 @@ define i16 @utest_f64i16_mm(double %x) {
698698
; CHECK-LABEL: utest_f64i16_mm:
699699
; CHECK: // %bb.0: // %entry
700700
; CHECK-NEXT: fcvtzu w8, d0
701-
; CHECK-NEXT: mov w9, #65535
701+
; CHECK-NEXT: mov w9, #65535 // =0xffff
702702
; CHECK-NEXT: cmp w8, w9
703703
; CHECK-NEXT: csel w0, w8, w9, lo
704704
; CHECK-NEXT: ret
@@ -713,11 +713,10 @@ define i16 @ustest_f64i16_mm(double %x) {
713713
; CHECK-LABEL: ustest_f64i16_mm:
714714
; CHECK: // %bb.0: // %entry
715715
; CHECK-NEXT: fcvtzs w8, d0
716-
; CHECK-NEXT: mov w9, #65535
716+
; CHECK-NEXT: mov w9, #65535 // =0xffff
717717
; CHECK-NEXT: cmp w8, w9
718718
; CHECK-NEXT: csel w8, w8, w9, lt
719-
; CHECK-NEXT: cmp w8, #0
720-
; CHECK-NEXT: csel w0, w8, wzr, gt
719+
; CHECK-NEXT: bic w0, w8, w8, asr #31
721720
; CHECK-NEXT: ret
722721
entry:
723722
%conv = fptosi double %x to i32
@@ -731,10 +730,10 @@ define i16 @stest_f32i16_mm(float %x) {
731730
; CHECK-LABEL: stest_f32i16_mm:
732731
; CHECK: // %bb.0: // %entry
733732
; CHECK-NEXT: fcvtzs w8, s0
734-
; CHECK-NEXT: mov w9, #32767
733+
; CHECK-NEXT: mov w9, #32767 // =0x7fff
735734
; CHECK-NEXT: cmp w8, w9
736735
; CHECK-NEXT: csel w8, w8, w9, lt
737-
; CHECK-NEXT: mov w9, #-32768
736+
; CHECK-NEXT: mov w9, #-32768 // =0xffff8000
738737
; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768
739738
; CHECK-NEXT: csel w0, w8, w9, gt
740739
; CHECK-NEXT: ret
@@ -750,7 +749,7 @@ define i16 @utest_f32i16_mm(float %x) {
750749
; CHECK-LABEL: utest_f32i16_mm:
751750
; CHECK: // %bb.0: // %entry
752751
; CHECK-NEXT: fcvtzu w8, s0
753-
; CHECK-NEXT: mov w9, #65535
752+
; CHECK-NEXT: mov w9, #65535 // =0xffff
754753
; CHECK-NEXT: cmp w8, w9
755754
; CHECK-NEXT: csel w0, w8, w9, lo
756755
; CHECK-NEXT: ret
@@ -765,11 +764,10 @@ define i16 @ustest_f32i16_mm(float %x) {
765764
; CHECK-LABEL: ustest_f32i16_mm:
766765
; CHECK: // %bb.0: // %entry
767766
; CHECK-NEXT: fcvtzs w8, s0
768-
; CHECK-NEXT: mov w9, #65535
767+
; CHECK-NEXT: mov w9, #65535 // =0xffff
769768
; CHECK-NEXT: cmp w8, w9
770769
; CHECK-NEXT: csel w8, w8, w9, lt
771-
; CHECK-NEXT: cmp w8, #0
772-
; CHECK-NEXT: csel w0, w8, wzr, gt
770+
; CHECK-NEXT: bic w0, w8, w8, asr #31
773771
; CHECK-NEXT: ret
774772
entry:
775773
%conv = fptosi float %x to i32
@@ -783,22 +781,22 @@ define i16 @stest_f16i16_mm(half %x) {
783781
; CHECK-CVT-LABEL: stest_f16i16_mm:
784782
; CHECK-CVT: // %bb.0: // %entry
785783
; CHECK-CVT-NEXT: fcvt s0, h0
786-
; CHECK-CVT-NEXT: mov w9, #32767
784+
; CHECK-CVT-NEXT: mov w9, #32767 // =0x7fff
787785
; CHECK-CVT-NEXT: fcvtzs w8, s0
788786
; CHECK-CVT-NEXT: cmp w8, w9
789787
; CHECK-CVT-NEXT: csel w8, w8, w9, lt
790-
; CHECK-CVT-NEXT: mov w9, #-32768
788+
; CHECK-CVT-NEXT: mov w9, #-32768 // =0xffff8000
791789
; CHECK-CVT-NEXT: cmn w8, #8, lsl #12 // =32768
792790
; CHECK-CVT-NEXT: csel w0, w8, w9, gt
793791
; CHECK-CVT-NEXT: ret
794792
;
795793
; CHECK-FP16-LABEL: stest_f16i16_mm:
796794
; CHECK-FP16: // %bb.0: // %entry
797795
; CHECK-FP16-NEXT: fcvtzs w8, h0
798-
; CHECK-FP16-NEXT: mov w9, #32767
796+
; CHECK-FP16-NEXT: mov w9, #32767 // =0x7fff
799797
; CHECK-FP16-NEXT: cmp w8, w9
800798
; CHECK-FP16-NEXT: csel w8, w8, w9, lt
801-
; CHECK-FP16-NEXT: mov w9, #-32768
799+
; CHECK-FP16-NEXT: mov w9, #-32768 // =0xffff8000
802800
; CHECK-FP16-NEXT: cmn w8, #8, lsl #12 // =32768
803801
; CHECK-FP16-NEXT: csel w0, w8, w9, gt
804802
; CHECK-FP16-NEXT: ret
@@ -814,7 +812,7 @@ define i16 @utesth_f16i16_mm(half %x) {
814812
; CHECK-CVT-LABEL: utesth_f16i16_mm:
815813
; CHECK-CVT: // %bb.0: // %entry
816814
; CHECK-CVT-NEXT: fcvt s0, h0
817-
; CHECK-CVT-NEXT: mov w9, #65535
815+
; CHECK-CVT-NEXT: mov w9, #65535 // =0xffff
818816
; CHECK-CVT-NEXT: fcvtzu w8, s0
819817
; CHECK-CVT-NEXT: cmp w8, w9
820818
; CHECK-CVT-NEXT: csel w0, w8, w9, lo
@@ -823,7 +821,7 @@ define i16 @utesth_f16i16_mm(half %x) {
823821
; CHECK-FP16-LABEL: utesth_f16i16_mm:
824822
; CHECK-FP16: // %bb.0: // %entry
825823
; CHECK-FP16-NEXT: fcvtzu w8, h0
826-
; CHECK-FP16-NEXT: mov w9, #65535
824+
; CHECK-FP16-NEXT: mov w9, #65535 // =0xffff
827825
; CHECK-FP16-NEXT: cmp w8, w9
828826
; CHECK-FP16-NEXT: csel w0, w8, w9, lo
829827
; CHECK-FP16-NEXT: ret
@@ -838,22 +836,20 @@ define i16 @ustest_f16i16_mm(half %x) {
838836
; CHECK-CVT-LABEL: ustest_f16i16_mm:
839837
; CHECK-CVT: // %bb.0: // %entry
840838
; CHECK-CVT-NEXT: fcvt s0, h0
841-
; CHECK-CVT-NEXT: mov w9, #65535
839+
; CHECK-CVT-NEXT: mov w9, #65535 // =0xffff
842840
; CHECK-CVT-NEXT: fcvtzs w8, s0
843841
; CHECK-CVT-NEXT: cmp w8, w9
844842
; CHECK-CVT-NEXT: csel w8, w8, w9, lt
845-
; CHECK-CVT-NEXT: cmp w8, #0
846-
; CHECK-CVT-NEXT: csel w0, w8, wzr, gt
843+
; CHECK-CVT-NEXT: bic w0, w8, w8, asr #31
847844
; CHECK-CVT-NEXT: ret
848845
;
849846
; CHECK-FP16-LABEL: ustest_f16i16_mm:
850847
; CHECK-FP16: // %bb.0: // %entry
851848
; CHECK-FP16-NEXT: fcvtzs w8, h0
852-
; CHECK-FP16-NEXT: mov w9, #65535
849+
; CHECK-FP16-NEXT: mov w9, #65535 // =0xffff
853850
; CHECK-FP16-NEXT: cmp w8, w9
854851
; CHECK-FP16-NEXT: csel w8, w8, w9, lt
855-
; CHECK-FP16-NEXT: cmp w8, #0
856-
; CHECK-FP16-NEXT: csel w0, w8, wzr, gt
852+
; CHECK-FP16-NEXT: bic w0, w8, w8, asr #31
857853
; CHECK-FP16-NEXT: ret
858854
entry:
859855
%conv = fptosi half %x to i32

0 commit comments

Comments
 (0)