Skip to content

Commit d52c840

Browse files
authored
SelectionDAG/expandFMINNUM_FMAXNUM: skips vector if SETCC/VSELECT is not legal (llvm#109570)
If SETCC or VSELECT is not legal for a vector type, we should not expand the node; instead, we can split the vectors, so that simple scalar instructions can be emitted instead of pairs of comparison + selection.
1 parent 923566a commit d52c840

File tree

5 files changed

+258
-520
lines changed

5 files changed

+258
-520
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8424,6 +8424,11 @@ TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
84248424

84258425
if (Node->getFlags().hasNoNaNs()) {
84268426
ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8427+
EVT VT = Node->getValueType(0);
8428+
if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
8429+
!isOperationLegalOrCustom(ISD::VSELECT, VT)) &&
8430+
VT.isVector())
8431+
return SDValue();
84278432
SDValue Op1 = Node->getOperand(0);
84288433
SDValue Op2 = Node->getOperand(1);
84298434
SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);

llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll

Lines changed: 63 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -223,77 +223,69 @@ define half @test_v16f16(<16 x half> %a) nounwind {
223223
; CHECK-NOFP-SD-NEXT: fcvt s5, h0
224224
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
225225
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
226-
; CHECK-NOFP-SD-NEXT: fcmp s3, s2
227-
; CHECK-NOFP-SD-NEXT: fcsel s2, s3, s2, gt
228-
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
229-
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
230-
; CHECK-NOFP-SD-NEXT: mov h4, v1.h[2]
226+
; CHECK-NOFP-SD-NEXT: fmaxnm s4, s5, s4
231227
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[2]
228+
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s3, s2
229+
; CHECK-NOFP-SD-NEXT: mov h3, v1.h[2]
230+
; CHECK-NOFP-SD-NEXT: fcvt h4, s4
231+
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
232232
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
233-
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
233+
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
234234
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
235-
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
236235
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
237-
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
238-
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
239-
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s3, s2
240-
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
241-
; CHECK-NOFP-SD-NEXT: mov h4, v1.h[3]
236+
; CHECK-NOFP-SD-NEXT: fmaxnm s3, s5, s3
242237
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[3]
238+
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s4, s2
239+
; CHECK-NOFP-SD-NEXT: mov h4, v1.h[3]
243240
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
241+
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
244242
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
245243
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
246-
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
247244
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
248245
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
249-
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
250-
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
251-
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
252-
; CHECK-NOFP-SD-NEXT: mov h4, v1.h[4]
246+
; CHECK-NOFP-SD-NEXT: fmaxnm s4, s5, s4
253247
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[4]
254-
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
255-
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
256-
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
248+
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
249+
; CHECK-NOFP-SD-NEXT: mov h3, v1.h[4]
250+
; CHECK-NOFP-SD-NEXT: fcvt h4, s4
257251
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
252+
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
258253
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
254+
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
259255
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
260-
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
261-
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
262-
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
263-
; CHECK-NOFP-SD-NEXT: mov h4, v1.h[5]
256+
; CHECK-NOFP-SD-NEXT: fmaxnm s3, s5, s3
264257
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[5]
258+
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s4
259+
; CHECK-NOFP-SD-NEXT: mov h4, v1.h[5]
265260
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
261+
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
266262
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
267263
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
268-
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
269264
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
270265
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
271-
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
266+
; CHECK-NOFP-SD-NEXT: fmaxnm s4, s5, s4
267+
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[6]
268+
; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7]
272269
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
273-
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
270+
; CHECK-NOFP-SD-NEXT: fcvt h3, s4
274271
; CHECK-NOFP-SD-NEXT: mov h4, v1.h[6]
275-
; CHECK-NOFP-SD-NEXT: mov h5, v0.h[6]
272+
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
276273
; CHECK-NOFP-SD-NEXT: mov h1, v1.h[7]
277-
; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7]
278-
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
274+
; CHECK-NOFP-SD-NEXT: fcvt s0, h0
279275
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
276+
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
280277
; CHECK-NOFP-SD-NEXT: fcvt s4, h4
281-
; CHECK-NOFP-SD-NEXT: fcvt s5, h5
282278
; CHECK-NOFP-SD-NEXT: fcvt s1, h1
283-
; CHECK-NOFP-SD-NEXT: fcvt s0, h0
284-
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
285279
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
286-
; CHECK-NOFP-SD-NEXT: fcmp s5, s4
280+
; CHECK-NOFP-SD-NEXT: fmaxnm s0, s0, s1
287281
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
288-
; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
289-
; CHECK-NOFP-SD-NEXT: fcmp s0, s1
290-
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
291-
; CHECK-NOFP-SD-NEXT: fcsel s0, s0, s1, gt
292-
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
282+
; CHECK-NOFP-SD-NEXT: fmaxnm s3, s5, s4
293283
; CHECK-NOFP-SD-NEXT: fcvt h0, s0
294-
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
295-
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
284+
; CHECK-NOFP-SD-NEXT: fcvt h2, s2
285+
; CHECK-NOFP-SD-NEXT: fcvt h3, s3
296286
; CHECK-NOFP-SD-NEXT: fcvt s0, h0
287+
; CHECK-NOFP-SD-NEXT: fcvt s2, h2
288+
; CHECK-NOFP-SD-NEXT: fcvt s3, h3
297289
; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
298290
; CHECK-NOFP-SD-NEXT: fcvt h1, s2
299291
; CHECK-NOFP-SD-NEXT: fcvt s1, h1
@@ -410,72 +402,44 @@ define half @test_v11f16(<11 x half> %a) nounwind {
410402
; CHECK-NOFP-LABEL: test_v11f16:
411403
; CHECK-NOFP: // %bb.0:
412404
; CHECK-NOFP-NEXT: ldr h16, [sp, #8]
413-
; CHECK-NOFP-NEXT: fcvt s1, h1
414405
; CHECK-NOFP-NEXT: ldr h17, [sp]
406+
; CHECK-NOFP-NEXT: fcvt s1, h1
415407
; CHECK-NOFP-NEXT: fcvt s0, h0
416408
; CHECK-NOFP-NEXT: fcvt s2, h2
417-
; CHECK-NOFP-NEXT: adrp x8, .LCPI14_0
418409
; CHECK-NOFP-NEXT: fcvt s16, h16
419410
; CHECK-NOFP-NEXT: fcvt s17, h17
420-
; CHECK-NOFP-NEXT: fcvt s3, h3
421-
; CHECK-NOFP-NEXT: fcvt s4, h4
422-
; CHECK-NOFP-NEXT: fcmp s1, s16
423-
; CHECK-NOFP-NEXT: fcsel s1, s1, s16, gt
424-
; CHECK-NOFP-NEXT: fcmp s0, s17
411+
; CHECK-NOFP-NEXT: fmaxnm s1, s1, s16
412+
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s17
425413
; CHECK-NOFP-NEXT: ldr h16, [sp, #16]
426414
; CHECK-NOFP-NEXT: fcvt s16, h16
427-
; CHECK-NOFP-NEXT: fcsel s0, s0, s17, gt
428415
; CHECK-NOFP-NEXT: fcvt h1, s1
429416
; CHECK-NOFP-NEXT: fcvt h0, s0
430-
; CHECK-NOFP-NEXT: fcmp s2, s16
431417
; CHECK-NOFP-NEXT: fcvt s1, h1
432418
; CHECK-NOFP-NEXT: fcvt s0, h0
433419
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
434-
; CHECK-NOFP-NEXT: fcsel s1, s2, s16, gt
435-
; CHECK-NOFP-NEXT: ldr h2, [x8, :lo12:.LCPI14_0]
436-
; CHECK-NOFP-NEXT: mov w8, #-8388608 // =0xff800000
437-
; CHECK-NOFP-NEXT: fcvt s2, h2
438-
; CHECK-NOFP-NEXT: fcvt h1, s1
420+
; CHECK-NOFP-NEXT: fmaxnm s1, s2, s16
439421
; CHECK-NOFP-NEXT: fcvt h0, s0
440-
; CHECK-NOFP-NEXT: fcmp s3, s2
441-
; CHECK-NOFP-NEXT: fcvt s1, h1
422+
; CHECK-NOFP-NEXT: fcvt h1, s1
442423
; CHECK-NOFP-NEXT: fcvt s0, h0
424+
; CHECK-NOFP-NEXT: fcvt s1, h1
443425
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
444-
; CHECK-NOFP-NEXT: fmov s1, w8
445-
; CHECK-NOFP-NEXT: fcsel s3, s3, s1, gt
446-
; CHECK-NOFP-NEXT: fcmp s4, s2
426+
; CHECK-NOFP-NEXT: fcvt s1, h3
447427
; CHECK-NOFP-NEXT: fcvt h0, s0
448-
; CHECK-NOFP-NEXT: fcvt h3, s3
449428
; CHECK-NOFP-NEXT: fcvt s0, h0
450-
; CHECK-NOFP-NEXT: fcvt s3, h3
451-
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
452-
; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
453-
; CHECK-NOFP-NEXT: fcvt s4, h5
454-
; CHECK-NOFP-NEXT: fcvt h3, s3
429+
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
430+
; CHECK-NOFP-NEXT: fcvt s1, h4
455431
; CHECK-NOFP-NEXT: fcvt h0, s0
456-
; CHECK-NOFP-NEXT: fcmp s4, s2
457-
; CHECK-NOFP-NEXT: fcvt s3, h3
458432
; CHECK-NOFP-NEXT: fcvt s0, h0
459-
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
460-
; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
461-
; CHECK-NOFP-NEXT: fcvt s4, h6
462-
; CHECK-NOFP-NEXT: fcvt h3, s3
433+
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
434+
; CHECK-NOFP-NEXT: fcvt s1, h5
463435
; CHECK-NOFP-NEXT: fcvt h0, s0
464-
; CHECK-NOFP-NEXT: fcmp s4, s2
465-
; CHECK-NOFP-NEXT: fcvt s3, h3
466436
; CHECK-NOFP-NEXT: fcvt s0, h0
467-
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
468-
; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
469-
; CHECK-NOFP-NEXT: fcvt s4, h7
470-
; CHECK-NOFP-NEXT: fcvt h3, s3
437+
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
438+
; CHECK-NOFP-NEXT: fcvt s1, h6
471439
; CHECK-NOFP-NEXT: fcvt h0, s0
472-
; CHECK-NOFP-NEXT: fcmp s4, s2
473-
; CHECK-NOFP-NEXT: fcvt s3, h3
474-
; CHECK-NOFP-NEXT: fcsel s1, s4, s1, gt
475440
; CHECK-NOFP-NEXT: fcvt s0, h0
476-
; CHECK-NOFP-NEXT: fcvt h1, s1
477-
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
478-
; CHECK-NOFP-NEXT: fcvt s1, h1
441+
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
442+
; CHECK-NOFP-NEXT: fcvt s1, h7
479443
; CHECK-NOFP-NEXT: fcvt h0, s0
480444
; CHECK-NOFP-NEXT: fcvt s0, h0
481445
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
@@ -517,73 +481,44 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
517481
; CHECK-NOFP-LABEL: test_v11f16_ninf:
518482
; CHECK-NOFP: // %bb.0:
519483
; CHECK-NOFP-NEXT: ldr h16, [sp, #8]
520-
; CHECK-NOFP-NEXT: fcvt s1, h1
521484
; CHECK-NOFP-NEXT: ldr h17, [sp]
485+
; CHECK-NOFP-NEXT: fcvt s1, h1
522486
; CHECK-NOFP-NEXT: fcvt s0, h0
523487
; CHECK-NOFP-NEXT: fcvt s2, h2
524-
; CHECK-NOFP-NEXT: adrp x8, .LCPI15_0
525488
; CHECK-NOFP-NEXT: fcvt s16, h16
526489
; CHECK-NOFP-NEXT: fcvt s17, h17
527-
; CHECK-NOFP-NEXT: fcvt s3, h3
528-
; CHECK-NOFP-NEXT: fcvt s4, h4
529-
; CHECK-NOFP-NEXT: fcmp s1, s16
530-
; CHECK-NOFP-NEXT: fcsel s1, s1, s16, gt
531-
; CHECK-NOFP-NEXT: fcmp s0, s17
490+
; CHECK-NOFP-NEXT: fmaxnm s1, s1, s16
491+
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s17
532492
; CHECK-NOFP-NEXT: ldr h16, [sp, #16]
533493
; CHECK-NOFP-NEXT: fcvt s16, h16
534-
; CHECK-NOFP-NEXT: fcsel s0, s0, s17, gt
535494
; CHECK-NOFP-NEXT: fcvt h1, s1
536495
; CHECK-NOFP-NEXT: fcvt h0, s0
537-
; CHECK-NOFP-NEXT: fcmp s2, s16
538496
; CHECK-NOFP-NEXT: fcvt s1, h1
539497
; CHECK-NOFP-NEXT: fcvt s0, h0
540498
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
541-
; CHECK-NOFP-NEXT: fcsel s1, s2, s16, gt
542-
; CHECK-NOFP-NEXT: ldr h2, [x8, :lo12:.LCPI15_0]
543-
; CHECK-NOFP-NEXT: mov w8, #57344 // =0xe000
544-
; CHECK-NOFP-NEXT: fcvt s2, h2
545-
; CHECK-NOFP-NEXT: movk w8, #51071, lsl #16
546-
; CHECK-NOFP-NEXT: fcvt h1, s1
499+
; CHECK-NOFP-NEXT: fmaxnm s1, s2, s16
547500
; CHECK-NOFP-NEXT: fcvt h0, s0
548-
; CHECK-NOFP-NEXT: fcmp s3, s2
549-
; CHECK-NOFP-NEXT: fcvt s1, h1
501+
; CHECK-NOFP-NEXT: fcvt h1, s1
550502
; CHECK-NOFP-NEXT: fcvt s0, h0
503+
; CHECK-NOFP-NEXT: fcvt s1, h1
551504
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
552-
; CHECK-NOFP-NEXT: fmov s1, w8
553-
; CHECK-NOFP-NEXT: fcsel s3, s3, s1, gt
554-
; CHECK-NOFP-NEXT: fcmp s4, s2
505+
; CHECK-NOFP-NEXT: fcvt s1, h3
555506
; CHECK-NOFP-NEXT: fcvt h0, s0
556-
; CHECK-NOFP-NEXT: fcvt h3, s3
557507
; CHECK-NOFP-NEXT: fcvt s0, h0
558-
; CHECK-NOFP-NEXT: fcvt s3, h3
559-
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
560-
; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
561-
; CHECK-NOFP-NEXT: fcvt s4, h5
562-
; CHECK-NOFP-NEXT: fcvt h3, s3
508+
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
509+
; CHECK-NOFP-NEXT: fcvt s1, h4
563510
; CHECK-NOFP-NEXT: fcvt h0, s0
564-
; CHECK-NOFP-NEXT: fcmp s4, s2
565-
; CHECK-NOFP-NEXT: fcvt s3, h3
566511
; CHECK-NOFP-NEXT: fcvt s0, h0
567-
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
568-
; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
569-
; CHECK-NOFP-NEXT: fcvt s4, h6
570-
; CHECK-NOFP-NEXT: fcvt h3, s3
512+
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
513+
; CHECK-NOFP-NEXT: fcvt s1, h5
571514
; CHECK-NOFP-NEXT: fcvt h0, s0
572-
; CHECK-NOFP-NEXT: fcmp s4, s2
573-
; CHECK-NOFP-NEXT: fcvt s3, h3
574515
; CHECK-NOFP-NEXT: fcvt s0, h0
575-
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
576-
; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
577-
; CHECK-NOFP-NEXT: fcvt s4, h7
578-
; CHECK-NOFP-NEXT: fcvt h3, s3
516+
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
517+
; CHECK-NOFP-NEXT: fcvt s1, h6
579518
; CHECK-NOFP-NEXT: fcvt h0, s0
580-
; CHECK-NOFP-NEXT: fcmp s4, s2
581-
; CHECK-NOFP-NEXT: fcvt s3, h3
582-
; CHECK-NOFP-NEXT: fcsel s1, s4, s1, gt
583519
; CHECK-NOFP-NEXT: fcvt s0, h0
584-
; CHECK-NOFP-NEXT: fcvt h1, s1
585-
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
586-
; CHECK-NOFP-NEXT: fcvt s1, h1
520+
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
521+
; CHECK-NOFP-NEXT: fcvt s1, h7
587522
; CHECK-NOFP-NEXT: fcvt h0, s0
588523
; CHECK-NOFP-NEXT: fcvt s0, h0
589524
; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1

0 commit comments

Comments
 (0)