Skip to content

Commit 0f38a40

Browse files
committed
[AArch64] Use custom lowering for {U,S}INT_TO_FP with i8.
With fullfp16, it is cheaper to cast the {U,S}INT_TO_FP operand to i16 first, rather than promoting it to i32. The custom lowering for {U,S}INT_TO_FP already supports that; it just needs to be used. Reviewed By: dmgreen. Differential Revision: https://reviews.llvm.org/D113601 (cherry-picked from c2ed9fd)
1 parent 9583d5f commit 0f38a40

File tree

2 files changed

+96
-64
lines changed

2 files changed

+96
-64
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,10 +1020,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
10201020
// elements smaller than i32, so promote the input to i32 first.
10211021
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
10221022
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
1023-
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
1024-
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
1025-
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32);
1026-
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32);
10271023

10281024
// Similarly, there is no direct i32 -> f64 vector conversion instruction.
10291025
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
@@ -1036,13 +1032,21 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
10361032
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
10371033

10381034
if (Subtarget->hasFullFP16()) {
1035+
setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Custom);
1036+
setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
1037+
setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Custom);
1038+
setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
10391039
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
10401040
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
10411041
setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
10421042
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
10431043
} else {
10441044
// When AArch64 doesn't have fullfp16 support, promote the input
10451045
// to i32 first.
1046+
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
1047+
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
1048+
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32);
1049+
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32);
10461050
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
10471051
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
10481052
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);

llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll

Lines changed: 88 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -414,41 +414,55 @@ define <4 x half> @sitofp_v4i8(<4 x i8> %a) #0 {
414414
}
415415

416416
define <8 x half> @sitofp_v8i8(<8 x i8> %a) #0 {
417-
; CHECK-LABEL: sitofp_v8i8:
418-
; CHECK: // %bb.0:
419-
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
420-
; CHECK-NEXT: sshll2 v1.4s, v0.8h, #0
421-
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
422-
; CHECK-NEXT: scvtf v1.4s, v1.4s
423-
; CHECK-NEXT: scvtf v0.4s, v0.4s
424-
; CHECK-NEXT: fcvtn v1.4h, v1.4s
425-
; CHECK-NEXT: fcvtn v0.4h, v0.4s
426-
; CHECK-NEXT: mov v0.d[1], v1.d[0]
427-
; CHECK-NEXT: ret
417+
; CHECK-CVT-LABEL: sitofp_v8i8:
418+
; CHECK-CVT: // %bb.0:
419+
; CHECK-CVT-NEXT: sshll v0.8h, v0.8b, #0
420+
; CHECK-CVT-NEXT: sshll2 v1.4s, v0.8h, #0
421+
; CHECK-CVT-NEXT: sshll v0.4s, v0.4h, #0
422+
; CHECK-CVT-NEXT: scvtf v1.4s, v1.4s
423+
; CHECK-CVT-NEXT: scvtf v0.4s, v0.4s
424+
; CHECK-CVT-NEXT: fcvtn v1.4h, v1.4s
425+
; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s
426+
; CHECK-CVT-NEXT: mov v0.d[1], v1.d[0]
427+
; CHECK-CVT-NEXT: ret
428+
;
429+
; CHECK-FP16-LABEL: sitofp_v8i8:
430+
; CHECK-FP16: // %bb.0:
431+
; CHECK-FP16-NEXT: sshll v0.8h, v0.8b, #0
432+
; CHECK-FP16-NEXT: scvtf v0.8h, v0.8h
433+
; CHECK-FP16-NEXT: ret
428434
%1 = sitofp <8 x i8> %a to <8 x half>
429435
ret <8 x half> %1
430436
}
431437

432438
define <16 x half> @sitofp_v16i8(<16 x i8> %a) #0 {
433-
; CHECK-LABEL: sitofp_v16i8:
434-
; CHECK: // %bb.0:
435-
; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0
436-
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
437-
; CHECK-NEXT: sshll2 v2.4s, v1.8h, #0
438-
; CHECK-NEXT: sshll v1.4s, v1.4h, #0
439-
; CHECK-NEXT: sshll2 v3.4s, v0.8h, #0
440-
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
441-
; CHECK-NEXT: scvtf v2.4s, v2.4s
442-
; CHECK-NEXT: scvtf v1.4s, v1.4s
443-
; CHECK-NEXT: scvtf v3.4s, v3.4s
444-
; CHECK-NEXT: scvtf v0.4s, v0.4s
445-
; CHECK-NEXT: fcvtn v2.4h, v2.4s
446-
; CHECK-NEXT: fcvtn v1.4h, v1.4s
447-
; CHECK-NEXT: fcvtn v3.4h, v3.4s
448-
; CHECK-NEXT: fcvtn v0.4h, v0.4s
449-
; CHECK-NEXT: mov v1.d[1], v2.d[0]
450-
; CHECK-NEXT: mov v0.d[1], v3.d[0]
451-
; CHECK-NEXT: ret
439+
; CHECK-CVT-LABEL: sitofp_v16i8:
440+
; CHECK-CVT: // %bb.0:
441+
; CHECK-CVT-NEXT: sshll2 v1.8h, v0.16b, #0
442+
; CHECK-CVT-NEXT: sshll v0.8h, v0.8b, #0
443+
; CHECK-CVT-NEXT: sshll2 v2.4s, v1.8h, #0
444+
; CHECK-CVT-NEXT: sshll v1.4s, v1.4h, #0
445+
; CHECK-CVT-NEXT: sshll2 v3.4s, v0.8h, #0
446+
; CHECK-CVT-NEXT: sshll v0.4s, v0.4h, #0
447+
; CHECK-CVT-NEXT: scvtf v2.4s, v2.4s
448+
; CHECK-CVT-NEXT: scvtf v1.4s, v1.4s
449+
; CHECK-CVT-NEXT: scvtf v3.4s, v3.4s
450+
; CHECK-CVT-NEXT: scvtf v0.4s, v0.4s
451+
; CHECK-CVT-NEXT: fcvtn v2.4h, v2.4s
452+
; CHECK-CVT-NEXT: fcvtn v1.4h, v1.4s
453+
; CHECK-CVT-NEXT: fcvtn v3.4h, v3.4s
454+
; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s
455+
; CHECK-CVT-NEXT: mov v1.d[1], v2.d[0]
456+
; CHECK-CVT-NEXT: mov v0.d[1], v3.d[0]
457+
; CHECK-CVT-NEXT: ret
458+
;
459+
; CHECK-FP16-LABEL: sitofp_v16i8:
460+
; CHECK-FP16: // %bb.0:
461+
; CHECK-FP16-NEXT: sshll2 v1.8h, v0.16b, #0
462+
; CHECK-FP16-NEXT: sshll v0.8h, v0.8b, #0
463+
; CHECK-FP16-NEXT: scvtf v1.8h, v1.8h
464+
; CHECK-FP16-NEXT: scvtf v0.8h, v0.8h
465+
; CHECK-FP16-NEXT: ret
452466
%1 = sitofp <16 x i8> %a to <16 x half>
453467
ret <16 x half> %1
454468
}
@@ -525,41 +539,55 @@ define <4 x half> @uitofp_v4i8(<4 x i8> %a) #0 {
525539
}
526540

527541
define <8 x half> @uitofp_v8i8(<8 x i8> %a) #0 {
528-
; CHECK-LABEL: uitofp_v8i8:
529-
; CHECK: // %bb.0:
530-
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
531-
; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0
532-
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
533-
; CHECK-NEXT: ucvtf v1.4s, v1.4s
534-
; CHECK-NEXT: ucvtf v0.4s, v0.4s
535-
; CHECK-NEXT: fcvtn v1.4h, v1.4s
536-
; CHECK-NEXT: fcvtn v0.4h, v0.4s
537-
; CHECK-NEXT: mov v0.d[1], v1.d[0]
538-
; CHECK-NEXT: ret
542+
; CHECK-CVT-LABEL: uitofp_v8i8:
543+
; CHECK-CVT: // %bb.0:
544+
; CHECK-CVT-NEXT: ushll v0.8h, v0.8b, #0
545+
; CHECK-CVT-NEXT: ushll2 v1.4s, v0.8h, #0
546+
; CHECK-CVT-NEXT: ushll v0.4s, v0.4h, #0
547+
; CHECK-CVT-NEXT: ucvtf v1.4s, v1.4s
548+
; CHECK-CVT-NEXT: ucvtf v0.4s, v0.4s
549+
; CHECK-CVT-NEXT: fcvtn v1.4h, v1.4s
550+
; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s
551+
; CHECK-CVT-NEXT: mov v0.d[1], v1.d[0]
552+
; CHECK-CVT-NEXT: ret
553+
;
554+
; CHECK-FP16-LABEL: uitofp_v8i8:
555+
; CHECK-FP16: // %bb.0:
556+
; CHECK-FP16-NEXT: ushll v0.8h, v0.8b, #0
557+
; CHECK-FP16-NEXT: ucvtf v0.8h, v0.8h
558+
; CHECK-FP16-NEXT: ret
539559
%1 = uitofp <8 x i8> %a to <8 x half>
540560
ret <8 x half> %1
541561
}
542562

543563
define <16 x half> @uitofp_v16i8(<16 x i8> %a) #0 {
544-
; CHECK-LABEL: uitofp_v16i8:
545-
; CHECK: // %bb.0:
546-
; CHECK-NEXT: ushll2 v1.8h, v0.16b, #0
547-
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
548-
; CHECK-NEXT: ushll2 v2.4s, v1.8h, #0
549-
; CHECK-NEXT: ushll v1.4s, v1.4h, #0
550-
; CHECK-NEXT: ushll2 v3.4s, v0.8h, #0
551-
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
552-
; CHECK-NEXT: ucvtf v2.4s, v2.4s
553-
; CHECK-NEXT: ucvtf v1.4s, v1.4s
554-
; CHECK-NEXT: ucvtf v3.4s, v3.4s
555-
; CHECK-NEXT: ucvtf v0.4s, v0.4s
556-
; CHECK-NEXT: fcvtn v2.4h, v2.4s
557-
; CHECK-NEXT: fcvtn v1.4h, v1.4s
558-
; CHECK-NEXT: fcvtn v3.4h, v3.4s
559-
; CHECK-NEXT: fcvtn v0.4h, v0.4s
560-
; CHECK-NEXT: mov v1.d[1], v2.d[0]
561-
; CHECK-NEXT: mov v0.d[1], v3.d[0]
562-
; CHECK-NEXT: ret
564+
; CHECK-CVT-LABEL: uitofp_v16i8:
565+
; CHECK-CVT: // %bb.0:
566+
; CHECK-CVT-NEXT: ushll2 v1.8h, v0.16b, #0
567+
; CHECK-CVT-NEXT: ushll v0.8h, v0.8b, #0
568+
; CHECK-CVT-NEXT: ushll2 v2.4s, v1.8h, #0
569+
; CHECK-CVT-NEXT: ushll v1.4s, v1.4h, #0
570+
; CHECK-CVT-NEXT: ushll2 v3.4s, v0.8h, #0
571+
; CHECK-CVT-NEXT: ushll v0.4s, v0.4h, #0
572+
; CHECK-CVT-NEXT: ucvtf v2.4s, v2.4s
573+
; CHECK-CVT-NEXT: ucvtf v1.4s, v1.4s
574+
; CHECK-CVT-NEXT: ucvtf v3.4s, v3.4s
575+
; CHECK-CVT-NEXT: ucvtf v0.4s, v0.4s
576+
; CHECK-CVT-NEXT: fcvtn v2.4h, v2.4s
577+
; CHECK-CVT-NEXT: fcvtn v1.4h, v1.4s
578+
; CHECK-CVT-NEXT: fcvtn v3.4h, v3.4s
579+
; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s
580+
; CHECK-CVT-NEXT: mov v1.d[1], v2.d[0]
581+
; CHECK-CVT-NEXT: mov v0.d[1], v3.d[0]
582+
; CHECK-CVT-NEXT: ret
583+
;
584+
; CHECK-FP16-LABEL: uitofp_v16i8:
585+
; CHECK-FP16: // %bb.0:
586+
; CHECK-FP16-NEXT: ushll2 v1.8h, v0.16b, #0
587+
; CHECK-FP16-NEXT: ushll v0.8h, v0.8b, #0
588+
; CHECK-FP16-NEXT: ucvtf v1.8h, v1.8h
589+
; CHECK-FP16-NEXT: ucvtf v0.8h, v0.8h
590+
; CHECK-FP16-NEXT: ret
563591
%1 = uitofp <16 x i8> %a to <16 x half>
564592
ret <16 x half> %1
565593
}

0 commit comments

Comments
 (0)