Skip to content

Commit 93bcb83

Browse files
committed
[AArch64] Use custom lowering for {U,S}INT_TO_FP with i8.
With fullfp16, it is cheaper to cast the {U,S}INT_TO_FP operand to i16 first, rather than promoting it to i32. The custom lowering for {U,S}INT_TO_FP already supports that; it just needs to be used. Reviewed By: dmgreen. Differential Revision: https://reviews.llvm.org/D113601 (cherry-picked from c2ed9fd)
1 parent a160e45 commit 93bcb83

File tree

2 files changed

+86
-60
lines changed

2 files changed

+86
-60
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1003,10 +1003,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
10031003
// elements smaller than i32, so promote the input to i32 first.
10041004
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
10051005
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
1006-
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
1007-
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
1008-
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32);
1009-
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32);
10101006

10111007
// Similarly, there is no direct i32 -> f64 vector conversion instruction.
10121008
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
@@ -1019,13 +1015,21 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
10191015
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
10201016

10211017
if (Subtarget->hasFullFP16()) {
1018+
setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Custom);
1019+
setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
1020+
setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Custom);
1021+
setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
10221022
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
10231023
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
10241024
setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
10251025
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
10261026
} else {
10271027
// when AArch64 doesn't have fullfp16 support, promote the input
10281028
// to i32 first.
1029+
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
1030+
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
1031+
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32);
1032+
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32);
10291033
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
10301034
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
10311035
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);

llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll

Lines changed: 78 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -297,41 +297,51 @@ define <4 x half> @sitofp_v4i8(<4 x i8> %a) #0 {
297297
}
298298

299299
define <8 x half> @sitofp_v8i8(<8 x i8> %a) #0 {
300-
; CHECK-LABEL: sitofp_v8i8:
301-
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
302-
; CHECK-NEXT: sshll2 v1.4s, v0.8h, #0
303-
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
304-
; CHECK-NEXT: scvtf v1.4s, v1.4s
305-
; CHECK-NEXT: scvtf v0.4s, v0.4s
306-
; CHECK-NEXT: fcvtn v1.4h, v1.4s
307-
; CHECK-NEXT: fcvtn v0.4h, v0.4s
308-
; CHECK-NEXT: mov v0.d[1], v1.d[0]
309-
; CHECK-NEXT: ret
300+
; CHECK-CVT-LABEL: sitofp_v8i8:
301+
; CHECK-CVT-NEXT: sshll v0.8h, v0.8b, #0
302+
; CHECK-CVT-NEXT: sshll2 v1.4s, v0.8h, #0
303+
; CHECK-CVT-NEXT: sshll v0.4s, v0.4h, #0
304+
; CHECK-CVT-NEXT: scvtf v1.4s, v1.4s
305+
; CHECK-CVT-NEXT: scvtf v0.4s, v0.4s
306+
; CHECK-CVT-NEXT: fcvtn v1.4h, v1.4s
307+
; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s
308+
; CHECK-CVT-NEXT: mov v0.d[1], v1.d[0]
309+
; CHECK-CVT-NEXT: ret
310310
;
311+
; CHECK-FP16-LABEL: sitofp_v8i8:
312+
; CHECK-FP16-NEXT: sshll v0.8h, v0.8b, #0
313+
; CHECK-FP16-NEXT: scvtf v0.8h, v0.8h
314+
; CHECK-FP16-NEXT: ret
311315
%1 = sitofp <8 x i8> %a to <8 x half>
312316
ret <8 x half> %1
313317
}
314318

315319
define <16 x half> @sitofp_v16i8(<16 x i8> %a) #0 {
316-
; CHECK-LABEL: sitofp_v16i8:
317-
; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0
318-
; CHECK-NEXT: sshll2 v2.4s, v1.8h, #0
319-
; CHECK-NEXT: sshll v1.4s, v1.4h, #0
320-
; CHECK-NEXT: scvtf v2.4s, v2.4s
321-
; CHECK-NEXT: scvtf v1.4s, v1.4s
322-
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
323-
; CHECK-NEXT: fcvtn v2.4h, v2.4s
324-
; CHECK-NEXT: fcvtn v1.4h, v1.4s
325-
; CHECK-NEXT: mov v1.d[1], v2.d[0]
326-
; CHECK-NEXT: sshll2 v2.4s, v0.8h, #0
327-
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
328-
; CHECK-NEXT: scvtf v2.4s, v2.4s
329-
; CHECK-NEXT: scvtf v0.4s, v0.4s
330-
; CHECK-NEXT: fcvtn v2.4h, v2.4s
331-
; CHECK-NEXT: fcvtn v0.4h, v0.4s
332-
; CHECK-NEXT: mov v0.d[1], v2.d[0]
333-
; CHECK-NEXT: ret
320+
; CHECK-CVT-LABEL: sitofp_v16i8:
321+
; CHECK-CVT-NEXT: sshll2 v1.8h, v0.16b, #0
322+
; CHECK-CVT-NEXT: sshll2 v2.4s, v1.8h, #0
323+
; CHECK-CVT-NEXT: sshll v1.4s, v1.4h, #0
324+
; CHECK-CVT-NEXT: scvtf v2.4s, v2.4s
325+
; CHECK-CVT-NEXT: scvtf v1.4s, v1.4s
326+
; CHECK-CVT-NEXT: sshll v0.8h, v0.8b, #0
327+
; CHECK-CVT-NEXT: fcvtn v2.4h, v2.4s
328+
; CHECK-CVT-NEXT: fcvtn v1.4h, v1.4s
329+
; CHECK-CVT-NEXT: mov v1.d[1], v2.d[0]
330+
; CHECK-CVT-NEXT: sshll2 v2.4s, v0.8h, #0
331+
; CHECK-CVT-NEXT: sshll v0.4s, v0.4h, #0
332+
; CHECK-CVT-NEXT: scvtf v2.4s, v2.4s
333+
; CHECK-CVT-NEXT: scvtf v0.4s, v0.4s
334+
; CHECK-CVT-NEXT: fcvtn v2.4h, v2.4s
335+
; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s
336+
; CHECK-CVT-NEXT: mov v0.d[1], v2.d[0]
337+
; CHECK-CVT-NEXT: ret
334338
;
339+
; CHECK-FP16-LABEL: sitofp_v16i8:
340+
; CHECK-FP16-NEXT: sshll2 v1.8h, v0.16b, #0
341+
; CHECK-FP16-NEXT: sshll v0.8h, v0.8b, #0
342+
; CHECK-FP16-NEXT: scvtf v1.8h, v1.8h
343+
; CHECK-FP16-NEXT: scvtf v0.8h, v0.8h
344+
; CHECK-FP16-NEXT: ret
335345
%1 = sitofp <16 x i8> %a to <16 x half>
336346
ret <16 x half> %1
337347
}
@@ -391,40 +401,52 @@ define <4 x half> @uitofp_v4i8(<4 x i8> %a) #0 {
391401
}
392402

393403
define <8 x half> @uitofp_v8i8(<8 x i8> %a) #0 {
394-
; CHECK-LABEL: uitofp_v8i8:
395-
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
396-
; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0
397-
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
398-
; CHECK-NEXT: ucvtf v1.4s, v1.4s
399-
; CHECK-NEXT: ucvtf v0.4s, v0.4s
400-
; CHECK-NEXT: fcvtn v1.4h, v1.4s
401-
; CHECK-NEXT: fcvtn v0.4h, v0.4s
402-
; CHECK-NEXT: mov v0.d[1], v1.d[0]
403-
; CHECK-NEXT: ret
404+
; CHECK-CVT-LABEL: uitofp_v8i8:
405+
; CHECK-CVT-NEXT: ushll v0.8h, v0.8b, #0
406+
; CHECK-CVT-NEXT: ushll2 v1.4s, v0.8h, #0
407+
; CHECK-CVT-NEXT: ushll v0.4s, v0.4h, #0
408+
; CHECK-CVT-NEXT: ucvtf v1.4s, v1.4s
409+
; CHECK-CVT-NEXT: ucvtf v0.4s, v0.4s
410+
; CHECK-CVT-NEXT: fcvtn v1.4h, v1.4s
411+
; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s
412+
; CHECK-CVT-NEXT: mov v0.d[1], v1.d[0]
413+
; CHECK-CVT-NEXT: ret
414+
;
415+
; CHECK-FP16-LABEL: uitofp_v8i8:
416+
; CHECK-FP16-NEXT: ushll v0.8h, v0.8b, #0
417+
; CHECK-FP16-NEXT: ucvtf v0.8h, v0.8h
418+
; CHECK-FP16-NEXT: ret
404419
;
405420
%1 = uitofp <8 x i8> %a to <8 x half>
406421
ret <8 x half> %1
407422
}
408423

409424
define <16 x half> @uitofp_v16i8(<16 x i8> %a) #0 {
410-
; CHECK-LABEL: uitofp_v16i8:
411-
; CHECK-NEXT: ushll2 v1.8h, v0.16b, #0
412-
; CHECK-NEXT: ushll2 v2.4s, v1.8h, #0
413-
; CHECK-NEXT: ushll v1.4s, v1.4h, #0
414-
; CHECK-NEXT: ucvtf v2.4s, v2.4s
415-
; CHECK-NEXT: ucvtf v1.4s, v1.4s
416-
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
417-
; CHECK-NEXT: fcvtn v2.4h, v2.4s
418-
; CHECK-NEXT: fcvtn v1.4h, v1.4s
419-
; CHECK-NEXT: mov v1.d[1], v2.d[0]
420-
; CHECK-NEXT: ushll2 v2.4s, v0.8h, #0
421-
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
422-
; CHECK-NEXT: ucvtf v2.4s, v2.4s
423-
; CHECK-NEXT: ucvtf v0.4s, v0.4s
424-
; CHECK-NEXT: fcvtn v2.4h, v2.4s
425-
; CHECK-NEXT: fcvtn v0.4h, v0.4s
426-
; CHECK-NEXT: mov v0.d[1], v2.d[0]
427-
; CHECK-NEXT: ret
425+
; CHECK-CVT-LABEL: uitofp_v16i8:
426+
; CHECK-CVT-NEXT: ushll2 v1.8h, v0.16b, #0
427+
; CHECK-CVT-NEXT: ushll2 v2.4s, v1.8h, #0
428+
; CHECK-CVT-NEXT: ushll v1.4s, v1.4h, #0
429+
; CHECK-CVT-NEXT: ucvtf v2.4s, v2.4s
430+
; CHECK-CVT-NEXT: ucvtf v1.4s, v1.4s
431+
; CHECK-CVT-NEXT: ushll v0.8h, v0.8b, #0
432+
; CHECK-CVT-NEXT: fcvtn v2.4h, v2.4s
433+
; CHECK-CVT-NEXT: fcvtn v1.4h, v1.4s
434+
; CHECK-CVT-NEXT: mov v1.d[1], v2.d[0]
435+
; CHECK-CVT-NEXT: ushll2 v2.4s, v0.8h, #0
436+
; CHECK-CVT-NEXT: ushll v0.4s, v0.4h, #0
437+
; CHECK-CVT-NEXT: ucvtf v2.4s, v2.4s
438+
; CHECK-CVT-NEXT: ucvtf v0.4s, v0.4s
439+
; CHECK-CVT-NEXT: fcvtn v2.4h, v2.4s
440+
; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s
441+
; CHECK-CVT-NEXT: mov v0.d[1], v2.d[0]
442+
; CHECK-CVT-NEXT: ret
443+
;
444+
; CHECK-FP16-LABEL: uitofp_v16i8:
445+
; CHECK-FP16-NEXT: ushll2 v1.8h, v0.16b, #0
446+
; CHECK-FP16-NEXT: ushll v0.8h, v0.8b, #0
447+
; CHECK-FP16-NEXT: ucvtf v1.8h, v1.8h
448+
; CHECK-FP16-NEXT: ucvtf v0.8h, v0.8h
449+
; CHECK-FP16-NEXT: ret
428450
;
429451
%1 = uitofp <16 x i8> %a to <16 x half>
430452
ret <16 x half> %1

0 commit comments

Comments
 (0)