Skip to content

Commit 5871f18

Browse files
committed
[AArch64] Lower extending uitofp using tbl.
On AArch64, an extending uitofp can be lowered more efficiently by doing the zero-extend separately first and lowering that zero-extend using tbl, building on D120571. See https://alive2.llvm.org/ce/z/8Je595. Depends on D120571. Reviewed By: t.p.northover. Differential Revision: https://reviews.llvm.org/D133494
1 parent f096e72 commit 5871f18

File tree

3 files changed

+170
-30
lines changed

3 files changed

+170
-30
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8047,6 +8047,10 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
80478047
if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
80488048
return true;
80498049

8050+
if (isa<UIToFPInst>(I) && TLI->optimizeExtendOrTruncateConversion(
8051+
I, LI->getLoopFor(I->getParent())))
8052+
return true;
8053+
80508054
if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
80518055
/// Sink a zext or sext into its user blocks if the target type doesn't
80528056
/// fit in one register

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13235,6 +13235,21 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(Instruction *I,
1323513235
createTblShuffleForZExt(ZExt, Subtarget->isLittleEndian());
1323613236
return true;
1323713237
}
13238+
13239+
auto *UIToFP = dyn_cast<UIToFPInst>(I);
13240+
if (UIToFP &&
13241+
(SrcTy->getNumElements() == 8 || SrcTy->getNumElements() == 16) &&
13242+
SrcTy->getElementType()->isIntegerTy(8) &&
13243+
DstTy->getElementType()->isFloatTy()) {
13244+
IRBuilder<> Builder(I);
13245+
auto *ZExt = cast<ZExtInst>(
13246+
Builder.CreateZExt(I->getOperand(0), VectorType::getInteger(DstTy)));
13247+
auto *UI = Builder.CreateUIToFP(ZExt, DstTy);
13248+
I->replaceAllUsesWith(UI);
13249+
I->eraseFromParent();
13250+
createTblShuffleForZExt(ZExt, Subtarget->isLittleEndian());
13251+
return true;
13252+
}
1323813253
return false;
1323913254
}
1324013255

llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll

Lines changed: 151 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -386,28 +386,69 @@ exit:
386386
ret void
387387
}
388388

389+
; CHECK-LABEL: lCPI8_0:
390+
; CHECK-NEXT: .byte 4 ; 0x4
391+
; CHECK-NEXT: .byte 255 ; 0xff
392+
; CHECK-NEXT: .byte 255 ; 0xff
393+
; CHECK-NEXT: .byte 255 ; 0xff
394+
; CHECK-NEXT: .byte 5 ; 0x5
395+
; CHECK-NEXT: .byte 255 ; 0xff
396+
; CHECK-NEXT: .byte 255 ; 0xff
397+
; CHECK-NEXT: .byte 255 ; 0xff
398+
; CHECK-NEXT: .byte 6 ; 0x6
399+
; CHECK-NEXT: .byte 255 ; 0xff
400+
; CHECK-NEXT: .byte 255 ; 0xff
401+
; CHECK-NEXT: .byte 255 ; 0xff
402+
; CHECK-NEXT: .byte 7 ; 0x7
403+
; CHECK-NEXT: .byte 255 ; 0xff
404+
; CHECK-NEXT: .byte 255 ; 0xff
405+
; CHECK-NEXT: .byte 255 ; 0xff
406+
; CHECK-NEXT: lCPI8_1:
407+
; CHECK-NEXT: .byte 0 ; 0x0
408+
; CHECK-NEXT: .byte 255 ; 0xff
409+
; CHECK-NEXT: .byte 255 ; 0xff
410+
; CHECK-NEXT: .byte 255 ; 0xff
411+
; CHECK-NEXT: .byte 1 ; 0x1
412+
; CHECK-NEXT: .byte 255 ; 0xff
413+
; CHECK-NEXT: .byte 255 ; 0xff
414+
; CHECK-NEXT: .byte 255 ; 0xff
415+
; CHECK-NEXT: .byte 2 ; 0x2
416+
; CHECK-NEXT: .byte 255 ; 0xff
417+
; CHECK-NEXT: .byte 255 ; 0xff
418+
; CHECK-NEXT: .byte 255 ; 0xff
419+
; CHECK-NEXT: .byte 3 ; 0x3
420+
; CHECK-NEXT: .byte 255 ; 0xff
421+
; CHECK-NEXT: .byte 255 ; 0xff
422+
; CHECK-NEXT: .byte 255 ; 0xff
423+
389424
define void @uitofp_v8i8_to_v8f32(ptr %src, ptr %dst) {
390425
; CHECK-LABEL: uitofp_v8i8_to_v8f32:
391426
; CHECK: ; %bb.0: ; %entry
427+
; CHECK-NEXT: Lloh2:
428+
; CHECK-NEXT: adrp x9, lCPI8_0@PAGE
429+
; CHECK-NEXT: Lloh3:
430+
; CHECK-NEXT: adrp x10, lCPI8_1@PAGE
392431
; CHECK-NEXT: mov x8, xzr
432+
; CHECK-NEXT: Lloh4:
433+
; CHECK-NEXT: ldr q0, [x9, lCPI8_0@PAGEOFF]
434+
; CHECK-NEXT: Lloh5:
435+
; CHECK-NEXT: ldr q1, [x10, lCPI8_1@PAGEOFF]
393436
; CHECK-NEXT: LBB8_1: ; %loop
394437
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
395-
; CHECK-NEXT: ldr d0, [x0, x8, lsl #3]
438+
; CHECK-NEXT: ldr d2, [x0, x8, lsl #3]
396439
; CHECK-NEXT: add x9, x1, x8, lsl #5
397440
; CHECK-NEXT: add x8, x8, #1
398441
; CHECK-NEXT: cmp x8, #1000
399-
; CHECK-NEXT: zip1.8b v1, v0, v0
400-
; CHECK-NEXT: zip2.8b v0, v0, v0
401-
; CHECK-NEXT: bic.4h v1, #255, lsl #8
402-
; CHECK-NEXT: bic.4h v0, #255, lsl #8
403-
; CHECK-NEXT: ushll.4s v0, v0, #0
404-
; CHECK-NEXT: ushll.4s v1, v1, #0
405-
; CHECK-NEXT: ucvtf.4s v0, v0
406-
; CHECK-NEXT: ucvtf.4s v1, v1
407-
; CHECK-NEXT: stp q1, q0, [x9]
442+
; CHECK-NEXT: tbl.16b v3, { v2 }, v0
443+
; CHECK-NEXT: tbl.16b v2, { v2 }, v1
444+
; CHECK-NEXT: ucvtf.4s v3, v3
445+
; CHECK-NEXT: ucvtf.4s v2, v2
446+
; CHECK-NEXT: stp q2, q3, [x9]
408447
; CHECK-NEXT: b.eq LBB8_1
409448
; CHECK-NEXT: ; %bb.2: ; %exit
410449
; CHECK-NEXT: ret
450+
; CHECK-NEXT: .loh AdrpLdr Lloh3, Lloh5
451+
; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh4
411452
entry:
412453
br label %loop
413454

@@ -426,38 +467,118 @@ exit:
426467
ret void
427468
}
428469

470+
; CHECK-LABEL: lCPI9_0:
471+
; CHECK-NEXT: .byte 12 ; 0xc
472+
; CHECK-NEXT: .byte 255 ; 0xff
473+
; CHECK-NEXT: .byte 255 ; 0xff
474+
; CHECK-NEXT: .byte 255 ; 0xff
475+
; CHECK-NEXT: .byte 13 ; 0xd
476+
; CHECK-NEXT: .byte 255 ; 0xff
477+
; CHECK-NEXT: .byte 255 ; 0xff
478+
; CHECK-NEXT: .byte 255 ; 0xff
479+
; CHECK-NEXT: .byte 14 ; 0xe
480+
; CHECK-NEXT: .byte 255 ; 0xff
481+
; CHECK-NEXT: .byte 255 ; 0xff
482+
; CHECK-NEXT: .byte 255 ; 0xff
483+
; CHECK-NEXT: .byte 15 ; 0xf
484+
; CHECK-NEXT: .byte 255 ; 0xff
485+
; CHECK-NEXT: .byte 255 ; 0xff
486+
; CHECK-NEXT: .byte 255 ; 0xff
487+
; CHECK-NEXT: lCPI9_1:
488+
; CHECK-NEXT: .byte 8 ; 0x8
489+
; CHECK-NEXT: .byte 255 ; 0xff
490+
; CHECK-NEXT: .byte 255 ; 0xff
491+
; CHECK-NEXT: .byte 255 ; 0xff
492+
; CHECK-NEXT: .byte 9 ; 0x9
493+
; CHECK-NEXT: .byte 255 ; 0xff
494+
; CHECK-NEXT: .byte 255 ; 0xff
495+
; CHECK-NEXT: .byte 255 ; 0xff
496+
; CHECK-NEXT: .byte 10 ; 0xa
497+
; CHECK-NEXT: .byte 255 ; 0xff
498+
; CHECK-NEXT: .byte 255 ; 0xff
499+
; CHECK-NEXT: .byte 255 ; 0xff
500+
; CHECK-NEXT: .byte 11 ; 0xb
501+
; CHECK-NEXT: .byte 255 ; 0xff
502+
; CHECK-NEXT: .byte 255 ; 0xff
503+
; CHECK-NEXT: .byte 255 ; 0xff
504+
; CHECK-NEXT: lCPI9_2:
505+
; CHECK-NEXT: .byte 4 ; 0x4
506+
; CHECK-NEXT: .byte 255 ; 0xff
507+
; CHECK-NEXT: .byte 255 ; 0xff
508+
; CHECK-NEXT: .byte 255 ; 0xff
509+
; CHECK-NEXT: .byte 5 ; 0x5
510+
; CHECK-NEXT: .byte 255 ; 0xff
511+
; CHECK-NEXT: .byte 255 ; 0xff
512+
; CHECK-NEXT: .byte 255 ; 0xff
513+
; CHECK-NEXT: .byte 6 ; 0x6
514+
; CHECK-NEXT: .byte 255 ; 0xff
515+
; CHECK-NEXT: .byte 255 ; 0xff
516+
; CHECK-NEXT: .byte 255 ; 0xff
517+
; CHECK-NEXT: .byte 7 ; 0x7
518+
; CHECK-NEXT: .byte 255 ; 0xff
519+
; CHECK-NEXT: .byte 255 ; 0xff
520+
; CHECK-NEXT: .byte 255 ; 0xff
521+
; CHECK-NEXT: lCPI9_3:
522+
; CHECK-NEXT: .byte 0 ; 0x0
523+
; CHECK-NEXT: .byte 255 ; 0xff
524+
; CHECK-NEXT: .byte 255 ; 0xff
525+
; CHECK-NEXT: .byte 255 ; 0xff
526+
; CHECK-NEXT: .byte 1 ; 0x1
527+
; CHECK-NEXT: .byte 255 ; 0xff
528+
; CHECK-NEXT: .byte 255 ; 0xff
529+
; CHECK-NEXT: .byte 255 ; 0xff
530+
; CHECK-NEXT: .byte 2 ; 0x2
531+
; CHECK-NEXT: .byte 255 ; 0xff
532+
; CHECK-NEXT: .byte 255 ; 0xff
533+
; CHECK-NEXT: .byte 255 ; 0xff
534+
; CHECK-NEXT: .byte 3 ; 0x3
535+
; CHECK-NEXT: .byte 255 ; 0xff
536+
; CHECK-NEXT: .byte 255 ; 0xff
537+
; CHECK-NEXT: .byte 255 ; 0xff
538+
429539
define void @uitofp_v16i8_to_v16f32(ptr %src, ptr %dst) {
430540
; CHECK-LABEL: uitofp_v16i8_to_v16f32:
431541
; CHECK: ; %bb.0: ; %entry
542+
; CHECK-NEXT: Lloh6:
543+
; CHECK-NEXT: adrp x9, lCPI9_0@PAGE
544+
; CHECK-NEXT: Lloh7:
545+
; CHECK-NEXT: adrp x10, lCPI9_1@PAGE
546+
; CHECK-NEXT: Lloh8:
547+
; CHECK-NEXT: adrp x11, lCPI9_2@PAGE
548+
; CHECK-NEXT: Lloh9:
549+
; CHECK-NEXT: adrp x12, lCPI9_3@PAGE
432550
; CHECK-NEXT: mov x8, xzr
551+
; CHECK-NEXT: Lloh10:
552+
; CHECK-NEXT: ldr q0, [x9, lCPI9_0@PAGEOFF]
553+
; CHECK-NEXT: Lloh11:
554+
; CHECK-NEXT: ldr q1, [x10, lCPI9_1@PAGEOFF]
555+
; CHECK-NEXT: Lloh12:
556+
; CHECK-NEXT: ldr q2, [x11, lCPI9_2@PAGEOFF]
557+
; CHECK-NEXT: Lloh13:
558+
; CHECK-NEXT: ldr q3, [x12, lCPI9_3@PAGEOFF]
433559
; CHECK-NEXT: LBB9_1: ; %loop
434560
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
435-
; CHECK-NEXT: ldr q0, [x0, x8, lsl #4]
561+
; CHECK-NEXT: ldr q4, [x0, x8, lsl #4]
436562
; CHECK-NEXT: add x9, x1, x8, lsl #6
437563
; CHECK-NEXT: add x8, x8, #1
438564
; CHECK-NEXT: cmp x8, #1000
439-
; CHECK-NEXT: ext.16b v1, v0, v0, #8
440-
; CHECK-NEXT: zip1.8b v2, v0, v0
441-
; CHECK-NEXT: zip2.8b v0, v0, v0
442-
; CHECK-NEXT: bic.4h v2, #255, lsl #8
443-
; CHECK-NEXT: zip1.8b v3, v1, v0
444-
; CHECK-NEXT: zip2.8b v1, v1, v0
445-
; CHECK-NEXT: bic.4h v0, #255, lsl #8
446-
; CHECK-NEXT: ushll.4s v2, v2, #0
447-
; CHECK-NEXT: ushll.4s v0, v0, #0
448-
; CHECK-NEXT: bic.4h v3, #255, lsl #8
449-
; CHECK-NEXT: bic.4h v1, #255, lsl #8
450-
; CHECK-NEXT: ucvtf.4s v2, v2
451-
; CHECK-NEXT: ushll.4s v1, v1, #0
452-
; CHECK-NEXT: ucvtf.4s v0, v0
453-
; CHECK-NEXT: ushll.4s v3, v3, #0
454-
; CHECK-NEXT: ucvtf.4s v1, v1
455-
; CHECK-NEXT: ucvtf.4s v3, v3
456-
; CHECK-NEXT: stp q2, q0, [x9]
457-
; CHECK-NEXT: stp q3, q1, [x9, #32]
565+
; CHECK-NEXT: tbl.16b v5, { v4 }, v0
566+
; CHECK-NEXT: tbl.16b v6, { v4 }, v1
567+
; CHECK-NEXT: tbl.16b v7, { v4 }, v2
568+
; CHECK-NEXT: tbl.16b v4, { v4 }, v3
569+
; CHECK-NEXT: ucvtf.4s v5, v5
570+
; CHECK-NEXT: ucvtf.4s v6, v6
571+
; CHECK-NEXT: ucvtf.4s v7, v7
572+
; CHECK-NEXT: ucvtf.4s v4, v4
573+
; CHECK-NEXT: stp q6, q5, [x9, #32]
574+
; CHECK-NEXT: stp q4, q7, [x9]
458575
; CHECK-NEXT: b.eq LBB9_1
459576
; CHECK-NEXT: ; %bb.2: ; %exit
460577
; CHECK-NEXT: ret
578+
; CHECK-NEXT: .loh AdrpLdr Lloh9, Lloh13
579+
; CHECK-NEXT: .loh AdrpLdr Lloh8, Lloh12
580+
; CHECK-NEXT: .loh AdrpLdr Lloh7, Lloh11
581+
; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh10
461582
entry:
462583
br label %loop
463584

0 commit comments

Comments
 (0)