Skip to content

Commit 56a95a2

Browse files
committed
[AArch64] Lower extending uitofp using tbl.
On AArch64, an extending uitofp can be lowered more efficiently by doing the zero-extend separately first and lowering it using tbl, building on D120571.
https://alive2.llvm.org/ce/z/8Je595
Depends on D120571.
Reviewed By: t.p.northover
Differential Revision: https://reviews.llvm.org/D133494
(cherry-picked from 5871f18)
1 parent 662fa8e commit 56a95a2

File tree

3 files changed

+170
-30
lines changed

3 files changed

+170
-30
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7800,6 +7800,10 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
78007800
if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
78017801
return true;
78027802

7803+
if (isa<UIToFPInst>(I) && TLI->optimizeExtendOrTruncateConversion(
7804+
I, LI->getLoopFor(I->getParent())))
7805+
return true;
7806+
78037807
if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
78047808
/// Sink a zext or sext into its user blocks if the target type doesn't
78057809
/// fit in one register

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12907,6 +12907,21 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(Instruction *I,
1290712907
createTblShuffleForZExt(ZExt, Subtarget->isLittleEndian());
1290812908
return true;
1290912909
}
12910+
12911+
auto *UIToFP = dyn_cast<UIToFPInst>(I);
12912+
if (UIToFP &&
12913+
(SrcTy->getNumElements() == 8 || SrcTy->getNumElements() == 16) &&
12914+
SrcTy->getElementType()->isIntegerTy(8) &&
12915+
DstTy->getElementType()->isFloatTy()) {
12916+
IRBuilder<> Builder(I);
12917+
auto *ZExt = cast<ZExtInst>(
12918+
Builder.CreateZExt(I->getOperand(0), VectorType::getInteger(DstTy)));
12919+
auto *UI = Builder.CreateUIToFP(ZExt, DstTy);
12920+
I->replaceAllUsesWith(UI);
12921+
I->eraseFromParent();
12922+
createTblShuffleForZExt(ZExt, Subtarget->isLittleEndian());
12923+
return true;
12924+
}
1291012925
return false;
1291112926
}
1291212927

llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll

Lines changed: 151 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -386,28 +386,69 @@ exit:
386386
ret void
387387
}
388388

389+
; CHECK-LABEL: lCPI8_0:
390+
; CHECK-NEXT: .byte 4 ; 0x4
391+
; CHECK-NEXT: .byte 16 ; 0x10
392+
; CHECK-NEXT: .byte 16 ; 0x10
393+
; CHECK-NEXT: .byte 16 ; 0x10
394+
; CHECK-NEXT: .byte 5 ; 0x5
395+
; CHECK-NEXT: .byte 16 ; 0x10
396+
; CHECK-NEXT: .byte 16 ; 0x10
397+
; CHECK-NEXT: .byte 16 ; 0x10
398+
; CHECK-NEXT: .byte 6 ; 0x6
399+
; CHECK-NEXT: .byte 16 ; 0x10
400+
; CHECK-NEXT: .byte 16 ; 0x10
401+
; CHECK-NEXT: .byte 16 ; 0x10
402+
; CHECK-NEXT: .byte 7 ; 0x7
403+
; CHECK-NEXT: .byte 16 ; 0x10
404+
; CHECK-NEXT: .byte 16 ; 0x10
405+
; CHECK-NEXT: .byte 16 ; 0x10
406+
; CHECK-NEXT:lCPI8_1:
407+
; CHECK-NEXT: .byte 0 ; 0x0
408+
; CHECK-NEXT: .byte 16 ; 0x10
409+
; CHECK-NEXT: .byte 16 ; 0x10
410+
; CHECK-NEXT: .byte 16 ; 0x10
411+
; CHECK-NEXT: .byte 1 ; 0x1
412+
; CHECK-NEXT: .byte 16 ; 0x10
413+
; CHECK-NEXT: .byte 16 ; 0x10
414+
; CHECK-NEXT: .byte 16 ; 0x10
415+
; CHECK-NEXT: .byte 2 ; 0x2
416+
; CHECK-NEXT: .byte 16 ; 0x10
417+
; CHECK-NEXT: .byte 16 ; 0x10
418+
; CHECK-NEXT: .byte 16 ; 0x10
419+
; CHECK-NEXT: .byte 3 ; 0x3
420+
; CHECK-NEXT: .byte 16 ; 0x10
421+
; CHECK-NEXT: .byte 16 ; 0x10
422+
; CHECK-NEXT: .byte 16 ; 0x10
423+
389424
define void @uitofp_v8i8_to_v8f32(ptr %src, ptr %dst) {
390425
; CHECK-LABEL: uitofp_v8i8_to_v8f32:
391426
; CHECK: ; %bb.0: ; %entry
427+
; CHECK-NEXT: Lloh2:
428+
; CHECK-NEXT: adrp x9, lCPI8_0@PAGE
429+
; CHECK-NEXT: Lloh3:
430+
; CHECK-NEXT: adrp x10, lCPI8_1@PAGE
392431
; CHECK-NEXT: mov x8, xzr
432+
; CHECK-NEXT: Lloh4:
433+
; CHECK-NEXT: ldr q0, [x9, lCPI8_0@PAGEOFF]
434+
; CHECK-NEXT: Lloh5:
435+
; CHECK-NEXT: ldr q1, [x10, lCPI8_1@PAGEOFF]
393436
; CHECK-NEXT: LBB8_1: ; %loop
394437
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
395-
; CHECK-NEXT: ldr d0, [x0, x8, lsl #3]
438+
; CHECK-NEXT: ldr d2, [x0, x8, lsl #3]
396439
; CHECK-NEXT: add x9, x1, x8, lsl #5
397440
; CHECK-NEXT: add x8, x8, #1
398441
; CHECK-NEXT: cmp x8, #1000
399-
; CHECK-NEXT: zip1.8b v1, v0, v0
400-
; CHECK-NEXT: zip2.8b v0, v0, v0
401-
; CHECK-NEXT: bic.4h v1, #255, lsl #8
402-
; CHECK-NEXT: bic.4h v0, #255, lsl #8
403-
; CHECK-NEXT: ushll.4s v0, v0, #0
404-
; CHECK-NEXT: ushll.4s v1, v1, #0
405-
; CHECK-NEXT: ucvtf.4s v0, v0
406-
; CHECK-NEXT: ucvtf.4s v1, v1
407-
; CHECK-NEXT: stp q1, q0, [x9]
442+
; CHECK-NEXT: tbl.16b v3, { v2 }, v0
443+
; CHECK-NEXT: tbl.16b v2, { v2 }, v1
444+
; CHECK-NEXT: ucvtf.4s v3, v3
445+
; CHECK-NEXT: ucvtf.4s v2, v2
446+
; CHECK-NEXT: stp q2, q3, [x9]
408447
; CHECK-NEXT: b.eq LBB8_1
409448
; CHECK-NEXT: ; %bb.2: ; %exit
410449
; CHECK-NEXT: ret
450+
; CHECK-NEXT: .loh AdrpLdr Lloh3, Lloh5
451+
; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh4
411452
entry:
412453
br label %loop
413454

@@ -426,38 +467,118 @@ exit:
426467
ret void
427468
}
428469

470+
; CHECK-LABEL: lCPI9_0:
471+
; CHECK-NEXT: .byte 12 ; 0xc
472+
; CHECK-NEXT: .byte 16 ; 0x10
473+
; CHECK-NEXT: .byte 16 ; 0x10
474+
; CHECK-NEXT: .byte 16 ; 0x10
475+
; CHECK-NEXT: .byte 13 ; 0xd
476+
; CHECK-NEXT: .byte 16 ; 0x10
477+
; CHECK-NEXT: .byte 16 ; 0x10
478+
; CHECK-NEXT: .byte 16 ; 0x10
479+
; CHECK-NEXT: .byte 14 ; 0xe
480+
; CHECK-NEXT: .byte 16 ; 0x10
481+
; CHECK-NEXT: .byte 16 ; 0x10
482+
; CHECK-NEXT: .byte 16 ; 0x10
483+
; CHECK-NEXT: .byte 15 ; 0xf
484+
; CHECK-NEXT: .byte 16 ; 0x10
485+
; CHECK-NEXT: .byte 16 ; 0x10
486+
; CHECK-NEXT: .byte 16 ; 0x10
487+
; CHECK-NEXT: lCPI9_1:
488+
; CHECK-NEXT: .byte 8 ; 0x8
489+
; CHECK-NEXT: .byte 16 ; 0x10
490+
; CHECK-NEXT: .byte 16 ; 0x10
491+
; CHECK-NEXT: .byte 16 ; 0x10
492+
; CHECK-NEXT: .byte 9 ; 0x9
493+
; CHECK-NEXT: .byte 16 ; 0x10
494+
; CHECK-NEXT: .byte 16 ; 0x10
495+
; CHECK-NEXT: .byte 16 ; 0x10
496+
; CHECK-NEXT: .byte 10 ; 0xa
497+
; CHECK-NEXT: .byte 16 ; 0x10
498+
; CHECK-NEXT: .byte 16 ; 0x10
499+
; CHECK-NEXT: .byte 16 ; 0x10
500+
; CHECK-NEXT: .byte 11 ; 0xb
501+
; CHECK-NEXT: .byte 16 ; 0x10
502+
; CHECK-NEXT: .byte 16 ; 0x10
503+
; CHECK-NEXT: .byte 16 ; 0x10
504+
; CHECK-NEXT: lCPI9_2:
505+
; CHECK-NEXT: .byte 4 ; 0x4
506+
; CHECK-NEXT: .byte 16 ; 0x10
507+
; CHECK-NEXT: .byte 16 ; 0x10
508+
; CHECK-NEXT: .byte 16 ; 0x10
509+
; CHECK-NEXT: .byte 5 ; 0x5
510+
; CHECK-NEXT: .byte 16 ; 0x10
511+
; CHECK-NEXT: .byte 16 ; 0x10
512+
; CHECK-NEXT: .byte 16 ; 0x10
513+
; CHECK-NEXT: .byte 6 ; 0x6
514+
; CHECK-NEXT: .byte 16 ; 0x10
515+
; CHECK-NEXT: .byte 16 ; 0x10
516+
; CHECK-NEXT: .byte 16 ; 0x10
517+
; CHECK-NEXT: .byte 7 ; 0x7
518+
; CHECK-NEXT: .byte 16 ; 0x10
519+
; CHECK-NEXT: .byte 16 ; 0x10
520+
; CHECK-NEXT: .byte 16 ; 0x10
521+
; CHECK-NEXT: lCPI9_3:
522+
; CHECK-NEXT: .byte 0 ; 0x0
523+
; CHECK-NEXT: .byte 16 ; 0x10
524+
; CHECK-NEXT: .byte 16 ; 0x10
525+
; CHECK-NEXT: .byte 16 ; 0x10
526+
; CHECK-NEXT: .byte 1 ; 0x1
527+
; CHECK-NEXT: .byte 16 ; 0x10
528+
; CHECK-NEXT: .byte 16 ; 0x10
529+
; CHECK-NEXT: .byte 16 ; 0x10
530+
; CHECK-NEXT: .byte 2 ; 0x2
531+
; CHECK-NEXT: .byte 16 ; 0x10
532+
; CHECK-NEXT: .byte 16 ; 0x10
533+
; CHECK-NEXT: .byte 16 ; 0x10
534+
; CHECK-NEXT: .byte 3 ; 0x3
535+
; CHECK-NEXT: .byte 16 ; 0x10
536+
; CHECK-NEXT: .byte 16 ; 0x10
537+
; CHECK-NEXT: .byte 16 ; 0x10
538+
429539
define void @uitofp_v16i8_to_v16f32(ptr %src, ptr %dst) {
430540
; CHECK-LABEL: uitofp_v16i8_to_v16f32:
431541
; CHECK: ; %bb.0: ; %entry
542+
; CHECK-NEXT: Lloh6:
543+
; CHECK-NEXT: adrp x9, lCPI9_0@PAGE
544+
; CHECK-NEXT: Lloh7:
545+
; CHECK-NEXT: adrp x10, lCPI9_1@PAGE
546+
; CHECK-NEXT: Lloh8:
547+
; CHECK-NEXT: adrp x11, lCPI9_2@PAGE
548+
; CHECK-NEXT: Lloh9:
549+
; CHECK-NEXT: adrp x12, lCPI9_3@PAGE
432550
; CHECK-NEXT: mov x8, xzr
551+
; CHECK-NEXT: Lloh10:
552+
; CHECK-NEXT: ldr q0, [x9, lCPI9_0@PAGEOFF]
553+
; CHECK-NEXT: Lloh11:
554+
; CHECK-NEXT: ldr q1, [x10, lCPI9_1@PAGEOFF]
555+
; CHECK-NEXT: Lloh12:
556+
; CHECK-NEXT: ldr q2, [x11, lCPI9_2@PAGEOFF]
557+
; CHECK-NEXT: Lloh13:
558+
; CHECK-NEXT: ldr q3, [x12, lCPI9_3@PAGEOFF]
433559
; CHECK-NEXT: LBB9_1: ; %loop
434560
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
435-
; CHECK-NEXT: ldr q0, [x0, x8, lsl #4]
561+
; CHECK-NEXT: ldr q4, [x0, x8, lsl #4]
436562
; CHECK-NEXT: add x9, x1, x8, lsl #6
437563
; CHECK-NEXT: add x8, x8, #1
438564
; CHECK-NEXT: cmp x8, #1000
439-
; CHECK-NEXT: ext.16b v1, v0, v0, #8
440-
; CHECK-NEXT: zip1.8b v2, v0, v0
441-
; CHECK-NEXT: zip2.8b v0, v0, v0
442-
; CHECK-NEXT: bic.4h v2, #255, lsl #8
443-
; CHECK-NEXT: zip1.8b v3, v1, v0
444-
; CHECK-NEXT: zip2.8b v1, v1, v0
445-
; CHECK-NEXT: bic.4h v0, #255, lsl #8
446-
; CHECK-NEXT: ushll.4s v2, v2, #0
447-
; CHECK-NEXT: ushll.4s v0, v0, #0
448-
; CHECK-NEXT: bic.4h v3, #255, lsl #8
449-
; CHECK-NEXT: bic.4h v1, #255, lsl #8
450-
; CHECK-NEXT: ucvtf.4s v2, v2
451-
; CHECK-NEXT: ushll.4s v1, v1, #0
452-
; CHECK-NEXT: ucvtf.4s v0, v0
453-
; CHECK-NEXT: ushll.4s v3, v3, #0
454-
; CHECK-NEXT: ucvtf.4s v1, v1
455-
; CHECK-NEXT: ucvtf.4s v3, v3
456-
; CHECK-NEXT: stp q2, q0, [x9]
457-
; CHECK-NEXT: stp q3, q1, [x9, #32]
565+
; CHECK-NEXT: tbl.16b v5, { v4 }, v0
566+
; CHECK-NEXT: tbl.16b v6, { v4 }, v1
567+
; CHECK-NEXT: tbl.16b v7, { v4 }, v2
568+
; CHECK-NEXT: tbl.16b v4, { v4 }, v3
569+
; CHECK-NEXT: ucvtf.4s v5, v5
570+
; CHECK-NEXT: ucvtf.4s v6, v6
571+
; CHECK-NEXT: ucvtf.4s v7, v7
572+
; CHECK-NEXT: ucvtf.4s v4, v4
573+
; CHECK-NEXT: stp q6, q5, [x9, #32]
574+
; CHECK-NEXT: stp q4, q7, [x9]
458575
; CHECK-NEXT: b.eq LBB9_1
459576
; CHECK-NEXT: ; %bb.2: ; %exit
460577
; CHECK-NEXT: ret
578+
; CHECK-NEXT: .loh AdrpLdr Lloh9, Lloh13
579+
; CHECK-NEXT: .loh AdrpLdr Lloh8, Lloh12
580+
; CHECK-NEXT: .loh AdrpLdr Lloh7, Lloh11
581+
; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh10
461582
entry:
462583
br label %loop
463584

0 commit comments

Comments
 (0)