Skip to content

Commit 96819da

Browse files
authored
[AArch64] Handle v2i16 and v2i8 in concat load combine. (#86264)
This extends the concat-load patch from https://reviews.llvm.org/D121400 (which was later moved into a combine) so that it also handles concatenations of v2i8 and v2i16 loads.
1 parent d99cfa0 commit 96819da

File tree

2 files changed

+28
-78
lines changed

2 files changed

+28
-78
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18629,32 +18629,35 @@ static SDValue performConcatVectorsCombine(SDNode *N,
1862918629
}
1863018630
}
1863118631

18632-
if (N->getOperand(0).getValueType() == MVT::v4i8) {
18632+
if (N->getOperand(0).getValueType() == MVT::v4i8 ||
18633+
N->getOperand(0).getValueType() == MVT::v2i16 ||
18634+
N->getOperand(0).getValueType() == MVT::v2i8) {
18635+
EVT SrcVT = N->getOperand(0).getValueType();
1863318636
// If we have a concat of v4i8 loads, convert them to a buildvector of f32
1863418637
// loads to prevent having to go through the v4i8 load legalization that
1863518638
// needs to extend each element into a larger type.
18636-
if (N->getNumOperands() % 2 == 0 && all_of(N->op_values(), [](SDValue V) {
18637-
if (V.getValueType() != MVT::v4i8)
18639+
if (N->getNumOperands() % 2 == 0 &&
18640+
all_of(N->op_values(), [SrcVT](SDValue V) {
18641+
if (V.getValueType() != SrcVT)
1863818642
return false;
1863918643
if (V.isUndef())
1864018644
return true;
1864118645
LoadSDNode *LD = dyn_cast<LoadSDNode>(V);
1864218646
return LD && V.hasOneUse() && LD->isSimple() && !LD->isIndexed() &&
1864318647
LD->getExtensionType() == ISD::NON_EXTLOAD;
1864418648
})) {
18645-
EVT NVT =
18646-
EVT::getVectorVT(*DAG.getContext(), MVT::f32, N->getNumOperands());
18649+
EVT FVT = SrcVT == MVT::v2i8 ? MVT::f16 : MVT::f32;
18650+
EVT NVT = EVT::getVectorVT(*DAG.getContext(), FVT, N->getNumOperands());
1864718651
SmallVector<SDValue> Ops;
1864818652

1864918653
for (unsigned i = 0; i < N->getNumOperands(); i++) {
1865018654
SDValue V = N->getOperand(i);
1865118655
if (V.isUndef())
18652-
Ops.push_back(DAG.getUNDEF(MVT::f32));
18656+
Ops.push_back(DAG.getUNDEF(FVT));
1865318657
else {
1865418658
LoadSDNode *LD = cast<LoadSDNode>(V);
18655-
SDValue NewLoad =
18656-
DAG.getLoad(MVT::f32, dl, LD->getChain(), LD->getBasePtr(),
18657-
LD->getMemOperand());
18659+
SDValue NewLoad = DAG.getLoad(FVT, dl, LD->getChain(),
18660+
LD->getBasePtr(), LD->getMemOperand());
1865818661
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
1865918662
Ops.push_back(NewLoad);
1866018663
}

llvm/test/CodeGen/AArch64/insert-subvector.ll

Lines changed: 16 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -377,12 +377,8 @@ define <16 x i8> @load_v16i8_8_2(float %tmp, <16 x i8> %b, ptr %a) {
377377
define <8 x i8> @load_v8i8_2_1(float %tmp, <8 x i8> %b, ptr %a) {
378378
; CHECK-LABEL: load_v8i8_2_1:
379379
; CHECK: // %bb.0:
380-
; CHECK-NEXT: ld1 { v2.b }[0], [x0]
381-
; CHECK-NEXT: add x8, x0, #1
382-
; CHECK-NEXT: mov v0.16b, v2.16b
383-
; CHECK-NEXT: ld1 { v0.b }[4], [x8]
384-
; CHECK-NEXT: mov v2.b[1], v0.b[4]
385380
; CHECK-NEXT: fmov d0, d1
381+
; CHECK-NEXT: ldr h2, [x0]
386382
; CHECK-NEXT: mov v0.h[0], v2.h[0]
387383
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
388384
; CHECK-NEXT: ret
@@ -395,12 +391,9 @@ define <8 x i8> @load_v8i8_2_1(float %tmp, <8 x i8> %b, ptr %a) {
395391
define <8 x i8> @load_v8i8_2_15(float %tmp, <8 x i8> %b, ptr %a) {
396392
; CHECK-LABEL: load_v8i8_2_15:
397393
; CHECK: // %bb.0:
398-
; CHECK-NEXT: ld1 { v0.b }[0], [x0]
399-
; CHECK-NEXT: add x8, x0, #1
394+
; CHECK-NEXT: ldr h0, [x0]
400395
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
401-
; CHECK-NEXT: ld1 { v0.b }[4], [x8]
402396
; CHECK-NEXT: adrp x8, .LCPI33_0
403-
; CHECK-NEXT: mov v0.b[1], v0.b[4]
404397
; CHECK-NEXT: mov v0.d[1], v1.d[0]
405398
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI33_0]
406399
; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b
@@ -414,12 +407,8 @@ define <8 x i8> @load_v8i8_2_15(float %tmp, <8 x i8> %b, ptr %a) {
414407
define <8 x i8> @load_v8i8_2_2(float %tmp, <8 x i8> %b, ptr %a) {
415408
; CHECK-LABEL: load_v8i8_2_2:
416409
; CHECK: // %bb.0:
417-
; CHECK-NEXT: ld1 { v2.b }[0], [x0]
418-
; CHECK-NEXT: add x8, x0, #1
419-
; CHECK-NEXT: mov v0.16b, v2.16b
420-
; CHECK-NEXT: ld1 { v0.b }[4], [x8]
421-
; CHECK-NEXT: mov v2.b[1], v0.b[4]
422410
; CHECK-NEXT: fmov d0, d1
411+
; CHECK-NEXT: ldr h2, [x0]
423412
; CHECK-NEXT: mov v0.h[1], v2.h[0]
424413
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
425414
; CHECK-NEXT: ret
@@ -432,12 +421,8 @@ define <8 x i8> @load_v8i8_2_2(float %tmp, <8 x i8> %b, ptr %a) {
432421
define <8 x i8> @load_v8i8_2_3(float %tmp, <8 x i8> %b, ptr %a) {
433422
; CHECK-LABEL: load_v8i8_2_3:
434423
; CHECK: // %bb.0:
435-
; CHECK-NEXT: ld1 { v2.b }[0], [x0]
436-
; CHECK-NEXT: add x8, x0, #1
437-
; CHECK-NEXT: mov v0.16b, v2.16b
438-
; CHECK-NEXT: ld1 { v0.b }[4], [x8]
439-
; CHECK-NEXT: mov v2.b[1], v0.b[4]
440424
; CHECK-NEXT: fmov d0, d1
425+
; CHECK-NEXT: ldr h2, [x0]
441426
; CHECK-NEXT: mov v0.h[2], v2.h[0]
442427
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
443428
; CHECK-NEXT: ret
@@ -450,12 +435,8 @@ define <8 x i8> @load_v8i8_2_3(float %tmp, <8 x i8> %b, ptr %a) {
450435
define <8 x i8> @load_v8i8_2_4(float %tmp, <8 x i8> %b, ptr %a) {
451436
; CHECK-LABEL: load_v8i8_2_4:
452437
; CHECK: // %bb.0:
453-
; CHECK-NEXT: ld1 { v2.b }[0], [x0]
454-
; CHECK-NEXT: add x8, x0, #1
455-
; CHECK-NEXT: mov v0.16b, v2.16b
456-
; CHECK-NEXT: ld1 { v0.b }[4], [x8]
457-
; CHECK-NEXT: mov v2.b[1], v0.b[4]
458438
; CHECK-NEXT: fmov d0, d1
439+
; CHECK-NEXT: ldr h2, [x0]
459440
; CHECK-NEXT: mov v0.h[3], v2.h[0]
460441
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
461442
; CHECK-NEXT: ret
@@ -468,11 +449,9 @@ define <8 x i8> @load_v8i8_2_4(float %tmp, <8 x i8> %b, ptr %a) {
468449
define <4 x i8> @load_v4i8_2_1(float %tmp, <4 x i8> %b, ptr %a) {
469450
; CHECK-LABEL: load_v4i8_2_1:
470451
; CHECK: // %bb.0:
471-
; CHECK-NEXT: ld1 { v0.b }[0], [x0]
472-
; CHECK-NEXT: add x8, x0, #1
452+
; CHECK-NEXT: ldr h0, [x0]
473453
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
474-
; CHECK-NEXT: ld1 { v0.b }[4], [x8]
475-
; CHECK-NEXT: uzp1 v0.4h, v0.4h, v0.4h
454+
; CHECK-NEXT: zip1 v0.8b, v0.8b, v0.8b
476455
; CHECK-NEXT: mov v0.s[1], v1.s[1]
477456
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
478457
; CHECK-NEXT: ret
@@ -485,10 +464,8 @@ define <4 x i8> @load_v4i8_2_1(float %tmp, <4 x i8> %b, ptr %a) {
485464
define <4 x i8> @load_v4i8_2_2(float %tmp, <4 x i8> %b, ptr %a) {
486465
; CHECK-LABEL: load_v4i8_2_2:
487466
; CHECK: // %bb.0:
488-
; CHECK-NEXT: ld1 { v0.b }[0], [x0]
489-
; CHECK-NEXT: add x8, x0, #1
490-
; CHECK-NEXT: ld1 { v0.b }[4], [x8]
491-
; CHECK-NEXT: uzp1 v2.4h, v0.4h, v0.4h
467+
; CHECK-NEXT: ldr h0, [x0]
468+
; CHECK-NEXT: zip1 v2.8b, v0.8b, v0.8b
492469
; CHECK-NEXT: fmov d0, d1
493470
; CHECK-NEXT: mov v0.s[1], v2.s[0]
494471
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
@@ -504,13 +481,8 @@ define <4 x i8> @load_v4i8_2_2(float %tmp, <4 x i8> %b, ptr %a) {
504481
define <8 x i16> @load_v8i16_2_1(float %tmp, <8 x i16> %b, ptr %a) {
505482
; CHECK-LABEL: load_v8i16_2_1:
506483
; CHECK: // %bb.0:
507-
; CHECK-NEXT: ldrh w8, [x0]
508-
; CHECK-NEXT: add x9, x0, #2
509-
; CHECK-NEXT: fmov s0, w8
510-
; CHECK-NEXT: ld1 { v0.h }[2], [x9]
511-
; CHECK-NEXT: xtn v2.4h, v0.4s
512484
; CHECK-NEXT: mov v0.16b, v1.16b
513-
; CHECK-NEXT: mov v0.s[0], v2.s[0]
485+
; CHECK-NEXT: ld1 { v0.s }[0], [x0]
514486
; CHECK-NEXT: ret
515487
%l = load <2 x i16>, ptr %a
516488
%s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -521,13 +493,9 @@ define <8 x i16> @load_v8i16_2_1(float %tmp, <8 x i16> %b, ptr %a) {
521493
define <8 x i16> @load_v8i16_2_15(float %tmp, <8 x i16> %b, ptr %a) {
522494
; CHECK-LABEL: load_v8i16_2_15:
523495
; CHECK: // %bb.0:
524-
; CHECK-NEXT: ldrh w8, [x0]
525-
; CHECK-NEXT: add x9, x0, #2
526496
; CHECK-NEXT: // kill: def $q1 killed $q1 def $q0_q1
527-
; CHECK-NEXT: fmov s2, w8
528497
; CHECK-NEXT: adrp x8, .LCPI40_0
529-
; CHECK-NEXT: ld1 { v2.h }[2], [x9]
530-
; CHECK-NEXT: xtn v0.4h, v2.4s
498+
; CHECK-NEXT: ldr s0, [x0]
531499
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI40_0]
532500
; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
533501
; CHECK-NEXT: ret
@@ -540,13 +508,8 @@ define <8 x i16> @load_v8i16_2_15(float %tmp, <8 x i16> %b, ptr %a) {
540508
define <8 x i16> @load_v8i16_2_2(float %tmp, <8 x i16> %b, ptr %a) {
541509
; CHECK-LABEL: load_v8i16_2_2:
542510
; CHECK: // %bb.0:
543-
; CHECK-NEXT: ldrh w8, [x0]
544-
; CHECK-NEXT: add x9, x0, #2
545-
; CHECK-NEXT: fmov s0, w8
546-
; CHECK-NEXT: ld1 { v0.h }[2], [x9]
547-
; CHECK-NEXT: xtn v2.4h, v0.4s
548511
; CHECK-NEXT: mov v0.16b, v1.16b
549-
; CHECK-NEXT: mov v0.s[1], v2.s[0]
512+
; CHECK-NEXT: ld1 { v0.s }[1], [x0]
550513
; CHECK-NEXT: ret
551514
%l = load <2 x i16>, ptr %a
552515
%s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -557,13 +520,8 @@ define <8 x i16> @load_v8i16_2_2(float %tmp, <8 x i16> %b, ptr %a) {
557520
define <8 x i16> @load_v8i16_2_3(float %tmp, <8 x i16> %b, ptr %a) {
558521
; CHECK-LABEL: load_v8i16_2_3:
559522
; CHECK: // %bb.0:
560-
; CHECK-NEXT: ldrh w8, [x0]
561-
; CHECK-NEXT: add x9, x0, #2
562-
; CHECK-NEXT: fmov s0, w8
563-
; CHECK-NEXT: ld1 { v0.h }[2], [x9]
564-
; CHECK-NEXT: xtn v2.4h, v0.4s
565523
; CHECK-NEXT: mov v0.16b, v1.16b
566-
; CHECK-NEXT: mov v0.s[2], v2.s[0]
524+
; CHECK-NEXT: ld1 { v0.s }[2], [x0]
567525
; CHECK-NEXT: ret
568526
%l = load <2 x i16>, ptr %a
569527
%s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -574,13 +532,8 @@ define <8 x i16> @load_v8i16_2_3(float %tmp, <8 x i16> %b, ptr %a) {
574532
define <8 x i16> @load_v8i16_2_4(float %tmp, <8 x i16> %b, ptr %a) {
575533
; CHECK-LABEL: load_v8i16_2_4:
576534
; CHECK: // %bb.0:
577-
; CHECK-NEXT: ldrh w8, [x0]
578-
; CHECK-NEXT: add x9, x0, #2
579-
; CHECK-NEXT: fmov s0, w8
580-
; CHECK-NEXT: ld1 { v0.h }[2], [x9]
581-
; CHECK-NEXT: xtn v2.4h, v0.4s
582535
; CHECK-NEXT: mov v0.16b, v1.16b
583-
; CHECK-NEXT: mov v0.s[3], v2.s[0]
536+
; CHECK-NEXT: ld1 { v0.s }[3], [x0]
584537
; CHECK-NEXT: ret
585538
%l = load <2 x i16>, ptr %a
586539
%s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -591,11 +544,8 @@ define <8 x i16> @load_v8i16_2_4(float %tmp, <8 x i16> %b, ptr %a) {
591544
define <4 x i16> @load_v4i16_2_1(float %tmp, <4 x i16> %b, ptr %a) {
592545
; CHECK-LABEL: load_v4i16_2_1:
593546
; CHECK: // %bb.0:
594-
; CHECK-NEXT: ld1 { v0.h }[0], [x0]
595-
; CHECK-NEXT: add x8, x0, #2
547+
; CHECK-NEXT: ldr s0, [x0]
596548
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
597-
; CHECK-NEXT: ld1 { v0.h }[2], [x8]
598-
; CHECK-NEXT: uzp1 v0.4h, v0.4h, v0.4h
599549
; CHECK-NEXT: mov v0.s[1], v1.s[1]
600550
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
601551
; CHECK-NEXT: ret
@@ -608,11 +558,8 @@ define <4 x i16> @load_v4i16_2_1(float %tmp, <4 x i16> %b, ptr %a) {
608558
define <4 x i16> @load_v4i16_2_2(float %tmp, <4 x i16> %b, ptr %a) {
609559
; CHECK-LABEL: load_v4i16_2_2:
610560
; CHECK: // %bb.0:
611-
; CHECK-NEXT: ld1 { v0.h }[0], [x0]
612-
; CHECK-NEXT: add x8, x0, #2
613-
; CHECK-NEXT: ld1 { v0.h }[2], [x8]
614-
; CHECK-NEXT: uzp1 v2.4h, v0.4h, v0.4h
615561
; CHECK-NEXT: fmov d0, d1
562+
; CHECK-NEXT: ldr s2, [x0]
616563
; CHECK-NEXT: mov v0.s[1], v2.s[0]
617564
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
618565
; CHECK-NEXT: ret

0 commit comments

Comments
 (0)