Skip to content

Commit 5ebd1c5

Browse files
committed
[AArch64] Custom lower v4i8 subreg extract.
A v4i8 subvector extract will usually be scalarized. This patch prevents that during lowering by converting it to an any-extend of the v8i8 source followed by a larger v4i16 subvector extract, which is then truncated back to v4i8. There are a few minor regressions, which are fixed in a follow-up patch.
1 parent f61ef9c commit 5ebd1c5

15 files changed

+477
-756
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1426,6 +1426,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
14261426
setOperationAction(ISD::BITCAST, MVT::v2i16, Custom);
14271427
setOperationAction(ISD::BITCAST, MVT::v4i8, Custom);
14281428

1429+
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i8, Custom);
1430+
14291431
setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
14301432
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
14311433
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
@@ -27309,12 +27311,22 @@ void AArch64TargetLowering::ReplaceExtractSubVectorResults(
2730927311
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
2731027312
SDValue In = N->getOperand(0);
2731127313
EVT InVT = In.getValueType();
27314+
SDLoc DL(N);
27315+
27316+
if (N->getValueType(0) == MVT::v4i8 &&
27317+
N->getOperand(0).getValueType() == MVT::v8i8 &&
27318+
(N->getConstantOperandVal(1) == 0 || N->getConstantOperandVal(1) == 4)) {
27319+
SDValue Ext =
27320+
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::v8i16, N->getOperand(0));
27321+
Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Ext,
27322+
N->getOperand(1));
27323+
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::v4i8, Ext));
27324+
}
2731227325

2731327326
// Common code will handle these just fine.
2731427327
if (!InVT.isScalableVector() || !InVT.isInteger())
2731527328
return;
2731627329

27317-
SDLoc DL(N);
2731827330
EVT VT = N->getValueType(0);
2731927331

2732027332
// The following checks bail if this is not a halving operation.

llvm/test/CodeGen/AArch64/aarch64-load-ext.ll

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -273,18 +273,15 @@ define <3 x i16> @fsext_v3i16(ptr %a) {
273273
; CHECK-LE-LABEL: fsext_v3i16:
274274
; CHECK-LE: // %bb.0:
275275
; CHECK-LE-NEXT: ldr s0, [x0]
276-
; CHECK-LE-NEXT: zip1 v0.8b, v0.8b, v0.8b
277-
; CHECK-LE-NEXT: shl v0.4h, v0.4h, #8
278-
; CHECK-LE-NEXT: sshr v0.4h, v0.4h, #8
276+
; CHECK-LE-NEXT: sshll v0.8h, v0.8b, #0
277+
; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0
279278
; CHECK-LE-NEXT: ret
280279
;
281280
; CHECK-BE-LABEL: fsext_v3i16:
282281
; CHECK-BE: // %bb.0:
283282
; CHECK-BE-NEXT: ldr s0, [x0]
284283
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
285-
; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b
286-
; CHECK-BE-NEXT: shl v0.4h, v0.4h, #8
287-
; CHECK-BE-NEXT: sshr v0.4h, v0.4h, #8
284+
; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0
288285
; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
289286
; CHECK-BE-NEXT: ret
290287
%x = load <3 x i8>, ptr %a

llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,8 @@ define void @insert_vec_v16i8_uaddlv_from_v8i8(ptr %0) {
281281
; CHECK-NEXT: movi.2d v2, #0000000000000000
282282
; CHECK-NEXT: uaddlv.8b h1, v0
283283
; CHECK-NEXT: stp q0, q0, [x0, #32]
284-
; CHECK-NEXT: mov.h v2[0], v1[0]
284+
; CHECK-NEXT: mov.b v2[0], v1[0]
285+
; CHECK-NEXT: zip1.8b v2, v2, v2
285286
; CHECK-NEXT: bic.4h v2, #255, lsl #8
286287
; CHECK-NEXT: ushll.4s v2, v2, #0
287288
; CHECK-NEXT: ucvtf.4s v2, v2
@@ -303,8 +304,9 @@ define void @insert_vec_v8i8_uaddlv_from_v8i8(ptr %0) {
303304
; CHECK-NEXT: movi.2d v0, #0000000000000000
304305
; CHECK-NEXT: stp xzr, xzr, [x0, #16]
305306
; CHECK-NEXT: uaddlv.8b h1, v0
306-
; CHECK-NEXT: mov.h v0[0], v1[0]
307-
; CHECK-NEXT: bic.4h v0, #7, lsl #8
307+
; CHECK-NEXT: mov.b v0[0], v1[0]
308+
; CHECK-NEXT: zip1.8b v0, v0, v0
309+
; CHECK-NEXT: bic.4h v0, #255, lsl #8
308310
; CHECK-NEXT: ushll.4s v0, v0, #0
309311
; CHECK-NEXT: ucvtf.4s v0, v0
310312
; CHECK-NEXT: str q0, [x0]
@@ -433,7 +435,8 @@ define void @insert_vec_v8i8_uaddlv_from_v4i32(ptr %0) {
433435
; CHECK-NEXT: movi.2d v1, #0000000000000000
434436
; CHECK-NEXT: stp xzr, xzr, [x0, #16]
435437
; CHECK-NEXT: uaddlv.4s d0, v0
436-
; CHECK-NEXT: mov.h v1[0], v0[0]
438+
; CHECK-NEXT: mov.b v1[0], v0[0]
439+
; CHECK-NEXT: zip1.8b v1, v1, v1
437440
; CHECK-NEXT: bic.4h v1, #255, lsl #8
438441
; CHECK-NEXT: ushll.4s v1, v1, #0
439442
; CHECK-NEXT: ucvtf.4s v1, v1
@@ -457,7 +460,8 @@ define void @insert_vec_v16i8_uaddlv_from_v4i32(ptr %0) {
457460
; CHECK-NEXT: movi.2d v2, #0000000000000000
458461
; CHECK-NEXT: uaddlv.4s d0, v0
459462
; CHECK-NEXT: stp q2, q2, [x0, #32]
460-
; CHECK-NEXT: mov.h v1[0], v0[0]
463+
; CHECK-NEXT: mov.b v1[0], v0[0]
464+
; CHECK-NEXT: zip1.8b v1, v1, v1
461465
; CHECK-NEXT: bic.4h v1, #255, lsl #8
462466
; CHECK-NEXT: ushll.4s v1, v1, #0
463467
; CHECK-NEXT: ucvtf.4s v1, v1

llvm/test/CodeGen/AArch64/add.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,7 @@ define void @v3i8(ptr %p1, ptr %p2) {
9797
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
9898
; CHECK-SD-NEXT: ldr s0, [x0]
9999
; CHECK-SD-NEXT: ldr s1, [x1]
100-
; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b
101-
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v0.8b
102-
; CHECK-SD-NEXT: add v0.4h, v0.4h, v1.4h
100+
; CHECK-SD-NEXT: uaddl v0.8h, v0.8b, v1.8b
103101
; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b
104102
; CHECK-SD-NEXT: umov w8, v0.h[2]
105103
; CHECK-SD-NEXT: str s1, [sp, #12]

llvm/test/CodeGen/AArch64/andorxor.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ define void @and_v3i8(ptr %p1, ptr %p2) {
292292
; CHECK-SD-NEXT: ldr s0, [x0]
293293
; CHECK-SD-NEXT: ldr s1, [x1]
294294
; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b
295-
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v0.8b
295+
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v1.8b
296296
; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
297297
; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b
298298
; CHECK-SD-NEXT: umov w8, v0.h[2]
@@ -340,7 +340,7 @@ define void @or_v3i8(ptr %p1, ptr %p2) {
340340
; CHECK-SD-NEXT: ldr s0, [x0]
341341
; CHECK-SD-NEXT: ldr s1, [x1]
342342
; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b
343-
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v0.8b
343+
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v1.8b
344344
; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b
345345
; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b
346346
; CHECK-SD-NEXT: umov w8, v0.h[2]
@@ -388,7 +388,7 @@ define void @xor_v3i8(ptr %p1, ptr %p2) {
388388
; CHECK-SD-NEXT: ldr s0, [x0]
389389
; CHECK-SD-NEXT: ldr s1, [x1]
390390
; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b
391-
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v0.8b
391+
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v1.8b
392392
; CHECK-SD-NEXT: eor v0.8b, v0.8b, v1.8b
393393
; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b
394394
; CHECK-SD-NEXT: umov w8, v0.h[2]

llvm/test/CodeGen/AArch64/ctlz.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ define void @v3i8(ptr %p1) {
4242
; CHECK-SD: // %bb.0: // %entry
4343
; CHECK-SD-NEXT: sub sp, sp, #16
4444
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
45-
; CHECK-SD-NEXT: movi v0.4h, #8
4645
; CHECK-SD-NEXT: ldr s1, [x0]
47-
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v0.8b
46+
; CHECK-SD-NEXT: movi v0.4h, #8
47+
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v1.8b
4848
; CHECK-SD-NEXT: bic v1.4h, #255, lsl #8
4949
; CHECK-SD-NEXT: clz v1.4h, v1.4h
5050
; CHECK-SD-NEXT: sub v0.4h, v1.4h, v0.4h

llvm/test/CodeGen/AArch64/extbinopload.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1366,11 +1366,11 @@ define <4 x i32> @atomic(ptr %p) {
13661366
; CHECK-LABEL: atomic:
13671367
; CHECK: // %bb.0:
13681368
; CHECK-NEXT: ldar w8, [x0]
1369-
; CHECK-NEXT: movi v0.2d, #0x0000ff000000ff
13701369
; CHECK-NEXT: ldr s1, [x0, #4]
1370+
; CHECK-NEXT: movi v0.2d, #0x0000ff000000ff
13711371
; CHECK-NEXT: fmov s2, w8
13721372
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
1373-
; CHECK-NEXT: zip1 v2.8b, v2.8b, v0.8b
1373+
; CHECK-NEXT: zip1 v2.8b, v2.8b, v2.8b
13741374
; CHECK-NEXT: ushll v1.4s, v1.4h, #3
13751375
; CHECK-NEXT: ushll v2.4s, v2.4h, #0
13761376
; CHECK-NEXT: and v0.16b, v2.16b, v0.16b

llvm/test/CodeGen/AArch64/insert-subvector.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -465,7 +465,7 @@ define <4 x i8> @load_v4i8_2_2(float %tmp, <4 x i8> %b, ptr %a) {
465465
; CHECK-LABEL: load_v4i8_2_2:
466466
; CHECK: // %bb.0:
467467
; CHECK-NEXT: ldr h0, [x0]
468-
; CHECK-NEXT: zip1 v2.8b, v0.8b, v0.8b
468+
; CHECK-NEXT: ushll v2.8h, v0.8b, #0
469469
; CHECK-NEXT: fmov d0, d1
470470
; CHECK-NEXT: mov v0.s[1], v2.s[0]
471471
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0

0 commit comments

Comments
 (0)