Skip to content

Commit 947a52d

Browse files
authored
[AArch64] Prefer using DUP instead of INS where possible (#138549)
Replace all instances of `INS(IMPLICIT_DEF, 0, v, idx)` with `DUP(v, idx)` in instruction selection. `INS` (e.g. `mov v0.s[0], v1.s[1]`) has a value dependency on its output register, which becomes a false dependency when we're inserting into an `IMPLICIT_DEF` register. We can break this false dependency by using `DUP` (e.g. `mov s0, v1.s[1]`) instead.
1 parent ba5591e commit 947a52d

12 files changed

+189
-140
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 28 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -7358,7 +7358,8 @@ def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))),
73587358

73597359
// Patterns for i8/i16 -> v2i32/v4i16 lane moves via insert and extract that go via i32.
73607360
multiclass Neon_INS_elt_ext_pattern<ValueType VT128, ValueType VT64, ValueType OutVT,
7361-
Instruction INS, SDNodeXForm VecIndexMult> {
7361+
Instruction INS, Instruction DUP, SubRegIndex DUPSub,
7362+
SDNodeXForm VecIndexMult> {
73627363
// VT64->OutVT
73637364
def : Pat<(OutVT (vector_insert (OutVT V64:$src),
73647365
(i32 (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
@@ -7369,8 +7370,10 @@ multiclass Neon_INS_elt_ext_pattern<ValueType VT128, ValueType VT64, ValueType O
73697370
dsub)>;
73707371
def : Pat<(OutVT (scalar_to_vector (i32 (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))))),
73717372
(EXTRACT_SUBREG
7372-
(INS (IMPLICIT_DEF), 0,
7373-
(INSERT_SUBREG (VT128 (IMPLICIT_DEF)), V64:$Rn, dsub), imm:$Immn),
7373+
(VT128 (SUBREG_TO_REG
7374+
(i64 0),
7375+
(DUP (INSERT_SUBREG (VT128 (IMPLICIT_DEF)), V64:$Rn, dsub), imm:$Immn),
7376+
DUPSub)),
73747377
dsub)>;
73757378

73767379
// VT128->OutVT
@@ -7383,25 +7386,38 @@ multiclass Neon_INS_elt_ext_pattern<ValueType VT128, ValueType VT64, ValueType O
73837386
dsub)>;
73847387
def : Pat<(OutVT (scalar_to_vector (i32 (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))))),
73857388
(EXTRACT_SUBREG
7386-
(INS (IMPLICIT_DEF), 0, V128:$Rn, imm:$Immn),
7389+
(VT128 (SUBREG_TO_REG
7390+
(i64 0),
7391+
(DUP V128:$Rn, imm:$Immn),
7392+
DUPSub)),
73877393
dsub)>;
73887394
}
73897395

7390-
defm : Neon_INS_elt_ext_pattern<v16i8, v8i8, v4i16, INSvi8lane, VecIndex_x2>;
7391-
defm : Neon_INS_elt_ext_pattern<v16i8, v8i8, v2i32, INSvi8lane, VecIndex_x4>;
7392-
defm : Neon_INS_elt_ext_pattern<v8i16, v4i16, v2i32, INSvi16lane, VecIndex_x2>;
7396+
defm : Neon_INS_elt_ext_pattern<v16i8, v8i8, v4i16, INSvi8lane, DUPi8, bsub, VecIndex_x2>;
7397+
defm : Neon_INS_elt_ext_pattern<v16i8, v8i8, v2i32, INSvi8lane, DUPi8, bsub, VecIndex_x4>;
7398+
defm : Neon_INS_elt_ext_pattern<v8i16, v4i16, v2i32, INSvi16lane, DUPi16, hsub, VecIndex_x2>;
73937399

73947400
// bitcast of an extract
7395-
// f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane))
7396-
def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))),
7397-
(EXTRACT_SUBREG (INSvi32lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), ssub)>;
7401+
// f32 bitcast(vector_extract(v4i32 src, 0)) -> EXTRACT_SUBREG(src)
7402+
def : Pat<(f32 (bitconvert (i32 (vector_extract v16i8:$src, (i64 0))))),
7403+
(EXTRACT_SUBREG V128:$src, bsub)>;
7404+
def : Pat<(f32 (bitconvert (i32 (vector_extract v8i16:$src, (i64 0))))),
7405+
(EXTRACT_SUBREG V128:$src, hsub)>;
73987406
def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, (i64 0))))),
73997407
(EXTRACT_SUBREG V128:$src, ssub)>;
7400-
def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))),
7401-
(EXTRACT_SUBREG (INSvi64lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), dsub)>;
74027408
def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, (i64 0))))),
74037409
(EXTRACT_SUBREG V128:$src, dsub)>;
74047410

7411+
// f32 bitcast(vector_extract(v4i32 src, lane)) -> DUPi32(src, lane)
7412+
def : Pat<(f32 (bitconvert (i32 (vector_extract v16i8:$src, imm:$Immd)))),
7413+
(EXTRACT_SUBREG (v16i8 (SUBREG_TO_REG (i64 0), (DUPi8 V128:$src, imm:$Immd), bsub)), ssub)>;
7414+
def : Pat<(f32 (bitconvert (i32 (vector_extract v8i16:$src, imm:$Immd)))),
7415+
(EXTRACT_SUBREG (v8i16 (SUBREG_TO_REG (i64 0), (DUPi16 V128:$src, imm:$Immd), hsub)), ssub)>;
7416+
def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))),
7417+
(DUPi32 V128:$src, imm:$Immd)>;
7418+
def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))),
7419+
(DUPi64 V128:$src, imm:$Immd)>;
7420+
74057421
// Floating point vector extractions are codegen'd as either a sequence of
74067422
// subregister extractions, or a MOV (aka DUP here) if
74077423
// the lane number is anything other than zero.

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3459,16 +3459,10 @@ let Predicates = [HasSVE_or_SME] in {
34593459
// Alternative case where insertelement is just scalar_to_vector rather than vector_insert.
34603460
def : Pat<(v1f64 (scalar_to_vector
34613461
(f64 (vector_extract nxv2f64:$vec, VectorIndexD:$index)))),
3462-
(EXTRACT_SUBREG
3463-
(INSvi64lane (IMPLICIT_DEF), (i64 0),
3464-
(EXTRACT_SUBREG nxv2f64:$vec, zsub), VectorIndexD:$index),
3465-
dsub)>;
3462+
(DUPi64 (EXTRACT_SUBREG nxv2f64:$vec, zsub), VectorIndexD:$index)>;
34663463
def : Pat<(v1i64 (scalar_to_vector
34673464
(i64 (vector_extract nxv2i64:$vec, VectorIndexD:$index)))),
3468-
(EXTRACT_SUBREG
3469-
(INSvi64lane (IMPLICIT_DEF), (i64 0),
3470-
(EXTRACT_SUBREG nxv2i64:$vec, zsub), VectorIndexD:$index),
3471-
dsub)>;
3465+
(DUPi64 (EXTRACT_SUBREG nxv2i64:$vec, zsub), VectorIndexD:$index)>;
34723466
} // End HasNEON
34733467

34743468
let Predicates = [HasNEON] in {

llvm/test/CodeGen/AArch64/arm64-arith-saturating.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -193,7 +193,7 @@ define i32 @vqmovnd_u(<2 x i64> %b) nounwind readnone {
193193
define i32 @uqxtn_ext(<4 x i32> noundef %a, <4 x i32> noundef %b, i32 %c, float %d, <2 x i64> %e) {
194194
; CHECK-LABEL: uqxtn_ext:
195195
; CHECK: // %bb.0: // %entry
196-
; CHECK-NEXT: mov v0.d[0], v3.d[1]
196+
; CHECK-NEXT: mov d0, v3.d[1]
197197
; CHECK-NEXT: uqxtn s0, d0
198198
; CHECK-NEXT: fmov w0, s0
199199
; CHECK-NEXT: ret
@@ -219,7 +219,7 @@ entry:
219219
define <4 x i32> @sqxtun_insext(<4 x i32> noundef %a, <2 x i64> %e) {
220220
; CHECK-LABEL: sqxtun_insext:
221221
; CHECK: // %bb.0: // %entry
222-
; CHECK-NEXT: mov v1.d[0], v1.d[1]
222+
; CHECK-NEXT: mov d1, v1.d[1]
223223
; CHECK-NEXT: sqxtun s1, d1
224224
; CHECK-NEXT: mov v0.s[3], v1.s[0]
225225
; CHECK-NEXT: ret

llvm/test/CodeGen/AArch64/bitcast-extend.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -70,8 +70,8 @@ define <4 x i64> @z_i32_v4i64(i32 %x) {
7070
; CHECK-SD: // %bb.0:
7171
; CHECK-SD-NEXT: fmov s0, w0
7272
; CHECK-SD-NEXT: movi v1.2d, #0x000000000000ff
73-
; CHECK-SD-NEXT: mov v2.b[0], v0.b[0]
74-
; CHECK-SD-NEXT: mov v3.b[0], v0.b[2]
73+
; CHECK-SD-NEXT: mov b2, v0.b[0]
74+
; CHECK-SD-NEXT: mov b3, v0.b[2]
7575
; CHECK-SD-NEXT: mov v2.b[4], v0.b[1]
7676
; CHECK-SD-NEXT: mov v3.b[4], v0.b[3]
7777
; CHECK-SD-NEXT: ushll v0.2d, v2.2s, #0
@@ -172,8 +172,8 @@ define <4 x i64> @s_i32_v4i64(i32 %x) {
172172
; CHECK-SD-LABEL: s_i32_v4i64:
173173
; CHECK-SD: // %bb.0:
174174
; CHECK-SD-NEXT: fmov s0, w0
175-
; CHECK-SD-NEXT: mov v1.b[0], v0.b[0]
176-
; CHECK-SD-NEXT: mov v2.b[0], v0.b[2]
175+
; CHECK-SD-NEXT: mov b1, v0.b[0]
176+
; CHECK-SD-NEXT: mov b2, v0.b[2]
177177
; CHECK-SD-NEXT: mov v1.b[4], v0.b[1]
178178
; CHECK-SD-NEXT: mov v2.b[4], v0.b[3]
179179
; CHECK-SD-NEXT: ushll v0.2d, v1.2s, #0

llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
define <4 x i16> @test_reconstructshuffle(<16 x i8> %a, <16 x i8> %b) nounwind {
66
; CHECKLE-LABEL: test_reconstructshuffle:
77
; CHECKLE: // %bb.0:
8-
; CHECKLE-NEXT: mov v2.b[0], v0.b[3]
8+
; CHECKLE-NEXT: mov b2, v0.b[3]
99
; CHECKLE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
1010
; CHECKLE-NEXT: mov v2.b[2], v0.b[2]
1111
; CHECKLE-NEXT: mov v2.b[4], v0.b[1]
@@ -21,7 +21,7 @@ define <4 x i16> @test_reconstructshuffle(<16 x i8> %a, <16 x i8> %b) nounwind {
2121
; CHECKBE-NEXT: rev64 v1.16b, v1.16b
2222
; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
2323
; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
24-
; CHECKBE-NEXT: mov v2.b[0], v0.b[3]
24+
; CHECKBE-NEXT: mov b2, v0.b[3]
2525
; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
2626
; CHECKBE-NEXT: mov v2.b[2], v0.b[2]
2727
; CHECKBE-NEXT: mov v2.b[4], v0.b[1]

llvm/test/CodeGen/AArch64/fp16-vector-shuffle.ll

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -347,9 +347,8 @@ define half @get_lane_64(<4 x half> %a) #0 {
347347
; CHECK-LABEL: get_lane_64:
348348
; CHECK: // %bb.0: // %entry
349349
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
350-
; CHECK-NEXT: umov w8, v0.h[2]
351-
; CHECK-NEXT: fmov s0, w8
352-
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
350+
; CHECK-NEXT: mov h0, v0.h[2]
351+
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0
353352
; CHECK-NEXT: ret
354353
entry:
355354
%0 = bitcast <4 x half> %a to <4 x i16>
@@ -362,9 +361,8 @@ entry:
362361
define half @get_lane_128(<8 x half> %a) #0 {
363362
; CHECK-LABEL: get_lane_128:
364363
; CHECK: // %bb.0: // %entry
365-
; CHECK-NEXT: umov w8, v0.h[2]
366-
; CHECK-NEXT: fmov s0, w8
367-
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
364+
; CHECK-NEXT: mov h0, v0.h[2]
365+
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0
368366
; CHECK-NEXT: ret
369367
entry:
370368
%0 = bitcast <8 x half> %a to <8 x i16>

llvm/test/CodeGen/AArch64/itofp.ll

Lines changed: 56 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -3443,10 +3443,10 @@ define <8 x double> @stofp_v8i8_v8f64(<8 x i8> %a) {
34433443
; CHECK-SD-LABEL: stofp_v8i8_v8f64:
34443444
; CHECK-SD: // %bb.0: // %entry
34453445
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
3446-
; CHECK-SD-NEXT: mov v1.b[0], v0.b[0]
3447-
; CHECK-SD-NEXT: mov v2.b[0], v0.b[2]
3448-
; CHECK-SD-NEXT: mov v3.b[0], v0.b[4]
3449-
; CHECK-SD-NEXT: mov v4.b[0], v0.b[6]
3446+
; CHECK-SD-NEXT: mov b1, v0.b[0]
3447+
; CHECK-SD-NEXT: mov b2, v0.b[2]
3448+
; CHECK-SD-NEXT: mov b3, v0.b[4]
3449+
; CHECK-SD-NEXT: mov b4, v0.b[6]
34503450
; CHECK-SD-NEXT: mov v1.b[4], v0.b[1]
34513451
; CHECK-SD-NEXT: mov v2.b[4], v0.b[3]
34523452
; CHECK-SD-NEXT: mov v3.b[4], v0.b[5]
@@ -3492,10 +3492,10 @@ define <8 x double> @utofp_v8i8_v8f64(<8 x i8> %a) {
34923492
; CHECK-SD-LABEL: utofp_v8i8_v8f64:
34933493
; CHECK-SD: // %bb.0: // %entry
34943494
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
3495-
; CHECK-SD-NEXT: mov v2.b[0], v0.b[0]
3496-
; CHECK-SD-NEXT: mov v3.b[0], v0.b[2]
3497-
; CHECK-SD-NEXT: mov v4.b[0], v0.b[4]
3498-
; CHECK-SD-NEXT: mov v5.b[0], v0.b[6]
3495+
; CHECK-SD-NEXT: mov b2, v0.b[0]
3496+
; CHECK-SD-NEXT: mov b3, v0.b[2]
3497+
; CHECK-SD-NEXT: mov b4, v0.b[4]
3498+
; CHECK-SD-NEXT: mov b5, v0.b[6]
34993499
; CHECK-SD-NEXT: movi d1, #0x0000ff000000ff
35003500
; CHECK-SD-NEXT: mov v2.b[4], v0.b[1]
35013501
; CHECK-SD-NEXT: mov v3.b[4], v0.b[3]
@@ -3538,14 +3538,14 @@ define <16 x double> @stofp_v16i8_v16f64(<16 x i8> %a) {
35383538
; CHECK-SD-LABEL: stofp_v16i8_v16f64:
35393539
; CHECK-SD: // %bb.0: // %entry
35403540
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
3541-
; CHECK-SD-NEXT: mov v2.b[0], v0.b[0]
3542-
; CHECK-SD-NEXT: mov v3.b[0], v0.b[2]
3543-
; CHECK-SD-NEXT: mov v4.b[0], v0.b[4]
3544-
; CHECK-SD-NEXT: mov v5.b[0], v0.b[6]
3545-
; CHECK-SD-NEXT: mov v6.b[0], v1.b[0]
3546-
; CHECK-SD-NEXT: mov v7.b[0], v1.b[2]
3547-
; CHECK-SD-NEXT: mov v16.b[0], v1.b[4]
3548-
; CHECK-SD-NEXT: mov v17.b[0], v1.b[6]
3541+
; CHECK-SD-NEXT: mov b2, v0.b[0]
3542+
; CHECK-SD-NEXT: mov b3, v0.b[2]
3543+
; CHECK-SD-NEXT: mov b4, v0.b[4]
3544+
; CHECK-SD-NEXT: mov b5, v0.b[6]
3545+
; CHECK-SD-NEXT: mov b6, v1.b[0]
3546+
; CHECK-SD-NEXT: mov b7, v1.b[2]
3547+
; CHECK-SD-NEXT: mov b16, v1.b[4]
3548+
; CHECK-SD-NEXT: mov b17, v1.b[6]
35493549
; CHECK-SD-NEXT: mov v2.b[4], v0.b[1]
35503550
; CHECK-SD-NEXT: mov v3.b[4], v0.b[3]
35513551
; CHECK-SD-NEXT: mov v4.b[4], v0.b[5]
@@ -3622,15 +3622,15 @@ define <16 x double> @utofp_v16i8_v16f64(<16 x i8> %a) {
36223622
; CHECK-SD-LABEL: utofp_v16i8_v16f64:
36233623
; CHECK-SD: // %bb.0: // %entry
36243624
; CHECK-SD-NEXT: ext v2.16b, v0.16b, v0.16b, #8
3625-
; CHECK-SD-NEXT: mov v3.b[0], v0.b[0]
3626-
; CHECK-SD-NEXT: mov v4.b[0], v0.b[2]
3627-
; CHECK-SD-NEXT: mov v5.b[0], v0.b[4]
3628-
; CHECK-SD-NEXT: mov v6.b[0], v0.b[6]
3625+
; CHECK-SD-NEXT: mov b3, v0.b[0]
3626+
; CHECK-SD-NEXT: mov b4, v0.b[2]
3627+
; CHECK-SD-NEXT: mov b5, v0.b[4]
3628+
; CHECK-SD-NEXT: mov b6, v0.b[6]
36293629
; CHECK-SD-NEXT: movi d1, #0x0000ff000000ff
3630-
; CHECK-SD-NEXT: mov v7.b[0], v2.b[0]
3631-
; CHECK-SD-NEXT: mov v16.b[0], v2.b[2]
3632-
; CHECK-SD-NEXT: mov v17.b[0], v2.b[4]
3633-
; CHECK-SD-NEXT: mov v18.b[0], v2.b[6]
3630+
; CHECK-SD-NEXT: mov b7, v2.b[0]
3631+
; CHECK-SD-NEXT: mov b16, v2.b[2]
3632+
; CHECK-SD-NEXT: mov b17, v2.b[4]
3633+
; CHECK-SD-NEXT: mov b18, v2.b[6]
36343634
; CHECK-SD-NEXT: mov v3.b[4], v0.b[1]
36353635
; CHECK-SD-NEXT: mov v4.b[4], v0.b[3]
36363636
; CHECK-SD-NEXT: mov v5.b[4], v0.b[5]
@@ -3699,35 +3699,35 @@ define <32 x double> @stofp_v32i8_v32f64(<32 x i8> %a) {
36993699
; CHECK-SD-LABEL: stofp_v32i8_v32f64:
37003700
; CHECK-SD: // %bb.0: // %entry
37013701
; CHECK-SD-NEXT: ext v3.16b, v0.16b, v0.16b, #8
3702-
; CHECK-SD-NEXT: mov v5.b[0], v1.b[6]
3703-
; CHECK-SD-NEXT: mov v17.b[0], v1.b[4]
3704-
; CHECK-SD-NEXT: mov v20.b[0], v1.b[2]
3705-
; CHECK-SD-NEXT: mov v21.b[0], v1.b[0]
3706-
; CHECK-SD-NEXT: mov v18.b[0], v0.b[0]
3707-
; CHECK-SD-NEXT: mov v19.b[0], v0.b[6]
3708-
; CHECK-SD-NEXT: mov v22.b[0], v0.b[4]
3702+
; CHECK-SD-NEXT: mov b5, v1.b[6]
3703+
; CHECK-SD-NEXT: mov b17, v1.b[4]
3704+
; CHECK-SD-NEXT: mov b20, v1.b[2]
3705+
; CHECK-SD-NEXT: mov b21, v1.b[0]
3706+
; CHECK-SD-NEXT: mov b18, v0.b[0]
3707+
; CHECK-SD-NEXT: mov b19, v0.b[6]
3708+
; CHECK-SD-NEXT: mov b22, v0.b[4]
37093709
; CHECK-SD-NEXT: ext v16.16b, v1.16b, v1.16b, #8
3710-
; CHECK-SD-NEXT: mov v2.b[0], v3.b[0]
3711-
; CHECK-SD-NEXT: mov v4.b[0], v3.b[2]
3712-
; CHECK-SD-NEXT: mov v6.b[0], v3.b[4]
3713-
; CHECK-SD-NEXT: mov v7.b[0], v3.b[6]
3710+
; CHECK-SD-NEXT: mov b2, v3.b[0]
3711+
; CHECK-SD-NEXT: mov b4, v3.b[2]
3712+
; CHECK-SD-NEXT: mov b6, v3.b[4]
3713+
; CHECK-SD-NEXT: mov b7, v3.b[6]
37143714
; CHECK-SD-NEXT: mov v5.b[4], v1.b[7]
37153715
; CHECK-SD-NEXT: mov v17.b[4], v1.b[5]
37163716
; CHECK-SD-NEXT: mov v20.b[4], v1.b[3]
37173717
; CHECK-SD-NEXT: mov v21.b[4], v1.b[1]
37183718
; CHECK-SD-NEXT: mov v19.b[4], v0.b[7]
37193719
; CHECK-SD-NEXT: mov v22.b[4], v0.b[5]
37203720
; CHECK-SD-NEXT: mov v18.b[4], v0.b[1]
3721-
; CHECK-SD-NEXT: mov v23.b[0], v16.b[0]
3721+
; CHECK-SD-NEXT: mov b23, v16.b[0]
37223722
; CHECK-SD-NEXT: mov v2.b[4], v3.b[1]
37233723
; CHECK-SD-NEXT: mov v4.b[4], v3.b[3]
37243724
; CHECK-SD-NEXT: mov v6.b[4], v3.b[5]
37253725
; CHECK-SD-NEXT: mov v7.b[4], v3.b[7]
3726-
; CHECK-SD-NEXT: mov v3.b[0], v0.b[2]
3726+
; CHECK-SD-NEXT: mov b3, v0.b[2]
37273727
; CHECK-SD-NEXT: shl v5.2s, v5.2s, #24
37283728
; CHECK-SD-NEXT: shl v17.2s, v17.2s, #24
37293729
; CHECK-SD-NEXT: shl v20.2s, v20.2s, #24
3730-
; CHECK-SD-NEXT: mov v24.b[0], v16.b[4]
3730+
; CHECK-SD-NEXT: mov b24, v16.b[4]
37313731
; CHECK-SD-NEXT: mov v23.b[4], v16.b[1]
37323732
; CHECK-SD-NEXT: shl v18.2s, v18.2s, #24
37333733
; CHECK-SD-NEXT: shl v19.2s, v19.2s, #24
@@ -3739,10 +3739,10 @@ define <32 x double> @stofp_v32i8_v32f64(<32 x i8> %a) {
37393739
; CHECK-SD-NEXT: shl v0.2s, v21.2s, #24
37403740
; CHECK-SD-NEXT: shl v4.2s, v6.2s, #24
37413741
; CHECK-SD-NEXT: shl v6.2s, v7.2s, #24
3742-
; CHECK-SD-NEXT: mov v7.b[0], v16.b[2]
3742+
; CHECK-SD-NEXT: mov b7, v16.b[2]
37433743
; CHECK-SD-NEXT: sshll v5.2d, v5.2s, #0
37443744
; CHECK-SD-NEXT: sshr v20.2s, v20.2s, #24
3745-
; CHECK-SD-NEXT: mov v21.b[0], v16.b[6]
3745+
; CHECK-SD-NEXT: mov b21, v16.b[6]
37463746
; CHECK-SD-NEXT: sshll v17.2d, v17.2s, #0
37473747
; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #24
37483748
; CHECK-SD-NEXT: shl v22.2s, v22.2s, #24
@@ -3869,25 +3869,25 @@ entry:
38693869
define <32 x double> @utofp_v32i8_v32f64(<32 x i8> %a) {
38703870
; CHECK-SD-LABEL: utofp_v32i8_v32f64:
38713871
; CHECK-SD: // %bb.0: // %entry
3872-
; CHECK-SD-NEXT: mov v6.b[0], v1.b[6]
3873-
; CHECK-SD-NEXT: mov v7.b[0], v1.b[4]
3872+
; CHECK-SD-NEXT: mov b6, v1.b[6]
3873+
; CHECK-SD-NEXT: mov b7, v1.b[4]
38743874
; CHECK-SD-NEXT: ext v3.16b, v1.16b, v1.16b, #8
3875-
; CHECK-SD-NEXT: mov v16.b[0], v1.b[2]
3876-
; CHECK-SD-NEXT: mov v17.b[0], v1.b[0]
3877-
; CHECK-SD-NEXT: mov v19.b[0], v0.b[6]
3878-
; CHECK-SD-NEXT: mov v20.b[0], v0.b[4]
3875+
; CHECK-SD-NEXT: mov b16, v1.b[2]
3876+
; CHECK-SD-NEXT: mov b17, v1.b[0]
3877+
; CHECK-SD-NEXT: mov b19, v0.b[6]
3878+
; CHECK-SD-NEXT: mov b20, v0.b[4]
38793879
; CHECK-SD-NEXT: movi d5, #0x0000ff000000ff
3880-
; CHECK-SD-NEXT: mov v24.b[0], v0.b[2]
3881-
; CHECK-SD-NEXT: mov v25.b[0], v0.b[0]
3880+
; CHECK-SD-NEXT: mov b24, v0.b[2]
3881+
; CHECK-SD-NEXT: mov b25, v0.b[0]
38823882
; CHECK-SD-NEXT: ext v2.16b, v0.16b, v0.16b, #8
38833883
; CHECK-SD-NEXT: mov v6.b[4], v1.b[7]
38843884
; CHECK-SD-NEXT: mov v7.b[4], v1.b[5]
3885-
; CHECK-SD-NEXT: mov v18.b[0], v3.b[0]
3886-
; CHECK-SD-NEXT: mov v21.b[0], v3.b[2]
3887-
; CHECK-SD-NEXT: mov v23.b[0], v3.b[4]
3885+
; CHECK-SD-NEXT: mov b18, v3.b[0]
3886+
; CHECK-SD-NEXT: mov b21, v3.b[2]
3887+
; CHECK-SD-NEXT: mov b23, v3.b[4]
38883888
; CHECK-SD-NEXT: mov v16.b[4], v1.b[3]
38893889
; CHECK-SD-NEXT: mov v17.b[4], v1.b[1]
3890-
; CHECK-SD-NEXT: mov v1.b[0], v3.b[6]
3890+
; CHECK-SD-NEXT: mov b1, v3.b[6]
38913891
; CHECK-SD-NEXT: mov v19.b[4], v0.b[7]
38923892
; CHECK-SD-NEXT: mov v20.b[4], v0.b[5]
38933893
; CHECK-SD-NEXT: mov v24.b[4], v0.b[3]
@@ -3905,15 +3905,15 @@ define <32 x double> @utofp_v32i8_v32f64(<32 x i8> %a) {
39053905
; CHECK-SD-NEXT: ushll v7.2d, v7.2s, #0
39063906
; CHECK-SD-NEXT: and v20.8b, v20.8b, v5.8b
39073907
; CHECK-SD-NEXT: ushll v16.2d, v16.2s, #0
3908-
; CHECK-SD-NEXT: mov v4.b[0], v2.b[0]
3909-
; CHECK-SD-NEXT: mov v22.b[0], v2.b[2]
3908+
; CHECK-SD-NEXT: mov b4, v2.b[0]
3909+
; CHECK-SD-NEXT: mov b22, v2.b[2]
39103910
; CHECK-SD-NEXT: ushll v17.2d, v17.2s, #0
39113911
; CHECK-SD-NEXT: ushll v0.2d, v3.2s, #0
3912-
; CHECK-SD-NEXT: mov v19.b[0], v2.b[4]
3912+
; CHECK-SD-NEXT: mov b19, v2.b[4]
39133913
; CHECK-SD-NEXT: ucvtf v6.2d, v6.2d
39143914
; CHECK-SD-NEXT: ucvtf v3.2d, v7.2d
39153915
; CHECK-SD-NEXT: ushll v20.2d, v20.2s, #0
3916-
; CHECK-SD-NEXT: mov v7.b[0], v2.b[6]
3916+
; CHECK-SD-NEXT: mov b7, v2.b[6]
39173917
; CHECK-SD-NEXT: ucvtf v16.2d, v16.2d
39183918
; CHECK-SD-NEXT: and v24.8b, v24.8b, v5.8b
39193919
; CHECK-SD-NEXT: ucvtf v17.2d, v17.2d

llvm/test/CodeGen/AArch64/neon-bitcast.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -555,7 +555,7 @@ define <2 x i8> @bitcast_i16_to_v2i8(i16 %word) {
555555
; CHECK-LE-LABEL: bitcast_i16_to_v2i8:
556556
; CHECK-LE: // %bb.0:
557557
; CHECK-LE-NEXT: fmov s1, w0
558-
; CHECK-LE-NEXT: mov v0.b[0], v1.b[0]
558+
; CHECK-LE-NEXT: mov b0, v1.b[0]
559559
; CHECK-LE-NEXT: mov v0.b[4], v1.b[1]
560560
; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0
561561
; CHECK-LE-NEXT: ret
@@ -564,7 +564,7 @@ define <2 x i8> @bitcast_i16_to_v2i8(i16 %word) {
564564
; CHECK-BE: // %bb.0:
565565
; CHECK-BE-NEXT: fmov s0, w0
566566
; CHECK-BE-NEXT: rev16 v0.16b, v0.16b
567-
; CHECK-BE-NEXT: mov v1.b[0], v0.b[0]
567+
; CHECK-BE-NEXT: mov b1, v0.b[0]
568568
; CHECK-BE-NEXT: mov v1.b[4], v0.b[1]
569569
; CHECK-BE-NEXT: rev64 v0.2s, v1.2s
570570
; CHECK-BE-NEXT: ret

0 commit comments

Comments
 (0)