Skip to content

Commit 1d900e2

Browse files
authored
[AArch64][GlobalISel] Avoid generating inserts for undefs when selecting G_BUILD_VECTOR (#84452)
It is safe to skip undef elements when selecting G_BUILD_VECTOR: an undef lane may hold any value, so an insert emitted for it would only copy from an arbitrarily chosen register.
1 parent f95710c commit 1d900e2

40 files changed

+545
-865
lines changed

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5934,13 +5934,16 @@ bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
59345934

59355935
// Keep track of the last MI we inserted. Later on, we might be able to save
59365936
// a copy using it.
5937-
MachineInstr *PrevMI = nullptr;
5937+
MachineInstr *PrevMI = ScalarToVec;
59385938
for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
59395939
// Note that if we don't do a subregister copy, we can end up making an
59405940
// extra register.
5941-
PrevMI = &*emitLaneInsert(std::nullopt, DstVec, I.getOperand(i).getReg(),
5942-
i - 1, RB, MIB);
5943-
DstVec = PrevMI->getOperand(0).getReg();
5941+
Register OpReg = I.getOperand(i).getReg();
5942+
// Do not emit inserts for undefs
5943+
if (!getOpcodeDef<GImplicitDef>(OpReg, MRI)) {
5944+
PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5945+
DstVec = PrevMI->getOperand(0).getReg();
5946+
}
59445947
}
59455948

59465949
// If DstTy's size in bits is less than 128, then emit a subregister copy
@@ -5973,11 +5976,27 @@ bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
59735976
RegOp.setReg(Reg);
59745977
RBI.constrainGenericRegister(DstReg, *RC, MRI);
59755978
} else {
5976-
// We don't need a subregister copy. Save a copy by re-using the
5977-
// destination register on the final insert.
5978-
assert(PrevMI && "PrevMI was null?");
5979+
// We either have a vector with all elements (except the first one) undef or
5980+
// at least one non-undef non-first element. In the first case, we need to
5981+
// constrain the output register ourselves as we may have generated an
5982+
// INSERT_SUBREG operation which is a generic operation for which the
5983+
// output regclass cannot be automatically chosen.
5984+
//
5985+
// In the second case, there is no need to do this as it may generate an
5986+
// instruction like INSvi32gpr where the regclass can be automatically
5987+
// chosen.
5988+
//
5989+
// Also, we save a copy by re-using the destination register on the final
5990+
// insert.
59795991
PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
59805992
constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
5993+
5994+
Register DstReg = PrevMI->getOperand(0).getReg();
5995+
if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
5996+
const TargetRegisterClass *RC =
5997+
getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5998+
RBI.constrainGenericRegister(DstReg, *RC, MRI);
5999+
}
59816000
}
59826001

59836002
I.eraseFromParent();

llvm/test/CodeGen/AArch64/GlobalISel/select-build-vector.mir

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -266,12 +266,8 @@ body: |
266266
; CHECK-LABEL: name: undef_elts_different_regbanks
267267
; CHECK: liveins: $w0
268268
; CHECK: %val:gpr32all = COPY $w0
269-
; CHECK: %undef:gpr32 = IMPLICIT_DEF
270269
; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
271-
; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %val, %subreg.ssub
272-
; CHECK: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG]], 1, %undef
273-
; CHECK: [[INSvi32gpr1:%[0-9]+]]:fpr128 = INSvi32gpr [[INSvi32gpr]], 2, %undef
274-
; CHECK: %bv:fpr128 = INSvi32gpr [[INSvi32gpr1]], 3, %undef
270+
; CHECK: %bv:fpr128 = INSERT_SUBREG [[DEF]], %val, %subreg.ssub
275271
; CHECK: $q0 = COPY %bv
276272
; CHECK: RET_ReallyLR implicit $q0
277273
%val:gpr(s32) = COPY $w0

llvm/test/CodeGen/AArch64/GlobalISel/select-shufflevec-undef-mask-elt.mir

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,20 +19,18 @@ body: |
1919
; CHECK: liveins: $d0
2020
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
2121
; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
22-
; CHECK: [[DEF1:%[0-9]+]]:gpr32 = IMPLICIT_DEF
23-
; CHECK: [[DEF2:%[0-9]+]]:fpr128 = IMPLICIT_DEF
24-
; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF2]], [[DEF]], %subreg.ssub
25-
; CHECK: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG]], 1, [[DEF1]]
26-
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[INSvi32gpr]].dsub
22+
; CHECK: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
23+
; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[DEF]], %subreg.ssub
24+
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[INSERT_SUBREG]].dsub
2725
; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
2826
; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
27+
; CHECK: [[DEF2:%[0-9]+]]:fpr128 = IMPLICIT_DEF
28+
; CHECK: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF2]], [[COPY]], %subreg.dsub
2929
; CHECK: [[DEF3:%[0-9]+]]:fpr128 = IMPLICIT_DEF
30-
; CHECK: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF3]], [[COPY]], %subreg.dsub
31-
; CHECK: [[DEF4:%[0-9]+]]:fpr128 = IMPLICIT_DEF
32-
; CHECK: [[INSERT_SUBREG2:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF4]], [[COPY1]], %subreg.dsub
30+
; CHECK: [[INSERT_SUBREG2:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF3]], [[COPY1]], %subreg.dsub
3331
; CHECK: [[INSvi64lane:%[0-9]+]]:fpr128 = INSvi64lane [[INSERT_SUBREG1]], 1, [[INSERT_SUBREG2]], 0
34-
; CHECK: [[DEF5:%[0-9]+]]:fpr128 = IMPLICIT_DEF
35-
; CHECK: [[INSERT_SUBREG3:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF5]], [[LDRDui]], %subreg.dsub
32+
; CHECK: [[DEF4:%[0-9]+]]:fpr128 = IMPLICIT_DEF
33+
; CHECK: [[INSERT_SUBREG3:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF4]], [[LDRDui]], %subreg.dsub
3634
; CHECK: [[TBLv16i8One:%[0-9]+]]:fpr128 = TBLv16i8One [[INSvi64lane]], [[INSERT_SUBREG3]]
3735
; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY [[TBLv16i8One]].dsub
3836
; CHECK: $d0 = COPY [[COPY2]]

llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ define <1 x i32> @test_bitf_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
7777
; CHECK-GI-NEXT: and w8, w8, w10
7878
; CHECK-GI-NEXT: orr w8, w9, w8
7979
; CHECK-GI-NEXT: fmov s0, w8
80-
; CHECK-GI-NEXT: mov v0.s[1], w8
8180
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
8281
; CHECK-GI-NEXT: ret
8382
%neg = xor <1 x i32> %C, <i32 -1>

llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@ define <1 x i32> @test_bit_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
7979
; CHECK-GI-NEXT: bic w8, w10, w8
8080
; CHECK-GI-NEXT: orr w8, w9, w8
8181
; CHECK-GI-NEXT: fmov s0, w8
82-
; CHECK-GI-NEXT: mov v0.s[1], w8
8382
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
8483
; CHECK-GI-NEXT: ret
8584
%and = and <1 x i32> %C, %B

llvm/test/CodeGen/AArch64/abs.ll

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,6 @@ define <1 x i32> @abs_v1i32(<1 x i32> %a){
252252
; CHECK-GI-NEXT: add w8, w8, w9
253253
; CHECK-GI-NEXT: eor w8, w8, w9
254254
; CHECK-GI-NEXT: fmov s0, w8
255-
; CHECK-GI-NEXT: mov v0.s[1], w8
256255
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
257256
; CHECK-GI-NEXT: ret
258257
entry:
@@ -308,11 +307,6 @@ define <3 x i8> @abs_v3i8(<3 x i8> %a){
308307
; CHECK-GI-NEXT: mov v0.b[1], v1.b[0]
309308
; CHECK-GI-NEXT: fmov s1, w2
310309
; CHECK-GI-NEXT: mov v0.b[2], v1.b[0]
311-
; CHECK-GI-NEXT: mov v0.b[3], v0.b[0]
312-
; CHECK-GI-NEXT: mov v0.b[4], v0.b[0]
313-
; CHECK-GI-NEXT: mov v0.b[5], v0.b[0]
314-
; CHECK-GI-NEXT: mov v0.b[6], v0.b[0]
315-
; CHECK-GI-NEXT: mov v0.b[7], v0.b[0]
316310
; CHECK-GI-NEXT: abs v0.8b, v0.8b
317311
; CHECK-GI-NEXT: umov w0, v0.b[0]
318312
; CHECK-GI-NEXT: umov w1, v0.b[1]

llvm/test/CodeGen/AArch64/arm64-dup.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -373,11 +373,9 @@ define <4 x i16> @test_build_illegal(<4 x i32> %in) {
373373
;
374374
; CHECK-GI-LABEL: test_build_illegal:
375375
; CHECK-GI: // %bb.0:
376-
; CHECK-GI-NEXT: mov.h v1[1], v0[0]
377376
; CHECK-GI-NEXT: mov s0, v0[3]
378-
; CHECK-GI-NEXT: mov.h v1[2], v0[0]
379-
; CHECK-GI-NEXT: mov.h v1[3], v0[0]
380-
; CHECK-GI-NEXT: fmov d0, d1
377+
; CHECK-GI-NEXT: mov.h v0[3], v0[0]
378+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
381379
; CHECK-GI-NEXT: ret
382380
%val = extractelement <4 x i32> %in, i32 3
383381
%smallval = trunc i32 %val to i16

llvm/test/CodeGen/AArch64/arm64-neon-copy.ll

Lines changed: 8 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1346,41 +1346,26 @@ define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
13461346
; CHECK-GI-LABEL: scalar_to_vector.v2i32:
13471347
; CHECK-GI: // %bb.0:
13481348
; CHECK-GI-NEXT: fmov s0, w0
1349-
; CHECK-GI-NEXT: mov v0.s[1], w8
13501349
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
13511350
; CHECK-GI-NEXT: ret
13521351
%b = insertelement <2 x i32> undef, i32 %a, i32 0
13531352
ret <2 x i32> %b
13541353
}
13551354

13561355
define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
1357-
; CHECK-SD-LABEL: scalar_to_vector.v4i32:
1358-
; CHECK-SD: // %bb.0:
1359-
; CHECK-SD-NEXT: fmov s0, w0
1360-
; CHECK-SD-NEXT: ret
1361-
;
1362-
; CHECK-GI-LABEL: scalar_to_vector.v4i32:
1363-
; CHECK-GI: // %bb.0:
1364-
; CHECK-GI-NEXT: fmov s0, w0
1365-
; CHECK-GI-NEXT: mov v0.s[1], w8
1366-
; CHECK-GI-NEXT: mov v0.s[2], w8
1367-
; CHECK-GI-NEXT: mov v0.s[3], w8
1368-
; CHECK-GI-NEXT: ret
1356+
; CHECK-LABEL: scalar_to_vector.v4i32:
1357+
; CHECK: // %bb.0:
1358+
; CHECK-NEXT: fmov s0, w0
1359+
; CHECK-NEXT: ret
13691360
%b = insertelement <4 x i32> undef, i32 %a, i32 0
13701361
ret <4 x i32> %b
13711362
}
13721363

13731364
define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
1374-
; CHECK-SD-LABEL: scalar_to_vector.v2i64:
1375-
; CHECK-SD: // %bb.0:
1376-
; CHECK-SD-NEXT: fmov d0, x0
1377-
; CHECK-SD-NEXT: ret
1378-
;
1379-
; CHECK-GI-LABEL: scalar_to_vector.v2i64:
1380-
; CHECK-GI: // %bb.0:
1381-
; CHECK-GI-NEXT: fmov d0, x0
1382-
; CHECK-GI-NEXT: mov v0.d[1], x8
1383-
; CHECK-GI-NEXT: ret
1365+
; CHECK-LABEL: scalar_to_vector.v2i64:
1366+
; CHECK: // %bb.0:
1367+
; CHECK-NEXT: fmov d0, x0
1368+
; CHECK-NEXT: ret
13841369
%b = insertelement <2 x i64> undef, i64 %a, i32 0
13851370
ret <2 x i64> %b
13861371
}
@@ -1900,14 +1885,6 @@ define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
19001885
; CHECK-GI-NEXT: mov v0.b[5], v6.b[0]
19011886
; CHECK-GI-NEXT: mov v0.b[6], v7.b[0]
19021887
; CHECK-GI-NEXT: mov v0.b[7], v16.b[0]
1903-
; CHECK-GI-NEXT: mov v0.b[8], v0.b[0]
1904-
; CHECK-GI-NEXT: mov v0.b[9], v0.b[0]
1905-
; CHECK-GI-NEXT: mov v0.b[10], v0.b[0]
1906-
; CHECK-GI-NEXT: mov v0.b[11], v0.b[0]
1907-
; CHECK-GI-NEXT: mov v0.b[12], v0.b[0]
1908-
; CHECK-GI-NEXT: mov v0.b[13], v0.b[0]
1909-
; CHECK-GI-NEXT: mov v0.b[14], v0.b[0]
1910-
; CHECK-GI-NEXT: mov v0.b[15], v0.b[0]
19111888
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
19121889
; CHECK-GI-NEXT: ret
19131890
entry:
@@ -2123,10 +2100,6 @@ define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
21232100
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI131_0]
21242101
; CHECK-GI-NEXT: mov v0.h[2], v3.h[0]
21252102
; CHECK-GI-NEXT: mov v0.h[3], v4.h[0]
2126-
; CHECK-GI-NEXT: mov v0.h[4], v0.h[0]
2127-
; CHECK-GI-NEXT: mov v0.h[5], v0.h[0]
2128-
; CHECK-GI-NEXT: mov v0.h[6], v0.h[0]
2129-
; CHECK-GI-NEXT: mov v0.h[7], v0.h[0]
21302103
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
21312104
; CHECK-GI-NEXT: ret
21322105
entry:
@@ -2266,8 +2239,6 @@ define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
22662239
; CHECK-GI-NEXT: mov s2, v0.s[1]
22672240
; CHECK-GI-NEXT: mov v0.s[1], v2.s[0]
22682241
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI135_0]
2269-
; CHECK-GI-NEXT: mov v0.s[2], v0.s[0]
2270-
; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
22712242
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
22722243
; CHECK-GI-NEXT: ret
22732244
entry:

llvm/test/CodeGen/AArch64/bitcast.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ define <4 x i16> @foo1(<2 x i32> %a) {
2121
; CHECK-GI: // %bb.0:
2222
; CHECK-GI-NEXT: mov w8, #58712 // =0xe558
2323
; CHECK-GI-NEXT: fmov s1, w8
24-
; CHECK-GI-NEXT: mov v1.s[1], w8
2524
; CHECK-GI-NEXT: zip1 v0.2s, v1.2s, v0.2s
2625
; CHECK-GI-NEXT: rev32 v0.4h, v0.4h
2726
; CHECK-GI-NEXT: ret
@@ -42,7 +41,6 @@ define <4 x i16> @foo2(<2 x i32> %a) {
4241
; CHECK-GI: // %bb.0:
4342
; CHECK-GI-NEXT: mov w8, #712 // =0x2c8
4443
; CHECK-GI-NEXT: fmov s1, w8
45-
; CHECK-GI-NEXT: mov v1.s[1], w8
4644
; CHECK-GI-NEXT: zip1 v0.2s, v1.2s, v0.2s
4745
; CHECK-GI-NEXT: rev32 v0.4h, v0.4h
4846
; CHECK-GI-NEXT: ret

llvm/test/CodeGen/AArch64/bswap.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,6 @@ define <1 x i32> @bswap_v1i32(<1 x i32> %a){
137137
; CHECK-GI-NEXT: fmov w8, s0
138138
; CHECK-GI-NEXT: rev w8, w8
139139
; CHECK-GI-NEXT: fmov s0, w8
140-
; CHECK-GI-NEXT: mov v0.s[1], w8
141140
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
142141
; CHECK-GI-NEXT: ret
143142
entry:

llvm/test/CodeGen/AArch64/fabs.ll

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -160,21 +160,20 @@ define <7 x half> @fabs_v7f16(<7 x half> %a) {
160160
;
161161
; CHECK-GI-NOFP16-LABEL: fabs_v7f16:
162162
; CHECK-GI-NOFP16: // %bb.0: // %entry
163-
; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4]
164-
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5]
165-
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
166-
; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
167-
; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0]
168-
; CHECK-GI-NOFP16-NEXT: fabs v2.4s, v3.4s
169-
; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v0.h[0]
170-
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
171-
; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0]
172-
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
163+
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h
164+
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[4]
165+
; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[5]
166+
; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[6]
167+
; CHECK-GI-NOFP16-NEXT: fabs v1.4s, v1.4s
168+
; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v3.h[0]
169+
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s
170+
; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v4.h[0]
171+
; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
172+
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h
173173
; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[2]
174174
; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
175-
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
176-
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0]
177-
; CHECK-GI-NOFP16-NEXT: fabs v1.4s, v1.4s
175+
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
176+
; CHECK-GI-NOFP16-NEXT: fabs v1.4s, v2.4s
178177
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[0]
179178
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
180179
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[0]
@@ -183,7 +182,6 @@ define <7 x half> @fabs_v7f16(<7 x half> %a) {
183182
; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
184183
; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0]
185184
; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0]
186-
; CHECK-GI-NOFP16-NEXT: mov v0.h[7], v0.h[0]
187185
; CHECK-GI-NOFP16-NEXT: ret
188186
;
189187
; CHECK-GI-FP16-LABEL: fabs_v7f16:

llvm/test/CodeGen/AArch64/faddsub.ll

Lines changed: 28 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -186,26 +186,24 @@ define <7 x half> @fadd_v7f16(<7 x half> %a, <7 x half> %b) {
186186
;
187187
; CHECK-GI-NOFP16-LABEL: fadd_v7f16:
188188
; CHECK-GI-NOFP16: // %bb.0: // %entry
189-
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[4]
190-
; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[5]
191-
; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[4]
192-
; CHECK-GI-NOFP16-NEXT: mov h5, v1.h[5]
193-
; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v0.4h
194-
; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v1.4h
195-
; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
189+
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h
190+
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h
191+
; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[4]
192+
; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
193+
; CHECK-GI-NOFP16-NEXT: mov h6, v1.h[4]
194+
; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[5]
196195
; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[6]
197-
; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v3.h[0]
196+
; CHECK-GI-NOFP16-NEXT: fadd v2.4s, v2.4s, v3.4s
197+
; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
198198
; CHECK-GI-NOFP16-NEXT: mov v4.h[1], v5.h[0]
199-
; CHECK-GI-NOFP16-NEXT: fadd v3.4s, v6.4s, v7.4s
200-
; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v0.h[0]
201-
; CHECK-GI-NOFP16-NEXT: mov v4.h[2], v1.h[0]
202-
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v3.4s
203-
; CHECK-GI-NOFP16-NEXT: mov v2.h[3], v0.h[0]
204-
; CHECK-GI-NOFP16-NEXT: mov v4.h[3], v0.h[0]
199+
; CHECK-GI-NOFP16-NEXT: mov v6.h[1], v7.h[0]
200+
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
201+
; CHECK-GI-NOFP16-NEXT: mov v4.h[2], v3.h[0]
202+
; CHECK-GI-NOFP16-NEXT: mov v6.h[2], v1.h[0]
205203
; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
206204
; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3]
207-
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h
208-
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v4.4h
205+
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v4.4h
206+
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v6.4h
209207
; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2]
210208
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
211209
; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v3.4s
@@ -217,7 +215,6 @@ define <7 x half> @fadd_v7f16(<7 x half> %a, <7 x half> %b) {
217215
; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
218216
; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0]
219217
; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0]
220-
; CHECK-GI-NOFP16-NEXT: mov v0.h[7], v0.h[0]
221218
; CHECK-GI-NOFP16-NEXT: ret
222219
;
223220
; CHECK-GI-FP16-LABEL: fadd_v7f16:
@@ -538,26 +535,24 @@ define <7 x half> @fsub_v7f16(<7 x half> %a, <7 x half> %b) {
538535
;
539536
; CHECK-GI-NOFP16-LABEL: fsub_v7f16:
540537
; CHECK-GI-NOFP16: // %bb.0: // %entry
541-
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[4]
542-
; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[5]
543-
; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[4]
544-
; CHECK-GI-NOFP16-NEXT: mov h5, v1.h[5]
545-
; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v0.4h
546-
; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v1.4h
547-
; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
538+
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h
539+
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h
540+
; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[4]
541+
; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
542+
; CHECK-GI-NOFP16-NEXT: mov h6, v1.h[4]
543+
; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[5]
548544
; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[6]
549-
; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v3.h[0]
545+
; CHECK-GI-NOFP16-NEXT: fsub v2.4s, v2.4s, v3.4s
546+
; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
550547
; CHECK-GI-NOFP16-NEXT: mov v4.h[1], v5.h[0]
551-
; CHECK-GI-NOFP16-NEXT: fsub v3.4s, v6.4s, v7.4s
552-
; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v0.h[0]
553-
; CHECK-GI-NOFP16-NEXT: mov v4.h[2], v1.h[0]
554-
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v3.4s
555-
; CHECK-GI-NOFP16-NEXT: mov v2.h[3], v0.h[0]
556-
; CHECK-GI-NOFP16-NEXT: mov v4.h[3], v0.h[0]
548+
; CHECK-GI-NOFP16-NEXT: mov v6.h[1], v7.h[0]
549+
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
550+
; CHECK-GI-NOFP16-NEXT: mov v4.h[2], v3.h[0]
551+
; CHECK-GI-NOFP16-NEXT: mov v6.h[2], v1.h[0]
557552
; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
558553
; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3]
559-
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h
560-
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v4.4h
554+
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v4.4h
555+
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v6.4h
561556
; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2]
562557
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
563558
; CHECK-GI-NOFP16-NEXT: fsub v1.4s, v2.4s, v3.4s
@@ -569,7 +564,6 @@ define <7 x half> @fsub_v7f16(<7 x half> %a, <7 x half> %b) {
569564
; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
570565
; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0]
571566
; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0]
572-
; CHECK-GI-NOFP16-NEXT: mov v0.h[7], v0.h[0]
573567
; CHECK-GI-NOFP16-NEXT: ret
574568
;
575569
; CHECK-GI-FP16-LABEL: fsub_v7f16:

0 commit comments

Comments
 (0)