Skip to content

Commit 1b12974

Browse files
authored
[AArch64][AMDGPU][GlobalISel] Remove vector handling from unmerge_dead_to_trunc (#82224)
This combine transforms an unmerge where only the first element is used into a truncate. That works well for scalars, but for vectors it has to insert a bitcast to an integer type, perform the truncate, and then bitcast back to a vector. This generates more awkward code than simply keeping the G_UNMERGE_VALUES, so the combine is now restricted to scalar sources and destinations.
1 parent 39fd3fc commit 1b12974

23 files changed

+265
-433
lines changed

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2078,6 +2078,9 @@ bool CombinerHelper::matchCombineUnmergeUndef(
20782078
bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
20792079
assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
20802080
"Expected an unmerge");
2081+
if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2082+
MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2083+
return false;
20812084
// Check that all the lanes are dead except the first one.
20822085
for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
20832086
if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
@@ -2089,21 +2092,8 @@ bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
20892092
void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
20902093
Builder.setInstrAndDebugLoc(MI);
20912094
Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2092-
// Truncating a vector is going to truncate every single lane,
2093-
// whereas we want the full lowbits.
2094-
// Do the operation on a scalar instead.
2095-
LLT SrcTy = MRI.getType(SrcReg);
2096-
if (SrcTy.isVector())
2097-
SrcReg =
2098-
Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0);
2099-
21002095
Register Dst0Reg = MI.getOperand(0).getReg();
2101-
LLT Dst0Ty = MRI.getType(Dst0Reg);
2102-
if (Dst0Ty.isVector()) {
2103-
auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg);
2104-
Builder.buildCast(Dst0Reg, MIB);
2105-
} else
2106-
Builder.buildTrunc(Dst0Reg, SrcReg);
2096+
Builder.buildTrunc(Dst0Reg, SrcReg);
21072097
MI.eraseFromParent();
21082098
}
21092099

llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -326,10 +326,8 @@ body: |
326326
bb.1:
327327
; CHECK-LABEL: name: test_combine_unmerge_dead_to_trunc_vec_in_n_out
328328
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $x0
329-
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<4 x s16>)
330-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64)
331-
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[TRUNC]](s32)
332-
; CHECK-NEXT: $w0 = COPY [[BITCAST1]](<2 x s16>)
329+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
330+
; CHECK-NEXT: $w0 = COPY [[UV]](<2 x s16>)
333331
%0:_(<4 x s16>) = COPY $x0
334332
%1:_(<2 x s16>),%2:_(<2 x s16>) = G_UNMERGE_VALUES %0(<4 x s16>)
335333
$w0 = COPY %1(<2 x s16>)
@@ -343,9 +341,8 @@ body: |
343341
bb.1:
344342
; CHECK-LABEL: name: test_combine_unmerge_dead_to_trunc_vec_in
345343
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $x0
346-
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<2 x s32>)
347-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s64)
348-
; CHECK-NEXT: $h0 = COPY [[TRUNC]](s16)
344+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
345+
; CHECK-NEXT: $h0 = COPY [[UV]](s16)
349346
%0:_(<2 x s32>) = COPY $x0
350347
%1:_(s16),%2:_(s16),%3:_(s16),%4:_(s16) = G_UNMERGE_VALUES %0(<2 x s32>)
351348
$h0 = COPY %1(s16)

llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,14 @@ define <1 x i8> @test_bitf_v1i8(<1 x i8> %A, <1 x i8> %B, <1 x i8> %C) {
1414
;
1515
; CHECK-GI-LABEL: test_bitf_v1i8:
1616
; CHECK-GI: // %bb.0:
17-
; CHECK-GI-NEXT: fmov x8, d0
18-
; CHECK-GI-NEXT: fmov x9, d1
19-
; CHECK-GI-NEXT: fmov x10, d2
20-
; CHECK-GI-NEXT: bic w9, w9, w10
21-
; CHECK-GI-NEXT: and w8, w10, w8
17+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
18+
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
19+
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
20+
; CHECK-GI-NEXT: umov w8, v2.b[0]
21+
; CHECK-GI-NEXT: umov w9, v1.b[0]
22+
; CHECK-GI-NEXT: umov w10, v0.b[0]
23+
; CHECK-GI-NEXT: bic w9, w9, w8
24+
; CHECK-GI-NEXT: and w8, w8, w10
2225
; CHECK-GI-NEXT: orr w8, w9, w8
2326
; CHECK-GI-NEXT: fmov s0, w8
2427
; CHECK-GI-NEXT: ret
@@ -39,11 +42,14 @@ define <1 x i16> @test_bitf_v1i16(<1 x i16> %A, <1 x i16> %B, <1 x i16> %C) {
3942
;
4043
; CHECK-GI-LABEL: test_bitf_v1i16:
4144
; CHECK-GI: // %bb.0:
42-
; CHECK-GI-NEXT: fmov x8, d0
43-
; CHECK-GI-NEXT: fmov x9, d1
44-
; CHECK-GI-NEXT: fmov x10, d2
45-
; CHECK-GI-NEXT: bic w9, w9, w10
46-
; CHECK-GI-NEXT: and w8, w10, w8
45+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
46+
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
47+
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
48+
; CHECK-GI-NEXT: umov w8, v2.h[0]
49+
; CHECK-GI-NEXT: umov w9, v1.h[0]
50+
; CHECK-GI-NEXT: umov w10, v0.h[0]
51+
; CHECK-GI-NEXT: bic w9, w9, w8
52+
; CHECK-GI-NEXT: and w8, w8, w10
4753
; CHECK-GI-NEXT: orr w8, w9, w8
4854
; CHECK-GI-NEXT: fmov s0, w8
4955
; CHECK-GI-NEXT: ret
@@ -64,11 +70,11 @@ define <1 x i32> @test_bitf_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
6470
;
6571
; CHECK-GI-LABEL: test_bitf_v1i32:
6672
; CHECK-GI: // %bb.0:
67-
; CHECK-GI-NEXT: fmov x8, d0
68-
; CHECK-GI-NEXT: fmov x9, d1
69-
; CHECK-GI-NEXT: fmov x10, d2
70-
; CHECK-GI-NEXT: bic w9, w9, w10
71-
; CHECK-GI-NEXT: and w8, w10, w8
73+
; CHECK-GI-NEXT: fmov w8, s2
74+
; CHECK-GI-NEXT: fmov w9, s1
75+
; CHECK-GI-NEXT: fmov w10, s0
76+
; CHECK-GI-NEXT: bic w9, w9, w8
77+
; CHECK-GI-NEXT: and w8, w8, w10
7278
; CHECK-GI-NEXT: orr w8, w9, w8
7379
; CHECK-GI-NEXT: fmov s0, w8
7480
; CHECK-GI-NEXT: mov v0.s[1], w8

llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,14 @@ define <1 x i8> @test_bit_v1i8(<1 x i8> %A, <1 x i8> %B, <1 x i8> %C) {
1616
;
1717
; CHECK-GI-LABEL: test_bit_v1i8:
1818
; CHECK-GI: // %bb.0:
19-
; CHECK-GI-NEXT: fmov x8, d0
20-
; CHECK-GI-NEXT: fmov x9, d1
21-
; CHECK-GI-NEXT: fmov x10, d2
22-
; CHECK-GI-NEXT: and w9, w10, w9
23-
; CHECK-GI-NEXT: bic w8, w8, w10
19+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
20+
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
21+
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
22+
; CHECK-GI-NEXT: umov w8, v2.b[0]
23+
; CHECK-GI-NEXT: umov w9, v1.b[0]
24+
; CHECK-GI-NEXT: umov w10, v0.b[0]
25+
; CHECK-GI-NEXT: and w9, w8, w9
26+
; CHECK-GI-NEXT: bic w8, w10, w8
2427
; CHECK-GI-NEXT: orr w8, w9, w8
2528
; CHECK-GI-NEXT: fmov s0, w8
2629
; CHECK-GI-NEXT: ret
@@ -41,11 +44,14 @@ define <1 x i16> @test_bit_v1i16(<1 x i16> %A, <1 x i16> %B, <1 x i16> %C) {
4144
;
4245
; CHECK-GI-LABEL: test_bit_v1i16:
4346
; CHECK-GI: // %bb.0:
44-
; CHECK-GI-NEXT: fmov x8, d0
45-
; CHECK-GI-NEXT: fmov x9, d1
46-
; CHECK-GI-NEXT: fmov x10, d2
47-
; CHECK-GI-NEXT: and w9, w10, w9
48-
; CHECK-GI-NEXT: bic w8, w8, w10
47+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
48+
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
49+
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
50+
; CHECK-GI-NEXT: umov w8, v2.h[0]
51+
; CHECK-GI-NEXT: umov w9, v1.h[0]
52+
; CHECK-GI-NEXT: umov w10, v0.h[0]
53+
; CHECK-GI-NEXT: and w9, w8, w9
54+
; CHECK-GI-NEXT: bic w8, w10, w8
4955
; CHECK-GI-NEXT: orr w8, w9, w8
5056
; CHECK-GI-NEXT: fmov s0, w8
5157
; CHECK-GI-NEXT: ret
@@ -66,11 +72,11 @@ define <1 x i32> @test_bit_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
6672
;
6773
; CHECK-GI-LABEL: test_bit_v1i32:
6874
; CHECK-GI: // %bb.0:
69-
; CHECK-GI-NEXT: fmov x8, d0
70-
; CHECK-GI-NEXT: fmov x9, d1
71-
; CHECK-GI-NEXT: fmov x10, d2
72-
; CHECK-GI-NEXT: and w9, w10, w9
73-
; CHECK-GI-NEXT: bic w8, w8, w10
75+
; CHECK-GI-NEXT: fmov w8, s2
76+
; CHECK-GI-NEXT: fmov w9, s1
77+
; CHECK-GI-NEXT: fmov w10, s0
78+
; CHECK-GI-NEXT: and w9, w8, w9
79+
; CHECK-GI-NEXT: bic w8, w10, w8
7480
; CHECK-GI-NEXT: orr w8, w9, w8
7581
; CHECK-GI-NEXT: fmov s0, w8
7682
; CHECK-GI-NEXT: mov v0.s[1], w8

llvm/test/CodeGen/AArch64/abs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ define <1 x i32> @abs_v1i32(<1 x i32> %a){
250250
;
251251
; CHECK-GI-LABEL: abs_v1i32:
252252
; CHECK-GI: // %bb.0: // %entry
253-
; CHECK-GI-NEXT: fmov x8, d0
253+
; CHECK-GI-NEXT: fmov w8, s0
254254
; CHECK-GI-NEXT: asr w9, w8, #31
255255
; CHECK-GI-NEXT: add w8, w8, w9
256256
; CHECK-GI-NEXT: eor w8, w8, w9

llvm/test/CodeGen/AArch64/arm64-neon-copy.ll

Lines changed: 28 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1394,7 +1394,7 @@ define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
13941394
;
13951395
; CHECK-GI-LABEL: testDUP.v1i8:
13961396
; CHECK-GI: // %bb.0:
1397-
; CHECK-GI-NEXT: fmov x8, d0
1397+
; CHECK-GI-NEXT: fmov w8, s0
13981398
; CHECK-GI-NEXT: dup v0.8b, w8
13991399
; CHECK-GI-NEXT: ret
14001400
%b = extractelement <1 x i8> %a, i32 0
@@ -1410,17 +1410,11 @@ define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
14101410
}
14111411

14121412
define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
1413-
; CHECK-SD-LABEL: testDUP.v1i16:
1414-
; CHECK-SD: // %bb.0:
1415-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1416-
; CHECK-SD-NEXT: dup v0.8h, v0.h[0]
1417-
; CHECK-SD-NEXT: ret
1418-
;
1419-
; CHECK-GI-LABEL: testDUP.v1i16:
1420-
; CHECK-GI: // %bb.0:
1421-
; CHECK-GI-NEXT: fmov x8, d0
1422-
; CHECK-GI-NEXT: dup v0.8h, w8
1423-
; CHECK-GI-NEXT: ret
1413+
; CHECK-LABEL: testDUP.v1i16:
1414+
; CHECK: // %bb.0:
1415+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1416+
; CHECK-NEXT: dup v0.8h, v0.h[0]
1417+
; CHECK-NEXT: ret
14241418
%b = extractelement <1 x i16> %a, i32 0
14251419
%c = insertelement <8 x i16> undef, i16 %b, i32 0
14261420
%d = insertelement <8 x i16> %c, i16 %b, i32 1
@@ -1434,17 +1428,11 @@ define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
14341428
}
14351429

14361430
define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
1437-
; CHECK-SD-LABEL: testDUP.v1i32:
1438-
; CHECK-SD: // %bb.0:
1439-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1440-
; CHECK-SD-NEXT: dup v0.4s, v0.s[0]
1441-
; CHECK-SD-NEXT: ret
1442-
;
1443-
; CHECK-GI-LABEL: testDUP.v1i32:
1444-
; CHECK-GI: // %bb.0:
1445-
; CHECK-GI-NEXT: fmov x8, d0
1446-
; CHECK-GI-NEXT: dup v0.4s, w8
1447-
; CHECK-GI-NEXT: ret
1431+
; CHECK-LABEL: testDUP.v1i32:
1432+
; CHECK: // %bb.0:
1433+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1434+
; CHECK-NEXT: dup v0.4s, v0.s[0]
1435+
; CHECK-NEXT: ret
14481436
%b = extractelement <1 x i32> %a, i32 0
14491437
%c = insertelement <4 x i32> undef, i32 %b, i32 0
14501438
%d = insertelement <4 x i32> %c, i32 %b, i32 1
@@ -2448,33 +2436,21 @@ define <16 x i8> @concat_vector_v16i8_const() {
24482436
}
24492437

24502438
define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
2451-
; CHECK-SD-LABEL: concat_vector_v4i16:
2452-
; CHECK-SD: // %bb.0:
2453-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
2454-
; CHECK-SD-NEXT: dup v0.4h, v0.h[0]
2455-
; CHECK-SD-NEXT: ret
2456-
;
2457-
; CHECK-GI-LABEL: concat_vector_v4i16:
2458-
; CHECK-GI: // %bb.0:
2459-
; CHECK-GI-NEXT: fmov x8, d0
2460-
; CHECK-GI-NEXT: dup v0.4h, w8
2461-
; CHECK-GI-NEXT: ret
2439+
; CHECK-LABEL: concat_vector_v4i16:
2440+
; CHECK: // %bb.0:
2441+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
2442+
; CHECK-NEXT: dup v0.4h, v0.h[0]
2443+
; CHECK-NEXT: ret
24622444
%r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
24632445
ret <4 x i16> %r
24642446
}
24652447

24662448
define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
2467-
; CHECK-SD-LABEL: concat_vector_v4i32:
2468-
; CHECK-SD: // %bb.0:
2469-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
2470-
; CHECK-SD-NEXT: dup v0.4s, v0.s[0]
2471-
; CHECK-SD-NEXT: ret
2472-
;
2473-
; CHECK-GI-LABEL: concat_vector_v4i32:
2474-
; CHECK-GI: // %bb.0:
2475-
; CHECK-GI-NEXT: fmov x8, d0
2476-
; CHECK-GI-NEXT: dup v0.4s, w8
2477-
; CHECK-GI-NEXT: ret
2449+
; CHECK-LABEL: concat_vector_v4i32:
2450+
; CHECK: // %bb.0:
2451+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
2452+
; CHECK-NEXT: dup v0.4s, v0.s[0]
2453+
; CHECK-NEXT: ret
24782454
%r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
24792455
ret <4 x i32> %r
24802456
}
@@ -2488,25 +2464,19 @@ define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
24882464
;
24892465
; CHECK-GI-LABEL: concat_vector_v8i8:
24902466
; CHECK-GI: // %bb.0:
2491-
; CHECK-GI-NEXT: fmov x8, d0
2467+
; CHECK-GI-NEXT: fmov w8, s0
24922468
; CHECK-GI-NEXT: dup v0.8b, w8
24932469
; CHECK-GI-NEXT: ret
24942470
%r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
24952471
ret <8 x i8> %r
24962472
}
24972473

24982474
define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
2499-
; CHECK-SD-LABEL: concat_vector_v8i16:
2500-
; CHECK-SD: // %bb.0:
2501-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
2502-
; CHECK-SD-NEXT: dup v0.8h, v0.h[0]
2503-
; CHECK-SD-NEXT: ret
2504-
;
2505-
; CHECK-GI-LABEL: concat_vector_v8i16:
2506-
; CHECK-GI: // %bb.0:
2507-
; CHECK-GI-NEXT: fmov x8, d0
2508-
; CHECK-GI-NEXT: dup v0.8h, w8
2509-
; CHECK-GI-NEXT: ret
2475+
; CHECK-LABEL: concat_vector_v8i16:
2476+
; CHECK: // %bb.0:
2477+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
2478+
; CHECK-NEXT: dup v0.8h, v0.h[0]
2479+
; CHECK-NEXT: ret
25102480
%r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
25112481
ret <8 x i16> %r
25122482
}
@@ -2520,7 +2490,7 @@ define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
25202490
;
25212491
; CHECK-GI-LABEL: concat_vector_v16i8:
25222492
; CHECK-GI: // %bb.0:
2523-
; CHECK-GI-NEXT: fmov x8, d0
2493+
; CHECK-GI-NEXT: fmov w8, s0
25242494
; CHECK-GI-NEXT: dup v0.16b, w8
25252495
; CHECK-GI-NEXT: ret
25262496
%r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer

llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,8 +217,6 @@ define half @test_vcvt_f16_f32(<1 x float> %x) {
217217
;
218218
; GISEL-LABEL: test_vcvt_f16_f32:
219219
; GISEL: // %bb.0:
220-
; GISEL-NEXT: fmov x8, d0
221-
; GISEL-NEXT: fmov s0, w8
222220
; GISEL-NEXT: fcvt h0, s0
223221
; GISEL-NEXT: ret
224222
%tmp = fptrunc <1 x float> %x to <1 x half>

llvm/test/CodeGen/AArch64/bswap.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ define <1 x i32> @bswap_v1i32(<1 x i32> %a){
134134
;
135135
; CHECK-GI-LABEL: bswap_v1i32:
136136
; CHECK-GI: // %bb.0: // %entry
137-
; CHECK-GI-NEXT: fmov x8, d0
137+
; CHECK-GI-NEXT: fmov w8, s0
138138
; CHECK-GI-NEXT: rev w8, w8
139139
; CHECK-GI-NEXT: fmov s0, w8
140140
; CHECK-GI-NEXT: mov v0.s[1], w8

llvm/test/CodeGen/AArch64/fpext.ll

Lines changed: 9 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -85,24 +85,14 @@ entry:
8585
}
8686

8787
define <2 x double> @fpext_v2f16_v2f64(<2 x half> %a) {
88-
; CHECK-SD-LABEL: fpext_v2f16_v2f64:
89-
; CHECK-SD: // %bb.0: // %entry
90-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
91-
; CHECK-SD-NEXT: mov h1, v0.h[1]
92-
; CHECK-SD-NEXT: fcvt d0, h0
93-
; CHECK-SD-NEXT: fcvt d1, h1
94-
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
95-
; CHECK-SD-NEXT: ret
96-
;
97-
; CHECK-GI-LABEL: fpext_v2f16_v2f64:
98-
; CHECK-GI: // %bb.0: // %entry
99-
; CHECK-GI-NEXT: fmov x8, d0
100-
; CHECK-GI-NEXT: fmov s0, w8
101-
; CHECK-GI-NEXT: mov h1, v0.h[1]
102-
; CHECK-GI-NEXT: fcvt d0, h0
103-
; CHECK-GI-NEXT: fcvt d1, h1
104-
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
105-
; CHECK-GI-NEXT: ret
88+
; CHECK-LABEL: fpext_v2f16_v2f64:
89+
; CHECK: // %bb.0: // %entry
90+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
91+
; CHECK-NEXT: mov h1, v0.h[1]
92+
; CHECK-NEXT: fcvt d0, h0
93+
; CHECK-NEXT: fcvt d1, h1
94+
; CHECK-NEXT: mov v0.d[1], v1.d[0]
95+
; CHECK-NEXT: ret
10696
entry:
10797
%c = fpext <2 x half> %a to <2 x double>
10898
ret <2 x double> %c
@@ -165,8 +155,7 @@ define <2 x float> @fpext_v2f16_v2f32(<2 x half> %a) {
165155
;
166156
; CHECK-GI-LABEL: fpext_v2f16_v2f32:
167157
; CHECK-GI: // %bb.0: // %entry
168-
; CHECK-GI-NEXT: fmov x8, d0
169-
; CHECK-GI-NEXT: fmov s0, w8
158+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
170159
; CHECK-GI-NEXT: mov h1, v0.h[1]
171160
; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
172161
; CHECK-GI-NEXT: mov v0.h[2], v0.h[0]

0 commit comments

Comments
 (0)