Skip to content

[AArch64][AMDGPU][GlobalISel] Remove vector handling from unmerge_dead_to_trunc #82224

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 4 additions & 14 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2077,6 +2077,9 @@ bool CombinerHelper::matchCombineUnmergeUndef(
bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
"Expected an unmerge");
if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
return false;
// Check that all the lanes are dead except the first one.
for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
Expand All @@ -2088,21 +2091,8 @@ bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
Builder.setInstrAndDebugLoc(MI);
Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
// Truncating a vector is going to truncate every single lane,
// whereas we want the full lowbits.
// Do the operation on a scalar instead.
LLT SrcTy = MRI.getType(SrcReg);
if (SrcTy.isVector())
SrcReg =
Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0);

Register Dst0Reg = MI.getOperand(0).getReg();
LLT Dst0Ty = MRI.getType(Dst0Reg);
if (Dst0Ty.isVector()) {
auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg);
Builder.buildCast(Dst0Reg, MIB);
} else
Builder.buildTrunc(Dst0Reg, SrcReg);
Builder.buildTrunc(Dst0Reg, SrcReg);
MI.eraseFromParent();
}

Expand Down
11 changes: 4 additions & 7 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir
Original file line number Diff line number Diff line change
Expand Up @@ -326,10 +326,8 @@ body: |
bb.1:
; CHECK-LABEL: name: test_combine_unmerge_dead_to_trunc_vec_in_n_out
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $x0
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<4 x s16>)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64)
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[TRUNC]](s32)
; CHECK-NEXT: $w0 = COPY [[BITCAST1]](<2 x s16>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
; CHECK-NEXT: $w0 = COPY [[UV]](<2 x s16>)
%0:_(<4 x s16>) = COPY $x0
%1:_(<2 x s16>),%2:_(<2 x s16>) = G_UNMERGE_VALUES %0(<4 x s16>)
$w0 = COPY %1(<2 x s16>)
Expand All @@ -343,9 +341,8 @@ body: |
bb.1:
; CHECK-LABEL: name: test_combine_unmerge_dead_to_trunc_vec_in
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $x0
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<2 x s32>)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s64)
; CHECK-NEXT: $h0 = COPY [[TRUNC]](s16)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
; CHECK-NEXT: $h0 = COPY [[UV]](s16)
%0:_(<2 x s32>) = COPY $x0
%1:_(s16),%2:_(s16),%3:_(s16),%4:_(s16) = G_UNMERGE_VALUES %0(<2 x s32>)
$h0 = COPY %1(s16)
Expand Down
36 changes: 21 additions & 15 deletions llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,14 @@ define <1 x i8> @test_bitf_v1i8(<1 x i8> %A, <1 x i8> %B, <1 x i8> %C) {
;
; CHECK-GI-LABEL: test_bitf_v1i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: fmov x10, d2
; CHECK-GI-NEXT: bic w9, w9, w10
; CHECK-GI-NEXT: and w8, w10, w8
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-GI-NEXT: umov w8, v2.b[0]
; CHECK-GI-NEXT: umov w9, v1.b[0]
; CHECK-GI-NEXT: umov w10, v0.b[0]
; CHECK-GI-NEXT: bic w9, w9, w8
; CHECK-GI-NEXT: and w8, w8, w10
; CHECK-GI-NEXT: orr w8, w9, w8
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: ret
Expand All @@ -39,11 +42,14 @@ define <1 x i16> @test_bitf_v1i16(<1 x i16> %A, <1 x i16> %B, <1 x i16> %C) {
;
; CHECK-GI-LABEL: test_bitf_v1i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: fmov x10, d2
; CHECK-GI-NEXT: bic w9, w9, w10
; CHECK-GI-NEXT: and w8, w10, w8
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-GI-NEXT: umov w8, v2.h[0]
; CHECK-GI-NEXT: umov w9, v1.h[0]
; CHECK-GI-NEXT: umov w10, v0.h[0]
; CHECK-GI-NEXT: bic w9, w9, w8
; CHECK-GI-NEXT: and w8, w8, w10
; CHECK-GI-NEXT: orr w8, w9, w8
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: ret
Expand All @@ -64,11 +70,11 @@ define <1 x i32> @test_bitf_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
;
; CHECK-GI-LABEL: test_bitf_v1i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: fmov x10, d2
; CHECK-GI-NEXT: bic w9, w9, w10
; CHECK-GI-NEXT: and w8, w10, w8
; CHECK-GI-NEXT: fmov w8, s2
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: fmov w10, s0
; CHECK-GI-NEXT: bic w9, w9, w8
; CHECK-GI-NEXT: and w8, w8, w10
; CHECK-GI-NEXT: orr w8, w9, w8
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: mov v0.s[1], w8
Expand Down
36 changes: 21 additions & 15 deletions llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,14 @@ define <1 x i8> @test_bit_v1i8(<1 x i8> %A, <1 x i8> %B, <1 x i8> %C) {
;
; CHECK-GI-LABEL: test_bit_v1i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: fmov x10, d2
; CHECK-GI-NEXT: and w9, w10, w9
; CHECK-GI-NEXT: bic w8, w8, w10
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-GI-NEXT: umov w8, v2.b[0]
; CHECK-GI-NEXT: umov w9, v1.b[0]
; CHECK-GI-NEXT: umov w10, v0.b[0]
; CHECK-GI-NEXT: and w9, w8, w9
; CHECK-GI-NEXT: bic w8, w10, w8
; CHECK-GI-NEXT: orr w8, w9, w8
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: ret
Expand All @@ -41,11 +44,14 @@ define <1 x i16> @test_bit_v1i16(<1 x i16> %A, <1 x i16> %B, <1 x i16> %C) {
;
; CHECK-GI-LABEL: test_bit_v1i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: fmov x10, d2
; CHECK-GI-NEXT: and w9, w10, w9
; CHECK-GI-NEXT: bic w8, w8, w10
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-GI-NEXT: umov w8, v2.h[0]
; CHECK-GI-NEXT: umov w9, v1.h[0]
; CHECK-GI-NEXT: umov w10, v0.h[0]
; CHECK-GI-NEXT: and w9, w8, w9
; CHECK-GI-NEXT: bic w8, w10, w8
; CHECK-GI-NEXT: orr w8, w9, w8
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: ret
Expand All @@ -66,11 +72,11 @@ define <1 x i32> @test_bit_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
;
; CHECK-GI-LABEL: test_bit_v1i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: fmov x10, d2
; CHECK-GI-NEXT: and w9, w10, w9
; CHECK-GI-NEXT: bic w8, w8, w10
; CHECK-GI-NEXT: fmov w8, s2
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: fmov w10, s0
; CHECK-GI-NEXT: and w9, w8, w9
; CHECK-GI-NEXT: bic w8, w10, w8
; CHECK-GI-NEXT: orr w8, w9, w8
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: mov v0.s[1], w8
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/abs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ define <1 x i32> @abs_v1i32(<1 x i32> %a){
;
; CHECK-GI-LABEL: abs_v1i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: asr w9, w8, #31
; CHECK-GI-NEXT: add w8, w8, w9
; CHECK-GI-NEXT: eor w8, w8, w9
Expand Down
86 changes: 28 additions & 58 deletions llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1394,7 +1394,7 @@ define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
;
; CHECK-GI-LABEL: testDUP.v1i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: dup v0.8b, w8
; CHECK-GI-NEXT: ret
%b = extractelement <1 x i8> %a, i32 0
Expand All @@ -1410,17 +1410,11 @@ define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
}

define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
; CHECK-SD-LABEL: testDUP.v1i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: dup v0.8h, v0.h[0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testDUP.v1i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: dup v0.8h, w8
; CHECK-GI-NEXT: ret
; CHECK-LABEL: testDUP.v1i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup v0.8h, v0.h[0]
; CHECK-NEXT: ret
%b = extractelement <1 x i16> %a, i32 0
%c = insertelement <8 x i16> undef, i16 %b, i32 0
%d = insertelement <8 x i16> %c, i16 %b, i32 1
Expand All @@ -1434,17 +1428,11 @@ define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
}

define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
; CHECK-SD-LABEL: testDUP.v1i32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: dup v0.4s, v0.s[0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testDUP.v1i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: dup v0.4s, w8
; CHECK-GI-NEXT: ret
; CHECK-LABEL: testDUP.v1i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup v0.4s, v0.s[0]
; CHECK-NEXT: ret
%b = extractelement <1 x i32> %a, i32 0
%c = insertelement <4 x i32> undef, i32 %b, i32 0
%d = insertelement <4 x i32> %c, i32 %b, i32 1
Expand Down Expand Up @@ -2448,33 +2436,21 @@ define <16 x i8> @concat_vector_v16i8_const() {
}

define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
; CHECK-SD-LABEL: concat_vector_v4i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: dup v0.4h, v0.h[0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: concat_vector_v4i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: dup v0.4h, w8
; CHECK-GI-NEXT: ret
; CHECK-LABEL: concat_vector_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup v0.4h, v0.h[0]
; CHECK-NEXT: ret
%r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
ret <4 x i16> %r
}

define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
; CHECK-SD-LABEL: concat_vector_v4i32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: dup v0.4s, v0.s[0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: concat_vector_v4i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: dup v0.4s, w8
; CHECK-GI-NEXT: ret
; CHECK-LABEL: concat_vector_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup v0.4s, v0.s[0]
; CHECK-NEXT: ret
%r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %r
}
Expand All @@ -2488,25 +2464,19 @@ define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
;
; CHECK-GI-LABEL: concat_vector_v8i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: dup v0.8b, w8
; CHECK-GI-NEXT: ret
%r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
ret <8 x i8> %r
}

define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
; CHECK-SD-LABEL: concat_vector_v8i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: dup v0.8h, v0.h[0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: concat_vector_v8i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: dup v0.8h, w8
; CHECK-GI-NEXT: ret
; CHECK-LABEL: concat_vector_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup v0.8h, v0.h[0]
; CHECK-NEXT: ret
%r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %r
}
Expand All @@ -2520,7 +2490,7 @@ define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
;
; CHECK-GI-LABEL: concat_vector_v16i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: dup v0.16b, w8
; CHECK-GI-NEXT: ret
%r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
Expand Down
2 changes: 0 additions & 2 deletions llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,6 @@ define half @test_vcvt_f16_f32(<1 x float> %x) {
;
; GISEL-LABEL: test_vcvt_f16_f32:
; GISEL: // %bb.0:
; GISEL-NEXT: fmov x8, d0
; GISEL-NEXT: fmov s0, w8
; GISEL-NEXT: fcvt h0, s0
; GISEL-NEXT: ret
%tmp = fptrunc <1 x float> %x to <1 x half>
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/bswap.ll
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ define <1 x i32> @bswap_v1i32(<1 x i32> %a){
;
; CHECK-GI-LABEL: bswap_v1i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: rev w8, w8
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: mov v0.s[1], w8
Expand Down
29 changes: 9 additions & 20 deletions llvm/test/CodeGen/AArch64/fpext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -85,24 +85,14 @@ entry:
}

define <2 x double> @fpext_v2f16_v2f64(<2 x half> %a) {
; CHECK-SD-LABEL: fpext_v2f16_v2f64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: mov h1, v0.h[1]
; CHECK-SD-NEXT: fcvt d0, h0
; CHECK-SD-NEXT: fcvt d1, h1
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fpext_v2f16_v2f64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: mov h1, v0.h[1]
; CHECK-GI-NEXT: fcvt d0, h0
; CHECK-GI-NEXT: fcvt d1, h1
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: ret
; CHECK-LABEL: fpext_v2f16_v2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov h1, v0.h[1]
; CHECK-NEXT: fcvt d0, h0
; CHECK-NEXT: fcvt d1, h1
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: ret
entry:
%c = fpext <2 x half> %a to <2 x double>
ret <2 x double> %c
Expand Down Expand Up @@ -165,8 +155,7 @@ define <2 x float> @fpext_v2f16_v2f32(<2 x half> %a) {
;
; CHECK-GI-LABEL: fpext_v2f16_v2f32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov h1, v0.h[1]
; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
; CHECK-GI-NEXT: mov v0.h[2], v0.h[0]
Expand Down
Loading