Skip to content

[GlobalISel] Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not reference a #115377

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -874,6 +874,10 @@ class CombinerHelper {
/// Remove references to rhs if it is undef
bool matchShuffleUndefRHS(MachineInstr &MI, BuildFnTy &MatchInfo);

/// Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not
/// reference a.
bool matchShuffleDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo);

/// Use a function which takes in a MachineIRBuilder to perform a combine.
/// By default, it erases the instruction def'd on \p MO from the function.
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo);
Expand Down
10 changes: 9 additions & 1 deletion llvm/include/llvm/Target/GlobalISel/Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -1594,6 +1594,13 @@ def combine_shuffle_undef_rhs : GICombineRule<
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
>;

// Combine rule rooted at a G_SHUFFLE_VECTOR. The match step defers to
// Helper.matchShuffleDisjointMask, which fills in $matchinfo; the apply step
// runs that build function on the root instruction via Helper.applyBuildFn.
def combine_shuffle_disjoint_mask : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
[{ return Helper.matchShuffleDisjointMask(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])
>;

// match_extract_of_element and insert_vector_elt_oob must be the first!
def vector_ops_combines: GICombineGroup<[
match_extract_of_element_undef_vector,
Expand Down Expand Up @@ -1945,7 +1952,8 @@ def constant_fold_binops : GICombineGroup<[constant_fold_binop,
def prefer_sign_combines : GICombineGroup<[nneg_zext]>;

def shuffle_combines : GICombineGroup<[combine_shuffle_concat,
combine_shuffle_undef_rhs]>;
combine_shuffle_undef_rhs,
combine_shuffle_disjoint_mask]>;

def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
vector_ops_combines, freeze_combines, cast_combines,
Expand Down
66 changes: 66 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7724,3 +7724,69 @@ bool CombinerHelper::matchShuffleUndefRHS(MachineInstr &MI,

return true;
}

/// Rewrite \p Mask in place so that it selects the same elements after the two
/// shuffle sources have traded places.
///
/// Every non-negative entry below \p NumElems is raised by \p NumElems; every
/// other non-negative entry is lowered by \p NumElems. Negative entries are
/// left exactly as they are.
static void commuteMask(MutableArrayRef<int> Mask, const unsigned NumElems) {
  const int Pivot = (int)NumElems;
  for (int &Elt : Mask) {
    // Negative entries carry no source index, so there is nothing to remap.
    if (Elt < 0)
      continue;

    // Entries below the pivot move up by NumElems, the rest move down.
    Elt = (Elt < Pivot) ? Elt + NumElems : Elt - NumElems;
  }
}

/// Match a G_SHUFFLE_VECTOR whose mask reads from only one of its two sources.
///
/// On success \p MatchInfo is set to a builder that re-emits the shuffle into
/// the same destination register, with the referenced source as the first
/// operand and a freshly built undef of the first source's type as the second
/// operand.
///
/// \returns false when either source is already defined by G_IMPLICIT_DEF,
/// when isLegalOrBeforeLegalizer rejects the shuffle on {DstTy, Src1Ty}, or
/// when the mask references both sources or neither of them; true otherwise.
bool CombinerHelper::matchShuffleDisjointMask(MachineInstr &MI,
BuildFnTy &MatchInfo) {

auto &Shuffle = cast<GShuffleVector>(MI);
// If either of the two inputs is already undef, don't inspect the mask again:
// the shuffle built below has an undef operand, so without this guard the
// combine would match its own output and loop forever.
if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc1Reg(), MRI))
return false;

if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc2Reg(), MRI))
return false;
Comment on lines +7746 to +7752
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After the discussion on this PR, you can remove this again since it should be impossible for G_IMPLICIT_DEF of that type to be illegal.


// Bail out unless isLegalOrBeforeLegalizer accepts a G_SHUFFLE_VECTOR on
// {DstTy, Src1Ty}.
const LLT DstTy = MRI.getType(Shuffle.getReg(0));
const LLT Src1Ty = MRI.getType(Shuffle.getSrc1Reg());
if (!isLegalOrBeforeLegalizer(
{TargetOpcode::G_SHUFFLE_VECTOR, {DstTy, Src1Ty}}))
return false;

ArrayRef<int> Mask = Shuffle.getMask();
// A non-vector source counts as a single element.
const unsigned NumSrcElems = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;

// Classify every non-negative mask entry: indices below NumSrcElems select
// from the first source, all remaining indices select from the second one.
// Negative entries are skipped.
bool TouchesSrc1 = false;
bool TouchesSrc2 = false;
const unsigned NumElems = Mask.size();
for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
if (Mask[Idx] < 0)
continue;

if (Mask[Idx] < (int)NumSrcElems)
TouchesSrc1 = true;
else
TouchesSrc2 = true;
}

// Equal flags mean the mask uses both sources or neither of them; in both
// cases there is nothing to rewrite.
if (TouchesSrc1 == TouchesSrc2)
return false;

// The referenced source becomes the first operand. When that is the second
// source, the copied mask is commuted so its indices point at the new first
// operand.
Register NewSrc1 = Shuffle.getSrc1Reg();
SmallVector<int, 16> NewMask(Mask);
if (TouchesSrc2) {
NewSrc1 = Shuffle.getSrc2Reg();
commuteMask(NewMask, NumSrcElems);
}

// Shuffle is captured by reference, everything else (NewSrc1, NewMask,
// Src1Ty) by copy. The builder emits the undef operand and the replacement
// shuffle that defines the original destination register.
MatchInfo = [=, &Shuffle](MachineIRBuilder &B) {
auto Undef = B.buildUndef(Src1Ty);
B.buildShuffleVector(Shuffle.getReg(0), NewSrc1, Undef, NewMask);
};

return true;
}
Original file line number Diff line number Diff line change
Expand Up @@ -598,9 +598,9 @@ body: |
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %arg1:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: %arg2:_(<4 x s32>) = COPY $q1
; CHECK-NEXT: %idx:_(s64) = COPY $x1
; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2, shufflemask(undef, 0, 0, 0)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), [[DEF]], shufflemask(undef, 0, 0, 0)
; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT %sv(<4 x s32>), %idx(s64)
; CHECK-NEXT: $w0 = COPY %extract(s32)
; CHECK-NEXT: RET_ReallyLR implicit $x0
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
Original file line number Diff line number Diff line change
Expand Up @@ -1395,9 +1395,9 @@ body: |
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %arg1:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: %arg2:_(<4 x s32>) = COPY $q1
; CHECK-NEXT: %idx:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2, shufflemask(3, 0, 0, 0)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), [[DEF]], shufflemask(3, 0, 0, 0)
; CHECK-NEXT: %freeze_sv:_(<4 x s32>) = G_FREEZE %sv
; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT %freeze_sv(<4 x s32>), %idx(s64)
; CHECK-NEXT: $w0 = COPY %extract(s32)
Expand All @@ -1422,9 +1422,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %elt:_(s32) = COPY $w0
; CHECK-NEXT: %arg1:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: %arg2:_(<4 x s32>) = COPY $q1
; CHECK-NEXT: %idx:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2, shufflemask(3, 0, 0, 0)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), [[DEF]], shufflemask(3, 0, 0, 0)
; CHECK-NEXT: %freeze_sv:_(<4 x s32>) = G_FREEZE %sv
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE %elt
; CHECK-NEXT: %extract:_(<4 x s32>) = G_INSERT_VECTOR_ELT %freeze_sv, [[FREEZE]](s32), %idx(s64)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64-apple-ios -run-pass=aarch64-prelegalizer-combiner %s -o - | FileCheck %s

---
name: shuffle_vector_unused_lhs
tracksRegLiveness: true
body: |
bb.1:
liveins: $d0, $d1

; CHECK-LABEL: name: shuffle_vector_unused_lhs
; CHECK: liveins: $d0, $d1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d1
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[DEF]], shufflemask(1, 0, 1, 0)
; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
%0:_(<2 x s32>) = COPY $d0
%1:_(<2 x s32>) = COPY $d1
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1(<2 x s32>), shufflemask(3, 2, 3, 2)
RET_ReallyLR implicit %2
...

---
name: shuffle_vector_unused_rhs
tracksRegLiveness: true
body: |
bb.1:
liveins: $d0, $d1

; CHECK-LABEL: name: shuffle_vector_unused_rhs
; CHECK: liveins: $d0, $d1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[DEF]], shufflemask(0, 0, 1, 1)
; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
%0:_(<2 x s32>) = COPY $d0
%1:_(<2 x s32>) = COPY $d1
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0,0,1,1)
RET_ReallyLR implicit %2
...

---
name: shuffle_vector_both_used
tracksRegLiveness: true
body: |
bb.1:
liveins: $d0, $d1

; CHECK-LABEL: name: shuffle_vector_both_used
; CHECK: liveins: $d0, $d1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(0, 2, 1, 3)
; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
%0:_(<2 x s32>) = COPY $d0
%1:_(<2 x s32>) = COPY $d1
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0,2,1,3)
RET_ReallyLR implicit %2
...

---
name: shuffle_vector_undef_elems
tracksRegLiveness: true
body: |
bb.1:
liveins: $d0, $d1

; CHECK-LABEL: name: shuffle_vector_undef_elems
; CHECK: liveins: $d0, $d1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[DEF]], shufflemask(undef, 0, 1, undef)
; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
%0:_(<2 x s32>) = COPY $d0
%1:_(<2 x s32>) = COPY $d1
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(-1,0,1,-1)
RET_ReallyLR implicit %2
...

---
name: shuffle_vector_scalar
tracksRegLiveness: true
body: |
bb.1:
liveins: $x0, $x1

; CHECK-LABEL: name: shuffle_vector_scalar
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY]](s64), [[COPY]](s64), [[COPY]](s64)
; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s64>)
%0:_(s64) = COPY $x0
%1:_(s64) = COPY $x1
%2:_(<4 x s64>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 0, 0, 0)
RET_ReallyLR implicit %2
...
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
Original file line number Diff line number Diff line change
Expand Up @@ -201,21 +201,21 @@ define void @matrix_mul_double_shuffle(i32 %N, ptr nocapture %C, ptr nocapture r
; CHECK-GI: // %bb.0: // %vector.header
; CHECK-GI-NEXT: and w9, w3, #0xffff
; CHECK-GI-NEXT: adrp x8, .LCPI2_0
; CHECK-GI-NEXT: dup v1.4s, w9
; CHECK-GI-NEXT: dup v0.4s, w9
; CHECK-GI-NEXT: mov w9, w0
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI2_0]
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
; CHECK-GI-NEXT: and x8, x9, #0xfffffff8
; CHECK-GI-NEXT: .LBB2_1: // %vector.body
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-GI-NEXT: ldrh w9, [x2], #16
; CHECK-GI-NEXT: subs x8, x8, #8
; CHECK-GI-NEXT: mov v0.s[0], w9
; CHECK-GI-NEXT: mov v2.s[0], w9
; CHECK-GI-NEXT: mov w9, w0
; CHECK-GI-NEXT: add w0, w0, #8
; CHECK-GI-NEXT: lsl x9, x9, #2
; CHECK-GI-NEXT: tbl v3.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT: mul v3.4s, v1.4s, v3.4s
; CHECK-GI-NEXT: str q3, [x1, x9]
; CHECK-GI-NEXT: tbl v2.16b, { v2.16b, v3.16b }, v1.16b
; CHECK-GI-NEXT: mul v2.4s, v0.4s, v2.4s
; CHECK-GI-NEXT: str q2, [x1, x9]
; CHECK-GI-NEXT: b.ne .LBB2_1
; CHECK-GI-NEXT: // %bb.2: // %for.end12
; CHECK-GI-NEXT: ret
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AArch64/aarch64-smull.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2385,9 +2385,8 @@ define <2 x i32> @do_stuff(<2 x i64> %0, <2 x i64> %1) {
;
; CHECK-GI-LABEL: do_stuff:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
; CHECK-GI-NEXT: ext v2.16b, v1.16b, v2.16b, #8
; CHECK-GI-NEXT: mov d2, v1.d[1]
; CHECK-GI-NEXT: umull v0.2d, v2.2s, v0.2s
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
Expand Down
13 changes: 4 additions & 9 deletions llvm/test/CodeGen/AArch64/arm64-ext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,10 @@ define <8 x i8> @test_vextd_undef(<8 x i8> %tmp1, <8 x i8> %tmp2) {
}

define <8 x i8> @test_vextd_undef2(<8 x i8> %tmp1, <8 x i8> %tmp2) {
; CHECK-SD-LABEL: test_vextd_undef2:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ext v0.8b, v0.8b, v0.8b, #6
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_vextd_undef2:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ext v0.8b, v1.8b, v0.8b, #6
; CHECK-GI-NEXT: ret
; CHECK-LABEL: test_vextd_undef2:
; CHECK: // %bb.0:
; CHECK-NEXT: ext v0.8b, v0.8b, v0.8b, #6
; CHECK-NEXT: ret
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 3, i32 4, i32 5>
ret <8 x i8> %tmp3
}
Expand Down
64 changes: 42 additions & 22 deletions llvm/test/CodeGen/AArch64/arm64-extract_subvector.ll
Original file line number Diff line number Diff line change
@@ -1,35 +1,50 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefix=CHECK-SD
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -aarch64-neon-syntax=apple | FileCheck %s --check-prefix=CHECK-GI

; Extract of an upper half of a vector is an "ext.16b v0, v0, v0, #8" insn.

define <8 x i8> @v8i8(<16 x i8> %a) nounwind {
; CHECK-LABEL: v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ext.16b v0, v0, v0, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
; CHECK-SD-LABEL: v8i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ext.16b v0, v0, v0, #8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: v8i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov d0, v0[1]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After rebasing, this commit now also modifies this test.
I'm not an AArch64 expert; is there any drawback (or advantage) to using a plain mov over ext?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah it looks OK, they should both be about the same.

I would keep the prefixes though, to keep them in sync with how we run the other tests.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are no common CHECK lines left, so FileCheck was complaining:

error: no check strings found with prefix 'CHECK:'

; CHECK-GI-NEXT: ret
%ret = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <8 x i8> %ret
}

define <4 x i16> @v4i16(<8 x i16> %a) nounwind {
; CHECK-LABEL: v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ext.16b v0, v0, v0, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
; CHECK-SD-LABEL: v4i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ext.16b v0, v0, v0, #8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: v4i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov d0, v0[1]
; CHECK-GI-NEXT: ret
%ret = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
ret <4 x i16> %ret
}

define <2 x i32> @v2i32(<4 x i32> %a) nounwind {
; CHECK-LABEL: v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ext.16b v0, v0, v0, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
; CHECK-SD-LABEL: v2i32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ext.16b v0, v0, v0, #8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: v2i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov d0, v0[1]
; CHECK-GI-NEXT: ret
%ret = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
ret <2 x i32> %ret
}
Expand Down Expand Up @@ -65,11 +80,16 @@ define <1 x ptr> @v1p0(<2 x ptr> %a) nounwind {
}

define <2 x float> @v2f32(<4 x float> %a) nounwind {
; CHECK-LABEL: v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ext.16b v0, v0, v0, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
; CHECK-SD-LABEL: v2f32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ext.16b v0, v0, v0, #8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: v2f32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov d0, v0[1]
; CHECK-GI-NEXT: ret
%ret = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 2, i32 3>
ret <2 x float> %ret
}
Expand Down
Loading
Loading