[GlobalISel] Add support for interleave and deinterleave intrinsics to IRTranslator #85199

Merged · 5 commits · Mar 15, 2024
8 changes: 8 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -243,6 +243,14 @@ class IRTranslator : public MachineFunctionPass {
bool translateMemFunc(const CallInst &CI, MachineIRBuilder &MIRBuilder,
unsigned Opcode);

// Translate @llvm.experimental.vector.interleave2 and
// @llvm.experimental.vector.deinterleave2 intrinsics for fixed-width vector
// types into vector shuffles.
bool translateVectorInterleave2Intrinsic(const CallInst &CI,
MachineIRBuilder &MIRBuilder);
bool translateVectorDeinterleave2Intrinsic(const CallInst &CI,
MachineIRBuilder &MIRBuilder);

void getStackGuard(Register DstReg, MachineIRBuilder &MIRBuilder);

bool translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
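For reference, a minimal LLVM IR sketch of the canonicalization these new hooks perform (illustrative only; the function name below is invented and nothing here is part of the patch): on fixed-width vectors, an @llvm.experimental.vector.interleave2 call produces the same value as a single shufflevector whose mask alternates lanes from the two sources, which is what createInterleaveMask builds.

; Illustrative sketch, not part of the diff: interleave2 on two <2 x i32>
; operands is equivalent to one shufflevector with mask (0, 2, 1, 3),
; i.e. createInterleaveMask(2, 2).
define <4 x i32> @interleave2_as_shuffle(<2 x i32> %a, <2 x i32> %b) {
  %res = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x i32> %res
}

This mirrors the shufflemask(0, 2, 1, 3) checked in the new irtranslator interleave2 test further down.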
51 changes: 51 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
@@ -1770,6 +1771,41 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
return true;
}

bool IRTranslator::translateVectorInterleave2Intrinsic(
const CallInst &CI, MachineIRBuilder &MIRBuilder) {
assert(CI.getIntrinsicID() == Intrinsic::experimental_vector_interleave2 &&
"This function can only be called on the interleave2 intrinsic!");
// Canonicalize interleave2 to G_SHUFFLE_VECTOR (similar to SelectionDAG).
Register Op0 = getOrCreateVReg(*CI.getOperand(0));
Register Op1 = getOrCreateVReg(*CI.getOperand(1));
Register Res = getOrCreateVReg(CI);

LLT OpTy = MRI->getType(Op0);
MIRBuilder.buildShuffleVector(Res, Op0, Op1,
createInterleaveMask(OpTy.getNumElements(), 2));

return true;
}

bool IRTranslator::translateVectorDeinterleave2Intrinsic(
const CallInst &CI, MachineIRBuilder &MIRBuilder) {
assert(CI.getIntrinsicID() == Intrinsic::experimental_vector_deinterleave2 &&
"This function can only be called on the deinterleave2 intrinsic!");
// Canonicalize deinterleave2 to shuffles that extract sub-vectors (similar to
// SelectionDAG).
Register Op = getOrCreateVReg(*CI.getOperand(0));
auto Undef = MIRBuilder.buildUndef(MRI->getType(Op));
ArrayRef<Register> Res = getOrCreateVRegs(CI);

LLT ResTy = MRI->getType(Res[0]);
MIRBuilder.buildShuffleVector(Res[0], Op, Undef,
createStrideMask(0, 2, ResTy.getNumElements()));
MIRBuilder.buildShuffleVector(Res[1], Op, Undef,
createStrideMask(1, 2, ResTy.getNumElements()));

return true;
}

void IRTranslator::getStackGuard(Register DstReg,
MachineIRBuilder &MIRBuilder) {
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
@@ -2474,6 +2510,21 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,

return true;
}

case Intrinsic::experimental_vector_interleave2:
case Intrinsic::experimental_vector_deinterleave2: {
// Both intrinsics have at least one operand.
Value *Op0 = CI.getOperand(0);
LLT ResTy = getLLTForType(*Op0->getType(), MIRBuilder.getDataLayout());
if (!ResTy.isFixedVector())
return false;

if (CI.getIntrinsicID() == Intrinsic::experimental_vector_interleave2)
return translateVectorInterleave2Intrinsic(CI, MIRBuilder);

return translateVectorDeinterleave2Intrinsic(CI, MIRBuilder);
}

#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
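Similarly, a hedged sketch of what translateVectorDeinterleave2Intrinsic emits (illustrative only; the function name is invented): deinterleave2 on a fixed-width <4 x i32> becomes two strided shuffles that pick the even and odd lanes, matching createStrideMask(0, 2, 2) and createStrideMask(1, 2, 2) and the shufflemask(0, 2) / shufflemask(1, 3) lines in the new test.

; Illustrative sketch, not part of the diff: the even and odd lanes are
; extracted with two strided shuffles of the single input against undef,
; mirroring the G_IMPLICIT_DEF second operand built by the translator.
define {<2 x i32>, <2 x i32>} @deinterleave2_as_shuffles(<4 x i32> %v) {
  %even = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %odd = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %r0 = insertvalue {<2 x i32>, <2 x i32>} undef, <2 x i32> %even, 0
  %r1 = insertvalue {<2 x i32>, <2 x i32>} %r0, <2 x i32> %odd, 1
  ret {<2 x i32>, <2 x i32>} %r1
}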
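One more hedged note on the guard in translateKnownIntrinsic: only fixed-width vectors are handled, so a scalable form such as the illustrative declaration below fails the isFixedVector() check and is left to the existing SelectionDAG fallback, consistent with the --global-isel-abort=2 RUN lines that allow that path.

; Illustrative only: scalable deinterleave2 is not translated by this patch
; and falls back to SelectionDAG.
declare {<vscale x 2 x i32>, <vscale x 2 x i32>} @llvm.experimental.vector.deinterleave2.nxv4i32(<vscale x 4 x i32>)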
34 changes: 34 additions & 0 deletions (new GlobalISel IRTranslator test: vector deinterleave2)
@@ -0,0 +1,34 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
; RUN: llc -O0 -mtriple=aarch64-- --global-isel --global-isel-abort=2 --verify-machineinstrs --stop-after=irtranslator %s -o - | FileCheck %s

define void @vector_deinterleave2_v4i32(<4 x i32> %a) {
; CHECK-LABEL: name: vector_deinterleave2_v4i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[DEF]], shufflemask(0, 2)
; CHECK-NEXT: [[SHUF1:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[DEF]], shufflemask(1, 3)
; CHECK-NEXT: RET_ReallyLR
%res = call {<2 x i32>, <2 x i32>} @llvm.experimental.vector.deinterleave2.v4i32(<4 x i32> %a)
ret void
}

define void @vector_deinterleave2_v8f32(<8 x float> %a) {
; CHECK-LABEL: name: vector_deinterleave2_v8f32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x s64>)
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[BITCAST]](<4 x s32>), [[BITCAST1]](<4 x s32>)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[CONCAT_VECTORS]](<8 x s32>), [[DEF]], shufflemask(0, 2, 4, 6)
; CHECK-NEXT: [[SHUF1:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[CONCAT_VECTORS]](<8 x s32>), [[DEF]], shufflemask(1, 3, 5, 7)
; CHECK-NEXT: RET_ReallyLR
%res = call {<4 x float>, <4 x float>} @llvm.experimental.vector.deinterleave2.v8f32(<8 x float> %a)
ret void
}
30 changes: 30 additions & 0 deletions (new GlobalISel IRTranslator test: vector interleave2)
@@ -0,0 +1,30 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
; RUN: llc -O0 -mtriple=aarch64-- --global-isel --global-isel-abort=2 --verify-machineinstrs --stop-after=irtranslator %s -o - | FileCheck %s

define void @vector_interleave2_v4i32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: name: vector_interleave2_v4i32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $d0, $d1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(0, 2, 1, 3)
; CHECK-NEXT: RET_ReallyLR
%res = call <4 x i32> @llvm.experimental.vector.interleave2.v4i32(<2 x i32> %a, <2 x i32> %b)
ret void
}

define void @vector_interleave2_v8f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: name: vector_interleave2_v8f32
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x s64>)
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s32>) = G_SHUFFLE_VECTOR [[BITCAST]](<4 x s32>), [[BITCAST1]], shufflemask(0, 4, 1, 5, 2, 6, 3, 7)
; CHECK-NEXT: RET_ReallyLR
%res = call <8 x float> @llvm.experimental.vector.interleave2.v8f32(<4 x float> %a, <4 x float> %b)
ret void
}
81 changes: 56 additions & 25 deletions llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
@@ -1,23 +1,42 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve -o - | FileCheck %s
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - | FileCheck %s
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s --global-isel --global-isel-abort=2 --mattr=+complxnum,+neon,+fullfp16 -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; RUN: llc < %s --global-isel --global-isel-abort=2 --mattr=+complxnum,+neon,+fullfp16,+sve -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; RUN: llc < %s --global-isel --global-isel-abort=2 --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
Comment on lines +2 to +7

Contributor: I think too much is changing in this test that's only tangentially related to the change. Can you add a dedicated IRTranslator test for this, like others in test/CodeGen/AArch64/GlobalISel/irtranslator-*?

Contributor: I agree with this.

Contributor (Author): I have added two test files for this.

target triple = "aarch64"

; CHECK-GI: warning: Instruction selection used fallback path for complex_add_v16f16
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for complex_add_v32f16
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for complex_add_v16f16_with_intrinsic

; Expected to not transform
define <2 x half> @complex_add_v2f16(<2 x half> %a, <2 x half> %b) {
; CHECK-LABEL: complex_add_v2f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov h2, v0.h[1]
; CHECK-NEXT: mov h3, v1.h[1]
; CHECK-NEXT: fsub h1, h1, h2
; CHECK-NEXT: fadd h0, h3, h0
; CHECK-NEXT: mov v1.h[1], v0.h[0]
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
; CHECK-SD-LABEL: complex_add_v2f16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: mov h2, v0.h[1]
; CHECK-SD-NEXT: mov h3, v1.h[1]
; CHECK-SD-NEXT: fsub h1, h1, h2
; CHECK-SD-NEXT: fadd h0, h3, h0
; CHECK-SD-NEXT: mov v1.h[1], v0.h[0]
; CHECK-SD-NEXT: fmov d0, d1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: complex_add_v2f16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: mov h2, v0.h[1]
; CHECK-GI-NEXT: mov h3, v1.h[1]
; CHECK-GI-NEXT: fsub h1, h1, h2
; CHECK-GI-NEXT: fadd h0, h3, h0
; CHECK-GI-NEXT: mov v1.h[1], v0.h[0]
; CHECK-GI-NEXT: fmov d0, d1
; CHECK-GI-NEXT: ret
entry:
%a.real = shufflevector <2 x half> %a, <2 x half> zeroinitializer, <1 x i32> <i32 0>
%a.imag = shufflevector <2 x half> %a, <2 x half> zeroinitializer, <1 x i32> <i32 1>
@@ -162,17 +181,29 @@ entry:

; Expected not to transform as it is integer
define <16 x i16> @complex_add_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: complex_add_v16i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp1 v4.8h, v2.8h, v3.8h
; CHECK-NEXT: uzp1 v5.8h, v0.8h, v1.8h
; CHECK-NEXT: uzp2 v0.8h, v0.8h, v1.8h
; CHECK-NEXT: uzp2 v1.8h, v2.8h, v3.8h
; CHECK-NEXT: sub v2.8h, v4.8h, v0.8h
; CHECK-NEXT: add v1.8h, v1.8h, v5.8h
; CHECK-NEXT: zip1 v0.8h, v2.8h, v1.8h
; CHECK-NEXT: zip2 v1.8h, v2.8h, v1.8h
; CHECK-NEXT: ret
; CHECK-SD-LABEL: complex_add_v16i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: uzp1 v4.8h, v2.8h, v3.8h
; CHECK-SD-NEXT: uzp1 v5.8h, v0.8h, v1.8h
; CHECK-SD-NEXT: uzp2 v0.8h, v0.8h, v1.8h
; CHECK-SD-NEXT: uzp2 v1.8h, v2.8h, v3.8h
; CHECK-SD-NEXT: sub v2.8h, v4.8h, v0.8h
; CHECK-SD-NEXT: add v1.8h, v1.8h, v5.8h
; CHECK-SD-NEXT: zip1 v0.8h, v2.8h, v1.8h
; CHECK-SD-NEXT: zip2 v1.8h, v2.8h, v1.8h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: complex_add_v16i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: uzp1 v4.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: uzp2 v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: uzp1 v1.8h, v2.8h, v3.8h
; CHECK-GI-NEXT: uzp2 v2.8h, v2.8h, v3.8h
; CHECK-GI-NEXT: sub v1.8h, v1.8h, v0.8h
; CHECK-GI-NEXT: add v2.8h, v2.8h, v4.8h
; CHECK-GI-NEXT: zip1 v0.8h, v1.8h, v2.8h
; CHECK-GI-NEXT: zip2 v1.8h, v1.8h, v2.8h
; CHECK-GI-NEXT: ret
entry:
%a.real = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%a.imag = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
79 changes: 54 additions & 25 deletions llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
@@ -1,29 +1,50 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

define {<2 x half>, <2 x half>} @vector_deinterleave_v2f16_v4f16(<4 x half> %vec) {
; CHECK-LABEL: vector_deinterleave_v2f16_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup v2.2s, v0.s[1]
; CHECK-NEXT: mov v1.16b, v2.16b
; CHECK-NEXT: mov v1.h[0], v0.h[1]
; CHECK-NEXT: mov v0.h[1], v2.h[0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
; CHECK-NEXT: ret
; CHECK-SD-LABEL: vector_deinterleave_v2f16_v4f16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: dup v2.2s, v0.s[1]
; CHECK-SD-NEXT: mov v1.16b, v2.16b
; CHECK-SD-NEXT: mov v1.h[0], v0.h[1]
; CHECK-SD-NEXT: mov v0.h[1], v2.h[0]
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: vector_deinterleave_v2f16_v4f16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: uzp1 v2.4h, v0.4h, v0.4h
; CHECK-GI-NEXT: uzp2 v1.4h, v0.4h, v0.4h
; CHECK-GI-NEXT: mov h0, v2.h[1]
; CHECK-GI-NEXT: mov h3, v1.h[1]
; CHECK-GI-NEXT: mov v2.h[1], v0.h[0]
; CHECK-GI-NEXT: mov v1.h[1], v3.h[0]
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1
; CHECK-GI-NEXT: fmov d0, d2
; CHECK-GI-NEXT: ret
%retval = call {<2 x half>, <2 x half>} @llvm.experimental.vector.deinterleave2.v4f16(<4 x half> %vec)
ret {<2 x half>, <2 x half>} %retval
}

define {<4 x half>, <4 x half>} @vector_deinterleave_v4f16_v8f16(<8 x half> %vec) {
; CHECK-LABEL: vector_deinterleave_v4f16_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: uzp1 v2.4h, v0.4h, v1.4h
; CHECK-NEXT: uzp2 v1.4h, v0.4h, v1.4h
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
; CHECK-SD-LABEL: vector_deinterleave_v4f16_v8f16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: uzp1 v2.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: uzp2 v1.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: fmov d0, d2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: vector_deinterleave_v4f16_v8f16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: uzp1 v2.8h, v0.8h, v0.8h
; CHECK-GI-NEXT: uzp2 v1.8h, v0.8h, v0.8h
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1
; CHECK-GI-NEXT: fmov d0, d2
; CHECK-GI-NEXT: ret
%retval = call {<4 x half>, <4 x half>} @llvm.experimental.vector.deinterleave2.v8f16(<8 x half> %vec)
ret {<4 x half>, <4 x half>} %retval
}
@@ -40,13 +61,21 @@ define {<8 x half>, <8 x half>} @vector_deinterleave_v8f16_v16f16(<16 x half> %v
}

define {<2 x float>, <2 x float>} @vector_deinterleave_v2f32_v4f32(<4 x float> %vec) {
; CHECK-LABEL: vector_deinterleave_v2f32_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: zip1 v2.2s, v0.2s, v1.2s
; CHECK-NEXT: zip2 v1.2s, v0.2s, v1.2s
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
; CHECK-SD-LABEL: vector_deinterleave_v2f32_v4f32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: zip1 v2.2s, v0.2s, v1.2s
; CHECK-SD-NEXT: zip2 v1.2s, v0.2s, v1.2s
; CHECK-SD-NEXT: fmov d0, d2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: vector_deinterleave_v2f32_v4f32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: uzp1 v2.4s, v0.4s, v0.4s
; CHECK-GI-NEXT: uzp2 v1.4s, v0.4s, v0.4s
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1
; CHECK-GI-NEXT: fmov d0, d2
; CHECK-GI-NEXT: ret
%retval = call {<2 x float>, <2 x float>} @llvm.experimental.vector.deinterleave2.v4f32(<4 x float> %vec)
ret {<2 x float>, <2 x float>} %retval
}