[LLVM][CodeGen][AArch64] Lower vector-(de)interleave to multi-register uzp/zip instructions. #143128
Conversation
@llvm/pr-subscribers-backend-aarch64

Author: Paul Walker (paulwalker-arm)

Changes

Patch is 57.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/143128.diff

3 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9f51caef6d228..882264b859410 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -29210,6 +29210,28 @@ AArch64TargetLowering::LowerVECTOR_DEINTERLEAVE(SDValue Op,
assert(OpVT.isScalableVector() &&
"Expected scalable vector in LowerVECTOR_DEINTERLEAVE.");
+ // Are multi-register uzp instructions available?
+ if (Subtarget->hasSME2() && Subtarget->isStreaming() &&
+ OpVT.getVectorElementType() != MVT::i1) {
+ Intrinsic::ID IntID;
+ switch (Op->getNumOperands()) {
+ default:
+ return SDValue();
+ case 2:
+ IntID = Intrinsic::aarch64_sve_uzp_x2;
+ break;
+ case 4:
+ IntID = Intrinsic::aarch64_sve_uzp_x4;
+ break;
+ }
+
+ SmallVector<SDValue, 5> Ops;
+ Ops.push_back(DAG.getTargetConstant(IntID, DL, MVT::i64));
+ for (unsigned I = 0; I < Op.getNumOperands(); ++I)
+ Ops.push_back(Op.getOperand(I));
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op->getVTList(), Ops);
+ }
+
if (Op->getNumOperands() != 2)
return SDValue();
@@ -29227,6 +29249,28 @@ SDValue AArch64TargetLowering::LowerVECTOR_INTERLEAVE(SDValue Op,
assert(OpVT.isScalableVector() &&
"Expected scalable vector in LowerVECTOR_INTERLEAVE.");
+ // Are multi-register zip instructions available?
+ if (Subtarget->hasSME2() && Subtarget->isStreaming() &&
+ OpVT.getVectorElementType() != MVT::i1) {
+ Intrinsic::ID IntID;
+ switch (Op->getNumOperands()) {
+ default:
+ return SDValue();
+ case 2:
+ IntID = Intrinsic::aarch64_sve_zip_x2;
+ break;
+ case 4:
+ IntID = Intrinsic::aarch64_sve_zip_x4;
+ break;
+ }
+
+ SmallVector<SDValue, 5> Ops;
+ Ops.push_back(DAG.getTargetConstant(IntID, DL, MVT::i64));
+ for (unsigned I = 0; I < Op.getNumOperands(); ++I)
+ Ops.push_back(Op.getOperand(I));
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op->getVTList(), Ops);
+ }
+
if (Op->getNumOperands() != 2)
return SDValue();
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll b/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll
index 89fc10b47bb35..4889861444bbe 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll
@@ -1,106 +1,163 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+sve2 | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck %s -check-prefixes=CHECK,SVE
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2 | FileCheck %s -check-prefixes=CHECK,SVE
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+sme2 -force-streaming | FileCheck %s -check-prefixes=CHECK,SME2
define {<vscale x 2 x half>, <vscale x 2 x half>} @vector_deinterleave_nxv2f16_nxv4f16(<vscale x 4 x half> %vec) {
-; CHECK-LABEL: vector_deinterleave_nxv2f16_nxv4f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uzp1 z1.s, z0.s, z0.s
-; CHECK-NEXT: uzp2 z2.s, z0.s, z0.s
-; CHECK-NEXT: uunpklo z0.d, z1.s
-; CHECK-NEXT: uunpklo z1.d, z2.s
-; CHECK-NEXT: ret
+; SVE-LABEL: vector_deinterleave_nxv2f16_nxv4f16:
+; SVE: // %bb.0:
+; SVE-NEXT: uzp1 z1.s, z0.s, z0.s
+; SVE-NEXT: uzp2 z2.s, z0.s, z0.s
+; SVE-NEXT: uunpklo z0.d, z1.s
+; SVE-NEXT: uunpklo z1.d, z2.s
+; SVE-NEXT: ret
+;
+; SME2-LABEL: vector_deinterleave_nxv2f16_nxv4f16:
+; SME2: // %bb.0:
+; SME2-NEXT: uunpkhi z1.d, z0.s
+; SME2-NEXT: uunpklo z0.d, z0.s
+; SME2-NEXT: uzp { z0.d, z1.d }, z0.d, z1.d
+; SME2-NEXT: ret
%retval = call {<vscale x 2 x half>, <vscale x 2 x half>} @llvm.vector.deinterleave2.nxv4f16(<vscale x 4 x half> %vec)
ret {<vscale x 2 x half>, <vscale x 2 x half>} %retval
}
define {<vscale x 4 x half>, <vscale x 4 x half>} @vector_deinterleave_nxv4f16_nxv8f16(<vscale x 8 x half> %vec) {
-; CHECK-LABEL: vector_deinterleave_nxv4f16_nxv8f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uzp1 z1.h, z0.h, z0.h
-; CHECK-NEXT: uzp2 z2.h, z0.h, z0.h
-; CHECK-NEXT: uunpklo z0.s, z1.h
-; CHECK-NEXT: uunpklo z1.s, z2.h
-; CHECK-NEXT: ret
+; SVE-LABEL: vector_deinterleave_nxv4f16_nxv8f16:
+; SVE: // %bb.0:
+; SVE-NEXT: uzp1 z1.h, z0.h, z0.h
+; SVE-NEXT: uzp2 z2.h, z0.h, z0.h
+; SVE-NEXT: uunpklo z0.s, z1.h
+; SVE-NEXT: uunpklo z1.s, z2.h
+; SVE-NEXT: ret
+;
+; SME2-LABEL: vector_deinterleave_nxv4f16_nxv8f16:
+; SME2: // %bb.0:
+; SME2-NEXT: uunpkhi z1.s, z0.h
+; SME2-NEXT: uunpklo z0.s, z0.h
+; SME2-NEXT: uzp { z0.s, z1.s }, z0.s, z1.s
+; SME2-NEXT: ret
%retval = call {<vscale x 4 x half>, <vscale x 4 x half>} @llvm.vector.deinterleave2.nxv8f16(<vscale x 8 x half> %vec)
ret {<vscale x 4 x half>, <vscale x 4 x half>} %retval
}
define {<vscale x 8 x half>, <vscale x 8 x half>} @vector_deinterleave_nxv8f16_nxv16f16(<vscale x 16 x half> %vec) {
-; CHECK-LABEL: vector_deinterleave_nxv8f16_nxv16f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uzp1 z2.h, z0.h, z1.h
-; CHECK-NEXT: uzp2 z1.h, z0.h, z1.h
-; CHECK-NEXT: mov z0.d, z2.d
-; CHECK-NEXT: ret
+; SVE-LABEL: vector_deinterleave_nxv8f16_nxv16f16:
+; SVE: // %bb.0:
+; SVE-NEXT: uzp1 z2.h, z0.h, z1.h
+; SVE-NEXT: uzp2 z1.h, z0.h, z1.h
+; SVE-NEXT: mov z0.d, z2.d
+; SVE-NEXT: ret
+;
+; SME2-LABEL: vector_deinterleave_nxv8f16_nxv16f16:
+; SME2: // %bb.0:
+; SME2-NEXT: uzp { z0.h, z1.h }, z0.h, z1.h
+; SME2-NEXT: ret
%retval = call {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.vector.deinterleave2.nxv16f16(<vscale x 16 x half> %vec)
ret {<vscale x 8 x half>, <vscale x 8 x half>} %retval
}
define {<vscale x 2 x float>, <vscale x 2 x float>} @vector_deinterleave_nxv2f32_nxv4f32(<vscale x 4 x float> %vec) {
-; CHECK-LABEL: vector_deinterleave_nxv2f32_nxv4f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uzp1 z1.s, z0.s, z0.s
-; CHECK-NEXT: uzp2 z2.s, z0.s, z0.s
-; CHECK-NEXT: uunpklo z0.d, z1.s
-; CHECK-NEXT: uunpklo z1.d, z2.s
-; CHECK-NEXT: ret
+; SVE-LABEL: vector_deinterleave_nxv2f32_nxv4f32:
+; SVE: // %bb.0:
+; SVE-NEXT: uzp1 z1.s, z0.s, z0.s
+; SVE-NEXT: uzp2 z2.s, z0.s, z0.s
+; SVE-NEXT: uunpklo z0.d, z1.s
+; SVE-NEXT: uunpklo z1.d, z2.s
+; SVE-NEXT: ret
+;
+; SME2-LABEL: vector_deinterleave_nxv2f32_nxv4f32:
+; SME2: // %bb.0:
+; SME2-NEXT: uunpkhi z1.d, z0.s
+; SME2-NEXT: uunpklo z0.d, z0.s
+; SME2-NEXT: uzp { z0.d, z1.d }, z0.d, z1.d
+; SME2-NEXT: ret
%retval = call {<vscale x 2 x float>, <vscale x 2 x float>} @llvm.vector.deinterleave2.nxv4f32(<vscale x 4 x float> %vec)
ret {<vscale x 2 x float>, <vscale x 2 x float>} %retval
}
define {<vscale x 4 x float>, <vscale x 4 x float>} @vector_deinterleave_nxv4f32_nxv8f32(<vscale x 8 x float> %vec) {
-; CHECK-LABEL: vector_deinterleave_nxv4f32_nxv8f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uzp1 z2.s, z0.s, z1.s
-; CHECK-NEXT: uzp2 z1.s, z0.s, z1.s
-; CHECK-NEXT: mov z0.d, z2.d
-; CHECK-NEXT: ret
+; SVE-LABEL: vector_deinterleave_nxv4f32_nxv8f32:
+; SVE: // %bb.0:
+; SVE-NEXT: uzp1 z2.s, z0.s, z1.s
+; SVE-NEXT: uzp2 z1.s, z0.s, z1.s
+; SVE-NEXT: mov z0.d, z2.d
+; SVE-NEXT: ret
+;
+; SME2-LABEL: vector_deinterleave_nxv4f32_nxv8f32:
+; SME2: // %bb.0:
+; SME2-NEXT: uzp { z0.s, z1.s }, z0.s, z1.s
+; SME2-NEXT: ret
%retval = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> %vec)
ret {<vscale x 4 x float>, <vscale x 4 x float>} %retval
}
define {<vscale x 2 x double>, <vscale x 2 x double>} @vector_deinterleave_nxv2f64_nxv4f64(<vscale x 4 x double> %vec) {
-; CHECK-LABEL: vector_deinterleave_nxv2f64_nxv4f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d
-; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d
-; CHECK-NEXT: mov z0.d, z2.d
-; CHECK-NEXT: ret
+; SVE-LABEL: vector_deinterleave_nxv2f64_nxv4f64:
+; SVE: // %bb.0:
+; SVE-NEXT: uzp1 z2.d, z0.d, z1.d
+; SVE-NEXT: uzp2 z1.d, z0.d, z1.d
+; SVE-NEXT: mov z0.d, z2.d
+; SVE-NEXT: ret
+;
+; SME2-LABEL: vector_deinterleave_nxv2f64_nxv4f64:
+; SME2: // %bb.0:
+; SME2-NEXT: uzp { z0.d, z1.d }, z0.d, z1.d
+; SME2-NEXT: ret
%retval = call {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %vec)
ret {<vscale x 2 x double>, <vscale x 2 x double>} %retval
}
define {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} @vector_deinterleave_nxv2bf16_nxv4bf16(<vscale x 4 x bfloat> %vec) {
-; CHECK-LABEL: vector_deinterleave_nxv2bf16_nxv4bf16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uzp1 z1.s, z0.s, z0.s
-; CHECK-NEXT: uzp2 z2.s, z0.s, z0.s
-; CHECK-NEXT: uunpklo z0.d, z1.s
-; CHECK-NEXT: uunpklo z1.d, z2.s
-; CHECK-NEXT: ret
+; SVE-LABEL: vector_deinterleave_nxv2bf16_nxv4bf16:
+; SVE: // %bb.0:
+; SVE-NEXT: uzp1 z1.s, z0.s, z0.s
+; SVE-NEXT: uzp2 z2.s, z0.s, z0.s
+; SVE-NEXT: uunpklo z0.d, z1.s
+; SVE-NEXT: uunpklo z1.d, z2.s
+; SVE-NEXT: ret
+;
+; SME2-LABEL: vector_deinterleave_nxv2bf16_nxv4bf16:
+; SME2: // %bb.0:
+; SME2-NEXT: uunpkhi z1.d, z0.s
+; SME2-NEXT: uunpklo z0.d, z0.s
+; SME2-NEXT: uzp { z0.d, z1.d }, z0.d, z1.d
+; SME2-NEXT: ret
%retval = call {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} @llvm.vector.deinterleave2.nxv4bf16(<vscale x 4 x bfloat> %vec)
ret {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} %retval
}
define {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} @vector_deinterleave_nxv4bf16_nxv8bf16(<vscale x 8 x bfloat> %vec) {
-; CHECK-LABEL: vector_deinterleave_nxv4bf16_nxv8bf16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uzp1 z1.h, z0.h, z0.h
-; CHECK-NEXT: uzp2 z2.h, z0.h, z0.h
-; CHECK-NEXT: uunpklo z0.s, z1.h
-; CHECK-NEXT: uunpklo z1.s, z2.h
-; CHECK-NEXT: ret
+; SVE-LABEL: vector_deinterleave_nxv4bf16_nxv8bf16:
+; SVE: // %bb.0:
+; SVE-NEXT: uzp1 z1.h, z0.h, z0.h
+; SVE-NEXT: uzp2 z2.h, z0.h, z0.h
+; SVE-NEXT: uunpklo z0.s, z1.h
+; SVE-NEXT: uunpklo z1.s, z2.h
+; SVE-NEXT: ret
+;
+; SME2-LABEL: vector_deinterleave_nxv4bf16_nxv8bf16:
+; SME2: // %bb.0:
+; SME2-NEXT: uunpkhi z1.s, z0.h
+; SME2-NEXT: uunpklo z0.s, z0.h
+; SME2-NEXT: uzp { z0.s, z1.s }, z0.s, z1.s
+; SME2-NEXT: ret
%retval = call {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} @llvm.vector.deinterleave2.nxv8bf16(<vscale x 8 x bfloat> %vec)
ret {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} %retval
}
define {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @vector_deinterleave_nxv8bf16_nxv16bf16(<vscale x 16 x bfloat> %vec) {
-; CHECK-LABEL: vector_deinterleave_nxv8bf16_nxv16bf16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uzp1 z2.h, z0.h, z1.h
-; CHECK-NEXT: uzp2 z1.h, z0.h, z1.h
-; CHECK-NEXT: mov z0.d, z2.d
-; CHECK-NEXT: ret
+; SVE-LABEL: vector_deinterleave_nxv8bf16_nxv16bf16:
+; SVE: // %bb.0:
+; SVE-NEXT: uzp1 z2.h, z0.h, z1.h
+; SVE-NEXT: uzp2 z1.h, z0.h, z1.h
+; SVE-NEXT: mov z0.d, z2.d
+; SVE-NEXT: ret
+;
+; SME2-LABEL: vector_deinterleave_nxv8bf16_nxv16bf16:
+; SME2: // %bb.0:
+; SME2-NEXT: uzp { z0.h, z1.h }, z0.h, z1.h
+; SME2-NEXT: ret
%retval = call {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.vector.deinterleave2.nxv16bf16(<vscale x 16 x bfloat> %vec)
ret {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} %retval
}
@@ -108,141 +165,223 @@ define {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @vector_deinterleave_nxv8b
; Integers
define {<vscale x 16 x i8>, <vscale x 16 x i8>} @vector_deinterleave_nxv16i8_nxv32i8(<vscale x 32 x i8> %vec) {
-; CHECK-LABEL: vector_deinterleave_nxv16i8_nxv32i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uzp1 z2.b, z0.b, z1.b
-; CHECK-NEXT: uzp2 z1.b, z0.b, z1.b
-; CHECK-NEXT: mov z0.d, z2.d
-; CHECK-NEXT: ret
+; SVE-LABEL: vector_deinterleave_nxv16i8_nxv32i8:
+; SVE: // %bb.0:
+; SVE-NEXT: uzp1 z2.b, z0.b, z1.b
+; SVE-NEXT: uzp2 z1.b, z0.b, z1.b
+; SVE-NEXT: mov z0.d, z2.d
+; SVE-NEXT: ret
+;
+; SME2-LABEL: vector_deinterleave_nxv16i8_nxv32i8:
+; SME2: // %bb.0:
+; SME2-NEXT: uzp { z0.b, z1.b }, z0.b, z1.b
+; SME2-NEXT: ret
%retval = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %vec)
ret {<vscale x 16 x i8>, <vscale x 16 x i8>} %retval
}
define {<vscale x 8 x i16>, <vscale x 8 x i16>} @vector_deinterleave_nxv8i16_nxv16i16(<vscale x 16 x i16> %vec) {
-; CHECK-LABEL: vector_deinterleave_nxv8i16_nxv16i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uzp1 z2.h, z0.h, z1.h
-; CHECK-NEXT: uzp2 z1.h, z0.h, z1.h
-; CHECK-NEXT: mov z0.d, z2.d
-; CHECK-NEXT: ret
+; SVE-LABEL: vector_deinterleave_nxv8i16_nxv16i16:
+; SVE: // %bb.0:
+; SVE-NEXT: uzp1 z2.h, z0.h, z1.h
+; SVE-NEXT: uzp2 z1.h, z0.h, z1.h
+; SVE-NEXT: mov z0.d, z2.d
+; SVE-NEXT: ret
+;
+; SME2-LABEL: vector_deinterleave_nxv8i16_nxv16i16:
+; SME2: // %bb.0:
+; SME2-NEXT: uzp { z0.h, z1.h }, z0.h, z1.h
+; SME2-NEXT: ret
%retval = call {<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.vector.deinterleave2.nxv16i16(<vscale x 16 x i16> %vec)
ret {<vscale x 8 x i16>, <vscale x 8 x i16>} %retval
}
define {<vscale x 4 x i32>, <vscale x 4 x i32>} @vector_deinterleave_nxv4i32_nxvv8i32(<vscale x 8 x i32> %vec) {
-; CHECK-LABEL: vector_deinterleave_nxv4i32_nxvv8i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uzp1 z2.s, z0.s, z1.s
-; CHECK-NEXT: uzp2 z1.s, z0.s, z1.s
-; CHECK-NEXT: mov z0.d, z2.d
-; CHECK-NEXT: ret
+; SVE-LABEL: vector_deinterleave_nxv4i32_nxvv8i32:
+; SVE: // %bb.0:
+; SVE-NEXT: uzp1 z2.s, z0.s, z1.s
+; SVE-NEXT: uzp2 z1.s, z0.s, z1.s
+; SVE-NEXT: mov z0.d, z2.d
+; SVE-NEXT: ret
+;
+; SME2-LABEL: vector_deinterleave_nxv4i32_nxvv8i32:
+; SME2: // %bb.0:
+; SME2-NEXT: uzp { z0.s, z1.s }, z0.s, z1.s
+; SME2-NEXT: ret
%retval = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %vec)
ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %retval
}
define {<vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv4i64(<vscale x 4 x i64> %vec) {
-; CHECK-LABEL: vector_deinterleave_nxv2i64_nxv4i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d
-; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d
-; CHECK-NEXT: mov z0.d, z2.d
-; CHECK-NEXT: ret
+; SVE-LABEL: vector_deinterleave_nxv2i64_nxv4i64:
+; SVE: // %bb.0:
+; SVE-NEXT: uzp1 z2.d, z0.d, z1.d
+; SVE-NEXT: uzp2 z1.d, z0.d, z1.d
+; SVE-NEXT: mov z0.d, z2.d
+; SVE-NEXT: ret
+;
+; SME2-LABEL: vector_deinterleave_nxv2i64_nxv4i64:
+; SME2: // %bb.0:
+; SME2-NEXT: uzp { z0.d, z1.d }, z0.d, z1.d
+; SME2-NEXT: ret
%retval = call {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %vec)
ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %retval
}
define {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @vector_deinterleave_nxv16i8_nxv64i8(<vscale x 64 x i8> %vec) {
-; CHECK-LABEL: vector_deinterleave_nxv16i8_nxv64i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uzp1 z4.b, z2.b, z3.b
-; CHECK-NEXT: uzp1 z5.b, z0.b, z1.b
-; CHECK-NEXT: uzp2 z3.b, z2.b, z3.b
-; CHECK-NEXT: uzp2 z6.b, z0.b, z1.b
-; CHECK-NEXT: uzp1 z0.b, z5.b, z4.b
-; CHECK-NEXT: uzp2 z2.b, z5.b, z4.b
-; CHECK-NEXT: uzp1 z1.b, z6.b, z3.b
-; CHECK-NEXT: uzp2 z3.b, z6.b, z3.b
-; CHECK-NEXT: ret
+; SVE-LABEL: vector_deinterleave_nxv16i8_nxv64i8:
+; SVE: // %bb.0:
+; SVE-NEXT: uzp1 z4.b, z2.b, z3.b
+; SVE-NEXT: uzp1 z5.b, z0.b, z1.b
+; SVE-NEXT: uzp2 z3.b, z2.b, z3.b
+; SVE-NEXT: uzp2 z6.b, z0.b, z1.b
+; SVE-NEXT: uzp1 z0.b, z5.b, z4.b
+; SVE-NEXT: uzp2 z2.b, z5.b, z4.b
+; SVE-NEXT: uzp1 z1.b, z6.b, z3.b
+; SVE-NEXT: uzp2 z3.b, z6.b, z3.b
+; SVE-NEXT: ret
+;
+; SME2-LABEL: vector_deinterleave_nxv16i8_nxv64i8:
+; SME2: // %bb.0:
+; SME2-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; SME2-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; SME2-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; SME2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; SME2-NEXT: uzp { z0.b - z3.b }, { z0.b - z3.b }
+; SME2-NEXT: ret
%retval = call {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave4.nxv64i8(<vscale x 64 x i8> %vec)
ret {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} %retval
}
define {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} @vector_deinterleave_nxv8i16_nxv32i16(<vscale x 32 x i16> %vec) {
-; CHECK-LABEL: vector_deinterleave_nxv8i16_nxv32i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uzp1 z4.h, z2.h, z3.h
-; CHECK-NEXT: uzp1 z5.h, z0.h, z1.h
-; CHECK-NEXT: uzp2 z3.h, z2.h, z3.h
-; CHECK-NEXT: uzp2 z6.h, z0.h, z1.h
-; CHECK-NEXT: uzp1 z0.h, z5.h, z4.h
-; CHECK-NEXT: uzp2 z2.h, z5.h, z4.h
-; CHECK-NEXT: uzp1 z1.h, z6.h, z3.h
-; CHECK-NEXT: uzp2 z3.h, z6.h, z3.h
-; CHECK-NEXT: ret
+; SVE-LABEL: vector_deinterleave_nxv8i16_nxv32i16:
+; SVE: // %bb.0:
+; SVE-NEXT: uzp1 z4.h, z2.h, z3.h
+; SVE-NEXT: uzp1 z5.h, z0.h, z1.h
+; SVE-NEXT: uzp2 z3.h, z2.h, z3.h
+; SVE-NEXT: uzp2 z6.h, z0.h, z1.h
+; SVE-NEXT: uzp1 z0.h, z5.h, z4.h
+; SVE-NEXT: uzp2 z2.h, z5.h, z4.h
+; SVE-NEXT: uzp1 z1.h, z6.h, z3.h
+; SVE-NEXT: uzp2 z3.h, z6.h, z3.h
+; SVE-NEXT: ret
+;
+; SME2-LABEL: vector_deinterleave_nxv8i16_nxv32i16:
+; SME2: // %bb.0:
+; SME2-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; SME2-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; SME2-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; SME2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; SME2-NEXT: uzp { z0.h - z3.h }, { z0.h - z3.h }
+; SME2-NEXT: ret
%retval = call {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.vector.deinterleave4.nxv32i16(<vscale x 32 x i16> %vec)
ret {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} %retval
}
define {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @vector_deinterleave_nxv4i32_nxv16i32(<vscale x 16 x i32> %vec) {
-; CHECK-LABEL: vector_deinterleave_nxv4i32_nxv16i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uzp1 z4.s, z2.s, z3.s
-; CHECK-NEXT: uzp1 z5.s, z0.s, z1.s
-; CHECK-NEXT: uzp2 z3.s, z2.s, z3.s
-; CHECK-NEXT: uzp2 z6.s, z0.s, z1.s
-; CHECK-NEXT: uzp1 z0.s, z5.s, z4.s
-; CHECK-NEXT: uzp2 z2.s, z5.s, z4.s
-; CHECK-NEXT: uzp1 z1.s, z6.s, z3.s
-; CHECK-NEXT: uzp2 z3.s, z6.s, z3.s
-; CHECK-NEXT: ret
+; SVE-LABEL: vector_deinterleave_nxv4i32_nxv16i32:
+; SVE: // %bb.0:
+; SVE-NEXT: uzp1 z4.s, z2.s, z3.s
+; SVE-NEXT: uzp1 z5.s, z0.s, z1.s
+; SVE-NEXT: uzp2 z3.s, z2.s, z3.s
+; SVE-NEXT: uzp2 z6.s, z0.s, z1.s
+; SVE-NEXT: uzp1 z0.s, z5.s, z4.s
+; SVE-NEXT: uzp2 z2.s, z5.s, z4.s
+; SVE-NEXT: uzp1 z1.s, z6.s, z3.s
+; SVE-NEXT: uzp2 z3.s, z6.s, z3.s
+; SVE-NEXT: ret
+;
+; SME2-LABEL: vector_deinterleave_nxv4i32_nxv16i32:
+; SME2: // %bb.0:
+; SME2-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; SME2-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; SME2-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; SME2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; SME2-NEXT: uzp { z0.s - z3.s }, { z0.s - z3.s }
+; SME2-NEXT: ret
%retval = call {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.vector.deinterleave4.nxv16i32(<vscale x 16 x i32> %vec)
ret {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} %retval
}
define {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv8i64(<vscale x 8 x i64> %vec) {
-; ...
[truncated]
for (unsigned I = 0; I < Op.getNumOperands(); ++I)
  Ops.push_back(Op.getOperand(I));
Suggested change:

Ops.append(Op->op_values().begin(), Op->op_values().end());
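For reference, a minimal sketch of how the suggested form would sit in the lowering code above (the surrounding names follow the patch; this illustrates the reviewer's suggestion rather than the committed change):

  SmallVector<SDValue, 5> Ops;
  Ops.push_back(DAG.getTargetConstant(IntID, DL, MVT::i64));
  // Append every operand of the (de)interleave node in one call instead of
  // pushing them back one index at a time.
  Ops.append(Op->op_values().begin(), Op->op_values().end());
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op->getVTList(), Ops);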
for (unsigned I = 0; I < Op.getNumOperands(); ++I)
  Ops.push_back(Op.getOperand(I));
Suggested change:

Ops.append(Op->op_values().begin(), Op->op_values().end());
case 4:
  IntID = Intrinsic::aarch64_sve_uzp_x4;
  break;
I think for the x4 uzp/zip a vector length of at least 256-bit is required for the i64/double elements.
See: https://developer.arm.com/documentation/ddi0602/2025-03/SME-Instructions/UZP--four-registers---Concatenate-elements-from-four-vectors-
if size == '11' && MaxImplementedSVL() < 256 then EndOfDecode(Decode_UNDEF);
Oh yes :( I've updated the check to bail when the minimum SVE vector length is not known to be 256-bit or larger.
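A rough sketch of a guard along those lines, assuming the existing AArch64Subtarget helper getMinSVEVectorSizeInBits(); the exact predicate and its placement in the updated patch may differ:

  // The four-register uzp/zip forms with 64-bit elements are UNDEFINED on
  // implementations whose maximum vector length is below 256 bits, so only
  // take the multi-register path when the minimum SVE vector length is known
  // to be at least 256 bits.
  if (Op->getNumOperands() == 4 && OpVT.getScalarSizeInBits() == 64 &&
      Subtarget->getMinSVEVectorSizeInBits() < 256)
    return SDValue();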
LGTM, thanks!
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/157/builds/30591