Skip to content

Commit 88bbd30

Browse files
committed
[SVE][CodeGen] Fix issues with EXTRACT_SUBVECTOR when using scalable FP vectors
In this patch I have fixed two issues:

1. Our SVE tuple get/set intrinsics were using the wrong constant type for the index passed to EXTRACT_SUBVECTOR. I have fixed this by using the function SelectionDAG::getVectorIdxConstant to create the value. Also, I have updated the documentation for EXTRACT_SUBVECTOR describing what type the constant index should be and we now enforce this when creating the node.
2. The AArch64 backend was missing the appropriate patterns for extracting certain subvectors (nxv4f16 and nxv2f32) from legal SVE types. I have added them as part of this patch.

The only way that I could find to test the new patterns was to use the SVE tuple get intrinsics, although I realise it looks a bit unusual.

Tests added here:

  test/CodeGen/AArch64/sve-extract-subvector.ll

Differential Revision: https://reviews.llvm.org/D85516
1 parent 5d54921 commit 88bbd30

File tree

5 files changed

+60
-6
lines changed

5 files changed

+60
-6
lines changed

llvm/include/llvm/CodeGen/ISDOpcodes.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -514,7 +514,8 @@ enum NodeType {
514514
/// IDX is first scaled by the runtime scaling factor of T. Elements IDX
515515
/// through (IDX + num_elements(T) - 1) must be valid VECTOR indices. If this
516516
/// condition cannot be determined statically but is false at runtime, then
517-
/// the result vector is undefined.
517+
/// the result vector is undefined. The IDX parameter must be a constant of the
518+
/// vector index type, which for most targets is a pointer-sized integer type.
518519
///
519520
/// This operation supports extracting a fixed-width vector from a scalable
520521
/// vector, but not the other way around.

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5560,6 +5560,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
55605560
(VT.getVectorMinNumElements() + N2C->getZExtValue()) <=
55615561
N1VT.getVectorMinNumElements()) &&
55625562
"Extract subvector overflow!");
5563+
assert(N2C->getAPIntValue().getBitWidth() ==
5564+
TLI->getVectorIdxTy(getDataLayout())
5565+
.getSizeInBits()
5566+
.getFixedSize() &&
5567+
"Constant index for EXTRACT_SUBVECTOR has an invalid size");
55635568

55645569
// Trivial extraction.
55655570
if (VT == N1VT)

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14235,9 +14235,9 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
1423514235
uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
1423614236
EVT ResVT = N->getValueType(0);
1423714237
uint64_t NumLanes = ResVT.getVectorElementCount().Min;
14238+
SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL);
1423814239
SDValue Val =
14239-
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1,
14240-
DAG.getConstant(IdxConst * NumLanes, DL, MVT::i32));
14240+
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx);
1424114241
return DAG.getMergeValues({Val, Chain}, DL);
1424214242
}
1424314243
case Intrinsic::aarch64_sve_tuple_set: {
@@ -14263,9 +14263,9 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
1426314263
if (I == IdxConst)
1426414264
Opnds.push_back(Vec);
1426514265
else {
14266-
Opnds.push_back(
14267-
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, Vec.getValueType(), Tuple,
14268-
DAG.getConstant(I * NumLanes, DL, MVT::i32)));
14266+
SDValue ExtIdx = DAG.getVectorIdxConstant(I * NumLanes, DL);
14267+
Opnds.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
14268+
Vec.getValueType(), Tuple, ExtIdx));
1426914269
}
1427014270
}
1427114271
SDValue Concat =

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1155,6 +1155,16 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
11551155
def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
11561156
(ZIP2_PPP_B PPR:$Ps, (PFALSE))>;
11571157

1158+
// Extract subvectors from FP SVE vectors
1159+
def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 0))),
1160+
(UUNPKLO_ZZ_S ZPR:$Zs)>;
1161+
def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 4))),
1162+
(UUNPKHI_ZZ_S ZPR:$Zs)>;
1163+
def : Pat<(nxv2f32 (extract_subvector (nxv4f32 ZPR:$Zs), (i64 0))),
1164+
(UUNPKLO_ZZ_D ZPR:$Zs)>;
1165+
def : Pat<(nxv2f32 (extract_subvector (nxv4f32 ZPR:$Zs), (i64 2))),
1166+
(UUNPKHI_ZZ_D ZPR:$Zs)>;
1167+
11581168
// Concatenate two predicates.
11591169
def : Pat<(nxv4i1 (concat_vectors nxv2i1:$p1, nxv2i1:$p2)),
11601170
(UZP1_PPP_S $p1, $p2)>;

llvm/test/CodeGen/AArch64/sve-extract-subvector.ll

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,5 +28,43 @@ define <vscale x 2 x i64> @extract_nxv2i64_nxv32i8(<vscale x 32 x i8> %z0_z1) {
2828
ret <vscale x 2 x i64> %ext
2929
}
3030

31+
define <vscale x 4 x half> @extract_lo_nxv4f16_nxv8f16(<vscale x 8 x half> %z0) {
32+
; CHECK-LABEL: extract_lo_nxv4f16_nxv8f16:
33+
; CHECK: // %bb.0:
34+
; CHECK-NEXT: uunpklo z0.s, z0.h
35+
; CHECK-NEXT: ret
36+
%ext = call <vscale x 4 x half> @llvm.aarch64.sve.tuple.get.nxv8f16(<vscale x 8 x half> %z0, i32 0)
37+
ret <vscale x 4 x half> %ext
38+
}
39+
40+
define <vscale x 4 x half> @extract_hi_nxv4f16_nxv8f16(<vscale x 8 x half> %z0) {
41+
; CHECK-LABEL: extract_hi_nxv4f16_nxv8f16:
42+
; CHECK: // %bb.0:
43+
; CHECK-NEXT: uunpkhi z0.s, z0.h
44+
; CHECK-NEXT: ret
45+
%ext = call <vscale x 4 x half> @llvm.aarch64.sve.tuple.get.nxv8f16(<vscale x 8 x half> %z0, i32 1)
46+
ret <vscale x 4 x half> %ext
47+
}
48+
49+
define <vscale x 2 x float> @extract_lo_nxv2f32_nxv4f32(<vscale x 4 x float> %z0) {
50+
; CHECK-LABEL: extract_lo_nxv2f32_nxv4f32:
51+
; CHECK: // %bb.0:
52+
; CHECK-NEXT: uunpklo z0.d, z0.s
53+
; CHECK-NEXT: ret
54+
%ext = call <vscale x 2 x float> @llvm.aarch64.sve.tuple.get.nxv4f32(<vscale x 4 x float> %z0, i32 0)
55+
ret <vscale x 2 x float> %ext
56+
}
57+
58+
define <vscale x 2 x float> @extract_hi_nxv2f32_nxv4f32(<vscale x 4 x float> %z0) {
59+
; CHECK-LABEL: extract_hi_nxv2f32_nxv4f32:
60+
; CHECK: // %bb.0:
61+
; CHECK-NEXT: uunpkhi z0.d, z0.s
62+
; CHECK-NEXT: ret
63+
%ext = call <vscale x 2 x float> @llvm.aarch64.sve.tuple.get.nxv4f32(<vscale x 4 x float> %z0, i32 1)
64+
ret <vscale x 2 x float> %ext
65+
}
66+
3167
declare <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv4i64(<vscale x 4 x i64>, i32)
3268
declare <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv32i8(<vscale x 32 x i8>, i32)
69+
declare <vscale x 2 x float> @llvm.aarch64.sve.tuple.get.nxv4f32(<vscale x 4 x float>, i32)
70+
declare <vscale x 4 x half> @llvm.aarch64.sve.tuple.get.nxv8f16(<vscale x 8 x half>, i32)

0 commit comments

Comments
 (0)