Skip to content

Commit d863a0d

Browse files
author
Joe Ellis
committed
[SelectionDAG] Implement SplitVecOp_INSERT_SUBVECTOR
This function is needed for when it is necessary to split the subvector operand of an llvm.experimental.vector.insert call. Splitting the subvector operand means performing two insertions: one inserting the lower part of the split subvector into the destination vector, and another for inserting the upper part. Through experimenting, it seems quite rare to need to split the subvector operand, but this is necessary to avoid assertion errors. Differential Revision: https://reviews.llvm.org/D92760
1 parent 68dbb77 commit d863a0d

File tree

3 files changed

+143
-0
lines changed

3 files changed

+143
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -841,6 +841,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
841841
SDValue SplitVecOp_TruncateHelper(SDNode *N);
842842

843843
SDValue SplitVecOp_BITCAST(SDNode *N);
844+
SDValue SplitVecOp_INSERT_SUBVECTOR(SDNode *N, unsigned OpNo);
844845
SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
845846
SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
846847
SDValue SplitVecOp_ExtVecInRegOp(SDNode *N);

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2062,6 +2062,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
20622062
case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break;
20632063
case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
20642064
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
2065+
case ISD::INSERT_SUBVECTOR: Res = SplitVecOp_INSERT_SUBVECTOR(N, OpNo); break;
20652066
case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
20662067
case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
20672068
case ISD::TRUNCATE:
@@ -2278,6 +2279,32 @@ SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
22782279
JoinIntegers(Lo, Hi));
22792280
}
22802281

2282+
/// Handle an INSERT_SUBVECTOR node whose inserted sub-vector (operand 1) has
/// to be split. The single insertion is rewritten as two chained insertions
/// into the destination vector: first the low half of the split sub-vector at
/// the original index, then the high half at the original index advanced by
/// the low half's minimum element count.
///
/// \param N    The INSERT_SUBVECTOR node; operands are (Vec, SubVec, Idx).
/// \param OpNo Index of the operand being split; must be 1 (the sub-vector).
/// \returns The second (outer) INSERT_SUBVECTOR node.
SDValue DAGTypeLegalizer::SplitVecOp_INSERT_SUBVECTOR(SDNode *N,
                                                      unsigned OpNo) {
  assert(OpNo == 1 && "Invalid OpNo; can only split SubVec.");

  SDLoc DL(N);
  // The result type is known to be legal here, so both new insertions can
  // produce it directly.
  EVT LegalResVT = N->getValueType(0);
  SDValue BaseIdx = N->getOperand(2);

  // Fetch the two halves of the sub-vector operand.
  SDValue LoPart, HiPart;
  GetSplitVector(N->getOperand(1), LoPart, HiPart);

  // The index operand is cast to a constant; the high half goes right after
  // the elements covered by the low half.
  uint64_t HiIdxVal = cast<ConstantSDNode>(BaseIdx)->getZExtValue() +
                      LoPart.getValueType().getVectorMinNumElements();

  // Insert the low half at the original index ...
  SDValue WithLo = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, LegalResVT,
                               N->getOperand(0), LoPart, BaseIdx);
  // ... then the high half on top of that result.
  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, LegalResVT, WithLo, HiPart,
                     DAG.getVectorIdxConstant(HiIdxVal, DL));
}
2307+
22812308
SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
22822309
// We know that the extracted result type is legal.
22832310
EVT SubVT = N->getValueType(0);
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
; RUN: llc < %s -debug-only=legalize-types 2>&1 | FileCheck %s --check-prefix=CHECK-LEGALIZATION
2+
; RUN: llc < %s | FileCheck %s
3+
; REQUIRES: asserts
4+
5+
target triple = "aarch64-unknown-linux-gnu"
6+
attributes #0 = {"target-features"="+sve"}
7+
8+
declare <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64>, <8 x i64>, i64)
9+
declare <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double>, <8 x double>, i64)
10+
11+
; Inserts the fixed-length <8 x i64> vector %b into the scalable
; <vscale x 2 x i64> vector %a at index 0, compiled with attribute set #0.
; The legalization-debug assertions below expect the insert to become a chain
; of four nxv2i64 insert_subvector nodes at constant indices 0, 2, 4 and 6,
; each one taking the previous node as its first operand. The remaining
; assertions pin the generated assembly for this function.
define <vscale x 2 x i64> @test_nxv2i64_v8i64(<vscale x 2 x i64> %a, <8 x i64> %b) #0 {
12+
; CHECK-LEGALIZATION: Legally typed node: [[T1:t[0-9]+]]: nxv2i64 = insert_subvector {{t[0-9]+}}, {{t[0-9]+}}, Constant:i64<0>
13+
; CHECK-LEGALIZATION: Legally typed node: [[T2:t[0-9]+]]: nxv2i64 = insert_subvector [[T1]], {{t[0-9]+}}, Constant:i64<2>
14+
; CHECK-LEGALIZATION: Legally typed node: [[T3:t[0-9]+]]: nxv2i64 = insert_subvector [[T2]], {{t[0-9]+}}, Constant:i64<4>
15+
; CHECK-LEGALIZATION: Legally typed node: [[T4:t[0-9]+]]: nxv2i64 = insert_subvector [[T3]], {{t[0-9]+}}, Constant:i64<6>
16+
17+
; CHECK-LABEL: test_nxv2i64_v8i64:
18+
; CHECK: // %bb.0:
19+
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
20+
; CHECK-NEXT: addvl sp, sp, #-4
21+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
22+
; CHECK-NEXT: .cfi_offset w29, -16
23+
; CHECK-NEXT: cntd x8
24+
; CHECK-NEXT: sub x8, x8, #1 // =1
25+
; CHECK-NEXT: cmp x8, #0 // =0
26+
; CHECK-NEXT: csel x10, x8, xzr, lo
27+
; CHECK-NEXT: ptrue p0.d
28+
; CHECK-NEXT: mov x9, sp
29+
; CHECK-NEXT: lsl x10, x10, #3
30+
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
31+
; CHECK-NEXT: str q1, [x9, x10]
32+
; CHECK-NEXT: addvl x10, sp, #1
33+
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
34+
; CHECK-NEXT: mov w9, #2
35+
; CHECK-NEXT: cmp x8, #2 // =2
36+
; CHECK-NEXT: csel x9, x8, x9, lo
37+
; CHECK-NEXT: lsl x9, x9, #3
38+
; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl]
39+
; CHECK-NEXT: str q2, [x10, x9]
40+
; CHECK-NEXT: addvl x10, sp, #2
41+
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #1, mul vl]
42+
; CHECK-NEXT: mov w9, #4
43+
; CHECK-NEXT: cmp x8, #4 // =4
44+
; CHECK-NEXT: csel x9, x8, x9, lo
45+
; CHECK-NEXT: lsl x9, x9, #3
46+
; CHECK-NEXT: st1d { z0.d }, p0, [sp, #2, mul vl]
47+
; CHECK-NEXT: str q3, [x10, x9]
48+
; CHECK-NEXT: addvl x10, sp, #3
49+
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #2, mul vl]
50+
; CHECK-NEXT: mov w9, #6
51+
; CHECK-NEXT: cmp x8, #6 // =6
52+
; CHECK-NEXT: csel x8, x8, x9, lo
53+
; CHECK-NEXT: lsl x8, x8, #3
54+
; CHECK-NEXT: st1d { z0.d }, p0, [sp, #3, mul vl]
55+
; CHECK-NEXT: str q4, [x10, x8]
56+
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #3, mul vl]
57+
; CHECK-NEXT: addvl sp, sp, #4
58+
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
59+
; CHECK-NEXT: ret
60+
%r = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> %a, <8 x i64> %b, i64 0)
61+
ret <vscale x 2 x i64> %r
62+
}
63+
64+
; Floating-point counterpart of the i64 test: inserts the fixed-length
; <8 x double> vector %b into the scalable <vscale x 2 x double> vector %a at
; index 0, compiled with attribute set #0. The legalization-debug assertions
; below expect a chain of four nxv2f64 insert_subvector nodes at constant
; indices 0, 2, 4 and 6, each one taking the previous node as its first
; operand. The remaining assertions pin the generated assembly.
define <vscale x 2 x double> @test_nxv2f64_v8f64(<vscale x 2 x double> %a, <8 x double> %b) #0 {
65+
; CHECK-LEGALIZATION: Legally typed node: [[T1:t[0-9]+]]: nxv2f64 = insert_subvector {{t[0-9]+}}, {{t[0-9]+}}, Constant:i64<0>
66+
; CHECK-LEGALIZATION: Legally typed node: [[T2:t[0-9]+]]: nxv2f64 = insert_subvector [[T1]], {{t[0-9]+}}, Constant:i64<2>
67+
; CHECK-LEGALIZATION: Legally typed node: [[T3:t[0-9]+]]: nxv2f64 = insert_subvector [[T2]], {{t[0-9]+}}, Constant:i64<4>
68+
; CHECK-LEGALIZATION: Legally typed node: [[T4:t[0-9]+]]: nxv2f64 = insert_subvector [[T3]], {{t[0-9]+}}, Constant:i64<6>
69+
70+
; CHECK-LABEL: test_nxv2f64_v8f64:
71+
; CHECK: // %bb.0:
72+
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
73+
; CHECK-NEXT: addvl sp, sp, #-4
74+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
75+
; CHECK-NEXT: .cfi_offset w29, -16
76+
; CHECK-NEXT: cntd x8
77+
; CHECK-NEXT: sub x8, x8, #1 // =1
78+
; CHECK-NEXT: cmp x8, #0 // =0
79+
; CHECK-NEXT: csel x10, x8, xzr, lo
80+
; CHECK-NEXT: ptrue p0.d
81+
; CHECK-NEXT: mov x9, sp
82+
; CHECK-NEXT: lsl x10, x10, #3
83+
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
84+
; CHECK-NEXT: str q1, [x9, x10]
85+
; CHECK-NEXT: addvl x10, sp, #1
86+
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
87+
; CHECK-NEXT: mov w9, #2
88+
; CHECK-NEXT: cmp x8, #2 // =2
89+
; CHECK-NEXT: csel x9, x8, x9, lo
90+
; CHECK-NEXT: lsl x9, x9, #3
91+
; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl]
92+
; CHECK-NEXT: str q2, [x10, x9]
93+
; CHECK-NEXT: addvl x10, sp, #2
94+
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #1, mul vl]
95+
; CHECK-NEXT: mov w9, #4
96+
; CHECK-NEXT: cmp x8, #4 // =4
97+
; CHECK-NEXT: csel x9, x8, x9, lo
98+
; CHECK-NEXT: lsl x9, x9, #3
99+
; CHECK-NEXT: st1d { z0.d }, p0, [sp, #2, mul vl]
100+
; CHECK-NEXT: str q3, [x10, x9]
101+
; CHECK-NEXT: addvl x10, sp, #3
102+
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #2, mul vl]
103+
; CHECK-NEXT: mov w9, #6
104+
; CHECK-NEXT: cmp x8, #6 // =6
105+
; CHECK-NEXT: csel x8, x8, x9, lo
106+
; CHECK-NEXT: lsl x8, x8, #3
107+
; CHECK-NEXT: st1d { z0.d }, p0, [sp, #3, mul vl]
108+
; CHECK-NEXT: str q4, [x10, x8]
109+
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #3, mul vl]
110+
; CHECK-NEXT: addvl sp, sp, #4
111+
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
112+
; CHECK-NEXT: ret
113+
%r = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> %a, <8 x double> %b, i64 0)
114+
ret <vscale x 2 x double> %r
115+
}

0 commit comments

Comments
 (0)