Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 0d10177

Browse files
committed
AMDGPU/GlobalISel: Legalize more concat_vectors
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@365488 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 9b30bc5 commit 0d10177

File tree

3 files changed

+115
-27
lines changed

3 files changed

+115
-27
lines changed

lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,21 @@ static LegalityPredicate numElementsNotEven(unsigned TypeIdx) {
8282
};
8383
}
8484

85+
// Any combination of 32 or 64-bit elements up to 512 bits, and multiples of
86+
// v2s16.
87+
static LegalityPredicate isRegisterType(unsigned TypeIdx) {
88+
return [=](const LegalityQuery &Query) {
89+
const LLT Ty = Query.Types[TypeIdx];
90+
if (Ty.isVector()) {
91+
const int EltSize = Ty.getElementType().getSizeInBits();
92+
return EltSize == 32 || EltSize == 64 ||
93+
(EltSize == 16 && Ty.getNumElements() % 2 == 0);
94+
}
95+
96+
return Ty.getSizeInBits() % 32 == 0 && Ty.getSizeInBits() <= 512;
97+
};
98+
}
99+
85100
AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
86101
const GCNTargetMachine &TM)
87102
: ST(ST_) {
@@ -102,7 +117,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
102117

103118
const LLT V2S16 = LLT::vector(2, 16);
104119
const LLT V4S16 = LLT::vector(4, 16);
105-
const LLT V8S16 = LLT::vector(8, 16);
106120

107121
const LLT V2S32 = LLT::vector(2, 32);
108122
const LLT V3S32 = LLT::vector(3, 32);
@@ -647,19 +661,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
647661
Query.Types[0].getScalarSizeInBits() == 64;
648662
});
649663

650-
// TODO: Support any combination of s16, s32, s64, pointer vectors.
651664
getActionDefinitionsBuilder(G_CONCAT_VECTORS)
652-
.legalFor({{V4S32, V2S32},
653-
{V8S32, V2S32},
654-
{V8S32, V4S32},
655-
{V4S64, V2S64},
656-
{V4S16, V2S16},
657-
{V8S16, V2S16},
658-
{V8S16, V4S16},
659-
{LLT::vector(4, LocalPtr), LLT::vector(2, LocalPtr)},
660-
{LLT::vector(4, PrivatePtr), LLT::vector(2, PrivatePtr)}})
661-
// FIXME: Should restrict maximum size, but there seems to be a missing predicate.
662-
.legalIf(typeInSet(1, {V2S32, V4S32, V8S32,V2S16, V4S16, V8S16, LLT::vector(16, 16), V2S64}));
665+
.legalIf(isRegisterType(0));
663666

664667
// Merge/Unmerge
665668
for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {

test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -639,10 +639,10 @@ body: |
639639
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
640640
641641
; GCN-LABEL: name: test_concat_vectors_s_v8s64_s_v4s64_s_v4s64
642-
; GCN: [[COPY:%[0-9]+]]:sgpr(<4 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
643-
; GCN: [[COPY1:%[0-9]+]]:sgpr(<4 x s64>) = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
644-
; GCN: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<8 x s64>) = G_CONCAT_VECTORS [[COPY]](<4 x s64>), [[COPY1]](<4 x s64>)
645-
; GCN: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[CONCAT_VECTORS]](<8 x s64>)
642+
; GCN: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
643+
; GCN: [[COPY1:%[0-9]+]]:sreg_256 = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
644+
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, [[COPY1]], %subreg.sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15
645+
; GCN: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[REG_SEQUENCE]]
646646
%0:sgpr(<4 x s64>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7,
647647
%1:sgpr(<4 x s64>) = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
648648
%4:sgpr(<8 x s64>) = G_CONCAT_VECTORS %0, %1
@@ -683,10 +683,10 @@ body: |
683683
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7
684684
685685
; GCN-LABEL: name: test_concat_vectors_s_v4p1_s_v2p1_s_v2p1
686-
; GCN: [[COPY:%[0-9]+]]:sgpr(<2 x p1>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
687-
; GCN: [[COPY1:%[0-9]+]]:sgpr(<2 x p1>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7
688-
; GCN: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<4 x p1>) = G_CONCAT_VECTORS [[COPY]](<2 x p1>), [[COPY1]](<2 x p1>)
689-
; GCN: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[CONCAT_VECTORS]](<4 x p1>)
686+
; GCN: [[COPY:%[0-9]+]]:sreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
687+
; GCN: [[COPY1:%[0-9]+]]:sreg_128 = COPY $sgpr4_sgpr5_sgpr6_sgpr7
688+
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3, [[COPY1]], %subreg.sub4_sub5_sub6_sub7
689+
; GCN: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]]
690690
%0:sgpr(<2 x p1>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
691691
%1:sgpr(<2 x p1>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7
692692
%3:sgpr(<4 x p1>) = G_CONCAT_VECTORS %0, %1
@@ -723,12 +723,12 @@ body: |
723723
liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7
724724
725725
; GCN-LABEL: name: test_concat_vectors_s_v8p3_s_v2p3_s_v2p3_v2p3_s_v2p3
726-
; GCN: [[COPY:%[0-9]+]]:sgpr(<2 x p3>) = COPY $sgpr0_sgpr1
727-
; GCN: [[COPY1:%[0-9]+]]:sgpr(<2 x p3>) = COPY $sgpr2_sgpr3
728-
; GCN: [[COPY2:%[0-9]+]]:sgpr(<2 x p3>) = COPY $sgpr4_sgpr5
729-
; GCN: [[COPY3:%[0-9]+]]:sgpr(<2 x p3>) = COPY $sgpr6_sgpr7
730-
; GCN: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<8 x p3>) = G_CONCAT_VECTORS [[COPY]](<2 x p3>), [[COPY1]](<2 x p3>), [[COPY2]](<2 x p3>), [[COPY3]](<2 x p3>)
731-
; GCN: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[CONCAT_VECTORS]](<8 x p3>)
726+
; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
727+
; GCN: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
728+
; GCN: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
729+
; GCN: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY $sgpr6_sgpr7
730+
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_256 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3, [[COPY2]], %subreg.sub4_sub5, [[COPY3]], %subreg.sub6_sub7
731+
; GCN: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[REG_SEQUENCE]]
732732
%0:sgpr(<2 x p3>) = COPY $sgpr0_sgpr1
733733
%1:sgpr(<2 x p3>) = COPY $sgpr2_sgpr3
734734
%2:sgpr(<2 x p3>) = COPY $sgpr4_sgpr5

test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,3 +127,88 @@ body: |
127127
%2:_(<4 x s64>) = G_CONCAT_VECTORS %0, %1
128128
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2
129129
...
130+
131+
---
132+
name: concat_vectors_v2p1_v2p1
133+
134+
body: |
135+
bb.0:
136+
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
137+
; CHECK-LABEL: name: concat_vectors_v2p1_v2p1
138+
; CHECK: [[COPY:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
139+
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
140+
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p1>) = G_CONCAT_VECTORS [[COPY]](<2 x p1>), [[COPY1]](<2 x p1>)
141+
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x p1>)
142+
%0:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
143+
%1:_(<2 x p1>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
144+
%2:_(<4 x p1>) = G_CONCAT_VECTORS %0, %1
145+
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2
146+
...
147+
148+
---
149+
name: concat_vectors_v2p0_v2p0
150+
151+
body: |
152+
bb.0:
153+
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
154+
; CHECK-LABEL: name: concat_vectors_v2p0_v2p0
155+
; CHECK: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
156+
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
157+
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p0>) = G_CONCAT_VECTORS [[COPY]](<2 x p0>), [[COPY1]](<2 x p0>)
158+
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x p0>)
159+
%0:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
160+
%1:_(<2 x p0>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
161+
%2:_(<4 x p0>) = G_CONCAT_VECTORS %0, %1
162+
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2
163+
...
164+
165+
---
166+
name: concat_vectors_v2p3_v2p3
167+
168+
body: |
169+
bb.0:
170+
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
171+
; CHECK-LABEL: name: concat_vectors_v2p3_v2p3
172+
; CHECK: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
173+
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr2_vgpr3
174+
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p3>) = G_CONCAT_VECTORS [[COPY]](<2 x p3>), [[COPY1]](<2 x p3>)
175+
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x p3>)
176+
%0:_(<2 x p3>) = COPY $vgpr0_vgpr1
177+
%1:_(<2 x p3>) = COPY $vgpr2_vgpr3
178+
%2:_(<4 x p3>) = G_CONCAT_VECTORS %0, %1
179+
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
180+
...
181+
182+
---
183+
name: concat_vectors_v2p5_v2p5
184+
185+
body: |
186+
bb.0:
187+
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
188+
; CHECK-LABEL: name: concat_vectors_v2p5_v2p5
189+
; CHECK: [[COPY:%[0-9]+]]:_(<2 x p5>) = COPY $vgpr0_vgpr1
190+
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x p5>) = COPY $vgpr2_vgpr3
191+
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p5>) = G_CONCAT_VECTORS [[COPY]](<2 x p5>), [[COPY1]](<2 x p5>)
192+
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x p5>)
193+
%0:_(<2 x p5>) = COPY $vgpr0_vgpr1
194+
%1:_(<2 x p5>) = COPY $vgpr2_vgpr3
195+
%2:_(<4 x p5>) = G_CONCAT_VECTORS %0, %1
196+
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
197+
...
198+
199+
---
200+
name: concat_vectors_v2p999_v2p999
201+
202+
body: |
203+
bb.0:
204+
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
205+
; CHECK-LABEL: name: concat_vectors_v2p999_v2p999
206+
; CHECK: [[COPY:%[0-9]+]]:_(<2 x p999>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
207+
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x p999>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
208+
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x p999>) = G_CONCAT_VECTORS [[COPY]](<2 x p999>), [[COPY1]](<2 x p999>)
209+
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x p999>)
210+
%0:_(<2 x p999>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
211+
%1:_(<2 x p999>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
212+
%2:_(<4 x p999>) = G_CONCAT_VECTORS %0, %1
213+
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %2
214+
...

0 commit comments

Comments
 (0)