Skip to content

Commit 0b0ccd5

Browse files
author
Thorsten Schütt
authored
[GlobalIsel] Push cast through build vector (#104634)
Credits: #100563
1 parent 9d739e5 commit 0b0ccd5

File tree

13 files changed

+1450
-910
lines changed

13 files changed

+1450
-910
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -900,6 +900,9 @@ class CombinerHelper {
900900
bool matchExtOfExt(const MachineInstr &FirstMI, const MachineInstr &SecondMI,
901901
BuildFnTy &MatchInfo);
902902

903+
bool matchCastOfBuildVector(const MachineInstr &CastMI,
904+
const MachineInstr &BVMI, BuildFnTy &MatchInfo);
905+
903906
private:
904907
/// Checks for legality of an indexed variant of \p LdSt.
905908
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1857,6 +1857,16 @@ def anyext_of_anyext : ext_of_ext_opcodes<G_ANYEXT, G_ANYEXT>;
18571857
def anyext_of_zext : ext_of_ext_opcodes<G_ANYEXT, G_ZEXT>;
18581858
def anyext_of_sext : ext_of_ext_opcodes<G_ANYEXT, G_SEXT>;
18591859

1860+
// Push cast through build vector.
1861+
class buildvector_of_opcode<Instruction castOpcode> : GICombineRule <
1862+
(defs root:$root, build_fn_matchinfo:$matchinfo),
1863+
(match (G_BUILD_VECTOR $bv, GIVariadic<>:$unused):$Build,
1864+
(castOpcode $root, $bv):$Cast,
1865+
[{ return Helper.matchCastOfBuildVector(*${Cast}, *${Build}, ${matchinfo}); }]),
1866+
(apply [{ Helper.applyBuildFn(*${Cast}, ${matchinfo}); }])>;
1867+
1868+
def buildvector_of_truncate : buildvector_of_opcode<G_TRUNC>;
1869+
18601870
def cast_combines: GICombineGroup<[
18611871
truncate_of_zext,
18621872
truncate_of_sext,
@@ -1870,7 +1880,8 @@ def cast_combines: GICombineGroup<[
18701880
sext_of_anyext,
18711881
anyext_of_anyext,
18721882
anyext_of_zext,
1873-
anyext_of_sext
1883+
anyext_of_sext,
1884+
buildvector_of_truncate
18741885
]>;
18751886

18761887

llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -273,3 +273,43 @@ bool CombinerHelper::matchExtOfExt(const MachineInstr &FirstMI,
273273

274274
return false;
275275
}
276+
277+
bool CombinerHelper::matchCastOfBuildVector(const MachineInstr &CastMI,
278+
const MachineInstr &BVMI,
279+
BuildFnTy &MatchInfo) {
280+
const GExtOrTruncOp *Cast = cast<GExtOrTruncOp>(&CastMI);
281+
const GBuildVector *BV = cast<GBuildVector>(&BVMI);
282+
283+
if (!MRI.hasOneNonDBGUse(BV->getReg(0)))
284+
return false;
285+
286+
Register Dst = Cast->getReg(0);
287+
// The type of the new build vector.
288+
LLT DstTy = MRI.getType(Dst);
289+
// The scalar or element type of the new build vector.
290+
LLT ElemTy = DstTy.getScalarType();
291+
// The scalar or element type of the old build vector.
292+
LLT InputElemTy = MRI.getType(BV->getReg(0)).getElementType();
293+
294+
// Check legality of new build vector, the scalar casts, and profitability of
295+
// the many casts.
296+
if (!isLegalOrBeforeLegalizer(
297+
{TargetOpcode::G_BUILD_VECTOR, {DstTy, ElemTy}}) ||
298+
!isLegalOrBeforeLegalizer({Cast->getOpcode(), {ElemTy, InputElemTy}}) ||
299+
!isCastFree(Cast->getOpcode(), ElemTy, InputElemTy))
300+
return false;
301+
302+
MatchInfo = [=](MachineIRBuilder &B) {
303+
SmallVector<Register> Casts;
304+
unsigned Elements = BV->getNumSources();
305+
for (unsigned I = 0; I < Elements; ++I) {
306+
auto CastI =
307+
B.buildInstr(Cast->getOpcode(), {ElemTy}, {BV->getSourceReg(I)});
308+
Casts.push_back(CastI.getReg(0));
309+
}
310+
311+
B.buildBuildVector(Dst, Casts);
312+
};
313+
314+
return true;
315+
}

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -953,6 +953,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
953953
.clampNumElements(0, v2s64, v2s64)
954954
.minScalarOrElt(0, s8)
955955
.widenVectorEltsToVectorMinSize(0, 64)
956+
.widenScalarOrEltToNextPow2(0)
956957
.minScalarSameAs(1, 0);
957958

958959
getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();

llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir

Lines changed: 95 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -129,3 +129,98 @@ body: |
129129
%res:_(<2 x s64>) = G_SELECT %cond(<2 x s32>), %bv, %bv2
130130
%small:_(<2 x s32>) = G_TRUNC %res(<2 x s64>)
131131
$x0 = COPY %small(<2 x s32>)
132+
...
133+
---
134+
name: test_combine_trunc_build_vector
135+
legalized: true
136+
body: |
137+
bb.1:
138+
; CHECK-PRE-LABEL: name: test_combine_trunc_build_vector
139+
; CHECK-PRE: %arg1:_(s64) = COPY $x0
140+
; CHECK-PRE-NEXT: %arg2:_(s64) = COPY $x0
141+
; CHECK-PRE-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %arg1(s64)
142+
; CHECK-PRE-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %arg2(s64)
143+
; CHECK-PRE-NEXT: %small:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32)
144+
; CHECK-PRE-NEXT: $x0 = COPY %small(<2 x s32>)
145+
;
146+
; CHECK-POST-LABEL: name: test_combine_trunc_build_vector
147+
; CHECK-POST: %arg1:_(s64) = COPY $x0
148+
; CHECK-POST-NEXT: %arg2:_(s64) = COPY $x0
149+
; CHECK-POST-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
150+
; CHECK-POST-NEXT: %small:_(<2 x s32>) = G_TRUNC %bv(<2 x s64>)
151+
; CHECK-POST-NEXT: $x0 = COPY %small(<2 x s32>)
152+
%arg1:_(s64) = COPY $x0
153+
%arg2:_(s64) = COPY $x0
154+
%bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
155+
%small:_(<2 x s32>) = G_TRUNC %bv(<2 x s64>)
156+
$x0 = COPY %small(<2 x s32>)
157+
...
158+
---
159+
name: test_combine_zext_build_vector
160+
legalized: true
161+
body: |
162+
bb.1:
163+
; CHECK-LABEL: name: test_combine_zext_build_vector
164+
; CHECK: %arg1:_(s32) = COPY $w0
165+
; CHECK-NEXT: %arg2:_(s32) = COPY $w0
166+
; CHECK-NEXT: %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
167+
; CHECK-NEXT: %large:_(<2 x s64>) = G_ZEXT %bv(<2 x s32>)
168+
; CHECK-NEXT: $q0 = COPY %large(<2 x s64>)
169+
%arg1:_(s32) = COPY $w0
170+
%arg2:_(s32) = COPY $w0
171+
%bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
172+
%large:_(<2 x s64>) = G_ZEXT %bv(<2 x s32>)
173+
$q0 = COPY %large(<2 x s64>)
174+
...
175+
---
176+
name: test_combine_anyext_build_vector
177+
legalized: true
178+
body: |
179+
bb.1:
180+
; CHECK-LABEL: name: test_combine_anyext_build_vector
181+
; CHECK: %arg1:_(s32) = COPY $w0
182+
; CHECK-NEXT: %arg2:_(s32) = COPY $w0
183+
; CHECK-NEXT: %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
184+
; CHECK-NEXT: %large:_(<2 x s64>) = G_ANYEXT %bv(<2 x s32>)
185+
; CHECK-NEXT: $q0 = COPY %large(<2 x s64>)
186+
%arg1:_(s32) = COPY $w0
187+
%arg2:_(s32) = COPY $w0
188+
%bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
189+
%large:_(<2 x s64>) = G_ANYEXT %bv(<2 x s32>)
190+
$q0 = COPY %large(<2 x s64>)
191+
...
192+
---
193+
name: test_combine_sext_build_vector
194+
legalized: true
195+
body: |
196+
bb.1:
197+
; CHECK-LABEL: name: test_combine_sext_build_vector
198+
; CHECK: %arg1:_(s32) = COPY $w0
199+
; CHECK-NEXT: %arg2:_(s32) = COPY $w0
200+
; CHECK-NEXT: %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
201+
; CHECK-NEXT: %large:_(<2 x s64>) = G_SEXT %bv(<2 x s32>)
202+
; CHECK-NEXT: $q0 = COPY %large(<2 x s64>)
203+
%arg1:_(s32) = COPY $w0
204+
%arg2:_(s32) = COPY $w0
205+
%bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
206+
%large:_(<2 x s64>) = G_SEXT %bv(<2 x s32>)
207+
$q0 = COPY %large(<2 x s64>)
208+
...
209+
---
210+
name: test_combine_anyext_build_vector_multi_use
211+
legalized: true
212+
body: |
213+
bb.1:
214+
; CHECK-LABEL: name: test_combine_anyext_build_vector_multi_use
215+
; CHECK: %arg1:_(s32) = COPY $w0
216+
; CHECK-NEXT: %arg2:_(s32) = COPY $w0
217+
; CHECK-NEXT: %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
218+
; CHECK-NEXT: %large:_(<2 x s64>) = G_ANYEXT %bv(<2 x s32>)
219+
; CHECK-NEXT: $q0 = COPY %large(<2 x s64>)
220+
; CHECK-NEXT: $d0 = COPY %bv(<2 x s32>)
221+
%arg1:_(s32) = COPY $w0
222+
%arg2:_(s32) = COPY $w0
223+
%bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
224+
%large:_(<2 x s64>) = G_ANYEXT %bv(<2 x s32>)
225+
$q0 = COPY %large(<2 x s64>)
226+
$d0 = COPY %bv(<2 x s32>)

llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -49,8 +49,8 @@ body: |
4949
; CHECK: liveins: $x0, $x1
5050
; CHECK-NEXT: {{ $}}
5151
; CHECK-NEXT: %arg1:_(s64) = COPY $x0
52-
; CHECK-NEXT: %extract:_(s32) = G_TRUNC %arg1(s64)
53-
; CHECK-NEXT: %zext:_(s64) = G_ZEXT %extract(s32)
52+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %arg1(s64)
53+
; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[TRUNC]](s32)
5454
; CHECK-NEXT: $x0 = COPY %zext(s64)
5555
; CHECK-NEXT: RET_ReallyLR implicit $x0
5656
%arg1:_(s64) = COPY $x0

llvm/test/CodeGen/AArch64/GlobalISel/combine-with-flags.mir

Lines changed: 33 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -60,8 +60,11 @@ body: |
6060
; CHECK-NEXT: {{ $}}
6161
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
6262
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
63-
; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32), [[COPY1]](s32)
64-
; CHECK-NEXT: $q0 = COPY %bv0(<4 x s32>)
63+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
64+
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
65+
; CHECK-NEXT: %trunc:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC]](s16), [[TRUNC1]](s16)
66+
; CHECK-NEXT: %zext:_(<4 x s32>) = G_ZEXT %trunc(<4 x s16>)
67+
; CHECK-NEXT: $q0 = COPY %zext(<4 x s32>)
6568
; CHECK-NEXT: RET_ReallyLR implicit $w0
6669
%0:_(s32) = COPY $w0
6770
%1:_(s32) = COPY $w1
@@ -165,8 +168,13 @@ body: |
165168
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
166169
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
167170
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
168-
; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
169-
; CHECK-NEXT: $q0 = COPY %bv0(<4 x s32>)
171+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
172+
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
173+
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
174+
; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
175+
; CHECK-NEXT: %t:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
176+
; CHECK-NEXT: %s:_(<4 x s32>) = G_SEXT %t(<4 x s16>)
177+
; CHECK-NEXT: $q0 = COPY %s(<4 x s32>)
170178
%0:_(s32) = COPY $w0
171179
%1:_(s32) = COPY $w1
172180
%2:_(s32) = COPY $w2
@@ -188,8 +196,11 @@ body: |
188196
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
189197
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
190198
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
191-
; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
192-
; CHECK-NEXT: %t:_(<4 x s16>) = G_TRUNC %bv0(<4 x s32>)
199+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
200+
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
201+
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
202+
; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
203+
; CHECK-NEXT: %t:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
193204
; CHECK-NEXT: %z:_(<4 x s32>) = G_ZEXT %t(<4 x s16>)
194205
; CHECK-NEXT: $q0 = COPY %z(<4 x s32>)
195206
%0:_(s32) = COPY $w0
@@ -213,8 +224,11 @@ body: |
213224
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
214225
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
215226
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
216-
; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
217-
; CHECK-NEXT: %t:_(<4 x s16>) = nsw G_TRUNC %bv0(<4 x s32>)
227+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
228+
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
229+
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
230+
; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
231+
; CHECK-NEXT: %t:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
218232
; CHECK-NEXT: %z:_(<4 x s32>) = G_ZEXT %t(<4 x s16>)
219233
; CHECK-NEXT: $q0 = COPY %z(<4 x s32>)
220234
%0:_(s32) = COPY $w0
@@ -238,8 +252,13 @@ body: |
238252
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
239253
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
240254
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
241-
; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
242-
; CHECK-NEXT: $q0 = COPY %bv0(<4 x s32>)
255+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
256+
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
257+
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
258+
; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
259+
; CHECK-NEXT: %t:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
260+
; CHECK-NEXT: %z:_(<4 x s32>) = G_ZEXT %t(<4 x s16>)
261+
; CHECK-NEXT: $q0 = COPY %z(<4 x s32>)
243262
%0:_(s32) = COPY $w0
244263
%1:_(s32) = COPY $w1
245264
%2:_(s32) = COPY $w2
@@ -259,8 +278,10 @@ body: |
259278
; CHECK-NEXT: {{ $}}
260279
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
261280
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
262-
; CHECK-NEXT: %bv0:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64)
263-
; CHECK-NEXT: %z:_(<2 x s32>) = nuw G_TRUNC %bv0(<2 x s64>)
281+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64)
282+
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64)
283+
; CHECK-NEXT: %t:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
284+
; CHECK-NEXT: %z:_(<2 x s32>) = G_ZEXT %t(<2 x s16>)
264285
; CHECK-NEXT: $d0 = COPY %z(<2 x s32>)
265286
%0:_(s64) = COPY $x0
266287
%1:_(s64) = COPY $x1

0 commit comments

Comments
 (0)