Skip to content

Commit 1cb12fa

Browse files
authored
[GlobalISel] Combine unmerge(unmerge()) if the result is legal. (#109606)
This attempts to fold:

```
%1:_(<2 x s32>), %2:_(<2 x s32>) = G_UNMERGE_VALUES %0:_(<4 x s32>)
%3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %1
```

Into a single UNMERGE:

```
%3:_(s32), %4:_(s32), %5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %0
```

This transform already exists; this patch alters it to occur when the resulting UNMERGE is considered legal. At the moment it does not attempt the transform where the result would be extracting a subelement from a vector, as the code is not set up to handle it:

```
%1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0:_(<2 x s32>)
%3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1
```

This helps us reduce the number of legalization artefacts, especially those from widened vectors padded with undef.
1 parent c2fd3b7 commit 1cb12fa

21 files changed

+439
-562
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1090,6 +1090,10 @@ class LegalizationArtifactCombiner {
10901090
LegalizeActionStep ActionStep = LI.getAction(
10911091
{TargetOpcode::G_UNMERGE_VALUES, {OpTy, SrcUnmergeSrcTy}});
10921092
switch (ActionStep.Action) {
1093+
case LegalizeActions::Legal:
1094+
if (!OpTy.isVector() || !LI.isLegal({TargetOpcode::G_UNMERGE_VALUES,
1095+
{DestTy, SrcUnmergeSrcTy}}))
1096+
return false;
10931097
case LegalizeActions::Lower:
10941098
case LegalizeActions::Unsupported:
10951099
break;

llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -412,12 +412,11 @@ body: |
412412
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8)
413413
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8)
414414
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s8>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s8>), [[BUILD_VECTOR1]]
415-
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s8>), [[UV1:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[ICMP]](<8 x s8>)
416-
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UV]](<4 x s8>)
415+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[ICMP]](<8 x s8>)
417416
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
418-
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
419-
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
420-
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8)
417+
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
418+
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8)
419+
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
421420
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
422421
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[DEF]](s32)
423422
; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<4 x s32>), [[C]](s64)

llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -307,29 +307,24 @@ body: |
307307
; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s16)
308308
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
309309
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s8>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR]](<8 x s8>), [[BUILD_VECTOR1]], shufflemask(0, 0, 0, 0, undef, undef, undef, undef)
310-
; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s8>), [[UV9:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[SHUF]](<8 x s8>)
311310
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
312-
; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UV8]](<4 x s8>)
313-
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[UV10]](s8), [[UV11]](s8), [[UV12]](s8), [[UV13]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
314-
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR2]](<8 x s8>)
315-
; CHECK-NEXT: [[UV14:%[0-9]+]]:_(<4 x s16>), [[UV15:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
316-
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
317-
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR3]](<8 x s8>)
318-
; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<4 x s16>), [[UV17:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT1]](<8 x s16>)
319-
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[UV14]], [[UV16]]
311+
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[SHUF]](<8 x s8>)
312+
; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s16>), [[UV9:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
313+
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
314+
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR2]](<8 x s8>)
315+
; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<4 x s16>), [[UV11:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT1]](<8 x s16>)
316+
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[UV8]], [[UV10]]
320317
; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
321-
; CHECK-NEXT: [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8), [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UV8]](<4 x s8>)
322-
; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[UV18]](s8), [[UV19]](s8), [[UV20]](s8), [[UV21]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
323-
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR4]](<8 x s8>)
324-
; CHECK-NEXT: [[UV22:%[0-9]+]]:_(<4 x s16>), [[UV23:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT2]](<8 x s16>)
325-
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC9]], [[UV22]]
318+
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[SHUF]](<8 x s8>)
319+
; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<4 x s16>), [[UV13:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT2]](<8 x s16>)
320+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC9]], [[UV12]]
326321
; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
327322
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC10]], [[XOR]]
328323
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND]], [[AND1]]
329324
; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[OR]](<4 x s16>)
330325
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
331-
; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
332-
; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_AND [[ANYEXT3]], [[BUILD_VECTOR5]]
326+
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
327+
; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_AND [[ANYEXT3]], [[BUILD_VECTOR3]]
333328
; CHECK-NEXT: $q0 = COPY %zext_select(<4 x s32>)
334329
; CHECK-NEXT: RET_ReallyLR implicit $q0
335330
%w0:_(s32) = COPY $w0

llvm/test/CodeGen/AArch64/bswap.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,8 +179,6 @@ define <2 x i16> @bswap_v2i16(<2 x i16> %a){
179179
; CHECK-GI: // %bb.0: // %entry
180180
; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h
181181
; CHECK-GI-NEXT: rev16 v0.8b, v0.8b
182-
; CHECK-GI-NEXT: mov h1, v0.h[1]
183-
; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
184182
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
185183
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
186184
; CHECK-GI-NEXT: ret

llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,6 @@ define {<2 x half>, <2 x half>} @vector_deinterleave_v2f16_v4f16(<4 x half> %vec
1818
; CHECK-GI: // %bb.0:
1919
; CHECK-GI-NEXT: uzp1 v2.4h, v0.4h, v0.4h
2020
; CHECK-GI-NEXT: uzp2 v1.4h, v0.4h, v0.4h
21-
; CHECK-GI-NEXT: mov h0, v2.h[1]
22-
; CHECK-GI-NEXT: mov h3, v1.h[1]
23-
; CHECK-GI-NEXT: mov v2.h[1], v0.h[0]
24-
; CHECK-GI-NEXT: mov v1.h[1], v3.h[0]
25-
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1
2621
; CHECK-GI-NEXT: fmov d0, d2
2722
; CHECK-GI-NEXT: ret
2823
%retval = call {<2 x half>, <2 x half>} @llvm.vector.deinterleave2.v4f16(<4 x half> %vec)

llvm/test/CodeGen/AArch64/fpext.ll

Lines changed: 13 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -376,36 +376,27 @@ define <4 x double> @fpext_v4f16_v4f64(<4 x half> %a) {
376376
; CHECK-GI-LABEL: fpext_v4f16_v4f64:
377377
; CHECK-GI: // %bb.0: // %entry
378378
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
379-
; CHECK-GI-NEXT: mov s1, v0.s[1]
380-
; CHECK-GI-NEXT: mov h2, v0.h[1]
379+
; CHECK-GI-NEXT: mov h1, v0.h[1]
380+
; CHECK-GI-NEXT: mov h2, v0.h[2]
381+
; CHECK-GI-NEXT: mov h3, v0.h[3]
381382
; CHECK-GI-NEXT: fcvt d0, h0
382-
; CHECK-GI-NEXT: mov h3, v1.h[1]
383-
; CHECK-GI-NEXT: fcvt d2, h2
384-
; CHECK-GI-NEXT: fcvt d1, h1
385-
; CHECK-GI-NEXT: fcvt d3, h3
386-
; CHECK-GI-NEXT: mov v0.d[1], v2.d[0]
387-
; CHECK-GI-NEXT: mov v1.d[1], v3.d[0]
383+
; CHECK-GI-NEXT: fcvt d4, h1
384+
; CHECK-GI-NEXT: fcvt d1, h2
385+
; CHECK-GI-NEXT: fcvt d2, h3
386+
; CHECK-GI-NEXT: mov v0.d[1], v4.d[0]
387+
; CHECK-GI-NEXT: mov v1.d[1], v2.d[0]
388388
; CHECK-GI-NEXT: ret
389389
entry:
390390
%c = fpext <4 x half> %a to <4 x double>
391391
ret <4 x double> %c
392392
}
393393

394394
define <2 x float> @fpext_v2f16_v2f32(<2 x half> %a) {
395-
; CHECK-SD-LABEL: fpext_v2f16_v2f32:
396-
; CHECK-SD: // %bb.0: // %entry
397-
; CHECK-SD-NEXT: fcvtl v0.4s, v0.4h
398-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
399-
; CHECK-SD-NEXT: ret
400-
;
401-
; CHECK-GI-LABEL: fpext_v2f16_v2f32:
402-
; CHECK-GI: // %bb.0: // %entry
403-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
404-
; CHECK-GI-NEXT: mov h1, v0.h[1]
405-
; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
406-
; CHECK-GI-NEXT: fcvtl v0.4s, v0.4h
407-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
408-
; CHECK-GI-NEXT: ret
395+
; CHECK-LABEL: fpext_v2f16_v2f32:
396+
; CHECK: // %bb.0: // %entry
397+
; CHECK-NEXT: fcvtl v0.4s, v0.4h
398+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
399+
; CHECK-NEXT: ret
409400
entry:
410401
%c = fpext <2 x half> %a to <2 x float>
411402
ret <2 x float> %c

0 commit comments

Comments
 (0)