Skip to content

Commit 73056f2

Browse files
committed
[AArch64][GlobalISel] Simplify/nuke the merge/unmerge legalizer rules.
These rules were originally written when the new predicate based legalizer was introduced in an attempt to preserve existing behaviour. It wasn't properly kept up to date as things like vector support was split out into G_CONCAT_VECTORS, and frankly, even if it was, it was too complex. It's much easier to start from scratch with what we can actually support, which is just a few type combinations. Anything illegal we should either legalize, or should be eliminated as a side effect of artifact combination. Differential Revision: https://reviews.llvm.org/D107937
1 parent b062d63 commit 73056f2

File tree

4 files changed

+46
-83
lines changed

4 files changed

+46
-83
lines changed

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 21 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
4545
const LLT s32 = LLT::scalar(32);
4646
const LLT s64 = LLT::scalar(64);
4747
const LLT s128 = LLT::scalar(128);
48-
const LLT s256 = LLT::scalar(256);
4948
const LLT v16s8 = LLT::fixed_vector(16, 8);
5049
const LLT v8s8 = LLT::fixed_vector(8, 8);
5150
const LLT v4s8 = LLT::fixed_vector(4, 8);
@@ -534,76 +533,30 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
534533
for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
535534
unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
536535
unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
537-
538-
auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
539-
const LLT &Ty = Query.Types[TypeIdx];
540-
if (Ty.isVector()) {
541-
const LLT &EltTy = Ty.getElementType();
542-
if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
543-
return true;
544-
if (!isPowerOf2_32(EltTy.getSizeInBits()))
545-
return true;
546-
}
547-
return false;
548-
};
549-
550-
// FIXME: This rule is horrible, but specifies the same as what we had
551-
// before with the particularly strange definitions removed (e.g.
552-
// s8 = G_MERGE_VALUES s32, s32).
553-
// Part of the complexity comes from these ops being extremely flexible. For
554-
// example, you can build/decompose vectors with it, concatenate vectors,
555-
// etc. and in addition to this you can also bitcast with it at the same
556-
// time. We've been considering breaking it up into multiple ops to make it
557-
// more manageable throughout the backend.
558536
getActionDefinitionsBuilder(Op)
559-
// Break up vectors with weird elements into scalars
560-
.fewerElementsIf(
561-
[=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
562-
scalarize(0))
563-
.fewerElementsIf(
564-
[=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
565-
scalarize(1))
566-
// Clamp the big scalar to s8-s128 and make it a power of 2.
567-
.clampScalar(BigTyIdx, s8, s128)
568-
.widenScalarIf(
569-
[=](const LegalityQuery &Query) {
570-
const LLT &Ty = Query.Types[BigTyIdx];
571-
return !isPowerOf2_32(Ty.getSizeInBits()) &&
572-
Ty.getSizeInBits() % 64 != 0 && Ty.isScalar();
573-
},
574-
[=](const LegalityQuery &Query) {
575-
// Pick the next power of 2, or a multiple of 64 over 128.
576-
// Whichever is smaller.
577-
const LLT &Ty = Query.Types[BigTyIdx];
578-
unsigned NewSizeInBits = 1
579-
<< Log2_32_Ceil(Ty.getSizeInBits() + 1);
580-
if (NewSizeInBits >= 256) {
581-
unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
582-
if (RoundedTo < NewSizeInBits)
583-
NewSizeInBits = RoundedTo;
584-
}
585-
return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
586-
})
587-
// Clamp the little scalar to s8-s256 and make it a power of 2. It's not
588-
// worth considering the multiples of 64 since 2*192 and 2*384 are not
589-
// valid.
590-
.clampScalar(LitTyIdx, s8, s256)
591-
.widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
592-
// So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
593-
// s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
594-
// At this point it's simple enough to accept the legal types.
595-
.legalIf([=](const LegalityQuery &Query) {
596-
const LLT &BigTy = Query.Types[BigTyIdx];
597-
const LLT &LitTy = Query.Types[LitTyIdx];
598-
if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
537+
.widenScalarToNextPow2(LitTyIdx, 8)
538+
.widenScalarToNextPow2(BigTyIdx, 32)
539+
.clampScalar(LitTyIdx, s8, s64)
540+
.clampScalar(BigTyIdx, s32, s128)
541+
.legalIf([=](const LegalityQuery &Q) {
542+
switch (Q.Types[BigTyIdx].getSizeInBits()) {
543+
case 32:
544+
case 64:
545+
case 128:
546+
break;
547+
default:
599548
return false;
600-
if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
549+
}
550+
switch (Q.Types[LitTyIdx].getSizeInBits()) {
551+
case 8:
552+
case 16:
553+
case 32:
554+
case 64:
555+
return true;
556+
default:
601557
return false;
602-
return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
603-
})
604-
// Any vectors left are the wrong size. Scalarize them.
605-
.scalarize(0)
606-
.scalarize(1);
558+
}
559+
});
607560
}
608561

609562
getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)

llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -51,19 +51,6 @@ end:
5151
br label %block
5252
}
5353

54-
; Currently can't handle vector lengths that aren't an exact multiple of
55-
; natively supported vector lengths. Test that the fall-back works for those.
56-
; FALLBACK-WITH-REPORT-ERR-G_IMPLICIT_DEF-LEGALIZABLE: (FIXME: this is what is expected once we can legalize non-pow-of-2 G_IMPLICIT_DEF) remark: <unknown>:0:0: unable to legalize instruction: %1:_(<7 x s64>) = G_ADD %0, %0 (in function: nonpow2_vector_add_fewerelements
57-
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %47:_(<14 x s64>) = G_CONCAT_VECTORS %41:_(<2 x s64>), %42:_(<2 x s64>), %43:_(<2 x s64>), %44:_(<2 x s64>), %29:_(<2 x s64>), %29:_(<2 x s64>), %29:_(<2 x s64>) (in function: nonpow2_vector_add_fewerelements)
58-
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_vector_add_fewerelements
59-
; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_vector_add_fewerelements:
60-
define void @nonpow2_vector_add_fewerelements() {
61-
%dummy = add <7 x i64> undef, undef
62-
%ex = extractelement <7 x i64> %dummy, i64 0
63-
store i64 %ex, i64* undef
64-
ret void
65-
}
66-
6754
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: RET_ReallyLR implicit $x0 (in function: strict_align_feature)
6855
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for strict_align_feature
6956
; FALLBACK-WITH-REPORT-OUT-LABEL: strict_align_feature

llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -march=aarch64 -O0 -run-pass=legalizer %s -o - | FileCheck %s
2+
# RUN: llc -march=aarch64 -O0 -run-pass=legalizer -global-isel-abort=2 %s -o - | FileCheck %s
33

44
---
55
name: test_merge_s4
@@ -26,3 +26,26 @@ body: |
2626
%4:_(s64) = G_ANYEXT %3
2727
$x0 = COPY %4
2828
...
29+
---
30+
name: test_merge_s16_s8
31+
body: |
32+
bb.0:
33+
; CHECK-LABEL: name: test_merge_s16_s8
34+
; CHECK: %a:_(s32) = COPY $w0
35+
; CHECK: %b:_(s32) = COPY $w1
36+
; CHECK: %a_t:_(s8) = G_TRUNC %a(s32)
37+
; CHECK: %b_t:_(s8) = G_TRUNC %b(s32)
38+
; CHECK: %m:_(s16) = G_MERGE_VALUES %a_t(s8), %b_t(s8)
39+
; CHECK: %ext:_(s64) = G_ANYEXT %m(s16)
40+
; CHECK: $x0 = COPY %ext(s64)
41+
42+
; This isn't legal but we don't support widening the destination type.
43+
%a:_(s32) = COPY $w0
44+
%b:_(s32) = COPY $w1
45+
%a_t:_(s8) = G_TRUNC %a
46+
%b_t:_(s8) = G_TRUNC %b
47+
48+
%m:_(s16) = G_MERGE_VALUES %a_t, %b_t
49+
%ext:_(s64) = G_ANYEXT %m
50+
$x0 = COPY %ext
51+
...

llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,9 +262,9 @@ body: |
262262
; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
263263
; CHECK: [[EVEC2:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR4]](<2 x s64>), [[C1]](s64)
264264
; CHECK: [[EVEC3:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<2 x s64>), [[C3]](s64)
265+
; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3)
265266
; CHECK: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC]](s64), [[EVEC1]](s64)
266267
; CHECK: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC2]](s64), [[EVEC3]](s64)
267-
; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3)
268268
; CHECK: G_STORE [[BUILD_VECTOR6]](<2 x s64>), [[COPY8]](p0) :: (store (<2 x s64>), align 64)
269269
; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
270270
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C5]](s64)

0 commit comments

Comments
 (0)