Skip to content

Commit ca2e053

Browse files
author
Jessica Paquette
committed
[AArch64][GlobalISel] Legalize wide vector G_PHIs
Clamp the max number of elements when legalizing G_PHI. This allows us to legalize some common fallbacks like 4 x s64.

Here's an example: https://godbolt.org/z/6YocsEYTd

Had to add -global-isel-abort=0 to legalize-phi.mir to account for the G_EXTRACT_VECTOR_ELT from the 32 x s8 G_PHI.

Differential Revision: https://reviews.llvm.org/D107508
1 parent 7df405e commit ca2e053

File tree

2 files changed

+234
-3
lines changed

2 files changed

+234
-3
lines changed

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -95,10 +95,17 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
9595
return std::make_pair(0, EltTy);
9696
});
9797

98-
getActionDefinitionsBuilder(G_PHI).legalFor({p0, s16, s32, s64})
98+
getActionDefinitionsBuilder(G_PHI)
99+
.legalFor({p0, s16, s32, s64})
99100
.legalFor(PackedVectorAllTypeList)
100101
.widenScalarToNextPow2(0)
101-
.clampScalar(0, s16, s64);
102+
.clampScalar(0, s16, s64)
103+
// Maximum: sN * k = 128
104+
.clampMaxNumElements(0, s8, 16)
105+
.clampMaxNumElements(0, s16, 8)
106+
.clampMaxNumElements(0, s32, 4)
107+
.clampMaxNumElements(0, s64, 2)
108+
.clampMaxNumElements(0, p0, 2);
102109

103110
getActionDefinitionsBuilder(G_BSWAP)
104111
.legalFor({s32, s64, v4s32, v2s32, v2s64})

llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir

Lines changed: 225 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -O0 -mtriple=aarch64-unknown-unknown -verify-machineinstrs -global-isel-abort=1 -run-pass=legalizer %s -o - | FileCheck %s
2+
# RUN: llc -O0 -mtriple=aarch64-unknown-unknown -verify-machineinstrs -global-isel-abort=0 -run-pass=legalizer %s -o - | FileCheck %s
33
---
44
name: legalize_phi
55
alignment: 4
@@ -665,3 +665,227 @@ body: |
665665
%trunc:_(s64) = G_TRUNC %phi
666666
$x0 = COPY %trunc
667667
RET_ReallyLR implicit $x0
668+
...
669+
---
670+
name: v4s64
671+
alignment: 4
672+
tracksRegLiveness: true
673+
body: |
674+
; CHECK-LABEL: name: v4s64
675+
; CHECK: bb.0:
676+
; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000)
677+
; CHECK: liveins: $x0, $x1
678+
; CHECK: %ptr1:_(p0) = COPY $x1
679+
; CHECK: %ptr2:_(p0) = COPY $x0
680+
; CHECK: %cond:_(s1) = G_IMPLICIT_DEF
681+
; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD %ptr1(p0) :: (load (<2 x s64>), align 32)
682+
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
683+
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr1, [[C]](s64)
684+
; CHECK: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16)
685+
; CHECK: G_BRCOND %cond(s1), %bb.2
686+
; CHECK: G_BR %bb.1
687+
; CHECK: bb.1:
688+
; CHECK: successors: %bb.2(0x80000000)
689+
; CHECK: [[LOAD2:%[0-9]+]]:_(<2 x s64>) = G_LOAD %ptr2(p0) :: (load (<2 x s64>), align 32)
690+
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
691+
; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr2, [[C1]](s64)
692+
; CHECK: [[LOAD3:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD1]](p0) :: (load (<2 x s64>) from unknown-address + 16)
693+
; CHECK: bb.2:
694+
; CHECK: [[PHI:%[0-9]+]]:_(<2 x s64>) = G_PHI [[LOAD2]](<2 x s64>), %bb.1, [[LOAD]](<2 x s64>), %bb.0
695+
; CHECK: [[PHI1:%[0-9]+]]:_(<2 x s64>) = G_PHI [[LOAD3]](<2 x s64>), %bb.1, [[LOAD1]](<2 x s64>), %bb.0
696+
; CHECK: $q0 = COPY [[PHI]](<2 x s64>)
697+
; CHECK: $q1 = COPY [[PHI1]](<2 x s64>)
698+
; CHECK: RET_ReallyLR implicit $q0, implicit $q1
699+
bb.0:
700+
successors: %bb.1(0x50000000), %bb.2(0x30000000)
701+
liveins: $x0, $x1
702+
703+
%ptr1:_(p0) = COPY $x1
704+
%ptr2:_(p0) = COPY $x0
705+
%cond:_(s1) = G_IMPLICIT_DEF
706+
%val_1:_(<4 x s64>) = G_LOAD %ptr1(p0) :: (load (<4 x s64>))
707+
G_BRCOND %cond(s1), %bb.2
708+
G_BR %bb.1
709+
bb.1:
710+
%val_2:_(<4 x s64>) = G_LOAD %ptr2(p0) :: (load (<4 x s64>))
711+
bb.2:
712+
%phi:_(<4 x s64>) = G_PHI %val_2(<4 x s64>), %bb.1, %val_1(<4 x s64>), %bb.0
713+
%unmerge_1:_(<2 x s64>), %unmerge_2:_(<2 x s64>) = G_UNMERGE_VALUES %phi(<4 x s64>)
714+
$q0 = COPY %unmerge_1(<2 x s64>)
715+
$q1 = COPY %unmerge_2(<2 x s64>)
716+
RET_ReallyLR implicit $q0, implicit $q1
717+
...
718+
---
719+
name: v8s32
720+
alignment: 4
721+
tracksRegLiveness: true
722+
body: |
723+
; CHECK-LABEL: name: v8s32
724+
; CHECK: bb.0:
725+
; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000)
726+
; CHECK: liveins: $x0, $x1
727+
; CHECK: %cond:_(s1) = G_IMPLICIT_DEF
728+
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
729+
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
730+
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
731+
; CHECK: G_BRCOND %cond(s1), %bb.2
732+
; CHECK: G_BR %bb.1
733+
; CHECK: bb.1:
734+
; CHECK: successors: %bb.2(0x80000000)
735+
; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
736+
; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32)
737+
; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32)
738+
; CHECK: bb.2:
739+
; CHECK: [[PHI:%[0-9]+]]:_(<4 x s32>) = G_PHI [[BUILD_VECTOR2]](<4 x s32>), %bb.1, [[BUILD_VECTOR]](<4 x s32>), %bb.0
740+
; CHECK: [[PHI1:%[0-9]+]]:_(<4 x s32>) = G_PHI [[BUILD_VECTOR3]](<4 x s32>), %bb.1, [[BUILD_VECTOR1]](<4 x s32>), %bb.0
741+
; CHECK: %one:_(s32) = G_CONSTANT i32 1
742+
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
743+
; CHECK: %extract:_(s32) = G_EXTRACT_VECTOR_ELT [[PHI]](<4 x s32>), [[C]](s64)
744+
; CHECK: $w0 = COPY %extract(s32)
745+
; CHECK: RET_ReallyLR implicit $w0
746+
bb.0:
747+
successors: %bb.1(0x50000000), %bb.2(0x30000000)
748+
liveins: $x0, $x1
749+
%cond:_(s1) = G_IMPLICIT_DEF
750+
%val_1:_(<8 x s32>) = G_IMPLICIT_DEF
751+
G_BRCOND %cond(s1), %bb.2
752+
G_BR %bb.1
753+
bb.1:
754+
%val_2:_(<8 x s32>) = G_IMPLICIT_DEF
755+
bb.2:
756+
%phi:_(<8 x s32>) = G_PHI %val_2(<8 x s32>), %bb.1, %val_1(<8 x s32>), %bb.0
757+
%one:_(s32) = G_CONSTANT i32 1
758+
%extract:_(s32) = G_EXTRACT_VECTOR_ELT %phi(<8 x s32>), %one(s32)
759+
$w0 = COPY %extract
760+
RET_ReallyLR implicit $w0
761+
...
762+
---
763+
name: v16s16
764+
alignment: 4
765+
tracksRegLiveness: true
766+
body: |
767+
; CHECK-LABEL: name: v16s16
768+
; CHECK: bb.0:
769+
; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000)
770+
; CHECK: liveins: $x0, $x1
771+
; CHECK: %cond:_(s1) = G_IMPLICIT_DEF
772+
; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
773+
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16)
774+
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16)
775+
; CHECK: G_BRCOND %cond(s1), %bb.2
776+
; CHECK: G_BR %bb.1
777+
; CHECK: bb.1:
778+
; CHECK: successors: %bb.2(0x80000000)
779+
; CHECK: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
780+
; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF1]](s16), [[DEF1]](s16), [[DEF1]](s16), [[DEF1]](s16), [[DEF1]](s16), [[DEF1]](s16), [[DEF1]](s16), [[DEF1]](s16)
781+
; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF1]](s16), [[DEF1]](s16), [[DEF1]](s16), [[DEF1]](s16), [[DEF1]](s16), [[DEF1]](s16), [[DEF1]](s16), [[DEF1]](s16)
782+
; CHECK: bb.2:
783+
; CHECK: [[PHI:%[0-9]+]]:_(<8 x s16>) = G_PHI [[BUILD_VECTOR2]](<8 x s16>), %bb.1, [[BUILD_VECTOR]](<8 x s16>), %bb.0
784+
; CHECK: [[PHI1:%[0-9]+]]:_(<8 x s16>) = G_PHI [[BUILD_VECTOR3]](<8 x s16>), %bb.1, [[BUILD_VECTOR1]](<8 x s16>), %bb.0
785+
; CHECK: %one:_(s16) = G_CONSTANT i16 1
786+
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
787+
; CHECK: %extract:_(s16) = G_EXTRACT_VECTOR_ELT [[PHI]](<8 x s16>), [[C]](s64)
788+
; CHECK: $h0 = COPY %extract(s16)
789+
; CHECK: RET_ReallyLR implicit $h0
790+
bb.0:
791+
successors: %bb.1(0x50000000), %bb.2(0x30000000)
792+
liveins: $x0, $x1
793+
%cond:_(s1) = G_IMPLICIT_DEF
794+
%val_1:_(<16 x s16>) = G_IMPLICIT_DEF
795+
G_BRCOND %cond(s1), %bb.2
796+
G_BR %bb.1
797+
bb.1:
798+
%val_2:_(<16 x s16>) = G_IMPLICIT_DEF
799+
bb.2:
800+
%phi:_(<16 x s16>) = G_PHI %val_2(<16 x s16>), %bb.1, %val_1(<16 x s16>), %bb.0
801+
%one:_(s16) = G_CONSTANT i16 1
802+
%extract:_(s16) = G_EXTRACT_VECTOR_ELT %phi(<16 x s16>), %one(s16)
803+
$h0 = COPY %extract
804+
RET_ReallyLR implicit $h0
805+
...
806+
---
807+
name: v32s8
808+
alignment: 4
809+
tracksRegLiveness: true
810+
body: |
811+
; CHECK-LABEL: name: v32s8
812+
; CHECK: bb.0:
813+
; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000)
814+
; CHECK: liveins: $x0, $x1
815+
; CHECK: %cond:_(s1) = G_IMPLICIT_DEF
816+
; CHECK: %val_1:_(<32 x s8>) = G_IMPLICIT_DEF
817+
; CHECK: G_BRCOND %cond(s1), %bb.2
818+
; CHECK: G_BR %bb.1
819+
; CHECK: bb.1:
820+
; CHECK: successors: %bb.2(0x80000000)
821+
; CHECK: %val_2:_(<32 x s8>) = G_IMPLICIT_DEF
822+
; CHECK: bb.2:
823+
; CHECK: %phi:_(<32 x s8>) = G_PHI %val_2(<32 x s8>), %bb.1, %val_1(<32 x s8>), %bb.0
824+
; CHECK: %one:_(s8) = G_CONSTANT i8 1
825+
; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT %one(s8)
826+
; CHECK: %extract:_(s8) = G_EXTRACT_VECTOR_ELT %phi(<32 x s8>), [[SEXT]](s64)
827+
; CHECK: $b0 = COPY %extract(s8)
828+
; CHECK: RET_ReallyLR implicit $b0
829+
bb.0:
830+
successors: %bb.1(0x50000000), %bb.2(0x30000000)
831+
liveins: $x0, $x1
832+
%cond:_(s1) = G_IMPLICIT_DEF
833+
%val_1:_(<32 x s8>) = G_IMPLICIT_DEF
834+
G_BRCOND %cond(s1), %bb.2
835+
G_BR %bb.1
836+
bb.1:
837+
%val_2:_(<32 x s8>) = G_IMPLICIT_DEF
838+
bb.2:
839+
%phi:_(<32 x s8>) = G_PHI %val_2(<32 x s8>), %bb.1, %val_1(<32 x s8>), %bb.0
840+
%one:_(s8) = G_CONSTANT i8 1
841+
%extract:_(s8) = G_EXTRACT_VECTOR_ELT %phi(<32 x s8>), %one(s8)
842+
$b0 = COPY %extract
843+
RET_ReallyLR implicit $b0
844+
...
845+
---
846+
name: v4p0
847+
alignment: 4
848+
tracksRegLiveness: true
849+
body: |
850+
; CHECK-LABEL: name: v4p0
851+
; CHECK: bb.0:
852+
; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000)
853+
; CHECK: liveins: $x0, $x1
854+
; CHECK: %ptr1:_(p0) = COPY $x1
855+
; CHECK: %ptr2:_(p0) = COPY $x0
856+
; CHECK: %cond:_(s1) = G_IMPLICIT_DEF
857+
; CHECK: %val_1:_(<4 x p0>) = G_LOAD %ptr1(p0) :: (load (<4 x p0>))
858+
; CHECK: [[UV:%[0-9]+]]:_(<2 x p0>), [[UV1:%[0-9]+]]:_(<2 x p0>) = G_UNMERGE_VALUES %val_1(<4 x p0>)
859+
; CHECK: G_BRCOND %cond(s1), %bb.2
860+
; CHECK: G_BR %bb.1
861+
; CHECK: bb.1:
862+
; CHECK: successors: %bb.2(0x80000000)
863+
; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD %ptr2(p0) :: (load (<4 x s64>))
864+
; CHECK: %val_2:_(<4 x p0>) = G_BITCAST [[LOAD]](<4 x s64>)
865+
; CHECK: [[UV2:%[0-9]+]]:_(<2 x p0>), [[UV3:%[0-9]+]]:_(<2 x p0>) = G_UNMERGE_VALUES %val_2(<4 x p0>)
866+
; CHECK: bb.2:
867+
; CHECK: [[PHI:%[0-9]+]]:_(<2 x p0>) = G_PHI [[UV2]](<2 x p0>), %bb.1, [[UV]](<2 x p0>), %bb.0
868+
; CHECK: [[PHI1:%[0-9]+]]:_(<2 x p0>) = G_PHI [[UV3]](<2 x p0>), %bb.1, [[UV1]](<2 x p0>), %bb.0
869+
; CHECK: %phi:_(<4 x p0>) = G_CONCAT_VECTORS [[PHI]](<2 x p0>), [[PHI1]](<2 x p0>)
870+
; CHECK: %unmerge_1:_(<2 x p0>), %unmerge_2:_(<2 x p0>) = G_UNMERGE_VALUES %phi(<4 x p0>)
871+
; CHECK: $q0 = COPY %unmerge_1(<2 x p0>)
872+
; CHECK: $q1 = COPY %unmerge_2(<2 x p0>)
873+
; CHECK: RET_ReallyLR implicit $q0, implicit $q1
874+
bb.0:
875+
successors: %bb.1(0x50000000), %bb.2(0x30000000)
876+
liveins: $x0, $x1
877+
878+
%ptr1:_(p0) = COPY $x1
879+
%ptr2:_(p0) = COPY $x0
880+
%cond:_(s1) = G_IMPLICIT_DEF
881+
%val_1:_(<4 x p0>) = G_LOAD %ptr1(p0) :: (load (<4 x p0>))
882+
G_BRCOND %cond(s1), %bb.2
883+
G_BR %bb.1
884+
bb.1:
885+
%val_2:_(<4 x p0>) = G_LOAD %ptr2(p0) :: (load (<4 x p0>))
886+
bb.2:
887+
%phi:_(<4 x p0>) = G_PHI %val_2(<4 x p0>), %bb.1, %val_1(<4 x p0>), %bb.0
888+
%unmerge_1:_(<2 x p0>), %unmerge_2:_(<2 x p0>) = G_UNMERGE_VALUES %phi(<4 x p0>)
889+
$q0 = COPY %unmerge_1(<2 x p0>)
890+
$q1 = COPY %unmerge_2(<2 x p0>)
891+
RET_ReallyLR implicit $q0, implicit $q1

0 commit comments

Comments
 (0)