Skip to content

Commit dca4dfc

Browse files
committed
[AArch64][GISel] Separate legalize actions for G_FREEZE from G_IMPLICIT_DEF
It does not make sense to scalarize G_FREEZE, as doing so generates pairs of G_UNMERGE_VALUES and G_BUILD_VECTOR instructions that are difficult to optimize, especially when operations like G_TRUNC sit after the G_UNMERGE_VALUES but before the G_FREEZE. Instead, it is better to legalize G_FREEZE the same way other vector operations are legalized, as it gets lowered to a COPY during instruction selection anyway. This issue was encountered when looking at the TSVC benchmark, where the legalization of G_FREEZE caused the generation of unnecessary MOVs that adversely affected performance.
1 parent f42a3fe commit dca4dfc

File tree

3 files changed

+41
-47
lines changed

3 files changed

+41
-47
lines changed

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
8787
const bool HasCSSC = ST.hasCSSC();
8888
const bool HasRCPC3 = ST.hasRCPC3();
8989

90-
getActionDefinitionsBuilder(
91-
{G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
90+
getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_CONSTANT_FOLD_BARRIER})
9291
.legalFor({p0, s8, s16, s32, s64})
9392
.legalFor(PackedVectorAllTypeList)
9493
.widenScalarToNextPow2(0)
@@ -106,6 +105,18 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
106105
return std::make_pair(0, EltTy);
107106
});
108107

108+
getActionDefinitionsBuilder(G_FREEZE)
109+
.legalFor({p0, s8, s16, s32, s64})
110+
.legalFor(PackedVectorAllTypeList)
111+
.widenScalarToNextPow2(0)
112+
.clampScalar(0, s8, s64)
113+
.moreElementsToNextPow2(0)
114+
.widenVectorEltsToVectorMinSize(0, 64)
115+
.clampNumElements(0, v8s8, v16s8)
116+
.clampNumElements(0, v4s16, v8s16)
117+
.clampNumElements(0, v2s32, v4s32)
118+
.clampNumElements(0, v2s64, v2s64);
119+
109120
getActionDefinitionsBuilder(G_PHI)
110121
.legalFor({p0, s16, s32, s64})
111122
.legalFor(PackedVectorAllTypeList)

llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir

Lines changed: 26 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -140,25 +140,14 @@ body: |
140140
; CHECK: liveins: $q0
141141
; CHECK-NEXT: {{ $}}
142142
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
143-
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s8) = G_FREEZE [[DEF]]
144-
; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s8) = G_FREEZE [[DEF]]
145-
; CHECK-NEXT: [[FREEZE2:%[0-9]+]]:_(s8) = G_FREEZE [[DEF]]
146-
; CHECK-NEXT: [[FREEZE3:%[0-9]+]]:_(s8) = G_FREEZE [[DEF]]
147-
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[FREEZE]](s8)
148-
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[FREEZE1]](s8)
149-
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[FREEZE2]](s8)
150-
; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[FREEZE3]](s8)
151-
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
152-
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16), [[DEF1]](s16), [[DEF1]](s16)
153-
; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[BUILD_VECTOR]](<4 x s16>)
154-
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[ANYEXT4]](<4 x s32>)
155-
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT2]](s16), [[ANYEXT3]](s16), [[DEF1]](s16), [[DEF1]](s16)
156-
; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[BUILD_VECTOR1]](<4 x s16>)
157-
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[ANYEXT5]](<4 x s32>)
158-
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV]](<2 x s32>), [[UV2]](<2 x s32>)
143+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
144+
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR]](<8 x s8>)
145+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
146+
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[UV]]
147+
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[FREEZE]](<4 x s16>)
159148
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
160-
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
161-
; CHECK-NEXT: %ext:_(<4 x s32>) = G_AND [[CONCAT_VECTORS]], [[BUILD_VECTOR2]]
149+
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
150+
; CHECK-NEXT: %ext:_(<4 x s32>) = G_AND [[ANYEXT1]], [[BUILD_VECTOR1]]
162151
; CHECK-NEXT: $q0 = COPY %ext(<4 x s32>)
163152
%x:_(<4 x s1>) = G_IMPLICIT_DEF
164153
%freeze:_(<4 x s1>) = G_FREEZE %x
@@ -174,13 +163,19 @@ body: |
174163
; CHECK: liveins: $q0
175164
; CHECK-NEXT: {{ $}}
176165
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
177-
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s8) = G_FREEZE [[DEF]]
178-
; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s8) = G_FREEZE [[DEF]]
179-
; CHECK-NEXT: [[FREEZE2:%[0-9]+]]:_(s8) = G_FREEZE [[DEF]]
166+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
167+
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR]](<8 x s8>)
168+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
169+
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[UV]]
170+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[FREEZE]](<4 x s16>)
180171
; CHECK-NEXT: %undef:_(s32) = G_IMPLICIT_DEF
181-
; CHECK-NEXT: %ext0:_(s32) = G_ZEXT [[FREEZE]](s8)
182-
; CHECK-NEXT: %ext1:_(s32) = G_ZEXT [[FREEZE1]](s8)
183-
; CHECK-NEXT: %ext2:_(s32) = G_ZEXT [[FREEZE2]](s8)
172+
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16)
173+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
174+
; CHECK-NEXT: %ext0:_(s32) = G_AND [[ANYEXT1]], [[C]]
175+
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16)
176+
; CHECK-NEXT: %ext1:_(s32) = G_AND [[ANYEXT2]], [[C]]
177+
; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16)
178+
; CHECK-NEXT: %ext2:_(s32) = G_AND [[ANYEXT3]], [[C]]
184179
; CHECK-NEXT: %res:_(<4 x s32>) = G_BUILD_VECTOR %ext0(s32), %ext1(s32), %ext2(s32), %undef(s32)
185180
; CHECK-NEXT: $q0 = COPY %res(<4 x s32>)
186181
%x:_(<3 x s8>) = G_IMPLICIT_DEF
@@ -205,25 +200,14 @@ body: |
205200
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
206201
; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(<4 x s32>) = nnan ninf nsz arcp contract afn reassoc G_FCMP floatpred(olt), [[COPY]](<4 x s32>), [[BUILD_VECTOR]]
207202
; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(<4 x s32>) = nnan ninf nsz arcp contract afn reassoc G_FCMP floatpred(ogt), [[COPY1]](<4 x s32>), [[COPY]]
208-
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FCMP1]](<4 x s32>)
209-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32)
210-
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s8) = G_FREEZE [[TRUNC]]
211-
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32)
212-
; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s8) = G_FREEZE [[TRUNC1]]
213-
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32)
214-
; CHECK-NEXT: [[FREEZE2:%[0-9]+]]:_(s8) = G_FREEZE [[TRUNC2]]
215-
; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32)
216-
; CHECK-NEXT: [[FREEZE3:%[0-9]+]]:_(s8) = G_FREEZE [[TRUNC3]]
217-
; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[FCMP]](<4 x s32>)
218-
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
219-
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[FREEZE]](s8), [[FREEZE1]](s8), [[FREEZE2]](s8), [[FREEZE3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
220-
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR1]](<8 x s8>)
221-
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s16>), [[UV5:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
222-
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC4]], [[UV4]]
223-
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[AND]](<4 x s16>)
203+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[FCMP1]](<4 x s32>)
204+
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[TRUNC]]
205+
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[FCMP]](<4 x s32>)
206+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC1]], [[FREEZE]]
207+
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[AND]](<4 x s16>)
224208
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
225-
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
226-
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[ANYEXT1]], [[BUILD_VECTOR2]]
209+
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
210+
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[ANYEXT]], [[BUILD_VECTOR1]]
227211
; CHECK-NEXT: $q0 = COPY [[AND1]](<4 x s32>)
228212
%1:_(<4 x s32>) = COPY $q0
229213
%2:_(<4 x s32>) = COPY $q1

llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -131,9 +131,8 @@
131131
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
132132
#
133133
# DEBUG-NEXT: G_FREEZE (opcode {{[0-9]+}}): 1 type index, 0 imm indices
134-
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
135-
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
136-
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
134+
# DEBUG-NEXT: .. the first uncovered type index: {{[0-9]+}}, OK
135+
# DEBUG-NEXT: .. the first uncovered imm index: {{[0-9]+}}, OK
137136

138137
# DEBUG-NEXT: G_CONSTANT_FOLD_BARRIER (opcode {{[0-9]+}}): 1 type index, 0 imm indices
139138
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}

0 commit comments

Comments
 (0)