Skip to content

Commit 19d7ab1

Browse files
committed
[GlobalISel] Handle sequences of trunc(sext/zext/anyext...) in artifact combiner
Handle the trunc(sext/zext/anyext ... x) -> x pattern in the artifact combiner, which avoids the extra copy instructions in https://reviews.llvm.org/D156831.
1 parent 6b5ce2c commit 19d7ab1

21 files changed

+446
-485
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h

Lines changed: 74 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "llvm/CodeGen/GlobalISel/Utils.h"
2525
#include "llvm/CodeGen/MachineRegisterInfo.h"
2626
#include "llvm/CodeGen/Register.h"
27+
#include "llvm/CodeGen/TargetOpcodes.h"
2728
#include "llvm/IR/Constants.h"
2829
#include "llvm/Support/Debug.h"
2930

@@ -235,12 +236,12 @@ class LegalizationArtifactCombiner {
235236

236237
Builder.setInstr(MI);
237238
Register DstReg = MI.getOperand(0).getReg();
239+
const LLT DstTy = MRI.getType(DstReg);
238240
Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
239241

240242
// Try to fold trunc(g_constant) when the smaller constant type is legal.
241243
auto *SrcMI = MRI.getVRegDef(SrcReg);
242244
if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
243-
const LLT DstTy = MRI.getType(DstReg);
244245
if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
245246
auto &CstVal = SrcMI->getOperand(1);
246247
Builder.buildConstant(
@@ -256,7 +257,6 @@ class LegalizationArtifactCombiner {
256257
if (auto *SrcMerge = dyn_cast<GMerge>(SrcMI)) {
257258
const Register MergeSrcReg = SrcMerge->getSourceReg(0);
258259
const LLT MergeSrcTy = MRI.getType(MergeSrcReg);
259-
const LLT DstTy = MRI.getType(DstReg);
260260

261261
// We can only fold if the types are scalar
262262
const unsigned DstSize = DstTy.getSizeInBits();
@@ -325,6 +325,23 @@ class LegalizationArtifactCombiner {
325325
return true;
326326
}
327327

328+
// trunc(ext x) -> x
329+
ArtifactValueFinder Finder(MRI, Builder, LI);
330+
if (Register FoundReg =
331+
Finder.findValueFromDef(DstReg, 0, DstTy.getSizeInBits())) {
332+
LLT FoundRegTy = MRI.getType(FoundReg);
333+
if (DstTy == FoundRegTy) {
334+
LLVM_DEBUG(dbgs() << ".. Combine G_TRUNC(G_[S,Z,ANY]EXT/G_TRUNC...): "
335+
<< MI;);
336+
337+
replaceRegOrBuildCopy(DstReg, FoundReg, MRI, Builder, UpdatedDefs,
338+
Observer);
339+
UpdatedDefs.push_back(DstReg);
340+
markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
341+
return true;
342+
}
343+
}
344+
328345
return false;
329346
}
330347

@@ -719,6 +736,55 @@ class LegalizationArtifactCombiner {
719736
return Register();
720737
}
721738

739+
/// Given a G_SEXT, G_ZEXT or G_ANYEXT op \p MI and a start bit and
740+
/// size, try to find the origin of the value defined by that start
741+
/// position and size.
742+
///
/// The search continues through the extension's source register (operand 1)
/// using the same \p StartBit and \p Size. It stops and yields the current
/// best register when the source is not a scalar, or when the requested
/// range does not fit inside the source register.
///
/// \param MI the extension instruction to look through.
/// \param StartBit first bit of the requested range.
/// \param Size width in bits of the requested range; must be non-zero.
///
743+
/// \returns a register with the requested size, or the current best
744+
/// register found during the current query.
745+
Register findValueFromExt(MachineInstr &MI, unsigned StartBit,
746+
unsigned Size) {
747+
assert(MI.getOpcode() == TargetOpcode::G_SEXT ||
748+
MI.getOpcode() == TargetOpcode::G_ZEXT ||
749+
MI.getOpcode() == TargetOpcode::G_ANYEXT);
750+
assert(Size > 0);
751+
752+
Register SrcReg = MI.getOperand(1).getReg();
753+
LLT SrcType = MRI.getType(SrcReg);
754+
unsigned SrcSize = SrcType.getSizeInBits();
755+
756+
// Currently we don't go into vectors.
757+
if (!SrcType.isScalar())
758+
return CurrentBest;
759+
760+
// The requested range reaches past the width of the source register, so
// those bits cannot be read back from the source.
if (StartBit + Size > SrcSize)
761+
return CurrentBest;
762+
763+
// The request covers the source register exactly, so the source itself is
// a valid answer; record it before looking further through its definition.
if (StartBit == 0 && SrcType.getSizeInBits() == Size)
764+
CurrentBest = SrcReg;
765+
return findValueFromDefImpl(SrcReg, StartBit, Size);
766+
}
767+
768+
/// Given a G_TRUNC op \p MI and a start bit and size, try to find
769+
/// the origin of the value defined by that start position and size.
770+
///
/// The search continues in the truncation's source register (operand 1)
/// with \p StartBit and \p Size passed through unchanged. It stops and
/// yields the current best register when the source is not a scalar.
///
/// \param MI the G_TRUNC instruction to look through.
/// \param StartBit first bit of the requested range.
/// \param Size width in bits of the requested range; must be non-zero.
///
771+
/// \returns a register with the requested size, or the current best
772+
/// register found during the current query.
773+
Register findValueFromTrunc(MachineInstr &MI, unsigned StartBit,
774+
unsigned Size) {
775+
assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
776+
assert(Size > 0);
777+
778+
Register SrcReg = MI.getOperand(1).getReg();
779+
LLT SrcType = MRI.getType(SrcReg);
780+
781+
// Currently we don't go into vectors.
782+
if (!SrcType.isScalar())
783+
return CurrentBest;
784+
785+
// Unlike the extension case, no range check or CurrentBest update happens
// here; the query is simply forwarded to the definition of the source.
return findValueFromDefImpl(SrcReg, StartBit, Size);
786+
}
787+
722788
/// Internal implementation for findValueFromDef(). findValueFromDef()
723789
/// initializes some data like the CurrentBest register, which this method
724790
/// and its callees rely upon.
@@ -759,6 +825,12 @@ class LegalizationArtifactCombiner {
759825
Size);
760826
case TargetOpcode::G_INSERT:
761827
return findValueFromInsert(*Def, StartBit, Size);
828+
case TargetOpcode::G_TRUNC:
829+
return findValueFromTrunc(*Def, StartBit, Size);
830+
case TargetOpcode::G_SEXT:
831+
case TargetOpcode::G_ZEXT:
832+
case TargetOpcode::G_ANYEXT:
833+
return findValueFromExt(*Def, StartBit, Size);
762834
default:
763835
return CurrentBest;
764836
}

llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-trunc.mir

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,3 +145,66 @@ body: |
145145
%2:_(s32) = G_TRUNC %1
146146
$vgpr0 = COPY %2
147147
...
148+
149+
---
150+
name: trunc_sext
151+
152+
body: |
153+
bb.0:
154+
; Test that trunc(sext) is replaced with sext source.
155+
; CHECK-LABEL: name: trunc_sext
156+
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
157+
; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
158+
%0:_(s32) = G_IMPLICIT_DEF
159+
%1:_(s64) = G_SEXT %0
160+
%2:_(s32) = G_TRUNC %1
161+
$vgpr0 = COPY %2
162+
...
163+
164+
---
165+
name: trunc_zext
166+
167+
body: |
168+
bb.0:
169+
; Test that trunc(zext) is replaced with zext source.
170+
; CHECK-LABEL: name: trunc_zext
171+
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
172+
; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
173+
%0:_(s32) = G_IMPLICIT_DEF
174+
%1:_(s64) = G_ZEXT %0
175+
%2:_(s32) = G_TRUNC %1
176+
$vgpr0 = COPY %2
177+
...
178+
179+
---
180+
name: trunc_anyext
181+
182+
body: |
183+
bb.0:
184+
; Test that trunc(anyext) is replaced with anyext source.
185+
; CHECK-LABEL: name: trunc_anyext
186+
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
187+
; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
188+
%0:_(s32) = G_IMPLICIT_DEF
189+
%1:_(s64) = G_ANYEXT %0
190+
%2:_(s32) = G_TRUNC %1
191+
$vgpr0 = COPY %2
192+
...
193+
194+
---
195+
name: trunc_ext
196+
197+
body: |
198+
bb.0:
199+
; Test that a chain trunc(trunc(zext(trunc(sext x)))) is replaced with the original source x.
200+
; CHECK-LABEL: name: trunc_ext
201+
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
202+
; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
203+
%0:_(s32) = G_IMPLICIT_DEF
204+
%1:_(s64) = G_SEXT %0
205+
%2:_(s32) = G_TRUNC %1
206+
%3:_(s128) = G_ZEXT %2
207+
%4:_(s64) = G_TRUNC %3
208+
%5:_(s32) = G_TRUNC %4
209+
$vgpr0 = COPY %5
210+
...

llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -457,9 +457,9 @@ body: |
457457
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
458458
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32)
459459
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
460-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
460+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
461461
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
462-
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32)
462+
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
463463
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
464464
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
465465
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
@@ -1495,13 +1495,15 @@ body: |
14951495
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
14961496
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY2]]
14971497
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY2]]
1498-
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1)
1499-
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1)
1500-
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
1501-
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT1]](s64)
1502-
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
1498+
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1)
1499+
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1)
1500+
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP]](s1)
1501+
; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[ICMP1]](s1)
1502+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT2]](s64)
1503+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT3]](s64)
1504+
; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
15031505
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
1504-
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
1506+
; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT1]](s32)
15051507
; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
15061508
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
15071509
%0:_(s32) = COPY $vgpr0

0 commit comments

Comments
 (0)