Skip to content

Commit 4842f7c

Browse files
committed
Use lookThruCopyLike
1 parent 6fcbcc5 commit 4842f7c

File tree

2 files changed

+9
-5
lines changed

2 files changed

+9
-5
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1819,10 +1819,13 @@ bool SIFoldOperandsImpl::tryFoldClamp(MachineInstr &MI) {
18191819
if (!ClampSrc || !MRI->hasOneNonDBGUser(ClampSrc->getReg()))
18201820
return false;
18211821

1822+
if (!ClampSrc->getReg().isVirtual())
1823+
return false;
1824+
18221825
// Look through COPY. COPY only observed with True16.
1823-
MachineOperand *DefSrc = lookUpCopyChain(*TII, *MRI, ClampSrc->getReg());
1824-
MachineInstr *Def = MRI->getVRegDef(
1825-
DefSrc && DefSrc->isReg() ? DefSrc->getReg() : ClampSrc->getReg());
1826+
Register DefSrcReg = TRI->lookThruCopyLike(ClampSrc->getReg(), MRI);
1827+
MachineInstr *Def =
1828+
MRI->getVRegDef(DefSrcReg.isVirtual() ? DefSrcReg : ClampSrc->getReg());
18261829

18271830
// The type of clamp must be compatible.
18281831
if (TII->getClampMask(*Def) != TII->getClampMask(MI))

llvm/test/CodeGen/AMDGPU/true16-fold.mir

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,9 +108,10 @@ body: |
108108
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
109109
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
110110
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
111-
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 1, [[COPY3]], 0, 0, implicit $mode, implicit $exec
111+
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
112112
; CHECK-NEXT: $vgpr10_lo16 = COPY [[V_FMA_MIXLO_F16_]]
113-
; CHECK-NEXT: $vgpr0 = COPY [[V_FMA_MIXLO_F16_]]
113+
; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, $vgpr10_lo16, 0, $vgpr10_lo16, -1, 0, 0, implicit $mode, implicit $exec
114+
; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
114115
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
115116
%0:vgpr_32 = COPY $vgpr2
116117
%1:vgpr_32 = COPY $vgpr1

0 commit comments

Comments
 (0)