Skip to content

Commit a7d130c

Browse files
committed
Use lookThruCopyLike
1 parent 50498a6 commit a7d130c

File tree

2 files changed

+9
-5
lines changed

2 files changed

+9
-5
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1899,10 +1899,13 @@ bool SIFoldOperandsImpl::tryFoldClamp(MachineInstr &MI) {
18991899
if (!ClampSrc || !MRI->hasOneNonDBGUser(ClampSrc->getReg()))
19001900
return false;
19011901

1902+
if (!ClampSrc->getReg().isVirtual())
1903+
return false;
1904+
19021905
// Look through COPY. COPY only observed with True16.
1903-
MachineOperand *DefSrc = lookUpCopyChain(*TII, *MRI, ClampSrc->getReg());
1904-
MachineInstr *Def = MRI->getVRegDef(
1905-
DefSrc && DefSrc->isReg() ? DefSrc->getReg() : ClampSrc->getReg());
1906+
Register DefSrcReg = TRI->lookThruCopyLike(ClampSrc->getReg(), MRI);
1907+
MachineInstr *Def =
1908+
MRI->getVRegDef(DefSrcReg.isVirtual() ? DefSrcReg : ClampSrc->getReg());
19061909

19071910
// The type of clamp must be compatible.
19081911
if (TII->getClampMask(*Def) != TII->getClampMask(MI))

llvm/test/CodeGen/AMDGPU/true16-fold.mir

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -165,9 +165,10 @@ body: |
165165
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
166166
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
167167
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
168-
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 1, [[COPY3]], 0, 0, implicit $mode, implicit $exec
168+
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
169169
; CHECK-NEXT: $vgpr10_lo16 = COPY [[V_FMA_MIXLO_F16_]]
170-
; CHECK-NEXT: $vgpr0 = COPY [[V_FMA_MIXLO_F16_]]
170+
; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, $vgpr10_lo16, 0, $vgpr10_lo16, -1, 0, 0, implicit $mode, implicit $exec
171+
; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
171172
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
172173
%0:vgpr_32 = COPY $vgpr2
173174
%1:vgpr_32 = COPY $vgpr1

0 commit comments

Comments
 (0)