Skip to content

Commit cec89d2

Browse files
committed
True16: look through COPY when folding clamp
1 parent a955426 commit cec89d2

File tree

1 file changed

+5
-1
lines changed

1 file changed

+5
-1
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1684,6 +1684,10 @@ bool SIFoldOperandsImpl::tryFoldClamp(MachineInstr &MI) {
16841684
return false;
16851685

16861686
MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg());
1687+
MachineInstr *OrigDef = Def;
1688+
// Look through a COPY to the instruction that defines its source. A COPY has only been observed here with True16.
1689+
if (Def->isCopy() && Def->getOperand(1).getReg().isVirtual())
1690+
Def = MRI->getVRegDef(Def->getOperand(1).getReg());
16871691

16881692
// The type of clamp must be compatible.
16891693
if (TII->getClampMask(*Def) != TII->getClampMask(MI))
@@ -1701,7 +1705,7 @@ bool SIFoldOperandsImpl::tryFoldClamp(MachineInstr &MI) {
17011705
// Clamp is applied after omod, so it is OK if omod is set.
17021706
DefClamp->setImm(1);
17031707

1704-
Register DefReg = Def->getOperand(0).getReg();
1708+
Register DefReg = OrigDef->getOperand(0).getReg();
17051709
Register MIDstReg = MI.getOperand(0).getReg();
17061710
if (TRI->isSGPRReg(*MRI, DefReg)) {
17071711
// Pseudo scalar instructions have a SGPR for dst and clamp is a v_max*

0 commit comments

Comments
 (0)