Skip to content

Commit 0789fcf

Browse files
committed
address comment
1 parent d17eb59 commit 0789fcf

File tree

2 files changed

+96
-6
lines changed

2 files changed

+96
-6
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1819,11 +1819,13 @@ bool SIFoldOperandsImpl::tryFoldClamp(MachineInstr &MI) {
18191819
if (!ClampSrc || !MRI->hasOneNonDBGUser(ClampSrc->getReg()))
18201820
return false;
18211821

1822-
MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg());
1823-
MachineInstr *OrigDef = Def;
18241822
// Look through COPY. COPY only observed with True16.
1825-
if (Def->isCopy() && Def->getOperand(1).getReg().isVirtual())
1826-
Def = MRI->getVRegDef(Def->getOperand(1).getReg());
1823+
MachineOperand *DefSrc = lookUpCopyChain(*TII, *MRI, ClampSrc->getReg());
1824+
MachineInstr *Def = nullptr;
1825+
if (DefSrc && DefSrc->isReg() && !DefSrc->isImm())
1826+
Def = MRI->getVRegDef(DefSrc->getReg());
1827+
else
1828+
Def = MRI->getVRegDef(ClampSrc->getReg());
18271829

18281830
// The type of clamp must be compatible.
18291831
if (TII->getClampMask(*Def) != TII->getClampMask(MI))
@@ -1841,7 +1843,7 @@ bool SIFoldOperandsImpl::tryFoldClamp(MachineInstr &MI) {
18411843
// Clamp is applied after omod, so it is OK if omod is set.
18421844
DefClamp->setImm(1);
18431845

1844-
Register DefReg = OrigDef->getOperand(0).getReg();
1846+
Register DefReg = Def->getOperand(0).getReg();
18451847
Register MIDstReg = MI.getOperand(0).getReg();
18461848
if (TRI->isSGPRReg(*MRI, DefReg)) {
18471849
// Pseudo scalar instructions have a SGPR for dst and clamp is a v_max*

llvm/test/CodeGen/AMDGPU/true16-fold.mir

Lines changed: 89 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ body: |
1818
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
1919
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 1, [[COPY3]], 0, 0, implicit $mode, implicit $exec
2020
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_16 = COPY [[V_FMA_MIXLO_F16_]]
21-
; CHECK-NEXT: $vgpr0 = COPY [[COPY4]]
21+
; CHECK-NEXT: $vgpr0 = COPY [[V_FMA_MIXLO_F16_]]
2222
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
2323
%10:vgpr_32 = COPY $vgpr2
2424
%9:vgpr_32 = COPY $vgpr1
@@ -31,3 +31,91 @@ body: |
3131
$vgpr0 = COPY %14:vgpr_16
3232
S_ENDPGM 0, implicit $vgpr0
3333
...
34+
35+
---
36+
name: fold_16bit_subreg_clamp
37+
tracksRegLiveness: true
38+
registers:
39+
body: |
40+
bb.0.entry:
41+
liveins: $vgpr0, $vgpr1, $vgpr2
42+
; CHECK-LABEL: name: fold_16bit_subreg_clamp
43+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
44+
; CHECK-NEXT: {{ $}}
45+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
46+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
47+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
48+
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
49+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
50+
; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY3]].lo16, 0, [[COPY3]].lo16, -1, 0, 0, implicit $mode, implicit $exec
51+
; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
52+
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
53+
%10:vgpr_32 = COPY $vgpr2
54+
%9:vgpr_32 = COPY $vgpr1
55+
%8:vgpr_32 = COPY $vgpr0
56+
%12:sreg_32 = IMPLICIT_DEF
57+
%13:vgpr_32 = COPY %12:sreg_32
58+
%15:vgpr_16 = COPY %13.lo16:vgpr_32
59+
%14:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %15:vgpr_16, 0, %15:vgpr_16, -1, 0, 0, implicit $mode, implicit $exec
60+
$vgpr0 = COPY %14:vgpr_16
61+
S_ENDPGM 0, implicit $vgpr0
62+
...
63+
64+
---
65+
name: fold_16bit_phyreg_clamp
66+
tracksRegLiveness: true
67+
registers:
68+
body: |
69+
bb.0.entry:
70+
liveins: $vgpr0, $vgpr1, $vgpr2
71+
; CHECK-LABEL: name: fold_16bit_phyreg_clamp
72+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
73+
; CHECK-NEXT: {{ $}}
74+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
75+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
76+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
77+
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
78+
; CHECK-NEXT: $vgpr10_lo16 = COPY [[DEF]]
79+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_16 = COPY $vgpr10_lo16
80+
; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY3]], 0, [[COPY3]], -1, 0, 0, implicit $mode, implicit $exec
81+
; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
82+
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
83+
%10:vgpr_32 = COPY $vgpr2
84+
%9:vgpr_32 = COPY $vgpr1
85+
%8:vgpr_32 = COPY $vgpr0
86+
%12:sreg_32 = IMPLICIT_DEF
87+
$vgpr10_lo16 = COPY %12:sreg_32
88+
%15:vgpr_16 = COPY $vgpr10_lo16
89+
%14:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %15:vgpr_16, 0, %15:vgpr_16, -1, 0, 0, implicit $mode, implicit $exec
90+
$vgpr0 = COPY %14:vgpr_16
91+
S_ENDPGM 0, implicit $vgpr0
92+
...
93+
94+
---
95+
name: fold_16bit_undef_clamp
96+
tracksRegLiveness: true
97+
registers:
98+
body: |
99+
bb.0.entry:
100+
liveins: $vgpr0, $vgpr1, $vgpr2
101+
; CHECK-LABEL: name: fold_16bit_undef_clamp
102+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
103+
; CHECK-NEXT: {{ $}}
104+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
105+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
106+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
107+
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
108+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
109+
; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY3]].lo16, 0, [[COPY3]].lo16, -1, 0, 0, implicit $mode, implicit $exec
110+
; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
111+
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
112+
%10:vgpr_32 = COPY $vgpr2
113+
%9:vgpr_32 = COPY $vgpr1
114+
%8:vgpr_32 = COPY $vgpr0
115+
%12:sreg_32 = IMPLICIT_DEF
116+
%13:vgpr_32 = COPY %12:sreg_32
117+
%15:vgpr_16 = COPY %13.lo16:vgpr_32
118+
%14:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %15:vgpr_16, 0, %15:vgpr_16, -1, 0, 0, implicit $mode, implicit $exec
119+
$vgpr0 = COPY %14:vgpr_16
120+
S_ENDPGM 0, implicit $vgpr0
121+
...

0 commit comments

Comments
 (0)