Skip to content

Commit 68325c5

Browse files
committed
address comment
1 parent d17eb59 commit 68325c5

File tree

2 files changed

+96
-6
lines changed

2 files changed

+96
-6
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1819,11 +1819,13 @@ bool SIFoldOperandsImpl::tryFoldClamp(MachineInstr &MI) {
18191819
if (!ClampSrc || !MRI->hasOneNonDBGUser(ClampSrc->getReg()))
18201820
return false;
18211821

1822-
MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg());
1823-
MachineInstr *OrigDef = Def;
18241822
// Look through COPY. COPY only observed with True16.
1825-
if (Def->isCopy() && Def->getOperand(1).getReg().isVirtual())
1826-
Def = MRI->getVRegDef(Def->getOperand(1).getReg());
1823+
MachineOperand *DefSrc = lookUpCopyChain(*TII, *MRI, ClampSrc->getReg());
1824+
MachineInstr *Def = nullptr;
1825+
if (DefSrc && DefSrc->isReg() && !DefSrc->isImm())
1826+
Def = MRI->getVRegDef(DefSrc->getReg());
1827+
else
1828+
Def = MRI->getVRegDef(ClampSrc->getReg());
18271829

18281830
// The type of clamp must be compatible.
18291831
if (TII->getClampMask(*Def) != TII->getClampMask(MI))
@@ -1841,7 +1843,7 @@ bool SIFoldOperandsImpl::tryFoldClamp(MachineInstr &MI) {
18411843
// Clamp is applied after omod, so it is OK if omod is set.
18421844
DefClamp->setImm(1);
18431845

1844-
Register DefReg = OrigDef->getOperand(0).getReg();
1846+
Register DefReg = Def->getOperand(0).getReg();
18451847
Register MIDstReg = MI.getOperand(0).getReg();
18461848
if (TRI->isSGPRReg(*MRI, DefReg)) {
18471849
// Pseudo scalar instructions have a SGPR for dst and clamp is a v_max*

llvm/test/CodeGen/AMDGPU/true16-fold.mir

Lines changed: 89 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ body: |
1818
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
1919
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 1, [[COPY3]], 0, 0, implicit $mode, implicit $exec
2020
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_16 = COPY [[V_FMA_MIXLO_F16_]]
21-
; CHECK-NEXT: $vgpr0 = COPY [[COPY4]]
21+
; CHECK-NEXT: $vgpr0 = COPY [[V_FMA_MIXLO_F16_]]
2222
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
2323
%10:vgpr_32 = COPY $vgpr2
2424
%9:vgpr_32 = COPY $vgpr1
@@ -31,3 +31,91 @@ body: |
3131
$vgpr0 = COPY %14:vgpr_16
3232
S_ENDPGM 0, implicit $vgpr0
3333
...
34+
35+
---
36+
name: fold_16bit_subreg_clamp
37+
tracksRegLiveness: true
38+
registers:
39+
body: |
40+
bb.0.entry:
41+
liveins: $vgpr0, $vgpr1, $vgpr2
42+
; CHECK-LABEL: name: fold_16bit_subreg_clamp
43+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
44+
; CHECK-NEXT: {{ $}}
45+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
46+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
47+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
48+
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
49+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
50+
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
51+
; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[V_FMA_MIXLO_F16_]].lo16, 0, [[V_FMA_MIXLO_F16_]].lo16, -1, 0, 0, implicit $mode, implicit $exec
52+
; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
53+
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
54+
%10:vgpr_32 = COPY $vgpr2
55+
%9:vgpr_32 = COPY $vgpr1
56+
%8:vgpr_32 = COPY $vgpr0
57+
%12:sreg_32 = IMPLICIT_DEF
58+
%13:vgpr_32 = COPY %12:sreg_32
59+
%11:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %8:vgpr_32, 8, %9:vgpr_32, 0, %10:vgpr_32, 0, %13:vgpr_32, 0, 0, implicit $mode, implicit $exec
60+
%15:vgpr_16 = COPY %11.lo16:vgpr_32
61+
%14:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %15:vgpr_16, 0, %15:vgpr_16, -1, 0, 0, implicit $mode, implicit $exec
62+
$vgpr0 = COPY %14:vgpr_16
63+
S_ENDPGM 0, implicit $vgpr0
64+
...
65+
66+
---
67+
name: fold_16bit_phyreg_clamp
68+
tracksRegLiveness: true
69+
registers:
70+
body: |
71+
bb.0.entry:
72+
liveins: $vgpr0, $vgpr1, $vgpr2
73+
; CHECK-LABEL: name: fold_16bit_phyreg_clamp
74+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
75+
; CHECK-NEXT: {{ $}}
76+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
77+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
78+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
79+
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
80+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
81+
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 1, [[COPY3]], 0, 0, implicit $mode, implicit $exec
82+
; CHECK-NEXT: $vgpr10_lo16 = COPY [[V_FMA_MIXLO_F16_]]
83+
; CHECK-NEXT: $vgpr0 = COPY [[V_FMA_MIXLO_F16_]]
84+
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
85+
%10:vgpr_32 = COPY $vgpr2
86+
%9:vgpr_32 = COPY $vgpr1
87+
%8:vgpr_32 = COPY $vgpr0
88+
%12:sreg_32 = IMPLICIT_DEF
89+
%13:vgpr_32 = COPY %12:sreg_32
90+
%11:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %8:vgpr_32, 8, %9:vgpr_32, 0, %10:vgpr_32, 0, %13:vgpr_32, 0, 0, implicit $mode, implicit $exec
91+
$vgpr10_lo16 = COPY %11:vgpr_32
92+
%14:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, $vgpr10_lo16, 0, $vgpr10_lo16, -1, 0, 0, implicit $mode, implicit $exec
93+
$vgpr0 = COPY %14:vgpr_16
94+
S_ENDPGM 0, implicit $vgpr0
95+
...
96+
97+
---
98+
name: fold_16bit_undef_clamp
99+
tracksRegLiveness: true
100+
registers:
101+
body: |
102+
bb.0.entry:
103+
liveins: $vgpr0, $vgpr1, $vgpr2
104+
; CHECK-LABEL: name: fold_16bit_undef_clamp
105+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
106+
; CHECK-NEXT: {{ $}}
107+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
108+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
109+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
110+
; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
111+
; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[DEF]], 0, [[DEF]], -1, 0, 0, implicit $mode, implicit $exec
112+
; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
113+
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
114+
%10:vgpr_32 = COPY $vgpr2
115+
%9:vgpr_32 = COPY $vgpr1
116+
%8:vgpr_32 = COPY $vgpr0
117+
%15:vgpr_16 = IMPLICIT_DEF
118+
%14:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %15:vgpr_16, 0, %15:vgpr_16, -1, 0, 0, implicit $mode, implicit $exec
119+
$vgpr0 = COPY %14:vgpr_16
120+
S_ENDPGM 0, implicit $vgpr0
121+
...

0 commit comments

Comments
 (0)