Skip to content

Commit 463aba5

Browse files
committed
address comment
1 parent a5dc039 commit 463aba5

File tree

2 files changed

+137
-18
lines changed

2 files changed

+137
-18
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1819,11 +1819,9 @@ bool SIFoldOperandsImpl::tryFoldClamp(MachineInstr &MI) {
18191819
if (!ClampSrc || !MRI->hasOneNonDBGUser(ClampSrc->getReg()))
18201820
return false;
18211821

1822-
MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg());
1823-
MachineInstr *OrigDef = Def;
18241822
// Look through COPY. COPY only observed with True16.
1825-
if (Def->isCopy() && Def->getOperand(1).getReg().isVirtual())
1826-
Def = MRI->getVRegDef(Def->getOperand(1).getReg());
1823+
MachineOperand *DefSrc = TRI->lookThruCopyLike(ClampSrc->getReg(), MRI);
1824+
MachineInstr *Def = MRI->getVRegDef(DefSrc && DefSrc->isReg() ? DefSrc->getReg() : ClampSrc->getReg());
18271825

18281826
// The type of clamp must be compatible.
18291827
if (TII->getClampMask(*Def) != TII->getClampMask(MI))
@@ -1841,7 +1839,7 @@ bool SIFoldOperandsImpl::tryFoldClamp(MachineInstr &MI) {
18411839
// Clamp is applied after omod, so it is OK if omod is set.
18421840
DefClamp->setImm(1);
18431841

1844-
Register DefReg = OrigDef->getOperand(0).getReg();
1842+
Register DefReg = Def->getOperand(0).getReg();
18451843
Register MIDstReg = MI.getOperand(0).getReg();
18461844
if (TRI->isSGPRReg(*MRI, DefReg)) {
18471845
// Pseudo scalar instructions have a SGPR for dst and clamp is a v_max*
Lines changed: 134 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass si-fold-operands -mattr="+wavefrontsize32",+real-true16 -verify-machineinstrs -o - %s | FileCheck %s
2+
# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass si-fold-operands -mattr=+real-true16 -o - %s | FileCheck %s
33

44
---
55
name: fold_16bit_madmix_clamp
66
tracksRegLiveness: true
77
registers:
88
body: |
9-
bb.0.entry:
9+
bb.0:
1010
liveins: $vgpr0, $vgpr1, $vgpr2
1111
; CHECK-LABEL: name: fold_16bit_madmix_clamp
1212
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
@@ -16,18 +16,139 @@ body: |
1616
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1717
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
1818
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
19-
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 1, [[COPY3]], 0, 0, implicit $mode, implicit $exec
19+
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
2020
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_16 = COPY [[V_FMA_MIXLO_F16_]]
21-
; CHECK-NEXT: $vgpr0 = COPY [[COPY4]]
21+
; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY4]], 0, [[COPY4]], -1, 0, 0, implicit $mode, implicit $exec
22+
; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
2223
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
23-
%10:vgpr_32 = COPY $vgpr2
24-
%9:vgpr_32 = COPY $vgpr1
25-
%8:vgpr_32 = COPY $vgpr0
26-
%12:sreg_32 = IMPLICIT_DEF
27-
%13:vgpr_32 = COPY %12:sreg_32
28-
%11:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %8:vgpr_32, 8, %9:vgpr_32, 0, %10:vgpr_32, 0, %13:vgpr_32, 0, 0, implicit $mode, implicit $exec
29-
%15:vgpr_16 = COPY %11:vgpr_32
30-
%14:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %15:vgpr_16, 0, %15:vgpr_16, -1, 0, 0, implicit $mode, implicit $exec
31-
$vgpr0 = COPY %14:vgpr_16
24+
%0:vgpr_32 = COPY $vgpr2
25+
%1:vgpr_32 = COPY $vgpr1
26+
%2:vgpr_32 = COPY $vgpr0
27+
%3:sreg_32 = IMPLICIT_DEF
28+
%4:vgpr_32 = COPY %3
29+
%5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
30+
%6:vgpr_16 = COPY %5
31+
%7:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %6, 0, %6, -1, 0, 0, implicit $mode, implicit $exec
32+
$vgpr0 = COPY %7
33+
S_ENDPGM 0, implicit $vgpr0
34+
...
35+
36+
---
# The V_MAX_F16_t16_e64 in this function reads %5.lo16 of the V_FMA_MIXLO_F16
# result directly, with no intervening COPY.
# NOTE(review): the CHECK-LABEL previously named fold_16bit_madmix_clamp and has
# been corrected to this function's name. The CHECK-NEXT lines below still
# expect a vgpr_16 COPY that this input does not contain; they appear to be
# copied from fold_16bit_madmix_clamp and should be regenerated with
# utils/update_mir_test_checks.py.
37+
name: fold_16bit_subreg_folded_clamp
38+
tracksRegLiveness: true
39+
registers:
40+
body: |
41+
bb.0:
42+
liveins: $vgpr0, $vgpr1, $vgpr2
43+
; CHECK-LABEL: name: fold_16bit_subreg_folded_clamp
44+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
45+
; CHECK-NEXT: {{ $}}
46+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
47+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
48+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
49+
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
50+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
51+
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
52+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_16 = COPY [[V_FMA_MIXLO_F16_]]
53+
; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY4]], 0, [[COPY4]], -1, 0, 0, implicit $mode, implicit $exec
54+
; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
55+
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
56+
%0:vgpr_32 = COPY $vgpr2
57+
%1:vgpr_32 = COPY $vgpr1
58+
%2:vgpr_32 = COPY $vgpr0
59+
%3:sreg_32 = IMPLICIT_DEF
60+
%4:vgpr_32 = COPY %3
61+
%5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
62+
%6:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %5.lo16, 0, %5.lo16, -1, 0, 0, implicit $mode, implicit $exec
63+
$vgpr0 = COPY %6
64+
S_ENDPGM 0, implicit $vgpr0
65+
...
66+
67+
---
# Input: the V_FMA_MIXLO_F16 result reaches the V_MAX_F16_t16_e64 through
# `%6:vgpr_16 = COPY %5.lo16`.
# Expected: the COPY is gone and the V_MAX reads [[V_FMA_MIXLO_F16_]].lo16
# directly; the V_MAX itself is still present in the checked output.
68+
name: fold_16bit_subreg_clamp
69+
tracksRegLiveness: true
70+
registers:
71+
body: |
72+
bb.0:
73+
liveins: $vgpr0, $vgpr1, $vgpr2
74+
; CHECK-LABEL: name: fold_16bit_subreg_clamp
75+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
76+
; CHECK-NEXT: {{ $}}
77+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
78+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
79+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
80+
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
81+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
82+
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
83+
; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[V_FMA_MIXLO_F16_]].lo16, 0, [[V_FMA_MIXLO_F16_]].lo16, -1, 0, 0, implicit $mode, implicit $exec
84+
; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
85+
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
86+
%0:vgpr_32 = COPY $vgpr2
87+
%1:vgpr_32 = COPY $vgpr1
88+
%2:vgpr_32 = COPY $vgpr0
89+
%3:sreg_32 = IMPLICIT_DEF
90+
%4:vgpr_32 = COPY %3
91+
%5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
92+
%6:vgpr_16 = COPY %5.lo16
93+
%7:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %6, 0, %6, -1, 0, 0, implicit $mode, implicit $exec
94+
$vgpr0 = COPY %7
95+
S_ENDPGM 0, implicit $vgpr0
96+
...
97+
98+
---
# Input: the V_FMA_MIXLO_F16 result is copied into the physical register
# $vgpr10_lo16, and the V_MAX_F16_t16_e64 reads that physical register.
# Expected: the checked output is the same instruction sequence as the input.
99+
name: fold_16bit_phyreg_clamp
100+
tracksRegLiveness: true
101+
registers:
102+
body: |
103+
bb.0:
104+
liveins: $vgpr0, $vgpr1, $vgpr2
105+
; CHECK-LABEL: name: fold_16bit_phyreg_clamp
106+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
107+
; CHECK-NEXT: {{ $}}
108+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
109+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
110+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
111+
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
112+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
113+
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
114+
; CHECK-NEXT: $vgpr10_lo16 = COPY [[V_FMA_MIXLO_F16_]]
115+
; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, $vgpr10_lo16, 0, $vgpr10_lo16, -1, 0, 0, implicit $mode, implicit $exec
116+
; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
117+
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
118+
%0:vgpr_32 = COPY $vgpr2
119+
%1:vgpr_32 = COPY $vgpr1
120+
%2:vgpr_32 = COPY $vgpr0
121+
%3:sreg_32 = IMPLICIT_DEF
122+
%4:vgpr_32 = COPY %3
123+
%5:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, %2, 8, %1, 0, %0, 0, %4, 0, 0, implicit $mode, implicit $exec
124+
$vgpr10_lo16 = COPY %5
125+
%6:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, $vgpr10_lo16, 0, $vgpr10_lo16, -1, 0, 0, implicit $mode, implicit $exec
126+
$vgpr0 = COPY %6
127+
S_ENDPGM 0, implicit $vgpr0
128+
...
129+
130+
---
# Input: the V_MAX_F16_t16_e64 sources are defined by a vgpr_16 IMPLICIT_DEF.
# Expected: the checked output is the same instruction sequence as the input.
131+
name: fold_16bit_undef_clamp
132+
tracksRegLiveness: true
133+
registers:
134+
body: |
135+
bb.0:
136+
liveins: $vgpr0, $vgpr1, $vgpr2
137+
; CHECK-LABEL: name: fold_16bit_undef_clamp
138+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
139+
; CHECK-NEXT: {{ $}}
140+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
141+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
142+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
143+
; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
144+
; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[DEF]], 0, [[DEF]], -1, 0, 0, implicit $mode, implicit $exec
145+
; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
146+
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
147+
%0:vgpr_32 = COPY $vgpr2
148+
%1:vgpr_32 = COPY $vgpr1
149+
%2:vgpr_32 = COPY $vgpr0
150+
%3:vgpr_16 = IMPLICIT_DEF
151+
%4:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, %3, 0, %3, -1, 0, 0, implicit $mode, implicit $exec
152+
$vgpr0 = COPY %4
32153
S_ENDPGM 0, implicit $vgpr0
33154
...

0 commit comments

Comments
 (0)