Skip to content

Commit 3dad0c3

Browse files
committed
address comment
1 parent e772194 commit 3dad0c3

File tree

2 files changed

+12
-14
lines changed

2 files changed

+12
-14
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1820,8 +1820,9 @@ bool SIFoldOperandsImpl::tryFoldClamp(MachineInstr &MI) {
18201820
return false;
18211821

18221822
// Look through COPY. COPY only observed with True16.
1823-
MachineOperand *DefSrc = TRI->lookThruCopyLike(ClampSrc->getReg(), MRI);
1824-
MachineInstr *Def = MRI->getVRegDef(DefSrc && DefSrc->isReg() ? DefSrc->getReg() : ClampSrc->getReg());
1823+
MachineOperand *DefSrc = lookUpCopyChain(*TII, *MRI, ClampSrc->getReg());
1824+
MachineInstr *Def = MRI->getVRegDef(
1825+
DefSrc && DefSrc->isReg() ? DefSrc->getReg() : ClampSrc->getReg());
18251826

18261827
// The type of clamp must be compatible.
18271828
if (TII->getClampMask(*Def) != TII->getClampMask(MI))

llvm/test/CodeGen/AMDGPU/true16-fold.mir

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,9 @@ body: |
1616
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1717
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
1818
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
19-
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
19+
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 1, [[COPY3]], 0, 0, implicit $mode, implicit $exec
2020
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_16 = COPY [[V_FMA_MIXLO_F16_]]
21-
; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY4]], 0, [[COPY4]], -1, 0, 0, implicit $mode, implicit $exec
22-
; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
21+
; CHECK-NEXT: $vgpr0 = COPY [[V_FMA_MIXLO_F16_]]
2322
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
2423
%0:vgpr_32 = COPY $vgpr2
2524
%1:vgpr_32 = COPY $vgpr1
@@ -34,13 +33,13 @@ body: |
3433
...
3534

3635
---
37-
name: fold_16bit_subreg_folded_clamp
36+
name: fold_16bit_subreg_1_clamp
3837
tracksRegLiveness: true
3938
registers:
4039
body: |
4140
bb.0:
4241
liveins: $vgpr0, $vgpr1, $vgpr2
43-
; CHECK-LABEL: name: fold_16bit_madmix_clamp
42+
; CHECK-LABEL: name: fold_16bit_subreg_1_clamp
4443
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
4544
; CHECK-NEXT: {{ $}}
4645
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
@@ -49,8 +48,7 @@ body: |
4948
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
5049
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
5150
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
52-
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_16 = COPY [[V_FMA_MIXLO_F16_]]
53-
; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[COPY4]], 0, [[COPY4]], -1, 0, 0, implicit $mode, implicit $exec
51+
; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, [[V_FMA_MIXLO_F16_]].lo16, 0, [[V_FMA_MIXLO_F16_]].lo16, -1, 0, 0, implicit $mode, implicit $exec
5452
; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
5553
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
5654
%0:vgpr_32 = COPY $vgpr2
@@ -65,13 +63,13 @@ body: |
6563
...
6664

6765
---
68-
name: fold_16bit_subreg_clamp
66+
name: fold_16bit_subreg_2_clamp
6967
tracksRegLiveness: true
7068
registers:
7169
body: |
7270
bb.0:
7371
liveins: $vgpr0, $vgpr1, $vgpr2
74-
; CHECK-LABEL: name: fold_16bit_subreg_clamp
72+
; CHECK-LABEL: name: fold_16bit_subreg_2_clamp
7573
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
7674
; CHECK-NEXT: {{ $}}
7775
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
@@ -110,10 +108,9 @@ body: |
110108
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
111109
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
112110
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
113-
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
111+
; CHECK-NEXT: [[V_FMA_MIXLO_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIXLO_F16 8, [[COPY2]], 8, [[COPY1]], 0, [[COPY]], 1, [[COPY3]], 0, 0, implicit $mode, implicit $exec
114112
; CHECK-NEXT: $vgpr10_lo16 = COPY [[V_FMA_MIXLO_F16_]]
115-
; CHECK-NEXT: [[V_MAX_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_MAX_F16_t16_e64 0, $vgpr10_lo16, 0, $vgpr10_lo16, -1, 0, 0, implicit $mode, implicit $exec
116-
; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F16_t16_e64_]]
113+
; CHECK-NEXT: $vgpr0 = COPY [[V_FMA_MIXLO_F16_]]
117114
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
118115
%0:vgpr_32 = COPY $vgpr2
119116
%1:vgpr_32 = COPY $vgpr1

0 commit comments

Comments
 (0)