Commit 06f3079
AMDGPU: More consistently use the fold list instead of direct mutation (#127612)
There were 2 parallel fold check mechanisms, so consistently use the fold list. The worklist management here is still not good: other types of folds are not using it, and we should probably rewrite the pass to look more like peephole-opt.

This should be an alternative fix to skipping the commute when the operands are the same (#127562). The new test still passes as-is, but it demonstrates failures in a future patch.
1 parent 862595c commit 06f3079
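
To make the intent concrete, here is a minimal, self-contained sketch of the record-then-apply pattern the commit moves toward: fold opportunities are appended to a fold list and applied in a later pass, instead of mutating the use instruction on the spot. The types below (Operand, Instr, FoldCandidate) are simplified stand-ins for illustration only, not the real MachineOperand/MachineInstr/FoldCandidate classes from SIFoldOperands.cpp.

#include <cstdint>
#include <vector>

// Simplified stand-ins for MachineOperand / MachineInstr (hypothetical,
// for illustration only).
struct Operand {
  bool IsImm = false;
  int64_t Imm = 0;
};

struct Instr {
  std::vector<Operand> Operands;
};

// Mirrors the shape of a fold candidate: which instruction, which operand
// slot, and what to fold into it.
struct FoldCandidate {
  Instr *UseMI;
  unsigned OpNo;
  const Operand *FoldOp;
};

// Instead of mutating UseMI immediately (as the old ChangeToImmediate calls
// did), a legal fold is only recorded in the list ...
static void appendFoldCandidate(std::vector<FoldCandidate> &FoldList,
                                Instr *UseMI, unsigned OpNo,
                                const Operand *FoldOp) {
  FoldList.push_back({UseMI, OpNo, FoldOp});
}

// ... and every recorded fold is applied in one pass afterwards, so checks
// that run in between (legality, commuting) still see unmodified instructions.
static void applyFolds(const std::vector<FoldCandidate> &FoldList) {
  for (const FoldCandidate &Fold : FoldList) {
    Operand &Dst = Fold.UseMI->Operands[Fold.OpNo];
    Dst.IsImm = true;
    Dst.Imm = Fold.FoldOp->Imm;
  }
}

The real pass does considerably more bookkeeping (commuted candidates, shrinking, and so on); the sketch only shows the single-apply-point structure the commit message refers to.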

2 files changed (+67, -18 lines)
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 4 additions & 18 deletions
@@ -114,7 +114,7 @@ class SIFoldOperandsImpl {
   bool
   getRegSeqInit(SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,
                 Register UseReg, uint8_t OpTy) const;
-  bool tryToFoldACImm(const MachineOperand &OpToFold, MachineInstr *UseMI,
+  bool tryToFoldACImm(MachineOperand &OpToFold, MachineInstr *UseMI,
                       unsigned UseOpIdx,
                       SmallVectorImpl<FoldCandidate> &FoldList) const;
   void foldOperand(MachineOperand &OpToFold,
@@ -575,11 +575,6 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
   return true;
 }
 
-static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
-                              const MachineInstr *MI) {
-  return any_of(FoldList, [&](const auto &C) { return C.UseMI == MI; });
-}
-
 static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
                                 MachineInstr *MI, unsigned OpNo,
                                 MachineOperand *FoldOp, bool Commuted = false,
@@ -680,12 +675,6 @@ bool SIFoldOperandsImpl::tryAddToFoldList(
     }
   }
 
-  // If we are already folding into another operand of MI, then
-  // we can't commute the instruction, otherwise we risk making the
-  // other fold illegal.
-  if (isUseMIInFoldList(FoldList, MI))
-    return false;
-
   // Operand is not legal, so try to commute the instruction to
   // see if this makes it possible to fold.
   unsigned CommuteOpNo = TargetInstrInfo::CommuteAnyOperandIndex;
@@ -816,7 +805,7 @@ bool SIFoldOperandsImpl::getRegSeqInit(
 }
 
 bool SIFoldOperandsImpl::tryToFoldACImm(
-    const MachineOperand &OpToFold, MachineInstr *UseMI, unsigned UseOpIdx,
+    MachineOperand &OpToFold, MachineInstr *UseMI, unsigned UseOpIdx,
     SmallVectorImpl<FoldCandidate> &FoldList) const {
   const MCInstrDesc &Desc = UseMI->getDesc();
   if (UseOpIdx >= Desc.getNumOperands())
@@ -827,7 +816,7 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
 
   uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType;
   if (OpToFold.isImm() && TII->isOperandLegal(*UseMI, UseOpIdx, &OpToFold)) {
-    UseMI->getOperand(UseOpIdx).ChangeToImmediate(OpToFold.getImm());
+    appendFoldCandidate(FoldList, UseMI, UseOpIdx, &OpToFold);
     return true;
   }
 
@@ -838,16 +827,13 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
   if (!UseReg.isVirtual())
     return false;
 
-  if (isUseMIInFoldList(FoldList, UseMI))
-    return false;
-
   // Maybe it is just a COPY of an immediate itself.
   MachineInstr *Def = MRI->getVRegDef(UseReg);
   MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
   if (!UseOp.getSubReg() && Def && TII->isFoldableCopy(*Def)) {
     MachineOperand &DefOp = Def->getOperand(1);
     if (DefOp.isImm() && TII->isOperandLegal(*UseMI, UseOpIdx, &DefOp)) {
-      UseMI->getOperand(UseOpIdx).ChangeToImmediate(DefOp.getImm());
+      appendFoldCandidate(FoldList, UseMI, UseOpIdx, &DefOp);
       return true;
     }
   }
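
The deleted isUseMIInFoldList guard existed to stop a commute from invalidating a fold already queued on the same instruction. Below is a hedged, self-contained sketch of one way such an invariant can be enforced at the point where candidates are appended; this is illustrative only, uses hypothetical stand-in types, and is not claimed to be how SIFoldOperands handles it after this change.

#include <algorithm>
#include <vector>

struct Instr; // opaque stand-in for MachineInstr (hypothetical)

struct FoldCandidate {
  Instr *UseMI;
  unsigned OpNo;
  bool Commuted;
};

// Refuse to queue a fold that would require commuting an instruction which
// already has another fold pending, since commuting could make that earlier
// fold illegal (the concern described in the deleted comment).
static bool tryAppendFold(std::vector<FoldCandidate> &FoldList, Instr *UseMI,
                          unsigned OpNo, bool NeedsCommute) {
  if (NeedsCommute &&
      std::any_of(FoldList.begin(), FoldList.end(),
                  [&](const FoldCandidate &C) { return C.UseMI == UseMI; }))
    return false;
  FoldList.push_back({UseMI, OpNo, NeedsCommute});
  return true;
}

Checking at append time keeps the decision next to the data structure that motivates it, which is the general direction the commit title describes.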
New MIR test — Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,63 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=peephole-opt,si-fold-operands -o - %s | FileCheck %s
+
+# Check for assert when trying later folds after commuting an
+# instruction where both operands are the same register. This depended
+# on use list ordering, so we need to run peephole-opt first to
+# reproduce.
+
+---
+name: commute_add_same_inputs_assert
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; CHECK-LABEL: name: commute_add_same_inputs_assert
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 0, killed [[COPY]], implicit-def $vcc, implicit $exec
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 0, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $vcc, implicit $exec
+    ; CHECK-NEXT: $vgpr4 = COPY [[V_ADDC_U32_e32_]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr4
+    %0:vgpr_32 = COPY $vgpr0
+    %1:sreg_64 = S_MOV_B64 0
+    %2:sreg_32 = COPY %1.sub0
+    %3:vgpr_32 = V_ADD_CO_U32_e32 %2, killed %0, implicit-def $vcc, implicit $exec
+    %4:vgpr_32 = COPY %2
+    %5:vgpr_32 = COPY %2
+    %6:vgpr_32 = V_ADDC_U32_e32 %4, %5, implicit-def $vcc, implicit $vcc, implicit $exec
+    $vgpr4 = COPY %6
+    SI_RETURN implicit $vgpr4
+
+...
+
+---
+name: commute_sub_same_inputs_assert
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; CHECK-LABEL: name: commute_sub_same_inputs_assert
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[V_SUB_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_CO_U32_e32 0, killed [[COPY]], implicit-def $vcc, implicit $exec
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: [[V_SUBBREV_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUBBREV_U32_e32 0, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $vcc, implicit $exec
+    ; CHECK-NEXT: $vgpr4 = COPY [[V_SUBBREV_U32_e32_]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr4
+    %0:vgpr_32 = COPY $vgpr0
+    %1:sreg_64 = S_MOV_B64 0
+    %2:sreg_32 = COPY %1.sub0
+    %3:vgpr_32 = V_SUB_CO_U32_e32 %2, killed %0, implicit-def $vcc, implicit $exec
+    %4:vgpr_32 = COPY %2
+    %5:vgpr_32 = COPY %2
+    %6:vgpr_32 = V_SUBB_U32_e32 %5, %4, implicit-def $vcc, implicit $vcc, implicit $exec
+    $vgpr4 = COPY %6
+    SI_RETURN implicit $vgpr4
+
+...
