AMDGPU: More consistently use the fold list instead of direct mutation #127612

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged: 1 commit, Feb 25, 2025
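The diff below replaces direct operand mutation (calling ChangeToImmediate at the point a fold is found) with recording the fold on the shared FoldList and letting the normal fold-application path do the rewrite. As a rough illustration of why a folding pass prefers this shape, here is a minimal, self-contained C++ model; every type and name in it is invented for the sketch and is not the actual SIFoldOperands implementation.

#include <iostream>
#include <optional>
#include <vector>

// Toy stand-ins for MachineInstr/MachineOperand.
struct Operand {
  std::optional<int> Reg; // set while this operand is still a register use
  std::optional<int> Imm; // set once an immediate has been folded in
};
struct Instr {
  std::vector<Operand> Ops;
};

// A recorded fold: rewrite operand OpNo of UseMI to Imm, later.
struct FoldCandidate {
  Instr *UseMI;
  unsigned OpNo;
  int Imm;
};

int main() {
  Instr UseMI{{Operand{5, std::nullopt}, Operand{7, std::nullopt}}};
  std::vector<FoldCandidate> FoldList;

  // Record instead of mutating: the register use in operand 0 stays
  // intact, so later legality checks (and other candidate folds into
  // the same instruction) still see the original operands.
  FoldList.push_back({&UseMI, 0, 42});

  bool AllLegal = true; // imagine the pass's final legality decision here
  if (AllLegal) {
    for (const FoldCandidate &Fold : FoldList) {
      Fold.UseMI->Ops[Fold.OpNo].Reg.reset();
      Fold.UseMI->Ops[Fold.OpNo].Imm = Fold.Imm;
    }
  } else {
    FoldList.clear(); // rollback is a no-op; nothing was mutated yet
  }

  std::cout << "op0 imm = " << *UseMI.Ops[0].Imm << '\n'; // prints 42
}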
22 changes: 4 additions & 18 deletions llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -114,7 +114,7 @@ class SIFoldOperandsImpl {
   bool
   getRegSeqInit(SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,
                 Register UseReg, uint8_t OpTy) const;
-  bool tryToFoldACImm(const MachineOperand &OpToFold, MachineInstr *UseMI,
+  bool tryToFoldACImm(MachineOperand &OpToFold, MachineInstr *UseMI,
                       unsigned UseOpIdx,
                       SmallVectorImpl<FoldCandidate> &FoldList) const;
   void foldOperand(MachineOperand &OpToFold,
@@ -573,11 +573,6 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
   return true;
 }
 
-static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
-                              const MachineInstr *MI) {
-  return any_of(FoldList, [&](const auto &C) { return C.UseMI == MI; });
-}
-
 static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
                                 MachineInstr *MI, unsigned OpNo,
                                 MachineOperand *FoldOp, bool Commuted = false,
@@ -678,12 +673,6 @@ bool SIFoldOperandsImpl::tryAddToFoldList(
     }
   }
 
-  // If we are already folding into another operand of MI, then
-  // we can't commute the instruction, otherwise we risk making the
-  // other fold illegal.
-  if (isUseMIInFoldList(FoldList, MI))
-    return false;
-
   // Operand is not legal, so try to commute the instruction to
   // see if this makes it possible to fold.
   unsigned CommuteOpNo = TargetInstrInfo::CommuteAnyOperandIndex;
@@ -814,7 +803,7 @@ bool SIFoldOperandsImpl::getRegSeqInit(
 }
 
 bool SIFoldOperandsImpl::tryToFoldACImm(
-    const MachineOperand &OpToFold, MachineInstr *UseMI, unsigned UseOpIdx,
+    MachineOperand &OpToFold, MachineInstr *UseMI, unsigned UseOpIdx,
     SmallVectorImpl<FoldCandidate> &FoldList) const {
   const MCInstrDesc &Desc = UseMI->getDesc();
   if (UseOpIdx >= Desc.getNumOperands())
@@ -825,7 +814,7 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
 
   uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType;
   if (OpToFold.isImm() && TII->isOperandLegal(*UseMI, UseOpIdx, &OpToFold)) {
-    UseMI->getOperand(UseOpIdx).ChangeToImmediate(OpToFold.getImm());
+    appendFoldCandidate(FoldList, UseMI, UseOpIdx, &OpToFold);
     return true;
   }
 
@@ -836,16 +825,13 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
   if (!UseReg.isVirtual())
     return false;
 
-  if (isUseMIInFoldList(FoldList, UseMI))
-    return false;
-
   // Maybe it is just a COPY of an immediate itself.
   MachineInstr *Def = MRI->getVRegDef(UseReg);
   MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
   if (!UseOp.getSubReg() && Def && TII->isFoldableCopy(*Def)) {
     MachineOperand &DefOp = Def->getOperand(1);
     if (DefOp.isImm() && TII->isOperandLegal(*UseMI, UseOpIdx, &DefOp)) {
-      UseMI->getOperand(UseOpIdx).ChangeToImmediate(DefOp.getImm());
+      appendFoldCandidate(FoldList, UseMI, UseOpIdx, &DefOp);
       return true;
     }
   }
@@ -0,0 +1,63 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=peephole-opt,si-fold-operands -o - %s | FileCheck %s
+
+# Check for assert when trying later folds after commuting an
+# instruction where both operands are the same register. This depended
+# on use list ordering, so we need to run peephole-opt first to
+# reproduce.
+
+---
+name: commute_add_same_inputs_assert
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; CHECK-LABEL: name: commute_add_same_inputs_assert
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 0, killed [[COPY]], implicit-def $vcc, implicit $exec
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 0, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $vcc, implicit $exec
+    ; CHECK-NEXT: $vgpr4 = COPY [[V_ADDC_U32_e32_]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr4
+    %0:vgpr_32 = COPY $vgpr0
+    %1:sreg_64 = S_MOV_B64 0
+    %2:sreg_32 = COPY %1.sub0
+    %3:vgpr_32 = V_ADD_CO_U32_e32 %2, killed %0, implicit-def $vcc, implicit $exec
+    %4:vgpr_32 = COPY %2
+    %5:vgpr_32 = COPY %2
+    %6:vgpr_32 = V_ADDC_U32_e32 %4, %5, implicit-def $vcc, implicit $vcc, implicit $exec
+    $vgpr4 = COPY %6
+    SI_RETURN implicit $vgpr4
+
+...
+
+---
+name: commute_sub_same_inputs_assert
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; CHECK-LABEL: name: commute_sub_same_inputs_assert
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[V_SUB_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_CO_U32_e32 0, killed [[COPY]], implicit-def $vcc, implicit $exec
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: [[V_SUBBREV_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUBBREV_U32_e32 0, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $vcc, implicit $exec
+    ; CHECK-NEXT: $vgpr4 = COPY [[V_SUBBREV_U32_e32_]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr4
+    %0:vgpr_32 = COPY $vgpr0
+    %1:sreg_64 = S_MOV_B64 0
+    %2:sreg_32 = COPY %1.sub0
+    %3:vgpr_32 = V_SUB_CO_U32_e32 %2, killed %0, implicit-def $vcc, implicit $exec
+    %4:vgpr_32 = COPY %2
+    %5:vgpr_32 = COPY %2
+    %6:vgpr_32 = V_SUBB_U32_e32 %5, %4, implicit-def $vcc, implicit $vcc, implicit $exec
+    $vgpr4 = COPY %6
+    SI_RETURN implicit $vgpr4
+
+...
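The comment at the top of these tests notes that both operands of the addc/subb are copies of the same register, which means the instruction appears twice in that register's use list. The sketch below is a hedged toy model of why that shape is fragile when a pass mutates operands while still walking the use list, and why queuing fold candidates first avoids the problem; all names and structures are invented for illustration and are not the actual SIFoldOperands code.

#include <cassert>
#include <utility>
#include <vector>

// Toy operand: starts as a register use, may become an immediate.
struct Operand {
  bool IsReg = true;
  int Value = 0;
};
struct Instr {
  std::vector<Operand> Ops;
};

int main() {
  // Like %6 = V_ADDC_U32_e32 %4, %5 above, where %4 and %5 are both
  // copies of %2: the same instruction occurs twice in %2's use list.
  Instr Addc{{Operand{true, 2}, Operand{true, 2}}};
  std::vector<std::pair<Instr *, unsigned>> Uses{{&Addc, 0}, {&Addc, 1}};

  // Queue folds while walking the uses; do not mutate inside the loop.
  // An eager rewrite on the first visit would leave a half-changed
  // instruction for the second visit, the kind of state that trips
  // asserts and makes the outcome depend on use-list ordering.
  std::vector<std::pair<Instr *, unsigned>> FoldList;
  for (auto [MI, OpNo] : Uses) {
    assert(MI->Ops[OpNo].IsReg && "operand changed mid-walk");
    FoldList.push_back({MI, OpNo});
  }

  // Apply everything only after the walk is complete.
  for (auto [MI, OpNo] : FoldList) {
    MI->Ops[OpNo].IsReg = false;
    MI->Ops[OpNo].Value = 0; // the folded immediate (0 in the tests)
  }
  assert(!Addc.Ops[0].IsReg && !Addc.Ops[1].IsReg);
  return 0;
}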