AMDGPU: More consistently use the fold list instead of direct mutation #127612

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged: 1 commit, Feb 25, 2025
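The diff below replaces direct operand mutation (calling ChangeToImmediate at the point a fold is found) with recording the fold on the shared FoldList and letting the normal fold-application path do the rewrite. As a rough illustration of why a folding pass prefers this shape, here is a minimal, self-contained C++ model; every type and name in it is invented for the sketch and is not the actual SIFoldOperands implementation.

#include <iostream>
#include <optional>
#include <vector>

// Toy stand-ins for MachineInstr/MachineOperand.
struct Operand {
  std::optional<int> Reg; // set while this operand is still a register use
  std::optional<int> Imm; // set once an immediate has been folded in
};
struct Instr {
  std::vector<Operand> Ops;
};

// A recorded fold: rewrite operand OpNo of UseMI to Imm, later.
struct FoldCandidate {
  Instr *UseMI;
  unsigned OpNo;
  int Imm;
};

int main() {
  Instr UseMI{{Operand{5, std::nullopt}, Operand{7, std::nullopt}}};
  std::vector<FoldCandidate> FoldList;

  // Record instead of mutating: the register use in operand 0 stays
  // intact, so later legality checks (and other candidate folds into
  // the same instruction) still see the original operands.
  FoldList.push_back({&UseMI, 0, 42});

  bool AllLegal = true; // imagine the pass's final legality decision here
  if (AllLegal) {
    for (const FoldCandidate &Fold : FoldList) {
      Fold.UseMI->Ops[Fold.OpNo].Reg.reset();
      Fold.UseMI->Ops[Fold.OpNo].Imm = Fold.Imm;
    }
  } else {
    FoldList.clear(); // rollback is a no-op; nothing was mutated yet
  }

  std::cout << "op0 imm = " << *UseMI.Ops[0].Imm << '\n'; // prints 42
}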
22 changes: 4 additions & 18 deletions llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -114,7 +114,7 @@ class SIFoldOperandsImpl {
   bool
   getRegSeqInit(SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,
                 Register UseReg, uint8_t OpTy) const;
-  bool tryToFoldACImm(const MachineOperand &OpToFold, MachineInstr *UseMI,
+  bool tryToFoldACImm(MachineOperand &OpToFold, MachineInstr *UseMI,
                       unsigned UseOpIdx,
                       SmallVectorImpl<FoldCandidate> &FoldList) const;
   void foldOperand(MachineOperand &OpToFold,
@@ -573,11 +573,6 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
   return true;
 }
 
-static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
-                              const MachineInstr *MI) {
-  return any_of(FoldList, [&](const auto &C) { return C.UseMI == MI; });
-}
-
 static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
                                 MachineInstr *MI, unsigned OpNo,
                                 MachineOperand *FoldOp, bool Commuted = false,
@@ -678,12 +673,6 @@ bool SIFoldOperandsImpl::tryAddToFoldList(
     }
   }
 
-  // If we are already folding into another operand of MI, then
-  // we can't commute the instruction, otherwise we risk making the
-  // other fold illegal.
-  if (isUseMIInFoldList(FoldList, MI))
-    return false;
-
   // Operand is not legal, so try to commute the instruction to
   // see if this makes it possible to fold.
   unsigned CommuteOpNo = TargetInstrInfo::CommuteAnyOperandIndex;
@@ -814,7 +803,7 @@ bool SIFoldOperandsImpl::getRegSeqInit(
 }
 
 bool SIFoldOperandsImpl::tryToFoldACImm(
-    const MachineOperand &OpToFold, MachineInstr *UseMI, unsigned UseOpIdx,
+    MachineOperand &OpToFold, MachineInstr *UseMI, unsigned UseOpIdx,
     SmallVectorImpl<FoldCandidate> &FoldList) const {
   const MCInstrDesc &Desc = UseMI->getDesc();
   if (UseOpIdx >= Desc.getNumOperands())
@@ -825,7 +814,7 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
 
   uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType;
   if (OpToFold.isImm() && TII->isOperandLegal(*UseMI, UseOpIdx, &OpToFold)) {
-    UseMI->getOperand(UseOpIdx).ChangeToImmediate(OpToFold.getImm());
+    appendFoldCandidate(FoldList, UseMI, UseOpIdx, &OpToFold);
     return true;
   }
 
@@ -836,16 +825,13 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
   if (!UseReg.isVirtual())
     return false;
 
-  if (isUseMIInFoldList(FoldList, UseMI))
-    return false;
-
   // Maybe it is just a COPY of an immediate itself.
   MachineInstr *Def = MRI->getVRegDef(UseReg);
   MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
   if (!UseOp.getSubReg() && Def && TII->isFoldableCopy(*Def)) {
     MachineOperand &DefOp = Def->getOperand(1);
     if (DefOp.isImm() && TII->isOperandLegal(*UseMI, UseOpIdx, &DefOp)) {
-      UseMI->getOperand(UseOpIdx).ChangeToImmediate(DefOp.getImm());
+      appendFoldCandidate(FoldList, UseMI, UseOpIdx, &DefOp);
       return true;
     }
   }
@@ -0,0 +1,63 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -run-pass=peephole-opt,si-fold-operands -o - %s | FileCheck %s
+
+# Check for assert when trying later folds after commuting an
+# instruction where both operands are the same register. This depended
+# on use list ordering, so we need to run peephole-opt first to
+# reproduce.
+
+---
+name: commute_add_same_inputs_assert
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; CHECK-LABEL: name: commute_add_same_inputs_assert
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 0, killed [[COPY]], implicit-def $vcc, implicit $exec
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: [[V_ADDC_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADDC_U32_e32 0, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $vcc, implicit $exec
+    ; CHECK-NEXT: $vgpr4 = COPY [[V_ADDC_U32_e32_]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr4
+    %0:vgpr_32 = COPY $vgpr0
+    %1:sreg_64 = S_MOV_B64 0
+    %2:sreg_32 = COPY %1.sub0
+    %3:vgpr_32 = V_ADD_CO_U32_e32 %2, killed %0, implicit-def $vcc, implicit $exec
+    %4:vgpr_32 = COPY %2
+    %5:vgpr_32 = COPY %2
+    %6:vgpr_32 = V_ADDC_U32_e32 %4, %5, implicit-def $vcc, implicit $vcc, implicit $exec
+    $vgpr4 = COPY %6
+    SI_RETURN implicit $vgpr4
+
+...
+
+---
+name: commute_sub_same_inputs_assert
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; CHECK-LABEL: name: commute_sub_same_inputs_assert
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[V_SUB_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_CO_U32_e32 0, killed [[COPY]], implicit-def $vcc, implicit $exec
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: [[V_SUBBREV_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUBBREV_U32_e32 0, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $vcc, implicit $exec
+    ; CHECK-NEXT: $vgpr4 = COPY [[V_SUBBREV_U32_e32_]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr4
+    %0:vgpr_32 = COPY $vgpr0
+    %1:sreg_64 = S_MOV_B64 0
+    %2:sreg_32 = COPY %1.sub0
+    %3:vgpr_32 = V_SUB_CO_U32_e32 %2, killed %0, implicit-def $vcc, implicit $exec
+    %4:vgpr_32 = COPY %2
+    %5:vgpr_32 = COPY %2
+    %6:vgpr_32 = V_SUBB_U32_e32 %5, %4, implicit-def $vcc, implicit $vcc, implicit $exec
+    $vgpr4 = COPY %6
+    SI_RETURN implicit $vgpr4
+
+...
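The comment at the top of these tests notes that both operands of the addc/subb are copies of the same register, which means the instruction appears twice in that register's use list. The sketch below is a hedged toy model of why that shape is fragile when a pass mutates operands while still walking the use list, and why queuing fold candidates first avoids the problem; all names and structures are invented for illustration and are not the actual SIFoldOperands code.

#include <cassert>
#include <utility>
#include <vector>

// Toy operand: starts as a register use, may become an immediate.
struct Operand {
  bool IsReg = true;
  int Value = 0;
};
struct Instr {
  std::vector<Operand> Ops;
};

int main() {
  // Like %6 = V_ADDC_U32_e32 %4, %5 above, where %4 and %5 are both
  // copies of %2: the same instruction occurs twice in %2's use list.
  Instr Addc{{Operand{true, 2}, Operand{true, 2}}};
  std::vector<std::pair<Instr *, unsigned>> Uses{{&Addc, 0}, {&Addc, 1}};

  // Queue folds while walking the uses; do not mutate inside the loop.
  // An eager rewrite on the first visit would leave a half-changed
  // instruction for the second visit, the kind of state that trips
  // asserts and makes the outcome depend on use-list ordering.
  std::vector<std::pair<Instr *, unsigned>> FoldList;
  for (auto [MI, OpNo] : Uses) {
    assert(MI->Ops[OpNo].IsReg && "operand changed mid-walk");
    FoldList.push_back({MI, OpNo});
  }

  // Apply everything only after the walk is complete.
  for (auto [MI, OpNo] : FoldList) {
    MI->Ops[OpNo].IsReg = false;
    MI->Ops[OpNo].Value = 0; // the folded immediate (0 in the tests)
  }
  assert(!Addc.Ops[0].IsReg && !Addc.Ops[1].IsReg);
  return 0;
}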