AMDGPU: Fold more scalar operations on frame index to VALU #115059

Conversation

arsenm (Contributor) commented Nov 5, 2024

Further extend workaround for the lack of proper regbankselect
for frame indexes.
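
To make the intent concrete, this is the shape of the rewrite on one of the new MIR test cases added in this patch (the S_OR_B32 and S_MUL_I32 cases are analogous; register numbers are illustrative):

    ; Input: scalar AND of a frame index whose result is only copied to a VGPR.
    %0:sreg_32 = S_AND_B32 %stack.0, 128, implicit-def dead $scc
    %1:vgpr_32 = COPY %0
    SI_RETURN implicit %1

    ; After SIFoldOperands: the scalar op and the copy collapse into one VALU op.
    %1:vgpr_32 = V_AND_B32_e32 128, %stack.0, implicit $exec
    SI_RETURN implicit %1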

llvmbot (Member) commented Nov 5, 2024

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Further extend workaround for the lack of proper regbankselect
for frame indexes.


Full diff: https://github.com/llvm/llvm-project/pull/115059.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+74-47)
  • (modified) llvm/test/CodeGen/AMDGPU/fold-operands-s-add-copy-to-vgpr.mir (+155-12)
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 28bcbd58dc0376..de7dec8831daec 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -78,9 +78,25 @@ class SIFoldOperandsImpl {
   bool frameIndexMayFold(const MachineInstr &UseMI, int OpNo,
                          const MachineOperand &OpToFold) const;
 
-  /// Fold %vgpr = COPY (S_ADD_I32 x, frameindex)
-  ///
-  ///   => %vgpr = V_ADD_U32 x, frameindex
+  // TODO: Just use TII::getVALUOp
+  unsigned convertToVALUOp(unsigned Opc, bool UseVOP3 = false) const {
+    switch (Opc) {
+    case AMDGPU::S_ADD_I32: {
+      if (ST->hasAddNoCarry())
+        return UseVOP3 ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_U32_e32;
+      return UseVOP3 ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
+    }
+    case AMDGPU::S_OR_B32:
+      return UseVOP3 ? AMDGPU::V_OR_B32_e64 : AMDGPU::V_OR_B32_e32;
+    case AMDGPU::S_AND_B32:
+      return UseVOP3 ? AMDGPU::V_AND_B32_e64 : AMDGPU::V_AND_B32_e32;
+    case AMDGPU::S_MUL_I32:
+      return AMDGPU::V_MUL_LO_U32_e64;
+    default:
+      return AMDGPU::INSTRUCTION_LIST_END;
+    }
+  }
+
   bool foldCopyToVGPROfScalarAddOfFrameIndex(Register DstReg, Register SrcReg,
                                              MachineInstr &MI) const;
 
@@ -202,6 +218,8 @@ bool SIFoldOperandsImpl::frameIndexMayFold(
   const unsigned Opc = UseMI.getOpcode();
   switch (Opc) {
   case AMDGPU::S_ADD_I32:
+  case AMDGPU::S_OR_B32:
+  case AMDGPU::S_AND_B32:
   case AMDGPU::V_ADD_U32_e32:
   case AMDGPU::V_ADD_CO_U32_e32:
     // TODO: Possibly relax hasOneUse. It matters more for mubuf, since we have
@@ -238,53 +256,62 @@ bool SIFoldOperandsImpl::foldCopyToVGPROfScalarAddOfFrameIndex(
   if (TRI->isVGPR(*MRI, DstReg) && TRI->isSGPRReg(*MRI, SrcReg) &&
       MRI->hasOneNonDBGUse(SrcReg)) {
     MachineInstr *Def = MRI->getVRegDef(SrcReg);
-    if (Def && Def->getOpcode() == AMDGPU::S_ADD_I32 &&
-        Def->getOperand(3).isDead()) {
-      MachineOperand *Src0 = &Def->getOperand(1);
-      MachineOperand *Src1 = &Def->getOperand(2);
-
-      // TODO: This is profitable with more operand types, and for more
-      // opcodes. But ultimately this is working around poor / nonexistent
-      // regbankselect.
-      if (!Src0->isFI() && !Src1->isFI())
-        return false;
+    if (!Def || Def->getNumOperands() != 4)
+      return false;
 
-      if (Src0->isFI())
-        std::swap(Src0, Src1);
-
-      MachineBasicBlock *MBB = Def->getParent();
-      const DebugLoc &DL = Def->getDebugLoc();
-      if (ST->hasAddNoCarry()) {
-        bool UseVOP3 = !Src0->isImm() || TII->isInlineConstant(*Src0);
-        MachineInstrBuilder Add =
-            BuildMI(*MBB, *Def, DL,
-                    TII->get(UseVOP3 ? AMDGPU::V_ADD_U32_e64
-                                     : AMDGPU::V_ADD_U32_e32),
-                    DstReg)
-                .add(*Src0)
-                .add(*Src1)
-                .setMIFlags(Def->getFlags());
-        if (UseVOP3)
-          Add.addImm(0);
-
-        Def->eraseFromParent();
-        MI.eraseFromParent();
-        return true;
-      }
+    MachineOperand *Src0 = &Def->getOperand(1);
+    MachineOperand *Src1 = &Def->getOperand(2);
 
-      MachineBasicBlock::LivenessQueryResult Liveness =
-          MBB->computeRegisterLiveness(TRI, AMDGPU::VCC, *Def, 16);
-      if (Liveness == MachineBasicBlock::LQR_Dead) {
-        // TODO: If src1 satisfies operand constraints, use vop3 version.
-        BuildMI(*MBB, *Def, DL, TII->get(AMDGPU::V_ADD_CO_U32_e32), DstReg)
-            .add(*Src0)
-            .add(*Src1)
-            .setOperandDead(3) // implicit-def $vcc
-            .setMIFlags(Def->getFlags());
-        Def->eraseFromParent();
-        MI.eraseFromParent();
-        return true;
+    // TODO: This is profitable with more operand types, and for more
+    // opcodes. But ultimately this is working around poor / nonexistent
+    // regbankselect.
+    if (!Src0->isFI() && !Src1->isFI())
+      return false;
+
+    if (Src0->isFI())
+      std::swap(Src0, Src1);
+
+    const bool UseVOP3 = !Src0->isImm() || TII->isInlineConstant(*Src0);
+    unsigned NewOp = convertToVALUOp(Def->getOpcode(), UseVOP3);
+    if (NewOp == AMDGPU::INSTRUCTION_LIST_END ||
+        !Def->getOperand(3).isDead()) // Check if scc is dead
+      return false;
+
+    MachineBasicBlock *MBB = Def->getParent();
+    const DebugLoc &DL = Def->getDebugLoc();
+    if (NewOp != AMDGPU::V_ADD_CO_U32_e32) {
+      MachineInstrBuilder Add =
+          BuildMI(*MBB, *Def, DL, TII->get(NewOp), DstReg);
+
+      if (Add->getDesc().getNumDefs() == 2) {
+        Register CarryOutReg = MRI->createVirtualRegister(TRI->getBoolRC());
+        Add.addDef(CarryOutReg, RegState::Dead);
+        MRI->setRegAllocationHint(CarryOutReg, 0, TRI->getVCC());
       }
+
+      Add.add(*Src0).add(*Src1).setMIFlags(Def->getFlags());
+      if (AMDGPU::hasNamedOperand(NewOp, AMDGPU::OpName::clamp))
+        Add.addImm(0);
+
+      Def->eraseFromParent();
+      MI.eraseFromParent();
+      return true;
+    }
+
+    assert(NewOp == AMDGPU::V_ADD_CO_U32_e32);
+
+    MachineBasicBlock::LivenessQueryResult Liveness =
+        MBB->computeRegisterLiveness(TRI, AMDGPU::VCC, *Def, 16);
+    if (Liveness == MachineBasicBlock::LQR_Dead) {
+      // TODO: If src1 satisfies operand constraints, use vop3 version.
+      BuildMI(*MBB, *Def, DL, TII->get(NewOp), DstReg)
+          .add(*Src0)
+          .add(*Src1)
+          .setOperandDead(3) // implicit-def $vcc
+          .setMIFlags(Def->getFlags());
+      Def->eraseFromParent();
+      MI.eraseFromParent();
+      return true;
     }
   }
 
diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-s-add-copy-to-vgpr.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-s-add-copy-to-vgpr.mir
index 683f02b413315e..8c88c7a97174e2 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-operands-s-add-copy-to-vgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-operands-s-add-copy-to-vgpr.mir
@@ -75,8 +75,8 @@ stack:
 body:             |
   bb.0:
     ; GFX8-LABEL: name: fold_s_add_i32__fi_imm_copy_to_virt_vgpr
-    ; GFX8: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = nuw V_ADD_CO_U32_e32 64, %stack.0, implicit-def dead $vcc, implicit $exec
-    ; GFX8-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e32_]]
+    ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = nuw V_ADD_CO_U32_e64 64, %stack.0, 0, implicit $exec
+    ; GFX8-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e64_]]
     ;
     ; GFX9-LABEL: name: fold_s_add_i32__fi_imm_copy_to_virt_vgpr
     ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = nuw V_ADD_U32_e64 64, %stack.0, 0, implicit $exec
@@ -98,8 +98,8 @@ stack:
 body:             |
   bb.0:
     ; GFX8-LABEL: name: fold_s_add_i32__imm_fi_copy_to_virt_vgpr
-    ; GFX8: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = nuw V_ADD_CO_U32_e32 64, %stack.0, implicit-def dead $vcc, implicit $exec
-    ; GFX8-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e32_]]
+    ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = nuw V_ADD_CO_U32_e64 64, %stack.0, 0, implicit $exec
+    ; GFX8-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e64_]]
     ;
     ; GFX9-LABEL: name: fold_s_add_i32__imm_fi_copy_to_virt_vgpr
     ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = nuw V_ADD_U32_e64 64, %stack.0, 0, implicit $exec
@@ -202,8 +202,8 @@ body:             |
     ; GFX8: liveins: $sgpr8
     ; GFX8-NEXT: {{  $}}
     ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr8
-    ; GFX8-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[COPY]], %stack.0, implicit-def dead $vcc, implicit $exec
-    ; GFX8-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e32_]]
+    ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY]], %stack.0, 0, implicit $exec
+    ; GFX8-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e64_]]
     ;
     ; GFX9-LABEL: name: fold_s_add_i32__mov_fi_reg_copy_to_virt_vgpr
     ; GFX9: liveins: $sgpr8
@@ -239,8 +239,8 @@ body:             |
     ; GFX8: liveins: $sgpr8
     ; GFX8-NEXT: {{  $}}
     ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr8
-    ; GFX8-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[COPY]], %stack.0, implicit-def dead $vcc, implicit $exec
-    ; GFX8-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e32_]]
+    ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY]], %stack.0, 0, implicit $exec
+    ; GFX8-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e64_]]
     ;
     ; GFX9-LABEL: name: fold_s_add_i32__reg_copy_mov_fi_to_virt_vgpr
     ; GFX9: liveins: $sgpr8
@@ -337,8 +337,8 @@ body:             |
     ; GFX8: liveins: $sgpr8
     ; GFX8-NEXT: {{  $}}
     ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr8
-    ; GFX8-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[COPY]], %stack.0, implicit-def dead $vcc, implicit $exec
-    ; GFX8-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e32_]]
+    ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY]], %stack.0, 0, implicit $exec
+    ; GFX8-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e64_]]
     ;
     ; GFX9-LABEL: name: fold_s_add_i32__fi_reg_copy_to_virt_vgpr
     ; GFX9: liveins: $sgpr8
@@ -371,8 +371,8 @@ body:             |
     ; GFX8: liveins: $sgpr8
     ; GFX8-NEXT: {{  $}}
     ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr8
-    ; GFX8-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 [[COPY]], %stack.0, implicit-def dead $vcc, implicit $exec
-    ; GFX8-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e32_]]
+    ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY]], %stack.0, 0, implicit $exec
+    ; GFX8-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e64_]]
     ;
     ; GFX9-LABEL: name: fold_s_add_i32__reg_fi_copy_to_virt_vgpr
     ; GFX9: liveins: $sgpr8
@@ -392,3 +392,146 @@ body:             |
     %2:vgpr_32 = COPY %1
     SI_RETURN implicit %2
 ...
+
+---
+name:  fold_s_or_b32__mov_fi_const_copy_to_virt_vgpr
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: fold_s_or_b32__mov_fi_const_copy_to_virt_vgpr
+    ; CHECK: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 128, %stack.0, implicit $exec
+    ; CHECK-NEXT: SI_RETURN implicit [[V_OR_B32_e32_]]
+    %0:sreg_32 = S_MOV_B32 %stack.0
+    %1:sreg_32 = S_OR_B32 %0, 128, implicit-def dead $scc
+    %2:vgpr_32 = COPY %1
+    SI_RETURN implicit %2
+...
+
+---
+name:  fold_s_or_b32__const_copy_mov_fi_to_virt_vgpr
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: fold_s_or_b32__const_copy_mov_fi_to_virt_vgpr
+    ; CHECK: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 128, %stack.0, implicit $exec
+    ; CHECK-NEXT: SI_RETURN implicit [[V_OR_B32_e32_]]
+    %0:sreg_32 = S_MOV_B32 %stack.0
+    %1:sreg_32 = S_OR_B32 128, %0, implicit-def dead $scc
+    %2:vgpr_32 = COPY %1
+    SI_RETURN implicit %2
+...
+
+---
+name:  fold_s_or_b32__fi_imm_copy_to_virt_vgpr
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: fold_s_or_b32__fi_imm_copy_to_virt_vgpr
+    ; CHECK: %1:vgpr_32 = disjoint V_OR_B32_e64 64, %stack.0, implicit $exec
+    ; CHECK-NEXT: SI_RETURN implicit %1
+    %0:sreg_32 = disjoint S_OR_B32 %stack.0, 64, implicit-def dead $scc
+    %1:vgpr_32 = COPY %0
+    SI_RETURN implicit %1
+...
+
+---
+name:  fold_s_or_b32__imm_fi_copy_to_virt_vgpr
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: fold_s_or_b32__imm_fi_copy_to_virt_vgpr
+    ; CHECK: %1:vgpr_32 = disjoint V_OR_B32_e64 64, %stack.0, implicit $exec
+    ; CHECK-NEXT: SI_RETURN implicit %1
+    %0:sreg_32 = disjoint S_OR_B32 64, %stack.0, implicit-def dead $scc
+    %1:vgpr_32 = COPY %0
+    SI_RETURN implicit %1
+...
+
+---
+name:  fold_s_and_b32__fi_imm_copy_to_virt_vgpr
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: fold_s_and_b32__fi_imm_copy_to_virt_vgpr
+    ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 64, %stack.0, implicit $exec
+    ; CHECK-NEXT: SI_RETURN implicit [[V_AND_B32_e64_]]
+    %0:sreg_32 = S_AND_B32 %stack.0, 64, implicit-def dead $scc
+    %1:vgpr_32 = COPY %0
+    SI_RETURN implicit %1
+...
+
+---
+name:  fold_s_and_b32__fi_const_copy_to_virt_vgpr
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: fold_s_and_b32__fi_const_copy_to_virt_vgpr
+    ; CHECK: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 128, %stack.0, implicit $exec
+    ; CHECK-NEXT: SI_RETURN implicit [[V_AND_B32_e32_]]
+    %0:sreg_32 = S_AND_B32 %stack.0, 128, implicit-def dead $scc
+    %1:vgpr_32 = COPY %0
+    SI_RETURN implicit %1
+...
+
+---
+name:  fold_s_mul_i32__fi_imm_copy_to_virt_vgpr
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: fold_s_mul_i32__fi_imm_copy_to_virt_vgpr
+    ; CHECK: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 64, %stack.0, implicit $exec
+    ; CHECK-NEXT: SI_RETURN implicit [[V_MUL_LO_U32_e64_]]
+    %0:sreg_32 = S_MUL_I32 %stack.0, 64, implicit-def dead $scc
+    %1:vgpr_32 = COPY %0
+    SI_RETURN implicit %1
+...
+
+---
+name:  fold_s_mul_i32__fi_reg_copy_to_virt_vgpr
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+      liveins: $sgpr4
+    ; CHECK-LABEL: name: fold_s_mul_i32__fi_reg_copy_to_virt_vgpr
+    ; CHECK: liveins: $sgpr4
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4
+    ; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[COPY]], %stack.0, implicit $exec
+    ; CHECK-NEXT: SI_RETURN implicit [[V_MUL_LO_U32_e64_]]
+    %0:sreg_32 = COPY $sgpr4
+    %1:sreg_32 = S_MUL_I32 %stack.0, %0, implicit-def dead $scc
+    %2:vgpr_32 = COPY %1
+    SI_RETURN implicit %2
+...
+
+---
+name:  fold_s_and_b32__mov_fi_const_copy_to_virt_vgpr
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: fold_s_and_b32__mov_fi_const_copy_to_virt_vgpr
+    ; CHECK: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 128, %stack.0, implicit $exec
+    ; CHECK-NEXT: SI_RETURN implicit [[V_AND_B32_e32_]]
+    %0:sreg_32 = S_MOV_B32 %stack.0
+    %1:sreg_32 = S_AND_B32 %0, 128, implicit-def dead $scc
+    %2:vgpr_32 = COPY %1
+    SI_RETURN implicit %2
+...
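
For GFX8 (no add-no-carry), a visible consequence in the updated checks is that the VOP3 form is now used whenever the non-frame-index source is a register or an inline immediate, so the carry-out goes to a dead virtual 64-bit SGPR (with an allocation hint towards VCC) instead of implicitly defining $vcc. Roughly (register numbers illustrative):

    ; GFX8, before this patch:
    %1:vgpr_32 = nuw V_ADD_CO_U32_e32 64, %stack.0, implicit-def dead $vcc, implicit $exec
    ; GFX8, with this patch:
    %1:vgpr_32, dead %2:sreg_64 = nuw V_ADD_CO_U32_e64 64, %stack.0, 0, implicit $exec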

arsenm marked this pull request as ready for review November 5, 2024 20:42
Base automatically changed from users/arsenm/amdgpu-si-fold-operands-frame-index-into-add to main November 6, 2024 17:11
Further extend workaround for the lack of proper regbankselect
for frame indexes.
arsenm force-pushed the users/arsenm/amdgpu-si-fold-operands-copy-more-scalar-ops-to-vector branch from 493a45c to 222beef November 6, 2024 17:12

arsenm (Contributor, Author) commented Nov 8, 2024

Merge activity

  • Nov 7, 10:00 PM EST: A user started a stack merge that includes this pull request via Graphite.
  • Nov 7, 10:02 PM EST: A user merged this pull request with Graphite.

arsenm merged commit 4fb43c4 into main Nov 8, 2024
8 checks passed
arsenm deleted the users/arsenm/amdgpu-si-fold-operands-copy-more-scalar-ops-to-vector branch November 8, 2024 03:02
Groverkss pushed a commit to iree-org/llvm-project that referenced this pull request Nov 15, 2024
Further extend workaround for the lack of proper regbankselect
for frame indexes.