AMDGPU: Fix using illegal VOP3 literal in frame index elimination #115747
Conversation
Most of the change is just moving the default case outside of the switch so we can break to the default handling for arbitrary operations.
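As a rough illustration of that restructuring (hypothetical names, not the actual SIRegisterInfo::eliminateFrameIndex code), moving the former `default:` body after the switch lets any case `break` out and fall into the shared handling:

```cpp
// Hypothetical stand-ins for the special-case and generic paths.
static bool canHandleSpecially(int Opcode) { return Opcode == 1; }
static void handleGenerically(int Opcode) { (void)Opcode; }

// The shape used by the patch: the former `default:` body now lives after
// the switch, so any case can `break` out of the switch and reach it.
static void handleOp(int Opcode) {
  switch (Opcode) {
  case 1:
    if (canHandleSpecially(Opcode))
      return; // handled in place
    break;    // bail out to the shared default handling below
  default:
    break;
  }
  handleGenerically(Opcode); // former default-case body
}
```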
Force-pushed from a897b3b to 206b2a3
@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes: Most of the change is just moving the default case outside of the switch so we can break to the default handling for arbitrary operations.

Patch is 77.01 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/115747.diff

5 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index f76d1266f495cf..246ef7ad481ab7 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2268,7 +2268,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
const SIInstrInfo *TII = ST.getInstrInfo();
- DebugLoc DL = MI->getDebugLoc();
+ const DebugLoc &DL = MI->getDebugLoc();
assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
@@ -2496,6 +2496,25 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
Register MaterializedReg = FrameReg;
Register ScavengedVGPR;
+ int64_t Offset = FrameInfo.getObjectOffset(Index);
+ // For the non-immediate case, we could fall through to the default
+ // handling, but we do an in-place update of the result register here to
+ // avoid scavenging another register.
+ if (OtherOp->isImm()) {
+ int64_t TotalOffset = OtherOp->getImm() + Offset;
+
+ if (!ST.hasVOP3Literal() && SIInstrInfo::isVOP3(*MI) &&
+ !AMDGPU::isInlinableIntLiteral(TotalOffset)) {
+ // If we can't support a VOP3 literal in the VALU instruction, we
+ // can't specially fold into the add.
+ // TODO: Handle VOP3->VOP2 shrink to support the fold.
+ break;
+ }
+
+ OtherOp->setImm(TotalOffset);
+ Offset = 0;
+ }
+
if (FrameReg && !ST.enableFlatScratch()) {
// We should just do an in-place update of the result register. However,
// the value there may also be used by the add, in which case we need a
@@ -2516,15 +2535,6 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
MaterializedReg = ScavengedVGPR;
}
- int64_t Offset = FrameInfo.getObjectOffset(Index);
- // For the non-immediate case, we could fall through to the default
- // handling, but we do an in-place update of the result register here to
- // avoid scavenging another register.
- if (OtherOp->isImm()) {
- OtherOp->setImm(OtherOp->getImm() + Offset);
- Offset = 0;
- }
-
if ((!OtherOp->isImm() || OtherOp->getImm() != 0) && MaterializedReg) {
if (ST.enableFlatScratch() &&
!TII->isOperandLegal(*MI, Src1Idx, OtherOp)) {
@@ -2761,411 +2771,408 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
return true;
}
default: {
- // Other access to frame index
- const DebugLoc &DL = MI->getDebugLoc();
+ break;
+ }
+ }
- int64_t Offset = FrameInfo.getObjectOffset(Index);
- if (ST.enableFlatScratch()) {
- if (TII->isFLATScratch(*MI)) {
- assert((int16_t)FIOperandNum ==
- AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::saddr));
+ int64_t Offset = FrameInfo.getObjectOffset(Index);
+ if (ST.enableFlatScratch()) {
+ if (TII->isFLATScratch(*MI)) {
+ assert(
+ (int16_t)FIOperandNum ==
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::saddr));
- // The offset is always swizzled, just replace it
- if (FrameReg)
- FIOp->ChangeToRegister(FrameReg, false);
+ // The offset is always swizzled, just replace it
+ if (FrameReg)
+ FIOp->ChangeToRegister(FrameReg, false);
- MachineOperand *OffsetOp =
+ MachineOperand *OffsetOp =
TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
- int64_t NewOffset = Offset + OffsetOp->getImm();
- if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
- SIInstrFlags::FlatScratch)) {
- OffsetOp->setImm(NewOffset);
- if (FrameReg)
- return false;
- Offset = 0;
- }
+ int64_t NewOffset = Offset + OffsetOp->getImm();
+ if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
+ SIInstrFlags::FlatScratch)) {
+ OffsetOp->setImm(NewOffset);
+ if (FrameReg)
+ return false;
+ Offset = 0;
+ }
- if (!Offset) {
- unsigned Opc = MI->getOpcode();
- int NewOpc = -1;
- if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr)) {
- NewOpc = AMDGPU::getFlatScratchInstSVfromSVS(Opc);
- } else if (ST.hasFlatScratchSTMode()) {
- // On GFX10 we have ST mode to use no registers for an address.
- // Otherwise we need to materialize 0 into an SGPR.
- NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
- }
+ if (!Offset) {
+ unsigned Opc = MI->getOpcode();
+ int NewOpc = -1;
+ if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr)) {
+ NewOpc = AMDGPU::getFlatScratchInstSVfromSVS(Opc);
+ } else if (ST.hasFlatScratchSTMode()) {
+ // On GFX10 we have ST mode to use no registers for an address.
+ // Otherwise we need to materialize 0 into an SGPR.
+ NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
+ }
- if (NewOpc != -1) {
- // removeOperand doesn't fixup tied operand indexes as it goes, so
- // it asserts. Untie vdst_in for now and retie them afterwards.
- int VDstIn = AMDGPU::getNamedOperandIdx(Opc,
- AMDGPU::OpName::vdst_in);
- bool TiedVDst = VDstIn != -1 &&
- MI->getOperand(VDstIn).isReg() &&
- MI->getOperand(VDstIn).isTied();
- if (TiedVDst)
- MI->untieRegOperand(VDstIn);
-
- MI->removeOperand(
- AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
-
- if (TiedVDst) {
- int NewVDst =
- AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
- int NewVDstIn =
- AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);
- assert (NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
- MI->tieOperands(NewVDst, NewVDstIn);
- }
- MI->setDesc(TII->get(NewOpc));
- return false;
+ if (NewOpc != -1) {
+ // removeOperand doesn't fixup tied operand indexes as it goes, so
+ // it asserts. Untie vdst_in for now and retie them afterwards.
+ int VDstIn =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
+ bool TiedVDst = VDstIn != -1 && MI->getOperand(VDstIn).isReg() &&
+ MI->getOperand(VDstIn).isTied();
+ if (TiedVDst)
+ MI->untieRegOperand(VDstIn);
+
+ MI->removeOperand(
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
+
+ if (TiedVDst) {
+ int NewVDst =
+ AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
+ int NewVDstIn =
+ AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);
+ assert(NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
+ MI->tieOperands(NewVDst, NewVDstIn);
}
+ MI->setDesc(TII->get(NewOpc));
+ return false;
}
}
+ }
- if (!FrameReg) {
- FIOp->ChangeToImmediate(Offset);
- if (TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp))
- return false;
- }
+ if (!FrameReg) {
+ FIOp->ChangeToImmediate(Offset);
+ if (TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp))
+ return false;
+ }
- // We need to use register here. Check if we can use an SGPR or need
- // a VGPR.
- FIOp->ChangeToRegister(AMDGPU::M0, false);
- bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, FIOp);
+ // We need to use register here. Check if we can use an SGPR or need
+ // a VGPR.
+ FIOp->ChangeToRegister(AMDGPU::M0, false);
+ bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, FIOp);
- if (!Offset && FrameReg && UseSGPR) {
- FIOp->setReg(FrameReg);
- return false;
- }
+ if (!Offset && FrameReg && UseSGPR) {
+ FIOp->setReg(FrameReg);
+ return false;
+ }
- const TargetRegisterClass *RC = UseSGPR ? &AMDGPU::SReg_32_XM0RegClass
- : &AMDGPU::VGPR_32RegClass;
+ const TargetRegisterClass *RC =
+ UseSGPR ? &AMDGPU::SReg_32_XM0RegClass : &AMDGPU::VGPR_32RegClass;
- Register TmpReg =
- RS->scavengeRegisterBackwards(*RC, MI, false, 0, !UseSGPR);
- FIOp->setReg(TmpReg);
- FIOp->setIsKill();
+ Register TmpReg =
+ RS->scavengeRegisterBackwards(*RC, MI, false, 0, !UseSGPR);
+ FIOp->setReg(TmpReg);
+ FIOp->setIsKill();
- if ((!FrameReg || !Offset) && TmpReg) {
- unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
- auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg);
- if (FrameReg)
- MIB.addReg(FrameReg);
- else
- MIB.addImm(Offset);
+ if ((!FrameReg || !Offset) && TmpReg) {
+ unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
+ auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg);
+ if (FrameReg)
+ MIB.addReg(FrameReg);
+ else
+ MIB.addImm(Offset);
- return false;
- }
+ return false;
+ }
- bool NeedSaveSCC = RS->isRegUsed(AMDGPU::SCC) &&
- !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
+ bool NeedSaveSCC = RS->isRegUsed(AMDGPU::SCC) &&
+ !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
- Register TmpSReg =
- UseSGPR ? TmpReg
- : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
- MI, false, 0, !UseSGPR);
+ Register TmpSReg =
+ UseSGPR ? TmpReg
+ : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
+ MI, false, 0, !UseSGPR);
- // TODO: for flat scratch another attempt can be made with a VGPR index
- // if no SGPRs can be scavenged.
- if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
- report_fatal_error("Cannot scavenge register in FI elimination!");
+ // TODO: for flat scratch another attempt can be made with a VGPR index
+ // if no SGPRs can be scavenged.
+ if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
+ report_fatal_error("Cannot scavenge register in FI elimination!");
- if (!TmpSReg) {
- // Use frame register and restore it after.
- TmpSReg = FrameReg;
- FIOp->setReg(FrameReg);
- FIOp->setIsKill(false);
- }
+ if (!TmpSReg) {
+ // Use frame register and restore it after.
+ TmpSReg = FrameReg;
+ FIOp->setReg(FrameReg);
+ FIOp->setIsKill(false);
+ }
- if (NeedSaveSCC) {
- assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADDC_U32), TmpSReg)
- .addReg(FrameReg)
- .addImm(Offset);
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITCMP1_B32))
- .addReg(TmpSReg)
- .addImm(0);
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITSET0_B32), TmpSReg)
+ if (NeedSaveSCC) {
+ assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADDC_U32), TmpSReg)
+ .addReg(FrameReg)
+ .addImm(Offset);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITCMP1_B32))
+ .addReg(TmpSReg)
+ .addImm(0);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITSET0_B32), TmpSReg)
+ .addImm(0)
+ .addReg(TmpSReg);
+ } else {
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
+ .addReg(FrameReg)
+ .addImm(Offset);
+ }
+
+ if (!UseSGPR)
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
+ .addReg(TmpSReg, RegState::Kill);
+
+ if (TmpSReg == FrameReg) {
+ // Undo frame register modification.
+ if (NeedSaveSCC &&
+ !MI->registerDefIsDead(AMDGPU::SCC, /*TRI=*/nullptr)) {
+ MachineBasicBlock::iterator I =
+ BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADDC_U32),
+ TmpSReg)
+ .addReg(FrameReg)
+ .addImm(-Offset);
+ I = BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITCMP1_B32))
+ .addReg(TmpSReg)
+ .addImm(0);
+ BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITSET0_B32),
+ TmpSReg)
.addImm(0)
.addReg(TmpSReg);
} else {
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
+ BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
+ FrameReg)
.addReg(FrameReg)
- .addImm(Offset);
+ .addImm(-Offset);
}
+ }
- if (!UseSGPR)
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
- .addReg(TmpSReg, RegState::Kill);
-
- if (TmpSReg == FrameReg) {
- // Undo frame register modification.
- if (NeedSaveSCC &&
- !MI->registerDefIsDead(AMDGPU::SCC, /*TRI=*/nullptr)) {
- MachineBasicBlock::iterator I =
- BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADDC_U32),
- TmpSReg)
- .addReg(FrameReg)
- .addImm(-Offset);
- I = BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITCMP1_B32))
- .addReg(TmpSReg)
- .addImm(0);
- BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITSET0_B32),
- TmpSReg)
- .addImm(0)
- .addReg(TmpSReg);
- } else {
- BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
- FrameReg)
- .addReg(FrameReg)
- .addImm(-Offset);
- }
- }
+ return false;
+ }
- return false;
- }
+ bool IsMUBUF = TII->isMUBUF(*MI);
+
+ if (!IsMUBUF && !MFI->isBottomOfStack()) {
+ // Convert to a swizzled stack address by scaling by the wave size.
+ // In an entry function/kernel the offset is already swizzled.
+ bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum));
+ bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) &&
+ !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
+ const TargetRegisterClass *RC = IsSALU && !LiveSCC
+ ? &AMDGPU::SReg_32RegClass
+ : &AMDGPU::VGPR_32RegClass;
+ bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
+ MI->getOpcode() == AMDGPU::V_MOV_B32_e64 ||
+ MI->getOpcode() == AMDGPU::S_MOV_B32;
+ Register ResultReg =
+ IsCopy ? MI->getOperand(0).getReg()
+ : RS->scavengeRegisterBackwards(*RC, MI, false, 0);
- bool IsMUBUF = TII->isMUBUF(*MI);
-
- if (!IsMUBUF && !MFI->isBottomOfStack()) {
- // Convert to a swizzled stack address by scaling by the wave size.
- // In an entry function/kernel the offset is already swizzled.
- bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum));
- bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) &&
- !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
- const TargetRegisterClass *RC = IsSALU && !LiveSCC
- ? &AMDGPU::SReg_32RegClass
- : &AMDGPU::VGPR_32RegClass;
- bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
- MI->getOpcode() == AMDGPU::V_MOV_B32_e64 ||
- MI->getOpcode() == AMDGPU::S_MOV_B32;
- Register ResultReg =
- IsCopy ? MI->getOperand(0).getReg()
- : RS->scavengeRegisterBackwards(*RC, MI, false, 0);
-
- int64_t Offset = FrameInfo.getObjectOffset(Index);
- if (Offset == 0) {
- unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32
- : AMDGPU::V_LSHRREV_B32_e64;
- Register TmpResultReg = ResultReg;
- if (IsSALU && LiveSCC) {
- TmpResultReg = RS->scavengeRegisterBackwards(
- AMDGPU::VGPR_32RegClass, MI, false, 0);
- }
+ int64_t Offset = FrameInfo.getObjectOffset(Index);
+ if (Offset == 0) {
+ unsigned OpCode =
+ IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32 : AMDGPU::V_LSHRREV_B32_e64;
+ Register TmpResultReg = ResultReg;
+ if (IsSALU && LiveSCC) {
+ TmpResultReg = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
+ MI, false, 0);
+ }
- auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), TmpResultReg);
- if (OpCode == AMDGPU::V_LSHRREV_B32_e64)
- // For V_LSHRREV, the operands are reversed (the shift count goes
- // first).
- Shift.addImm(ST.getWavefrontSizeLog2()).addReg(FrameReg);
- else
- Shift.addReg(FrameReg).addImm(ST.getWavefrontSizeLog2());
- if (IsSALU && !LiveSCC)
- Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead.
- if (IsSALU && LiveSCC) {
- Register NewDest =
- IsCopy ? ResultReg
- : RS->scavengeRegisterBackwards(AMDGPU::SReg_32RegClass,
- Shift, false, 0);
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
- NewDest)
- .addReg(TmpResultReg);
- ResultReg = NewDest;
- }
- } else {
- MachineInstrBuilder MIB;
- if (!IsSALU) {
- if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
- nullptr) {
- // Reuse ResultReg in intermediate step.
- Register ScaledReg = ResultReg;
-
- BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
- ScaledReg)
+ auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), TmpResultReg);
+ if (OpCode == AMDGPU::V_LSHRREV_B32_e64)
+ // For V_LSHRREV, the operands are reversed (the shift count goes
+ // first).
+ Shift.addImm(ST.getWavefrontSizeLog2()).addReg(FrameReg);
+ else
+ Shift.addReg(FrameReg).addImm(ST.getWavefrontSizeLog2());
+ if (IsSALU && !LiveSCC)
+ Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead.
+ if (IsSALU && LiveSCC) {
+ Register NewDest =
+ IsCopy ? ResultReg
+ : RS->scavengeRegisterBackwards(AMDGPU::SReg_32RegClass,
+ Shift, false, 0);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), NewDest)
+ .addReg(TmpResultReg);
+ ResultReg = NewDest;
+ }
+ } else {
+ MachineInstrBuilder MIB;
+ if (!IsSALU) {
+ if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
+ nullptr) {
+ // Reuse ResultReg in intermediate step.
+ Register ScaledReg = ResultReg;
+
+ BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
+ ...
[truncated]
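To summarize the functional piece of the patch in isolation, here is a simplified sketch with hypothetical helper names and signatures (the integer inline-constant range [-16, 64] is assumed from AMDGPU's documented inline constants): the combined frame offset is only folded into the VALU add's immediate when the encoding can actually carry it; otherwise the code breaks to the generic frame-index handling.

```cpp
#include <cstdint>

// Assumed helper mirroring AMDGPU::isInlinableIntLiteral: integer inline
// constants cover [-16, 64].
static bool isInlinableIntLiteral(int64_t V) { return V >= -16 && V <= 64; }

// Shape of the moved fold (hypothetical signature): combine the immediate
// operand with the frame offset, but only when the encoding can carry the
// result. Pre-GFX10 VOP3 encodings cannot hold a 32-bit literal, so in that
// case the caller breaks to the default frame-index handling instead.
static bool tryFoldFrameOffset(bool HasVOP3Literal, bool IsVOP3,
                               int64_t &OtherImm, int64_t &Offset) {
  int64_t TotalOffset = OtherImm + Offset;
  if (!HasVOP3Literal && IsVOP3 && !isInlinableIntLiteral(TotalOffset))
    return false; // would require an illegal VOP3 literal
  OtherImm = TotalOffset; // fold succeeded: update the operand in place...
  Offset = 0;             // ...leaving nothing to materialize later
  return true;
}
```

When the guard passes, the immediate is updated in place and the offset zeroed, exactly as in the moved block in the diff above.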
ping
    if (!ST.hasVOP3Literal() && SIInstrInfo::isVOP3(*MI) &&
        !AMDGPU::isInlinableIntLiteral(TotalOffset)) {
      // If we can't support a VOP3 literal in the VALU instruction, we
      // can't specially fold into the add.
      // TODO: Handle VOP3->VOP2 shrink to support the fold.
      break;
    }
I assume this is the only significant change?
Why does this block have to move before the !enableFlatScratch handling?
To ensure it doesn't call the scavenger if this path isn't going to handle the case
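In other words, the cheap legality check now runs before any register scavenging. A generic sketch of that ordering (hypothetical types, not LLVM's RegScavenger API):

```cpp
#include <optional>

// Hypothetical scavenger stand-in; real scavenging can insert spill code, so
// it should not run on a path that then bails out unhandled.
struct Scavenger {
  int scavengeRegister() { return 42; }
};

static std::optional<int> trySpecialPath(bool FoldIsLegal, Scavenger &RS) {
  if (!FoldIsLegal)
    return std::nullopt;          // bail out before paying for a scavenge
  return RS.scavengeRegister();   // only scavenge once we know we proceed
}
```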
; MUBUF-NEXT: s_addc_u32 s1, s1, 0
; MUBUF-NEXT: v_mov_b32_e32 v0, 0x3040
; MUBUF-NEXT: v_add_u32_e32 v0, 64, v1
Would your "TODO: Handle VOP3->VOP2 shrink to support the fold" fix this regression?
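For reference on why 0x3040 cannot be folded while 64 can (and why a VOP3->VOP2 shrink might help, since VOP2 encodings can take a 32-bit literal), a small sketch of the inline-constant boundary, reusing the same assumed [-16, 64] integer range as the earlier sketch:

```cpp
#include <cassert>
#include <cstdint>

// Sketch of the inline-constant test (mirrors the documented range used by
// AMDGPU::isInlinableIntLiteral, not a copy of the LLVM implementation).
static bool isInlinableInt(int64_t Imm) { return Imm >= -16 && Imm <= 64; }

int main() {
  assert(isInlinableInt(64));      // encodes as an inline constant
  assert(!isInlinableInt(0x3040)); // 12352: needs a literal, hence the v_mov
  return 0;
}
```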
LGTM.