@@ -3437,6 +3437,30 @@ void SIInstrInfo::removeModOperands(MachineInstr &MI) const {
   }
 }
 
+std::optional<int64_t> SIInstrInfo::extractSubregFromImm(int64_t Imm,
+                                                         unsigned SubRegIndex) {
+  switch (SubRegIndex) {
+  case AMDGPU::NoSubRegister:
+    return Imm;
+  case AMDGPU::sub0:
+    return Lo_32(Imm);
+  case AMDGPU::sub1:
+    return Hi_32(Imm);
+  case AMDGPU::lo16:
+    return SignExtend64<16>(Imm);
+  case AMDGPU::hi16:
+    return SignExtend64<16>(Imm >> 16);
+  case AMDGPU::sub1_lo16:
+    return SignExtend64<16>(Imm >> 32);
+  case AMDGPU::sub1_hi16:
+    return SignExtend64<16>(Imm >> 48);
+  default:
+    return std::nullopt;
+  }
+
+  llvm_unreachable("covered subregister switch");
+}
+
 bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
                                 Register Reg, MachineRegisterInfo *MRI) const {
   if (!MRI->hasOneNonDBGUse(Reg))
@@ -3446,25 +3470,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
   if (!getConstValDefinedInReg(DefMI, Reg, Imm))
     return false;
 
-  auto getImmFor = [=](const MachineOperand &UseOp) -> int64_t {
-    switch (UseOp.getSubReg()) {
-    default:
-      return Imm;
-    case AMDGPU::sub0:
-      return Lo_32(Imm);
-    case AMDGPU::sub1:
-      return Hi_32(Imm);
-    case AMDGPU::lo16:
-      return SignExtend64<16>(Imm);
-    case AMDGPU::hi16:
-      return SignExtend64<16>(Imm >> 16);
-    case AMDGPU::sub1_lo16:
-      return SignExtend64<16>(Imm >> 32);
-    case AMDGPU::sub1_hi16:
-      return SignExtend64<16>(Imm >> 48);
-    }
-  };
-
   assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");
 
   unsigned Opc = UseMI.getOpcode();
@@ -3480,7 +3485,11 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
                              : AMDGPU::V_MOV_B32_e32
                    : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO
                              : AMDGPU::S_MOV_B32;
-    APInt Imm(Is64Bit ? 64 : 32, getImmFor(UseMI.getOperand(1)),
+
+    std::optional<int64_t> SubRegImm =
+        extractSubregFromImm(Imm, UseMI.getOperand(1).getSubReg());
+
+    APInt Imm(Is64Bit ? 64 : 32, *SubRegImm,
               /*isSigned=*/true, /*implicitTrunc=*/true);
 
     if (RI.isAGPR(*MRI, DstReg)) {
@@ -3591,7 +3600,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
       if (NewOpc == AMDGPU::V_FMAMK_F16_fake16)
         return false;
 
-      const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);
+      const std::optional<int64_t> SubRegImm = extractSubregFromImm(
+          Imm, RegSrc == Src1 ? Src0->getSubReg() : Src1->getSubReg());
 
       // FIXME: This would be a lot easier if we could return a new instruction
       // instead of having to modify in place.
@@ -3608,7 +3618,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
       UseMI.untieRegOperand(
           AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
 
-      Src1->ChangeToImmediate(Imm);
+      Src1->ChangeToImmediate(*SubRegImm);
 
       removeModOperands(UseMI);
       UseMI.setDesc(get(NewOpc));
@@ -3679,8 +3689,11 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
       UseMI.untieRegOperand(
           AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
 
+      const std::optional<int64_t> SubRegImm =
+          extractSubregFromImm(Imm, Src2->getSubReg());
+
       // ChangingToImmediate adds Src2 back to the instruction.
-      Src2->ChangeToImmediate(getImmFor(*Src2));
+      Src2->ChangeToImmediate(*SubRegImm);
 
       // These come before src2.
       removeModOperands(UseMI);
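For reference, here is a minimal standalone sketch (not LLVM code) of what `extractSubregFromImm` computes for each subregister index. The enum and `signExtend16` helper below are stand-ins for `AMDGPU::sub0`/`sub1`/`lo16`/`hi16`/`sub1_lo16`/`sub1_hi16` and for LLVM's `Lo_32`, `Hi_32`, and `SignExtend64<16>` from `llvm/Support/MathExtras.h`:

```cpp
#include <cassert>
#include <cstdint>
#include <optional>

// Stand-ins for the AMDGPU subregister indexes used in the patch.
enum SubRegIndex { NoSubRegister, Sub0, Sub1, Lo16, Hi16, Sub1Lo16, Sub1Hi16 };

// 16-bit sign extension, as SignExtend64<16> does in LLVM.
static int64_t signExtend16(int64_t V) {
  return static_cast<int64_t>(static_cast<int16_t>(V & 0xffff));
}

static std::optional<int64_t> extractSubreg(int64_t Imm, SubRegIndex Idx) {
  switch (Idx) {
  case NoSubRegister:
    return Imm;                              // whole 64-bit value
  case Sub0:
    return static_cast<uint32_t>(Imm);       // low 32 bits, zero-extended
  case Sub1:
    return static_cast<uint32_t>(Imm >> 32); // high 32 bits, zero-extended
  case Lo16:
    return signExtend16(Imm);                // bits [15:0], sign-extended
  case Hi16:
    return signExtend16(Imm >> 16);          // bits [31:16], sign-extended
  case Sub1Lo16:
    return signExtend16(Imm >> 32);          // bits [47:32], sign-extended
  case Sub1Hi16:
    return signExtend16(Imm >> 48);          // bits [63:48], sign-extended
  }
  return std::nullopt;                       // unhandled subregister index
}

int main() {
  const int64_t Imm = 0x1111222233334444;
  assert(*extractSubreg(Imm, Sub0) == 0x33334444);
  assert(*extractSubreg(Imm, Sub1) == 0x11112222);
  assert(*extractSubreg(Imm, Hi16) == 0x3333);
  assert(*extractSubreg(Imm, Sub1Hi16) == 0x1111);
}
```

Note the asymmetry the sketch preserves: the 32-bit halves come back zero-extended (`Lo_32`/`Hi_32` return `uint32_t`), while the 16-bit lanes are sign-extended, matching the `getImmFor` lambda this patch replaces. The unchecked `*SubRegImm` dereferences in the hunks above appear to rely on these fold paths only ever seeing subregister indexes the switch covers; any other index makes the helper return `std::nullopt`.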