@@ -1372,8 +1372,8 @@ bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
1372
1372
MachineInstrBuilder SelectedMI;
1373
1373
MachineOperand &LHS = I.getOperand (2 );
1374
1374
MachineOperand &RHS = I.getOperand (3 );
1375
- auto [Src0, Src0Mods] = selectVOP3ModsImpl (LHS);
1376
- auto [Src1, Src1Mods] = selectVOP3ModsImpl (RHS);
1375
+ auto [Src0, Src0Mods] = selectVOP3ModsImpl (LHS. getReg () );
1376
+ auto [Src1, Src1Mods] = selectVOP3ModsImpl (RHS. getReg () );
1377
1377
Register Src0Reg =
1378
1378
copyToVGPRIfSrcFolded (Src0, Src0Mods, LHS, &I, /* ForceVGPR*/ true );
1379
1379
Register Src1Reg =
@@ -2467,14 +2467,48 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
2467
2467
return false ;
2468
2468
}
2469
2469
2470
+ static Register stripCopy (Register Reg, MachineRegisterInfo &MRI) {
2471
+ return getDefSrcRegIgnoringCopies (Reg, MRI)->Reg ;
2472
+ }
2473
+
2474
+ static Register stripBitCast (Register Reg, MachineRegisterInfo &MRI) {
2475
+ Register BitcastSrc;
2476
+ if (mi_match (Reg, MRI, m_GBitcast (m_Reg (BitcastSrc))))
2477
+ Reg = BitcastSrc;
2478
+ return Reg;
2479
+ }
2480
+
2470
2481
static bool isExtractHiElt (MachineRegisterInfo &MRI, Register In,
2471
2482
Register &Out) {
2483
+ Register Trunc;
2484
+ if (!mi_match (In, MRI, m_GTrunc (m_Reg (Trunc))))
2485
+ return false ;
2486
+
2472
2487
Register LShlSrc;
2473
- if (mi_match (In, MRI,
2474
- m_GTrunc (m_GLShr (m_Reg (LShlSrc), m_SpecificICst (16 ))))) {
2475
- Out = LShlSrc;
2488
+ Register Cst;
2489
+ if (mi_match (Trunc, MRI, m_GLShr (m_Reg (LShlSrc), m_Reg (Cst)))) {
2490
+ Cst = stripCopy (Cst, MRI);
2491
+ if (mi_match (Cst, MRI, m_SpecificICst (16 ))) {
2492
+ Out = stripBitCast (LShlSrc, MRI);
2493
+ return true ;
2494
+ }
2495
+ }
2496
+
2497
+ MachineInstr *Shuffle = MRI.getVRegDef (Trunc);
2498
+ if (Shuffle->getOpcode () != AMDGPU::G_SHUFFLE_VECTOR)
2499
+ return false ;
2500
+
2501
+ assert (MRI.getType (Shuffle->getOperand (0 ).getReg ()) ==
2502
+ LLT::fixed_vector (2 , 16 ));
2503
+
2504
+ ArrayRef<int > Mask = Shuffle->getOperand (3 ).getShuffleMask ();
2505
+ assert (Mask.size () == 2 );
2506
+
2507
+ if (Mask[0 ] == 1 && Mask[1 ] <= 1 ) {
2508
+ Out = Shuffle->getOperand (0 ).getReg ();
2476
2509
return true ;
2477
2510
}
2511
+
2478
2512
return false ;
2479
2513
}
2480
2514
@@ -3550,11 +3584,8 @@ AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
3550
3584
3551
3585
}
3552
3586
3553
- std::pair<Register, unsigned >
3554
- AMDGPUInstructionSelector::selectVOP3ModsImpl (MachineOperand &Root,
3555
- bool IsCanonicalizing,
3556
- bool AllowAbs, bool OpSel) const {
3557
- Register Src = Root.getReg ();
3587
+ std::pair<Register, unsigned > AMDGPUInstructionSelector::selectVOP3ModsImpl (
3588
+ Register Src, bool IsCanonicalizing, bool AllowAbs, bool OpSel) const {
3558
3589
unsigned Mods = 0 ;
3559
3590
MachineInstr *MI = getDefIgnoringCopies (Src, *MRI);
3560
3591
@@ -3617,7 +3648,7 @@ InstructionSelector::ComplexRendererFns
3617
3648
AMDGPUInstructionSelector::selectVOP3Mods0 (MachineOperand &Root) const {
3618
3649
Register Src;
3619
3650
unsigned Mods;
3620
- std::tie (Src, Mods) = selectVOP3ModsImpl (Root);
3651
+ std::tie (Src, Mods) = selectVOP3ModsImpl (Root. getReg () );
3621
3652
3622
3653
return {{
3623
3654
[=](MachineInstrBuilder &MIB) {
@@ -3633,7 +3664,7 @@ InstructionSelector::ComplexRendererFns
3633
3664
AMDGPUInstructionSelector::selectVOP3BMods0 (MachineOperand &Root) const {
3634
3665
Register Src;
3635
3666
unsigned Mods;
3636
- std::tie (Src, Mods) = selectVOP3ModsImpl (Root,
3667
+ std::tie (Src, Mods) = selectVOP3ModsImpl (Root. getReg () ,
3637
3668
/* IsCanonicalizing=*/ true ,
3638
3669
/* AllowAbs=*/ false );
3639
3670
@@ -3660,7 +3691,7 @@ InstructionSelector::ComplexRendererFns
3660
3691
AMDGPUInstructionSelector::selectVOP3Mods (MachineOperand &Root) const {
3661
3692
Register Src;
3662
3693
unsigned Mods;
3663
- std::tie (Src, Mods) = selectVOP3ModsImpl (Root);
3694
+ std::tie (Src, Mods) = selectVOP3ModsImpl (Root. getReg () );
3664
3695
3665
3696
return {{
3666
3697
[=](MachineInstrBuilder &MIB) {
@@ -3675,7 +3706,8 @@ AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
3675
3706
MachineOperand &Root) const {
3676
3707
Register Src;
3677
3708
unsigned Mods;
3678
- std::tie (Src, Mods) = selectVOP3ModsImpl (Root, /* IsCanonicalizing=*/ false );
3709
+ std::tie (Src, Mods) =
3710
+ selectVOP3ModsImpl (Root.getReg (), /* IsCanonicalizing=*/ false );
3679
3711
3680
3712
return {{
3681
3713
[=](MachineInstrBuilder &MIB) {
@@ -3689,8 +3721,9 @@ InstructionSelector::ComplexRendererFns
3689
3721
AMDGPUInstructionSelector::selectVOP3BMods (MachineOperand &Root) const {
3690
3722
Register Src;
3691
3723
unsigned Mods;
3692
- std::tie (Src, Mods) = selectVOP3ModsImpl (Root, /* IsCanonicalizing=*/ true ,
3693
- /* AllowAbs=*/ false );
3724
+ std::tie (Src, Mods) =
3725
+ selectVOP3ModsImpl (Root.getReg (), /* IsCanonicalizing=*/ true ,
3726
+ /* AllowAbs=*/ false );
3694
3727
3695
3728
return {{
3696
3729
[=](MachineInstrBuilder &MIB) {
@@ -4016,7 +4049,7 @@ InstructionSelector::ComplexRendererFns
4016
4049
AMDGPUInstructionSelector::selectVOP3OpSelMods (MachineOperand &Root) const {
4017
4050
Register Src;
4018
4051
unsigned Mods;
4019
- std::tie (Src, Mods) = selectVOP3ModsImpl (Root);
4052
+ std::tie (Src, Mods) = selectVOP3ModsImpl (Root. getReg () );
4020
4053
4021
4054
// FIXME: Handle op_sel
4022
4055
return {{
@@ -4029,7 +4062,7 @@ InstructionSelector::ComplexRendererFns
4029
4062
AMDGPUInstructionSelector::selectVINTERPMods (MachineOperand &Root) const {
4030
4063
Register Src;
4031
4064
unsigned Mods;
4032
- std::tie (Src, Mods) = selectVOP3ModsImpl (Root,
4065
+ std::tie (Src, Mods) = selectVOP3ModsImpl (Root. getReg () ,
4033
4066
/* IsCanonicalizing=*/ true ,
4034
4067
/* AllowAbs=*/ false ,
4035
4068
/* OpSel=*/ false );
@@ -4047,7 +4080,7 @@ InstructionSelector::ComplexRendererFns
4047
4080
AMDGPUInstructionSelector::selectVINTERPModsHi (MachineOperand &Root) const {
4048
4081
Register Src;
4049
4082
unsigned Mods;
4050
- std::tie (Src, Mods) = selectVOP3ModsImpl (Root,
4083
+ std::tie (Src, Mods) = selectVOP3ModsImpl (Root. getReg () ,
4051
4084
/* IsCanonicalizing=*/ true ,
4052
4085
/* AllowAbs=*/ false ,
4053
4086
/* OpSel=*/ true );
@@ -5229,97 +5262,41 @@ AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
5229
5262
[=](MachineInstrBuilder &MIB) { MIB.addImm (*EncodedOffset); }}};
5230
5263
}
5231
5264
5232
- // Variant of stripBitCast that returns the instruction instead of a
5233
- // MachineOperand.
5234
- static MachineInstr *stripBitCast (MachineInstr *MI, MachineRegisterInfo &MRI) {
5235
- if (MI->getOpcode () == AMDGPU::G_BITCAST)
5236
- return getDefIgnoringCopies (MI->getOperand (1 ).getReg (), MRI);
5237
- return MI;
5238
- }
5239
-
5240
- // Figure out if this is really an extract of the high 16-bits of a dword,
5241
- // returns nullptr if it isn't.
5242
- static MachineInstr *isExtractHiElt (MachineInstr *Inst,
5243
- MachineRegisterInfo &MRI) {
5244
- Inst = stripBitCast (Inst, MRI);
5245
-
5246
- if (Inst->getOpcode () != AMDGPU::G_TRUNC)
5247
- return nullptr ;
5248
-
5249
- MachineInstr *TruncOp =
5250
- getDefIgnoringCopies (Inst->getOperand (1 ).getReg (), MRI);
5251
- TruncOp = stripBitCast (TruncOp, MRI);
5252
-
5253
- // G_LSHR x, (G_CONSTANT i32 16)
5254
- if (TruncOp->getOpcode () == AMDGPU::G_LSHR) {
5255
- auto SrlAmount = getIConstantVRegValWithLookThrough (
5256
- TruncOp->getOperand (2 ).getReg (), MRI);
5257
- if (SrlAmount && SrlAmount->Value .getZExtValue () == 16 ) {
5258
- MachineInstr *SrlOp =
5259
- getDefIgnoringCopies (TruncOp->getOperand (1 ).getReg (), MRI);
5260
- return stripBitCast (SrlOp, MRI);
5261
- }
5262
- }
5263
-
5264
- // G_SHUFFLE_VECTOR x, y, shufflemask(1, 1|0)
5265
- // 1, 0 swaps the low/high 16 bits.
5266
- // 1, 1 sets the high 16 bits to be the same as the low 16.
5267
- // in any case, it selects the high elts.
5268
- if (TruncOp->getOpcode () == AMDGPU::G_SHUFFLE_VECTOR) {
5269
- assert (MRI.getType (TruncOp->getOperand (0 ).getReg ()) ==
5270
- LLT::fixed_vector (2 , 16 ));
5271
-
5272
- ArrayRef<int > Mask = TruncOp->getOperand (3 ).getShuffleMask ();
5273
- assert (Mask.size () == 2 );
5274
-
5275
- if (Mask[0 ] == 1 && Mask[1 ] <= 1 ) {
5276
- MachineInstr *LHS =
5277
- getDefIgnoringCopies (TruncOp->getOperand (1 ).getReg (), MRI);
5278
- return stripBitCast (LHS, MRI);
5279
- }
5280
- }
5281
-
5282
- return nullptr ;
5283
- }
5284
-
5285
5265
std::pair<Register, unsigned >
5286
5266
AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl (MachineOperand &Root,
5287
5267
bool &Matched) const {
5288
5268
Matched = false ;
5289
5269
5290
5270
Register Src;
5291
5271
unsigned Mods;
5292
- std::tie (Src, Mods) = selectVOP3ModsImpl (Root);
5293
-
5294
- MachineInstr *MI = getDefIgnoringCopies (Src, *MRI);
5295
- if (MI->getOpcode () == AMDGPU::G_FPEXT) {
5296
- MachineOperand *MO = &MI->getOperand (1 );
5297
- Src = MO->getReg ();
5298
- MI = getDefIgnoringCopies (Src, *MRI);
5272
+ std::tie (Src, Mods) = selectVOP3ModsImpl (Root.getReg ());
5299
5273
5274
+ if (mi_match (Src, *MRI, m_GFPExt (m_Reg (Src)))) {
5300
5275
assert (MRI->getType (Src) == LLT::scalar (16 ));
5301
5276
5302
- // See through bitcasts.
5303
- // FIXME: Would be nice to use stripBitCast here.
5304
- if (MI->getOpcode () == AMDGPU::G_BITCAST) {
5305
- MO = &MI->getOperand (1 );
5306
- Src = MO->getReg ();
5307
- MI = getDefIgnoringCopies (Src, *MRI);
5308
- }
5277
+ // Only change Src if src modifier could be gained. In such cases new Src
5278
+ // could be sgpr but this does not violate constant bus restriction for
5279
+ // instruction that is being selected.
5280
+ // Note: Src is not changed when there is only a simple sgpr to vgpr copy
5281
+ // since this could violate constant bus restriction.
5282
+ Register PeekSrc = stripCopy (Src, *MRI);
5309
5283
5310
5284
const auto CheckAbsNeg = [&]() {
5311
5285
// Be careful about folding modifiers if we already have an abs. fneg is
5312
5286
// applied last, so we don't want to apply an earlier fneg.
5313
5287
if ((Mods & SISrcMods::ABS) == 0 ) {
5314
5288
unsigned ModsTmp;
5315
- std::tie (Src, ModsTmp) = selectVOP3ModsImpl (*MO);
5316
- MI = getDefIgnoringCopies (Src, *MRI);
5289
+ std::tie (PeekSrc, ModsTmp) = selectVOP3ModsImpl (PeekSrc);
5317
5290
5318
- if ((ModsTmp & SISrcMods::NEG) != 0 )
5291
+ if ((ModsTmp & SISrcMods::NEG) != 0 ) {
5319
5292
Mods ^= SISrcMods::NEG;
5293
+ Src = PeekSrc;
5294
+ }
5320
5295
5321
- if ((ModsTmp & SISrcMods::ABS) != 0 )
5296
+ if ((ModsTmp & SISrcMods::ABS) != 0 ) {
5322
5297
Mods |= SISrcMods::ABS;
5298
+ Src = PeekSrc;
5299
+ }
5323
5300
}
5324
5301
};
5325
5302
@@ -5332,12 +5309,9 @@ AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
5332
5309
5333
5310
Mods |= SISrcMods::OP_SEL_1;
5334
5311
5335
- if (MachineInstr *ExtractHiEltMI = isExtractHiElt (MI, *MRI)) {
5312
+ if (isExtractHiElt (*MRI, PeekSrc, PeekSrc)) {
5313
+ Src = PeekSrc;
5336
5314
Mods |= SISrcMods::OP_SEL_0;
5337
- MI = ExtractHiEltMI;
5338
- MO = &MI->getOperand (0 );
5339
- Src = MO->getReg ();
5340
-
5341
5315
CheckAbsNeg ();
5342
5316
}
5343
5317
0 commit comments