@@ -37,20 +37,22 @@ STATISTIC(NumSDWAInstructionsPeepholed,
37
37
38
38
namespace {
39
39
40
+ bool isConvertibleToSDWA (MachineInstr &MI, const GCNSubtarget &ST,
41
+ const SIInstrInfo *TII);
40
42
class SDWAOperand ;
41
43
class SDWADstOperand ;
42
44
43
- class SIPeepholeSDWA : public MachineFunctionPass {
44
- public:
45
- using SDWAOperandsVector = SmallVector<SDWAOperand *, 4 >;
45
+ using SDWAOperandsVector = SmallVector<SDWAOperand *, 4 >;
46
+ using SDWAOperandsMap = MapVector<MachineInstr *, SDWAOperandsVector>;
46
47
48
+ class SIPeepholeSDWA : public MachineFunctionPass {
47
49
private:
48
50
MachineRegisterInfo *MRI;
49
51
const SIRegisterInfo *TRI;
50
52
const SIInstrInfo *TII;
51
53
52
54
MapVector<MachineInstr *, std::unique_ptr<SDWAOperand>> SDWAOperands;
53
- MapVector<MachineInstr *, SDWAOperandsVector> PotentialMatches;
55
+ SDWAOperandsMap PotentialMatches;
54
56
SmallVector<MachineInstr *, 8 > ConvertedInstructions;
55
57
56
58
std::optional<int64_t > foldToImm (const MachineOperand &Op) const ;
@@ -65,7 +67,6 @@ class SIPeepholeSDWA : public MachineFunctionPass {
65
67
bool runOnMachineFunction (MachineFunction &MF) override ;
66
68
void matchSDWAOperands (MachineBasicBlock &MBB);
67
69
std::unique_ptr<SDWAOperand> matchSDWAOperand (MachineInstr &MI);
68
- bool isConvertibleToSDWA (MachineInstr &MI, const GCNSubtarget &ST) const ;
69
70
void pseudoOpConvertToVOP2 (MachineInstr &MI,
70
71
const GCNSubtarget &ST) const ;
71
72
bool convertToSDWA (MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
@@ -93,7 +94,9 @@ class SDWAOperand {
93
94
94
95
virtual ~SDWAOperand () = default ;
95
96
96
- virtual MachineInstr *potentialToConvert (const SIInstrInfo *TII) = 0;
97
+ virtual MachineInstr *potentialToConvert (const SIInstrInfo *TII,
98
+ const GCNSubtarget &ST,
99
+ SDWAOperandsMap *PotentialMatches = nullptr ) = 0;
97
100
virtual bool convertToSDWA (MachineInstr &MI, const SIInstrInfo *TII) = 0;
98
101
99
102
MachineOperand *getTargetOperand () const { return Target; }
@@ -126,7 +129,9 @@ class SDWASrcOperand : public SDWAOperand {
126
129
: SDWAOperand(TargetOp, ReplacedOp),
127
130
SrcSel (SrcSel_), Abs(Abs_), Neg(Neg_), Sext(Sext_) {}
128
131
129
- MachineInstr *potentialToConvert (const SIInstrInfo *TII) override ;
132
+ MachineInstr *potentialToConvert (const SIInstrInfo *TII,
133
+ const GCNSubtarget &ST,
134
+ SDWAOperandsMap *PotentialMatches = nullptr ) override ;
130
135
bool convertToSDWA (MachineInstr &MI, const SIInstrInfo *TII) override ;
131
136
132
137
SdwaSel getSrcSel () const { return SrcSel; }
@@ -153,7 +158,9 @@ class SDWADstOperand : public SDWAOperand {
153
158
SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD)
154
159
: SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}
155
160
156
- MachineInstr *potentialToConvert (const SIInstrInfo *TII) override ;
161
+ MachineInstr *potentialToConvert (const SIInstrInfo *TII,
162
+ const GCNSubtarget &ST,
163
+ SDWAOperandsMap *PotentialMatches = nullptr ) override ;
157
164
bool convertToSDWA (MachineInstr &MI, const SIInstrInfo *TII) override ;
158
165
159
166
SdwaSel getDstSel () const { return DstSel; }
@@ -327,7 +334,33 @@ uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII,
327
334
return Mods;
328
335
}
329
336
330
- MachineInstr *SDWASrcOperand::potentialToConvert (const SIInstrInfo *TII) {
337
+ MachineInstr *SDWASrcOperand::potentialToConvert (const SIInstrInfo *TII,
338
+ const GCNSubtarget &ST,
339
+ SDWAOperandsMap *PotentialMatches) {
340
+ if (PotentialMatches != nullptr ) {
341
+ // Fill out the map for all uses if all can be converted
342
+ MachineOperand *Reg = getReplacedOperand ();
343
+ if (!Reg->isReg () || !Reg->isDef ())
344
+ return nullptr ;
345
+
346
+ for (MachineInstr &UseMI : getMRI ()->use_nodbg_instructions (Reg->getReg ()))
347
+ // Check that all instructions that use Reg can be converted
348
+ if (!isConvertibleToSDWA (UseMI, ST, TII))
349
+ return nullptr ;
350
+
351
+ // Now that it's guaranteed all uses are legal, iterate over the uses again
352
+ // to add them for later conversion.
353
+ for (MachineOperand &UseMO : getMRI ()->use_nodbg_operands (Reg->getReg ())) {
354
+ // Should not get a subregister here
355
+ assert (isSameReg (UseMO, *Reg));
356
+
357
+ SDWAOperandsMap &potentialMatchesMap = *PotentialMatches;
358
+ MachineInstr *UseMI = UseMO.getParent ();
359
+ potentialMatchesMap[UseMI].push_back (this );
360
+ }
361
+ return nullptr ;
362
+ }
363
+
331
364
// For SDWA src operand, potential instruction is one that uses the register
332
365
// defined by the parent instruction
333
366
MachineOperand *PotentialMO = findSingleRegUse (getReplacedOperand (), getMRI ());
@@ -420,7 +453,9 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
420
453
return true ;
421
454
}
422
455
423
- MachineInstr *SDWADstOperand::potentialToConvert (const SIInstrInfo *TII) {
456
+ MachineInstr *SDWADstOperand::potentialToConvert (const SIInstrInfo *TII,
457
+ const GCNSubtarget &ST,
458
+ SDWAOperandsMap *PotentialMatches) {
424
459
// For SDWA dst operand potential instruction is one that defines register
425
460
// that this operand uses
426
461
MachineRegisterInfo *MRI = getMRI ();
@@ -919,8 +954,10 @@ void SIPeepholeSDWA::pseudoOpConvertToVOP2(MachineInstr &MI,
919
954
MISucc.substituteRegister (CarryIn->getReg (), TRI->getVCC (), 0 , *TRI);
920
955
}
921
956
922
- bool SIPeepholeSDWA::isConvertibleToSDWA (MachineInstr &MI,
923
- const GCNSubtarget &ST) const {
957
+ namespace {
958
+ bool isConvertibleToSDWA (MachineInstr &MI,
959
+ const GCNSubtarget &ST,
960
+ const SIInstrInfo* TII) {
924
961
// Check if this is already an SDWA instruction
925
962
unsigned Opc = MI.getOpcode ();
926
963
if (TII->isSDWA (Opc))
@@ -980,6 +1017,7 @@ bool SIPeepholeSDWA::isConvertibleToSDWA(MachineInstr &MI,
980
1017
981
1018
return true ;
982
1019
}
1020
+ } // namespace
983
1021
984
1022
bool SIPeepholeSDWA::convertToSDWA (MachineInstr &MI,
985
1023
const SDWAOperandsVector &SDWAOperands) {
@@ -1215,7 +1253,7 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
1215
1253
matchSDWAOperands (MBB);
1216
1254
for (const auto &OperandPair : SDWAOperands) {
1217
1255
const auto &Operand = OperandPair.second ;
1218
- MachineInstr *PotentialMI = Operand->potentialToConvert (TII);
1256
+ MachineInstr *PotentialMI = Operand->potentialToConvert (TII, ST );
1219
1257
if (PotentialMI &&
1220
1258
(PotentialMI->getOpcode () == AMDGPU::V_ADD_CO_U32_e64 ||
1221
1259
PotentialMI->getOpcode () == AMDGPU::V_SUB_CO_U32_e64))
@@ -1228,8 +1266,8 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
1228
1266
1229
1267
for (const auto &OperandPair : SDWAOperands) {
1230
1268
const auto &Operand = OperandPair.second ;
1231
- MachineInstr *PotentialMI = Operand->potentialToConvert (TII);
1232
- if (PotentialMI && isConvertibleToSDWA (*PotentialMI, ST)) {
1269
+ MachineInstr *PotentialMI = Operand->potentialToConvert (TII, ST, &PotentialMatches );
1270
+ if (PotentialMI && isConvertibleToSDWA (*PotentialMI, ST, TII )) {
1233
1271
PotentialMatches[PotentialMI].push_back (Operand.get ());
1234
1272
}
1235
1273
}
0 commit comments