@@ -56,20 +56,8 @@ class GCNRewritePartialRegUsesImpl {
56
56
// / size. Return true if the change has been made.
57
57
bool rewriteReg (Register Reg) const ;
58
58
59
- // / Value type for SubRegMap below.
60
- struct SubRegInfo {
61
- // / Register class required to hold the value stored in the SubReg.
62
- const TargetRegisterClass *RC;
63
-
64
- // / Index for the right-shifted subregister. If 0 this is the "covering"
65
- // / subreg i.e. subreg that covers all others. Covering subreg becomes the
66
- // / whole register after the replacement.
67
- unsigned SubReg = AMDGPU::NoSubRegister;
68
- SubRegInfo (const TargetRegisterClass *RC_ = nullptr ) : RC(RC_) {}
69
- };
70
-
71
- // / Map OldSubReg -> { RC, NewSubReg }. Used as in/out container.
72
- using SubRegMap = SmallDenseMap<unsigned , SubRegInfo>;
59
+ // / Map OldSubReg -> NewSubReg. Used as in/out container.
60
+ using SubRegMap = SmallDenseMap<unsigned , unsigned >;
73
61
74
62
// / Given register class RC and the set of used subregs as keys in the SubRegs
75
63
// / map return new register class and indexes of right-shifted subregs as
@@ -78,24 +66,22 @@ class GCNRewritePartialRegUsesImpl {
78
66
const TargetRegisterClass *getMinSizeReg (const TargetRegisterClass *RC,
79
67
SubRegMap &SubRegs) const ;
80
68
81
- // / Given regclass RC and pairs of [OldSubReg, SubRegRC ] in SubRegs try to
69
+ // / Given regclass RC and pairs of [OldSubReg, NewSubReg ] in SubRegs try to
82
70
// / find new regclass such that:
83
71
// / 1. It has subregs obtained by shifting each OldSubReg by RShift number
84
72
// / of bits to the right. Every "shifted" subreg should have the same
85
73
// / SubRegRC. If CoverSubregIdx is not zero it's a subreg that "covers"
86
74
// / all other subregs in pairs. Basically such subreg becomes a whole
87
75
// / register.
88
- // / 2. Resulting register class contains registers of minimal size but not
89
- // / less than RegNumBits.
76
+ // / 2. Resulting register class contains registers of minimal size.
90
77
// /
91
- // / SubRegs is map of OldSubReg -> [SubRegRC, NewSubReg] and is used as in/out
78
+ // / SubRegs is map of OldSubReg -> NewSubReg and is used as in/out
92
79
// / parameter:
93
80
// / OldSubReg - input parameter,
94
- // / SubRegRC - input parameter (cannot be null),
95
81
// / NewSubReg - output, contains shifted subregs on return.
96
82
const TargetRegisterClass *
97
83
getRegClassWithShiftedSubregs (const TargetRegisterClass *RC, unsigned RShift,
98
- unsigned RegNumBits, unsigned CoverSubregIdx,
84
+ unsigned CoverSubregIdx,
99
85
SubRegMap &SubRegs) const ;
100
86
101
87
// / Update live intervals after rewriting OldReg to NewReg with SubRegs map
@@ -105,9 +91,6 @@ class GCNRewritePartialRegUsesImpl {
105
91
106
92
// / Helper methods.
107
93
108
- // / Return reg class expected by a MO's parent instruction for a given MO.
109
- const TargetRegisterClass *getOperandRegClass (MachineOperand &MO) const ;
110
-
111
94
// / Find right-shifted by RShift amount version of the SubReg if it exists,
112
95
// / return 0 otherwise.
113
96
unsigned shiftSubReg (unsigned SubReg, unsigned RShift) const ;
@@ -221,20 +204,23 @@ GCNRewritePartialRegUsesImpl::getAllocatableAndAlignedRegClassMask(
221
204
222
205
const TargetRegisterClass *
223
206
GCNRewritePartialRegUsesImpl::getRegClassWithShiftedSubregs (
224
- const TargetRegisterClass *RC, unsigned RShift, unsigned RegNumBits ,
225
- unsigned CoverSubregIdx, SubRegMap &SubRegs) const {
207
+ const TargetRegisterClass *RC, unsigned RShift, unsigned CoverSubregIdx ,
208
+ SubRegMap &SubRegs) const {
226
209
227
210
unsigned RCAlign = TRI->getRegClassAlignmentNumBits (RC);
228
211
LLVM_DEBUG (dbgs () << " Shift " << RShift << " , reg align " << RCAlign
229
212
<< ' \n ' );
230
213
231
214
BitVector ClassMask (getAllocatableAndAlignedRegClassMask (RCAlign));
232
- for (auto &[OldSubReg, SRI] : SubRegs) {
233
- auto &[SubRegRC, NewSubReg] = SRI;
234
- assert (SubRegRC);
215
+ for (auto &[OldSubReg, NewSubReg] : SubRegs) {
216
+ LLVM_DEBUG (dbgs () << " " << TRI->getSubRegIndexName (OldSubReg) << ' :' );
235
217
236
- LLVM_DEBUG (dbgs () << " " << TRI->getSubRegIndexName (OldSubReg) << ' :'
237
- << TRI->getRegClassName (SubRegRC)
218
+ auto *SubRegRC = TRI->getSubRegisterClass (RC, OldSubReg);
219
+ if (!SubRegRC) {
220
+ LLVM_DEBUG (dbgs () << " couldn't find target regclass\n " );
221
+ return nullptr ;
222
+ }
223
+ LLVM_DEBUG (dbgs () << TRI->getRegClassName (SubRegRC)
238
224
<< (SubRegRC->isAllocatable () ? " " : " not alloc" )
239
225
<< " -> " );
240
226
@@ -266,27 +252,23 @@ GCNRewritePartialRegUsesImpl::getRegClassWithShiftedSubregs(
266
252
// ClassMask is the set of all register classes such that each class is
267
253
// allocatable, aligned, has all shifted subregs and each subreg has required
268
254
// register class (see SubRegRC above). Now select first (that is largest)
269
- // register class with registers of minimal but not less than RegNumBits size.
270
- // We have to check register size because we may encounter classes of smaller
271
- // registers like VReg_1 in some situations.
255
+ // register class with registers of minimal size.
272
256
const TargetRegisterClass *MinRC = nullptr ;
273
257
unsigned MinNumBits = std::numeric_limits<unsigned >::max ();
274
258
for (unsigned ClassID : ClassMask.set_bits ()) {
275
259
auto *RC = TRI->getRegClass (ClassID);
276
260
unsigned NumBits = TRI->getRegSizeInBits (*RC);
277
- if (NumBits < MinNumBits && NumBits >= RegNumBits ) {
261
+ if (NumBits < MinNumBits) {
278
262
MinNumBits = NumBits;
279
263
MinRC = RC;
280
264
}
281
- if (MinNumBits == RegNumBits)
282
- break ;
283
265
}
284
266
#ifndef NDEBUG
285
267
if (MinRC) {
286
268
assert (MinRC->isAllocatable () && TRI->isRegClassAligned (MinRC, RCAlign));
287
- for (auto [SubReg, SRI ] : SubRegs)
288
- // Check that all registers in MinRC support SRI.SubReg subregister.
289
- assert (MinRC == TRI->getSubClassWithSubReg (MinRC, SRI. SubReg ));
269
+ for (auto [OldSubReg, NewSubReg ] : SubRegs)
270
+ // Check that all registers in MinRC support NewSubReg subregister.
271
+ assert (MinRC == TRI->getSubClassWithSubReg (MinRC, NewSubReg ));
290
272
}
291
273
#endif
292
274
// There might be zero RShift - in this case we are just trying to find smaller
@@ -317,8 +299,7 @@ GCNRewritePartialRegUsesImpl::getMinSizeReg(const TargetRegisterClass *RC,
317
299
// If covering subreg is found shift everything so the covering subreg would
318
300
// be in the rightmost position.
319
301
if (CoverSubreg != AMDGPU::NoSubRegister)
320
- return getRegClassWithShiftedSubregs (RC, Offset, End - Offset, CoverSubreg,
321
- SubRegs);
302
+ return getRegClassWithShiftedSubregs (RC, Offset, CoverSubreg, SubRegs);
322
303
323
304
// Otherwise find subreg with maximum required alignment and shift it and all
324
305
// other subregs to the rightmost possible position with respect to the
@@ -344,7 +325,7 @@ GCNRewritePartialRegUsesImpl::getMinSizeReg(const TargetRegisterClass *RC,
344
325
llvm_unreachable (" misaligned subreg" );
345
326
346
327
unsigned RShift = FirstMaxAlignedSubRegOffset - NewOffsetOfMaxAlignedSubReg;
347
- return getRegClassWithShiftedSubregs (RC, RShift, End - RShift, 0 , SubRegs);
328
+ return getRegClassWithShiftedSubregs (RC, RShift, 0 , SubRegs);
348
329
}
349
330
350
331
// Only the subrange's lanemasks of the original interval need to be modified.
@@ -390,7 +371,7 @@ void GCNRewritePartialRegUsesImpl::updateLiveIntervals(
390
371
return ;
391
372
}
392
373
393
- if (unsigned NewSubReg = I->second . SubReg )
374
+ if (unsigned NewSubReg = I->second )
394
375
NewLI.createSubRangeFrom (Allocator,
395
376
TRI->getSubRegIndexLaneMask (NewSubReg), SR);
396
377
else // This is the covering subreg (0 index) - set it as main range.
@@ -404,53 +385,23 @@ void GCNRewritePartialRegUsesImpl::updateLiveIntervals(
404
385
LIS->removeInterval (OldReg);
405
386
}
406
387
407
- const TargetRegisterClass *
408
- GCNRewritePartialRegUsesImpl::getOperandRegClass (MachineOperand &MO) const {
409
- MachineInstr *MI = MO.getParent ();
410
- return TII->getRegClass (TII->get (MI->getOpcode ()), MI->getOperandNo (&MO), TRI,
411
- *MI->getParent ()->getParent ());
412
- }
413
-
414
388
bool GCNRewritePartialRegUsesImpl::rewriteReg (Register Reg) const {
415
- auto Range = MRI->reg_nodbg_operands (Reg);
416
- if (Range.empty () || any_of (Range, [](MachineOperand &MO) {
417
- return MO.getSubReg () == AMDGPU::NoSubRegister; // Whole reg used. [1]
418
- }))
389
+
390
+ // Collect used subregs.
391
+ SubRegMap SubRegs;
392
+ for (MachineOperand &MO : MRI->reg_nodbg_operands (Reg)) {
393
+ if (MO.getSubReg () == AMDGPU::NoSubRegister)
394
+ return false ; // Whole reg used.
395
+ SubRegs.try_emplace (MO.getSubReg ());
396
+ }
397
+
398
+ if (SubRegs.empty ())
419
399
return false ;
420
400
421
401
auto *RC = MRI->getRegClass (Reg);
422
402
LLVM_DEBUG (dbgs () << " Try to rewrite partial reg " << printReg (Reg, TRI)
423
403
<< ' :' << TRI->getRegClassName (RC) << ' \n ' );
424
404
425
- // Collect used subregs and their reg classes infered from instruction
426
- // operands.
427
- SubRegMap SubRegs;
428
- for (MachineOperand &MO : Range) {
429
- const unsigned SubReg = MO.getSubReg ();
430
- assert (SubReg != AMDGPU::NoSubRegister); // Due to [1].
431
- LLVM_DEBUG (dbgs () << " " << TRI->getSubRegIndexName (SubReg) << ' :' );
432
-
433
- const auto [I, Inserted] = SubRegs.try_emplace (SubReg);
434
- const TargetRegisterClass *&SubRegRC = I->second .RC ;
435
-
436
- if (Inserted)
437
- SubRegRC = TRI->getSubRegisterClass (RC, SubReg);
438
-
439
- if (SubRegRC) {
440
- if (const TargetRegisterClass *OpDescRC = getOperandRegClass (MO)) {
441
- LLVM_DEBUG (dbgs () << TRI->getRegClassName (SubRegRC) << " & "
442
- << TRI->getRegClassName (OpDescRC) << " = " );
443
- SubRegRC = TRI->getCommonSubClass (SubRegRC, OpDescRC);
444
- }
445
- }
446
-
447
- if (!SubRegRC) {
448
- LLVM_DEBUG (dbgs () << " couldn't find target regclass\n " );
449
- return false ;
450
- }
451
- LLVM_DEBUG (dbgs () << TRI->getRegClassName (SubRegRC) << ' \n ' );
452
- }
453
-
454
405
auto *NewRC = getMinSizeReg (RC, SubRegs);
455
406
if (!NewRC) {
456
407
LLVM_DEBUG (dbgs () << " No improvement achieved\n " );
@@ -469,9 +420,9 @@ bool GCNRewritePartialRegUsesImpl::rewriteReg(Register Reg) const {
469
420
// TODO: create some DI shift expression?
470
421
if (MO.isDebug () && MO.getSubReg () == 0 )
471
422
continue ;
472
- unsigned SubReg = SubRegs[MO.getSubReg ()]. SubReg ;
473
- MO.setSubReg (SubReg );
474
- if (SubReg == AMDGPU::NoSubRegister && MO.isDef ())
423
+ unsigned NewSubReg = SubRegs[MO.getSubReg ()];
424
+ MO.setSubReg (NewSubReg );
425
+ if (NewSubReg == AMDGPU::NoSubRegister && MO.isDef ())
475
426
MO.setIsUndef (false );
476
427
}
477
428
0 commit comments