Skip to content

Commit 667ba7f

Browse files
authored
[AMDGPU] Fix GCNRewritePartialRegUses pass: vector regclass is selected instead of scalar. (#69957)
For the following testcase:

    undef %1.sub1:sgpr_96 = COPY undef %0:sgpr_32
    %3:vgpr_32 = V_LSHL_ADD_U32_e64 %1.sub1:sgpr_96, ...

GCNRewritePartialRegUses produced:

    %4:vgpr_32 = COPY undef %1:sgpr_32
    dead %2:vgpr_32 = V_LSHL_ADD_U32_e64 %4, ...

The register class for %4 is incorrect: it should be sgpr_32 instead of vgpr_32 because the original %1 had a scalar regclass. This patch fixes that.

Note that the GCNRewritePartialRegUses pass isn't enabled by default yet.
1 parent 206799f commit 667ba7f

File tree

3 files changed

+83
-75
lines changed

3 files changed

+83
-75
lines changed

llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp

Lines changed: 33 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -101,17 +101,16 @@ class GCNRewritePartialRegUses : public MachineFunctionPass {
101101
/// find new regclass such that:
102102
/// 1. It has subregs obtained by shifting each OldSubReg by RShift number
103103
/// of bits to the right. Every "shifted" subreg should have the same
104-
/// SubRegRC. SubRegRC can be null, in this case it initialized using
105-
/// getSubRegisterClass. If CoverSubregIdx is not zero it's a subreg that
106-
/// "covers" all other subregs in pairs. Basically such subreg becomes a
107-
/// whole register.
104+
/// SubRegRC. If CoverSubregIdx is not zero it's a subreg that "covers"
105+
/// all other subregs in pairs. Basically such subreg becomes a whole
106+
/// register.
108107
/// 2. Resulting register class contains registers of minimal size but not
109108
/// less than RegNumBits.
110109
///
111110
/// SubRegs is map of OldSubReg -> [SubRegRC, NewSubReg] and is used as in/out
112111
/// parameter:
113112
/// OldSubReg - input parameter,
114-
/// SubRegRC - in/out, should be changed for unknown regclass,
113+
/// SubRegRC - input parameter (cannot be null),
115114
/// NewSubReg - output, contains shifted subregs on return.
116115
const TargetRegisterClass *
117116
getRegClassWithShiftedSubregs(const TargetRegisterClass *RC, unsigned RShift,
@@ -228,26 +227,16 @@ GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
228227
BitVector ClassMask(getAllocatableAndAlignedRegClassMask(RCAlign));
229228
for (auto &[OldSubReg, SRI] : SubRegs) {
230229
auto &[SubRegRC, NewSubReg] = SRI;
231-
232-
// Register class may be unknown, for example:
233-
// undef %0.sub4:sgpr_1024 = S_MOV_B32 01
234-
// %0.sub5:sgpr_1024 = S_MOV_B32 02
235-
// %1:vreg_64 = COPY %0.sub4_sub5
236-
// Register classes for subregs 'sub4' and 'sub5' are known from the
237-
// description of destination operand of S_MOV_B32 instruction but the
238-
// class for the subreg 'sub4_sub5' isn't specified by the COPY instruction.
239-
if (!SubRegRC)
240-
SubRegRC = TRI->getSubRegisterClass(RC, OldSubReg);
241-
242-
if (!SubRegRC)
243-
return nullptr;
230+
assert(SubRegRC);
244231

245232
LLVM_DEBUG(dbgs() << " " << TRI->getSubRegIndexName(OldSubReg) << ':'
246233
<< TRI->getRegClassName(SubRegRC)
247234
<< (SubRegRC->isAllocatable() ? "" : " not alloc")
248235
<< " -> ");
249236

250237
if (OldSubReg == CoverSubregIdx) {
238+
// Covering subreg will become a full register, RC should be allocatable.
239+
assert(SubRegRC->isAllocatable());
251240
NewSubReg = AMDGPU::NoSubRegister;
252241
LLVM_DEBUG(dbgs() << "whole reg");
253242
} else {
@@ -421,33 +410,42 @@ GCNRewritePartialRegUses::getOperandRegClass(MachineOperand &MO) const {
421410

422411
bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
423412
auto Range = MRI->reg_nodbg_operands(Reg);
424-
if (Range.begin() == Range.end())
413+
if (Range.empty() || any_of(Range, [](MachineOperand &MO) {
414+
return MO.getSubReg() == AMDGPU::NoSubRegister; // Whole reg used. [1]
415+
}))
425416
return false;
426417

427-
for (MachineOperand &MO : Range) {
428-
if (MO.getSubReg() == AMDGPU::NoSubRegister) // Whole reg used, quit.
429-
return false;
430-
}
431-
432418
auto *RC = MRI->getRegClass(Reg);
433419
LLVM_DEBUG(dbgs() << "Try to rewrite partial reg " << printReg(Reg, TRI)
434420
<< ':' << TRI->getRegClassName(RC) << '\n');
435421

436-
// Collect used subregs and constrained reg classes infered from instruction
422+
// Collect used subregs and their reg classes infered from instruction
437423
// operands.
438424
SubRegMap SubRegs;
439-
for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
440-
assert(MO.getSubReg() != AMDGPU::NoSubRegister);
441-
auto *OpDescRC = getOperandRegClass(MO);
442-
const auto [I, Inserted] = SubRegs.try_emplace(MO.getSubReg(), OpDescRC);
443-
if (!Inserted && OpDescRC) {
444-
SubRegInfo &SRI = I->second;
445-
SRI.RC = SRI.RC ? TRI->getCommonSubClass(SRI.RC, OpDescRC) : OpDescRC;
446-
if (!SRI.RC) {
447-
LLVM_DEBUG(dbgs() << " Couldn't find common target regclass\n");
448-
return false;
425+
for (MachineOperand &MO : Range) {
426+
const unsigned SubReg = MO.getSubReg();
427+
assert(SubReg != AMDGPU::NoSubRegister); // Due to [1].
428+
LLVM_DEBUG(dbgs() << " " << TRI->getSubRegIndexName(SubReg) << ':');
429+
430+
const auto [I, Inserted] = SubRegs.try_emplace(SubReg);
431+
const TargetRegisterClass *&SubRegRC = I->second.RC;
432+
433+
if (Inserted)
434+
SubRegRC = TRI->getSubRegisterClass(RC, SubReg);
435+
436+
if (SubRegRC) {
437+
if (const TargetRegisterClass *OpDescRC = getOperandRegClass(MO)) {
438+
LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC) << " & "
439+
<< TRI->getRegClassName(OpDescRC) << " = ");
440+
SubRegRC = TRI->getCommonSubClass(SubRegRC, OpDescRC);
449441
}
450442
}
443+
444+
if (!SubRegRC) {
445+
LLVM_DEBUG(dbgs() << "couldn't find target regclass\n");
446+
return false;
447+
}
448+
LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC) << '\n');
451449
}
452450

453451
auto *NewRC = getMinSizeReg(RC, SubRegs);

llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir

Lines changed: 38 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -4341,9 +4341,9 @@ tracksRegLiveness: true
43414341
body: |
43424342
bb.0:
43434343
; CHECK-LABEL: name: test_sgpr_64_w32
4344-
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
4344+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
43454345
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
4346-
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
4346+
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
43474347
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
43484348
undef %0.sub0:sgpr_64 = S_MOV_B32 00
43494349
S_NOP 0, implicit %0.sub0
@@ -4358,11 +4358,11 @@ tracksRegLiveness: true
43584358
body: |
43594359
bb.0:
43604360
; CHECK-LABEL: name: test_sgpr_96_w32
4361-
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
4361+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
43624362
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
4363-
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
4363+
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
43644364
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
4365-
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 22
4365+
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
43664366
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
43674367
undef %0.sub0:sgpr_96 = S_MOV_B32 00
43684368
S_NOP 0, implicit %0.sub0
@@ -4381,11 +4381,11 @@ tracksRegLiveness: true
43814381
body: |
43824382
bb.0:
43834383
; CHECK-LABEL: name: test_sgpr_128_w32
4384-
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
4384+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
43854385
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
4386-
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
4386+
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
43874387
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
4388-
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 23
4388+
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
43894389
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
43904390
undef %0.sub0:sgpr_128 = S_MOV_B32 00
43914391
S_NOP 0, implicit %0.sub0
@@ -4425,11 +4425,11 @@ tracksRegLiveness: true
44254425
body: |
44264426
bb.0:
44274427
; CHECK-LABEL: name: test_sgpr_160_w32
4428-
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
4428+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
44294429
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
4430-
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
4430+
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
44314431
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
4432-
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 24
4432+
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
44334433
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
44344434
undef %0.sub0:sgpr_160 = S_MOV_B32 00
44354435
S_NOP 0, implicit %0.sub0
@@ -4450,11 +4450,11 @@ tracksRegLiveness: true
44504450
body: |
44514451
bb.0:
44524452
; CHECK-LABEL: name: test_sgpr_192_w32
4453-
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
4453+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
44544454
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
4455-
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
4455+
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
44564456
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
4457-
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 25
4457+
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
44584458
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
44594459
undef %0.sub0:sgpr_192 = S_MOV_B32 00
44604460
S_NOP 0, implicit %0.sub0
@@ -4503,11 +4503,11 @@ tracksRegLiveness: true
45034503
body: |
45044504
bb.0:
45054505
; CHECK-LABEL: name: test_sgpr_224_w32
4506-
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
4506+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
45074507
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
4508-
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
4508+
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
45094509
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
4510-
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 26
4510+
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
45114511
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
45124512
undef %0.sub0:sgpr_224 = S_MOV_B32 00
45134513
S_NOP 0, implicit %0.sub0
@@ -4530,11 +4530,11 @@ tracksRegLiveness: true
45304530
body: |
45314531
bb.0:
45324532
; CHECK-LABEL: name: test_sgpr_256_w32
4533-
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
4533+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
45344534
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
4535-
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
4535+
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
45364536
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
4537-
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 27
4537+
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
45384538
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
45394539
undef %0.sub0:sgpr_256 = S_MOV_B32 00
45404540
S_NOP 0, implicit %0.sub0
@@ -4612,11 +4612,11 @@ tracksRegLiveness: true
46124612
body: |
46134613
bb.0:
46144614
; CHECK-LABEL: name: test_sgpr_288_w32
4615-
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
4615+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
46164616
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
4617-
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
4617+
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
46184618
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
4619-
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 28
4619+
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
46204620
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
46214621
undef %0.sub0:sgpr_288 = S_MOV_B32 00
46224622
S_NOP 0, implicit %0.sub0
@@ -4672,11 +4672,11 @@ tracksRegLiveness: true
46724672
body: |
46734673
bb.0:
46744674
; CHECK-LABEL: name: test_sgpr_320_w32
4675-
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
4675+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
46764676
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
4677-
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
4677+
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
46784678
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
4679-
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 29
4679+
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
46804680
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
46814681
undef %0.sub0:sgpr_320 = S_MOV_B32 00
46824682
S_NOP 0, implicit %0.sub0
@@ -4763,11 +4763,11 @@ tracksRegLiveness: true
47634763
body: |
47644764
bb.0:
47654765
; CHECK-LABEL: name: test_sgpr_352_w32
4766-
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
4766+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
47674767
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
4768-
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
4768+
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
47694769
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
4770-
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 210
4770+
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 210
47714771
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
47724772
undef %0.sub0:sgpr_352 = S_MOV_B32 00
47734773
S_NOP 0, implicit %0.sub0
@@ -4791,11 +4791,11 @@ tracksRegLiveness: true
47914791
body: |
47924792
bb.0:
47934793
; CHECK-LABEL: name: test_sgpr_384_w32
4794-
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
4794+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
47954795
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
4796-
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
4796+
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
47974797
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
4798-
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 211
4798+
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 211
47994799
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
48004800
undef %0.sub0:sgpr_384 = S_MOV_B32 00
48014801
S_NOP 0, implicit %0.sub0
@@ -4929,11 +4929,11 @@ tracksRegLiveness: true
49294929
body: |
49304930
bb.0:
49314931
; CHECK-LABEL: name: test_sgpr_512_w32
4932-
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
4932+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
49334933
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
4934-
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
4934+
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
49354935
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
4936-
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 215
4936+
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 215
49374937
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
49384938
undef %0.sub0:sgpr_512 = S_MOV_B32 00
49394939
S_NOP 0, implicit %0.sub0
@@ -5086,11 +5086,11 @@ tracksRegLiveness: true
50865086
body: |
50875087
bb.0:
50885088
; CHECK-LABEL: name: test_sgpr_1024_w32
5089-
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
5089+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
50905090
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_]]
5091-
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11
5091+
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
50925092
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]]
5093-
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 231
5093+
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 231
50945094
; CHECK-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]]
50955095
undef %0.sub0:sgpr_1024 = S_MOV_B32 00
50965096
S_NOP 0, implicit %0.sub0

llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
# RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs -stop-after=rewrite-partial-reg-uses %s -o - | FileCheck -check-prefix=CHECK %s
3-
# RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs %s -o /dev/null 2>&1
43
---
54
name: test_subregs_composition_vreg_1024
65
tracksRegLiveness: true
@@ -82,9 +81,20 @@ tracksRegLiveness: true
8281
body: |
8382
bb.0:
8483
; CHECK-LABEL: name: test_subregs_regclass_defined_by_dst_operand_sreg_64_xexec
85-
; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM undef %1:sreg_64, 0, 0
84+
; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sgpr_64 = S_LOAD_DWORDX2_IMM undef %1:sreg_64, 0, 0
8685
; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]]
8786
undef %0.sub2_sub3:sgpr_128 = S_LOAD_DWORDX2_IMM undef %1:sreg_64, 0, 0
8887
%2:vreg_64 = COPY %0.sub2_sub3:sgpr_128
8988
...
89+
---
90+
name: test_vgpr_selected_instead_of_sgpr_because_use_allows_both
91+
tracksRegLiveness: true
92+
body: |
93+
bb.0:
94+
; CHECK-LABEL: name: test_vgpr_selected_instead_of_sgpr_because_use_allows_both
95+
; CHECK: [[COPY:%[0-9]+]]:sgpr_32 = COPY undef %1:sgpr_32
96+
; CHECK-NEXT: dead [[V_LSHL_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_ADD_U32_e64 [[COPY]], 2, undef %3:vgpr_32, implicit $exec
97+
undef %1.sub1:sgpr_96 = COPY undef %0:sgpr_32
98+
%3:vgpr_32 = V_LSHL_ADD_U32_e64 %1.sub1:sgpr_96, 2, undef %2:vgpr_32, implicit $exec
99+
...
90100

0 commit comments

Comments
 (0)