Skip to content

Commit eb82363

Browse files
authored
[AMDGPU] Group multiple single use producers under one single use instruction. (#90713)
Previously, each single-use producer was marked with its own "S_SINGLEUSE_VDST 1" instruction. This patch adds support for larger immediates, so that multiple single-use producers can be encoded into one S_SINGLEUSE_VDST instruction.
1 parent e71eacc commit eb82363

File tree

2 files changed

+639
-25
lines changed

2 files changed

+639
-25
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp

Lines changed: 113 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,11 @@
1616

1717
#include "AMDGPU.h"
1818
#include "GCNSubtarget.h"
19-
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
2019
#include "SIInstrInfo.h"
20+
#include "SIRegisterInfo.h"
2121
#include "llvm/ADT/DenseMap.h"
2222
#include "llvm/ADT/STLExtras.h"
23+
#include "llvm/ADT/SmallVector.h"
2324
#include "llvm/ADT/StringRef.h"
2425
#include "llvm/CodeGen/MachineBasicBlock.h"
2526
#include "llvm/CodeGen/MachineFunction.h"
@@ -28,10 +29,11 @@
2829
#include "llvm/CodeGen/MachineInstrBuilder.h"
2930
#include "llvm/CodeGen/MachineOperand.h"
3031
#include "llvm/CodeGen/Register.h"
31-
#include "llvm/CodeGen/TargetSubtargetInfo.h"
3232
#include "llvm/IR/DebugLoc.h"
3333
#include "llvm/MC/MCRegister.h"
34+
#include "llvm/MC/MCRegisterInfo.h"
3435
#include "llvm/Pass.h"
36+
#include <array>
3537

3638
using namespace llvm;
3739

@@ -41,17 +43,110 @@ namespace {
4143
class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
4244
private:
4345
const SIInstrInfo *SII;
46+
class SingleUseInstruction {
47+
private:
48+
static const unsigned MaxSkipRange = 0b111;
49+
static const unsigned MaxNumberOfSkipRegions = 2;
50+
51+
unsigned LastEncodedPositionEnd;
52+
MachineInstr *ProducerInstr;
53+
54+
std::array<unsigned, MaxNumberOfSkipRegions + 1> SingleUseRegions;
55+
SmallVector<unsigned, MaxNumberOfSkipRegions> SkipRegions;
56+
57+
// Adds a skip region into the instruction.
58+
void skip(const unsigned ProducerPosition) {
59+
while (LastEncodedPositionEnd + MaxSkipRange < ProducerPosition) {
60+
SkipRegions.push_back(MaxSkipRange);
61+
LastEncodedPositionEnd += MaxSkipRange;
62+
}
63+
SkipRegions.push_back(ProducerPosition - LastEncodedPositionEnd);
64+
LastEncodedPositionEnd = ProducerPosition;
65+
}
66+
67+
bool currentRegionHasSpace() {
68+
const auto Region = SkipRegions.size();
69+
// The first region has an extra bit of encoding space.
70+
return SingleUseRegions[Region] <
71+
((Region == MaxNumberOfSkipRegions) ? 0b1111 : 0b111);
72+
}
73+
74+
unsigned encodeImm() {
75+
// Handle the first Single Use Region separately as it has an extra bit
76+
// of encoding space.
77+
unsigned Imm = SingleUseRegions[SkipRegions.size()];
78+
unsigned ShiftAmount = 4;
79+
for (unsigned i = SkipRegions.size(); i > 0; i--) {
80+
Imm |= SkipRegions[i - 1] << ShiftAmount;
81+
ShiftAmount += 3;
82+
Imm |= SingleUseRegions[i - 1] << ShiftAmount;
83+
ShiftAmount += 3;
84+
}
85+
return Imm;
86+
}
87+
88+
public:
89+
SingleUseInstruction(const unsigned ProducerPosition,
90+
MachineInstr *Producer)
91+
: LastEncodedPositionEnd(ProducerPosition + 1), ProducerInstr(Producer),
92+
SingleUseRegions({1, 0, 0}) {}
93+
94+
// Returns false if adding a new single use producer failed. This happens
95+
// because it could not be encoded, either because there is no room to
96+
// encode another single use producer region or that this single use
97+
// producer is too far away to encode the amount of instructions to skip.
98+
bool tryAddProducer(const unsigned ProducerPosition, MachineInstr *MI) {
99+
// Producer is too far away to encode into this instruction or another
100+
// skip region is needed and SkipRegions.size() = 2 so there's no room for
101+
// another skip region, therefore a new instruction is needed.
102+
if (LastEncodedPositionEnd +
103+
(MaxSkipRange * (MaxNumberOfSkipRegions - SkipRegions.size())) <
104+
ProducerPosition)
105+
return false;
106+
107+
// If a skip region is needed.
108+
if (LastEncodedPositionEnd != ProducerPosition ||
109+
!currentRegionHasSpace()) {
110+
// If the current region is out of space therefore a skip region would
111+
// be needed, but there is no room for another skip region.
112+
if (SkipRegions.size() == MaxNumberOfSkipRegions)
113+
return false;
114+
skip(ProducerPosition);
115+
}
116+
117+
SingleUseRegions[SkipRegions.size()]++;
118+
LastEncodedPositionEnd = ProducerPosition + 1;
119+
ProducerInstr = MI;
120+
return true;
121+
}
122+
123+
auto emit(const SIInstrInfo *SII) {
124+
return BuildMI(*ProducerInstr->getParent(), ProducerInstr, DebugLoc(),
125+
SII->get(AMDGPU::S_SINGLEUSE_VDST))
126+
.addImm(encodeImm());
127+
}
128+
};
44129

45130
public:
46131
static char ID;
47132

48133
AMDGPUInsertSingleUseVDST() : MachineFunctionPass(ID) {}
49134

50-
void emitSingleUseVDST(MachineInstr &MI) const {
51-
// Mark the following instruction as a single-use producer:
52-
// s_singleuse_vdst { supr0: 1 }
53-
BuildMI(*MI.getParent(), MI, DebugLoc(), SII->get(AMDGPU::S_SINGLEUSE_VDST))
54-
.addImm(0x1);
135+
// Groups the given single-use producers into as few S_SINGLEUSE_VDST
// instructions as possible and inserts them.
//
// SingleUseProducers holds (position, instruction) pairs in the order the
// caller collected them. Each producer is first offered to the most recently
// started instruction; a new instruction is started only when that one
// cannot encode it.
void insertSingleUseInstructions(
    ArrayRef<std::pair<unsigned, MachineInstr *>> SingleUseProducers) const {
  SmallVector<SingleUseInstruction> Pending;

  for (const auto &Entry : SingleUseProducers) {
    const unsigned Position = Entry.first;
    MachineInstr *const Producer = Entry.second;

    // Try to extend the instruction currently being built.
    const bool Merged =
        !Pending.empty() && Pending.back().tryAddProducer(Position, Producer);

    // Otherwise begin a fresh instruction for this producer.
    if (!Merged)
      Pending.emplace_back(Position, Producer);
  }

  // Materialise every grouped instruction.
  for (SingleUseInstruction &Grouped : Pending)
    Grouped.emit(SII);
}
56151

57152
bool runOnMachineFunction(MachineFunction &MF) override {
@@ -78,6 +173,10 @@ class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
78173
}
79174
}
80175

176+
SmallVector<std::pair<unsigned, MachineInstr *>>
177+
SingleUseProducerPositions;
178+
179+
unsigned VALUInstrCount = 0;
81180
for (MachineInstr &MI : reverse(MBB.instrs())) {
82181
// All registers in all operands need to be single use for an
83182
// instruction to be marked as a single use producer.
@@ -119,13 +218,16 @@ class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
119218
for (auto &UsedReg : RegisterUseCount)
120219
UsedReg.second = 2;
121220
}
122-
if (AllProducerOperandsAreSingleUse && SIInstrInfo::isVALU(MI)) {
123-
// TODO: Replace with candidate logging for instruction grouping
124-
// later.
125-
emitSingleUseVDST(MI);
221+
222+
if (!SIInstrInfo::isVALU(MI))
223+
continue;
224+
if (AllProducerOperandsAreSingleUse) {
225+
SingleUseProducerPositions.push_back({VALUInstrCount, &MI});
126226
InstructionEmitted = true;
127227
}
228+
VALUInstrCount++;
128229
}
230+
insertSingleUseInstructions(SingleUseProducerPositions);
129231
}
130232
return InstructionEmitted;
131233
}

0 commit comments

Comments
 (0)