Skip to content

Commit edf2d0a

Browse files
authored
[AMDGPU] Introduce a pseudo mnemonic for S_DELAY_ALU in MIR. (#96004)
1 parent 9473e16 commit edf2d0a

File tree

5 files changed

+585
-2
lines changed

5 files changed

+585
-2
lines changed

llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,157 @@
1717

1818
using namespace llvm;
1919

20+
/// Print the immediate operand \p Imm of \p MI to \p OS.
///
/// S_DELAY_ALU immediates are rendered through printSDelayAluImm; every other
/// opcode is forwarded unchanged to the generic MIRFormatter::printImm.
void AMDGPUMIRFormatter::printImm(raw_ostream &OS, const MachineInstr &MI,
                                  std::optional<unsigned int> OpIdx,
                                  int64_t Imm) const {
  if (MI.getOpcode() == AMDGPU::S_DELAY_ALU) {
    // The only immediate handled for S_DELAY_ALU is operand 0.
    assert(OpIdx == 0);
    printSDelayAluImm(Imm, OS);
    return;
  }

  MIRFormatter::printImm(OS, MI, OpIdx, Imm);
}
33+
34+
/// Implement target specific parsing of immediate mnemonics. The mnemonic is
35+
/// a string with a leading dot.
36+
bool AMDGPUMIRFormatter::parseImmMnemonic(const unsigned OpCode,
37+
const unsigned OpIdx,
38+
StringRef Src, int64_t &Imm,
39+
ErrorCallbackType ErrorCallback) const
40+
{
41+
42+
switch (OpCode) {
43+
case AMDGPU::S_DELAY_ALU:
44+
return parseSDelayAluImmMnemonic(OpIdx, Imm, Src, ErrorCallback);
45+
default:
46+
break;
47+
}
48+
return true; // Don't know what this is
49+
}
50+
51+
void AMDGPUMIRFormatter::printSDelayAluImm(int64_t Imm,
52+
llvm::raw_ostream &OS) const {
53+
// Construct an immediate string to represent the information encoded in the
54+
// s_delay_alu immediate.
55+
// .id0_<dep>[_skip_<count>_id1<dep>]
56+
constexpr int64_t None = 0;
57+
constexpr int64_t Same = 0;
58+
59+
uint64_t Id0 = (Imm & 0xF);
60+
uint64_t Skip = ((Imm >> 4) & 0x7);
61+
uint64_t Id1 = ((Imm >> 7) & 0xF);
62+
auto Outdep = [&](uint64_t Id) {
63+
if (Id == None)
64+
OS << "NONE";
65+
else if (Id < 5)
66+
OS << "VALU_DEP_" << Id;
67+
else if (Id < 8)
68+
OS << "TRANS32_DEP_" << Id - 4;
69+
else
70+
OS << "SALU_CYCLE_" << Id - 8;
71+
};
72+
73+
OS << ".id0_";
74+
Outdep(Id0);
75+
76+
// If the second inst is "same" and "none", no need to print the rest of the
77+
// string.
78+
if (Skip == Same && Id1 == None)
79+
return;
80+
81+
// Encode the second delay specification.
82+
OS << "_skip_";
83+
if (Skip == 0)
84+
OS << "SAME";
85+
else if (Skip == 1)
86+
OS << "NEXT";
87+
else
88+
OS << "SKIP_" << Skip - 1;
89+
90+
OS << "_id1_";
91+
Outdep(Id1);
92+
}
93+
94+
bool AMDGPUMIRFormatter::parseSDelayAluImmMnemonic(
95+
const unsigned int OpIdx, int64_t &Imm, llvm::StringRef &Src,
96+
llvm::MIRFormatter::ErrorCallbackType &ErrorCallback) const
97+
{
98+
assert(OpIdx == 0);
99+
100+
Imm = 0;
101+
bool Expected = Src.consume_front(".id0_");
102+
if (!Expected)
103+
return ErrorCallback(Src.begin(), "Expected .id0_");
104+
105+
auto ExpectInt = [&](StringRef &Src, int64_t Offset) -> int64_t {
106+
int64_t Dep;
107+
if (!Src.consumeInteger(10, Dep))
108+
return Dep + Offset;
109+
110+
return -1;
111+
};
112+
113+
auto DecodeDelay = [&](StringRef &Src) -> int64_t {
114+
if (Src.consume_front("NONE"))
115+
return 0;
116+
if (Src.consume_front("VALU_DEP_"))
117+
return ExpectInt(Src, 0);
118+
if (Src.consume_front("TRANS32_DEP_"))
119+
return ExpectInt(Src, 4);
120+
if (Src.consume_front("SALU_CYCLE_"))
121+
return ExpectInt(Src, 8);
122+
123+
return -1;
124+
};
125+
126+
int64_t Delay0 = DecodeDelay(Src);
127+
int64_t Skip = 0;
128+
int64_t Delay1 = 0;
129+
if (Delay0 == -1)
130+
return ErrorCallback(Src.begin(), "Could not decode delay0");
131+
132+
133+
// Set the Imm so far, to that early return has the correct value.
134+
Imm = Delay0;
135+
136+
// If that was the end of the string, the second instruction is "same" and
137+
// "none"
138+
if (Src.begin() == Src.end())
139+
return false;
140+
141+
Expected = Src.consume_front("_skip_");
142+
if (!Expected)
143+
return ErrorCallback(Src.begin(), "Expected _skip_");
144+
145+
146+
if (Src.consume_front("SAME")) {
147+
Skip = 0;
148+
} else if (Src.consume_front("NEXT")) {
149+
Skip = 1;
150+
} else if (Src.consume_front("SKIP_")) {
151+
if (Src.consumeInteger(10, Skip)) {
152+
return ErrorCallback(Src.begin(), "Expected integer Skip value");
153+
}
154+
Skip += 1;
155+
} else {
156+
ErrorCallback(Src.begin(), "Unexpected Skip Value");
157+
}
158+
159+
Expected = Src.consume_front("_id1_");
160+
if (!Expected)
161+
return ErrorCallback(Src.begin(), "Expected _id1_");
162+
163+
Delay1 = DecodeDelay(Src);
164+
if (Delay1 == -1)
165+
return ErrorCallback(Src.begin(), "Could not decode delay1");
166+
167+
Imm = Imm | (Skip << 4) | (Delay1 << 7);
168+
return false;
169+
}
170+
20171
bool AMDGPUMIRFormatter::parseCustomPseudoSourceValue(
21172
StringRef Src, MachineFunction &MF, PerFunctionMIParsingState &PFS,
22173
const PseudoSourceValue *&PSV, ErrorCallbackType ErrorCallback) const {

llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,35 @@ class AMDGPUMIRFormatter final : public MIRFormatter {
2828
AMDGPUMIRFormatter() = default;
2929
virtual ~AMDGPUMIRFormatter() = default;
3030

31+
/// Implement target specific printing for machine operand immediate value, so
32+
/// that we can have more meaningful mnemonic than a 64-bit integer. Passing
33+
/// std::nullopt to OpIdx means the index is unknown.
34+
virtual void printImm(raw_ostream &OS, const MachineInstr &MI,
35+
std::optional<unsigned> OpIdx,
36+
int64_t Imm) const override;
37+
38+
/// Implement target specific parsing of immediate mnemonics. The mnemonic is
39+
/// a string with a leading dot.
40+
virtual bool parseImmMnemonic(const unsigned OpCode, const unsigned OpIdx,
41+
StringRef Src, int64_t &Imm,
42+
ErrorCallbackType ErrorCallback) const override;
43+
3144
/// Implement target specific parsing of target custom pseudo source value.
3245
bool
3346
parseCustomPseudoSourceValue(StringRef Src, MachineFunction &MF,
3447
PerFunctionMIParsingState &PFS,
3548
const PseudoSourceValue *&PSV,
3649
ErrorCallbackType ErrorCallback) const override;
50+
51+
private:
52+
/// Print the string to represent s_delay_alu immediate value
53+
void printSDelayAluImm(int64_t Imm, llvm::raw_ostream &OS) const;
54+
55+
/// Parse the immediate pseudo literal for s_delay_alu
56+
bool parseSDelayAluImmMnemonic(
57+
const unsigned int OpIdx, int64_t &Imm, llvm::StringRef &Src,
58+
llvm::MIRFormatter::ErrorCallbackType &ErrorCallback) const;
59+
3760
};
3861

3962
} // end namespace llvm
Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -run-pass=amdgpu-insert-delay-alu %s -o - | FileCheck %s
3+
4+
---
5+
name: valu_dep_1
6+
body: |
7+
bb.0:
8+
; CHECK-LABEL: name: valu_dep_1
9+
; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
10+
; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_1
11+
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
12+
$vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
13+
$vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
14+
...
15+
16+
---
17+
name: valu_dep_2
18+
body: |
19+
bb.0:
20+
; CHECK-LABEL: name: valu_dep_2
21+
; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
22+
; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
23+
; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_2
24+
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
25+
$vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
26+
$vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
27+
$vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
28+
...
29+
30+
---
31+
name: valu_dep_3
32+
body: |
33+
bb.0:
34+
; CHECK-LABEL: name: valu_dep_3
35+
; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
36+
; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
37+
; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
38+
; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_3
39+
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
40+
$vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
41+
$vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
42+
$vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
43+
$vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
44+
...
45+
46+
---
47+
name: valu_dep_4
48+
body: |
49+
bb.0:
50+
; CHECK-LABEL: name: valu_dep_4
51+
; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
52+
; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
53+
; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
54+
; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
55+
; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_4
56+
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
57+
$vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
58+
$vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
59+
$vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec
60+
$vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
61+
$vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
62+
...
63+
64+
---
65+
name: trans32_dep_1
66+
body: |
67+
bb.0:
68+
; CHECK-LABEL: name: trans32_dep_1
69+
; CHECK: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
70+
; CHECK-NEXT: S_DELAY_ALU .id0_TRANS32_DEP_1
71+
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
72+
$vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
73+
$vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
74+
...
75+
76+
---
77+
name: trans32_dep_2
78+
body: |
79+
bb.0:
80+
; CHECK-LABEL: name: trans32_dep_2
81+
; CHECK: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
82+
; CHECK-NEXT: $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
83+
; CHECK-NEXT: S_DELAY_ALU .id0_TRANS32_DEP_2
84+
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
85+
$vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
86+
$vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
87+
$vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
88+
...
89+
90+
---
91+
name: trans32_dep_3
92+
body: |
93+
bb.0:
94+
; CHECK-LABEL: name: trans32_dep_3
95+
; CHECK: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
96+
; CHECK-NEXT: $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
97+
; CHECK-NEXT: $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
98+
; CHECK-NEXT: S_DELAY_ALU .id0_TRANS32_DEP_3
99+
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
100+
$vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
101+
$vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode
102+
$vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode
103+
$vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
104+
...
105+
106+
---
107+
name: salu_cycle_1
108+
body: |
109+
bb.0:
110+
; CHECK-LABEL: name: salu_cycle_1
111+
; CHECK: $sgpr0 = S_MOV_B32 0
112+
; CHECK-NEXT: S_DELAY_ALU .id0_SALU_CYCLE_1
113+
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
114+
$sgpr0 = S_MOV_B32 0
115+
$vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
116+
...
117+
118+
---
119+
name: valu_dep_1_same_trans32_dep_1
120+
body: |
121+
bb.0:
122+
; CHECK-LABEL: name: valu_dep_1_same_trans32_dep_1
123+
; CHECK: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
124+
; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
125+
; CHECK-NEXT: S_DELAY_ALU .id0_TRANS32_DEP_1_skip_SAME_id1_VALU_DEP_1
126+
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
127+
$vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode
128+
$vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
129+
$vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
130+
...
131+
132+
---
133+
name: valu_dep_1_same_salu_cycle_1
134+
body: |
135+
bb.0:
136+
; CHECK-LABEL: name: valu_dep_1_same_salu_cycle_1
137+
; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
138+
; CHECK-NEXT: $sgpr0 = S_MOV_B32 0
139+
; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_1_skip_SAME_id1_SALU_CYCLE_1
140+
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
141+
$vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
142+
$sgpr0 = S_MOV_B32 0
143+
$vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec
144+
...
145+
146+
---
147+
name: valu_dep_1_next_valu_dep_1
148+
body: |
149+
bb.0:
150+
; CHECK-LABEL: name: valu_dep_1_next_valu_dep_1
151+
; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
152+
; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_1_skip_NEXT_id1_VALU_DEP_1
153+
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
154+
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
155+
$vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
156+
$vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
157+
$vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
158+
...
159+
160+
---
161+
name: valu_dep_2_next_valu_dep_2
162+
body: |
163+
bb.0:
164+
; CHECK-LABEL: name: valu_dep_2_next_valu_dep_2
165+
; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
166+
; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
167+
; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_2_skip_NEXT_id1_VALU_DEP_2
168+
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
169+
; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
170+
$vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
171+
$vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
172+
$vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
173+
$vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
174+
...
175+
176+
---
177+
name: valu_dep_2_skip_valu_dep_2
178+
body: |
179+
bb.0:
180+
; CHECK-LABEL: name: valu_dep_2_skip_valu_dep_2
181+
; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
182+
; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
183+
; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_2_skip_SKIP_1_id1_VALU_DEP_2
184+
; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
185+
; CHECK-NEXT: $vgpr4 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
186+
; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
187+
$vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
188+
$vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
189+
$vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
190+
$vgpr4 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec
191+
$vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
192+
...

0 commit comments

Comments
 (0)