Skip to content

Commit f465a2c

Browse files
authored
[AMDGPU] Add new 64-bit SALU instructions (#74449)
1 parent 6704d6a commit f465a2c

File tree

4 files changed

+491
-0
lines changed

4 files changed

+491
-0
lines changed

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,20 @@ def S_MAX_U32 : SOP2_32 <"s_max_u32",
578578
} // End isCommutable = 1
579579
} // End Defs = [SCC]
580580

581+
let SubtargetPredicate = isGFX12Plus in { // 64-bit SALU add/sub/mul; only available when isGFX12Plus holds
582+
def S_ADD_U64 : SOP2_64<"s_add_u64">{ // no selection pattern is supplied in this def
583+
let isCommutable = 1; // add: operands may be swapped
584+
}
585+
586+
def S_SUB_U64 : SOP2_64<"s_sub_u64">; // not marked commutable; no selection pattern supplied here
587+
588+
def S_MUL_U64 : SOP2_64 <"s_mul_u64",
589+
[(set i64:$sdst, (UniformBinFrag<mul> i64:$src0, i64:$src1))]> { // the only def of the three with a pattern: i64 mul via UniformBinFrag (presumably uniform values only -- confirm)
590+
let isCommutable = 1; // mul: operands may be swapped
591+
}
592+
593+
} // End SubtargetPredicate = isGFX12Plus
594+
581595
def SelectPat : PatFrag <
582596
(ops node:$src1, node:$src2),
583597
(select SCC, $src1, $src2),
@@ -2072,6 +2086,9 @@ defm S_MUL_HI_I32 : SOP2_Real_gfx11_gfx12<0x02e>;
20722086
defm S_CSELECT_B32 : SOP2_Real_gfx11_gfx12<0x030>;
20732087
defm S_CSELECT_B64 : SOP2_Real_gfx11_gfx12<0x031>;
20742088
defm S_PACK_HL_B32_B16 : SOP2_Real_gfx11_gfx12<0x035>;
2089+
defm S_ADD_NC_U64 : SOP2_Real_Renamed_gfx12<0x053, S_ADD_U64, "s_add_nc_u64">; // GFX12 real for pseudo S_ADD_U64 under the mnemonic "s_add_nc_u64"
2090+
defm S_SUB_NC_U64 : SOP2_Real_Renamed_gfx12<0x054, S_SUB_U64, "s_sub_nc_u64">; // GFX12 real for pseudo S_SUB_U64 under the mnemonic "s_sub_nc_u64"
2091+
defm S_MUL_U64 : SOP2_Real_gfx12<0x055>; // GFX12 real for S_MUL_U64; mnemonic is not renamed
20752092

20762093
//===----------------------------------------------------------------------===//
20772094
// SOP2 - GFX1150, GFX12

llvm/test/MC/AMDGPU/gfx12_asm_sop2.s

Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,239 @@
11
// RUN: llvm-mc -arch=amdgcn -show-encoding -mcpu=gfx1200 %s | FileCheck -check-prefix=GFX12 %s
22

3+
// s_add_nc_u64 cases: SGPR pairs, exec and vcc, inline constants, and 32-bit literals
// are exercised in the destination and in each source slot.
s_add_nc_u64 s[0:1], s[2:3], s[4:5]
4+
// GFX12: encoding: [0x02,0x04,0x80,0xa9]
5+
6+
s_add_nc_u64 s[100:101], s[102:103], s[104:105]
7+
// GFX12: encoding: [0x66,0x68,0xe4,0xa9]
8+
9+
s_add_nc_u64 s[0:1], s[104:105], s[102:103]
10+
// GFX12: encoding: [0x68,0x66,0x80,0xa9]
11+
12+
s_add_nc_u64 s[104:105], s[0:1], s[102:103]
13+
// GFX12: encoding: [0x00,0x66,0xe8,0xa9]
14+
15+
s_add_nc_u64 s[104:105], s[102:103], s[2:3]
16+
// GFX12: encoding: [0x66,0x02,0xe8,0xa9]
17+
18+
s_add_nc_u64 s[104:105], s[0:1], s[2:3]
19+
// GFX12: encoding: [0x00,0x02,0xe8,0xa9]
20+
21+
s_add_nc_u64 s[0:1], s[102:103], s[2:3]
22+
// GFX12: encoding: [0x66,0x02,0x80,0xa9]
23+
24+
s_add_nc_u64 s[0:1], s[2:3], s[102:103]
25+
// GFX12: encoding: [0x02,0x66,0x80,0xa9]
26+
27+
s_add_nc_u64 exec, s[0:1], s[2:3]
28+
// GFX12: encoding: [0x00,0x02,0xfe,0xa9]
29+
30+
s_add_nc_u64 vcc, s[0:1], s[2:3]
31+
// GFX12: encoding: [0x00,0x02,0xea,0xa9]
32+
33+
s_add_nc_u64 s[0:1], exec, s[2:3]
34+
// GFX12: encoding: [0x7e,0x02,0x80,0xa9]
35+
36+
s_add_nc_u64 s[0:1], vcc, s[2:3]
37+
// GFX12: encoding: [0x6a,0x02,0x80,0xa9]
38+
39+
s_add_nc_u64 s[0:1], 0, s[2:3]
40+
// GFX12: encoding: [0x80,0x02,0x80,0xa9]
41+
42+
s_add_nc_u64 s[0:1], -1, s[2:3]
43+
// GFX12: encoding: [0xc1,0x02,0x80,0xa9]
44+
45+
s_add_nc_u64 s[0:1], 0.5, s[2:3]
46+
// GFX12: encoding: [0xf0,0x02,0x80,0xa9]
47+
48+
s_add_nc_u64 s[0:1], -4.0, s[2:3]
49+
// GFX12: encoding: [0xf7,0x02,0x80,0xa9]
50+
51+
s_add_nc_u64 s[0:1], 0x3f717273, s[2:3]
52+
// GFX12: encoding: [0xff,0x02,0x80,0xa9,0x73,0x72,0x71,0x3f]
53+
54+
s_add_nc_u64 s[0:1], 0xaf123456, s[2:3]
55+
// GFX12: encoding: [0xff,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf]
56+
57+
s_add_nc_u64 s[0:1], s[2:3], exec
58+
// GFX12: encoding: [0x02,0x7e,0x80,0xa9]
59+
60+
s_add_nc_u64 s[0:1], s[2:3], vcc
61+
// GFX12: encoding: [0x02,0x6a,0x80,0xa9]
62+
63+
s_add_nc_u64 s[0:1], s[2:3], 0
64+
// GFX12: encoding: [0x02,0x80,0x80,0xa9]
65+
66+
s_add_nc_u64 s[0:1], s[2:3], -1
67+
// GFX12: encoding: [0x02,0xc1,0x80,0xa9]
68+
69+
s_add_nc_u64 s[0:1], s[2:3], 0.5
70+
// GFX12: encoding: [0x02,0xf0,0x80,0xa9]
71+
72+
s_add_nc_u64 s[0:1], s[2:3], -4.0
73+
// GFX12: encoding: [0x02,0xf7,0x80,0xa9]
74+
75+
s_add_nc_u64 s[0:1], s[2:3], 0x3f717273
76+
// GFX12: encoding: [0x02,0xff,0x80,0xa9,0x73,0x72,0x71,0x3f]
77+
78+
s_add_nc_u64 s[0:1], s[2:3], 0xaf123456
79+
// GFX12: encoding: [0x02,0xff,0x80,0xa9,0x56,0x34,0x12,0xaf]
80+
81+
// s_sub_nc_u64 cases: same operand combinations as the s_add_nc_u64 cases in this file.
s_sub_nc_u64 s[0:1], s[2:3], s[4:5]
82+
// GFX12: encoding: [0x02,0x04,0x00,0xaa]
83+
84+
s_sub_nc_u64 s[100:101], s[102:103], s[104:105]
85+
// GFX12: encoding: [0x66,0x68,0x64,0xaa]
86+
87+
s_sub_nc_u64 s[0:1], s[104:105], s[102:103]
88+
// GFX12: encoding: [0x68,0x66,0x00,0xaa]
89+
90+
s_sub_nc_u64 s[104:105], s[0:1], s[102:103]
91+
// GFX12: encoding: [0x00,0x66,0x68,0xaa]
92+
93+
s_sub_nc_u64 s[104:105], s[102:103], s[2:3]
94+
// GFX12: encoding: [0x66,0x02,0x68,0xaa]
95+
96+
s_sub_nc_u64 s[104:105], s[0:1], s[2:3]
97+
// GFX12: encoding: [0x00,0x02,0x68,0xaa]
98+
99+
s_sub_nc_u64 s[0:1], s[102:103], s[2:3]
100+
// GFX12: encoding: [0x66,0x02,0x00,0xaa]
101+
102+
s_sub_nc_u64 s[0:1], s[2:3], s[102:103]
103+
// GFX12: encoding: [0x02,0x66,0x00,0xaa]
104+
105+
s_sub_nc_u64 exec, s[0:1], s[2:3]
106+
// GFX12: encoding: [0x00,0x02,0x7e,0xaa]
107+
108+
s_sub_nc_u64 vcc, s[0:1], s[2:3]
109+
// GFX12: encoding: [0x00,0x02,0x6a,0xaa]
110+
111+
s_sub_nc_u64 s[0:1], exec, s[2:3]
112+
// GFX12: encoding: [0x7e,0x02,0x00,0xaa]
113+
114+
s_sub_nc_u64 s[0:1], vcc, s[2:3]
115+
// GFX12: encoding: [0x6a,0x02,0x00,0xaa]
116+
117+
s_sub_nc_u64 s[0:1], 0, s[2:3]
118+
// GFX12: encoding: [0x80,0x02,0x00,0xaa]
119+
120+
s_sub_nc_u64 s[0:1], -1, s[2:3]
121+
// GFX12: encoding: [0xc1,0x02,0x00,0xaa]
122+
123+
s_sub_nc_u64 s[0:1], 0.5, s[2:3]
124+
// GFX12: encoding: [0xf0,0x02,0x00,0xaa]
125+
126+
s_sub_nc_u64 s[0:1], -4.0, s[2:3]
127+
// GFX12: encoding: [0xf7,0x02,0x00,0xaa]
128+
129+
s_sub_nc_u64 s[0:1], 0x3f717273, s[2:3]
130+
// GFX12: encoding: [0xff,0x02,0x00,0xaa,0x73,0x72,0x71,0x3f]
131+
132+
s_sub_nc_u64 s[0:1], 0xaf123456, s[2:3]
133+
// GFX12: encoding: [0xff,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf]
134+
135+
s_sub_nc_u64 s[0:1], s[2:3], exec
136+
// GFX12: encoding: [0x02,0x7e,0x00,0xaa]
137+
138+
s_sub_nc_u64 s[0:1], s[2:3], vcc
139+
// GFX12: encoding: [0x02,0x6a,0x00,0xaa]
140+
141+
s_sub_nc_u64 s[0:1], s[2:3], 0
142+
// GFX12: encoding: [0x02,0x80,0x00,0xaa]
143+
144+
s_sub_nc_u64 s[0:1], s[2:3], -1
145+
// GFX12: encoding: [0x02,0xc1,0x00,0xaa]
146+
147+
s_sub_nc_u64 s[0:1], s[2:3], 0.5
148+
// GFX12: encoding: [0x02,0xf0,0x00,0xaa]
149+
150+
s_sub_nc_u64 s[0:1], s[2:3], -4.0
151+
// GFX12: encoding: [0x02,0xf7,0x00,0xaa]
152+
153+
s_sub_nc_u64 s[0:1], s[2:3], 0x3f717273
154+
// GFX12: encoding: [0x02,0xff,0x00,0xaa,0x73,0x72,0x71,0x3f]
155+
156+
s_sub_nc_u64 s[0:1], s[2:3], 0xaf123456
157+
// GFX12: encoding: [0x02,0xff,0x00,0xaa,0x56,0x34,0x12,0xaf]
158+
159+
// s_mul_u64 cases: same operand combinations as the s_add_nc_u64 and s_sub_nc_u64 cases in this file.
s_mul_u64 s[0:1], s[2:3], s[4:5]
160+
// GFX12: encoding: [0x02,0x04,0x80,0xaa]
161+
162+
s_mul_u64 s[100:101], s[102:103], s[104:105]
163+
// GFX12: encoding: [0x66,0x68,0xe4,0xaa]
164+
165+
s_mul_u64 s[0:1], s[104:105], s[102:103]
166+
// GFX12: encoding: [0x68,0x66,0x80,0xaa]
167+
168+
s_mul_u64 s[104:105], s[0:1], s[102:103]
169+
// GFX12: encoding: [0x00,0x66,0xe8,0xaa]
170+
171+
s_mul_u64 s[104:105], s[102:103], s[2:3]
172+
// GFX12: encoding: [0x66,0x02,0xe8,0xaa]
173+
174+
s_mul_u64 s[104:105], s[0:1], s[2:3]
175+
// GFX12: encoding: [0x00,0x02,0xe8,0xaa]
176+
177+
s_mul_u64 s[0:1], s[102:103], s[2:3]
178+
// GFX12: encoding: [0x66,0x02,0x80,0xaa]
179+
180+
s_mul_u64 s[0:1], s[2:3], s[102:103]
181+
// GFX12: encoding: [0x02,0x66,0x80,0xaa]
182+
183+
s_mul_u64 exec, s[0:1], s[2:3]
184+
// GFX12: encoding: [0x00,0x02,0xfe,0xaa]
185+
186+
s_mul_u64 vcc, s[0:1], s[2:3]
187+
// GFX12: encoding: [0x00,0x02,0xea,0xaa]
188+
189+
s_mul_u64 s[0:1], exec, s[2:3]
190+
// GFX12: encoding: [0x7e,0x02,0x80,0xaa]
191+
192+
s_mul_u64 s[0:1], vcc, s[2:3]
193+
// GFX12: encoding: [0x6a,0x02,0x80,0xaa]
194+
195+
s_mul_u64 s[0:1], 0, s[2:3]
196+
// GFX12: encoding: [0x80,0x02,0x80,0xaa]
197+
198+
s_mul_u64 s[0:1], -1, s[2:3]
199+
// GFX12: encoding: [0xc1,0x02,0x80,0xaa]
200+
201+
s_mul_u64 s[0:1], 0.5, s[2:3]
202+
// GFX12: encoding: [0xf0,0x02,0x80,0xaa]
203+
204+
s_mul_u64 s[0:1], -4.0, s[2:3]
205+
// GFX12: encoding: [0xf7,0x02,0x80,0xaa]
206+
207+
s_mul_u64 s[0:1], 0x3f717273, s[2:3]
208+
// GFX12: encoding: [0xff,0x02,0x80,0xaa,0x73,0x72,0x71,0x3f]
209+
210+
s_mul_u64 s[0:1], 0xaf123456, s[2:3]
211+
// GFX12: encoding: [0xff,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf]
212+
213+
s_mul_u64 s[0:1], s[2:3], exec
214+
// GFX12: encoding: [0x02,0x7e,0x80,0xaa]
215+
216+
s_mul_u64 s[0:1], s[2:3], vcc
217+
// GFX12: encoding: [0x02,0x6a,0x80,0xaa]
218+
219+
s_mul_u64 s[0:1], s[2:3], 0
220+
// GFX12: encoding: [0x02,0x80,0x80,0xaa]
221+
222+
s_mul_u64 s[0:1], s[2:3], -1
223+
// GFX12: encoding: [0x02,0xc1,0x80,0xaa]
224+
225+
s_mul_u64 s[0:1], s[2:3], 0.5
226+
// GFX12: encoding: [0x02,0xf0,0x80,0xaa]
227+
228+
s_mul_u64 s[0:1], s[2:3], -4.0
229+
// GFX12: encoding: [0x02,0xf7,0x80,0xaa]
230+
231+
s_mul_u64 s[0:1], s[2:3], 0x3f717273
232+
// GFX12: encoding: [0x02,0xff,0x80,0xaa,0x73,0x72,0x71,0x3f]
233+
234+
s_mul_u64 s[0:1], s[2:3], 0xaf123456
235+
// GFX12: encoding: [0x02,0xff,0x80,0xaa,0x56,0x34,0x12,0xaf]
236+
3237
s_add_f32 s5, s1, s2
4238
// GFX12: encoding: [0x01,0x02,0x05,0xa0]
5239

llvm/test/MC/AMDGPU/gfx12_asm_sop2_alias.s

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ s_add_i32 s0, s1, s2
66
s_add_u32 s0, s1, s2
77
// GFX12: encoding: [0x01,0x02,0x00,0x80]
88

9+
// The s_add_u64 spelling is expected to assemble to the same bytes as
// s_add_nc_u64 with identical operands.
s_add_u64 s[0:1], s[2:3], s[4:5]
10+
// GFX12: encoding: [0x02,0x04,0x80,0xa9]
11+
912
s_addc_u32 s0, s1, s2
1013
// GFX12: encoding: [0x01,0x02,0x00,0x82]
1114

@@ -15,6 +18,9 @@ s_sub_i32 s0, s1, s2
1518
s_sub_u32 s0, s1, s2
1619
// GFX12: encoding: [0x01,0x02,0x80,0x80]
1720

21+
// The s_sub_u64 spelling is expected to assemble to the same bytes as
// s_sub_nc_u64 with identical operands.
s_sub_u64 s[0:1], s[2:3], s[4:5]
22+
// GFX12: encoding: [0x02,0x04,0x00,0xaa]
23+
1824
s_subb_u32 s0, s1, s2
1925
// GFX12: encoding: [0x01,0x02,0x80,0x82]
2026

0 commit comments

Comments
 (0)