Skip to content

Commit 0ec524b

Browse files
authored
[AMDGPU][MC][True16] Support V_RCP/SQRT/RSQ/LOG/EXP_F16. (#81131)
[AMDGPU][MC][True16] Support V_RCP/SQRT/RSQ/LOG/EXP_F16. Also add missing v_ceil/floor_f16 tests. Includes #80892.
1 parent 17a1b8f commit 0ec524b

21 files changed

+2375
-743
lines changed

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -933,10 +933,15 @@ defm V_CVT_F16_U16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x050, "v_cvt_f16_
933933
defm V_CVT_F16_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x051, "v_cvt_f16_i16">;
934934
defm V_CVT_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x052, "v_cvt_u16_f16">;
935935
defm V_CVT_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x053, "v_cvt_i16_f16">;
// V_RCP/SQRT/RSQ/LOG/EXP_F16 each get a _t16 and a _fake16 real definition that share one
// opcode (0x054-0x058) and one mnemonic.
// NOTE(review): how the two variants are told apart is not visible in this hunk - presumably
// via predicates in VOP1_Real_FULL_t16_gfx11_gfx12 or in the pseudo definitions; confirm.
936+
defm V_RCP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
936937
defm V_RCP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
938+
defm V_SQRT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">;
937939
defm V_SQRT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">;
940+
defm V_RSQ_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x056, "v_rsq_f16">;
938941
defm V_RSQ_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x056, "v_rsq_f16">;
942+
defm V_LOG_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16">;
939943
defm V_LOG_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16">;
944+
defm V_EXP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
940945
defm V_EXP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
941946
defm V_FREXP_MANT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">;
942947
defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">;

llvm/test/MC/AMDGPU/gfx11_asm_vop1-fake16.s

Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,3 +93,225 @@ v_ceil_f16 v5, src_scc
9393

9494
v_ceil_f16 v127, 0xfe0b
9595
// GFX11: encoding: [0xff,0xb8,0xfe,0x7e,0x0b,0xfe,0x00,0x00]
96+
97+
// v_rcp_f16 with each kind of source operand: VGPRs, SGPRs, vcc/ttmp/m0/exec/null/src_scc,
// inline constants (-1, 0.5) and a literal (0xfe0b), which lengthens the encoding to 8 bytes.
// In the expected encodings the second byte is 0xa9 for a VGPR source and 0xa8 otherwise.
v_rcp_f16 v5, v1
98+
// GFX11: encoding: [0x01,0xa9,0x0a,0x7e]
99+
100+
v_rcp_f16 v5, v127
101+
// GFX11: encoding: [0x7f,0xa9,0x0a,0x7e]
102+
103+
v_rcp_f16 v5, s1
104+
// GFX11: encoding: [0x01,0xa8,0x0a,0x7e]
105+
106+
v_rcp_f16 v5, s105
107+
// GFX11: encoding: [0x69,0xa8,0x0a,0x7e]
108+
109+
v_rcp_f16 v5, vcc_lo
110+
// GFX11: encoding: [0x6a,0xa8,0x0a,0x7e]
111+
112+
v_rcp_f16 v5, vcc_hi
113+
// GFX11: encoding: [0x6b,0xa8,0x0a,0x7e]
114+
115+
v_rcp_f16 v5, ttmp15
116+
// GFX11: encoding: [0x7b,0xa8,0x0a,0x7e]
117+
118+
v_rcp_f16 v5, m0
119+
// GFX11: encoding: [0x7d,0xa8,0x0a,0x7e]
120+
121+
v_rcp_f16 v5, exec_lo
122+
// GFX11: encoding: [0x7e,0xa8,0x0a,0x7e]
123+
124+
v_rcp_f16 v5, exec_hi
125+
// GFX11: encoding: [0x7f,0xa8,0x0a,0x7e]
126+
127+
v_rcp_f16 v5, null
128+
// GFX11: encoding: [0x7c,0xa8,0x0a,0x7e]
129+
130+
v_rcp_f16 v5, -1
131+
// GFX11: encoding: [0xc1,0xa8,0x0a,0x7e]
132+
133+
v_rcp_f16 v5, 0.5
134+
// GFX11: encoding: [0xf0,0xa8,0x0a,0x7e]
135+
136+
v_rcp_f16 v5, src_scc
137+
// GFX11: encoding: [0xfd,0xa8,0x0a,0x7e]
138+
139+
v_rcp_f16 v127, 0xfe0b
140+
// GFX11: encoding: [0xff,0xa8,0xfe,0x7e,0x0b,0xfe,0x00,0x00]
141+
142+
// v_sqrt_f16 with each kind of source operand: VGPRs, SGPRs, vcc/ttmp/m0/exec/null/src_scc,
// inline constants (-1, 0.5) and a literal (0xfe0b), which lengthens the encoding to 8 bytes.
// In the expected encodings the second byte is 0xab for a VGPR source and 0xaa otherwise.
v_sqrt_f16 v5, v1
143+
// GFX11: encoding: [0x01,0xab,0x0a,0x7e]
144+
145+
v_sqrt_f16 v5, v127
146+
// GFX11: encoding: [0x7f,0xab,0x0a,0x7e]
147+
148+
v_sqrt_f16 v5, s1
149+
// GFX11: encoding: [0x01,0xaa,0x0a,0x7e]
150+
151+
v_sqrt_f16 v5, s105
152+
// GFX11: encoding: [0x69,0xaa,0x0a,0x7e]
153+
154+
v_sqrt_f16 v5, vcc_lo
155+
// GFX11: encoding: [0x6a,0xaa,0x0a,0x7e]
156+
157+
v_sqrt_f16 v5, vcc_hi
158+
// GFX11: encoding: [0x6b,0xaa,0x0a,0x7e]
159+
160+
v_sqrt_f16 v5, ttmp15
161+
// GFX11: encoding: [0x7b,0xaa,0x0a,0x7e]
162+
163+
v_sqrt_f16 v5, m0
164+
// GFX11: encoding: [0x7d,0xaa,0x0a,0x7e]
165+
166+
v_sqrt_f16 v5, exec_lo
167+
// GFX11: encoding: [0x7e,0xaa,0x0a,0x7e]
168+
169+
v_sqrt_f16 v5, exec_hi
170+
// GFX11: encoding: [0x7f,0xaa,0x0a,0x7e]
171+
172+
v_sqrt_f16 v5, null
173+
// GFX11: encoding: [0x7c,0xaa,0x0a,0x7e]
174+
175+
v_sqrt_f16 v5, -1
176+
// GFX11: encoding: [0xc1,0xaa,0x0a,0x7e]
177+
178+
v_sqrt_f16 v5, 0.5
179+
// GFX11: encoding: [0xf0,0xaa,0x0a,0x7e]
180+
181+
v_sqrt_f16 v5, src_scc
182+
// GFX11: encoding: [0xfd,0xaa,0x0a,0x7e]
183+
184+
v_sqrt_f16 v127, 0xfe0b
185+
// GFX11: encoding: [0xff,0xaa,0xfe,0x7e,0x0b,0xfe,0x00,0x00]
186+
187+
// v_rsq_f16 with each kind of source operand: VGPRs, SGPRs, vcc/ttmp/m0/exec/null/src_scc,
// inline constants (-1, 0.5) and a literal (0xfe0b), which lengthens the encoding to 8 bytes.
// In the expected encodings the second byte is 0xad for a VGPR source and 0xac otherwise.
v_rsq_f16 v5, v1
188+
// GFX11: encoding: [0x01,0xad,0x0a,0x7e]
189+
190+
v_rsq_f16 v5, v127
191+
// GFX11: encoding: [0x7f,0xad,0x0a,0x7e]
192+
193+
v_rsq_f16 v5, s1
194+
// GFX11: encoding: [0x01,0xac,0x0a,0x7e]
195+
196+
v_rsq_f16 v5, s105
197+
// GFX11: encoding: [0x69,0xac,0x0a,0x7e]
198+
199+
v_rsq_f16 v5, vcc_lo
200+
// GFX11: encoding: [0x6a,0xac,0x0a,0x7e]
201+
202+
v_rsq_f16 v5, vcc_hi
203+
// GFX11: encoding: [0x6b,0xac,0x0a,0x7e]
204+
205+
v_rsq_f16 v5, ttmp15
206+
// GFX11: encoding: [0x7b,0xac,0x0a,0x7e]
207+
208+
v_rsq_f16 v5, m0
209+
// GFX11: encoding: [0x7d,0xac,0x0a,0x7e]
210+
211+
v_rsq_f16 v5, exec_lo
212+
// GFX11: encoding: [0x7e,0xac,0x0a,0x7e]
213+
214+
v_rsq_f16 v5, exec_hi
215+
// GFX11: encoding: [0x7f,0xac,0x0a,0x7e]
216+
217+
v_rsq_f16 v5, null
218+
// GFX11: encoding: [0x7c,0xac,0x0a,0x7e]
219+
220+
v_rsq_f16 v5, -1
221+
// GFX11: encoding: [0xc1,0xac,0x0a,0x7e]
222+
223+
v_rsq_f16 v5, 0.5
224+
// GFX11: encoding: [0xf0,0xac,0x0a,0x7e]
225+
226+
v_rsq_f16 v5, src_scc
227+
// GFX11: encoding: [0xfd,0xac,0x0a,0x7e]

v_rsq_f16 v127, 0xfe0b
// GFX11: encoding: [0xff,0xac,0xfe,0x7e,0x0b,0xfe,0x00,0x00]
228+
229+
// v_log_f16 with each kind of source operand: VGPRs, SGPRs, vcc/ttmp/m0/exec/null/src_scc,
// inline constants (-1, 0.5) and a literal (0xfe0b), which lengthens the encoding to 8 bytes.
// In the expected encodings the second byte is 0xaf for a VGPR source and 0xae otherwise.
v_log_f16 v5, v1
230+
// GFX11: encoding: [0x01,0xaf,0x0a,0x7e]
231+
232+
v_log_f16 v5, v127
233+
// GFX11: encoding: [0x7f,0xaf,0x0a,0x7e]
234+
235+
v_log_f16 v5, s1
236+
// GFX11: encoding: [0x01,0xae,0x0a,0x7e]
237+
238+
v_log_f16 v5, s105
239+
// GFX11: encoding: [0x69,0xae,0x0a,0x7e]
240+
241+
v_log_f16 v5, vcc_lo
242+
// GFX11: encoding: [0x6a,0xae,0x0a,0x7e]
243+
244+
v_log_f16 v5, vcc_hi
245+
// GFX11: encoding: [0x6b,0xae,0x0a,0x7e]
246+
247+
v_log_f16 v5, ttmp15
248+
// GFX11: encoding: [0x7b,0xae,0x0a,0x7e]
249+
250+
v_log_f16 v5, m0
251+
// GFX11: encoding: [0x7d,0xae,0x0a,0x7e]
252+
253+
v_log_f16 v5, exec_lo
254+
// GFX11: encoding: [0x7e,0xae,0x0a,0x7e]
255+
256+
v_log_f16 v5, exec_hi
257+
// GFX11: encoding: [0x7f,0xae,0x0a,0x7e]
258+
259+
v_log_f16 v5, null
260+
// GFX11: encoding: [0x7c,0xae,0x0a,0x7e]
261+
262+
v_log_f16 v5, -1
263+
// GFX11: encoding: [0xc1,0xae,0x0a,0x7e]
264+
265+
v_log_f16 v5, 0.5
266+
// GFX11: encoding: [0xf0,0xae,0x0a,0x7e]
267+
268+
v_log_f16 v5, src_scc
269+
// GFX11: encoding: [0xfd,0xae,0x0a,0x7e]
270+
271+
v_log_f16 v127, 0xfe0b
272+
// GFX11: encoding: [0xff,0xae,0xfe,0x7e,0x0b,0xfe,0x00,0x00]
273+
274+
// v_exp_f16 with each kind of source operand: VGPRs, SGPRs, vcc/ttmp/m0/exec/null/src_scc,
// inline constants (-1, 0.5) and a literal (0xfe0b), which lengthens the encoding to 8 bytes.
// In the expected encodings the second byte is 0xb1 for a VGPR source and 0xb0 otherwise.
v_exp_f16 v5, v1
275+
// GFX11: encoding: [0x01,0xb1,0x0a,0x7e]
276+
277+
v_exp_f16 v5, v127
278+
// GFX11: encoding: [0x7f,0xb1,0x0a,0x7e]
279+
280+
v_exp_f16 v5, s1
281+
// GFX11: encoding: [0x01,0xb0,0x0a,0x7e]
282+
283+
v_exp_f16 v5, s105
284+
// GFX11: encoding: [0x69,0xb0,0x0a,0x7e]
285+
286+
v_exp_f16 v5, vcc_lo
287+
// GFX11: encoding: [0x6a,0xb0,0x0a,0x7e]
288+
289+
v_exp_f16 v5, vcc_hi
290+
// GFX11: encoding: [0x6b,0xb0,0x0a,0x7e]
291+
292+
v_exp_f16 v5, ttmp15
293+
// GFX11: encoding: [0x7b,0xb0,0x0a,0x7e]
294+
295+
v_exp_f16 v5, m0
296+
// GFX11: encoding: [0x7d,0xb0,0x0a,0x7e]
297+
298+
v_exp_f16 v5, exec_lo
299+
// GFX11: encoding: [0x7e,0xb0,0x0a,0x7e]
300+
301+
v_exp_f16 v5, exec_hi
302+
// GFX11: encoding: [0x7f,0xb0,0x0a,0x7e]
303+
304+
v_exp_f16 v5, null
305+
// GFX11: encoding: [0x7c,0xb0,0x0a,0x7e]
306+
307+
v_exp_f16 v5, -1
308+
// GFX11: encoding: [0xc1,0xb0,0x0a,0x7e]
309+
310+
v_exp_f16 v5, 0.5
311+
// GFX11: encoding: [0xf0,0xb0,0x0a,0x7e]
312+
313+
v_exp_f16 v5, src_scc
314+
// GFX11: encoding: [0xfd,0xb0,0x0a,0x7e]
315+
316+
v_exp_f16 v127, 0xfe0b
317+
// GFX11: encoding: [0xff,0xb0,0xfe,0x7e,0x0b,0xfe,0x00,0x00]

0 commit comments

Comments
 (0)