@@ -266,16 +266,51 @@ define amdgpu_gs void @s_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls(<2 x
266
266
ret void
267
267
}
268
268
269
- ; FIXME
270
- ; define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_upward(<3 x float> %a) {
271
- ; %res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.upward")
272
- ; ret <3 x half> %res
273
- ; }
269
+ define amdgpu_gs <3 x half > @v_fptrunc_round_v3f32_to_v3f16_upward (<3 x float > %a ) {
270
+ ; SDAG-LABEL: v_fptrunc_round_v3f32_to_v3f16_upward:
271
+ ; SDAG: ; %bb.0:
272
+ ; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
273
+ ; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
274
+ ; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
275
+ ; SDAG-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
276
+ ; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v2
277
+ ; SDAG-NEXT: ; return to shader part epilog
278
+ ;
279
+ ; GISEL-LABEL: v_fptrunc_round_v3f32_to_v3f16_upward:
280
+ ; GISEL: ; %bb.0:
281
+ ; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
282
+ ; GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
283
+ ; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
284
+ ; GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
285
+ ; GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
286
+ ; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
287
+ ; GISEL-NEXT: ; return to shader part epilog
288
+ %res = call <3 x half > @llvm.fptrunc.round.v3f16.v3f32 (<3 x float > %a , metadata !"round.upward" )
289
+ ret <3 x half > %res
290
+ }
274
291
275
- ; define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_downward(<3 x float> %a) {
276
- ; %res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.downward")
277
- ; ret <3 x half> %res
278
- ; }
292
+ define amdgpu_gs <3 x half > @v_fptrunc_round_v3f32_to_v3f16_downward (<3 x float > %a ) {
293
+ ; SDAG-LABEL: v_fptrunc_round_v3f32_to_v3f16_downward:
294
+ ; SDAG: ; %bb.0:
295
+ ; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
296
+ ; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
297
+ ; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
298
+ ; SDAG-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
299
+ ; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v2
300
+ ; SDAG-NEXT: ; return to shader part epilog
301
+ ;
302
+ ; GISEL-LABEL: v_fptrunc_round_v3f32_to_v3f16_downward:
303
+ ; GISEL: ; %bb.0:
304
+ ; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
305
+ ; GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
306
+ ; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
307
+ ; GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
308
+ ; GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
309
+ ; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
310
+ ; GISEL-NEXT: ; return to shader part epilog
311
+ %res = call <3 x half > @llvm.fptrunc.round.v3f16.v3f32 (<3 x float > %a , metadata !"round.downward" )
312
+ ret <3 x half > %res
313
+ }
279
314
280
315
define amdgpu_gs <4 x half > @v_fptrunc_round_v4f32_to_v4f16_upward (<4 x float > %a ) {
281
316
; SDAG-LABEL: v_fptrunc_round_v4f32_to_v4f16_upward:
0 commit comments