3
3
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s
4
4
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s
5
5
6
; Vector-argument case for "round.upward": truncates float %a to half with
; llvm.fptrunc.round.f16.f32. %a is not marked inreg and the expectations
; read it from v0.
; Lines marked '-' are the earlier form (result stored through %out, program
; ends with s_endpgm); lines marked '+' are the replacement (result returned
; as half, so the store and s_endpgm expectations are dropped).
; Expected code: one s_setreg_imm32_b32 write to HW_REG_MODE, immediately
; followed by v_cvt_f16_f32_e32.
; NOTE(review): hwreg(HW_REG_MODE, 2, 1), 1 presumably selects round-toward-
; +inf for f16 conversions - confirm against the ISA guide.
- define amdgpu_gs void @test_fptrunc_round_upward (float %a , i32 %data0 , < 4 x i32 > %data1 , ptr addrspace ( 1 ) %out ) {
7
- ; CHECK-LABEL: test_fptrunc_round_upward :
6
+ define amdgpu_gs half @v_fptrunc_round_f32_to_f16_upward (float %a ) {
7
+ ; CHECK-LABEL: v_fptrunc_round_f32_to_f16_upward :
8
8
; CHECK: ; %bb.0:
9
9
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
10
10
; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
11
- ; CHECK-NEXT: global_store_short v[6:7], v0, off
12
- ; CHECK-NEXT: s_endpgm
11
+ ; CHECK-NEXT: ; return to shader part epilog
13
12
%res = call half @llvm.fptrunc.round.f16.f32 (float %a , metadata !"round.upward" )
14
- store half %res , ptr addrspace (1 ) %out , align 4
15
- ret void
13
+ ret half %res
16
14
}
17
15
18
; Vector-argument case for "round.downward": truncates float %a to half with
; llvm.fptrunc.round.f16.f32. %a is not marked inreg and the expectations
; read it from v0.
; Lines marked '-' are the earlier form (result stored through %out, program
; ends with s_endpgm); lines marked '+' are the replacement (result returned
; as half, so the store and s_endpgm expectations are dropped).
; Expected code: one s_setreg_imm32_b32 write to HW_REG_MODE, immediately
; followed by v_cvt_f16_f32_e32.
; NOTE(review): hwreg(HW_REG_MODE, 3, 1), 1 presumably selects round-toward-
; -inf for f16 conversions - confirm against the ISA guide.
- define amdgpu_gs void @test_fptrunc_round_downward (float %a , i32 %data0 , < 4 x i32 > %data1 , ptr addrspace ( 1 ) %out ) {
19
- ; CHECK-LABEL: test_fptrunc_round_downward :
16
+ define amdgpu_gs half @v_fptrunc_round_f32_to_f16_downward (float %a ) {
17
+ ; CHECK-LABEL: v_fptrunc_round_f32_to_f16_downward :
20
18
; CHECK: ; %bb.0:
21
19
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
22
20
; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
23
- ; CHECK-NEXT: global_store_short v[6:7], v0, off
24
- ; CHECK-NEXT: s_endpgm
21
+ ; CHECK-NEXT: ; return to shader part epilog
25
22
%res = call half @llvm.fptrunc.round.f16.f32 (float %a , metadata !"round.downward" )
26
- store half %res , ptr addrspace (1 ) %out , align 4
27
- ret void
23
+ ret half %res
28
24
}
29
25
30
- define amdgpu_gs void @test_fptrunc_round_upward_multiple_calls (float %a , float %b , i32 %data0 , < 4 x i32 > %data1 , ptr addrspace (1 ) %out ) {
31
- ; CHECK-LABEL: test_fptrunc_round_upward_multiple_calls :
26
+ define amdgpu_gs void @v_fptrunc_round_f32_to_f16_upward_multiple_calls (float %a , float %b , ptr addrspace (1 ) %out ) {
27
+ ; CHECK-LABEL: v_fptrunc_round_f32_to_f16_upward_multiple_calls :
32
28
; CHECK: ; %bb.0:
33
29
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
34
30
; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
35
- ; CHECK-NEXT: v_cvt_f16_f32_e32 v2 , v1
31
+ ; CHECK-NEXT: v_cvt_f16_f32_e32 v4 , v1
36
32
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
37
33
; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1
38
34
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
39
- ; CHECK-NEXT: v_add_f16_e32 v0, v0, v2
35
+ ; CHECK-NEXT: v_add_f16_e32 v0, v0, v4
40
36
; CHECK-NEXT: v_add_f16_e32 v0, v1, v0
41
- ; CHECK-NEXT: global_store_short v[7:8 ], v0, off
37
+ ; CHECK-NEXT: global_store_short v[2:3 ], v0, off
42
38
; CHECK-NEXT: s_endpgm
43
39
%res1 = call half @llvm.fptrunc.round.f16.f32 (float %a , metadata !"round.upward" )
44
40
%res2 = call half @llvm.fptrunc.round.f16.f32 (float %b , metadata !"round.upward" )
@@ -49,4 +45,56 @@ define amdgpu_gs void @test_fptrunc_round_upward_multiple_calls(float %a, float
49
45
ret void
50
46
}
51
47
52
- declare half @llvm.fptrunc.round.f16.f32 (float , metadata )
48
; Scalar-argument case for "round.upward": %a is passed inreg and truncated
; to half with llvm.fptrunc.round.f16.f32. The half result is bitcast to i16,
; zero-extended to i32 and returned.
; Expected code: copy s0 into v0, write HW_REG_MODE with s_setreg_imm32_b32,
; convert with v_cvt_f16_f32_e32, mask the result with 0xffff, then move it
; back to s0 with v_readfirstlane_b32.
; NOTE(review): %out is declared but never referenced in this body - confirm
; that this is intentional.
+ define amdgpu_gs i32 @s_fptrunc_round_f32_to_f16_upward (float inreg %a , ptr addrspace (1 ) %out ) {
49
+ ; CHECK-LABEL: s_fptrunc_round_f32_to_f16_upward:
50
+ ; CHECK: ; %bb.0:
51
+ ; CHECK-NEXT: v_mov_b32_e32 v0, s0
52
+ ; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
53
+ ; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
54
+ ; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
55
+ ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
56
+ ; CHECK-NEXT: ; return to shader part epilog
57
+ %res = call half @llvm.fptrunc.round.f16.f32 (float %a , metadata !"round.upward" )
58
+ %bitcast = bitcast half %res to i16
59
+ %ret = zext i16 %bitcast to i32
60
+ ret i32 %ret
61
+ }
62
+
63
; Scalar-argument case for "round.downward": %a is passed inreg and truncated
; to half with llvm.fptrunc.round.f16.f32. The half result is bitcast to i16,
; zero-extended to i32 and returned.
; Expected code: copy s0 into v0, write HW_REG_MODE with s_setreg_imm32_b32,
; convert with v_cvt_f16_f32_e32, mask the result with 0xffff, then move it
; back to s0 with v_readfirstlane_b32.
; NOTE(review): %out is declared but never referenced in this body - confirm
; that this is intentional.
+ define amdgpu_gs i32 @s_fptrunc_round_f32_to_f16_downward (float inreg %a , ptr addrspace (1 ) %out ) {
64
+ ; CHECK-LABEL: s_fptrunc_round_f32_to_f16_downward:
65
+ ; CHECK: ; %bb.0:
66
+ ; CHECK-NEXT: v_mov_b32_e32 v0, s0
67
+ ; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
68
+ ; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
69
+ ; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
70
+ ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
71
+ ; CHECK-NEXT: ; return to shader part epilog
72
+ %res = call half @llvm.fptrunc.round.f16.f32 (float %a , metadata !"round.downward" )
73
+ %bitcast = bitcast half %res to i16
74
+ %ret = zext i16 %bitcast to i32
75
+ ret i32 %ret
76
+ }
77
+
78
; Scalar-argument case with three conversions in one function: %a and %b with
; "round.upward", then %b again with "round.downward". The three half results
; are combined with two fadd instructions and the sum is stored through %out.
; Expected code: copy s0/s1 into v2/v3, then three HW_REG_MODE writes - one
; before the two upward conversions, one before the downward conversion, and
; one before the two v_add_f16 instructions - followed by a
; global_store_short to v[0:1].
; NOTE(review): the last write (hwreg(HW_REG_MODE, 3, 1), 0) presumably
; restores the default rounding mode for the additions - confirm against the
; ISA guide.
+ define amdgpu_gs void @s_fptrunc_round_f32_to_f16_upward_multiple_calls (float inreg %a , float inreg %b , ptr addrspace (1 ) %out ) {
79
+ ; CHECK-LABEL: s_fptrunc_round_f32_to_f16_upward_multiple_calls:
80
+ ; CHECK: ; %bb.0:
81
+ ; CHECK-NEXT: v_mov_b32_e32 v2, s0
82
+ ; CHECK-NEXT: v_mov_b32_e32 v3, s1
83
+ ; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
84
+ ; CHECK-NEXT: v_cvt_f16_f32_e32 v2, v2
85
+ ; CHECK-NEXT: v_cvt_f16_f32_e32 v4, v3
86
+ ; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
87
+ ; CHECK-NEXT: v_cvt_f16_f32_e32 v3, v3
88
+ ; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
89
+ ; CHECK-NEXT: v_add_f16_e32 v2, v2, v4
90
+ ; CHECK-NEXT: v_add_f16_e32 v2, v3, v2
91
+ ; CHECK-NEXT: global_store_short v[0:1], v2, off
92
+ ; CHECK-NEXT: s_endpgm
93
+ %res1 = call half @llvm.fptrunc.round.f16.f32 (float %a , metadata !"round.upward" )
94
+ %res2 = call half @llvm.fptrunc.round.f16.f32 (float %b , metadata !"round.upward" )
95
+ %res3 = call half @llvm.fptrunc.round.f16.f32 (float %b , metadata !"round.downward" )
96
+ %res4 = fadd half %res1 , %res2
97
+ %res5 = fadd half %res3 , %res4
98
+ store half %res5 , ptr addrspace (1 ) %out , align 4
99
+ ret void
100
+ }
0 commit comments