Skip to content

Commit a13ff06

Browse files
committed
AMDGPU: Remove dead arguments in test and add SGPR variants
Also cleanup to avoid the memory noise by using return values in the trivial cases.
1 parent 333aad7 commit a13ff06

File tree

1 file changed

+66
-18
lines changed

1 file changed

+66
-18
lines changed

llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll

Lines changed: 66 additions & 18 deletions
Original file line number · Diff line number · Diff line change
@@ -3,42 +3,38 @@
33
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s
44
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck %s
55

6-
define amdgpu_gs void @test_fptrunc_round_upward(float %a, i32 %data0, <4 x i32> %data1, ptr addrspace(1) %out) {
7-
; CHECK-LABEL: test_fptrunc_round_upward:
6+
define amdgpu_gs half @v_fptrunc_round_f32_to_f16_upward(float %a) {
7+
; CHECK-LABEL: v_fptrunc_round_f32_to_f16_upward:
88
; CHECK: ; %bb.0:
99
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
1010
; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
11-
; CHECK-NEXT: global_store_short v[6:7], v0, off
12-
; CHECK-NEXT: s_endpgm
11+
; CHECK-NEXT: ; return to shader part epilog
1312
%res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward")
14-
store half %res, ptr addrspace(1) %out, align 4
15-
ret void
13+
ret half %res
1614
}
1715

18-
define amdgpu_gs void @test_fptrunc_round_downward(float %a, i32 %data0, <4 x i32> %data1, ptr addrspace(1) %out) {
19-
; CHECK-LABEL: test_fptrunc_round_downward:
16+
define amdgpu_gs half @v_fptrunc_round_f32_to_f16_downward(float %a) {
17+
; CHECK-LABEL: v_fptrunc_round_f32_to_f16_downward:
2018
; CHECK: ; %bb.0:
2119
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
2220
; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
23-
; CHECK-NEXT: global_store_short v[6:7], v0, off
24-
; CHECK-NEXT: s_endpgm
21+
; CHECK-NEXT: ; return to shader part epilog
2522
%res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.downward")
26-
store half %res, ptr addrspace(1) %out, align 4
27-
ret void
23+
ret half %res
2824
}
2925

30-
define amdgpu_gs void @test_fptrunc_round_upward_multiple_calls(float %a, float %b, i32 %data0, <4 x i32> %data1, ptr addrspace(1) %out) {
31-
; CHECK-LABEL: test_fptrunc_round_upward_multiple_calls:
26+
define amdgpu_gs void @v_fptrunc_round_f32_to_f16_upward_multiple_calls(float %a, float %b, ptr addrspace(1) %out) {
27+
; CHECK-LABEL: v_fptrunc_round_f32_to_f16_upward_multiple_calls:
3228
; CHECK: ; %bb.0:
3329
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
3430
; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
35-
; CHECK-NEXT: v_cvt_f16_f32_e32 v2, v1
31+
; CHECK-NEXT: v_cvt_f16_f32_e32 v4, v1
3632
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
3733
; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1
3834
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
39-
; CHECK-NEXT: v_add_f16_e32 v0, v0, v2
35+
; CHECK-NEXT: v_add_f16_e32 v0, v0, v4
4036
; CHECK-NEXT: v_add_f16_e32 v0, v1, v0
41-
; CHECK-NEXT: global_store_short v[7:8], v0, off
37+
; CHECK-NEXT: global_store_short v[2:3], v0, off
4238
; CHECK-NEXT: s_endpgm
4339
%res1 = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward")
4440
%res2 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.upward")
@@ -49,4 +45,56 @@ define amdgpu_gs void @test_fptrunc_round_upward_multiple_calls(float %a, float
4945
ret void
5046
}
5147

52-
declare half @llvm.fptrunc.round.f16.f32(float, metadata)
48+
define amdgpu_gs i32 @s_fptrunc_round_f32_to_f16_upward(float inreg %a, ptr addrspace(1) %out) {
49+
; CHECK-LABEL: s_fptrunc_round_f32_to_f16_upward:
50+
; CHECK: ; %bb.0:
51+
; CHECK-NEXT: v_mov_b32_e32 v0, s0
52+
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
53+
; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
54+
; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
55+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
56+
; CHECK-NEXT: ; return to shader part epilog
57+
%res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward")
58+
%bitcast = bitcast half %res to i16
59+
%ret = zext i16 %bitcast to i32
60+
ret i32 %ret
61+
}
62+
63+
define amdgpu_gs i32 @s_fptrunc_round_f32_to_f16_downward(float inreg %a, ptr addrspace(1) %out) {
64+
; CHECK-LABEL: s_fptrunc_round_f32_to_f16_downward:
65+
; CHECK: ; %bb.0:
66+
; CHECK-NEXT: v_mov_b32_e32 v0, s0
67+
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
68+
; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
69+
; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
70+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
71+
; CHECK-NEXT: ; return to shader part epilog
72+
%res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.downward")
73+
%bitcast = bitcast half %res to i16
74+
%ret = zext i16 %bitcast to i32
75+
ret i32 %ret
76+
}
77+
78+
define amdgpu_gs void @s_fptrunc_round_f32_to_f16_upward_multiple_calls(float inreg %a, float inreg %b, ptr addrspace(1) %out) {
79+
; CHECK-LABEL: s_fptrunc_round_f32_to_f16_upward_multiple_calls:
80+
; CHECK: ; %bb.0:
81+
; CHECK-NEXT: v_mov_b32_e32 v2, s0
82+
; CHECK-NEXT: v_mov_b32_e32 v3, s1
83+
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
84+
; CHECK-NEXT: v_cvt_f16_f32_e32 v2, v2
85+
; CHECK-NEXT: v_cvt_f16_f32_e32 v4, v3
86+
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
87+
; CHECK-NEXT: v_cvt_f16_f32_e32 v3, v3
88+
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
89+
; CHECK-NEXT: v_add_f16_e32 v2, v2, v4
90+
; CHECK-NEXT: v_add_f16_e32 v2, v3, v2
91+
; CHECK-NEXT: global_store_short v[0:1], v2, off
92+
; CHECK-NEXT: s_endpgm
93+
%res1 = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward")
94+
%res2 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.upward")
95+
%res3 = call half @llvm.fptrunc.round.f16.f32(float %b, metadata !"round.downward")
96+
%res4 = fadd half %res1, %res2
97+
%res5 = fadd half %res3, %res4
98+
store half %res5, ptr addrspace(1) %out, align 4
99+
ret void
100+
}

0 commit comments

Comments (0)