1
+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
1
2
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2
3
3
- ; GCN-LABEL: {{^}}test_i128_vreg:
4
- ; GCN: v_add_i32_e32 v[[LO:[0-9]+]], vcc,
5
- ; GCN-NEXT: v_addc_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}, vcc
6
- ; GCN-NEXT: v_addc_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}, vcc
7
- ; GCN-NEXT: v_addc_u32_e32 v[[HI:[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}, vcc
8
- ; GCN: buffer_store_dwordx4 v[[[LO]]:[[HI]]],
9
4
define amdgpu_kernel void @test_i128_vreg (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %inA , ptr addrspace (1 ) noalias %inB ) {
5
+ ; GCN-LABEL: test_i128_vreg:
6
+ ; GCN: ; %bb.0:
7
+ ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
8
+ ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd
9
+ ; GCN-NEXT: s_mov_b32 s11, 0xf000
10
+ ; GCN-NEXT: s_mov_b32 s14, 0
11
+ ; GCN-NEXT: v_lshlrev_b32_e32 v4, 4, v0
12
+ ; GCN-NEXT: v_mov_b32_e32 v5, 0
13
+ ; GCN-NEXT: s_mov_b32 s15, s11
14
+ ; GCN-NEXT: s_mov_b64 s[6:7], s[14:15]
15
+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
16
+ ; GCN-NEXT: s_mov_b64 s[12:13], s[2:3]
17
+ ; GCN-NEXT: buffer_load_dwordx4 v[0:3], v[4:5], s[12:15], 0 addr64
18
+ ; GCN-NEXT: buffer_load_dwordx4 v[4:7], v[4:5], s[4:7], 0 addr64
19
+ ; GCN-NEXT: s_mov_b32 s10, -1
20
+ ; GCN-NEXT: s_mov_b32 s8, s0
21
+ ; GCN-NEXT: s_mov_b32 s9, s1
22
+ ; GCN-NEXT: s_waitcnt vmcnt(0)
23
+ ; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v4
24
+ ; GCN-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc
25
+ ; GCN-NEXT: v_addc_u32_e32 v2, vcc, v2, v6, vcc
26
+ ; GCN-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc
27
+ ; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
28
+ ; GCN-NEXT: s_endpgm
10
29
%tid = call i32 @llvm.amdgcn.workitem.id.x () readnone
11
30
%a_ptr = getelementptr i128 , ptr addrspace (1 ) %inA , i32 %tid
12
31
%b_ptr = getelementptr i128 , ptr addrspace (1 ) %inB , i32 %tid
@@ -18,36 +37,74 @@ define amdgpu_kernel void @test_i128_vreg(ptr addrspace(1) noalias %out, ptr add
18
37
}
19
38
20
39
; Check that an i128 add whose operands are in SGPRs is done with scalar adds (s_add_u32/s_addc_u32); only the result is copied to VGPRs for the store.
21
- ; GCN-LABEL: {{^}}sgpr_operand:
22
- ; GCN: s_add_u32
23
- ; GCN: s_addc_u32
24
- ; GCN: s_addc_u32
25
- ; GCN: s_addc_u32
26
40
; sgpr_operand: kernel that loads an i128 from %in, adds the i128 kernel
; argument %a to it (loaded value on the left-hand side), and stores the
; i128 sum to %out.
; The GCN check lines below expect one s_add_u32 followed by three
; s_addc_u32 on SGPRs, then four v_mov_b32 copies into v0..v3 feeding a
; single buffer_store_dwordx4.
define amdgpu_kernel void @sgpr_operand (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in , i128 %a ) {
41
+ ; GCN-LABEL: sgpr_operand:
42
+ ; GCN: ; %bb.0:
43
+ ; GCN-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
44
+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
45
+ ; GCN-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x0
46
+ ; GCN-NEXT: s_mov_b32 s3, 0xf000
47
+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
48
+ ; GCN-NEXT: s_add_u32 s4, s8, s4
49
+ ; GCN-NEXT: s_addc_u32 s5, s9, s5
50
+ ; GCN-NEXT: s_addc_u32 s6, s10, s6
51
+ ; GCN-NEXT: s_addc_u32 s7, s11, s7
52
+ ; GCN-NEXT: s_mov_b32 s2, -1
53
+ ; GCN-NEXT: v_mov_b32_e32 v0, s4
54
+ ; GCN-NEXT: v_mov_b32_e32 v1, s5
55
+ ; GCN-NEXT: v_mov_b32_e32 v2, s6
56
+ ; GCN-NEXT: v_mov_b32_e32 v3, s7
57
+ ; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
58
+ ; GCN-NEXT: s_endpgm
; IR under test: load (align 8), add with the loaded value first, store.
27
59
%foo = load i128 , ptr addrspace (1 ) %in , align 8
28
60
%result = add i128 %foo , %a
29
61
store i128 %result , ptr addrspace (1 ) %out
30
62
ret void
31
63
}
32
64
33
- ; GCN-LABEL: {{^}}sgpr_operand_reversed:
34
- ; GCN: s_add_u32
35
- ; GCN: s_addc_u32
36
- ; GCN: s_addc_u32
37
- ; GCN: s_addc_u32
38
65
; sgpr_operand_reversed: same shape as an i128 load + add + store kernel,
; but the add takes the kernel argument %a as its first operand and the
; loaded value as its second.
; The GCN check lines below expect one s_add_u32 followed by three
; s_addc_u32 whose source operands are (s4..s7, s8..s11) in that order,
; then four v_mov_b32 copies into v0..v3 feeding a single
; buffer_store_dwordx4.
define amdgpu_kernel void @sgpr_operand_reversed (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in , i128 %a ) {
66
+ ; GCN-LABEL: sgpr_operand_reversed:
67
+ ; GCN: ; %bb.0:
68
+ ; GCN-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
69
+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
70
+ ; GCN-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x0
71
+ ; GCN-NEXT: s_mov_b32 s3, 0xf000
72
+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
73
+ ; GCN-NEXT: s_add_u32 s4, s4, s8
74
+ ; GCN-NEXT: s_addc_u32 s5, s5, s9
75
+ ; GCN-NEXT: s_addc_u32 s6, s6, s10
76
+ ; GCN-NEXT: s_addc_u32 s7, s7, s11
77
+ ; GCN-NEXT: s_mov_b32 s2, -1
78
+ ; GCN-NEXT: v_mov_b32_e32 v0, s4
79
+ ; GCN-NEXT: v_mov_b32_e32 v1, s5
80
+ ; GCN-NEXT: v_mov_b32_e32 v2, s6
81
+ ; GCN-NEXT: v_mov_b32_e32 v3, s7
82
+ ; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
83
+ ; GCN-NEXT: s_endpgm
; IR under test: load (align 8), add with the kernel argument first, store.
39
84
%foo = load i128 , ptr addrspace (1 ) %in , align 8
40
85
%result = add i128 %a , %foo
41
86
store i128 %result , ptr addrspace (1 ) %out
42
87
ret void
43
88
}
44
89
45
- ; GCN-LABEL: {{^}}test_sreg:
46
- ; GCN: s_add_u32
47
- ; GCN: s_addc_u32
48
- ; GCN: s_addc_u32
49
- ; GCN: s_addc_u32
50
90
define amdgpu_kernel void @test_sreg (ptr addrspace (1 ) noalias %out , i128 %a , i128 %b ) {
91
+ ; GCN-LABEL: test_sreg:
92
+ ; GCN: ; %bb.0:
93
+ ; GCN-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0xb
94
+ ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
95
+ ; GCN-NEXT: s_mov_b32 s3, 0xf000
96
+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
97
+ ; GCN-NEXT: s_add_u32 s4, s8, s12
98
+ ; GCN-NEXT: s_addc_u32 s5, s9, s13
99
+ ; GCN-NEXT: s_addc_u32 s6, s10, s14
100
+ ; GCN-NEXT: s_addc_u32 s7, s11, s15
101
+ ; GCN-NEXT: s_mov_b32 s2, -1
102
+ ; GCN-NEXT: v_mov_b32_e32 v0, s4
103
+ ; GCN-NEXT: v_mov_b32_e32 v1, s5
104
+ ; GCN-NEXT: v_mov_b32_e32 v2, s6
105
+ ; GCN-NEXT: v_mov_b32_e32 v3, s7
106
+ ; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
107
+ ; GCN-NEXT: s_endpgm
51
108
%result = add i128 %a , %b
52
109
store i128 %result , ptr addrspace (1 ) %out
53
110
ret void
0 commit comments