Skip to content

Commit e21b7e2

Browse files
authored
[AMDGPU][NFC] Check more autogenerated llc tests for COV5 (#75219)
Regenerate a few more llc tests to check for COV5 instead of the default ABI version.
1 parent fde91d1 commit e21b7e2

24 files changed

+1222
-1172
lines changed

llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,10 +11,10 @@ define amdgpu_kernel void @extract_w_offset_vgpr(ptr addrspace(1) %out) {
1111
; GCN-LABEL: name: extract_w_offset_vgpr
1212
; GCN: bb.0.entry:
1313
; GCN-NEXT: successors: %bb.1(0x80000000)
14-
; GCN-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
14+
; GCN-NEXT: liveins: $vgpr0, $sgpr2_sgpr3
1515
; GCN-NEXT: {{ $}}
1616
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY killed $vgpr0
17-
; GCN-NEXT: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 36, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 4, addrspace 4)
17+
; GCN-NEXT: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr2_sgpr3, 36, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 4, addrspace 4)
1818
; GCN-NEXT: renamable $sgpr6 = COPY renamable $sgpr1
1919
; GCN-NEXT: renamable $sgpr0 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1
2020
; GCN-NEXT: renamable $sgpr4 = S_MOV_B32 61440
@@ -109,3 +109,6 @@ entry:
109109
store i32 %value, ptr addrspace(1) %out
110110
ret void
111111
}
112+
113+
!llvm.module.flags = !{!0}
114+
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}

llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll

Lines changed: 58 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -52,44 +52,44 @@ define <2 x i64> @f1() #0 {
5252
define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg4, i1 %arg5, ptr %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10, i1 %arg11) {
5353
; GFX11-LABEL: f2:
5454
; GFX11: ; %bb.0: ; %bb
55-
; GFX11-NEXT: s_mov_b64 s[16:17], s[4:5]
55+
; GFX11-NEXT: s_load_b32 s21, s[2:3], 0x24
5656
; GFX11-NEXT: v_mov_b32_e32 v31, v0
57-
; GFX11-NEXT: s_load_b32 s24, s[16:17], 0x24
5857
; GFX11-NEXT: s_mov_b32 s12, s13
59-
; GFX11-NEXT: s_mov_b64 s[10:11], s[6:7]
60-
; GFX11-NEXT: s_mov_b64 s[6:7], s[2:3]
61-
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v31
58+
; GFX11-NEXT: s_mov_b64 s[10:11], s[4:5]
6259
; GFX11-NEXT: s_mov_b64 s[4:5], s[0:1]
63-
; GFX11-NEXT: s_mov_b32 s3, 0
60+
; GFX11-NEXT: s_mov_b32 s6, 0
61+
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v31
6462
; GFX11-NEXT: s_mov_b32 s0, -1
65-
; GFX11-NEXT: s_mov_b32 s18, exec_lo
63+
; GFX11-NEXT: s_mov_b32 s20, exec_lo
6664
; GFX11-NEXT: s_mov_b32 s32, 0
6765
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
68-
; GFX11-NEXT: v_mul_lo_u32 v0, s24, v0
66+
; GFX11-NEXT: v_mul_lo_u32 v0, s21, v0
6967
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
7068
; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v0
7169
; GFX11-NEXT: s_cbranch_execz .LBB2_13
7270
; GFX11-NEXT: ; %bb.1: ; %bb14
73-
; GFX11-NEXT: s_load_b128 s[20:23], s[16:17], 0x2c
74-
; GFX11-NEXT: s_mov_b32 s19, 0
71+
; GFX11-NEXT: s_load_b128 s[16:19], s[2:3], 0x2c
7572
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
76-
; GFX11-NEXT: s_bitcmp1_b32 s21, 0
77-
; GFX11-NEXT: s_cselect_b32 s25, -1, 0
78-
; GFX11-NEXT: s_bitcmp0_b32 s21, 0
73+
; GFX11-NEXT: s_bitcmp1_b32 s17, 0
74+
; GFX11-NEXT: s_cselect_b32 s22, -1, 0
75+
; GFX11-NEXT: s_bitcmp0_b32 s17, 0
76+
; GFX11-NEXT: s_mov_b32 s17, 0
7977
; GFX11-NEXT: s_cbranch_scc0 .LBB2_3
8078
; GFX11-NEXT: ; %bb.2: ; %bb15
81-
; GFX11-NEXT: s_add_u32 s8, s16, 0x58
82-
; GFX11-NEXT: s_addc_u32 s9, s17, 0
79+
; GFX11-NEXT: s_add_u32 s8, s2, 0x58
80+
; GFX11-NEXT: s_addc_u32 s9, s3, 0
8381
; GFX11-NEXT: s_getpc_b64 s[0:1]
8482
; GFX11-NEXT: s_add_u32 s0, s0, f0@gotpcrel32@lo+4
8583
; GFX11-NEXT: s_addc_u32 s1, s1, f0@gotpcrel32@hi+12
8684
; GFX11-NEXT: s_mov_b32 s13, s14
8785
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
88-
; GFX11-NEXT: s_mov_b32 s3, s14
86+
; GFX11-NEXT: s_mov_b32 s23, s14
8987
; GFX11-NEXT: s_mov_b32 s14, s15
88+
; GFX11-NEXT: s_mov_b64 s[6:7], s[2:3]
9089
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
9190
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
92-
; GFX11-NEXT: s_mov_b32 s14, s3
91+
; GFX11-NEXT: s_mov_b32 s14, s23
92+
; GFX11-NEXT: s_mov_b64 s[2:3], s[6:7]
9393
; GFX11-NEXT: s_mov_b32 s1, -1
9494
; GFX11-NEXT: s_cbranch_execz .LBB2_4
9595
; GFX11-NEXT: s_branch .LBB2_12
@@ -98,66 +98,66 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
9898
; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0
9999
; GFX11-NEXT: s_cbranch_vccnz .LBB2_12
100100
; GFX11-NEXT: .LBB2_4: ; %bb16
101-
; GFX11-NEXT: s_load_b32 s2, s[16:17], 0x54
102-
; GFX11-NEXT: s_bitcmp1_b32 s23, 0
101+
; GFX11-NEXT: s_load_b32 s6, s[2:3], 0x54
102+
; GFX11-NEXT: s_bitcmp1_b32 s19, 0
103103
; GFX11-NEXT: s_cselect_b32 s0, -1, 0
104-
; GFX11-NEXT: s_and_b32 s3, s23, 1
104+
; GFX11-NEXT: s_and_b32 s7, s19, 1
105105
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
106-
; GFX11-NEXT: s_bitcmp1_b32 s2, 0
107-
; GFX11-NEXT: s_mov_b32 s2, -1
106+
; GFX11-NEXT: s_bitcmp1_b32 s6, 0
107+
; GFX11-NEXT: s_mov_b32 s6, -1
108108
; GFX11-NEXT: s_cselect_b32 s8, -1, 0
109-
; GFX11-NEXT: s_cmp_eq_u32 s3, 0
109+
; GFX11-NEXT: s_cmp_eq_u32 s7, 0
110110
; GFX11-NEXT: s_cbranch_scc0 .LBB2_8
111111
; GFX11-NEXT: ; %bb.5: ; %bb18.preheader
112-
; GFX11-NEXT: s_load_b128 s[28:31], s[16:17], 0x44
112+
; GFX11-NEXT: s_load_b128 s[24:27], s[2:3], 0x44
113113
; GFX11-NEXT: v_mov_b32_e32 v2, 0
114114
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
115-
; GFX11-NEXT: s_mul_hi_u32 s2, s29, s28
116-
; GFX11-NEXT: s_mul_i32 s3, s29, s28
115+
; GFX11-NEXT: s_mul_hi_u32 s6, s25, s24
116+
; GFX11-NEXT: s_mul_i32 s7, s25, s24
117117
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
118-
; GFX11-NEXT: v_alignbit_b32 v0, s2, s3, 1
119-
; GFX11-NEXT: s_mov_b32 s3, 0
120-
; GFX11-NEXT: v_readfirstlane_b32 s2, v0
121-
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s25
118+
; GFX11-NEXT: v_alignbit_b32 v0, s6, s7, 1
119+
; GFX11-NEXT: s_mov_b32 s7, 0
120+
; GFX11-NEXT: v_readfirstlane_b32 s6, v0
121+
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s22
122122
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(SALU_CYCLE_1)
123-
; GFX11-NEXT: s_or_b32 s2, s2, 1
124-
; GFX11-NEXT: s_lshr_b32 s2, s2, s30
123+
; GFX11-NEXT: s_or_b32 s6, s6, 1
124+
; GFX11-NEXT: s_lshr_b32 s6, s6, s26
125125
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
126-
; GFX11-NEXT: s_mul_i32 s2, s2, s22
127-
; GFX11-NEXT: s_mul_i32 s2, s2, s20
126+
; GFX11-NEXT: s_mul_i32 s6, s6, s18
127+
; GFX11-NEXT: s_mul_i32 s6, s6, s16
128128
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
129-
; GFX11-NEXT: s_or_b32 s2, s24, s2
130-
; GFX11-NEXT: s_lshl_b64 s[20:21], s[2:3], 1
131-
; GFX11-NEXT: global_load_u16 v1, v2, s[20:21]
129+
; GFX11-NEXT: s_or_b32 s6, s21, s6
130+
; GFX11-NEXT: s_lshl_b64 s[18:19], s[6:7], 1
131+
; GFX11-NEXT: global_load_u16 v1, v2, s[18:19]
132132
; GFX11-NEXT: s_waitcnt vmcnt(0)
133133
; GFX11-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v1
134134
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
135135
; GFX11-NEXT: .p2align 6
136136
; GFX11-NEXT: .LBB2_6: ; %bb18
137137
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
138-
; GFX11-NEXT: v_cmp_ne_u16_e64 s2, s3, 0
138+
; GFX11-NEXT: v_cmp_ne_u16_e64 s6, s7, 0
139139
; GFX11-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0, v2
140140
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
141-
; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s2
141+
; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s6
142142
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
143143
; GFX11-NEXT: s_and_b32 vcc_lo, s8, vcc_lo
144144
; GFX11-NEXT: v_cndmask_b32_e64 v3, v1, v3, s0
145145
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
146146
; GFX11-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc_lo
147147
; GFX11-NEXT: s_mov_b32 vcc_lo, 0
148-
; GFX11-NEXT: v_readfirstlane_b32 s2, v3
148+
; GFX11-NEXT: v_readfirstlane_b32 s6, v3
149149
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
150150
; GFX11-NEXT: v_and_b32_e32 v2, 1, v2
151-
; GFX11-NEXT: s_bitcmp1_b32 s2, 0
152-
; GFX11-NEXT: s_cselect_b32 s2, 0x100, 0
151+
; GFX11-NEXT: s_bitcmp1_b32 s6, 0
152+
; GFX11-NEXT: s_cselect_b32 s6, 0x100, 0
153153
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
154-
; GFX11-NEXT: s_or_b32 s3, s2, s3
154+
; GFX11-NEXT: s_or_b32 s7, s6, s7
155155
; GFX11-NEXT: s_cbranch_vccz .LBB2_6
156156
; GFX11-NEXT: ; %bb.7: ; %Flow
157-
; GFX11-NEXT: s_mov_b32 s2, 0
157+
; GFX11-NEXT: s_mov_b32 s6, 0
158158
; GFX11-NEXT: .LBB2_8: ; %Flow12
159159
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
160-
; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2
160+
; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s6
161161
; GFX11-NEXT: s_cbranch_vccz .LBB2_12
162162
; GFX11-NEXT: ; %bb.9:
163163
; GFX11-NEXT: s_xor_b32 s0, s8, -1
@@ -167,17 +167,17 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
167167
; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s0
168168
; GFX11-NEXT: s_cbranch_vccz .LBB2_10
169169
; GFX11-NEXT: ; %bb.11: ; %Flow6
170-
; GFX11-NEXT: s_mov_b32 s19, -1
170+
; GFX11-NEXT: s_mov_b32 s17, -1
171171
; GFX11-NEXT: .LBB2_12: ; %Flow11
172-
; GFX11-NEXT: s_and_b32 s3, s1, exec_lo
173-
; GFX11-NEXT: s_or_not1_b32 s0, s19, exec_lo
172+
; GFX11-NEXT: s_and_b32 s6, s1, exec_lo
173+
; GFX11-NEXT: s_or_not1_b32 s0, s17, exec_lo
174174
; GFX11-NEXT: .LBB2_13: ; %Flow9
175-
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s18
176-
; GFX11-NEXT: s_and_saveexec_b32 s18, s0
175+
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s20
176+
; GFX11-NEXT: s_and_saveexec_b32 s7, s0
177177
; GFX11-NEXT: s_cbranch_execz .LBB2_15
178178
; GFX11-NEXT: ; %bb.14: ; %bb43
179-
; GFX11-NEXT: s_add_u32 s8, s16, 0x58
180-
; GFX11-NEXT: s_addc_u32 s9, s17, 0
179+
; GFX11-NEXT: s_add_u32 s8, s2, 0x58
180+
; GFX11-NEXT: s_addc_u32 s9, s3, 0
181181
; GFX11-NEXT: s_getpc_b64 s[0:1]
182182
; GFX11-NEXT: s_add_u32 s0, s0, f0@gotpcrel32@lo+4
183183
; GFX11-NEXT: s_addc_u32 s1, s1, f0@gotpcrel32@hi+12
@@ -186,10 +186,10 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
186186
; GFX11-NEXT: s_mov_b32 s14, s15
187187
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
188188
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
189-
; GFX11-NEXT: s_or_b32 s3, s3, exec_lo
189+
; GFX11-NEXT: s_or_b32 s6, s6, exec_lo
190190
; GFX11-NEXT: .LBB2_15: ; %Flow14
191-
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s18
192-
; GFX11-NEXT: s_and_saveexec_b32 s0, s3
191+
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s7
192+
; GFX11-NEXT: s_and_saveexec_b32 s0, s6
193193
; GFX11-NEXT: ; %bb.16: ; %UnifiedUnreachableBlock
194194
; GFX11-NEXT: ; divergent unreachable
195195
; GFX11-NEXT: ; %bb.17: ; %UnifiedReturnBlock
@@ -246,3 +246,6 @@ bb43:
246246
}
247247

248248
attributes #0 = { noinline optnone }
249+
250+
!llvm.module.flags = !{!0}
251+
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}

llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll

Lines changed: 28 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -9,30 +9,22 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
99
; CHECK: ; %bb.0:
1010
; CHECK-NEXT: s_mov_b32 s32, 0x180000
1111
; CHECK-NEXT: s_mov_b32 s33, 0
12-
; CHECK-NEXT: s_add_u32 flat_scratch_lo, s12, s17
13-
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
14-
; CHECK-NEXT: s_add_u32 s0, s0, s17
12+
; CHECK-NEXT: s_add_u32 flat_scratch_lo, s10, s15
13+
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s11, 0
14+
; CHECK-NEXT: s_add_u32 s0, s0, s15
1515
; CHECK-NEXT: s_addc_u32 s1, s1, 0
1616
; CHECK-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
17-
; CHECK-NEXT: v_writelane_b32 v3, s16, 0
18-
; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1
19-
; CHECK-NEXT: s_add_i32 s12, s33, 0x100200
20-
; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s12 ; 4-byte Folded Spill
21-
; CHECK-NEXT: s_mov_b64 exec, s[34:35]
22-
; CHECK-NEXT: s_mov_b32 s13, s15
23-
; CHECK-NEXT: s_mov_b32 s12, s14
24-
; CHECK-NEXT: v_readlane_b32 s14, v3, 0
25-
; CHECK-NEXT: s_mov_b64 s[16:17], s[8:9]
17+
; CHECK-NEXT: s_mov_b64 s[10:11], s[8:9]
2618
; CHECK-NEXT: v_mov_b32_e32 v3, v2
2719
; CHECK-NEXT: v_mov_b32_e32 v2, v1
2820
; CHECK-NEXT: v_mov_b32_e32 v1, v0
2921
; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1
3022
; CHECK-NEXT: s_add_i32 s8, s33, 0x100200
3123
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s8 ; 4-byte Folded Reload
3224
; CHECK-NEXT: s_mov_b64 exec, s[34:35]
33-
; CHECK-NEXT: s_load_dword s8, s[16:17], 0x0
25+
; CHECK-NEXT: s_load_dword s8, s[6:7], 0x0
3426
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
35-
; CHECK-NEXT: v_writelane_b32 v0, s8, 1
27+
; CHECK-NEXT: v_writelane_b32 v0, s8, 0
3628
; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1
3729
; CHECK-NEXT: s_add_i32 s8, s33, 0x100200
3830
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s8 ; 4-byte Folded Spill
@@ -42,28 +34,29 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
4234
; CHECK-NEXT: ;;#ASMEND
4335
; CHECK-NEXT: s_add_i32 s8, s33, 0x100100
4436
; CHECK-NEXT: buffer_store_dword v10, off, s[0:3], s8 ; 4-byte Folded Spill
45-
; CHECK-NEXT: s_mov_b64 s[18:19], 8
46-
; CHECK-NEXT: s_mov_b32 s8, s16
47-
; CHECK-NEXT: s_mov_b32 s9, s17
48-
; CHECK-NEXT: s_mov_b32 s16, s18
49-
; CHECK-NEXT: s_mov_b32 s15, s19
50-
; CHECK-NEXT: s_add_u32 s8, s8, s16
51-
; CHECK-NEXT: s_addc_u32 s15, s9, s15
37+
; CHECK-NEXT: s_mov_b64 s[16:17], 8
38+
; CHECK-NEXT: s_mov_b32 s8, s6
39+
; CHECK-NEXT: s_mov_b32 s6, s7
40+
; CHECK-NEXT: s_mov_b32 s9, s16
41+
; CHECK-NEXT: s_mov_b32 s7, s17
42+
; CHECK-NEXT: s_add_u32 s8, s8, s9
43+
; CHECK-NEXT: s_addc_u32 s6, s6, s7
5244
; CHECK-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9
53-
; CHECK-NEXT: s_mov_b32 s9, s15
45+
; CHECK-NEXT: s_mov_b32 s9, s6
5446
; CHECK-NEXT: v_mov_b32_e32 v0, 0x2000
55-
; CHECK-NEXT: ; implicit-def: $sgpr15
56-
; CHECK-NEXT: s_getpc_b64 s[16:17]
57-
; CHECK-NEXT: s_add_u32 s16, s16, device_func@gotpcrel32@lo+4
58-
; CHECK-NEXT: s_addc_u32 s17, s17, device_func@gotpcrel32@hi+12
59-
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
47+
; CHECK-NEXT: ; implicit-def: $sgpr6
48+
; CHECK-NEXT: s_getpc_b64 s[6:7]
49+
; CHECK-NEXT: s_add_u32 s6, s6, device_func@gotpcrel32@lo+4
50+
; CHECK-NEXT: s_addc_u32 s7, s7, device_func@gotpcrel32@hi+12
51+
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[6:7], 0x0
6052
; CHECK-NEXT: s_mov_b64 s[22:23], s[2:3]
6153
; CHECK-NEXT: s_mov_b64 s[20:21], s[0:1]
62-
; CHECK-NEXT: s_mov_b32 s15, 20
63-
; CHECK-NEXT: v_lshlrev_b32_e64 v3, s15, v3
64-
; CHECK-NEXT: s_mov_b32 s15, 10
65-
; CHECK-NEXT: v_lshlrev_b32_e64 v2, s15, v2
54+
; CHECK-NEXT: s_mov_b32 s6, 20
55+
; CHECK-NEXT: v_lshlrev_b32_e64 v3, s6, v3
56+
; CHECK-NEXT: s_mov_b32 s6, 10
57+
; CHECK-NEXT: v_lshlrev_b32_e64 v2, s6, v2
6658
; CHECK-NEXT: v_or3_b32 v31, v1, v2, v3
59+
; CHECK-NEXT: ; implicit-def: $sgpr6_sgpr7
6760
; CHECK-NEXT: ; implicit-def: $sgpr15
6861
; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21]
6962
; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
@@ -76,7 +69,7 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
7669
; CHECK-NEXT: s_add_i32 s4, s33, 0x100100
7770
; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s4 ; 4-byte Folded Reload
7871
; CHECK-NEXT: s_waitcnt vmcnt(1)
79-
; CHECK-NEXT: v_readlane_b32 s4, v0, 1
72+
; CHECK-NEXT: v_readlane_b32 s4, v0, 0
8073
; CHECK-NEXT: s_mov_b32 s5, 0
8174
; CHECK-NEXT: s_cmp_eq_u32 s4, s5
8275
; CHECK-NEXT: v_mov_b32_e32 v0, 0x4000
@@ -120,3 +113,6 @@ end:
120113
declare void @device_func(ptr addrspace(5))
121114

122115
attributes #0 = { nounwind "frame-pointer"="all" }
116+
117+
!llvm.module.flags = !{!0}
118+
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}

0 commit comments

Comments
 (0)