Skip to content

Commit a3c8eb9

Browse files
committed
Undo changes in AMDGPUSubtarget.cpp to reduce impact on test files.
Those code changes will be in a follow-up PR.
1 parent 6798dc4 commit a3c8eb9

File tree

97 files changed

+644
-12190
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

97 files changed

+644
-12190
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll

Lines changed: 28 additions & 340 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll

Lines changed: 30 additions & 360 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll

Lines changed: 30 additions & 39 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll

Lines changed: 4 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -12,9 +12,7 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
1212
; GFX8V4: ; %bb.0:
1313
; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
1414
; GFX8V4-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x40
15-
; GFX8V4-NEXT: s_add_i32 s12, s12, s17
16-
; GFX8V4-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
17-
; GFX8V4-NEXT: s_mov_b32 flat_scratch_lo, s13
15+
; GFX8V4-NEXT: v_mov_b32_e32 v2, 1
1816
; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
1917
; GFX8V4-NEXT: s_mov_b32 s4, s0
2018
; GFX8V4-NEXT: s_mov_b32 s5, s3
@@ -25,7 +23,6 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
2523
; GFX8V4-NEXT: s_cmp_lg_u32 s1, -1
2624
; GFX8V4-NEXT: v_mov_b32_e32 v0, s4
2725
; GFX8V4-NEXT: s_cselect_b64 s[0:1], s[6:7], 0
28-
; GFX8V4-NEXT: v_mov_b32_e32 v2, 1
2926
; GFX8V4-NEXT: v_mov_b32_e32 v1, s5
3027
; GFX8V4-NEXT: flat_store_dword v[0:1], v2
3128
; GFX8V4-NEXT: s_waitcnt vmcnt(0)
@@ -40,9 +37,7 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
4037
; GFX8V5: ; %bb.0:
4138
; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
4239
; GFX8V5-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0xc8
43-
; GFX8V5-NEXT: s_add_i32 s10, s10, s15
44-
; GFX8V5-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
45-
; GFX8V5-NEXT: s_mov_b32 flat_scratch_lo, s11
40+
; GFX8V5-NEXT: v_mov_b32_e32 v2, 1
4641
; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
4742
; GFX8V5-NEXT: s_mov_b32 s4, s0
4843
; GFX8V5-NEXT: s_mov_b32 s5, s2
@@ -52,7 +47,6 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
5247
; GFX8V5-NEXT: s_cmp_lg_u32 s1, -1
5348
; GFX8V5-NEXT: v_mov_b32_e32 v0, s4
5449
; GFX8V5-NEXT: s_cselect_b64 s[0:1], s[2:3], 0
55-
; GFX8V5-NEXT: v_mov_b32_e32 v2, 1
5650
; GFX8V5-NEXT: v_mov_b32_e32 v1, s5
5751
; GFX8V5-NEXT: flat_store_dword v[0:1], v2
5852
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
@@ -66,10 +60,9 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
6660
; GFX9V4-LABEL: addrspacecast:
6761
; GFX9V4: ; %bb.0:
6862
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
69-
; GFX9V4-NEXT: s_add_u32 flat_scratch_lo, s12, s17
70-
; GFX9V4-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
7163
; GFX9V4-NEXT: s_mov_b64 s[2:3], src_private_base
7264
; GFX9V4-NEXT: s_mov_b64 s[4:5], src_shared_base
65+
; GFX9V4-NEXT: v_mov_b32_e32 v2, 1
7366
; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
7467
; GFX9V4-NEXT: s_mov_b32 s2, s0
7568
; GFX9V4-NEXT: s_cmp_lg_u32 s0, -1
@@ -78,7 +71,6 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
7871
; GFX9V4-NEXT: s_cmp_lg_u32 s1, -1
7972
; GFX9V4-NEXT: v_mov_b32_e32 v0, s2
8073
; GFX9V4-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
81-
; GFX9V4-NEXT: v_mov_b32_e32 v2, 1
8274
; GFX9V4-NEXT: v_mov_b32_e32 v1, s3
8375
; GFX9V4-NEXT: flat_store_dword v[0:1], v2
8476
; GFX9V4-NEXT: s_waitcnt vmcnt(0)
@@ -92,10 +84,9 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
9284
; GFX9V5-LABEL: addrspacecast:
9385
; GFX9V5: ; %bb.0:
9486
; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
95-
; GFX9V5-NEXT: s_add_u32 flat_scratch_lo, s10, s15
96-
; GFX9V5-NEXT: s_addc_u32 flat_scratch_hi, s11, 0
9787
; GFX9V5-NEXT: s_mov_b64 s[2:3], src_private_base
9888
; GFX9V5-NEXT: s_mov_b64 s[4:5], src_shared_base
89+
; GFX9V5-NEXT: v_mov_b32_e32 v2, 1
9990
; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
10091
; GFX9V5-NEXT: s_mov_b32 s2, s0
10192
; GFX9V5-NEXT: s_cmp_lg_u32 s0, -1
@@ -104,7 +95,6 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
10495
; GFX9V5-NEXT: s_cmp_lg_u32 s1, -1
10596
; GFX9V5-NEXT: v_mov_b32_e32 v0, s2
10697
; GFX9V5-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
107-
; GFX9V5-NEXT: v_mov_b32_e32 v2, 1
10898
; GFX9V5-NEXT: v_mov_b32_e32 v1, s3
10999
; GFX9V5-NEXT: flat_store_dword v[0:1], v2
110100
; GFX9V5-NEXT: s_waitcnt vmcnt(0)
@@ -127,9 +117,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) {
127117
; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
128118
; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
129119
; GFX8V4-NEXT: s_load_dword s0, s[6:7], 0x40
130-
; GFX8V4-NEXT: s_add_i32 s12, s12, s17
131-
; GFX8V4-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
132-
; GFX8V4-NEXT: s_mov_b32 flat_scratch_lo, s13
133120
; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
134121
; GFX8V4-NEXT: s_cmp_eq_u32 s1, s0
135122
; GFX8V4-NEXT: s_cselect_b32 s0, 1, 0
@@ -143,9 +130,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) {
143130
; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
144131
; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
145132
; GFX8V5-NEXT: s_load_dword s0, s[6:7], 0xcc
146-
; GFX8V5-NEXT: s_add_i32 s10, s10, s15
147-
; GFX8V5-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
148-
; GFX8V5-NEXT: s_mov_b32 flat_scratch_lo, s11
149133
; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
150134
; GFX8V5-NEXT: s_cmp_eq_u32 s1, s0
151135
; GFX8V5-NEXT: s_cselect_b32 s0, 1, 0
@@ -189,9 +173,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) {
189173
; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
190174
; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
191175
; GFX8V4-NEXT: s_load_dword s0, s[6:7], 0x44
192-
; GFX8V4-NEXT: s_add_i32 s12, s12, s17
193-
; GFX8V4-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
194-
; GFX8V4-NEXT: s_mov_b32 flat_scratch_lo, s13
195176
; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
196177
; GFX8V4-NEXT: s_cmp_eq_u32 s1, s0
197178
; GFX8V4-NEXT: s_cselect_b32 s0, 1, 0
@@ -205,9 +186,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) {
205186
; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
206187
; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
207188
; GFX8V5-NEXT: s_load_dword s0, s[6:7], 0xc8
208-
; GFX8V5-NEXT: s_add_i32 s10, s10, s15
209-
; GFX8V5-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
210-
; GFX8V5-NEXT: s_mov_b32 flat_scratch_lo, s11
211189
; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
212190
; GFX8V5-NEXT: s_cmp_eq_u32 s1, s0
213191
; GFX8V5-NEXT: s_cselect_b32 s0, 1, 0
@@ -291,10 +269,7 @@ define amdgpu_kernel void @llvm_debugtrap() {
291269
define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) {
292270
; GFX8V4-LABEL: llvm_amdgcn_queue_ptr:
293271
; GFX8V4: ; %bb.0:
294-
; GFX8V4-NEXT: s_add_i32 s12, s12, s17
295272
; GFX8V4-NEXT: v_mov_b32_e32 v0, s6
296-
; GFX8V4-NEXT: s_mov_b32 flat_scratch_lo, s13
297-
; GFX8V4-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
298273
; GFX8V4-NEXT: v_mov_b32_e32 v1, s7
299274
; GFX8V4-NEXT: s_add_u32 s0, s8, 8
300275
; GFX8V4-NEXT: flat_load_ubyte v0, v[0:1] glc
@@ -320,9 +295,6 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) {
320295
;
321296
; GFX8V5-LABEL: llvm_amdgcn_queue_ptr:
322297
; GFX8V5: ; %bb.0:
323-
; GFX8V5-NEXT: s_add_i32 s10, s10, s15
324-
; GFX8V5-NEXT: s_mov_b32 flat_scratch_lo, s11
325-
; GFX8V5-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
326298
; GFX8V5-NEXT: s_add_u32 s0, s6, 8
327299
; GFX8V5-NEXT: flat_load_ubyte v0, v[0:1] glc
328300
; GFX8V5-NEXT: s_addc_u32 s1, s7, 0

llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(ptr addrspace(1) %out.ptr, ptr
99
; GCN: ; %bb.0:
1010
; GCN-NEXT: s_load_dwordx4 s[20:23], s[6:7], 0x0
1111
; GCN-NEXT: s_load_dwordx2 s[24:25], s[6:7], 0x10
12-
; GCN-NEXT: s_add_u32 s0, s0, s15
12+
; GCN-NEXT: s_add_u32 s0, s0, s13
1313
; GCN-NEXT: s_addc_u32 s1, s1, 0
1414
; GCN-NEXT: v_mov_b32_e32 v64, 0
1515
; GCN-NEXT: s_waitcnt lgkmcnt(0)

llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-value.ll

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -11,16 +11,13 @@ define amdgpu_kernel void @use_lds_globals(ptr addrspace(1) %out, ptr addrspace(
1111
; CHECK-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
1212
; CHECK-NEXT: v_mov_b32_e32 v0, 4
1313
; CHECK-NEXT: s_mov_b32 m0, -1
14-
; CHECK-NEXT: s_add_i32 s10, s10, s15
1514
; CHECK-NEXT: ds_read_b32 v2, v0
16-
; CHECK-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
15+
; CHECK-NEXT: v_mov_b32_e32 v3, 9
1716
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
1817
; CHECK-NEXT: s_add_u32 s0, s0, 4
1918
; CHECK-NEXT: s_addc_u32 s1, s1, 0
2019
; CHECK-NEXT: v_mov_b32_e32 v0, s0
21-
; CHECK-NEXT: s_mov_b32 flat_scratch_lo, s11
2220
; CHECK-NEXT: v_mov_b32_e32 v1, s1
23-
; CHECK-NEXT: v_mov_b32_e32 v3, 9
2421
; CHECK-NEXT: flat_store_dword v[0:1], v2
2522
; CHECK-NEXT: v_mov_b32_e32 v0, 0x200
2623
; CHECK-NEXT: ds_write_b32 v0, v3

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,6 @@ define amdgpu_kernel void @test_wave64(i32 %arg0, [8 x i32], i64 %saved) {
66
; GCN: ; %bb.0: ; %entry
77
; GCN-NEXT: s_load_dword s2, s[6:7], 0x0
88
; GCN-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0xa
9-
; GCN-NEXT: s_add_i32 s10, s10, s15
10-
; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
11-
; GCN-NEXT: s_mov_b32 flat_scratch_lo, s11
129
; GCN-NEXT: s_waitcnt lgkmcnt(0)
1310
; GCN-NEXT: s_cmp_eq_u32 s2, 0
1411
; GCN-NEXT: s_cselect_b32 s2, 1, 0

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll

Lines changed: 0 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -42,9 +42,6 @@ define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) {
4242
; CI: ; %bb.0:
4343
; CI-NEXT: s_load_dword s2, s[6:7], 0x2
4444
; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
45-
; CI-NEXT: s_add_i32 s10, s10, s15
46-
; CI-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
47-
; CI-NEXT: s_mov_b32 flat_scratch_lo, s11
4845
; CI-NEXT: s_waitcnt lgkmcnt(0)
4946
; CI-NEXT: v_mov_b32_e32 v0, s2
5047
; CI-NEXT: v_trig_preop_f64 v[0:1], s[0:1], v0
@@ -62,9 +59,6 @@ define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) {
6259
; VI: ; %bb.0:
6360
; VI-NEXT: s_load_dword s2, s[6:7], 0x8
6461
; VI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
65-
; VI-NEXT: s_add_i32 s10, s10, s15
66-
; VI-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
67-
; VI-NEXT: s_mov_b32 flat_scratch_lo, s11
6862
; VI-NEXT: s_waitcnt lgkmcnt(0)
6963
; VI-NEXT: v_mov_b32_e32 v0, s2
7064
; VI-NEXT: v_trig_preop_f64 v[0:1], s[0:1], v0
@@ -82,8 +76,6 @@ define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) {
8276
; GFX9: ; %bb.0:
8377
; GFX9-NEXT: s_load_dword s2, s[6:7], 0x8
8478
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
85-
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s10, s15
86-
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s11, 0
8779
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
8880
; GFX9-NEXT: v_mov_b32_e32 v0, s2
8981
; GFX9-NEXT: v_trig_preop_f64 v[0:1], s[0:1], v0
@@ -93,10 +85,6 @@ define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) {
9385
;
9486
; GFX10-LABEL: s_trig_preop_f64:
9587
; GFX10: ; %bb.0:
96-
; GFX10-NEXT: s_add_u32 s10, s10, s15
97-
; GFX10-NEXT: s_addc_u32 s11, s11, 0
98-
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
99-
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
10088
; GFX10-NEXT: s_clause 0x1
10189
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
10290
; GFX10-NEXT: s_load_dword s2, s[6:7], 0x8
@@ -125,9 +113,6 @@ define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) {
125113
; CI-LABEL: s_trig_preop_f64_imm:
126114
; CI: ; %bb.0:
127115
; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
128-
; CI-NEXT: s_add_i32 s10, s10, s15
129-
; CI-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
130-
; CI-NEXT: s_mov_b32 flat_scratch_lo, s11
131116
; CI-NEXT: s_waitcnt lgkmcnt(0)
132117
; CI-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
133118
; CI-NEXT: s_add_u32 s0, s0, 4
@@ -143,9 +128,6 @@ define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) {
143128
; VI-LABEL: s_trig_preop_f64_imm:
144129
; VI: ; %bb.0:
145130
; VI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
146-
; VI-NEXT: s_add_i32 s10, s10, s15
147-
; VI-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
148-
; VI-NEXT: s_mov_b32 flat_scratch_lo, s11
149131
; VI-NEXT: s_waitcnt lgkmcnt(0)
150132
; VI-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
151133
; VI-NEXT: s_add_u32 s0, s0, 4
@@ -161,8 +143,6 @@ define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) {
161143
; GFX9-LABEL: s_trig_preop_f64_imm:
162144
; GFX9: ; %bb.0:
163145
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
164-
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s10, s15
165-
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s11, 0
166146
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
167147
; GFX9-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
168148
; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
@@ -171,10 +151,6 @@ define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) {
171151
;
172152
; GFX10-LABEL: s_trig_preop_f64_imm:
173153
; GFX10: ; %bb.0:
174-
; GFX10-NEXT: s_add_u32 s10, s10, s15
175-
; GFX10-NEXT: s_addc_u32 s11, s11, 0
176-
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
177-
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
178154
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
179155
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
180156
; GFX10-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7

llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll

Lines changed: 0 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -7,9 +7,6 @@ define amdgpu_kernel void @sdivrem_i32(ptr addrspace(1) %out0, ptr addrspace(1)
77
; GFX8-LABEL: sdivrem_i32:
88
; GFX8: ; %bb.0:
99
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x10
10-
; GFX8-NEXT: s_add_i32 s10, s10, s15
11-
; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s11
12-
; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
1310
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1411
; GFX8-NEXT: s_ashr_i32 s8, s5, 31
1512
; GFX8-NEXT: s_add_i32 s0, s5, s8
@@ -148,9 +145,6 @@ define amdgpu_kernel void @sdivrem_i32(ptr addrspace(1) %out0, ptr addrspace(1)
148145
define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i64 %x, i64 %y) {
149146
; GFX8-LABEL: sdivrem_i64:
150147
; GFX8: ; %bb.0:
151-
; GFX8-NEXT: s_add_i32 s10, s10, s15
152-
; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s11
153-
; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
154148
; GFX8-NEXT: s_load_dwordx8 s[4:11], s[6:7], 0x0
155149
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
156150
; GFX8-NEXT: s_ashr_i32 s2, s9, 31
@@ -622,9 +616,6 @@ define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1)
622616
define amdgpu_kernel void @sdivrem_v2i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i32> %x, <2 x i32> %y) {
623617
; GFX8-LABEL: sdivrem_v2i32:
624618
; GFX8: ; %bb.0:
625-
; GFX8-NEXT: s_add_i32 s10, s10, s15
626-
; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s11
627-
; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
628619
; GFX8-NEXT: s_load_dwordx8 s[4:11], s[6:7], 0x0
629620
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
630621
; GFX8-NEXT: s_ashr_i32 s2, s10, 31
@@ -854,9 +845,6 @@ define amdgpu_kernel void @sdivrem_v2i32(ptr addrspace(1) %out0, ptr addrspace(1
854845
define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <4 x i32> %x, <4 x i32> %y) {
855846
; GFX8-LABEL: sdivrem_v4i32:
856847
; GFX8: ; %bb.0:
857-
; GFX8-NEXT: s_add_i32 s10, s10, s15
858-
; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s11
859-
; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
860848
; GFX8-NEXT: s_load_dwordx8 s[8:15], s[6:7], 0x10
861849
; GFX8-NEXT: s_load_dwordx4 s[4:7], s[6:7], 0x0
862850
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -1283,9 +1271,6 @@ define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1
12831271
define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i64> %x, <2 x i64> %y) {
12841272
; GFX8-LABEL: sdivrem_v2i64:
12851273
; GFX8: ; %bb.0:
1286-
; GFX8-NEXT: s_add_i32 s10, s10, s15
1287-
; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s11
1288-
; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
12891274
; GFX8-NEXT: s_load_dwordx8 s[8:15], s[6:7], 0x0
12901275
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x20
12911276
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -2203,9 +2188,6 @@ define amdgpu_kernel void @sdiv_i8(ptr addrspace(1) %out0, ptr addrspace(1) %out
22032188
; GFX8-LABEL: sdiv_i8:
22042189
; GFX8: ; %bb.0:
22052190
; GFX8-NEXT: s_load_dword s4, s[6:7], 0x10
2206-
; GFX8-NEXT: s_add_i32 s10, s10, s15
2207-
; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s11
2208-
; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
22092191
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
22102192
; GFX8-NEXT: s_bfe_i32 s0, s4, 0x80008
22112193
; GFX8-NEXT: s_ashr_i32 s5, s0, 31
@@ -2351,9 +2333,6 @@ define amdgpu_kernel void @sdivrem_v2i8(ptr addrspace(1) %out0, ptr addrspace(1)
23512333
; GFX8-LABEL: sdivrem_v2i8:
23522334
; GFX8: ; %bb.0:
23532335
; GFX8-NEXT: s_load_dword s2, s[6:7], 0x10
2354-
; GFX8-NEXT: s_add_i32 s10, s10, s15
2355-
; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
2356-
; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s11
23572336
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
23582337
; GFX8-NEXT: s_bfe_i32 s0, s2, 0x80010
23592338
; GFX8-NEXT: s_ashr_i32 s3, s0, 31
@@ -2618,9 +2597,6 @@ define amdgpu_kernel void @sdiv_i16(ptr addrspace(1) %out0, ptr addrspace(1) %ou
26182597
; GFX8-LABEL: sdiv_i16:
26192598
; GFX8: ; %bb.0:
26202599
; GFX8-NEXT: s_load_dword s4, s[6:7], 0x10
2621-
; GFX8-NEXT: s_add_i32 s10, s10, s15
2622-
; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s11
2623-
; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
26242600
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
26252601
; GFX8-NEXT: s_bfe_i32 s0, s4, 0x100010
26262602
; GFX8-NEXT: s_ashr_i32 s5, s0, 31
@@ -2766,9 +2742,6 @@ define amdgpu_kernel void @sdivrem_v2i16(ptr addrspace(1) %out0, ptr addrspace(1
27662742
; GFX8-LABEL: sdivrem_v2i16:
27672743
; GFX8: ; %bb.0:
27682744
; GFX8-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x10
2769-
; GFX8-NEXT: s_add_i32 s10, s10, s15
2770-
; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
2771-
; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s11
27722745
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
27732746
; GFX8-NEXT: s_sext_i32_i16 s0, s3
27742747
; GFX8-NEXT: s_ashr_i32 s8, s0, 31
@@ -3030,9 +3003,6 @@ define amdgpu_kernel void @sdivrem_i3(ptr addrspace(1) %out0, ptr addrspace(1) %
30303003
; GFX8-LABEL: sdivrem_i3:
30313004
; GFX8: ; %bb.0:
30323005
; GFX8-NEXT: s_load_dword s4, s[6:7], 0x10
3033-
; GFX8-NEXT: s_add_i32 s10, s10, s15
3034-
; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s11
3035-
; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
30363006
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
30373007
; GFX8-NEXT: s_bfe_i32 s0, s4, 0x30008
30383008
; GFX8-NEXT: s_ashr_i32 s5, s0, 31
@@ -3184,9 +3154,6 @@ define amdgpu_kernel void @sdivrem_i27(ptr addrspace(1) %out0, ptr addrspace(1)
31843154
; GFX8-LABEL: sdivrem_i27:
31853155
; GFX8: ; %bb.0:
31863156
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x10
3187-
; GFX8-NEXT: s_add_i32 s10, s10, s15
3188-
; GFX8-NEXT: s_mov_b32 flat_scratch_lo, s11
3189-
; GFX8-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
31903157
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
31913158
; GFX8-NEXT: s_bfe_i32 s0, s5, 0x1b0000
31923159
; GFX8-NEXT: s_ashr_i32 s5, s0, 31

0 commit comments

Comments
 (0)