@@ -279,33 +279,19 @@ end:
279
279
}
280
280
281
281
define amdgpu_cs void @test_sink_smem_offset_neg400 (ptr addrspace (4 ) inreg %ptr , i32 inreg %val ) {
282
- ; GFX678-LABEL: test_sink_smem_offset_neg400:
283
- ; GFX678: ; %bb.0: ; %entry
284
- ; GFX678-NEXT: s_add_u32 s0, s0, 0xfffffe70
285
- ; GFX678-NEXT: s_addc_u32 s1, s1, -1
286
- ; GFX678-NEXT: .LBB5_1: ; %loop
287
- ; GFX678-NEXT: ; =>This Inner Loop Header: Depth=1
288
- ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
289
- ; GFX678-NEXT: s_load_dword s3, s[0:1], 0x0
290
- ; GFX678-NEXT: s_add_i32 s2, s2, -1
291
- ; GFX678-NEXT: s_cmp_lg_u32 s2, 0
292
- ; GFX678-NEXT: s_cbranch_scc1 .LBB5_1
293
- ; GFX678-NEXT: ; %bb.2: ; %end
294
- ; GFX678-NEXT: s_endpgm
295
- ;
296
- ; GFX9-LABEL: test_sink_smem_offset_neg400:
297
- ; GFX9: ; %bb.0: ; %entry
298
- ; GFX9-NEXT: .LBB5_1: ; %loop
299
- ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
300
- ; GFX9-NEXT: s_add_i32 s2, s2, -1
301
- ; GFX9-NEXT: s_add_u32 s4, s0, 0xfffffe70
302
- ; GFX9-NEXT: s_addc_u32 s5, s1, -1
303
- ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
304
- ; GFX9-NEXT: s_load_dword s3, s[4:5], 0x0
305
- ; GFX9-NEXT: s_cmp_lg_u32 s2, 0
306
- ; GFX9-NEXT: s_cbranch_scc1 .LBB5_1
307
- ; GFX9-NEXT: ; %bb.2: ; %end
308
- ; GFX9-NEXT: s_endpgm
282
+ ; GFX6789-LABEL: test_sink_smem_offset_neg400:
283
+ ; GFX6789: ; %bb.0: ; %entry
284
+ ; GFX6789-NEXT: s_add_u32 s0, s0, 0xfffffe70
285
+ ; GFX6789-NEXT: s_addc_u32 s1, s1, -1
286
+ ; GFX6789-NEXT: .LBB5_1: ; %loop
287
+ ; GFX6789-NEXT: ; =>This Inner Loop Header: Depth=1
288
+ ; GFX6789-NEXT: s_waitcnt lgkmcnt(0)
289
+ ; GFX6789-NEXT: s_load_dword s3, s[0:1], 0x0
290
+ ; GFX6789-NEXT: s_add_i32 s2, s2, -1
291
+ ; GFX6789-NEXT: s_cmp_lg_u32 s2, 0
292
+ ; GFX6789-NEXT: s_cbranch_scc1 .LBB5_1
293
+ ; GFX6789-NEXT: ; %bb.2: ; %end
294
+ ; GFX6789-NEXT: s_endpgm
309
295
;
310
296
; GFX12-LABEL: test_sink_smem_offset_neg400:
311
297
; GFX12: ; %bb.0: ; %entry
@@ -337,3 +323,52 @@ loop:
337
323
end:
338
324
ret void
339
325
}
326
+
327
+ ; Same as above, but the pointer is in address space 6 (constant 32-bit) instead of address space 4.
328
+ define amdgpu_cs void @test_sink_smem_offset_neg400_32bit (ptr addrspace (6 ) inreg %ptr , i32 inreg %val ) {
329
+ ; GFX6789-LABEL: test_sink_smem_offset_neg400_32bit:
330
+ ; GFX6789: ; %bb.0: ; %entry
331
+ ; GFX6789-NEXT: s_add_i32 s2, s0, 0xfffffe70
332
+ ; GFX6789-NEXT: s_mov_b32 s3, 0
333
+ ; GFX6789-NEXT: .LBB6_1: ; %loop
334
+ ; GFX6789-NEXT: ; =>This Inner Loop Header: Depth=1
335
+ ; GFX6789-NEXT: s_waitcnt lgkmcnt(0)
336
+ ; GFX6789-NEXT: s_load_dword s0, s[2:3], 0x0
337
+ ; GFX6789-NEXT: s_add_i32 s1, s1, -1
338
+ ; GFX6789-NEXT: s_cmp_lg_u32 s1, 0
339
+ ; GFX6789-NEXT: s_cbranch_scc1 .LBB6_1
340
+ ; GFX6789-NEXT: ; %bb.2: ; %end
341
+ ; GFX6789-NEXT: s_endpgm
342
+ ;
343
+ ; GFX12-LABEL: test_sink_smem_offset_neg400_32bit:
344
+ ; GFX12: ; %bb.0: ; %entry
345
+ ; GFX12-NEXT: s_add_co_i32 s2, s0, 0xfffffe70
346
+ ; GFX12-NEXT: s_mov_b32 s3, 0
347
+ ; GFX12-NEXT: .LBB6_1: ; %loop
348
+ ; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
349
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
350
+ ; GFX12-NEXT: s_load_b32 s0, s[2:3], 0x0
351
+ ; GFX12-NEXT: s_add_co_i32 s1, s1, -1
352
+ ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
353
+ ; GFX12-NEXT: s_cmp_lg_u32 s1, 0
354
+ ; GFX12-NEXT: s_cbranch_scc1 .LBB6_1
355
+ ; GFX12-NEXT: ; %bb.2: ; %end
356
+ ; GFX12-NEXT: s_endpgm
357
+ entry:
358
+ %gep = getelementptr i8 , ptr addrspace (6 ) %ptr , i64 -400 ; %ptr - 400 bytes; the checks above expect this add (0xfffffe70) once in %entry and a 0x0 load offset in the loop
359
+ br label %loop
360
+
361
+ loop:
362
+ %count = phi i32 [ %dec , %loop ], [ %val , %entry ] ; loop counter: starts at %val, then takes the decremented value
363
+ %dec = sub i32 %count , 1
364
+ %load = load volatile i32 , ptr addrspace (6 ) %gep ; volatile load from %gep on every iteration; the loaded value is not used
365
+ %cond = icmp eq i32 %dec , 0 ; leave the loop once the counter reaches zero
366
+ br i1 %cond , label %end , label %loop
367
+
368
+ end:
369
+ ret void
370
+ }
371
+
372
+ ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
373
+ ; GFX678: {{.*}}
374
+ ; GFX9: {{.*}}
0 commit comments