|
4 | 4 | ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=GFX10
|
5 | 5 | ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=GFX1030
|
6 | 6 | ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefix=GFX1100
|
| 7 | +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck %s -check-prefix=GFX12 |
7 | 8 |
|
8 | 9 | ; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=G_SI
|
9 | 10 | ; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX7
|
10 | 11 | ; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX10
|
11 | 12 | ; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1030
|
12 | 13 | ; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1100
|
| 14 | +; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck %s -check-prefix=GFX12 |
13 | 15 |
|
14 | 16 | declare float @llvm.amdgcn.raw.buffer.atomic.fmin.f32(float, <4 x i32>, i32, i32, i32 immarg)
|
15 | 17 | declare float @llvm.amdgcn.raw.buffer.atomic.fmax.f32(float, <4 x i32>, i32, i32, i32 immarg)
|
@@ -70,6 +72,18 @@ define amdgpu_kernel void @raw_buffer_atomic_min_noret_f32(<4 x i32> inreg %rsrc
|
70 | 72 | ; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
71 | 73 | ; GFX1100-NEXT: s_endpgm
|
72 | 74 | ;
|
| 75 | +; GFX12-LABEL: raw_buffer_atomic_min_noret_f32: |
| 76 | +; GFX12: ; %bb.0: ; %main_body |
| 77 | +; GFX12-NEXT: s_clause 0x1 |
| 78 | +; GFX12-NEXT: s_load_b64 s[4:5], s[0:1], 0x34 |
| 79 | +; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 |
| 80 | +; GFX12-NEXT: s_wait_kmcnt 0x0 |
| 81 | +; GFX12-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 |
| 82 | +; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], null offen |
| 83 | +; GFX12-NEXT: s_nop 0 |
| 84 | +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) |
| 85 | +; GFX12-NEXT: s_endpgm |
| 86 | +; |
73 | 87 | ; G_SI-LABEL: raw_buffer_atomic_min_noret_f32:
|
74 | 88 | ; G_SI: ; %bb.0: ; %main_body
|
75 | 89 | ; G_SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd
|
@@ -170,6 +184,15 @@ define amdgpu_ps void @raw_buffer_atomic_min_rtn_f32(<4 x i32> inreg %rsrc, floa
|
170 | 184 | ; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
171 | 185 | ; GFX1100-NEXT: s_endpgm
|
172 | 186 | ;
|
| 187 | +; GFX12-LABEL: raw_buffer_atomic_min_rtn_f32: |
| 188 | +; GFX12: ; %bb.0: ; %main_body |
| 189 | +; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], null offen th:TH_ATOMIC_RETURN |
| 190 | +; GFX12-NEXT: s_wait_loadcnt 0x0 |
| 191 | +; GFX12-NEXT: global_store_b32 v[0:1], v0, off |
| 192 | +; GFX12-NEXT: s_nop 0 |
| 193 | +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) |
| 194 | +; GFX12-NEXT: s_endpgm |
| 195 | +; |
173 | 196 | ; G_SI-LABEL: raw_buffer_atomic_min_rtn_f32:
|
174 | 197 | ; G_SI: ; %bb.0: ; %main_body
|
175 | 198 | ; G_SI-NEXT: buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
|
@@ -292,6 +315,20 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f32_off4_slc(<4 x i32> inre
|
292 | 315 | ; GFX1100-NEXT: ds_store_b32 v1, v0
|
293 | 316 | ; GFX1100-NEXT: s_endpgm
|
294 | 317 | ;
|
| 318 | +; GFX12-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc: |
| 319 | +; GFX12: ; %bb.0: ; %main_body |
| 320 | +; GFX12-NEXT: s_clause 0x1 |
| 321 | +; GFX12-NEXT: s_load_b96 s[4:6], s[0:1], 0x34 |
| 322 | +; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 |
| 323 | +; GFX12-NEXT: s_wait_kmcnt 0x0 |
| 324 | +; GFX12-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 |
| 325 | +; GFX12-NEXT: s_mov_b32 s4, 4 |
| 326 | +; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_NT_RETURN |
| 327 | +; GFX12-NEXT: v_mov_b32_e32 v1, s6 |
| 328 | +; GFX12-NEXT: s_wait_loadcnt 0x0 |
| 329 | +; GFX12-NEXT: ds_store_b32 v1, v0 |
| 330 | +; GFX12-NEXT: s_endpgm |
| 331 | +; |
295 | 332 | ; G_SI-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
|
296 | 333 | ; G_SI: ; %bb.0: ; %main_body
|
297 | 334 | ; G_SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd
|
@@ -427,6 +464,18 @@ define amdgpu_kernel void @raw_buffer_atomic_max_noret_f32(<4 x i32> inreg %rsrc
|
427 | 464 | ; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
428 | 465 | ; GFX1100-NEXT: s_endpgm
|
429 | 466 | ;
|
| 467 | +; GFX12-LABEL: raw_buffer_atomic_max_noret_f32: |
| 468 | +; GFX12: ; %bb.0: ; %main_body |
| 469 | +; GFX12-NEXT: s_clause 0x1 |
| 470 | +; GFX12-NEXT: s_load_b64 s[4:5], s[0:1], 0x34 |
| 471 | +; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 |
| 472 | +; GFX12-NEXT: s_wait_kmcnt 0x0 |
| 473 | +; GFX12-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 |
| 474 | +; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen |
| 475 | +; GFX12-NEXT: s_nop 0 |
| 476 | +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) |
| 477 | +; GFX12-NEXT: s_endpgm |
| 478 | +; |
430 | 479 | ; G_SI-LABEL: raw_buffer_atomic_max_noret_f32:
|
431 | 480 | ; G_SI: ; %bb.0: ; %main_body
|
432 | 481 | ; G_SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd
|
@@ -527,6 +576,15 @@ define amdgpu_ps void @raw_buffer_atomic_max_rtn_f32(<4 x i32> inreg %rsrc, floa
|
527 | 576 | ; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
528 | 577 | ; GFX1100-NEXT: s_endpgm
|
529 | 578 | ;
|
| 579 | +; GFX12-LABEL: raw_buffer_atomic_max_rtn_f32: |
| 580 | +; GFX12: ; %bb.0: ; %main_body |
| 581 | +; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen th:TH_ATOMIC_RETURN |
| 582 | +; GFX12-NEXT: s_wait_loadcnt 0x0 |
| 583 | +; GFX12-NEXT: global_store_b32 v[0:1], v0, off |
| 584 | +; GFX12-NEXT: s_nop 0 |
| 585 | +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) |
| 586 | +; GFX12-NEXT: s_endpgm |
| 587 | +; |
530 | 588 | ; G_SI-LABEL: raw_buffer_atomic_max_rtn_f32:
|
531 | 589 | ; G_SI: ; %bb.0: ; %main_body
|
532 | 590 | ; G_SI-NEXT: buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
|
@@ -641,6 +699,20 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f32_off4_slc(<4 x i32> inre
|
641 | 699 | ; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
642 | 700 | ; GFX1100-NEXT: s_endpgm
|
643 | 701 | ;
|
| 702 | +; GFX12-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc: |
| 703 | +; GFX12: ; %bb.0: ; %main_body |
| 704 | +; GFX12-NEXT: s_load_b256 s[0:7], s[0:1], 0x24 |
| 705 | +; GFX12-NEXT: s_wait_kmcnt 0x0 |
| 706 | +; GFX12-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 |
| 707 | +; GFX12-NEXT: s_mov_b32 s4, 4 |
| 708 | +; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_NT_RETURN |
| 709 | +; GFX12-NEXT: v_mov_b32_e32 v1, 0 |
| 710 | +; GFX12-NEXT: s_wait_loadcnt 0x0 |
| 711 | +; GFX12-NEXT: global_store_b32 v1, v0, s[6:7] |
| 712 | +; GFX12-NEXT: s_nop 0 |
| 713 | +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) |
| 714 | +; GFX12-NEXT: s_endpgm |
| 715 | +; |
644 | 716 | ; G_SI-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
|
645 | 717 | ; G_SI: ; %bb.0: ; %main_body
|
646 | 718 | ; G_SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9
|
|
0 commit comments