Skip to content

Commit 856d1c4

Browse files
authored
[AMDGPU] Fix predicates for BUFFER_ATOMIC_FMIN/FMAX patterns (#89066)
Use OtherPredicates to avoid interfering with other uses of SubtargetPredicate for GFX12.
1 parent 41b7341 commit 856d1c4

File tree

2 files changed

+73
-1
lines changed

2 files changed

+73
-1
lines changed

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1726,7 +1726,7 @@ let SubtargetPredicate = isGFX12Plus in {
17261726
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_cond_sub_u32", i32, "BUFFER_ATOMIC_COND_SUB_U32_VBUFFER", ["noret"]>;
17271727
}
17281728

1729-
let SubtargetPredicate = isGFX6GFX7GFX10Plus in {
1729+
let OtherPredicates = [isGFX6GFX7GFX10Plus] in {
17301730
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f32, "BUFFER_ATOMIC_FMIN">;
17311731
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">;
17321732
}

llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-atomics.ll

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,14 @@
44
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=GFX10
55
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=GFX1030
66
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefix=GFX1100
7+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck %s -check-prefix=GFX12
78

89
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=G_SI
910
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX7
1011
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX10
1112
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1030
1213
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1100
14+
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck %s -check-prefix=GFX12
1315

1416
declare float @llvm.amdgcn.raw.buffer.atomic.fmin.f32(float, <4 x i32>, i32, i32, i32 immarg)
1517
declare float @llvm.amdgcn.raw.buffer.atomic.fmax.f32(float, <4 x i32>, i32, i32, i32 immarg)
@@ -70,6 +72,18 @@ define amdgpu_kernel void @raw_buffer_atomic_min_noret_f32(<4 x i32> inreg %rsrc
7072
; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
7173
; GFX1100-NEXT: s_endpgm
7274
;
75+
; GFX12-LABEL: raw_buffer_atomic_min_noret_f32:
76+
; GFX12: ; %bb.0: ; %main_body
77+
; GFX12-NEXT: s_clause 0x1
78+
; GFX12-NEXT: s_load_b64 s[4:5], s[0:1], 0x34
79+
; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
80+
; GFX12-NEXT: s_wait_kmcnt 0x0
81+
; GFX12-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
82+
; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], null offen
83+
; GFX12-NEXT: s_nop 0
84+
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
85+
; GFX12-NEXT: s_endpgm
86+
;
7387
; G_SI-LABEL: raw_buffer_atomic_min_noret_f32:
7488
; G_SI: ; %bb.0: ; %main_body
7589
; G_SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd
@@ -170,6 +184,15 @@ define amdgpu_ps void @raw_buffer_atomic_min_rtn_f32(<4 x i32> inreg %rsrc, floa
170184
; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
171185
; GFX1100-NEXT: s_endpgm
172186
;
187+
; GFX12-LABEL: raw_buffer_atomic_min_rtn_f32:
188+
; GFX12: ; %bb.0: ; %main_body
189+
; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], null offen th:TH_ATOMIC_RETURN
190+
; GFX12-NEXT: s_wait_loadcnt 0x0
191+
; GFX12-NEXT: global_store_b32 v[0:1], v0, off
192+
; GFX12-NEXT: s_nop 0
193+
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
194+
; GFX12-NEXT: s_endpgm
195+
;
173196
; G_SI-LABEL: raw_buffer_atomic_min_rtn_f32:
174197
; G_SI: ; %bb.0: ; %main_body
175198
; G_SI-NEXT: buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
@@ -292,6 +315,20 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f32_off4_slc(<4 x i32> inre
292315
; GFX1100-NEXT: ds_store_b32 v1, v0
293316
; GFX1100-NEXT: s_endpgm
294317
;
318+
; GFX12-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
319+
; GFX12: ; %bb.0: ; %main_body
320+
; GFX12-NEXT: s_clause 0x1
321+
; GFX12-NEXT: s_load_b96 s[4:6], s[0:1], 0x34
322+
; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
323+
; GFX12-NEXT: s_wait_kmcnt 0x0
324+
; GFX12-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
325+
; GFX12-NEXT: s_mov_b32 s4, 4
326+
; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_NT_RETURN
327+
; GFX12-NEXT: v_mov_b32_e32 v1, s6
328+
; GFX12-NEXT: s_wait_loadcnt 0x0
329+
; GFX12-NEXT: ds_store_b32 v1, v0
330+
; GFX12-NEXT: s_endpgm
331+
;
295332
; G_SI-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
296333
; G_SI: ; %bb.0: ; %main_body
297334
; G_SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd
@@ -427,6 +464,18 @@ define amdgpu_kernel void @raw_buffer_atomic_max_noret_f32(<4 x i32> inreg %rsrc
427464
; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
428465
; GFX1100-NEXT: s_endpgm
429466
;
467+
; GFX12-LABEL: raw_buffer_atomic_max_noret_f32:
468+
; GFX12: ; %bb.0: ; %main_body
469+
; GFX12-NEXT: s_clause 0x1
470+
; GFX12-NEXT: s_load_b64 s[4:5], s[0:1], 0x34
471+
; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
472+
; GFX12-NEXT: s_wait_kmcnt 0x0
473+
; GFX12-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
474+
; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen
475+
; GFX12-NEXT: s_nop 0
476+
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
477+
; GFX12-NEXT: s_endpgm
478+
;
430479
; G_SI-LABEL: raw_buffer_atomic_max_noret_f32:
431480
; G_SI: ; %bb.0: ; %main_body
432481
; G_SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd
@@ -527,6 +576,15 @@ define amdgpu_ps void @raw_buffer_atomic_max_rtn_f32(<4 x i32> inreg %rsrc, floa
527576
; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
528577
; GFX1100-NEXT: s_endpgm
529578
;
579+
; GFX12-LABEL: raw_buffer_atomic_max_rtn_f32:
580+
; GFX12: ; %bb.0: ; %main_body
581+
; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen th:TH_ATOMIC_RETURN
582+
; GFX12-NEXT: s_wait_loadcnt 0x0
583+
; GFX12-NEXT: global_store_b32 v[0:1], v0, off
584+
; GFX12-NEXT: s_nop 0
585+
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
586+
; GFX12-NEXT: s_endpgm
587+
;
530588
; G_SI-LABEL: raw_buffer_atomic_max_rtn_f32:
531589
; G_SI: ; %bb.0: ; %main_body
532590
; G_SI-NEXT: buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
@@ -641,6 +699,20 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f32_off4_slc(<4 x i32> inre
641699
; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
642700
; GFX1100-NEXT: s_endpgm
643701
;
702+
; GFX12-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
703+
; GFX12: ; %bb.0: ; %main_body
704+
; GFX12-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
705+
; GFX12-NEXT: s_wait_kmcnt 0x0
706+
; GFX12-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
707+
; GFX12-NEXT: s_mov_b32 s4, 4
708+
; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_NT_RETURN
709+
; GFX12-NEXT: v_mov_b32_e32 v1, 0
710+
; GFX12-NEXT: s_wait_loadcnt 0x0
711+
; GFX12-NEXT: global_store_b32 v1, v0, s[6:7]
712+
; GFX12-NEXT: s_nop 0
713+
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
714+
; GFX12-NEXT: s_endpgm
715+
;
644716
; G_SI-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
645717
; G_SI: ; %bb.0: ; %main_body
646718
; G_SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9

0 commit comments

Comments
 (0)