1
1
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2
- ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX940 %s
2
+ ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX942 %s
3
3
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
4
4
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
5
5
6
6
define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic (ptr %ptr , float %data ) {
7
- ; GFX940 -LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic
8
- ; GFX940 : bb.1 (%ir-block.0):
9
- ; GFX940 -NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
10
- ; GFX940 -NEXT: {{ $}}
11
- ; GFX940 -NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
12
- ; GFX940 -NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
13
- ; GFX940 -NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
14
- ; GFX940 -NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
15
- ; GFX940 -NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
16
- ; GFX940 -NEXT: S_ENDPGM 0
7
+ ; GFX942 -LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic
8
+ ; GFX942 : bb.1 (%ir-block.0):
9
+ ; GFX942 -NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
10
+ ; GFX942 -NEXT: {{ $}}
11
+ ; GFX942 -NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
12
+ ; GFX942 -NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
13
+ ; GFX942 -NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
14
+ ; GFX942 -NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
15
+ ; GFX942 -NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
16
+ ; GFX942 -NEXT: S_ENDPGM 0
17
17
;
18
18
; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic
19
19
; GFX11: bb.1 (%ir-block.0):
@@ -30,17 +30,17 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %da
30
30
}
31
31
32
32
define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic (ptr %ptr , float %data ) {
33
- ; GFX940 -LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic
34
- ; GFX940 : bb.1 (%ir-block.0):
35
- ; GFX940 -NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
36
- ; GFX940 -NEXT: {{ $}}
37
- ; GFX940 -NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
38
- ; GFX940 -NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
39
- ; GFX940 -NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
40
- ; GFX940 -NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
41
- ; GFX940 -NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
42
- ; GFX940 -NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
43
- ; GFX940 -NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
33
+ ; GFX942 -LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic
34
+ ; GFX942 : bb.1 (%ir-block.0):
35
+ ; GFX942 -NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
36
+ ; GFX942 -NEXT: {{ $}}
37
+ ; GFX942 -NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
38
+ ; GFX942 -NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
39
+ ; GFX942 -NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
40
+ ; GFX942 -NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
41
+ ; GFX942 -NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
42
+ ; GFX942 -NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
43
+ ; GFX942 -NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
44
44
;
45
45
; GFX11-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic
46
46
; GFX11: bb.1 (%ir-block.0):
@@ -58,16 +58,16 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data
58
58
}
59
59
60
60
define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw (ptr %ptr , float %data ) {
61
- ; GFX940 -LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw
62
- ; GFX940 : bb.1 (%ir-block.0):
63
- ; GFX940 -NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
64
- ; GFX940 -NEXT: {{ $}}
65
- ; GFX940 -NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
66
- ; GFX940 -NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
67
- ; GFX940 -NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
68
- ; GFX940 -NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
69
- ; GFX940 -NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr)
70
- ; GFX940 -NEXT: S_ENDPGM 0
61
+ ; GFX942 -LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw
62
+ ; GFX942 : bb.1 (%ir-block.0):
63
+ ; GFX942 -NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
64
+ ; GFX942 -NEXT: {{ $}}
65
+ ; GFX942 -NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
66
+ ; GFX942 -NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
67
+ ; GFX942 -NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
68
+ ; GFX942 -NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
69
+ ; GFX942 -NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr)
70
+ ; GFX942 -NEXT: S_ENDPGM 0
71
71
;
72
72
; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw
73
73
; GFX11: bb.1 (%ir-block.0):
@@ -84,17 +84,17 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(ptr %ptr, float %da
84
84
}
85
85
86
86
define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw (ptr %ptr , float %data ) {
87
- ; GFX940 -LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw
88
- ; GFX940 : bb.1 (%ir-block.0):
89
- ; GFX940 -NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
90
- ; GFX940 -NEXT: {{ $}}
91
- ; GFX940 -NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
92
- ; GFX940 -NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
93
- ; GFX940 -NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
94
- ; GFX940 -NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
95
- ; GFX940 -NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr)
96
- ; GFX940 -NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
97
- ; GFX940 -NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
87
+ ; GFX942 -LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw
88
+ ; GFX942 : bb.1 (%ir-block.0):
89
+ ; GFX942 -NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
90
+ ; GFX942 -NEXT: {{ $}}
91
+ ; GFX942 -NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
92
+ ; GFX942 -NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
93
+ ; GFX942 -NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
94
+ ; GFX942 -NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
95
+ ; GFX942 -NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr)
96
+ ; GFX942 -NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
97
+ ; GFX942 -NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
98
98
;
99
99
; GFX11-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw
100
100
; GFX11: bb.1 (%ir-block.0):
0 commit comments