Skip to content

Commit a33a84e

Browse files
authored
[AMDGPU][NFC] Replace gfx940 and gfx941 with gfx942 in llvm/test (#125711)
[AMDGPU][NFC] Replace gfx940 and gfx941 with gfx942 in llvm/test gfx940 and gfx941 are no longer supported. This is one of a series of PRs to remove them from the code base. This PR uses gfx942 instead of gfx940 and gfx941 in the test RUN-lines (unless there is already a RUN-line for gfx942). The only notable difference in the test output is that gfx942 does not force the use of sc0 and sc1 on stores while gfx940 and gfx941 do (cf. https://reviews.llvm.org/D149986). For SWDEV-512631
1 parent c4ed95c commit a33a84e

File tree

277 files changed

+302728
-302980
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

277 files changed

+302728
-302980
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll

Lines changed: 224 additions & 224 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmin.ll

Lines changed: 224 additions & 224 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll

Lines changed: 116 additions & 116 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll

Lines changed: 124 additions & 124 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll

Lines changed: 310 additions & 310 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll

Lines changed: 116 additions & 116 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll

Lines changed: 124 additions & 124 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll

Lines changed: 43 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
11
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX940 %s
2+
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX942 %s
33
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
44
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
55

66
define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %data) {
7-
; GFX940-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic
8-
; GFX940: bb.1 (%ir-block.0):
9-
; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
10-
; GFX940-NEXT: {{ $}}
11-
; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
12-
; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
13-
; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
14-
; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
15-
; GFX940-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
16-
; GFX940-NEXT: S_ENDPGM 0
7+
; GFX942-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic
8+
; GFX942: bb.1 (%ir-block.0):
9+
; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
10+
; GFX942-NEXT: {{ $}}
11+
; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
12+
; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
13+
; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
14+
; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
15+
; GFX942-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
16+
; GFX942-NEXT: S_ENDPGM 0
1717
;
1818
; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic
1919
; GFX11: bb.1 (%ir-block.0):
@@ -30,17 +30,17 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %da
3030
}
3131

3232
define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data) {
33-
; GFX940-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic
34-
; GFX940: bb.1 (%ir-block.0):
35-
; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
36-
; GFX940-NEXT: {{ $}}
37-
; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
38-
; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
39-
; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
40-
; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
41-
; GFX940-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
42-
; GFX940-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
43-
; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
33+
; GFX942-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic
34+
; GFX942: bb.1 (%ir-block.0):
35+
; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
36+
; GFX942-NEXT: {{ $}}
37+
; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
38+
; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
39+
; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
40+
; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
41+
; GFX942-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
42+
; GFX942-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
43+
; GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
4444
;
4545
; GFX11-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic
4646
; GFX11: bb.1 (%ir-block.0):
@@ -58,16 +58,16 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data
5858
}
5959

6060
define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(ptr %ptr, float %data) {
61-
; GFX940-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw
62-
; GFX940: bb.1 (%ir-block.0):
63-
; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
64-
; GFX940-NEXT: {{ $}}
65-
; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
66-
; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
67-
; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
68-
; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
69-
; GFX940-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr)
70-
; GFX940-NEXT: S_ENDPGM 0
61+
; GFX942-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw
62+
; GFX942: bb.1 (%ir-block.0):
63+
; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
64+
; GFX942-NEXT: {{ $}}
65+
; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
66+
; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
67+
; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
68+
; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
69+
; GFX942-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr)
70+
; GFX942-NEXT: S_ENDPGM 0
7171
;
7272
; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw
7373
; GFX11: bb.1 (%ir-block.0):
@@ -84,17 +84,17 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(ptr %ptr, float %da
8484
}
8585

8686
define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw(ptr %ptr, float %data) {
87-
; GFX940-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw
88-
; GFX940: bb.1 (%ir-block.0):
89-
; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
90-
; GFX940-NEXT: {{ $}}
91-
; GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
92-
; GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
93-
; GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
94-
; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
95-
; GFX940-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr)
96-
; GFX940-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
97-
; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
87+
; GFX942-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw
88+
; GFX942: bb.1 (%ir-block.0):
89+
; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
90+
; GFX942-NEXT: {{ $}}
91+
; GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
92+
; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
93+
; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
94+
; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
95+
; GFX942-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr)
96+
; GFX942-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
97+
; GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
9898
;
9999
; GFX11-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw
100100
; GFX11: bb.1 (%ir-block.0):

llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,43 @@
11
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
3-
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
2+
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
3+
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
44

55
define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %data) {
6-
; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw
7-
; GFX90A_GFX940: bb.1 (%ir-block.0):
8-
; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
9-
; GFX90A_GFX940-NEXT: {{ $}}
10-
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
11-
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
12-
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
13-
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
14-
; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
15-
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
16-
; GFX90A_GFX940-NEXT: FLAT_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
17-
; GFX90A_GFX940-NEXT: S_ENDPGM 0
6+
; GFX90A_GFX942-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw
7+
; GFX90A_GFX942: bb.1 (%ir-block.0):
8+
; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
9+
; GFX90A_GFX942-NEXT: {{ $}}
10+
; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
11+
; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
12+
; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
13+
; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
14+
; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
15+
; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
16+
; GFX90A_GFX942-NEXT: FLAT_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
17+
; GFX90A_GFX942-NEXT: S_ENDPGM 0
1818
%ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
1919
ret void
2020
}
2121

2222
define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %data) {
23-
; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_rtn_atomicrmw
24-
; GFX90A_GFX940: bb.1 (%ir-block.0):
25-
; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
26-
; GFX90A_GFX940-NEXT: {{ $}}
27-
; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
28-
; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
29-
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
30-
; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
31-
; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
32-
; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
33-
; GFX90A_GFX940-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
34-
; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0
35-
; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1
36-
; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
37-
; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
38-
; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
39-
; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
40-
; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
23+
; GFX90A_GFX942-LABEL: name: flat_atomic_fadd_f64_rtn_atomicrmw
24+
; GFX90A_GFX942: bb.1 (%ir-block.0):
25+
; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
26+
; GFX90A_GFX942-NEXT: {{ $}}
27+
; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
28+
; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
29+
; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
30+
; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
31+
; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
32+
; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
33+
; GFX90A_GFX942-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
34+
; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0
35+
; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1
36+
; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
37+
; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
38+
; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
39+
; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
40+
; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
4141
%ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
4242
ret double %ret
4343
}

0 commit comments

Comments
 (0)