Skip to content

Commit d075deb

Browse files
authored
[AMDGPU] Fix chain handling when lowering barrier intrinsics (#109799)
Previously we would fail an assertion in RemoveNodeFromCSEMaps after lowering:
  t3: ch = llvm.amdgcn.s.barrier.join t0, TargetConstant:i64<2973>, Constant:i32<0>
to:
  t6: ch = S_BARRIER_JOIN_IMM TargetConstant:i32<0>
1 parent f12d72d commit d075deb

File tree

2 files changed

+117
-109
lines changed

2 files changed

+117
-109
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9365,6 +9365,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
93659365
Opc = AMDGPU::S_GET_BARRIER_STATE_IMM;
93669366
SDValue K = DAG.getTargetConstant(BarID, DL, MVT::i32);
93679367
Ops.push_back(K);
9368+
Ops.push_back(Chain);
93689369
} else {
93699370
Opc = AMDGPU::S_GET_BARRIER_STATE_M0;
93709371
SDValue M0Val = copyToM0(DAG, Chain, DL, Op.getOperand(2));
@@ -9967,7 +9968,9 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
99679968
0);
99689969
}
99699970
Ops.push_back(copyToM0(DAG, Chain, DL, M0Val).getValue(0));
9970-
} else if (!IsInlinableBarID) {
9971+
} else if (IsInlinableBarID) {
9972+
Ops.push_back(Chain);
9973+
} else {
99719974
Ops.push_back(copyToM0(DAG, Chain, DL, BarOp).getValue(0));
99729975
}
99739976

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.wait.ll

Lines changed: 113 additions & 108 deletions
Original file line number | Diff line number | Diff line change
@@ -768,17 +768,19 @@ define void @test5_s_barrier_init_m0(i32 %arg1 ,i32 %arg2) {
768768
}
769769

770770
define amdgpu_kernel void @test1_s_barrier_join(ptr addrspace(1) %out) #0 {
771+
;
771772
; GFX12-SDAG-LABEL: test1_s_barrier_join:
772773
; GFX12-SDAG: ; %bb.0: ; %entry
773774
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
774775
; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
775-
; GFX12-SDAG-NEXT: s_barrier_join -1
776-
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
776+
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
777+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0
777778
; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0
778-
; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
779779
; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0
780780
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
781-
; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
781+
; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1]
782+
; GFX12-SDAG-NEXT: s_barrier_join -1
783+
; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1]
782784
; GFX12-SDAG-NEXT: s_nop 0
783785
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
784786
; GFX12-SDAG-NEXT: s_endpgm
@@ -810,17 +812,19 @@ entry:
810812
}
811813

812814
define amdgpu_kernel void @test2_s_barrier_join(ptr addrspace(1) %out) #0 {
815+
;
813816
; GFX12-SDAG-LABEL: test2_s_barrier_join:
814817
; GFX12-SDAG: ; %bb.0: ; %entry
815818
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
816819
; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
817-
; GFX12-SDAG-NEXT: s_barrier_join 1
818-
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
820+
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
821+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0
819822
; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0
820-
; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
821823
; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0
822824
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
823-
; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
825+
; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1]
826+
; GFX12-SDAG-NEXT: s_barrier_join 1
827+
; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1]
824828
; GFX12-SDAG-NEXT: s_nop 0
825829
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
826830
; GFX12-SDAG-NEXT: s_endpgm
@@ -852,17 +856,19 @@ entry:
852856
}
853857

854858
define amdgpu_kernel void @test3_s_barrier_join(ptr addrspace(1) %out) #0 {
859+
;
855860
; GFX12-SDAG-LABEL: test3_s_barrier_join:
856861
; GFX12-SDAG: ; %bb.0: ; %entry
857862
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
858863
; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
859-
; GFX12-SDAG-NEXT: s_barrier_join 0
860-
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
864+
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
865+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0
861866
; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0
862-
; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
863867
; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0
864868
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
865-
; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
869+
; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1]
870+
; GFX12-SDAG-NEXT: s_barrier_join 0
871+
; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1]
866872
; GFX12-SDAG-NEXT: s_nop 0
867873
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
868874
; GFX12-SDAG-NEXT: s_endpgm
@@ -967,6 +973,20 @@ define void @test5_s_barrier_join_m0(i32 %arg) {
967973
ret void
968974
}
969975

976+
define void @test6_s_barrier_join_0() {
977+
; GFX12-LABEL: test6_s_barrier_join_0:
978+
; GFX12: ; %bb.0:
979+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
980+
; GFX12-NEXT: s_wait_expcnt 0x0
981+
; GFX12-NEXT: s_wait_samplecnt 0x0
982+
; GFX12-NEXT: s_wait_bvhcnt 0x0
983+
; GFX12-NEXT: s_wait_kmcnt 0x0
984+
; GFX12-NEXT: s_barrier_join 0
985+
; GFX12-NEXT: s_setpc_b64 s[30:31]
986+
call void @llvm.amdgcn.s.barrier.join(i32 0)
987+
ret void
988+
}
989+
970990
define amdgpu_kernel void @test1_s_barrier_leave(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %out) #0 {
971991
; GFX12-SDAG-LABEL: test1_s_barrier_leave:
972992
; GFX12-SDAG: ; %bb.0: ; %entry
@@ -1026,17 +1046,19 @@ entry:
10261046
}
10271047

10281048
define amdgpu_kernel void @test1_s_wakeup_barrier(ptr addrspace(1) %out) #0 {
1049+
;
10291050
; GFX12-SDAG-LABEL: test1_s_wakeup_barrier:
10301051
; GFX12-SDAG: ; %bb.0: ; %entry
10311052
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
10321053
; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1033-
; GFX12-SDAG-NEXT: s_wakeup_barrier -1
1034-
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1054+
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1055+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0
10351056
; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0
1036-
; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
10371057
; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0
10381058
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
1039-
; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
1059+
; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1]
1060+
; GFX12-SDAG-NEXT: s_wakeup_barrier -1
1061+
; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1]
10401062
; GFX12-SDAG-NEXT: s_nop 0
10411063
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
10421064
; GFX12-SDAG-NEXT: s_endpgm
@@ -1068,17 +1090,19 @@ entry:
10681090
}
10691091

10701092
define amdgpu_kernel void @test2_s_wakeup_barrier(ptr addrspace(1) %out) #0 {
1093+
;
10711094
; GFX12-SDAG-LABEL: test2_s_wakeup_barrier:
10721095
; GFX12-SDAG: ; %bb.0: ; %entry
10731096
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
10741097
; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1075-
; GFX12-SDAG-NEXT: s_wakeup_barrier 1
1076-
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1098+
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1099+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0
10771100
; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0
1078-
; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
10791101
; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0
10801102
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
1081-
; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
1103+
; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1]
1104+
; GFX12-SDAG-NEXT: s_wakeup_barrier 1
1105+
; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1]
10821106
; GFX12-SDAG-NEXT: s_nop 0
10831107
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
10841108
; GFX12-SDAG-NEXT: s_endpgm
@@ -1110,17 +1134,19 @@ entry:
11101134
}
11111135

11121136
define amdgpu_kernel void @test3_s_wakeup_barrier(ptr addrspace(1) %out) #0 {
1137+
;
11131138
; GFX12-SDAG-LABEL: test3_s_wakeup_barrier:
11141139
; GFX12-SDAG: ; %bb.0: ; %entry
11151140
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
11161141
; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1117-
; GFX12-SDAG-NEXT: s_wakeup_barrier 0
1118-
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1142+
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1143+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0
11191144
; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0
1120-
; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
11211145
; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0
11221146
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
1123-
; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
1147+
; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1]
1148+
; GFX12-SDAG-NEXT: s_wakeup_barrier 0
1149+
; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1]
11241150
; GFX12-SDAG-NEXT: s_nop 0
11251151
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
11261152
; GFX12-SDAG-NEXT: s_endpgm
@@ -1226,34 +1252,21 @@ define void @test5_s_wakeup_barrier_m0(i32 %arg) {
12261252
}
12271253

12281254
define amdgpu_kernel void @test1_s_get_barrier_state(ptr addrspace(1) %out) #0 {
1229-
; GFX12-SDAG-LABEL: test1_s_get_barrier_state:
1230-
; GFX12-SDAG: ; %bb.0: ; %entry
1231-
; GFX12-SDAG-NEXT: s_get_barrier_state s4, -1
1232-
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1233-
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
1234-
; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1235-
; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_and_b32 v0, 0x3ff, v0
1236-
; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1237-
; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
1238-
; GFX12-SDAG-NEXT: s_nop 0
1239-
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1240-
; GFX12-SDAG-NEXT: s_endpgm
1241-
;
1242-
; GFX12-GISEL-LABEL: test1_s_get_barrier_state:
1243-
; GFX12-GISEL: ; %bb.0: ; %entry
1244-
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1245-
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
1246-
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2)
1247-
; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1248-
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1249-
; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
1250-
; GFX12-GISEL-NEXT: s_get_barrier_state s2, -1
1251-
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1252-
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
1253-
; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
1254-
; GFX12-GISEL-NEXT: s_nop 0
1255-
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1256-
; GFX12-GISEL-NEXT: s_endpgm
1255+
; GFX12-LABEL: test1_s_get_barrier_state:
1256+
; GFX12: ; %bb.0: ; %entry
1257+
; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1258+
; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
1259+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2)
1260+
; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1261+
; GFX12-NEXT: s_wait_kmcnt 0x0
1262+
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
1263+
; GFX12-NEXT: s_get_barrier_state s2, -1
1264+
; GFX12-NEXT: s_wait_kmcnt 0x0
1265+
; GFX12-NEXT: v_mov_b32_e32 v1, s2
1266+
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
1267+
; GFX12-NEXT: s_nop 0
1268+
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1269+
; GFX12-NEXT: s_endpgm
12571270
entry:
12581271
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
12591272
%tmp1 = getelementptr i32, ptr addrspace(1) %out, i32 %tmp
@@ -1264,34 +1277,21 @@ entry:
12641277
}
12651278

12661279
define amdgpu_kernel void @test2_s_get_barrier_state(ptr addrspace(1) %out) #0 {
1267-
; GFX12-SDAG-LABEL: test2_s_get_barrier_state:
1268-
; GFX12-SDAG: ; %bb.0: ; %entry
1269-
; GFX12-SDAG-NEXT: s_get_barrier_state s4, 1
1270-
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1271-
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
1272-
; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1273-
; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_and_b32 v0, 0x3ff, v0
1274-
; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1275-
; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
1276-
; GFX12-SDAG-NEXT: s_nop 0
1277-
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1278-
; GFX12-SDAG-NEXT: s_endpgm
1279-
;
1280-
; GFX12-GISEL-LABEL: test2_s_get_barrier_state:
1281-
; GFX12-GISEL: ; %bb.0: ; %entry
1282-
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1283-
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
1284-
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2)
1285-
; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1286-
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1287-
; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
1288-
; GFX12-GISEL-NEXT: s_get_barrier_state s2, 1
1289-
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1290-
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
1291-
; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
1292-
; GFX12-GISEL-NEXT: s_nop 0
1293-
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1294-
; GFX12-GISEL-NEXT: s_endpgm
1280+
; GFX12-LABEL: test2_s_get_barrier_state:
1281+
; GFX12: ; %bb.0: ; %entry
1282+
; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1283+
; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
1284+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2)
1285+
; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1286+
; GFX12-NEXT: s_wait_kmcnt 0x0
1287+
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
1288+
; GFX12-NEXT: s_get_barrier_state s2, 1
1289+
; GFX12-NEXT: s_wait_kmcnt 0x0
1290+
; GFX12-NEXT: v_mov_b32_e32 v1, s2
1291+
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
1292+
; GFX12-NEXT: s_nop 0
1293+
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1294+
; GFX12-NEXT: s_endpgm
12951295
entry:
12961296
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
12971297
%tmp1 = getelementptr i32, ptr addrspace(1) %out, i32 %tmp
@@ -1302,34 +1302,21 @@ entry:
13021302
}
13031303

13041304
define amdgpu_kernel void @test3_s_get_barrier_state(ptr addrspace(1) %out) #0 {
1305-
; GFX12-SDAG-LABEL: test3_s_get_barrier_state:
1306-
; GFX12-SDAG: ; %bb.0: ; %entry
1307-
; GFX12-SDAG-NEXT: s_get_barrier_state s4, 0
1308-
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1309-
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
1310-
; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1311-
; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_and_b32 v0, 0x3ff, v0
1312-
; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1313-
; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
1314-
; GFX12-SDAG-NEXT: s_nop 0
1315-
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1316-
; GFX12-SDAG-NEXT: s_endpgm
1317-
;
1318-
; GFX12-GISEL-LABEL: test3_s_get_barrier_state:
1319-
; GFX12-GISEL: ; %bb.0: ; %entry
1320-
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1321-
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
1322-
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2)
1323-
; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1324-
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1325-
; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
1326-
; GFX12-GISEL-NEXT: s_get_barrier_state s2, 0
1327-
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1328-
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
1329-
; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
1330-
; GFX12-GISEL-NEXT: s_nop 0
1331-
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1332-
; GFX12-GISEL-NEXT: s_endpgm
1305+
; GFX12-LABEL: test3_s_get_barrier_state:
1306+
; GFX12: ; %bb.0: ; %entry
1307+
; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1308+
; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
1309+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2)
1310+
; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1311+
; GFX12-NEXT: s_wait_kmcnt 0x0
1312+
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
1313+
; GFX12-NEXT: s_get_barrier_state s2, 0
1314+
; GFX12-NEXT: s_wait_kmcnt 0x0
1315+
; GFX12-NEXT: v_mov_b32_e32 v1, s2
1316+
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
1317+
; GFX12-NEXT: s_nop 0
1318+
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1319+
; GFX12-NEXT: s_endpgm
13331320
entry:
13341321
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
13351322
%tmp1 = getelementptr i32, ptr addrspace(1) %out, i32 %tmp
@@ -1401,6 +1388,24 @@ define i32 @test5_s_get_barrier_state_m0(i32 %arg) {
14011388
ret i32 %state
14021389
}
14031390

1391+
define i32 @test6_s_get_barrier_state_0() {
1392+
; GFX12-LABEL: test6_s_get_barrier_state_0:
1393+
; GFX12: ; %bb.0:
1394+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1395+
; GFX12-NEXT: s_wait_expcnt 0x0
1396+
; GFX12-NEXT: s_wait_samplecnt 0x0
1397+
; GFX12-NEXT: s_wait_bvhcnt 0x0
1398+
; GFX12-NEXT: s_wait_kmcnt 0x0
1399+
; GFX12-NEXT: s_get_barrier_state s0, 0
1400+
; GFX12-NEXT: s_wait_kmcnt 0x0
1401+
; GFX12-NEXT: s_wait_alu 0xfffe
1402+
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1403+
; GFX12-NEXT: v_mov_b32_e32 v0, s0
1404+
; GFX12-NEXT: s_setpc_b64 s[30:31]
1405+
%state = call i32 @llvm.amdgcn.s.get.barrier.state(i32 0)
1406+
ret i32 %state
1407+
}
1408+
14041409
define amdgpu_kernel void @test_barrier_convert(ptr addrspace(1) %out) #0 {
14051410
; GFX12-SDAG-LABEL: test_barrier_convert:
14061411
; GFX12-SDAG: ; %bb.0: ; %entry

0 commit comments

Comments
 (0)