@@ -768,17 +768,19 @@ define void @test5_s_barrier_init_m0(i32 %arg1 ,i32 %arg2) {
768
768
}
769
769
770
770
define amdgpu_kernel void @test1_s_barrier_join (ptr addrspace (1 ) %out ) #0 {
771
+ ;
771
772
; GFX12-SDAG-LABEL: test1_s_barrier_join:
772
773
; GFX12-SDAG: ; %bb.0: ; %entry
773
774
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
774
775
; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
775
- ; GFX12-SDAG-NEXT: s_barrier_join -1
776
- ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
776
+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
777
+ ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0
777
778
; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0
778
- ; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
779
779
; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0
780
780
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
781
- ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
781
+ ; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1]
782
+ ; GFX12-SDAG-NEXT: s_barrier_join -1
783
+ ; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1]
782
784
; GFX12-SDAG-NEXT: s_nop 0
783
785
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
784
786
; GFX12-SDAG-NEXT: s_endpgm
@@ -810,17 +812,19 @@ entry:
810
812
}
811
813
812
814
define amdgpu_kernel void @test2_s_barrier_join (ptr addrspace (1 ) %out ) #0 {
815
+ ;
813
816
; GFX12-SDAG-LABEL: test2_s_barrier_join:
814
817
; GFX12-SDAG: ; %bb.0: ; %entry
815
818
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
816
819
; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
817
- ; GFX12-SDAG-NEXT: s_barrier_join 1
818
- ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
820
+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
821
+ ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0
819
822
; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0
820
- ; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
821
823
; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0
822
824
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
823
- ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
825
+ ; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1]
826
+ ; GFX12-SDAG-NEXT: s_barrier_join 1
827
+ ; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1]
824
828
; GFX12-SDAG-NEXT: s_nop 0
825
829
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
826
830
; GFX12-SDAG-NEXT: s_endpgm
@@ -852,17 +856,19 @@ entry:
852
856
}
853
857
854
858
define amdgpu_kernel void @test3_s_barrier_join (ptr addrspace (1 ) %out ) #0 {
859
+ ;
855
860
; GFX12-SDAG-LABEL: test3_s_barrier_join:
856
861
; GFX12-SDAG: ; %bb.0: ; %entry
857
862
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
858
863
; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
859
- ; GFX12-SDAG-NEXT: s_barrier_join 0
860
- ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
864
+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
865
+ ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0
861
866
; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0
862
- ; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
863
867
; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0
864
868
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
865
- ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
869
+ ; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1]
870
+ ; GFX12-SDAG-NEXT: s_barrier_join 0
871
+ ; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1]
866
872
; GFX12-SDAG-NEXT: s_nop 0
867
873
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
868
874
; GFX12-SDAG-NEXT: s_endpgm
@@ -967,6 +973,20 @@ define void @test5_s_barrier_join_m0(i32 %arg) {
967
973
ret void
968
974
}
969
975
976
; test6_s_barrier_join_0: a plain `define void` function (no amdgpu_kernel
; calling convention, no arguments) whose only work is a call to
; @llvm.amdgcn.s.barrier.join with the constant operand i32 0.
; The check lines below expect the s_wait_* counter sequence first, then
; `s_barrier_join 0` with the operand encoded as an immediate, then the
; return through s_setpc_b64 s[30:31].
; NOTE(review): the checks use the common prefix rather than the -SDAG /
; -GISEL variants seen on neighbouring tests; presumably both selectors emit
; identical code here. Confirm against the RUN lines at the top of the file.
; NOTE(review): the check lines look autogenerated; presumably they should be
; regenerated with the update script rather than edited by hand.
+ define void @test6_s_barrier_join_0 () {
977
+ ; GFX12-LABEL: test6_s_barrier_join_0:
978
+ ; GFX12: ; %bb.0:
979
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
980
+ ; GFX12-NEXT: s_wait_expcnt 0x0
981
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
982
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
983
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
984
+ ; GFX12-NEXT: s_barrier_join 0
985
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
986
+ call void @llvm.amdgcn.s.barrier.join (i32 0 )
987
+ ret void
988
+ }
989
+
970
990
define amdgpu_kernel void @test1_s_barrier_leave (ptr addrspace (1 ) %a , ptr addrspace (1 ) %b , ptr addrspace (1 ) %c , ptr addrspace (1 ) %out ) #0 {
971
991
; GFX12-SDAG-LABEL: test1_s_barrier_leave:
972
992
; GFX12-SDAG: ; %bb.0: ; %entry
@@ -1026,17 +1046,19 @@ entry:
1026
1046
}
1027
1047
1028
1048
define amdgpu_kernel void @test1_s_wakeup_barrier (ptr addrspace (1 ) %out ) #0 {
1049
+ ;
1029
1050
; GFX12-SDAG-LABEL: test1_s_wakeup_barrier:
1030
1051
; GFX12-SDAG: ; %bb.0: ; %entry
1031
1052
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1032
1053
; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1033
- ; GFX12-SDAG-NEXT: s_wakeup_barrier -1
1034
- ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1054
+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1055
+ ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0
1035
1056
; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0
1036
- ; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
1037
1057
; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0
1038
1058
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
1039
- ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
1059
+ ; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1]
1060
+ ; GFX12-SDAG-NEXT: s_wakeup_barrier -1
1061
+ ; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1]
1040
1062
; GFX12-SDAG-NEXT: s_nop 0
1041
1063
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1042
1064
; GFX12-SDAG-NEXT: s_endpgm
@@ -1068,17 +1090,19 @@ entry:
1068
1090
}
1069
1091
1070
1092
define amdgpu_kernel void @test2_s_wakeup_barrier (ptr addrspace (1 ) %out ) #0 {
1093
+ ;
1071
1094
; GFX12-SDAG-LABEL: test2_s_wakeup_barrier:
1072
1095
; GFX12-SDAG: ; %bb.0: ; %entry
1073
1096
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1074
1097
; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1075
- ; GFX12-SDAG-NEXT: s_wakeup_barrier 1
1076
- ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1098
+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1099
+ ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0
1077
1100
; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0
1078
- ; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
1079
1101
; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0
1080
1102
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
1081
- ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
1103
+ ; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1]
1104
+ ; GFX12-SDAG-NEXT: s_wakeup_barrier 1
1105
+ ; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1]
1082
1106
; GFX12-SDAG-NEXT: s_nop 0
1083
1107
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1084
1108
; GFX12-SDAG-NEXT: s_endpgm
@@ -1110,17 +1134,19 @@ entry:
1110
1134
}
1111
1135
1112
1136
define amdgpu_kernel void @test3_s_wakeup_barrier (ptr addrspace (1 ) %out ) #0 {
1137
+ ;
1113
1138
; GFX12-SDAG-LABEL: test3_s_wakeup_barrier:
1114
1139
; GFX12-SDAG: ; %bb.0: ; %entry
1115
1140
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1116
1141
; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1117
- ; GFX12-SDAG-NEXT: s_wakeup_barrier 0
1118
- ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1142
+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1143
+ ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0
1119
1144
; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0
1120
- ; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
1121
1145
; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0
1122
1146
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
1123
- ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
1147
+ ; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1]
1148
+ ; GFX12-SDAG-NEXT: s_wakeup_barrier 0
1149
+ ; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1]
1124
1150
; GFX12-SDAG-NEXT: s_nop 0
1125
1151
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1126
1152
; GFX12-SDAG-NEXT: s_endpgm
@@ -1226,34 +1252,21 @@ define void @test5_s_wakeup_barrier_m0(i32 %arg) {
1226
1252
}
1227
1253
1228
1254
define amdgpu_kernel void @test1_s_get_barrier_state (ptr addrspace (1 ) %out ) #0 {
1229
- ; GFX12-SDAG-LABEL: test1_s_get_barrier_state:
1230
- ; GFX12-SDAG: ; %bb.0: ; %entry
1231
- ; GFX12-SDAG-NEXT: s_get_barrier_state s4, -1
1232
- ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1233
- ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
1234
- ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1235
- ; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_and_b32 v0, 0x3ff, v0
1236
- ; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1237
- ; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
1238
- ; GFX12-SDAG-NEXT: s_nop 0
1239
- ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1240
- ; GFX12-SDAG-NEXT: s_endpgm
1241
- ;
1242
- ; GFX12-GISEL-LABEL: test1_s_get_barrier_state:
1243
- ; GFX12-GISEL: ; %bb.0: ; %entry
1244
- ; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1245
- ; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
1246
- ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2)
1247
- ; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1248
- ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1249
- ; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
1250
- ; GFX12-GISEL-NEXT: s_get_barrier_state s2, -1
1251
- ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1252
- ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
1253
- ; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
1254
- ; GFX12-GISEL-NEXT: s_nop 0
1255
- ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1256
- ; GFX12-GISEL-NEXT: s_endpgm
1255
+ ; GFX12-LABEL: test1_s_get_barrier_state:
1256
+ ; GFX12: ; %bb.0: ; %entry
1257
+ ; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1258
+ ; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
1259
+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2)
1260
+ ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1261
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
1262
+ ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
1263
+ ; GFX12-NEXT: s_get_barrier_state s2, -1
1264
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
1265
+ ; GFX12-NEXT: v_mov_b32_e32 v1, s2
1266
+ ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
1267
+ ; GFX12-NEXT: s_nop 0
1268
+ ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1269
+ ; GFX12-NEXT: s_endpgm
1257
1270
entry:
1258
1271
%tmp = call i32 @llvm.amdgcn.workitem.id.x ()
1259
1272
%tmp1 = getelementptr i32 , ptr addrspace (1 ) %out , i32 %tmp
@@ -1264,34 +1277,21 @@ entry:
1264
1277
}
1265
1278
1266
1279
define amdgpu_kernel void @test2_s_get_barrier_state (ptr addrspace (1 ) %out ) #0 {
1267
- ; GFX12-SDAG-LABEL: test2_s_get_barrier_state:
1268
- ; GFX12-SDAG: ; %bb.0: ; %entry
1269
- ; GFX12-SDAG-NEXT: s_get_barrier_state s4, 1
1270
- ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1271
- ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
1272
- ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1273
- ; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_and_b32 v0, 0x3ff, v0
1274
- ; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1275
- ; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
1276
- ; GFX12-SDAG-NEXT: s_nop 0
1277
- ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1278
- ; GFX12-SDAG-NEXT: s_endpgm
1279
- ;
1280
- ; GFX12-GISEL-LABEL: test2_s_get_barrier_state:
1281
- ; GFX12-GISEL: ; %bb.0: ; %entry
1282
- ; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1283
- ; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
1284
- ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2)
1285
- ; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1286
- ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1287
- ; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
1288
- ; GFX12-GISEL-NEXT: s_get_barrier_state s2, 1
1289
- ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1290
- ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
1291
- ; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
1292
- ; GFX12-GISEL-NEXT: s_nop 0
1293
- ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1294
- ; GFX12-GISEL-NEXT: s_endpgm
1280
+ ; GFX12-LABEL: test2_s_get_barrier_state:
1281
+ ; GFX12: ; %bb.0: ; %entry
1282
+ ; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1283
+ ; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
1284
+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2)
1285
+ ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1286
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
1287
+ ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
1288
+ ; GFX12-NEXT: s_get_barrier_state s2, 1
1289
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
1290
+ ; GFX12-NEXT: v_mov_b32_e32 v1, s2
1291
+ ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
1292
+ ; GFX12-NEXT: s_nop 0
1293
+ ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1294
+ ; GFX12-NEXT: s_endpgm
1295
1295
entry:
1296
1296
%tmp = call i32 @llvm.amdgcn.workitem.id.x ()
1297
1297
%tmp1 = getelementptr i32 , ptr addrspace (1 ) %out , i32 %tmp
@@ -1302,34 +1302,21 @@ entry:
1302
1302
}
1303
1303
1304
1304
define amdgpu_kernel void @test3_s_get_barrier_state (ptr addrspace (1 ) %out ) #0 {
1305
- ; GFX12-SDAG-LABEL: test3_s_get_barrier_state:
1306
- ; GFX12-SDAG: ; %bb.0: ; %entry
1307
- ; GFX12-SDAG-NEXT: s_get_barrier_state s4, 0
1308
- ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1309
- ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
1310
- ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1311
- ; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_and_b32 v0, 0x3ff, v0
1312
- ; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1313
- ; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
1314
- ; GFX12-SDAG-NEXT: s_nop 0
1315
- ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1316
- ; GFX12-SDAG-NEXT: s_endpgm
1317
- ;
1318
- ; GFX12-GISEL-LABEL: test3_s_get_barrier_state:
1319
- ; GFX12-GISEL: ; %bb.0: ; %entry
1320
- ; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1321
- ; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
1322
- ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2)
1323
- ; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1324
- ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1325
- ; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
1326
- ; GFX12-GISEL-NEXT: s_get_barrier_state s2, 0
1327
- ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1328
- ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
1329
- ; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
1330
- ; GFX12-GISEL-NEXT: s_nop 0
1331
- ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1332
- ; GFX12-GISEL-NEXT: s_endpgm
1305
+ ; GFX12-LABEL: test3_s_get_barrier_state:
1306
+ ; GFX12: ; %bb.0: ; %entry
1307
+ ; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1308
+ ; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
1309
+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2)
1310
+ ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1311
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
1312
+ ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
1313
+ ; GFX12-NEXT: s_get_barrier_state s2, 0
1314
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
1315
+ ; GFX12-NEXT: v_mov_b32_e32 v1, s2
1316
+ ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
1317
+ ; GFX12-NEXT: s_nop 0
1318
+ ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1319
+ ; GFX12-NEXT: s_endpgm
1333
1320
entry:
1334
1321
%tmp = call i32 @llvm.amdgcn.workitem.id.x ()
1335
1322
%tmp1 = getelementptr i32 , ptr addrspace (1 ) %out , i32 %tmp
@@ -1401,6 +1388,24 @@ define i32 @test5_s_get_barrier_state_m0(i32 %arg) {
1401
1388
ret i32 %state
1402
1389
}
1403
1390
1391
; test6_s_get_barrier_state_0: a plain `define i32` function (no
; amdgpu_kernel calling convention, no arguments) that calls
; @llvm.amdgcn.s.get.barrier.state with the constant operand i32 0 and
; returns the result.
; The check lines below expect the s_wait_* counter sequence first, then
; `s_get_barrier_state s0, 0` with the operand encoded as an immediate, a
; further s_wait_kmcnt 0x0 after the state read, and a copy of s0 into v0
; before the return through s_setpc_b64 s[30:31].
; NOTE(review): the checks use the common prefix rather than the -SDAG /
; -GISEL variants seen on neighbouring tests; presumably both selectors emit
; identical code here. Confirm against the RUN lines at the top of the file.
; NOTE(review): the check lines look autogenerated; presumably they should be
; regenerated with the update script rather than edited by hand.
+ define i32 @test6_s_get_barrier_state_0 () {
1392
+ ; GFX12-LABEL: test6_s_get_barrier_state_0:
1393
+ ; GFX12: ; %bb.0:
1394
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1395
+ ; GFX12-NEXT: s_wait_expcnt 0x0
1396
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
1397
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
1398
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
1399
+ ; GFX12-NEXT: s_get_barrier_state s0, 0
1400
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
1401
+ ; GFX12-NEXT: s_wait_alu 0xfffe
1402
+ ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1403
+ ; GFX12-NEXT: v_mov_b32_e32 v0, s0
1404
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
1405
+ %state = call i32 @llvm.amdgcn.s.get.barrier.state (i32 0 )
1406
+ ret i32 %state
1407
+ }
1408
+
1404
1409
define amdgpu_kernel void @test_barrier_convert (ptr addrspace (1 ) %out ) #0 {
1405
1410
; GFX12-SDAG-LABEL: test_barrier_convert:
1406
1411
; GFX12-SDAG: ; %bb.0: ; %entry
0 commit comments