@@ -12,7 +12,7 @@ define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t,
12
12
; GFX12-LABEL: load_2dmsaa:
13
13
; GFX12: ; %bb.0: ; %main_body
14
14
; GFX12-NEXT: image_msaa_load v[0:3], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm ; encoding: [0x06,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x00]
15
- ; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
15
+ ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
16
16
; GFX12-NEXT: ; return to shader part epilog
17
17
main_body:
18
18
%v = call <4 x float > @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32 (i32 1 , i32 %s , i32 %t , i32 %fragid , <8 x i32 > %rsrc , i32 0 , i32 0 )
@@ -50,7 +50,7 @@ define amdgpu_ps <4 x float> @load_2dmsaa_both(<8 x i32> inreg %rsrc, ptr addrsp
50
50
; GFX12-NEXT: v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v3, v11 ; encoding: [0x0a,0x01,0x10,0xca,0x0b,0x01,0x02,0x02]
51
51
; GFX12-NEXT: v_mov_b32_e32 v4, v12 ; encoding: [0x0c,0x03,0x08,0x7e]
52
52
; GFX12-NEXT: image_msaa_load v[0:4], [v7, v6, v5], s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe lwe ; encoding: [0x0e,0x20,0x86,0xe4,0x00,0x01,0x00,0x00,0x07,0x06,0x05,0x00]
53
- ; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
53
+ ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
54
54
; GFX12-NEXT: global_store_b32 v8, v4, s[8:9] ; encoding: [0x08,0x80,0x06,0xee,0x00,0x00,0x00,0x02,0x08,0x00,0x00,0x00]
55
55
; GFX12-NEXT: ; return to shader part epilog
56
56
main_body:
@@ -71,7 +71,7 @@ define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i3
71
71
; GFX12-LABEL: load_2darraymsaa:
72
72
; GFX12: ; %bb.0: ; %main_body
73
73
; GFX12-NEXT: image_msaa_load v[0:3], [v0, v1, v2, v3], s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ; encoding: [0x07,0x20,0x06,0xe5,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x03]
74
- ; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
74
+ ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
75
75
; GFX12-NEXT: ; return to shader part epilog
76
76
main_body:
77
77
%v = call <4 x float > @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32 (i32 4 , i32 %s , i32 %t , i32 %slice , i32 %fragid , <8 x i32 > %rsrc , i32 0 , i32 0 )
@@ -110,7 +110,7 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, ptr ad
110
110
; GFX12-NEXT: v_dual_mov_b32 v2, v11 :: v_dual_mov_b32 v3, v12 ; encoding: [0x0b,0x01,0x10,0xca,0x0c,0x01,0x02,0x02]
111
111
; GFX12-NEXT: v_mov_b32_e32 v4, v13 ; encoding: [0x0d,0x03,0x08,0x7e]
112
112
; GFX12-NEXT: image_msaa_load v[0:4], [v8, v7, v6, v5], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe ; encoding: [0x0f,0x20,0x06,0xe6,0x00,0x00,0x00,0x00,0x08,0x07,0x06,0x05]
113
- ; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
113
+ ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
114
114
; GFX12-NEXT: global_store_b32 v9, v4, s[8:9] ; encoding: [0x08,0x80,0x06,0xee,0x00,0x00,0x00,0x02,0x09,0x00,0x00,0x00]
115
115
; GFX12-NEXT: ; return to shader part epilog
116
116
main_body:
@@ -131,7 +131,7 @@ define amdgpu_ps <4 x float> @load_2dmsaa_glc(<8 x i32> inreg %rsrc, i32 %s, i32
131
131
; GFX12-LABEL: load_2dmsaa_glc:
132
132
; GFX12: ; %bb.0: ; %main_body
133
133
; GFX12-NEXT: image_msaa_load v[0:3], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm th:TH_LOAD_NT ; encoding: [0x06,0x20,0x46,0xe4,0x00,0x00,0x10,0x00,0x00,0x01,0x02,0x00]
134
- ; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
134
+ ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
135
135
; GFX12-NEXT: ; return to shader part epilog
136
136
main_body:
137
137
%v = call <4 x float > @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32 (i32 1 , i32 %s , i32 %t , i32 %fragid , <8 x i32 > %rsrc , i32 0 , i32 1 )
@@ -148,7 +148,7 @@ define amdgpu_ps <4 x float> @load_2dmsaa_slc(<8 x i32> inreg %rsrc, i32 %s, i32
148
148
; GFX12-LABEL: load_2dmsaa_slc:
149
149
; GFX12: ; %bb.0: ; %main_body
150
150
; GFX12-NEXT: image_msaa_load v[0:3], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm th:TH_LOAD_HT ; encoding: [0x06,0x20,0x46,0xe4,0x00,0x00,0x20,0x00,0x00,0x01,0x02,0x00]
151
- ; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
151
+ ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
152
152
; GFX12-NEXT: ; return to shader part epilog
153
153
main_body:
154
154
%v = call <4 x float > @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32 (i32 1 , i32 %s , i32 %t , i32 %fragid , <8 x i32 > %rsrc , i32 0 , i32 2 )
@@ -165,7 +165,7 @@ define amdgpu_ps <4 x float> @load_2dmsaa_glc_slc(<8 x i32> inreg %rsrc, i32 %s,
165
165
; GFX12-LABEL: load_2dmsaa_glc_slc:
166
166
; GFX12: ; %bb.0: ; %main_body
167
167
; GFX12-NEXT: image_msaa_load v[0:3], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm th:TH_LOAD_LU ; encoding: [0x06,0x20,0x46,0xe4,0x00,0x00,0x30,0x00,0x00,0x01,0x02,0x00]
168
- ; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
168
+ ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
169
169
; GFX12-NEXT: ; return to shader part epilog
170
170
main_body:
171
171
%v = call <4 x float > @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32 (i32 1 , i32 %s , i32 %t , i32 %fragid , <8 x i32 > %rsrc , i32 0 , i32 3 )
@@ -182,7 +182,7 @@ define amdgpu_ps <4 x half> @load_2dmsaa_d16(<8 x i32> inreg %rsrc, i32 %s, i32
182
182
; GFX12-LABEL: load_2dmsaa_d16:
183
183
; GFX12: ; %bb.0: ; %main_body
184
184
; GFX12-NEXT: image_msaa_load v[0:1], [v0, v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm d16 ; encoding: [0x26,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x00]
185
- ; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
185
+ ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
186
186
; GFX12-NEXT: ; return to shader part epilog
187
187
main_body:
188
188
%v = call <4 x half > @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16.i32 (i32 1 , i32 %s , i32 %t , i32 %fragid , <8 x i32 > %rsrc , i32 0 , i32 0 )
@@ -215,7 +215,7 @@ define amdgpu_ps <4 x half> @load_2dmsaa_tfe_d16(<8 x i32> inreg %rsrc, ptr addr
215
215
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) ; encoding: [0x02,0x00,0x87,0xbf]
216
216
; GFX12-NEXT: v_mov_b32_e32 v2, v8 ; encoding: [0x08,0x03,0x04,0x7e]
217
217
; GFX12-NEXT: image_msaa_load v[0:2], [v5, v4, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe d16 ; encoding: [0x2e,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x05,0x04,0x03,0x00]
218
- ; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
218
+ ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
219
219
; GFX12-NEXT: global_store_b32 v6, v2, s[8:9] ; encoding: [0x08,0x80,0x06,0xee,0x00,0x00,0x00,0x01,0x06,0x00,0x00,0x00]
220
220
; GFX12-NEXT: ; return to shader part epilog
221
221
main_body:
@@ -236,7 +236,7 @@ define amdgpu_ps <4 x half> @load_2darraymsaa_d16(<8 x i32> inreg %rsrc, i32 %s,
236
236
; GFX12-LABEL: load_2darraymsaa_d16:
237
237
; GFX12: ; %bb.0: ; %main_body
238
238
; GFX12-NEXT: image_msaa_load v[0:1], [v0, v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm d16 ; encoding: [0x27,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x03]
239
- ; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
239
+ ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
240
240
; GFX12-NEXT: ; return to shader part epilog
241
241
main_body:
242
242
%v = call <4 x half > @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f16.i32 (i32 1 , i32 %s , i32 %t , i32 %slice , i32 %fragid , <8 x i32 > %rsrc , i32 0 , i32 0 )
@@ -269,7 +269,7 @@ define amdgpu_ps <4 x half> @load_2darraymsaa_tfe_d16(<8 x i32> inreg %rsrc, ptr
269
269
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) ; encoding: [0x02,0x00,0x87,0xbf]
270
270
; GFX12-NEXT: v_mov_b32_e32 v2, v9 ; encoding: [0x09,0x03,0x04,0x7e]
271
271
; GFX12-NEXT: image_msaa_load v[0:2], [v6, v5, v4, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe d16 ; encoding: [0x2f,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x06,0x05,0x04,0x03]
272
- ; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
272
+ ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
273
273
; GFX12-NEXT: global_store_b32 v7, v2, s[8:9] ; encoding: [0x08,0x80,0x06,0xee,0x00,0x00,0x00,0x01,0x07,0x00,0x00,0x00]
274
274
; GFX12-NEXT: ; return to shader part epilog
275
275
main_body:
@@ -292,7 +292,7 @@ define amdgpu_ps <4 x float> @load_2dmsaa_a16(<8 x i32> inreg %rsrc, i16 %s, i16
292
292
; GFX12: ; %bb.0: ; %main_body
293
293
; GFX12-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; encoding: [0x00,0x00,0x44,0xd6,0x01,0x01,0xfe,0x03,0x00,0x01,0x04,0x05]
294
294
; GFX12-NEXT: image_msaa_load v[0:3], [v0, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16 ; encoding: [0x46,0x20,0x46,0xe4,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x00]
295
- ; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
295
+ ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
296
296
; GFX12-NEXT: ; return to shader part epilog
297
297
main_body:
298
298
%v = call <4 x float > @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i16 (i32 1 , i16 %s , i16 %t , i16 %fragid , <8 x i32 > %rsrc , i32 0 , i32 0 )
@@ -313,7 +313,7 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_a16(<8 x i32> inreg %rsrc, i16 %s
313
313
; GFX12-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 ; encoding: [0x02,0x00,0x44,0xd6,0x03,0x05,0xfe,0x03,0x00,0x01,0x04,0x05]
314
314
; GFX12-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; encoding: [0x00,0x00,0x44,0xd6,0x01,0x01,0xfe,0x03,0x00,0x01,0x04,0x05]
315
315
; GFX12-NEXT: image_msaa_load v[0:3], [v0, v2], s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 ; encoding: [0x47,0x20,0x06,0xe5,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x00]
316
- ; GFX12-NEXT: s_wait_loadcnt 0x0 ; encoding: [0x00,0x00,0xc0,0xbf]
316
+ ; GFX12-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
317
317
; GFX12-NEXT: ; return to shader part epilog
318
318
main_body:
319
319
%v = call <4 x float > @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i16 (i32 4 , i16 %s , i16 %t , i16 %slice , i16 %fragid , <8 x i32 > %rsrc , i32 0 , i32 0 )
0 commit comments