4
4
define amdgpu_ps void @group_image_sample (i32 inreg noundef %globalTable , i32 inreg noundef %userdata6 , i32 inreg noundef %userdata7 , i32 inreg noundef %userdata8 , i32 inreg noundef %PrimMask , <2 x float > noundef %PerspInterpSample , <2 x float > noundef %PerspInterpCenter , <2 x float > noundef %PerspInterpCentroid ) #2 {
5
5
; GFX11-LABEL: group_image_sample:
6
6
; GFX11: ; %bb.0: ; %.entry
7
- ; GFX11-NEXT: s_mov_b32 s24 , exec_lo
7
+ ; GFX11-NEXT: s_mov_b32 s33 , exec_lo
8
8
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
9
9
; GFX11-NEXT: s_mov_b32 m0, s4
10
10
; GFX11-NEXT: s_getpc_b64 s[4:5]
@@ -21,73 +21,79 @@ define amdgpu_ps void @group_image_sample(i32 inreg noundef %globalTable, i32 in
21
21
; GFX11-NEXT: lds_param_load v2, attr0.y wait_vdst:15
22
22
; GFX11-NEXT: lds_param_load v3, attr0.x wait_vdst:15
23
23
; GFX11-NEXT: s_mov_b32 exec_lo, s16
24
+ ; GFX11-NEXT: v_interp_p10_f32 v4, v2, v0, v2 wait_exp:1
25
+ ; GFX11-NEXT: v_interp_p10_f32 v0, v3, v0, v3 wait_exp:0
24
26
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
25
- ; GFX11-NEXT: s_clause 0x3
27
+ ; GFX11-NEXT: s_clause 0xf
26
28
; GFX11-NEXT: s_buffer_load_b64 s[16:17], s[12:15], 0x10
27
29
; GFX11-NEXT: s_buffer_load_b64 s[18:19], s[12:15], 0x20
28
30
; GFX11-NEXT: s_buffer_load_b64 s[20:21], s[12:15], 0x30
29
31
; GFX11-NEXT: s_buffer_load_b64 s[22:23], s[12:15], 0x40
30
- ; GFX11-NEXT: v_interp_p10_f32 v4, v2, v0, v2 wait_exp:1
31
- ; GFX11-NEXT: v_interp_p10_f32 v0, v3, v0, v3 wait_exp:0
32
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
32
+ ; GFX11-NEXT: s_buffer_load_b64 s[24:25], s[12:15], 0x50
33
+ ; GFX11-NEXT: s_buffer_load_b64 s[26:27], s[12:15], 0x60
34
+ ; GFX11-NEXT: s_buffer_load_b64 s[28:29], s[12:15], 0x70
35
+ ; GFX11-NEXT: s_buffer_load_b64 s[30:31], s[12:15], 0x80
36
+ ; GFX11-NEXT: s_buffer_load_b64 s[34:35], s[12:15], 0x90
37
+ ; GFX11-NEXT: s_buffer_load_b64 s[36:37], s[12:15], 0xa0
38
+ ; GFX11-NEXT: s_buffer_load_b64 s[38:39], s[12:15], 0xb0
39
+ ; GFX11-NEXT: s_buffer_load_b64 s[40:41], s[12:15], 0xc0
40
+ ; GFX11-NEXT: s_buffer_load_b64 s[42:43], s[12:15], 0xd0
41
+ ; GFX11-NEXT: s_buffer_load_b64 s[44:45], s[12:15], 0xe0
42
+ ; GFX11-NEXT: s_buffer_load_b64 s[46:47], s[12:15], 0xf0
43
+ ; GFX11-NEXT: s_buffer_load_b64 s[12:13], s[12:15], 0x100
44
+ ; GFX11-NEXT: v_interp_p2_f32 v36, v2, v1, v4 wait_exp:7
33
45
; GFX11-NEXT: v_interp_p2_f32 v0, v3, v1, v0 wait_exp:7
34
- ; GFX11-NEXT: v_interp_p2_f32 v1, v2, v1, v4 wait_exp:7
35
46
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
36
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
37
- ; GFX11-NEXT: v_dual_add_f32 v4, s16, v0 :: v_dual_add_f32 v5, s17, v1
38
- ; GFX11-NEXT: v_dual_add_f32 v12, s20, v0 :: v_dual_add_f32 v13, s21, v1
39
- ; GFX11-NEXT: v_dual_add_f32 v8, s18, v0 :: v_dual_add_f32 v9, s19, v1
40
- ; GFX11-NEXT: v_dual_add_f32 v16, s22, v0 :: v_dual_add_f32 v17, s23, v1
41
- ; GFX11-NEXT: s_clause 0x3
47
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
48
+ ; GFX11-NEXT: v_add_f32_e32 v5, s17, v36
49
+ ; GFX11-NEXT: v_add_f32_e32 v4, s16, v0
50
+ ; GFX11-NEXT: v_add_f32_e32 v8, s18, v0
51
+ ; GFX11-NEXT: v_add_f32_e32 v9, s19, v36
52
+ ; GFX11-NEXT: v_add_f32_e32 v12, s20, v0
53
+ ; GFX11-NEXT: v_add_f32_e32 v13, s21, v36
54
+ ; GFX11-NEXT: v_add_f32_e32 v16, s22, v0
55
+ ; GFX11-NEXT: v_add_f32_e32 v17, s23, v36
56
+ ; GFX11-NEXT: v_add_f32_e32 v20, s24, v0
57
+ ; GFX11-NEXT: v_add_f32_e32 v21, s25, v36
58
+ ; GFX11-NEXT: v_add_f32_e32 v24, s26, v0
59
+ ; GFX11-NEXT: v_add_f32_e32 v25, s27, v36
60
+ ; GFX11-NEXT: v_add_f32_e32 v28, s28, v0
61
+ ; GFX11-NEXT: v_add_f32_e32 v29, s29, v36
62
+ ; GFX11-NEXT: v_add_f32_e32 v32, s30, v0
63
+ ; GFX11-NEXT: v_add_f32_e32 v33, s31, v36
64
+ ; GFX11-NEXT: s_clause 0x7
42
65
; GFX11-NEXT: image_sample v[4:7], v[4:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
43
66
; GFX11-NEXT: image_sample v[8:11], v[8:9], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
44
67
; GFX11-NEXT: image_sample v[12:15], v[12:13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
45
68
; GFX11-NEXT: image_sample v[16:19], v[16:17], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
46
- ; GFX11-NEXT: s_clause 0x3
47
- ; GFX11-NEXT: s_buffer_load_b64 s[16:17], s[12:15], 0x50
48
- ; GFX11-NEXT: s_buffer_load_b64 s[18:19], s[12:15], 0x60
49
- ; GFX11-NEXT: s_buffer_load_b64 s[20:21], s[12:15], 0x70
50
- ; GFX11-NEXT: s_buffer_load_b64 s[22:23], s[12:15], 0x80
51
- ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
52
- ; GFX11-NEXT: v_dual_add_f32 v20, s16, v0 :: v_dual_add_f32 v21, s17, v1
53
- ; GFX11-NEXT: v_dual_add_f32 v28, s20, v0 :: v_dual_add_f32 v29, s21, v1
54
- ; GFX11-NEXT: v_dual_add_f32 v24, s18, v0 :: v_dual_add_f32 v25, s19, v1
55
- ; GFX11-NEXT: v_dual_add_f32 v32, s22, v0 :: v_dual_add_f32 v33, s23, v1
56
- ; GFX11-NEXT: s_clause 0x3
57
69
; GFX11-NEXT: image_sample v[20:23], v[20:21], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
58
70
; GFX11-NEXT: image_sample v[24:27], v[24:25], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
59
71
; GFX11-NEXT: image_sample v[28:31], v[28:29], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
60
72
; GFX11-NEXT: image_sample v[32:35], v[32:33], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
61
- ; GFX11-NEXT: s_clause 0x3
62
- ; GFX11-NEXT: s_buffer_load_b64 s[16:17], s[12:15], 0x90
63
- ; GFX11-NEXT: s_buffer_load_b64 s[18:19], s[12:15], 0xa0
64
- ; GFX11-NEXT: s_buffer_load_b64 s[20:21], s[12:15], 0xb0
65
- ; GFX11-NEXT: s_buffer_load_b64 s[22:23], s[12:15], 0xc0
66
- ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
67
- ; GFX11-NEXT: v_dual_add_f32 v36, s16, v0 :: v_dual_add_f32 v37, s17, v1
68
- ; GFX11-NEXT: v_dual_add_f32 v44, s20, v0 :: v_dual_add_f32 v45, s21, v1
69
- ; GFX11-NEXT: v_dual_add_f32 v40, s18, v0 :: v_dual_add_f32 v41, s19, v1
70
- ; GFX11-NEXT: v_dual_add_f32 v48, s22, v0 :: v_dual_add_f32 v49, s23, v1
71
- ; GFX11-NEXT: s_clause 0x3
72
- ; GFX11-NEXT: image_sample v[36:39], v[36:37], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
73
+ ; GFX11-NEXT: v_add_f32_e32 v37, s34, v0
74
+ ; GFX11-NEXT: v_add_f32_e32 v38, s35, v36
75
+ ; GFX11-NEXT: v_add_f32_e32 v40, s36, v0
76
+ ; GFX11-NEXT: v_add_f32_e32 v41, s37, v36
77
+ ; GFX11-NEXT: v_add_f32_e32 v44, s38, v0
78
+ ; GFX11-NEXT: v_add_f32_e32 v45, s39, v36
79
+ ; GFX11-NEXT: v_add_f32_e32 v48, s40, v0
80
+ ; GFX11-NEXT: v_add_f32_e32 v49, s41, v36
81
+ ; GFX11-NEXT: v_add_f32_e32 v52, s42, v0
82
+ ; GFX11-NEXT: v_add_f32_e32 v53, s43, v36
83
+ ; GFX11-NEXT: v_add_f32_e32 v56, s44, v0
84
+ ; GFX11-NEXT: v_add_f32_e32 v57, s45, v36
85
+ ; GFX11-NEXT: v_add_f32_e32 v60, s46, v0
86
+ ; GFX11-NEXT: v_add_f32_e32 v61, s47, v36
87
+ ; GFX11-NEXT: v_add_f32_e32 v0, s12, v0
88
+ ; GFX11-NEXT: v_add_f32_e32 v1, s13, v36
89
+ ; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s33
90
+ ; GFX11-NEXT: s_clause 0x7
91
+ ; GFX11-NEXT: image_sample v[36:39], v[37:38], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
73
92
; GFX11-NEXT: image_sample v[40:43], v[40:41], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
74
93
; GFX11-NEXT: image_sample v[44:47], v[44:45], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
75
94
; GFX11-NEXT: image_sample v[48:51], v[48:49], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
76
- ; GFX11-NEXT: s_clause 0x3
77
- ; GFX11-NEXT: s_buffer_load_b64 s[16:17], s[12:15], 0xd0
78
- ; GFX11-NEXT: s_buffer_load_b64 s[18:19], s[12:15], 0xe0
79
- ; GFX11-NEXT: s_buffer_load_b64 s[20:21], s[12:15], 0xf0
80
- ; GFX11-NEXT: s_buffer_load_b64 s[12:13], s[12:15], 0x100
81
- ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
82
- ; GFX11-NEXT: v_dual_add_f32 v52, s16, v0 :: v_dual_add_f32 v53, s17, v1
83
- ; GFX11-NEXT: v_dual_add_f32 v56, s18, v0 :: v_dual_add_f32 v57, s19, v1
84
- ; GFX11-NEXT: s_clause 0x1
85
95
; GFX11-NEXT: image_sample v[52:55], v[52:53], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
86
96
; GFX11-NEXT: image_sample v[56:59], v[56:57], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
87
- ; GFX11-NEXT: v_dual_add_f32 v60, s20, v0 :: v_dual_add_f32 v61, s21, v1
88
- ; GFX11-NEXT: v_dual_add_f32 v0, s12, v0 :: v_dual_add_f32 v1, s13, v1
89
- ; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s24
90
- ; GFX11-NEXT: s_clause 0x1
91
97
; GFX11-NEXT: image_sample v[60:63], v[60:61], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
92
98
; GFX11-NEXT: image_sample v[64:67], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
93
99
; GFX11-NEXT: s_waitcnt vmcnt(14)
@@ -446,7 +452,7 @@ declare float @llvm.amdgcn.interp.inreg.p10(float, float, float) #3
446
452
declare float @llvm.amdgcn.interp.inreg.p2 (float , float , float ) #3
447
453
declare <2 x i32 > @llvm.amdgcn.s.buffer.load.v2i32 (<4 x i32 >, i32 , i32 immarg) #8
448
454
449
- attributes #2 = { alwaysinline nounwind memory(readwrite) "amdgpu-sched-strategy" ="max-memory-clause" }
455
+ attributes #2 = { alwaysinline nounwind memory(readwrite) "amdgpu-sched-strategy" ="max-memory-clause" "amdgpu-max-memory-cluster-dwords" = "32" }
450
456
attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
451
457
attributes #4 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
452
458
attributes #5 = { nocallback nofree nosync nounwind willreturn memory(read) }
0 commit comments