1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2
- ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
3
- ; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1150 %s
4
- ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
2
+ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
3
+ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
4
+ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1150,GFX1150-TRUE16 %s
5
+ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1150,GFX1150-FAKE16 %s
6
+ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s
7
+ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
5
8
6
9
define amdgpu_ps <3 x float > @gather_sample (<8 x i32 > inreg %rsrc , <4 x i32 > inreg %samp , <8 x i32 > inreg %rsrc2 , <4 x i32 > inreg %samp2 , float %s , float %t ) {
7
10
; GFX11-LABEL: gather_sample:
@@ -80,35 +83,69 @@ define amdgpu_ps <3 x float> @sample_gather(<8 x i32> inreg %rsrc, <4 x i32> inr
80
83
}
81
84
82
85
define amdgpu_ps <3 x float > @sample_load (<8 x i32 > inreg %rsrc , <4 x i32 > inreg %samp , <8 x i32 > inreg %rsrc2 , i16 %s.16 , i16 %t.16 , i16 %fragid ) {
83
- ; GFX11-LABEL: sample_load:
84
- ; GFX11: ; %bb.0:
85
- ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
86
- ; GFX11-NEXT: v_mov_b32_e32 v4, 0
87
- ; GFX11-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
88
- ; GFX11-NEXT: s_waitcnt vmcnt(0)
89
- ; GFX11-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
90
- ; GFX11-NEXT: s_waitcnt vmcnt(0)
91
- ; GFX11-NEXT: ; return to shader part epilog
86
+ ; GFX11-TRUE16-LABEL: sample_load:
87
+ ; GFX11-TRUE16: ; %bb.0:
88
+ ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
89
+ ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
90
+ ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
91
+ ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v4, 0
92
+ ; GFX11-TRUE16-NEXT: image_msaa_load v[0:3], v[2:3], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
93
+ ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
94
+ ; GFX11-TRUE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
95
+ ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
96
+ ; GFX11-TRUE16-NEXT: ; return to shader part epilog
92
97
;
93
- ; GFX1150-LABEL: sample_load:
94
- ; GFX1150: ; %bb.0:
95
- ; GFX1150-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
96
- ; GFX1150-NEXT: v_mov_b32_e32 v4, 0
97
- ; GFX1150-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
98
- ; GFX1150-NEXT: s_waitcnt vmcnt(0)
99
- ; GFX1150-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
100
- ; GFX1150-NEXT: s_waitcnt vmcnt(0)
101
- ; GFX1150-NEXT: ; return to shader part epilog
98
+ ; GFX11-FAKE16-LABEL: sample_load:
99
+ ; GFX11-FAKE16: ; %bb.0:
100
+ ; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
101
+ ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v4, 0
102
+ ; GFX11-FAKE16-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
103
+ ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
104
+ ; GFX11-FAKE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
105
+ ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
106
+ ; GFX11-FAKE16-NEXT: ; return to shader part epilog
102
107
;
103
- ; GFX12-LABEL: sample_load:
104
- ; GFX12: ; %bb.0:
105
- ; GFX12-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
106
- ; GFX12-NEXT: v_mov_b32_e32 v4, 0
107
- ; GFX12-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
108
- ; GFX12-NEXT: s_wait_samplecnt 0x0
109
- ; GFX12-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
110
- ; GFX12-NEXT: s_wait_samplecnt 0x0
111
- ; GFX12-NEXT: ; return to shader part epilog
108
+ ; GFX1150-TRUE16-LABEL: sample_load:
109
+ ; GFX1150-TRUE16: ; %bb.0:
110
+ ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
111
+ ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
112
+ ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
113
+ ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v4, 0
114
+ ; GFX1150-TRUE16-NEXT: image_msaa_load v[0:3], v[2:3], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
115
+ ; GFX1150-TRUE16-NEXT: s_waitcnt vmcnt(0)
116
+ ; GFX1150-TRUE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
117
+ ; GFX1150-TRUE16-NEXT: s_waitcnt vmcnt(0)
118
+ ; GFX1150-TRUE16-NEXT: ; return to shader part epilog
119
+ ;
120
+ ; GFX1150-FAKE16-LABEL: sample_load:
121
+ ; GFX1150-FAKE16: ; %bb.0:
122
+ ; GFX1150-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
123
+ ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v4, 0
124
+ ; GFX1150-FAKE16-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
125
+ ; GFX1150-FAKE16-NEXT: s_waitcnt vmcnt(0)
126
+ ; GFX1150-FAKE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
127
+ ; GFX1150-FAKE16-NEXT: s_waitcnt vmcnt(0)
128
+ ; GFX1150-FAKE16-NEXT: ; return to shader part epilog
129
+ ;
130
+ ; GFX12-TRUE16-LABEL: sample_load:
131
+ ; GFX12-TRUE16: ; %bb.0:
132
+ ; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
133
+ ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v4, 0
134
+ ; GFX12-TRUE16-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
135
+ ; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
136
+ ; GFX12-TRUE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
137
+ ; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
138
+ ; GFX12-TRUE16-NEXT: ; return to shader part epilog
139
+ ;
140
+ ; GFX12-FAKE16-LABEL: sample_load:
141
+ ; GFX12-FAKE16: ; %bb.0:
142
+ ; GFX12-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
143
+ ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v4, 0
144
+ ; GFX12-FAKE16-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
145
+ ; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
146
+ ; GFX12-FAKE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
147
+ ; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
148
+ ; GFX12-FAKE16-NEXT: ; return to shader part epilog
112
149
113
150
%w = call <4 x float > @llvm.amdgcn.image.sample.lz.2d.v4f32.f32 (i32 15 , float 0 .000000e+00 , float 0 .000000e+00 , <8 x i32 > %rsrc , <4 x i32 > %samp , i1 false , i32 0 , i32 0 )
114
151
%v = call <4 x float > @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32 (i32 1 , i16 %s.16 , i16 %t.16 , i16 %fragid , <8 x i32 > %rsrc2 , i32 0 , i32 0 )
@@ -122,35 +159,69 @@ define amdgpu_ps <3 x float> @sample_load(<8 x i32> inreg %rsrc, <4 x i32> inreg
122
159
}
123
160
124
161
define amdgpu_ps <3 x float > @load_sample (<8 x i32 > inreg %rsrc , <4 x i32 > inreg %samp , <8 x i32 > inreg %rsrc2 , i16 %s.16 , i16 %t.16 , i16 %fragid ) {
125
- ; GFX11-LABEL: load_sample:
126
- ; GFX11: ; %bb.0:
127
- ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
128
- ; GFX11-NEXT: v_mov_b32_e32 v4, 0
129
- ; GFX11-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
130
- ; GFX11-NEXT: s_waitcnt vmcnt(0)
131
- ; GFX11-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
132
- ; GFX11-NEXT: s_waitcnt vmcnt(0)
133
- ; GFX11-NEXT: ; return to shader part epilog
162
+ ; GFX11-TRUE16-LABEL: load_sample:
163
+ ; GFX11-TRUE16: ; %bb.0:
164
+ ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
165
+ ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
166
+ ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
167
+ ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v4, 0
168
+ ; GFX11-TRUE16-NEXT: image_msaa_load v[0:3], v[2:3], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
169
+ ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
170
+ ; GFX11-TRUE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
171
+ ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
172
+ ; GFX11-TRUE16-NEXT: ; return to shader part epilog
134
173
;
135
- ; GFX1150-LABEL: load_sample:
136
- ; GFX1150: ; %bb.0:
137
- ; GFX1150-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
138
- ; GFX1150-NEXT: v_mov_b32_e32 v4, 0
139
- ; GFX1150-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
140
- ; GFX1150-NEXT: s_waitcnt vmcnt(0)
141
- ; GFX1150-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
142
- ; GFX1150-NEXT: s_waitcnt vmcnt(0)
143
- ; GFX1150-NEXT: ; return to shader part epilog
174
+ ; GFX11-FAKE16-LABEL: load_sample:
175
+ ; GFX11-FAKE16: ; %bb.0:
176
+ ; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
177
+ ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v4, 0
178
+ ; GFX11-FAKE16-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
179
+ ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
180
+ ; GFX11-FAKE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
181
+ ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
182
+ ; GFX11-FAKE16-NEXT: ; return to shader part epilog
144
183
;
145
- ; GFX12-LABEL: load_sample:
146
- ; GFX12: ; %bb.0:
147
- ; GFX12-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
148
- ; GFX12-NEXT: v_mov_b32_e32 v4, 0
149
- ; GFX12-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
150
- ; GFX12-NEXT: s_wait_samplecnt 0x0
151
- ; GFX12-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
152
- ; GFX12-NEXT: s_wait_samplecnt 0x0
153
- ; GFX12-NEXT: ; return to shader part epilog
184
+ ; GFX1150-TRUE16-LABEL: load_sample:
185
+ ; GFX1150-TRUE16: ; %bb.0:
186
+ ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
187
+ ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
188
+ ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
189
+ ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v4, 0
190
+ ; GFX1150-TRUE16-NEXT: image_msaa_load v[0:3], v[2:3], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
191
+ ; GFX1150-TRUE16-NEXT: s_waitcnt vmcnt(0)
192
+ ; GFX1150-TRUE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
193
+ ; GFX1150-TRUE16-NEXT: s_waitcnt vmcnt(0)
194
+ ; GFX1150-TRUE16-NEXT: ; return to shader part epilog
195
+ ;
196
+ ; GFX1150-FAKE16-LABEL: load_sample:
197
+ ; GFX1150-FAKE16: ; %bb.0:
198
+ ; GFX1150-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
199
+ ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v4, 0
200
+ ; GFX1150-FAKE16-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
201
+ ; GFX1150-FAKE16-NEXT: s_waitcnt vmcnt(0)
202
+ ; GFX1150-FAKE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
203
+ ; GFX1150-FAKE16-NEXT: s_waitcnt vmcnt(0)
204
+ ; GFX1150-FAKE16-NEXT: ; return to shader part epilog
205
+ ;
206
+ ; GFX12-TRUE16-LABEL: load_sample:
207
+ ; GFX12-TRUE16: ; %bb.0:
208
+ ; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
209
+ ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v4, 0
210
+ ; GFX12-TRUE16-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
211
+ ; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
212
+ ; GFX12-TRUE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
213
+ ; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
214
+ ; GFX12-TRUE16-NEXT: ; return to shader part epilog
215
+ ;
216
+ ; GFX12-FAKE16-LABEL: load_sample:
217
+ ; GFX12-FAKE16: ; %bb.0:
218
+ ; GFX12-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
219
+ ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v4, 0
220
+ ; GFX12-FAKE16-NEXT: image_msaa_load v[0:3], [v0, v2], s[12:19] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm a16
221
+ ; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
222
+ ; GFX12-FAKE16-NEXT: image_sample_lz v2, [v4, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
223
+ ; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
224
+ ; GFX12-FAKE16-NEXT: ; return to shader part epilog
154
225
155
226
%v = call <4 x float > @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32 (i32 1 , i16 %s.16 , i16 %t.16 , i16 %fragid , <8 x i32 > %rsrc2 , i32 0 , i32 0 )
156
227
%w = call <4 x float > @llvm.amdgcn.image.sample.lz.2d.v4f32.f32 (i32 15 , float 0 .000000e+00 , float 0 .000000e+00 , <8 x i32 > %rsrc , <4 x i32 > %samp , i1 false , i32 0 , i32 0 )
0 commit comments