@@ -158,4 +158,145 @@ gpu.module @test {
158
158
%c = xegpu.dpas %a , %b : vector <32 x32 xf16 >, vector <32 x32 xf16 > -> vector <32 x32 xf32 >
159
159
gpu.return %c : vector <32 x32 xf32 >
160
160
}
161
+
162
+ //-----
163
+
// Test: scattered create_tdesc fed by a constant offset vector.
// Input IR: a single xegpu.create_tdesc producing a 32-element f32 scattered
// descriptor whose layout attribute requests inst_data = [16].
// Expected output (per the directives below): two create_tdesc ops on the
// same source argument, each taking vector<16xindex> offsets and yielding a
// 16-element scattered descriptor with no layout attribute.
// NOTE(review): the pass being exercised is selected by the lit run line,
// which is outside this hunk -- confirm there.
164
+ // CHECK-LABEL: test_create_tdesc_vec
165
+ // CHECK-SAME: [[arg0:%.+]]: ui64
166
+ // CHECK-COUNT-2: xegpu.create_tdesc [[arg0]], {{.*}} : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
167
+ gpu.func @test_create_tdesc_vec (%src: ui64 ) -> !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>> {
// 32 constant offsets: 0, 8, 16, ..., 248 (consecutive multiples of 8).
168
+ %cst = arith.constant dense <[
169
+ 0 , 8 , 16 , 24 , 32 , 40 , 48 , 56 ,
170
+ 64 , 72 , 80 , 88 , 96 , 104 , 112 , 120 ,
171
+ 128 , 136 , 144 , 152 , 160 , 168 , 176 , 184 ,
172
+ 192 , 200 , 208 , 216 , 224 , 232 , 240 , 248
173
+ ]> : vector <32 xindex >
// The descriptor is returned so that it has a use in the function.
174
+ %tdesc = xegpu.create_tdesc %src , %cst : ui64 , vector <32 xindex > -> !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
175
+ gpu.return %tdesc : !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
176
+ }
177
+
178
+ //-----
179
+
// Test: scattered create_tdesc whose offsets are computed by ops instead of
// being a literal constant.
// Input IR: offsets = vector.step * splat(8), feeding one create_tdesc that
// produces a 32-element f32 scattered descriptor with inst_data = [16].
// Expected output (per the directives below): two create_tdesc ops on the
// same source argument, each with vector<16xindex> offsets and a 16-element
// descriptor type that carries no layout attribute.
180
+ // CHECK-LABEL: test_create_tdesc_step
181
+ // CHECK-SAME: [[arg0:%.+]]: ui64
182
+ // CHECK-COUNT-2: xegpu.create_tdesc [[arg0]], {{.*}} : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
183
+ gpu.func @test_create_tdesc_step (%src: ui64 ) -> !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>> {
// Splat of 8 used as the per-lane multiplier.
184
+ %step = arith.constant dense <8 > : vector <32 xindex >
// NOTE(review): vector.step is expected to yield 0, 1, ..., 31, which makes
// the product below 0, 8, ..., 248 -- the same values as the literal offsets
// in the sibling tests. Confirm against the vector dialect documentation.
185
+ %seq = vector.step : vector <32 xindex >
186
+ %cst = arith.muli %seq , %step : vector <32 xindex >
187
+ %tdesc = xegpu.create_tdesc %src , %cst : ui64 , vector <32 xindex > -> !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
188
+ gpu.return %tdesc : !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
189
+ }
190
+
191
+ //-----
192
+
// Test: masked scattered load through a 32-element descriptor with
// inst_data = [16].
// Expected output (per the directives below), in this order: two
// create_tdesc ops with vector<16xindex> offsets, then two xegpu.load ops,
// each taking a 16-element descriptor and a vector<16xi1> mask and producing
// vector<16xf32>.
193
+ // CHECK-LABEL: test_load
194
+ // CHECK-SAME: [[arg0:%.+]]: ui64
195
+ // CHECK-COUNT-2: xegpu.create_tdesc [[arg0]], {{.*}} : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
196
+ // CHECK-COUNT-2: xegpu.load {{.*}} : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1> -> vector<16xf32>
197
+ gpu.func @test_load (%src: ui64 ) -> vector <32 xf32 > {
// 32 constant offsets: 0, 8, 16, ..., 248 (consecutive multiples of 8).
198
+ %cst = arith.constant dense <[
199
+ 0 , 8 , 16 , 24 , 32 , 40 , 48 , 56 ,
200
+ 64 , 72 , 80 , 88 , 96 , 104 , 112 , 120 ,
201
+ 128 , 136 , 144 , 152 , 160 , 168 , 176 , 184 ,
202
+ 192 , 200 , 208 , 216 , 224 , 232 , 240 , 248
203
+ ]> : vector <32 xindex >
204
+
// Mask bound is 17, which is not a multiple of the 16-lane inst_data size.
// NOTE(review): per vector.create_mask semantics this should enable lanes
// 0..16, so the mask boundary falls inside the second 16-lane chunk --
// confirm against the vector dialect documentation.
205
+ %c17 = arith.constant 17 : index
206
+ %mask = vector.create_mask %c17: vector <32 xi1 >
207
+ %tdesc = xegpu.create_tdesc %src , %cst : ui64 , vector <32 xindex > -> !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
208
+ %ld = xegpu.load %tdesc , %mask: !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>, vector <32 xi1 > -> vector <32 xf32 >
209
+
// The loaded vector is returned so that the load has a use.
210
+ gpu.return %ld : vector <32 xf32 >
211
+ }
212
+
213
+ //-----
214
+
// Test: scattered prefetch through a 32-element descriptor with
// inst_data = [16].
// Expected output (per the directives below), in this order: two
// create_tdesc ops with vector<16xindex> offsets, then two xegpu.prefetch
// ops, each on a 16-element scattered descriptor without a layout attribute.
// NOTE(review): this label string is also a prefix of the later
// test_prefetch_load_store_update function name; matching relies on the
// labels being encountered in file order.
215
+ // CHECK-LABEL: test_prefetch
216
+ // CHECK-SAME: [[arg0:%.+]]: ui64
217
+ // CHECK-COUNT-2: xegpu.create_tdesc [[arg0]], {{.*}} : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
218
+ // CHECK-COUNT-2: xegpu.prefetch {{.*}} : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
219
+ gpu.func @test_prefetch (%src: ui64 ) {
220
+
// 32 constant offsets: 0, 8, 16, ..., 248 (consecutive multiples of 8).
221
+ %cst = arith.constant dense <[
222
+ 0 , 8 , 16 , 24 , 32 , 40 , 48 , 56 ,
223
+ 64 , 72 , 80 , 88 , 96 , 104 , 112 , 120 ,
224
+ 128 , 136 , 144 , 152 , 160 , 168 , 176 , 184 ,
225
+ 192 , 200 , 208 , 216 , 224 , 232 , 240 , 248
226
+ ]> : vector <32 xindex >
227
+
228
+ %tdesc = xegpu.create_tdesc %src , %cst : ui64 , vector <32 xindex > -> !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
229
+
// The prefetch produces no result; the function returns nothing.
230
+ xegpu.prefetch %tdesc: !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
231
+ gpu.return
232
+ }
233
+
234
+ //-----
235
+
// Test: masked scattered store through a 32-element descriptor with
// inst_data = [16].
// Expected output (per the directives below), in this order: two
// create_tdesc ops with vector<16xindex> offsets, then two xegpu.store ops,
// each storing vector<16xf32> through a 16-element descriptor under a
// vector<16xi1> mask.
236
+ // CHECK-LABEL: test_store
237
+ // CHECK-SAME: [[arg0:%.+]]: ui64
238
+ // CHECK-COUNT-2: xegpu.create_tdesc [[arg0]], {{.*}} : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
239
+ // CHECK-COUNT-2: xegpu.store {{.*}} : vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1>
240
+ gpu.func @test_store (%src: ui64 ) {
// 32 constant offsets: 0, 8, 16, ..., 248 (consecutive multiples of 8).
241
+ %cst = arith.constant dense <[
242
+ 0 , 8 , 16 , 24 , 32 , 40 , 48 , 56 ,
243
+ 64 , 72 , 80 , 88 , 96 , 104 , 112 , 120 ,
244
+ 128 , 136 , 144 , 152 , 160 , 168 , 176 , 184 ,
245
+ 192 , 200 , 208 , 216 , 224 , 232 , 240 , 248
246
+ ]> : vector <32 xindex >
247
+
// Mask bound is 17, which is not a multiple of the 16-lane inst_data size.
// NOTE(review): per vector.create_mask semantics this should enable lanes
// 0..16 -- confirm against the vector dialect documentation.
248
+ %c17 = arith.constant 17 : index
249
+ %mask = vector.create_mask %c17: vector <32 xi1 >
250
+
// Value to store: a 32-lane splat of 1023.0.
251
+ %st_vec = arith.constant dense <1023.0 >: vector <32 xf32 >
252
+ %tdesc = xegpu.create_tdesc %src , %cst : ui64 , vector <32 xindex > -> !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
253
+ xegpu.store %st_vec , %tdesc , %mask: vector <32 xf32 >, !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>, vector <32 xi1 >
254
+
255
+ gpu.return
256
+ }
257
+
258
+ //-----
259
+
// Test: a chain of scattered ops sharing one 32-element descriptor with
// inst_data = [16]: prefetch, update_offset, masked load, add, masked store.
// Expected output (per the directives below), in this order and two of each:
// create_tdesc, prefetch, update_offset (with vector<16xindex> deltas),
// load (vector<16xi1> mask -> vector<16xf32>), and store. Every matched
// descriptor type is 16-element and carries no layout attribute.
260
+ // CHECK-LABEL: test_prefetch_load_store_update
261
+ // CHECK-SAME: [[arg0:%.+]]: ui64
262
+ // CHECK-COUNT-2: xegpu.create_tdesc [[arg0]], {{.*}} : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
263
+ // CHECK-COUNT-2: xegpu.prefetch {{.*}} : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
264
+ // CHECK-COUNT-2: xegpu.update_offset {{.*}} : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xindex>
265
+ // CHECK-COUNT-2: xegpu.load {{.*}} : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1> -> vector<16xf32>
266
+ // CHECK-COUNT-2: xegpu.store {{.*}} : vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1>
267
+
268
+ gpu.func @test_prefetch_load_store_update (%src: ui64 ) {
269
+
// 32 constant offsets: 0, 8, 16, ..., 248 (consecutive multiples of 8).
270
+ %cst = arith.constant dense <[
271
+ 0 , 8 , 16 , 24 , 32 , 40 , 48 , 56 ,
272
+ 64 , 72 , 80 , 88 , 96 , 104 , 112 , 120 ,
273
+ 128 , 136 , 144 , 152 , 160 , 168 , 176 , 184 ,
274
+ 192 , 200 , 208 , 216 , 224 , 232 , 240 , 248
275
+ ]> : vector <32 xindex >
276
+
277
+ %tdesc = xegpu.create_tdesc %src , %cst : ui64 , vector <32 xindex > -> !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
278
+ xegpu.prefetch %tdesc: !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
279
+
// Per-lane offset deltas: 32 for lanes 0-14, 64 for lane 15,
// 128 for lanes 16-30, 256 for lane 31. The two 16-lane halves therefore
// hold different values, and neither half is a uniform splat.
280
+ %delta = arith.constant dense <[
281
+ 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 ,
282
+ 32 , 32 , 32 , 32 , 32 , 32 , 32 , 64 ,
283
+ 128 , 128 , 128 , 128 , 128 , 128 , 128 , 128 ,
284
+ 128 , 128 , 128 , 128 , 128 , 128 , 128 , 256
285
+ ]> : vector <32 xindex >
286
+ %new_tdesc = xegpu.update_offset %tdesc , %delta
287
+ : !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>, vector <32 xindex >
288
+
// Mask bound is 17, which is not a multiple of the 16-lane inst_data size.
// NOTE(review): per vector.create_mask semantics this should enable lanes
// 0..16 -- confirm against the vector dialect documentation.
289
+ %c17 = arith.constant 17 : index
290
+ %mask = vector.create_mask %c17: vector <32 xi1 >
291
+
// The load reads through the updated descriptor.
292
+ %ld_vec = xegpu.load %new_tdesc , %mask: !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>, vector <32 xi1 > -> vector <32 xf32 >
293
+
// Loaded value added to itself, giving the store a computed operand.
294
+ %st_vec = arith.addf %ld_vec , %ld_vec : vector <32 xf32 >
// NOTE(review): the store goes through the original %tdesc, not %new_tdesc.
// Confirm this is intentional rather than a leftover.
295
+ xegpu.store %st_vec , %tdesc , %mask:
296
+ vector <32 xf32 >,
297
+ !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>,
298
+ vector <32 xi1 >
299
+
300
+ gpu.return
301
+ }
161
302
}
0 commit comments