@@ -163,11 +163,69 @@ gpu.func @test_create_tdesc_vc_1(%src: memref<?xf32, 3>) {
163
163
// Creates a tensor descriptor from the ui64 argument and a constant vector of
// four index offsets (0, 8, 16, 24). The descriptor type is 4x2xf32 and carries
// both a #xegpu.scatter_tdesc_attr (written as chunk_size = 2, expected to be
// printed as chunk_size = 2 : i64) and a #xegpu.sg_map with wi_layout = [4, 1].
// The '-'/'+' line pairs below change wi_data from [1, 1] to [1, 2] in both the
// expected-output pattern and the input op.
gpu.func @test_create_tdesc_vc_with_sg_map (%src: ui64 ) {
164
164
//CHECK: %[[cst:.*]] = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
165
165
%0 = arith.constant dense <[0 , 8 , 16 , 24 ]> : vector <4 xindex >
166
- //CHECK: %[[R0:.*]] = xegpu.create_tdesc %[[arg0]], %[[cst]] : ui64, vector<4xindex> -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2 : i64>, #xegpu.sg_map<wi_layout = [4, 1], wi_data = [1, 1 ]>>
167
- %1 = xegpu.create_tdesc %src , %0 : ui64 , vector <4 xindex > -> !xegpu.tensor_desc <4 x2 xf32 , #xegpu.scatter_tdesc_attr <chunk_size = 2 >, #xegpu.sg_map <wi_layout = [4 , 1 ], wi_data = [1 , 1 ]>>
166
+ //CHECK: %[[R0:.*]] = xegpu.create_tdesc %[[arg0]], %[[cst]] : ui64, vector<4xindex> -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2 : i64>, #xegpu.sg_map<wi_layout = [4, 1], wi_data = [1, 2 ]>>
167
+ %1 = xegpu.create_tdesc %src , %0 : ui64 , vector <4 xindex > -> !xegpu.tensor_desc <4 x2 xf32 , #xegpu.scatter_tdesc_attr <chunk_size = 2 >, #xegpu.sg_map <wi_layout = [4 , 1 ], wi_data = [1 , 2 ]>>
168
168
gpu.return
169
169
}
170
170
171
// Load through a 4x2xf32 tensor descriptor that carries both a
// scatter_tdesc_attr (chunk_size = 2) and an sg_map (wi_layout = [4, 1],
// wi_data = [1, 2]). The load sets `transpose`, takes a 4-lane i1 mask and
// yields vector<2x1xf32>.
// CHECK: gpu.func @test_load_with_sg_map(%[[arg0:.*]]: ui64) {
gpu.func @test_load_with_sg_map(%src: ui64) {
  // Four index offsets applied to the ui64 base.
  //CHECK: %[[cst:.*]] = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
  %offsets = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>

  // Mask is written as dense<1> on i1; the expected output spells it dense<true>.
  //CHECK: %[[cst1:.*]] = arith.constant dense<true> : vector<4xi1>
  %mask = arith.constant dense<1> : vector<4xi1>

  // chunk_size is written without a type here; the expected output prints it as `2 : i64`.
  //CHECK: %[[R0:.*]] = xegpu.create_tdesc %[[arg0]], %[[cst]] : ui64, vector<4xindex> -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2 : i64>, #xegpu.sg_map<wi_layout = [4, 1], wi_data = [1, 2]>>
  %tdesc = xegpu.create_tdesc %src, %offsets
      : ui64, vector<4xindex>
      -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2>, #xegpu.sg_map<wi_layout = [4, 1], wi_data = [1, 2]>>

  //CHECK: %[[R1:.*]] = xegpu.load %[[R0]], %[[cst1]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>, transpose}> : !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2 : i64>, #xegpu.sg_map<wi_layout = [4, 1], wi_data = [1, 2]>>, vector<4xi1> -> vector<2x1xf32>
  %loaded = xegpu.load %tdesc, %mask
      <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>, transpose}>
      : !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2>, #xegpu.sg_map<wi_layout = [4, 1], wi_data = [1, 2]>>, vector<4xi1>
      -> vector<2x1xf32>
  gpu.return
}
183
+
184
// Load through a 1-D 4xf32 tensor descriptor whose scatter_tdesc_attr has no
// parameters and whose sg_map is wi_layout = [1, 4], wi_data = [1, 1].
// No `transpose` attribute is set; the result type is vector<1xf32>.
// CHECK: gpu.func @test_load_with_sg_map_2(%[[arg0:.*]]: ui64) {
gpu.func @test_load_with_sg_map_2(%src: ui64) {
  // Four index offsets applied to the ui64 base.
  //CHECK: %[[cst:.*]] = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
  %offsets = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>

  // Mask is written as dense<1> on i1; the expected output spells it dense<true>.
  //CHECK: %[[cst1:.*]] = arith.constant dense<true> : vector<4xi1>
  %mask = arith.constant dense<1> : vector<4xi1>

  //CHECK: %[[R0:.*]] = xegpu.create_tdesc %[[arg0]], %[[cst]] : ui64, vector<4xindex> -> !xegpu.tensor_desc<4xf32, #xegpu.scatter_tdesc_attr<>, #xegpu.sg_map<wi_layout = [1, 4], wi_data = [1, 1]>>
  %tdesc = xegpu.create_tdesc %src, %offsets
      : ui64, vector<4xindex>
      -> !xegpu.tensor_desc<4xf32, #xegpu.scatter_tdesc_attr<>, #xegpu.sg_map<wi_layout = [1, 4], wi_data = [1, 1]>>

  //CHECK: %[[R1:.*]] = xegpu.load %[[R0]], %[[cst1]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<4xf32, #xegpu.scatter_tdesc_attr<>, #xegpu.sg_map<wi_layout = [1, 4], wi_data = [1, 1]>>, vector<4xi1> -> vector<1xf32>
  %loaded = xegpu.load %tdesc, %mask
      <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>
      : !xegpu.tensor_desc<4xf32, #xegpu.scatter_tdesc_attr<>, #xegpu.sg_map<wi_layout = [1, 4], wi_data = [1, 1]>>, vector<4xi1>
      -> vector<1xf32>
  gpu.return
}
196
+
197
// Store of a constant vector<2x1xf32> through a 4x2xf32 tensor descriptor that
// carries a scatter_tdesc_attr (chunk_size = 2) and an sg_map
// (wi_layout = [4, 1], wi_data = [1, 2]). The store sets `transpose` and takes
// a 4-lane i1 mask.
// CHECK: gpu.func @test_store_with_sg_map(%[[arg0:.*]]: ui64) {
gpu.func @test_store_with_sg_map(%src: ui64) {
  // Four index offsets applied to the ui64 base.
  //CHECK: %[[cst:.*]] = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
  %offsets = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>

  // Mask is written as dense<1> on i1; the expected output spells it dense<true>.
  //CHECK: %[[cst1:.*]] = arith.constant dense<true> : vector<4xi1>
  %mask = arith.constant dense<1> : vector<4xi1>

  // Value operand; 2.9 is expected to be printed as 2.900000e+00.
  //CHECK: %[[cst2:.*]] = arith.constant dense<2.900000e+00> : vector<2x1xf32>
  %value = arith.constant dense<2.9> : vector<2x1xf32>

  // chunk_size is written without a type here; the expected output prints it as `2 : i64`.
  //CHECK: %[[R0:.*]] = xegpu.create_tdesc %[[arg0]], %[[cst]] : ui64, vector<4xindex> -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2 : i64>, #xegpu.sg_map<wi_layout = [4, 1], wi_data = [1, 2]>>
  %tdesc = xegpu.create_tdesc %src, %offsets
      : ui64, vector<4xindex>
      -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2>, #xegpu.sg_map<wi_layout = [4, 1], wi_data = [1, 2]>>

  // Operand order is value, descriptor, mask.
  //CHECK: xegpu.store %[[cst2]], %[[R0]], %[[cst1]] <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<uncached>, transpose}> : vector<2x1xf32>, !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2 : i64>, #xegpu.sg_map<wi_layout = [4, 1], wi_data = [1, 2]>>, vector<4xi1>
  xegpu.store %value, %tdesc, %mask
      <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<uncached>, transpose}>
      : vector<2x1xf32>, !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2>, #xegpu.sg_map<wi_layout = [4, 1], wi_data = [1, 2]>>, vector<4xi1>
  gpu.return
}
211
+
212
// Store of a constant vector<1xf32> through a 1-D 4xf32 tensor descriptor whose
// scatter_tdesc_attr has no parameters and whose sg_map is wi_layout = [1, 4],
// wi_data = [1, 1]. No `transpose` attribute is set.
// CHECK: gpu.func @test_store_with_sg_map_2(%[[arg0:.*]]: ui64) {
gpu.func @test_store_with_sg_map_2(%src: ui64) {
  // Four index offsets applied to the ui64 base.
  //CHECK: %[[cst:.*]] = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
  %offsets = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>

  // Mask is written as dense<1> on i1; the expected output spells it dense<true>.
  //CHECK: %[[cst1:.*]] = arith.constant dense<true> : vector<4xi1>
  %mask = arith.constant dense<1> : vector<4xi1>

  // Value operand; 2.9 is expected to be printed as 2.900000e+00.
  //CHECK: %[[cst2:.*]] = arith.constant dense<2.900000e+00> : vector<1xf32>
  %value = arith.constant dense<2.9> : vector<1xf32>

  //CHECK: %[[R0:.*]] = xegpu.create_tdesc %[[arg0]], %[[cst]] : ui64, vector<4xindex> -> !xegpu.tensor_desc<4xf32, #xegpu.scatter_tdesc_attr<>, #xegpu.sg_map<wi_layout = [1, 4], wi_data = [1, 1]>>
  %tdesc = xegpu.create_tdesc %src, %offsets
      : ui64, vector<4xindex>
      -> !xegpu.tensor_desc<4xf32, #xegpu.scatter_tdesc_attr<>, #xegpu.sg_map<wi_layout = [1, 4], wi_data = [1, 1]>>

  // Operand order is value, descriptor, mask.
  //CHECK: xegpu.store %[[cst2]], %[[R0]], %[[cst1]] <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<uncached>}> : vector<1xf32>, !xegpu.tensor_desc<4xf32, #xegpu.scatter_tdesc_attr<>, #xegpu.sg_map<wi_layout = [1, 4], wi_data = [1, 1]>>, vector<4xi1>
  xegpu.store %value, %tdesc, %mask
      <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<uncached>}>
      : vector<1xf32>, !xegpu.tensor_desc<4xf32, #xegpu.scatter_tdesc_attr<>, #xegpu.sg_map<wi_layout = [1, 4], wi_data = [1, 1]>>, vector<4xi1>
  gpu.return
}
226
+
227
+
228
+
171
229
// CHECK: gpu.func @test_prefetch_vc(%[[arg0:.*]]: ui64) {
172
230
gpu.func @test_prefetch_vc (%src: ui64 ) {
173
231
//CHECK: %[[cst:.*]] = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
0 commit comments