@@ -334,3 +334,100 @@ llvm.func @arm_sme_vector_to_tile_vert(%tileslice : i32,
334
334
(i32 , i32 , vector <[2 ]xi1 >, vector <[2 ]xf64 >) -> ()
335
335
llvm.return
336
336
}
337
+
338
+ // -----
339
+
340
+
341
// Exercises translation of "arm_sme.intr.read.horiz" to LLVM IR for each
// element type used below: i8, i16, i32, i64, i128, f16, bf16, f32 and f64.
// Every op receives a data vector, an i1 mask vector with the same lane
// count, the constant tile value 0, and the %tileslice argument, and yields a
// vector of the same type as the data vector. The FileCheck directive placed
// directly above each op names the llvm.aarch64.sme.read.horiz.* call that is
// expected in the translated output.
// NOTE(review): the data vector is presumably the passthrough value and the
// mask the governing predicate -- confirm against the intrinsic definition.
llvm.func @arm_sme_tile_slice_to_vector_horiz(%tileslice : i32,
                                              %nxv16i1 : vector<[16]xi1>,
                                              %nxv8i1 : vector<[8]xi1>,
                                              %nxv4i1 : vector<[4]xi1>,
                                              %nxv2i1 : vector<[2]xi1>,
                                              %nxv1i1 : vector<[1]xi1>,
                                              %nxv16i8 : vector<[16]xi8>,
                                              %nxv8i16 : vector<[8]xi16>,
                                              %nxv4i32 : vector<[4]xi32>,
                                              %nxv2i64 : vector<[2]xi64>,
                                              %nxv1i128 : vector<[1]xi128>,
                                              %nxv8f16 : vector<[8]xf16>,
                                              %nxv8bf16 : vector<[8]xbf16>,
                                              %nxv4f32 : vector<[4]xf32>,
                                              %nxv2f64 : vector<[2]xf64>) {
  // Tile operand shared by every read below.
  %tile = llvm.mlir.constant(0 : index) : i32
  // CHECK: call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8
  %res0 = "arm_sme.intr.read.horiz"(%nxv16i8, %nxv16i1, %tile, %tileslice)
    : (vector<[16]xi8>, vector<[16]xi1>, i32, i32) -> vector<[16]xi8>
  // CHECK: call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16
  %res1 = "arm_sme.intr.read.horiz"(%nxv8i16, %nxv8i1, %tile, %tileslice)
    : (vector<[8]xi16>, vector<[8]xi1>, i32, i32) -> vector<[8]xi16>
  // CHECK: call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32
  %res2 = "arm_sme.intr.read.horiz"(%nxv4i32, %nxv4i1, %tile, %tileslice)
    : (vector<[4]xi32>, vector<[4]xi1>, i32, i32) -> vector<[4]xi32>
  // CHECK: call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64
  %res3 = "arm_sme.intr.read.horiz"(%nxv2i64, %nxv2i1, %tile, %tileslice)
    : (vector<[2]xi64>, vector<[2]xi1>, i32, i32) -> vector<[2]xi64>
  // CHECK: call <vscale x 1 x i128> @llvm.aarch64.sme.read.horiz.nxv1i128
  %res4 = "arm_sme.intr.read.horiz"(%nxv1i128, %nxv1i1, %tile, %tileslice)
    : (vector<[1]xi128>, vector<[1]xi1>, i32, i32) -> vector<[1]xi128>
  // CHECK: call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16
  %res5 = "arm_sme.intr.read.horiz"(%nxv8f16, %nxv8i1, %tile, %tileslice)
    : (vector<[8]xf16>, vector<[8]xi1>, i32, i32) -> vector<[8]xf16>
  // CHECK: call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16
  %res6 = "arm_sme.intr.read.horiz"(%nxv8bf16, %nxv8i1, %tile, %tileslice)
    : (vector<[8]xbf16>, vector<[8]xi1>, i32, i32) -> vector<[8]xbf16>
  // CHECK: call <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32
  %res7 = "arm_sme.intr.read.horiz"(%nxv4f32, %nxv4i1, %tile, %tileslice)
    : (vector<[4]xf32>, vector<[4]xi1>, i32, i32) -> vector<[4]xf32>
  // CHECK: call <vscale x 2 x double> @llvm.aarch64.sme.read.horiz.nxv2f64
  %res8 = "arm_sme.intr.read.horiz"(%nxv2f64, %nxv2i1, %tile, %tileslice)
    : (vector<[2]xf64>, vector<[2]xi1>, i32, i32) -> vector<[2]xf64>
  llvm.return
}
386
+
387
+ // -----
388
+
389
// Exercises translation of "arm_sme.intr.read.vert" to LLVM IR for each
// element type used below: i8, i16, i32, i64, i128, f16, bf16, f32 and f64.
// Every op receives a data vector, an i1 mask vector with the same lane
// count, the constant tile value 0, and the %tileslice argument, and yields a
// vector of the same type as the data vector. The FileCheck directive placed
// directly above each op names the llvm.aarch64.sme.read.vert.* call that is
// expected in the translated output.
// NOTE(review): the data vector is presumably the passthrough value and the
// mask the governing predicate -- confirm against the intrinsic definition.
llvm.func @arm_sme_tile_slice_to_vector_vert(%tileslice : i32,
                                             %nxv16i1 : vector<[16]xi1>,
                                             %nxv8i1 : vector<[8]xi1>,
                                             %nxv4i1 : vector<[4]xi1>,
                                             %nxv2i1 : vector<[2]xi1>,
                                             %nxv1i1 : vector<[1]xi1>,
                                             %nxv16i8 : vector<[16]xi8>,
                                             %nxv8i16 : vector<[8]xi16>,
                                             %nxv4i32 : vector<[4]xi32>,
                                             %nxv2i64 : vector<[2]xi64>,
                                             %nxv1i128 : vector<[1]xi128>,
                                             %nxv8f16 : vector<[8]xf16>,
                                             %nxv8bf16 : vector<[8]xbf16>,
                                             %nxv4f32 : vector<[4]xf32>,
                                             %nxv2f64 : vector<[2]xf64>) {
  // Tile operand shared by every read below.
  %tile = llvm.mlir.constant(0 : index) : i32
  // CHECK: call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8
  %res0 = "arm_sme.intr.read.vert"(%nxv16i8, %nxv16i1, %tile, %tileslice)
    : (vector<[16]xi8>, vector<[16]xi1>, i32, i32) -> vector<[16]xi8>
  // CHECK: call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16
  %res1 = "arm_sme.intr.read.vert"(%nxv8i16, %nxv8i1, %tile, %tileslice)
    : (vector<[8]xi16>, vector<[8]xi1>, i32, i32) -> vector<[8]xi16>
  // CHECK: call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32
  %res2 = "arm_sme.intr.read.vert"(%nxv4i32, %nxv4i1, %tile, %tileslice)
    : (vector<[4]xi32>, vector<[4]xi1>, i32, i32) -> vector<[4]xi32>
  // CHECK: call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64
  %res3 = "arm_sme.intr.read.vert"(%nxv2i64, %nxv2i1, %tile, %tileslice)
    : (vector<[2]xi64>, vector<[2]xi1>, i32, i32) -> vector<[2]xi64>
  // CHECK: call <vscale x 1 x i128> @llvm.aarch64.sme.read.vert.nxv1i128
  %res4 = "arm_sme.intr.read.vert"(%nxv1i128, %nxv1i1, %tile, %tileslice)
    : (vector<[1]xi128>, vector<[1]xi1>, i32, i32) -> vector<[1]xi128>
  // CHECK: call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16
  %res5 = "arm_sme.intr.read.vert"(%nxv8f16, %nxv8i1, %tile, %tileslice)
    : (vector<[8]xf16>, vector<[8]xi1>, i32, i32) -> vector<[8]xf16>
  // CHECK: call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16
  %res6 = "arm_sme.intr.read.vert"(%nxv8bf16, %nxv8i1, %tile, %tileslice)
    : (vector<[8]xbf16>, vector<[8]xi1>, i32, i32) -> vector<[8]xbf16>
  // CHECK: call <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32
  %res7 = "arm_sme.intr.read.vert"(%nxv4f32, %nxv4i1, %tile, %tileslice)
    : (vector<[4]xf32>, vector<[4]xi1>, i32, i32) -> vector<[4]xf32>
  // CHECK: call <vscale x 2 x double> @llvm.aarch64.sme.read.vert.nxv2f64
  %res8 = "arm_sme.intr.read.vert"(%nxv2f64, %nxv2i1, %tile, %tileslice)
    : (vector<[2]xf64>, vector<[2]xi1>, i32, i32) -> vector<[2]xf64>
  llvm.return
}
0 commit comments