@@ -387,3 +387,235 @@ if.then:
 if.else:
   ret void
 }
+
+; Different sizes / types
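+; Each test below stores a vector of one type and loads a vector of a different
+; element type, size, or scalability from the same (or an offset) pointer.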
+
+define <vscale x 16 x i8> @load_v16i8_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: @load_v16i8_store_v4i32_forward_load(
+; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
+; CHECK-NEXT: ret <vscale x 16 x i8> [[LOAD]]
+;
+  store <vscale x 4 x i32> %x, ptr %p
+  %load = load <vscale x 16 x i8>, ptr %p
+  ret <vscale x 16 x i8> %load
+}
+
+define <vscale x 4 x float> @load_v4f32_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: @load_v4f32_store_v4i32_forward_load(
+; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x float>, ptr [[P]], align 16
+; CHECK-NEXT: ret <vscale x 4 x float> [[LOAD]]
+;
+  store <vscale x 4 x i32> %x, ptr %p
+  %load = load <vscale x 4 x float>, ptr %p
+  ret <vscale x 4 x float> %load
+}
+
+define <vscale x 4 x float> @load_v4f32_store_v16i8_forward_load(ptr %p, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: @load_v4f32_store_v16i8_forward_load(
+; CHECK-NEXT: store <vscale x 16 x i8> [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x float>, ptr [[P]], align 16
+; CHECK-NEXT: ret <vscale x 4 x float> [[LOAD]]
+;
+  store <vscale x 16 x i8> %x, ptr %p
+  %load = load <vscale x 4 x float>, ptr %p
+  ret <vscale x 4 x float> %load
+}
+
+define <vscale x 4 x i32> @load_v4i32_store_v4f32_forward_load(ptr %p, <vscale x 4 x float> %x) {
+; CHECK-LABEL: @load_v4i32_store_v4f32_forward_load(
+; CHECK-NEXT: store <vscale x 4 x float> [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
+; CHECK-NEXT: ret <vscale x 4 x i32> [[LOAD]]
+;
+  store <vscale x 4 x float> %x, ptr %p
+  %load = load <vscale x 4 x i32>, ptr %p
+  ret <vscale x 4 x i32> %load
+}
+
+define <vscale x 4 x i32> @load_v4i32_store_v4i64_forward_load(ptr %p, <vscale x 4 x i64> %x) {
+; CHECK-LABEL: @load_v4i32_store_v4i64_forward_load(
+; CHECK-NEXT: store <vscale x 4 x i64> [[X:%.*]], ptr [[P:%.*]], align 32
+; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
+; CHECK-NEXT: ret <vscale x 4 x i32> [[LOAD]]
+;
+  store <vscale x 4 x i64> %x, ptr %p
+  %load = load <vscale x 4 x i32>, ptr %p
+  ret <vscale x 4 x i32> %load
+}
+
+define <vscale x 4 x i64> @load_v4i64_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: @load_v4i64_store_v4i32_forward_load(
+; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x i64>, ptr [[P]], align 32
+; CHECK-NEXT: ret <vscale x 4 x i64> [[LOAD]]
+;
+  store <vscale x 4 x i32> %x, ptr %p
+  %load = load <vscale x 4 x i64>, ptr %p
+  ret <vscale x 4 x i64> %load
+}
+
+define <vscale x 2 x i32> @load_v2i32_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: @load_v2i32_store_v4i32_forward_load(
+; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[P]], align 8
+; CHECK-NEXT: ret <vscale x 2 x i32> [[LOAD]]
+;
+  store <vscale x 4 x i32> %x, ptr %p
+  %load = load <vscale x 2 x i32>, ptr %p
+  ret <vscale x 2 x i32> %load
+}
+
+define <vscale x 2 x i32> @load_v2i32_store_v4i32_forward_load_offsets(ptr %p, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: @load_v2i32_store_v4i32_forward_load_offsets(
+; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT: [[Q:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[P]], i64 1
+; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[Q]], align 8
+; CHECK-NEXT: ret <vscale x 2 x i32> [[LOAD]]
+;
+  store <vscale x 4 x i32> %x, ptr %p
+  %q = getelementptr <vscale x 2 x i32>, ptr %p, i64 1
+  %load = load <vscale x 2 x i32>, ptr %q
+  ret <vscale x 2 x i32> %load
+}
+
+define <vscale x 2 x i32> @load_v2i32_store_v4i32_forward_load_offsetc(ptr %p, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: @load_v2i32_store_v4i32_forward_load_offsetc(
+; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT: [[Q:%.*]] = getelementptr <2 x i32>, ptr [[P]], i64 1
+; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[Q]], align 8
+; CHECK-NEXT: ret <vscale x 2 x i32> [[LOAD]]
+;
+  store <vscale x 4 x i32> %x, ptr %p
+  %q = getelementptr <2 x i32>, ptr %p, i64 1
+  %load = load <vscale x 2 x i32>, ptr %q
+  ret <vscale x 2 x i32> %load
+}
+
+define <vscale x 2 x ptr> @load_v2p0_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: @load_v2p0_store_v4i32_forward_load(
+; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 2 x ptr>, ptr [[P]], align 16
+; CHECK-NEXT: ret <vscale x 2 x ptr> [[LOAD]]
+;
+  store <vscale x 4 x i32> %x, ptr %p
+  %load = load <vscale x 2 x ptr>, ptr %p
+  ret <vscale x 2 x ptr> %load
+}
+
+define <vscale x 2 x i64> @load_v2i64_store_v2p0_forward_load(ptr %p, <vscale x 2 x ptr> %x) {
+; CHECK-LABEL: @load_v2i64_store_v2p0_forward_load(
+; CHECK-NEXT: store <vscale x 2 x ptr> [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[P]], align 16
+; CHECK-NEXT: ret <vscale x 2 x i64> [[LOAD]]
+;
+  store <vscale x 2 x ptr> %x, ptr %p
+  %load = load <vscale x 2 x i64>, ptr %p
+  ret <vscale x 2 x i64> %load
+}
+
+define <vscale x 16 x i8> @load_nxv16i8_store_v4i32_forward_load(ptr %p, <4 x i32> %x) {
+; CHECK-LABEL: @load_nxv16i8_store_v4i32_forward_load(
+; CHECK-NEXT: store <4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
+; CHECK-NEXT: ret <vscale x 16 x i8> [[LOAD]]
+;
+  store <4 x i32> %x, ptr %p
+  %load = load <vscale x 16 x i8>, ptr %p
+  ret <vscale x 16 x i8> %load
+}
+
+define <16 x i8> @load_v16i8_store_nxv4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: @load_v16i8_store_nxv4i32_forward_load(
+; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
+; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[P]], align 16
+; CHECK-NEXT: ret <16 x i8> [[LOAD]]
+;
+  store <vscale x 4 x i32> %x, ptr %p
+  %load = load <16 x i8>, ptr %p
+  ret <16 x i8> %load
+}
+
+define <vscale x 16 x i8> @load_v16i8_store_v4i32_forward_constant(ptr %p) {
+; CHECK-LABEL: @load_v16i8_store_v4i32_forward_constant(
+; CHECK-NEXT: store <vscale x 4 x i32> splat (i32 4), ptr [[P:%.*]], align 16
+; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
+; CHECK-NEXT: ret <vscale x 16 x i8> [[LOAD]]
+;
+  store <vscale x 4 x i32> splat (i32 4), ptr %p
+  %load = load <vscale x 16 x i8>, ptr %p
+  ret <vscale x 16 x i8> %load
+}
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @bigexample({ <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a) vscale_range(1,16) {
+; CHECK-LABEL: @bigexample(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[REF_TMP:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr nonnull [[REF_TMP]])
+; CHECK-NEXT: [[A_ELT:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A:%.*]], 0
+; CHECK-NEXT: store <vscale x 4 x i32> [[A_ELT]], ptr [[REF_TMP]], align 16
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 4
+; CHECK-NEXT: [[REF_TMP_REPACK1:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP1]]
+; CHECK-NEXT: [[A_ELT2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A]], 1
+; CHECK-NEXT: store <vscale x 4 x i32> [[A_ELT2]], ptr [[REF_TMP_REPACK1]], align 16
+; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 5
+; CHECK-NEXT: [[REF_TMP_REPACK3:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP3]]
+; CHECK-NEXT: [[A_ELT4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A]], 2
+; CHECK-NEXT: store <vscale x 4 x i32> [[A_ELT4]], ptr [[REF_TMP_REPACK3]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP0]], 48
+; CHECK-NEXT: [[REF_TMP_REPACK5:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP5]]
+; CHECK-NEXT: [[A_ELT6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A]], 3
+; CHECK-NEXT: store <vscale x 4 x i32> [[A_ELT6]], ptr [[REF_TMP_REPACK5]], align 16
+; CHECK-NEXT: [[DOTUNPACK:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP]], align 16
+; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[DOTUNPACK]], 0
+; CHECK-NEXT: [[DOTUNPACK8:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP_REPACK1]], align 16
+; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP6]], <vscale x 16 x i8> [[DOTUNPACK8]], 1
+; CHECK-NEXT: [[DOTUNPACK10:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP_REPACK3]], align 16
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]], <vscale x 16 x i8> [[DOTUNPACK10]], 2
+; CHECK-NEXT: [[DOTUNPACK12:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP_REPACK5]], align 16
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP12]], <vscale x 16 x i8> [[DOTUNPACK12]], 3
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr nonnull [[REF_TMP]])
+; CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP15]]
+;
+entry:
+  %ref.tmp = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
+  call void @llvm.lifetime.start.p0(i64 -1, ptr nonnull %ref.tmp)
+  %a.elt = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a, 0
+  store <vscale x 4 x i32> %a.elt, ptr %ref.tmp, align 16
+  %0 = call i64 @llvm.vscale.i64()
+  %1 = shl i64 %0, 4
+  %ref.tmp.repack1 = getelementptr inbounds i8, ptr %ref.tmp, i64 %1
+  %a.elt2 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a, 1
+  store <vscale x 4 x i32> %a.elt2, ptr %ref.tmp.repack1, align 16
+  %2 = call i64 @llvm.vscale.i64()
+  %3 = shl i64 %2, 5
+  %ref.tmp.repack3 = getelementptr inbounds i8, ptr %ref.tmp, i64 %3
+  %a.elt4 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a, 2
+  store <vscale x 4 x i32> %a.elt4, ptr %ref.tmp.repack3, align 16
+  %4 = call i64 @llvm.vscale.i64()
+  %5 = mul i64 %4, 48
+  %ref.tmp.repack5 = getelementptr inbounds i8, ptr %ref.tmp, i64 %5
+  %a.elt6 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a, 3
+  store <vscale x 4 x i32> %a.elt6, ptr %ref.tmp.repack5, align 16
+  %.unpack = load <vscale x 16 x i8>, ptr %ref.tmp, align 16
+  %6 = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> %.unpack, 0
+  %7 = call i64 @llvm.vscale.i64()
+  %8 = shl i64 %7, 4
+  %.elt7 = getelementptr inbounds i8, ptr %ref.tmp, i64 %8
+  %.unpack8 = load <vscale x 16 x i8>, ptr %.elt7, align 16
+  %9 = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, <vscale x 16 x i8> %.unpack8, 1
+  %10 = call i64 @llvm.vscale.i64()
+  %11 = shl i64 %10, 5
+  %.elt9 = getelementptr inbounds i8, ptr %ref.tmp, i64 %11
+  %.unpack10 = load <vscale x 16 x i8>, ptr %.elt9, align 16
+  %12 = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %9, <vscale x 16 x i8> %.unpack10, 2
+  %13 = call i64 @llvm.vscale.i64()
+  %14 = mul i64 %13, 48
+  %.elt11 = getelementptr inbounds i8, ptr %ref.tmp, i64 %14
+  %.unpack12 = load <vscale x 16 x i8>, ptr %.elt11, align 16
+  %15 = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %12, <vscale x 16 x i8> %.unpack12, 3
+  call void @llvm.lifetime.end.p0(i64 -1, ptr nonnull %ref.tmp)
+  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %15
+}