Commit 6d4e72a

[GVN] Add extra vscale tests with different types. NFC
1 parent e00f1f8 commit 6d4e72a
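For context, GVN tests like this one are driven by a RUN line at the top of the .ll file, and their CHECK lines are autogenerated. The sketch below shows the usual shape of such a header; it is an assumption about this file's setup (the actual RUN line and pass list are not part of this diff), not a copy of it:

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=gvn < %s | FileCheck %s

After editing the test bodies, the CHECK lines are typically refreshed by running llvm/utils/update_test_checks.py against a locally built opt.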

File tree

2 files changed: +902 -0 lines changed

llvm/test/Transforms/GVN/vscale.ll

Lines changed: 254 additions & 0 deletions
@@ -387,3 +387,257 @@ if.then:
if.else:
  ret void
}

; Different sizes / types

define <vscale x 16 x i8> @load_v16i8_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v16i8_store_v4i32_forward_load(
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
; CHECK-NEXT: ret <vscale x 16 x i8> [[LOAD]]
;
  store <vscale x 4 x i32> %x, ptr %p
  %load = load <vscale x 16 x i8>, ptr %p
  ret <vscale x 16 x i8> %load
}

define <vscale x 4 x float> @load_v4f32_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v4f32_store_v4i32_forward_load(
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x float>, ptr [[P]], align 16
; CHECK-NEXT: ret <vscale x 4 x float> [[LOAD]]
;
  store <vscale x 4 x i32> %x, ptr %p
  %load = load <vscale x 4 x float>, ptr %p
  ret <vscale x 4 x float> %load
}

define <vscale x 4 x float> @load_v4f32_store_v16i8_forward_load(ptr %p, <vscale x 16 x i8> %x) {
; CHECK-LABEL: @load_v4f32_store_v16i8_forward_load(
; CHECK-NEXT: store <vscale x 16 x i8> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x float>, ptr [[P]], align 16
; CHECK-NEXT: ret <vscale x 4 x float> [[LOAD]]
;
  store <vscale x 16 x i8> %x, ptr %p
  %load = load <vscale x 4 x float>, ptr %p
  ret <vscale x 4 x float> %load
}

define <vscale x 4 x i32> @load_v4i32_store_v4f32_forward_load(ptr %p, <vscale x 4 x float> %x) {
; CHECK-LABEL: @load_v4i32_store_v4f32_forward_load(
; CHECK-NEXT: store <vscale x 4 x float> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
; CHECK-NEXT: ret <vscale x 4 x i32> [[LOAD]]
;
  store <vscale x 4 x float> %x, ptr %p
  %load = load <vscale x 4 x i32>, ptr %p
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x i32> @load_v4i32_store_v4i64_forward_load(ptr %p, <vscale x 4 x i64> %x) {
; CHECK-LABEL: @load_v4i32_store_v4i64_forward_load(
; CHECK-NEXT: store <vscale x 4 x i64> [[X:%.*]], ptr [[P:%.*]], align 32
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[P]], align 16
; CHECK-NEXT: ret <vscale x 4 x i32> [[LOAD]]
;
  store <vscale x 4 x i64> %x, ptr %p
  %load = load <vscale x 4 x i32>, ptr %p
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x i64> @load_v4i64_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v4i64_store_v4i32_forward_load(
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 4 x i64>, ptr [[P]], align 32
; CHECK-NEXT: ret <vscale x 4 x i64> [[LOAD]]
;
  store <vscale x 4 x i32> %x, ptr %p
  %load = load <vscale x 4 x i64>, ptr %p
  ret <vscale x 4 x i64> %load
}

define <vscale x 2 x i32> @load_v2i32_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v2i32_store_v4i32_forward_load(
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[P]], align 8
; CHECK-NEXT: ret <vscale x 2 x i32> [[LOAD]]
;
  store <vscale x 4 x i32> %x, ptr %p
  %load = load <vscale x 2 x i32>, ptr %p
  ret <vscale x 2 x i32> %load
}

define <vscale x 2 x i32> @load_v2i32_store_v4i32_forward_load_offsets(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v2i32_store_v4i32_forward_load_offsets(
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[Q:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[P]], i64 1
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[Q]], align 8
; CHECK-NEXT: ret <vscale x 2 x i32> [[LOAD]]
;
  store <vscale x 4 x i32> %x, ptr %p
  %q = getelementptr <vscale x 2 x i32>, ptr %p, i64 1
  %load = load <vscale x 2 x i32>, ptr %q
  ret <vscale x 2 x i32> %load
}

define <vscale x 2 x i32> @load_v2i32_store_v4i32_forward_load_offsetc(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v2i32_store_v4i32_forward_load_offsetc(
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[Q:%.*]] = getelementptr <2 x i32>, ptr [[P]], i64 1
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[Q]], align 8
; CHECK-NEXT: ret <vscale x 2 x i32> [[LOAD]]
;
  store <vscale x 4 x i32> %x, ptr %p
  %q = getelementptr <2 x i32>, ptr %p, i64 1
  %load = load <vscale x 2 x i32>, ptr %q
  ret <vscale x 2 x i32> %load
}

define <vscale x 2 x ptr> @load_v2p0_store_v4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v2p0_store_v4i32_forward_load(
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 2 x ptr>, ptr [[P]], align 16
; CHECK-NEXT: ret <vscale x 2 x ptr> [[LOAD]]
;
  store <vscale x 4 x i32> %x, ptr %p
  %load = load <vscale x 2 x ptr>, ptr %p
  ret <vscale x 2 x ptr> %load
}

define <vscale x 2 x i64> @load_v2i64_store_v2p0_forward_load(ptr %p, <vscale x 2 x ptr> %x) {
; CHECK-LABEL: @load_v2i64_store_v2p0_forward_load(
; CHECK-NEXT: store <vscale x 2 x ptr> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[P]], align 16
; CHECK-NEXT: ret <vscale x 2 x i64> [[LOAD]]
;
  store <vscale x 2 x ptr> %x, ptr %p
  %load = load <vscale x 2 x i64>, ptr %p
  ret <vscale x 2 x i64> %load
}

define <vscale x 16 x i8> @load_nxv16i8_store_v4i32_forward_load(ptr %p, <4 x i32> %x) {
; CHECK-LABEL: @load_nxv16i8_store_v4i32_forward_load(
; CHECK-NEXT: store <4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
; CHECK-NEXT: ret <vscale x 16 x i8> [[LOAD]]
;
  store <4 x i32> %x, ptr %p
  %load = load <vscale x 16 x i8>, ptr %p
  ret <vscale x 16 x i8> %load
}

define <16 x i8> @load_v16i8_store_nxv4i32_forward_load(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v16i8_store_nxv4i32_forward_load(
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[P]], align 16
; CHECK-NEXT: ret <16 x i8> [[LOAD]]
;
  store <vscale x 4 x i32> %x, ptr %p
  %load = load <16 x i8>, ptr %p
  ret <16 x i8> %load
}

define <vscale x 16 x i8> @load_v16i8_store_v4i32_forward_constant(ptr %p) {
; CHECK-LABEL: @load_v16i8_store_v4i32_forward_constant(
; CHECK-NEXT: store <vscale x 4 x i32> splat (i32 4), ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
; CHECK-NEXT: ret <vscale x 16 x i8> [[LOAD]]
;
  store <vscale x 4 x i32> splat (i32 4), ptr %p
  %load = load <vscale x 16 x i8>, ptr %p
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @load_v16i8_struct_store_v4i32_forward_load(ptr %p, { <vscale x 4 x i32> } %x) {
; CHECK-LABEL: @load_v16i8_struct_store_v4i32_forward_load(
; CHECK-NEXT: store { <vscale x 4 x i32> } [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[P]], align 16
; CHECK-NEXT: ret <vscale x 16 x i8> [[LOAD]]
;
  store { <vscale x 4 x i32> } %x, ptr %p
  %load = load <vscale x 16 x i8>, ptr %p
  ret <vscale x 16 x i8> %load
}

define {<vscale x 16 x i8>} @load_v16i8_store_v4i32_struct_forward_load(ptr %p, <vscale x 4 x i32> %x) {
; CHECK-LABEL: @load_v16i8_store_v4i32_struct_forward_load(
; CHECK-NEXT: store <vscale x 4 x i32> [[X:%.*]], ptr [[P:%.*]], align 16
; CHECK-NEXT: [[LOAD:%.*]] = load { <vscale x 16 x i8> }, ptr [[P]], align 16
; CHECK-NEXT: ret { <vscale x 16 x i8> } [[LOAD]]
;
  store <vscale x 4 x i32> %x, ptr %p
  %load = load { <vscale x 16 x i8> }, ptr %p
  ret { <vscale x 16 x i8> } %load
}

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @bigexample({ <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a) vscale_range(1,16) {
; CHECK-LABEL: @bigexample(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[REF_TMP:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr nonnull [[REF_TMP]])
; CHECK-NEXT: [[A_ELT:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A:%.*]], 0
; CHECK-NEXT: store <vscale x 4 x i32> [[A_ELT]], ptr [[REF_TMP]], align 16
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 4
; CHECK-NEXT: [[REF_TMP_REPACK1:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP1]]
; CHECK-NEXT: [[A_ELT2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A]], 1
; CHECK-NEXT: store <vscale x 4 x i32> [[A_ELT2]], ptr [[REF_TMP_REPACK1]], align 16
; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 5
; CHECK-NEXT: [[REF_TMP_REPACK3:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP3]]
; CHECK-NEXT: [[A_ELT4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A]], 2
; CHECK-NEXT: store <vscale x 4 x i32> [[A_ELT4]], ptr [[REF_TMP_REPACK3]], align 16
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP0]], 48
; CHECK-NEXT: [[REF_TMP_REPACK5:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP5]]
; CHECK-NEXT: [[A_ELT6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A]], 3
; CHECK-NEXT: store <vscale x 4 x i32> [[A_ELT6]], ptr [[REF_TMP_REPACK5]], align 16
; CHECK-NEXT: [[DOTUNPACK:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP]], align 16
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[DOTUNPACK]], 0
; CHECK-NEXT: [[DOTUNPACK8:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP_REPACK1]], align 16
; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP6]], <vscale x 16 x i8> [[DOTUNPACK8]], 1
; CHECK-NEXT: [[DOTUNPACK10:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP_REPACK3]], align 16
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]], <vscale x 16 x i8> [[DOTUNPACK10]], 2
; CHECK-NEXT: [[DOTUNPACK12:%.*]] = load <vscale x 16 x i8>, ptr [[REF_TMP_REPACK5]], align 16
; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP12]], <vscale x 16 x i8> [[DOTUNPACK12]], 3
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr nonnull [[REF_TMP]])
; CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP15]]
;
entry:
  %ref.tmp = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
  call void @llvm.lifetime.start.p0(i64 -1, ptr nonnull %ref.tmp)
  %a.elt = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a, 0
  store <vscale x 4 x i32> %a.elt, ptr %ref.tmp, align 16
  %0 = call i64 @llvm.vscale.i64()
  %1 = shl i64 %0, 4
  %ref.tmp.repack1 = getelementptr inbounds i8, ptr %ref.tmp, i64 %1
  %a.elt2 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a, 1
  store <vscale x 4 x i32> %a.elt2, ptr %ref.tmp.repack1, align 16
  %2 = call i64 @llvm.vscale.i64()
  %3 = shl i64 %2, 5
  %ref.tmp.repack3 = getelementptr inbounds i8, ptr %ref.tmp, i64 %3
  %a.elt4 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a, 2
  store <vscale x 4 x i32> %a.elt4, ptr %ref.tmp.repack3, align 16
  %4 = call i64 @llvm.vscale.i64()
  %5 = mul i64 %4, 48
  %ref.tmp.repack5 = getelementptr inbounds i8, ptr %ref.tmp, i64 %5
  %a.elt6 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %a, 3
  store <vscale x 4 x i32> %a.elt6, ptr %ref.tmp.repack5, align 16
  %.unpack = load <vscale x 16 x i8>, ptr %ref.tmp, align 16
  %6 = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> %.unpack, 0
  %7 = call i64 @llvm.vscale.i64()
  %8 = shl i64 %7, 4
  %.elt7 = getelementptr inbounds i8, ptr %ref.tmp, i64 %8
  %.unpack8 = load <vscale x 16 x i8>, ptr %.elt7, align 16
  %9 = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %6, <vscale x 16 x i8> %.unpack8, 1
  %10 = call i64 @llvm.vscale.i64()
  %11 = shl i64 %10, 5
  %.elt9 = getelementptr inbounds i8, ptr %ref.tmp, i64 %11
  %.unpack10 = load <vscale x 16 x i8>, ptr %.elt9, align 16
  %12 = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %9, <vscale x 16 x i8> %.unpack10, 2
  %13 = call i64 @llvm.vscale.i64()
  %14 = mul i64 %13, 48
  %.elt11 = getelementptr inbounds i8, ptr %ref.tmp, i64 %14
  %.unpack12 = load <vscale x 16 x i8>, ptr %.elt11, align 16
  %15 = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %12, <vscale x 16 x i8> %.unpack12, 3
  call void @llvm.lifetime.end.p0(i64 -1, ptr nonnull %ref.tmp)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %15
}
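In each of these new tests the CHECK lines show the load surviving next to the store, i.e. GVN does not currently forward a stored scalable vector to a load of a different type. Purely as an illustrative sketch (not part of this commit, and not something the pass is claimed to do), forwarding between two scalable types of the same total size could in principle be expressed with a bitcast of the stored value, along these lines:

define <vscale x 16 x i8> @forwarding_sketch(ptr %p, <vscale x 4 x i32> %x) {
  ; Hypothetical forwarded form: both types span vscale x 128 bits, so the
  ; load could be replaced by a bitcast of %x (the store remains in case the
  ; memory is still needed elsewhere).
  store <vscale x 4 x i32> %x, ptr %p
  %cast = bitcast <vscale x 4 x i32> %x to <vscale x 16 x i8>
  ret <vscale x 16 x i8> %cast
}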
