
Commit 8b3e895

[GVN] Tests for load-store forwarding of scalable store to fixed load
1 parent c0861e9 commit 8b3e895
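The tests added here all follow one pattern: a scalable vector is stored to a stack slot and a fixed-width vector is loaded back from it. A minimal illustrative sketch (not part of this commit; the function name @forwarding_sketch is hypothetical): with vscale_range(4,4) the runtime vscale is exactly 4, so a <vscale x 4 x float> occupies exactly 4 * 4 = 16 floats (64 bytes), and a fixed <16 x float> load from the same address reads back precisely the stored bytes, which is what makes forwarding the stored value a candidate optimization.

define <16 x float> @forwarding_sketch(<vscale x 4 x float> %v) vscale_range(4,4) {
entry:
  ; vscale is pinned to 4, so the scalable store writes exactly 16 floats (64 bytes).
  %buf = alloca <16 x float>, align 64
  store <vscale x 4 x float> %v, ptr %buf
  ; The fixed-width load covers the same 64 bytes the store just wrote.
  %fixed = load <16 x float>, ptr %buf
  ret <16 x float> %fixed
}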

File tree

2 files changed (+256, -0 lines)


llvm/test/Transforms/GVN/vscale.ll

Lines changed: 128 additions & 0 deletions
@@ -641,3 +641,131 @@ entry:
  call void @llvm.lifetime.end.p0(i64 -1, ptr nonnull %ref.tmp)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %15
}

define <vscale x 4 x float> @scalable_store_to_fixed_load(<vscale x 4 x float> %.coerce) vscale_range(4,4) {
; CHECK-LABEL: @scalable_store_to_fixed_load(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RETVAL:%.*]] = alloca { <16 x float> }, align 64
; CHECK-NEXT: [[TMP0:%.*]] = fadd <vscale x 4 x float> [[DOTCOERCE:%.*]], [[DOTCOERCE]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP0]], ptr [[RETVAL]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[RETVAL]], align 64
; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP1]], i64 0)
; CHECK-NEXT: ret <vscale x 4 x float> [[CAST_SCALABLE]]
;
entry:
  %retval = alloca { <16 x float> }
  %0 = fadd <vscale x 4 x float> %.coerce, %.coerce
  store <vscale x 4 x float> %0, ptr %retval
  %1 = load <16 x float>, ptr %retval
  %cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
  ret <vscale x 4 x float> %cast.scalable
}

; Here, only the lower bound for the vscale is known, but this is enough to allow forwarding to a load of 16 elements.
define <vscale x 4 x float> @scalable_store_to_fixed_load_only_lower_bound(<vscale x 4 x float> %a) vscale_range(4) {
; CHECK-LABEL: @scalable_store_to_fixed_load_only_lower_bound(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 4 x float> }, align 16
; CHECK-NEXT: store <vscale x 4 x float> [[A:%.*]], ptr [[RETVAL]], align 16
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, ptr [[RETVAL]], align 64
; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP0]], i64 0)
; CHECK-NEXT: ret <vscale x 4 x float> [[CAST_SCALABLE]]
;
entry:
  %retval = alloca { <vscale x 4 x float> }
  store <vscale x 4 x float> %a, ptr %retval
  %1 = load <16 x float>, ptr %retval
  %cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
  ret <vscale x 4 x float> %cast.scalable
}

define <vscale x 4 x float> @scalable_store_to_fixed_load_with_offset(<vscale x 4 x float> %a) vscale_range(4,4) {
; CHECK-LABEL: @scalable_store_to_fixed_load_with_offset(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PTR:%.*]] = alloca { <32 x float> }, align 128
; CHECK-NEXT: store <vscale x 4 x float> [[A:%.*]], ptr [[PTR]], align 16
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 8
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, ptr [[GEP]], align 64
; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP0]], i64 0)
; CHECK-NEXT: ret <vscale x 4 x float> [[CAST_SCALABLE]]
;
entry:
  %ptr = alloca { <32 x float> }
  store <vscale x 4 x float> %a, ptr %ptr
  %gep = getelementptr inbounds i8, ptr %ptr, i64 8
  %1 = load <16 x float>, ptr %gep
  %cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
  ret <vscale x 4 x float> %cast.scalable
}

define <vscale x 4 x float> @scalable_store_to_fixed_load_unknown_vscale(<vscale x 4 x float> %.coerce) {
; CHECK-LABEL: @scalable_store_to_fixed_load_unknown_vscale(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RETVAL:%.*]] = alloca { <16 x float> }, align 64
; CHECK-NEXT: [[TMP0:%.*]] = fadd <vscale x 4 x float> [[DOTCOERCE:%.*]], [[DOTCOERCE]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP0]], ptr [[RETVAL]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[RETVAL]], align 64
; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP1]], i64 0)
; CHECK-NEXT: ret <vscale x 4 x float> [[CAST_SCALABLE]]
;
entry:
  %retval = alloca { <16 x float> }
  %0 = fadd <vscale x 4 x float> %.coerce, %.coerce
  store <vscale x 4 x float> %0, ptr %retval
  %1 = load <16 x float>, ptr %retval
  %cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
  ret <vscale x 4 x float> %cast.scalable
}

define <vscale x 4 x float> @scalable_store_to_fixed_load_size_missmatch(<vscale x 4 x float> %.coerce) vscale_range(4,4) {
; CHECK-LABEL: @scalable_store_to_fixed_load_size_missmatch(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RETVAL:%.*]] = alloca { <32 x float> }, align 128
; CHECK-NEXT: [[TMP0:%.*]] = fadd <vscale x 4 x float> [[DOTCOERCE:%.*]], [[DOTCOERCE]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP0]], ptr [[RETVAL]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x float>, ptr [[RETVAL]], align 128
; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v32f32(<vscale x 4 x float> poison, <32 x float> [[TMP1]], i64 0)
; CHECK-NEXT: ret <vscale x 4 x float> [[CAST_SCALABLE]]
;
entry:
  %retval = alloca { <32 x float> }
  %0 = fadd <vscale x 4 x float> %.coerce, %.coerce
  store <vscale x 4 x float> %0, ptr %retval
  %1 = load <32 x float>, ptr %retval
  %cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v32f32(<vscale x 4 x float> poison, <32 x float> %1, i64 0)
  ret <vscale x 4 x float> %cast.scalable
}

define <vscale x 4 x i32> @scalable_store_to_fixed_load_different_types(<vscale x 4 x float> %a) vscale_range(4,4) {
; CHECK-LABEL: @scalable_store_to_fixed_load_different_types(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PTR:%.*]] = alloca { <16 x float> }, align 64
; CHECK-NEXT: store <vscale x 4 x float> [[A:%.*]], ptr [[PTR]], align 16
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[PTR]], align 64
; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TMP0]], i64 0)
; CHECK-NEXT: ret <vscale x 4 x i32> [[CAST_SCALABLE]]
;
entry:
  %ptr = alloca { <16 x float> }
  store <vscale x 4 x float> %a, ptr %ptr
  %1 = load <16 x i32>, ptr %ptr
  %cast.scalable = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> %1, i64 0)
  ret <vscale x 4 x i32> %cast.scalable
}

; This function does not have a fixed vscale, but the loaded vector is still known
; to be smaller than or equal in size to the stored vector.
define <4 x float> @scalable_store_to_small_fixed_load(<vscale x 4 x float> %a) {
; CHECK-LABEL: @scalable_store_to_small_fixed_load(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PTR:%.*]] = alloca <vscale x 4 x float>, align 16
; CHECK-NEXT: store <vscale x 4 x float> [[A:%.*]], ptr [[PTR]], align 16
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[PTR]], align 16
; CHECK-NEXT: ret <4 x float> [[TMP0]]
;
entry:
  %ptr = alloca <vscale x 4 x float>
  store <vscale x 4 x float> %a, ptr %ptr
  %1 = load <4 x float>, ptr %ptr
  ret <4 x float> %1
}

llvm/test/Transforms/NewGVN/vscale.ll

Lines changed: 128 additions & 0 deletions
@@ -646,3 +646,131 @@ entry:
  call void @llvm.lifetime.end.p0(i64 -1, ptr nonnull %ref.tmp)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %15
}

define <vscale x 4 x float> @scalable_store_to_fixed_load(<vscale x 4 x float> %.coerce) vscale_range(4,4) {
; CHECK-LABEL: @scalable_store_to_fixed_load(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RETVAL:%.*]] = alloca { <16 x float> }, align 64
; CHECK-NEXT: [[TMP0:%.*]] = fadd <vscale x 4 x float> [[DOTCOERCE:%.*]], [[DOTCOERCE]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP0]], ptr [[RETVAL]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[RETVAL]], align 64
; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP1]], i64 0)
; CHECK-NEXT: ret <vscale x 4 x float> [[CAST_SCALABLE]]
;
entry:
  %retval = alloca { <16 x float> }
  %0 = fadd <vscale x 4 x float> %.coerce, %.coerce
  store <vscale x 4 x float> %0, ptr %retval
  %1 = load <16 x float>, ptr %retval
  %cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
  ret <vscale x 4 x float> %cast.scalable
}

; Here, only the lower bound for the vscale is known, but this is enough to allow forwarding to a load of 16 elements.
define <vscale x 4 x float> @scalable_store_to_fixed_load_only_lower_bound(<vscale x 4 x float> %a) vscale_range(4) {
; CHECK-LABEL: @scalable_store_to_fixed_load_only_lower_bound(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 4 x float> }, align 16
; CHECK-NEXT: store <vscale x 4 x float> [[A:%.*]], ptr [[RETVAL]], align 16
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, ptr [[RETVAL]], align 64
; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP0]], i64 0)
; CHECK-NEXT: ret <vscale x 4 x float> [[CAST_SCALABLE]]
;
entry:
  %retval = alloca { <vscale x 4 x float> }
  store <vscale x 4 x float> %a, ptr %retval
  %1 = load <16 x float>, ptr %retval
  %cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
  ret <vscale x 4 x float> %cast.scalable
}

define <vscale x 4 x float> @scalable_store_to_fixed_load_with_offset(<vscale x 4 x float> %a) vscale_range(4,4) {
; CHECK-LABEL: @scalable_store_to_fixed_load_with_offset(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PTR:%.*]] = alloca { <32 x float> }, align 128
; CHECK-NEXT: store <vscale x 4 x float> [[A:%.*]], ptr [[PTR]], align 16
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 8
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, ptr [[GEP]], align 64
; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP0]], i64 0)
; CHECK-NEXT: ret <vscale x 4 x float> [[CAST_SCALABLE]]
;
entry:
  %ptr = alloca { <32 x float> }
  store <vscale x 4 x float> %a, ptr %ptr
  %gep = getelementptr inbounds i8, ptr %ptr, i64 8
  %1 = load <16 x float>, ptr %gep
  %cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
  ret <vscale x 4 x float> %cast.scalable
}

define <vscale x 4 x float> @scalable_store_to_fixed_load_unknown_vscale(<vscale x 4 x float> %.coerce) {
; CHECK-LABEL: @scalable_store_to_fixed_load_unknown_vscale(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RETVAL:%.*]] = alloca { <16 x float> }, align 64
; CHECK-NEXT: [[TMP0:%.*]] = fadd <vscale x 4 x float> [[DOTCOERCE:%.*]], [[DOTCOERCE]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP0]], ptr [[RETVAL]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[RETVAL]], align 64
; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP1]], i64 0)
; CHECK-NEXT: ret <vscale x 4 x float> [[CAST_SCALABLE]]
;
entry:
  %retval = alloca { <16 x float> }
  %0 = fadd <vscale x 4 x float> %.coerce, %.coerce
  store <vscale x 4 x float> %0, ptr %retval
  %1 = load <16 x float>, ptr %retval
  %cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
  ret <vscale x 4 x float> %cast.scalable
}

define <vscale x 4 x float> @scalable_store_to_fixed_load_size_missmatch(<vscale x 4 x float> %.coerce) vscale_range(4,4) {
; CHECK-LABEL: @scalable_store_to_fixed_load_size_missmatch(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RETVAL:%.*]] = alloca { <32 x float> }, align 128
; CHECK-NEXT: [[TMP0:%.*]] = fadd <vscale x 4 x float> [[DOTCOERCE:%.*]], [[DOTCOERCE]]
; CHECK-NEXT: store <vscale x 4 x float> [[TMP0]], ptr [[RETVAL]], align 16
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x float>, ptr [[RETVAL]], align 128
; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v32f32(<vscale x 4 x float> poison, <32 x float> [[TMP1]], i64 0)
; CHECK-NEXT: ret <vscale x 4 x float> [[CAST_SCALABLE]]
;
entry:
  %retval = alloca { <32 x float> }
  %0 = fadd <vscale x 4 x float> %.coerce, %.coerce
  store <vscale x 4 x float> %0, ptr %retval
  %1 = load <32 x float>, ptr %retval
  %cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v32f32(<vscale x 4 x float> poison, <32 x float> %1, i64 0)
  ret <vscale x 4 x float> %cast.scalable
}

define <vscale x 4 x i32> @scalable_store_to_fixed_load_different_types(<vscale x 4 x float> %a) vscale_range(4,4) {
; CHECK-LABEL: @scalable_store_to_fixed_load_different_types(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PTR:%.*]] = alloca { <16 x float> }, align 64
; CHECK-NEXT: store <vscale x 4 x float> [[A:%.*]], ptr [[PTR]], align 16
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[PTR]], align 64
; CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TMP0]], i64 0)
; CHECK-NEXT: ret <vscale x 4 x i32> [[CAST_SCALABLE]]
;
entry:
  %ptr = alloca { <16 x float> }
  store <vscale x 4 x float> %a, ptr %ptr
  %1 = load <16 x i32>, ptr %ptr
  %cast.scalable = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> %1, i64 0)
  ret <vscale x 4 x i32> %cast.scalable
}

; This function does not have a fixed vscale, but the loaded vector is still known
; to be smaller than or equal in size to the stored vector.
define <4 x float> @scalable_store_to_small_fixed_load(<vscale x 4 x float> %a) {
; CHECK-LABEL: @scalable_store_to_small_fixed_load(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PTR:%.*]] = alloca <vscale x 4 x float>, align 16
; CHECK-NEXT: store <vscale x 4 x float> [[A:%.*]], ptr [[PTR]], align 16
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[PTR]], align 16
; CHECK-NEXT: ret <4 x float> [[TMP0]]
;
entry:
  %ptr = alloca <vscale x 4 x float>
  store <vscale x 4 x float> %a, ptr %ptr
  %1 = load <4 x float>, ptr %ptr
  ret <4 x float> %1
}
