Commit a5484df
[GVN] Tests for load-store forwarding of scalable store to fixed load
1 parent c0861e9

1 file changed, +116 −0 lines changed


llvm/test/Transforms/GVN/vscale.ll

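The new tests all exercise the same shape: a scalable-vector store followed by a fixed-width load of (a prefix of) the same bytes, with the fixed result usually re-inserted into a scalable vector. As a minimal sketch, assuming vscale is pinned via the function attribute (names here are illustrative, not taken from the test file):

define <16 x float> @sketch(<vscale x 4 x float> %v) vscale_range(4,4) {
entry:
  ; With vscale == 4, this scalable store writes exactly 16 floats (64 bytes).
  %slot = alloca <vscale x 4 x float>
  store <vscale x 4 x float> %v, ptr %slot
  ; The fixed-width reload reads the same 64 bytes back, so GVN could
  ; forward the stored value instead of round-tripping through the stack.
  %reload = load <16 x float>, ptr %slot
  ret <16 x float> %reload
}

The CHECK lines in the diff (in the style generated by update_test_checks.py; the file's RUN line is not part of this hunk) still show the store/load pairs intact, i.e. this commit lands baseline tests ahead of the actual forwarding support.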
@@ -641,3 +641,119 @@ entry:
   call void @llvm.lifetime.end.p0(i64 -1, ptr nonnull %ref.tmp)
   ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %15
 }
+
+define <vscale x 4 x float> @scalable_store_to_fixed_load(<vscale x 4 x float> %.coerce) #1 {
+; CHECK-LABEL: @scalable_store_to_fixed_load(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <16 x float> }, align 64
+; CHECK-NEXT:    [[TMP0:%.*]] = fadd <vscale x 4 x float> [[DOTCOERCE:%.*]], [[DOTCOERCE]]
+; CHECK-NEXT:    store <vscale x 4 x float> [[TMP0]], ptr [[RETVAL]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x float>, ptr [[RETVAL]], align 64
+; CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP1]], i64 0)
+; CHECK-NEXT:    ret <vscale x 4 x float> [[CAST_SCALABLE]]
+;
+entry:
+  %retval = alloca { <16 x float> }
+  %0 = fadd <vscale x 4 x float> %.coerce, %.coerce
+  store <vscale x 4 x float> %0, ptr %retval
+  %1 = load <16 x float>, ptr %retval
+  %cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
+  ret <vscale x 4 x float> %cast.scalable
+}
+
+define <vscale x 4 x float> @scalable_store_to_fixed_load_with_offset(<vscale x 4 x float> %a) #1 {
+; CHECK-LABEL: @scalable_store_to_fixed_load_with_offset(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca { <32 x float> }, align 128
+; CHECK-NEXT:    store <vscale x 4 x float> [[A:%.*]], ptr [[PTR]], align 16
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x float>, ptr [[GEP]], align 64
+; CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP0]], i64 0)
+; CHECK-NEXT:    ret <vscale x 4 x float> [[CAST_SCALABLE]]
+;
+entry:
+  %ptr = alloca { <32 x float> }
+  store <vscale x 4 x float> %a, ptr %ptr
+  %gep = getelementptr inbounds i8, ptr %ptr, i64 8
+  %1 = load <16 x float>, ptr %gep
+  %cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
+  ret <vscale x 4 x float> %cast.scalable
+}
+
+define <vscale x 4 x float> @scalable_store_to_fixed_load_unknown_vscale(<vscale x 4 x float> %.coerce) {
+; CHECK-LABEL: @scalable_store_to_fixed_load_unknown_vscale(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <16 x float> }, align 64
+; CHECK-NEXT:    [[TMP0:%.*]] = fadd <vscale x 4 x float> [[DOTCOERCE:%.*]], [[DOTCOERCE]]
+; CHECK-NEXT:    store <vscale x 4 x float> [[TMP0]], ptr [[RETVAL]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x float>, ptr [[RETVAL]], align 64
+; CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> [[TMP1]], i64 0)
+; CHECK-NEXT:    ret <vscale x 4 x float> [[CAST_SCALABLE]]
+;
+entry:
+  %retval = alloca { <16 x float> }
+  %0 = fadd <vscale x 4 x float> %.coerce, %.coerce
+  store <vscale x 4 x float> %0, ptr %retval
+  %1 = load <16 x float>, ptr %retval
+  %cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %1, i64 0)
+  ret <vscale x 4 x float> %cast.scalable
+}
+
+define <vscale x 4 x float> @scalable_store_to_fixed_load_size_missmatch(<vscale x 4 x float> %.coerce) #1 {
+; CHECK-LABEL: @scalable_store_to_fixed_load_size_missmatch(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <32 x float> }, align 128
+; CHECK-NEXT:    [[TMP0:%.*]] = fadd <vscale x 4 x float> [[DOTCOERCE:%.*]], [[DOTCOERCE]]
+; CHECK-NEXT:    store <vscale x 4 x float> [[TMP0]], ptr [[RETVAL]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = load <32 x float>, ptr [[RETVAL]], align 128
+; CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v32f32(<vscale x 4 x float> poison, <32 x float> [[TMP1]], i64 0)
+; CHECK-NEXT:    ret <vscale x 4 x float> [[CAST_SCALABLE]]
+;
+entry:
+  %retval = alloca { <32 x float> }
+  %0 = fadd <vscale x 4 x float> %.coerce, %.coerce
+  store <vscale x 4 x float> %0, ptr %retval
+  %1 = load <32 x float>, ptr %retval
+  %cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v32f32(<vscale x 4 x float> poison, <32 x float> %1, i64 0)
+  ret <vscale x 4 x float> %cast.scalable
+}
+
+define <vscale x 4 x i32> @scalable_store_to_fixed_load_different_types(<vscale x 4 x float> %a) #1 {
+; CHECK-LABEL: @scalable_store_to_fixed_load_different_types(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca { <16 x float> }, align 64
+; CHECK-NEXT:    store <vscale x 4 x float> [[A:%.*]], ptr [[PTR]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[PTR]], align 64
+; CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TMP0]], i64 0)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[CAST_SCALABLE]]
+;
+entry:
+  %ptr = alloca { <16 x float> }
+  store <vscale x 4 x float> %a, ptr %ptr
+  %1 = load <16 x i32>, ptr %ptr
+  %cast.scalable = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> %1, i64 0)
+  ret <vscale x 4 x i32> %cast.scalable
+}
+
+; This function does not have a fixed vscale, but the loaded vector is still known
+; to be smaller or equal in size compared to the stored vector.
+define <4 x float> @scalable_store_to_small_fixed_load(<vscale x 4 x float> %a) {
+; CHECK-LABEL: @scalable_store_to_small_fixed_load(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca <vscale x 4 x float>, align 16
+; CHECK-NEXT:    store <vscale x 4 x float> [[A:%.*]], ptr [[PTR]], align 16
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[PTR]], align 16
+; CHECK-NEXT:    ret <4 x float> [[TMP0]]
+;
+entry:
+  %ptr = alloca <vscale x 4 x float>
+  store <vscale x 4 x float> %a, ptr %ptr
+  %1 = load <4 x float>, ptr %ptr
+  ret <4 x float> %1
+}
+
+declare <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float>, <16 x float>, i64 immarg)
+declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32>, <16 x i32>, i64 immarg)
+declare <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v32f32(<vscale x 4 x float>, <32 x float>, i64 immarg)
+
+attributes #1 = { vscale_range(4,4) }
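Why these sizes line up: vscale_range(4,4) in attributes #1 pins vscale to exactly 4, so <vscale x 4 x float> holds 4 * vscale = 16 floats (64 bytes). That matches the <16 x float> loads and the align 64 slots, while @scalable_store_to_fixed_load_size_missmatch loads <32 x float> (128 bytes, twice what was stored) and @scalable_store_to_fixed_load_unknown_vscale drops the attribute. @scalable_store_to_small_fixed_load needs no pinning at all: vscale >= 1 always holds, so the scalable store covers at least the 4 floats being reloaded.

For context, one plausible forwarded form of the first test, if GVN learned this trick, would extract the fixed part directly from the stored value. This is purely illustrative and not taken from this commit; @forwarded_sketch and %fixed are hypothetical names:

define <vscale x 4 x float> @forwarded_sketch(<vscale x 4 x float> %.coerce) vscale_range(4,4) {
entry:
  %0 = fadd <vscale x 4 x float> %.coerce, %.coerce
  ; Hypothetical result: no alloca, no store/load; the fixed <16 x float>
  ; is extracted straight from the scalable value ...
  %fixed = call <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> %0, i64 0)
  ; ... and re-inserted, as in the original test.
  %cast.scalable = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> poison, <16 x float> %fixed, i64 0)
  ret <vscale x 4 x float> %cast.scalable
}

declare <16 x float> @llvm.vector.extract.v16f32.nxv4f32(<vscale x 4 x float>, i64 immarg)
declare <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v16f32(<vscale x 4 x float>, <16 x float>, i64 immarg)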
