You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
[AArch64] Add quadword gather load/scatter store intrinsics with unscaled vector offset (#71290)
This patch add intrinsics of the form
sv<type>_t svld1q_gather_u64offset_<typ>(svbool_t pg, const <type>_t *base, svuint64_t offs);
void svst1q_scatter_u64offset_<typ>(sbvool_t, <type>_t *base, svuint64_t offst, sv<type>_t data);
as well as their short forms.
ACLE spec: ARM-software/acle#257
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
2875
+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv16i8(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
2881
+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv16i8(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
2882
+
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
2891
+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv16i8(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
2897
+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv16i8(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
2898
+
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
2907
+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv8i16(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
2913
+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv8i16(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
2914
+
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
2923
+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv8i16(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
2929
+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv8i16(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
2930
+
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
2939
+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv4i32(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
2945
+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv4i32(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
2946
+
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
2955
+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv4i32(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
2961
+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv4i32(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
2962
+
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
2971
+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv2i64(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
2977
+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv2i64(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
2978
+
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
2987
+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv2i64(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
2993
+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv2i64(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
2994
+
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
3003
+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv8bf16(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
3009
+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv8bf16(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
3010
+
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
3019
+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv8f16(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
3025
+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv8f16(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
3026
+
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
3035
+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv4f32(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
3041
+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv4f32(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
3042
+
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
3051
+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv2f64(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG:%.*]])
3057
+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1q.gather.vector.offset.nxv2f64(<vscale x 1 x i1> [[TMP0]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]])
3058
+
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
0 commit comments