Skip to content

[InstCombine] Remove some of the complexity-based canonicalization #91185

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ svbfloat16_t test_svld1_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0(ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP4]]
Expand All @@ -54,7 +54,7 @@ svbfloat16_t test_svld1_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0(ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP4]]
Expand Down
44 changes: 22 additions & 22 deletions clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c

Large diffs are not rendered by default.

26 changes: 13 additions & 13 deletions clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sb.c
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ svuint64_t test_svld1sb_u64(svbool_t pg, const int8_t *base) MODE_ATTR
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
Expand All @@ -152,7 +152,7 @@ svuint64_t test_svld1sb_u64(svbool_t pg, const int8_t *base) MODE_ATTR
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
Expand All @@ -168,7 +168,7 @@ svint16_t test_svld1sb_vnum_s16(svbool_t pg, const int8_t *base, int64_t vnum) M
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
Expand All @@ -179,7 +179,7 @@ svint16_t test_svld1sb_vnum_s16(svbool_t pg, const int8_t *base, int64_t vnum) M
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
Expand All @@ -195,7 +195,7 @@ svint32_t test_svld1sb_vnum_s32(svbool_t pg, const int8_t *base, int64_t vnum) M
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 1
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
Expand All @@ -206,7 +206,7 @@ svint32_t test_svld1sb_vnum_s32(svbool_t pg, const int8_t *base, int64_t vnum) M
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 1
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
Expand All @@ -222,7 +222,7 @@ svint64_t test_svld1sb_vnum_s64(svbool_t pg, const int8_t *base, int64_t vnum) M
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
Expand All @@ -233,7 +233,7 @@ svint64_t test_svld1sb_vnum_s64(svbool_t pg, const int8_t *base, int64_t vnum) M
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
Expand All @@ -249,7 +249,7 @@ svuint16_t test_svld1sb_vnum_u16(svbool_t pg, const int8_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
Expand All @@ -260,7 +260,7 @@ svuint16_t test_svld1sb_vnum_u16(svbool_t pg, const int8_t *base, int64_t vnum)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
Expand All @@ -276,7 +276,7 @@ svuint32_t test_svld1sb_vnum_u32(svbool_t pg, const int8_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 1
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
Expand All @@ -287,7 +287,7 @@ svuint32_t test_svld1sb_vnum_u32(svbool_t pg, const int8_t *base, int64_t vnum)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 1
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
Expand All @@ -298,7 +298,7 @@ svuint64_t test_svld1sb_vnum_u64(svbool_t pg, const int8_t *base, int64_t vnum)
return svld1sb_vnum_u64(pg, base, vnum);
}

#ifndef __ARM_FEATURE_SME
#ifndef __ARM_FEATURE_SME

// CHECK-LABEL: @test_svld1sb_gather_u32base_s32(
// CHECK-NEXT: entry:
Expand Down
18 changes: 9 additions & 9 deletions clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sh.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ svuint64_t test_svld1sh_u64(svbool_t pg, const int16_t *base) MODE_ATTR
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
Expand All @@ -114,7 +114,7 @@ svuint64_t test_svld1sh_u64(svbool_t pg, const int16_t *base) MODE_ATTR
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
Expand All @@ -130,7 +130,7 @@ svint32_t test_svld1sh_vnum_s32(svbool_t pg, const int16_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
Expand All @@ -141,7 +141,7 @@ svint32_t test_svld1sh_vnum_s32(svbool_t pg, const int16_t *base, int64_t vnum)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
Expand All @@ -157,7 +157,7 @@ svint64_t test_svld1sh_vnum_s64(svbool_t pg, const int16_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
Expand All @@ -168,7 +168,7 @@ svint64_t test_svld1sh_vnum_s64(svbool_t pg, const int16_t *base, int64_t vnum)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
Expand All @@ -184,7 +184,7 @@ svuint32_t test_svld1sh_vnum_u32(svbool_t pg, const int16_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
Expand All @@ -195,7 +195,7 @@ svuint32_t test_svld1sh_vnum_u32(svbool_t pg, const int16_t *base, int64_t vnum)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
Expand All @@ -206,7 +206,7 @@ svuint64_t test_svld1sh_vnum_u64(svbool_t pg, const int16_t *base, int64_t vnum)
return svld1sh_vnum_u64(pg, base, vnum);
}

#ifndef __ARM_FEATURE_SME
#ifndef __ARM_FEATURE_SME

// CHECK-LABEL: @test_svld1sh_gather_u32base_s32(
// CHECK-NEXT: entry:
Expand Down
Loading
Loading