Skip to content

Commit 74aa1ab

Browse files
authored
[InstCombine] Canonicalize scalable GEPs to use llvm.vscale intrinsic (#90569)
Canonicalize getelementptr instructions for scalable vector types into ptradd representation with an explicit llvm.vscale call. This representation has better support in BasicAA, which can reason about llvm.vscale, but not plain scalable GEPs.
1 parent 3684a38 commit 74aa1ab

29 files changed

+3562
-2193
lines changed

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1-bfloat.c

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,16 +35,22 @@ svbfloat16_t test_svld1_bf16(svbool_t pg, const bfloat16_t *base)
3535
// CHECK-LABEL: @test_svld1_vnum_bf16(
3636
// CHECK-NEXT: entry:
3737
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
38-
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
39-
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0(ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
40-
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]]
38+
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
39+
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
40+
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
41+
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
42+
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0(ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
43+
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP4]]
4144
//
4245
// CPP-CHECK-LABEL: @_Z20test_svld1_vnum_bf16u10__SVBool_tPKu6__bf16l(
4346
// CPP-CHECK-NEXT: entry:
4447
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
45-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
46-
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0(ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
47-
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]]
48+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
49+
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
50+
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
51+
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
52+
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0(ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
53+
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP4]]
4854
//
4955
svbfloat16_t test_svld1_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum)
5056
{

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c

Lines changed: 132 additions & 66 deletions
Large diffs are not rendered by default.

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sb.c

Lines changed: 84 additions & 48 deletions
Large diffs are not rendered by default.

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sh.c

Lines changed: 56 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -93,18 +93,24 @@ svuint64_t test_svld1sh_u64(svbool_t pg, const int16_t *base)
9393
// CHECK-LABEL: @test_svld1sh_vnum_s32(
9494
// CHECK-NEXT: entry:
9595
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
96-
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
97-
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
98-
// CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
99-
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
96+
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
97+
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
98+
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
99+
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
100+
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
101+
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
102+
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
100103
//
101104
// CPP-CHECK-LABEL: @_Z21test_svld1sh_vnum_s32u10__SVBool_tPKsl(
102105
// CPP-CHECK-NEXT: entry:
103106
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
104-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
105-
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
106-
// CPP-CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
107-
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
107+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
108+
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
109+
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
110+
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
111+
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
112+
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
113+
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
108114
//
109115
svint32_t test_svld1sh_vnum_s32(svbool_t pg, const int16_t *base, int64_t vnum)
110116
{
@@ -114,18 +120,24 @@ svint32_t test_svld1sh_vnum_s32(svbool_t pg, const int16_t *base, int64_t vnum)
114120
// CHECK-LABEL: @test_svld1sh_vnum_s64(
115121
// CHECK-NEXT: entry:
116122
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
117-
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
118-
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
119-
// CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
120-
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
123+
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
124+
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
125+
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
126+
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
127+
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
128+
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
129+
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
121130
//
122131
// CPP-CHECK-LABEL: @_Z21test_svld1sh_vnum_s64u10__SVBool_tPKsl(
123132
// CPP-CHECK-NEXT: entry:
124133
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
125-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
126-
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
127-
// CPP-CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
128-
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
134+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
135+
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
136+
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
137+
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
138+
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
139+
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
140+
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
129141
//
130142
svint64_t test_svld1sh_vnum_s64(svbool_t pg, const int16_t *base, int64_t vnum)
131143
{
@@ -135,18 +147,24 @@ svint64_t test_svld1sh_vnum_s64(svbool_t pg, const int16_t *base, int64_t vnum)
135147
// CHECK-LABEL: @test_svld1sh_vnum_u32(
136148
// CHECK-NEXT: entry:
137149
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
138-
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
139-
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
140-
// CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
141-
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
150+
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
151+
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
152+
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
153+
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
154+
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
155+
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
156+
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
142157
//
143158
// CPP-CHECK-LABEL: @_Z21test_svld1sh_vnum_u32u10__SVBool_tPKsl(
144159
// CPP-CHECK-NEXT: entry:
145160
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
146-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
147-
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
148-
// CPP-CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
149-
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
161+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
162+
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
163+
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
164+
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
165+
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
166+
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
167+
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
150168
//
151169
svuint32_t test_svld1sh_vnum_u32(svbool_t pg, const int16_t *base, int64_t vnum)
152170
{
@@ -156,18 +174,24 @@ svuint32_t test_svld1sh_vnum_u32(svbool_t pg, const int16_t *base, int64_t vnum)
156174
// CHECK-LABEL: @test_svld1sh_vnum_u64(
157175
// CHECK-NEXT: entry:
158176
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
159-
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
160-
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
161-
// CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
162-
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
177+
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
178+
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
179+
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
180+
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
181+
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
182+
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
183+
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
163184
//
164185
// CPP-CHECK-LABEL: @_Z21test_svld1sh_vnum_u64u10__SVBool_tPKsl(
165186
// CPP-CHECK-NEXT: entry:
166187
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
167-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
168-
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
169-
// CPP-CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
170-
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
188+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
189+
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
190+
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
191+
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
192+
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
193+
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
194+
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
171195
//
172196
svuint64_t test_svld1sh_vnum_u64(svbool_t pg, const int16_t *base, int64_t vnum)
173197
{

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sw.c

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -55,18 +55,24 @@ svuint64_t test_svld1sw_u64(svbool_t pg, const int32_t *base)
5555
// CHECK-LABEL: @test_svld1sw_vnum_s64(
5656
// CHECK-NEXT: entry:
5757
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
58-
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
59-
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
60-
// CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
61-
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
58+
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
59+
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
60+
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
61+
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
62+
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
63+
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
64+
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
6265
//
6366
// CPP-CHECK-LABEL: @_Z21test_svld1sw_vnum_s64u10__SVBool_tPKil(
6467
// CPP-CHECK-NEXT: entry:
6568
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
66-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
67-
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
68-
// CPP-CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
69-
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
69+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
70+
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
71+
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
72+
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
73+
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
74+
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
75+
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
7076
//
7177
svint64_t test_svld1sw_vnum_s64(svbool_t pg, const int32_t *base, int64_t vnum)
7278
{
@@ -76,18 +82,24 @@ svint64_t test_svld1sw_vnum_s64(svbool_t pg, const int32_t *base, int64_t vnum)
7682
// CHECK-LABEL: @test_svld1sw_vnum_u64(
7783
// CHECK-NEXT: entry:
7884
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
79-
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
80-
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
81-
// CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
82-
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
85+
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
86+
// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
87+
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
88+
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
89+
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
90+
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
91+
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
8392
//
8493
// CPP-CHECK-LABEL: @_Z21test_svld1sw_vnum_u64u10__SVBool_tPKil(
8594
// CPP-CHECK-NEXT: entry:
8695
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
87-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
88-
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
89-
// CPP-CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
90-
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
96+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
97+
// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
98+
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP2]], [[VNUM:%.*]]
99+
// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
100+
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
101+
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
102+
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
91103
//
92104
svuint64_t test_svld1sw_vnum_u64(svbool_t pg, const int32_t *base, int64_t vnum)
93105
{

0 commit comments

Comments
 (0)