You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
[InstCombine] Canonicalize scalable GEPs to use llvm.vscale intrinsic (#90569)
Canonicalize getelementptr instructions for scalable vector types into
ptradd representation with an explicit llvm.vscale call. This
representation has better support in BasicAA, which can reason about
llvm.vscale, but not plain scalable GEPs.
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
38
-
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
39
-
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0(ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0(ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
45
-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
46
-
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0(ptr [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
47
-
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0(ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
53
+
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP4]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
96
-
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
97
-
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
98
-
// CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
101
+
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
104
-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
105
-
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
106
-
// CPP-CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
107
-
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
112
+
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
113
+
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
117
-
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
118
-
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
119
-
// CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
128
+
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
125
-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
126
-
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
127
-
// CPP-CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
128
-
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
139
+
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
140
+
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
138
-
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
139
-
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
140
-
// CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
155
+
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
146
-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 4 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
147
-
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
148
-
// CPP-CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
149
-
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
166
+
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
167
+
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
159
-
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
160
-
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
161
-
// CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
182
+
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
167
-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 2 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
168
-
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
169
-
// CPP-CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
170
-
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
193
+
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
194
+
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
58
-
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
59
-
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
60
-
// CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
63
+
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
66
-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
67
-
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
68
-
// CPP-CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
69
-
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
74
+
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
75
+
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
79
-
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
80
-
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
81
-
// CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
90
+
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
87
-
// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 2 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
88
-
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
89
-
// CPP-CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
90
-
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
101
+
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
102
+
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
0 commit comments