@@ -155,14 +155,24 @@ bb:
155
155
ret i32 %load
156
156
}
157
157
158
-
158
+ ; Test gep of index select unfolding on an alloca that is splittable, but not
159
+ ; promotable. The allocas here will be optimized away by subsequent passes.
159
160
define i32 @test_select_idx_memcpy (i1 %c , ptr %p ) {
160
161
; CHECK-LABEL: @test_select_idx_memcpy(
161
- ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [20 x i64], align 8
162
- ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOCA]], ptr [[P:%.*]], i64 160, i1 false)
162
+ ; CHECK-NEXT: [[ALLOCA_SROA_0:%.*]] = alloca [4 x i8], align 8
163
+ ; CHECK-NEXT: [[ALLOCA_SROA_2:%.*]] = alloca [20 x i8], align 4
164
+ ; CHECK-NEXT: [[ALLOCA_SROA_22:%.*]] = alloca [4 x i8], align 8
165
+ ; CHECK-NEXT: [[ALLOCA_SROA_3:%.*]] = alloca [132 x i8], align 4
166
+ ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ALLOCA_SROA_0]], ptr align 1 [[P:%.*]], i64 4, i1 false)
167
+ ; CHECK-NEXT: [[ALLOCA_SROA_2_0_P_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 4
168
+ ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ALLOCA_SROA_2]], ptr align 1 [[ALLOCA_SROA_2_0_P_SROA_IDX]], i64 20, i1 false)
169
+ ; CHECK-NEXT: [[ALLOCA_SROA_22_0_P_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 24
170
+ ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ALLOCA_SROA_22]], ptr align 1 [[ALLOCA_SROA_22_0_P_SROA_IDX]], i64 4, i1 false)
171
+ ; CHECK-NEXT: [[ALLOCA_SROA_3_0_P_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 28
172
+ ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ALLOCA_SROA_3]], ptr align 1 [[ALLOCA_SROA_3_0_P_SROA_IDX]], i64 132, i1 false)
163
173
; CHECK-NEXT: [[IDX:%.*]] = select i1 [[C:%.*]], i64 24, i64 0
164
- ; CHECK-NEXT: [[GEP :%.*]] = getelementptr inbounds i8 , ptr [[ALLOCA ]], i64 [[IDX ]]
165
- ; CHECK-NEXT: [[RES:%.*]] = load i32, ptr [[GEP ]], align 4
174
+ ; CHECK-NEXT: [[IDX_SROA_SEL :%.*]] = select i1 [[C]] , ptr [[ALLOCA_SROA_22 ]], ptr [[ALLOCA_SROA_0 ]]
175
+ ; CHECK-NEXT: [[RES:%.*]] = load i32, ptr [[IDX_SROA_SEL ]], align 4
166
176
; CHECK-NEXT: ret i32 [[RES]]
167
177
;
168
178
%alloca = alloca [20 x i64 ], align 8
@@ -173,16 +183,13 @@ define i32 @test_select_idx_memcpy(i1 %c, ptr %p) {
173
183
ret i32 %res
174
184
}
175
185
186
+ ; Test gep of index select unfolding on an alloca that is splittable and
187
+ ; promotable.
176
188
define i32 @test_select_idx_mem2reg (i1 %c ) {
177
189
; CHECK-LABEL: @test_select_idx_mem2reg(
178
- ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [20 x i64], align 8
179
- ; CHECK-NEXT: store i32 1, ptr [[ALLOCA]], align 4
180
- ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[ALLOCA]], i64 24
181
- ; CHECK-NEXT: store i32 2, ptr [[GEP1]], align 4
182
190
; CHECK-NEXT: [[IDX:%.*]] = select i1 [[C:%.*]], i64 24, i64 0
183
- ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[ALLOCA]], i64 [[IDX]]
184
- ; CHECK-NEXT: [[RES:%.*]] = load i32, ptr [[GEP2]], align 4
185
- ; CHECK-NEXT: ret i32 [[RES]]
191
+ ; CHECK-NEXT: [[RES_SROA_SPECULATED:%.*]] = select i1 [[C]], i32 2, i32 1
192
+ ; CHECK-NEXT: ret i32 [[RES_SROA_SPECULATED]]
186
193
;
187
194
%alloca = alloca [20 x i64 ], align 8
188
195
store i32 1 , ptr %alloca
@@ -194,6 +201,9 @@ define i32 @test_select_idx_mem2reg(i1 %c) {
194
201
ret i32 %res
195
202
}
196
203
204
+ ; Test gep of index select unfolding on an alloca that escaped, and as such
205
+ ; is not splittable or promotable.
206
+ ; FIXME: Ideally, no transform would take place in this case.
197
207
define i32 @test_select_idx_escaped (i1 %c , ptr %p ) {
198
208
; CHECK-LABEL: @test_select_idx_escaped(
199
209
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [20 x i64], align 8
@@ -202,8 +212,10 @@ define i32 @test_select_idx_escaped(i1 %c, ptr %p) {
202
212
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[ALLOCA]], i64 24
203
213
; CHECK-NEXT: store i32 2, ptr [[GEP1]], align 4
204
214
; CHECK-NEXT: [[IDX:%.*]] = select i1 [[C:%.*]], i64 24, i64 0
205
- ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[ALLOCA]], i64 [[IDX]]
206
- ; CHECK-NEXT: [[RES:%.*]] = load i32, ptr [[GEP2]], align 4
215
+ ; CHECK-NEXT: [[DOTSROA_GEP:%.*]] = getelementptr inbounds i8, ptr [[ALLOCA]], i64 24
216
+ ; CHECK-NEXT: [[DOTSROA_GEP1:%.*]] = getelementptr inbounds i8, ptr [[ALLOCA]], i64 0
217
+ ; CHECK-NEXT: [[IDX_SROA_SEL:%.*]] = select i1 [[C]], ptr [[DOTSROA_GEP]], ptr [[DOTSROA_GEP1]]
218
+ ; CHECK-NEXT: [[RES:%.*]] = load i32, ptr [[IDX_SROA_SEL]], align 4
207
219
; CHECK-NEXT: ret i32 [[RES]]
208
220
;
209
221
%alloca = alloca [20 x i64 ], align 8
@@ -217,6 +229,38 @@ define i32 @test_select_idx_escaped(i1 %c, ptr %p) {
217
229
ret i32 %res
218
230
}
219
231
232
+ ; Test a gep whose index is a select that has another select of constants as one arm; the CHECK lines below expect the alloca, stores, selects, gep and load to all remain.
233
+ ; FIXME: Should we allow recursive select unfolding if all the leaves are constants?
234
+ define i32 @test_select_idx_nested (i1 %c , i1 %c2 ) {
235
+ ; CHECK-LABEL: @test_select_idx_nested(
236
+ ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [20 x i64], align 8
237
+ ; CHECK-NEXT: store i32 1, ptr [[ALLOCA]], align 4
238
+ ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[ALLOCA]], i64 8
239
+ ; CHECK-NEXT: store i32 2, ptr [[GEP1]], align 4
240
+ ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[ALLOCA]], i64 24
241
+ ; CHECK-NEXT: store i32 3, ptr [[GEP2]], align 4
242
+ ; CHECK-NEXT: [[IDX1:%.*]] = select i1 [[C:%.*]], i64 24, i64 0
243
+ ; CHECK-NEXT: [[IDX2:%.*]] = select i1 [[C2:%.*]], i64 [[IDX1]], i64 8
244
+ ; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i8, ptr [[ALLOCA]], i64 [[IDX2]]
245
+ ; CHECK-NEXT: [[RES:%.*]] = load i32, ptr [[GEP3]], align 4
246
+ ; CHECK-NEXT: ret i32 [[RES]]
247
+ ;
248
+ %alloca = alloca [20 x i64 ], align 8
249
+ store i32 1 , ptr %alloca ; value 1 at byte offset 0
250
+ %gep1 = getelementptr inbounds i8 , ptr %alloca , i64 8
251
+ store i32 2 , ptr %gep1 ; value 2 at byte offset 8
252
+ %gep2 = getelementptr inbounds i8 , ptr %alloca , i64 24
253
+ store i32 3 , ptr %gep2 ; value 3 at byte offset 24
254
+ %idx1 = select i1 %c , i64 24 , i64 0 ; inner select: both arms are constant offsets (24 or 0)
255
+ %idx2 = select i1 %c2 , i64 %idx1 , i64 8 ; outer select: one arm is %idx1, the other the constant 8
256
+ %gep3 = getelementptr inbounds i8 , ptr %alloca , i64 %idx2 ; index is the nested select, i.e. one of the stored offsets 0, 8 or 24
257
+ %res = load i32 , ptr %gep3 , align 4 ; reads back whichever of the three stored values the selects pick
258
+ ret i32 %res
259
+ }
260
+
261
+ ; The following cases involve non-constant indices and should not be
262
+ ; transformed.
263
+
220
264
define i32 @test_select_idx_not_constant1 (i1 %c , ptr %p , i64 %arg ) {
221
265
; CHECK-LABEL: @test_select_idx_not_constant1(
222
266
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [20 x i64], align 8
0 commit comments