@@ -182,9 +182,204 @@ loop:
182
182
exit:
183
183
ret void
184
184
}
185
+
186
; Loop-vectorizer test: address computations (GEPs) that feed interleave groups
; and are reused by several members of those groups.
;
; Scalar loop, per iteration %iv:
;   - reads four consecutive floats starting at %arg + (%iv << 5),
;   - reads four consecutive floats starting at %arg + ((%iv << 5) | 16),
;   - adds them pairwise, multiplies each sum by 0.0,
;   - writes the four results to consecutive floats starting at %arg2 + (%iv << 4).
; The loop exits once %iv == %arg1, i.e. it runs %arg1 + 1 iterations.
;
; The assertions below expect a vector loop that processes 2 iterations at a
; time using a single <16 x float> wide load and a single <8 x float>
; interleaved store, guarded by SCEV overflow checks and a memory-overlap check.
; NOTE(review): the assertions look like update_test_checks.py output; regenerate
; them with the script rather than editing by hand - confirm against the file header.
define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr %arg2) #0 {
; CHECK-LABEL: define void @geps_feeding_interleave_groups_with_reuse(
; CHECK-SAME: ptr [[ARG:%.*]], i64 [[ARG1:%.*]], ptr [[ARG2:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[ARG1]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 30
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; CHECK:       [[VECTOR_SCEVCHECK]]:
; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[ARG2]], i64 8
; CHECK-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]])
; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 0, [[MUL_RESULT]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]]
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[SCEVGEP]]
; CHECK-NEXT:    [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]]
; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[ARG2]], i64 12
; CHECK-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]])
; CHECK-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
; CHECK-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
; CHECK-NEXT:    [[TMP5:%.*]] = sub i64 0, [[MUL_RESULT3]]
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]]
; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult ptr [[TMP6]], [[SCEVGEP1]]
; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW4]]
; CHECK-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[ARG2]], i64 4
; CHECK-NEXT:    [[MUL6:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]])
; CHECK-NEXT:    [[MUL_RESULT7:%.*]] = extractvalue { i64, i1 } [[MUL6]], 0
; CHECK-NEXT:    [[MUL_OVERFLOW8:%.*]] = extractvalue { i64, i1 } [[MUL6]], 1
; CHECK-NEXT:    [[TMP9:%.*]] = sub i64 0, [[MUL_RESULT7]]
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 [[MUL_RESULT7]]
; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[SCEVGEP5]]
; CHECK-NEXT:    [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW8]]
; CHECK-NEXT:    [[MUL9:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[ARG1]])
; CHECK-NEXT:    [[MUL_RESULT10:%.*]] = extractvalue { i64, i1 } [[MUL9]], 0
; CHECK-NEXT:    [[MUL_OVERFLOW11:%.*]] = extractvalue { i64, i1 } [[MUL9]], 1
; CHECK-NEXT:    [[TMP13:%.*]] = sub i64 0, [[MUL_RESULT10]]
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[MUL_RESULT10]]
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ult ptr [[TMP14]], [[ARG2]]
; CHECK-NEXT:    [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW11]]
; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP4]], [[TMP8]]
; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP17]], [[TMP12]]
; CHECK-NEXT:    [[TMP19:%.*]] = or i1 [[TMP18]], [[TMP16]]
; CHECK-NEXT:    br i1 [[TMP19]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
; CHECK:       [[VECTOR_MEMCHECK]]:
; CHECK-NEXT:    [[TMP20:%.*]] = shl i64 [[ARG1]], 4
; CHECK-NEXT:    [[TMP21:%.*]] = add i64 [[TMP20]], 16
; CHECK-NEXT:    [[SCEVGEP12:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[TMP21]]
; CHECK-NEXT:    [[TMP22:%.*]] = shl i64 [[ARG1]], 5
; CHECK-NEXT:    [[TMP23:%.*]] = add i64 [[TMP22]], 32
; CHECK-NEXT:    [[SCEVGEP13:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[TMP23]]
; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[ARG2]], [[SCEVGEP13]]
; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[ARG]], [[SCEVGEP12]]
; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP24:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP25:%.*]] = shl i64 [[TMP24]], 5
; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[TMP25]]
; CHECK-NEXT:    [[TMP27:%.*]] = shl i64 [[TMP24]], 4
; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[TMP27]]
; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr float, ptr [[TMP26]], i32 0
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <16 x float>, ptr [[TMP29]], align 4
; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 0, i32 8>
; CHECK-NEXT:    [[STRIDED_VEC14:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 1, i32 9>
; CHECK-NEXT:    [[STRIDED_VEC15:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 2, i32 10>
; CHECK-NEXT:    [[STRIDED_VEC16:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 3, i32 11>
; CHECK-NEXT:    [[STRIDED_VEC17:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 4, i32 12>
; CHECK-NEXT:    [[STRIDED_VEC18:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 5, i32 13>
; CHECK-NEXT:    [[STRIDED_VEC19:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 6, i32 14>
; CHECK-NEXT:    [[STRIDED_VEC20:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 7, i32 15>
; CHECK-NEXT:    [[TMP30:%.*]] = fadd <2 x float> [[STRIDED_VEC]], [[STRIDED_VEC17]]
; CHECK-NEXT:    [[TMP31:%.*]] = fmul <2 x float> [[TMP30]], zeroinitializer
; CHECK-NEXT:    [[TMP32:%.*]] = fadd <2 x float> [[STRIDED_VEC14]], [[STRIDED_VEC18]]
; CHECK-NEXT:    [[TMP33:%.*]] = fmul <2 x float> [[TMP32]], zeroinitializer
; CHECK-NEXT:    [[TMP34:%.*]] = fadd <2 x float> [[STRIDED_VEC15]], [[STRIDED_VEC19]]
; CHECK-NEXT:    [[TMP35:%.*]] = fmul <2 x float> [[TMP34]], zeroinitializer
; CHECK-NEXT:    [[TMP36:%.*]] = fadd <2 x float> [[STRIDED_VEC16]], [[STRIDED_VEC20]]
; CHECK-NEXT:    [[TMP37:%.*]] = fmul <2 x float> [[TMP36]], zeroinitializer
; CHECK-NEXT:    [[TMP38:%.*]] = getelementptr i8, ptr [[TMP28]], i64 12
; CHECK-NEXT:    [[TMP39:%.*]] = getelementptr float, ptr [[TMP38]], i32 -3
; CHECK-NEXT:    [[TMP40:%.*]] = shufflevector <2 x float> [[TMP31]], <2 x float> [[TMP33]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP41:%.*]] = shufflevector <2 x float> [[TMP35]], <2 x float> [[TMP37]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP42:%.*]] = shufflevector <4 x float> [[TMP40]], <4 x float> [[TMP41]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x float> [[TMP42]], <8 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT:    store <8 x float> [[INTERLEAVED_VEC]], ptr [[TMP39]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP43]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[SHL_IV_5:%.*]] = shl i64 [[IV]], 5
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[SHL_IV_5]]
; CHECK-NEXT:    [[ADD_5:%.*]] = or disjoint i64 [[SHL_IV_5]], 16
; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[ADD_5]]
; CHECK-NEXT:    [[SHL_IV_4:%.*]] = shl i64 [[IV]], 4
; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[SHL_IV_4]]
; CHECK-NEXT:    [[L_1:%.*]] = load float, ptr [[GEP_1]], align 4
; CHECK-NEXT:    [[L_2:%.*]] = load float, ptr [[GEP_2]], align 4
; CHECK-NEXT:    [[ADD_1:%.*]] = fadd float [[L_1]], [[L_2]]
; CHECK-NEXT:    [[MUL_1:%.*]] = fmul float [[ADD_1]], 0.000000e+00
; CHECK-NEXT:    store float [[MUL_1]], ptr [[GEP_3]], align 4
; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr i8, ptr [[GEP_1]], i64 4
; CHECK-NEXT:    [[L_3:%.*]] = load float, ptr [[GEP_4]], align 4
; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr i8, ptr [[GEP_2]], i64 4
; CHECK-NEXT:    [[L_4:%.*]] = load float, ptr [[GEP_5]], align 4
; CHECK-NEXT:    [[ADD_2:%.*]] = fadd float [[L_3]], [[L_4]]
; CHECK-NEXT:    [[MUL_2:%.*]] = fmul float [[ADD_2]], 0.000000e+00
; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr i8, ptr [[GEP_3]], i64 4
; CHECK-NEXT:    store float [[MUL_2]], ptr [[GEP_6]], align 4
; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr i8, ptr [[GEP_1]], i64 8
; CHECK-NEXT:    [[L_5:%.*]] = load float, ptr [[GEP_7]], align 4
; CHECK-NEXT:    [[GEP_8:%.*]] = getelementptr i8, ptr [[GEP_2]], i64 8
; CHECK-NEXT:    [[L_6:%.*]] = load float, ptr [[GEP_8]], align 4
; CHECK-NEXT:    [[ADD_3:%.*]] = fadd float [[L_5]], [[L_6]]
; CHECK-NEXT:    [[MUL_3:%.*]] = fmul float [[ADD_3]], 0.000000e+00
; CHECK-NEXT:    [[GEP_9:%.*]] = getelementptr i8, ptr [[GEP_3]], i64 8
; CHECK-NEXT:    store float [[MUL_3]], ptr [[GEP_9]], align 4
; CHECK-NEXT:    [[I27:%.*]] = getelementptr i8, ptr [[GEP_1]], i64 12
; CHECK-NEXT:    [[L_7:%.*]] = load float, ptr [[I27]], align 4
; CHECK-NEXT:    [[GEP_10:%.*]] = getelementptr i8, ptr [[GEP_2]], i64 12
; CHECK-NEXT:    [[L_8:%.*]] = load float, ptr [[GEP_10]], align 4
; CHECK-NEXT:    [[ADD_4:%.*]] = fadd float [[L_7]], [[L_8]]
; CHECK-NEXT:    [[MUL_4:%.*]] = fmul float [[ADD_4]], 0.000000e+00
; CHECK-NEXT:    [[GEP_11:%.*]] = getelementptr i8, ptr [[GEP_3]], i64 12
; CHECK-NEXT:    store float [[MUL_4]], ptr [[GEP_11]], align 4
; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], [[ARG1]]
; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; Base addresses: two 16-byte input groups per iteration (32-byte stride in
  ; %arg) and one 16-byte output group (16-byte stride in %arg2).
  %shl.iv.5 = shl i64 %iv, 5
  %gep.1 = getelementptr i8, ptr %arg, i64 %shl.iv.5
  %add.5 = or disjoint i64 %shl.iv.5, 16
  %gep.2 = getelementptr i8, ptr %arg, i64 %add.5
  %shl.iv.4 = shl i64 %iv, 4
  %gep.3 = getelementptr i8, ptr %arg2, i64 %shl.iv.4
  ; Element 0: byte offset 0 from each base.
  %l.1 = load float, ptr %gep.1, align 4
  %l.2 = load float, ptr %gep.2, align 4
  %add.1 = fadd float %l.1, %l.2
  %mul.1 = fmul float %add.1, 0.000000e+00
  store float %mul.1, ptr %gep.3, align 4
  ; Element 1: byte offset 4 from each base.
  %gep.4 = getelementptr i8, ptr %gep.1, i64 4
  %l.3 = load float, ptr %gep.4, align 4
  %gep.5 = getelementptr i8, ptr %gep.2, i64 4
  %l.4 = load float, ptr %gep.5, align 4
  %add.2 = fadd float %l.3, %l.4
  %mul.2 = fmul float %add.2, 0.000000e+00
  %gep.6 = getelementptr i8, ptr %gep.3, i64 4
  store float %mul.2, ptr %gep.6, align 4
  ; Element 2: byte offset 8 from each base.
  %gep.7 = getelementptr i8, ptr %gep.1, i64 8
  %l.5 = load float, ptr %gep.7, align 4
  %gep.8 = getelementptr i8, ptr %gep.2, i64 8
  %l.6 = load float, ptr %gep.8, align 4
  %add.3 = fadd float %l.5, %l.6
  %mul.3 = fmul float %add.3, 0.000000e+00
  %gep.9 = getelementptr i8, ptr %gep.3, i64 8
  store float %mul.3, ptr %gep.9, align 4
  ; Element 3: byte offset 12 from each base.
  %i27 = getelementptr i8, ptr %gep.1, i64 12
  %l.7 = load float, ptr %i27, align 4
  %gep.10 = getelementptr i8, ptr %gep.2, i64 12
  %l.8 = load float, ptr %gep.10, align 4
  %add.4 = fadd float %l.7, %l.8
  %mul.4 = fmul float %add.4, 0.000000e+00
  %gep.11 = getelementptr i8, ptr %gep.3, i64 12
  store float %mul.4, ptr %gep.11, align 4
  ; The exit test compares the pre-increment %iv, so the body also runs for
  ; %iv == %arg1 before leaving the loop.
  %iv.next = add i64 %iv, 1
  %ec = icmp eq i64 %iv, %arg1
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}
375
+
376
; Attribute group #0: enables the SSE4.2 target feature for functions declared with #0.
attributes #0 = { "target-features"="+sse4.2" }
377
+
185
378
;.
186
379
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
187
380
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
188
381
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
189
382
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
383
+ ; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
384
+ ; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
190
385
;.
0 commit comments