@@ -275,49 +275,49 @@ define void @example3(i32 %n, ptr noalias nocapture %p, ptr noalias nocapture %q
275
275
; CHECK: vector.body:
276
276
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE16:%.*]] ]
277
277
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 2
278
+ ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[OFFSET_IDX]]
279
+ ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[TMP11]], i64 4
280
+ ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]]
281
+ ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[TMP12]], i64 8
282
+ ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]]
283
+ ; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[TMP15]], i64 12
278
284
; CHECK-NEXT: [[OFFSET_IDX6:%.*]] = shl i64 [[INDEX]], 2
285
+ ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Q:%.*]], i64 [[OFFSET_IDX6]]
286
+ ; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[TMP16]], i64 4
287
+ ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Q]], i64 [[OFFSET_IDX6]]
288
+ ; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[TMP7]], i64 8
289
+ ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[Q]], i64 [[OFFSET_IDX6]]
290
+ ; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[TMP8]], i64 12
279
291
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
280
292
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
281
293
; CHECK-NEXT: [[VEC_IV:%.*]] = or disjoint <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
282
294
; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT12]]
283
295
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0
284
296
; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
285
297
; CHECK: pred.store.if:
286
- ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[OFFSET_IDX]]
287
- ; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[Q:%.*]], i64 [[OFFSET_IDX6]]
298
+ ; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[Q]], i64 [[OFFSET_IDX6]]
299
+ ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]]
288
300
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[NEXT_GEP7]], align 16
289
301
; CHECK-NEXT: store i32 [[TMP5]], ptr [[NEXT_GEP]], align 16
290
302
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
291
303
; CHECK: pred.store.continue:
292
304
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1
293
305
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
294
306
; CHECK: pred.store.if11:
295
- ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]]
296
- ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[TMP7]], i64 4
297
- ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[Q]], i64 [[OFFSET_IDX6]]
298
- ; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[TMP8]], i64 4
299
307
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[NEXT_GEP8]], align 16
300
308
; CHECK-NEXT: store i32 [[TMP9]], ptr [[NEXT_GEP3]], align 16
301
309
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
302
310
; CHECK: pred.store.continue12:
303
311
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2
304
312
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
305
313
; CHECK: pred.store.if13:
306
- ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]]
307
- ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[TMP11]], i64 8
308
- ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[Q]], i64 [[OFFSET_IDX6]]
309
- ; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[TMP12]], i64 8
310
314
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[NEXT_GEP9]], align 16
311
315
; CHECK-NEXT: store i32 [[TMP13]], ptr [[NEXT_GEP4]], align 16
312
316
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]]
313
317
; CHECK: pred.store.continue14:
314
318
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3
315
319
; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16]]
316
320
; CHECK: pred.store.if15:
317
- ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]]
318
- ; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[TMP15]], i64 12
319
- ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Q]], i64 [[OFFSET_IDX6]]
320
- ; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[TMP16]], i64 12
321
321
; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[NEXT_GEP10]], align 16
322
322
; CHECK-NEXT: store i32 [[TMP17]], ptr [[NEXT_GEP5]], align 16
323
323
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]]
@@ -453,16 +453,28 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst)
453
453
; CHECK: vector.body:
454
454
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE15:%.*]] ]
455
455
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
456
+ ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[OFFSET_IDX]]
457
+ ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[TMP7]], i64 2
458
+ ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
459
+ ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[TMP8]], i64 4
460
+ ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
461
+ ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[TMP13]], i64 6
456
462
; CHECK-NEXT: [[OFFSET_IDX5:%.*]] = shl i64 [[INDEX]], 2
463
+ ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[OFFSET_IDX5]]
464
+ ; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[TMP14]], i64 4
465
+ ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX5]]
466
+ ; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[TMP19]], i64 8
467
+ ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX5]]
468
+ ; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[TMP20]], i64 12
457
469
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
458
470
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
459
471
; CHECK-NEXT: [[VEC_IV:%.*]] = or disjoint <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
460
472
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i64> [[VEC_IV]], splat (i64 257)
461
473
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0
462
474
; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
463
475
; CHECK: pred.store.if:
464
- ; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[OFFSET_IDX5]]
465
- ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[OFFSET_IDX]]
476
+ ; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX5]]
477
+ ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
466
478
; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[NEXT_GEP]], align 2
467
479
; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32
468
480
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i32 [[TMP4]], 7
@@ -472,10 +484,6 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst)
472
484
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1
473
485
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
474
486
; CHECK: pred.store.if9:
475
- ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX5]]
476
- ; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[TMP7]], i64 4
477
- ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
478
- ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[TMP8]], i64 2
479
487
; CHECK-NEXT: [[TMP9:%.*]] = load i16, ptr [[NEXT_GEP2]], align 2
480
488
; CHECK-NEXT: [[TMP10:%.*]] = zext i16 [[TMP9]] to i32
481
489
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i32 [[TMP10]], 7
@@ -485,10 +493,6 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst)
485
493
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP1]], i64 2
486
494
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
487
495
; CHECK: pred.store.if11:
488
- ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX5]]
489
- ; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[TMP13]], i64 8
490
- ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
491
- ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[TMP14]], i64 4
492
496
; CHECK-NEXT: [[TMP15:%.*]] = load i16, ptr [[NEXT_GEP3]], align 2
493
497
; CHECK-NEXT: [[TMP16:%.*]] = zext i16 [[TMP15]] to i32
494
498
; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i32 [[TMP16]], 7
@@ -498,10 +502,6 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst)
498
502
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP1]], i64 3
499
503
; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE15]]
500
504
; CHECK: pred.store.if13:
501
- ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX5]]
502
- ; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[TMP19]], i64 12
503
- ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
504
- ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[TMP20]], i64 6
505
505
; CHECK-NEXT: [[TMP21:%.*]] = load i16, ptr [[NEXT_GEP4]], align 2
506
506
; CHECK-NEXT: [[TMP22:%.*]] = zext i16 [[TMP21]] to i32
507
507
; CHECK-NEXT: [[TMP23:%.*]] = shl nuw nsw i32 [[TMP22]], 7
0 commit comments