@@ -310,3 +310,208 @@ loop:
310
310
exit: ; preds = %loop
311
311
ret void
312
312
}
313
+
314
+ ; Check the scenario where we have an unknown Stride (%N), which happens to also be
315
+ ; the loop iteration count. If we speculate Stride==1, it implies that the loop
316
+ ; will iterate no more than a single iteration. The expected SCEV assumptions below list only a <nssw> wrap predicate.
317
+ define void @unknown_stride_equalto_tc (i32 %N , ptr %A , ptr %B , i32 %j ) {
318
+ ; CHECK-LABEL: 'unknown_stride_equalto_tc'
319
+ ; CHECK-NEXT: loop:
320
+ ; CHECK-NEXT: Memory dependences are safe with run-time checks
321
+ ; CHECK-NEXT: Dependences:
322
+ ; CHECK-NEXT: Run-time memory checks:
323
+ ; CHECK-NEXT: Check 0:
324
+ ; CHECK-NEXT: Comparing group ([[GRP5:0x[0-9a-f]+]]):
325
+ ; CHECK-NEXT: ptr %A
326
+ ; CHECK-NEXT: Against group ([[GRP6:0x[0-9a-f]+]]):
327
+ ; CHECK-NEXT: %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
328
+ ; CHECK-NEXT: Grouped accesses:
329
+ ; CHECK-NEXT: Group [[GRP5]]:
330
+ ; CHECK-NEXT: (Low: %A High: (4 + %A))
331
+ ; CHECK-NEXT: Member: %A
332
+ ; CHECK-NEXT: Group [[GRP6]]:
333
+ ; CHECK-NEXT: (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + %N) to i64) * (sext i32 %N to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + %N) to i64) * (sext i32 %N to i64)) + %B))))
334
+ ; CHECK-NEXT: Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i32 %N to i64))<nsw>}<%loop>
335
+ ; CHECK-EMPTY:
336
+ ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
337
+ ; CHECK-NEXT: SCEV assumptions:
338
+ ; CHECK-NEXT: {%j,+,%N}<%loop> Added Flags: <nssw>
339
+ ; CHECK-EMPTY:
340
+ ; CHECK-NEXT: Expressions re-written:
341
+ ; CHECK-NEXT: [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
342
+ ; CHECK-NEXT: ((2 * (sext i32 {%j,+,%N}<%loop> to i64))<nsw> + %B)
343
+ ; CHECK-NEXT: --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i32 %N to i64))<nsw>}<%loop>
344
+ ;
345
+ entry:
346
+ %cmp = icmp eq i32 %N , 0 ; bypass the loop entirely when %N is zero
347
+ br i1 %cmp , label %exit , label %loop
348
+
349
+ loop:
350
+ %iv = phi i32 [ 0 , %entry ], [ %iv.next , %loop ]
351
+ %mul = mul i32 %iv , %N ; index advances by %N per iteration: the unknown stride
352
+ %add = add i32 %mul , %j ; offset the strided index by %j
353
+ %arrayidx = getelementptr inbounds i16 , ptr %B , i32 %add
354
+ %load = load i16 , ptr %arrayidx ; strided i16 read from %B
355
+ %sext = sext i16 %load to i32
356
+ store i32 %sext , ptr %A ; write through %A, which is not recomputed inside the loop
357
+ %iv.next = add nuw i32 %iv , 1
358
+ %exitcond = icmp eq i32 %iv.next , %N ; exit bound is the same %N used as the stride
359
+ br i1 %exitcond , label %exit , label %loop
360
+
361
+ exit:
362
+ ret void
363
+ }
364
+
365
+
366
+ ; Check the scenario where we have an unknown Stride, which happens to also be
367
+ ; the loop iteration count, but the TC is zero-extended from a narrower type (i16 %N widened to i32 %N.ext).
368
+ define void @unknown_stride_equalto_zext_tc (i16 zeroext %N , ptr %A , ptr %B , i32 %j ) {
369
+ ; CHECK-LABEL: 'unknown_stride_equalto_zext_tc'
370
+ ; CHECK-NEXT: loop:
371
+ ; CHECK-NEXT: Memory dependences are safe with run-time checks
372
+ ; CHECK-NEXT: Dependences:
373
+ ; CHECK-NEXT: Run-time memory checks:
374
+ ; CHECK-NEXT: Check 0:
375
+ ; CHECK-NEXT: Comparing group ([[GRP7:0x[0-9a-f]+]]):
376
+ ; CHECK-NEXT: ptr %A
377
+ ; CHECK-NEXT: Against group ([[GRP8:0x[0-9a-f]+]]):
378
+ ; CHECK-NEXT: %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
379
+ ; CHECK-NEXT: Grouped accesses:
380
+ ; CHECK-NEXT: Group [[GRP7]]:
381
+ ; CHECK-NEXT: (Low: %A High: (4 + %A))
382
+ ; CHECK-NEXT: Member: %A
383
+ ; CHECK-NEXT: Group [[GRP8]]:
384
+ ; CHECK-NEXT: (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (zext i16 %N to i32))<nsw> to i64) * (zext i16 %N to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (zext i16 %N to i32))<nsw> to i64) * (zext i16 %N to i64)) + %B))))
385
+ ; CHECK-NEXT: Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (zext i16 %N to i64))<nuw><nsw>}<%loop>
386
+ ; CHECK-EMPTY:
387
+ ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
388
+ ; CHECK-NEXT: SCEV assumptions:
389
+ ; CHECK-NEXT: {%j,+,(zext i16 %N to i32)}<nw><%loop> Added Flags: <nssw>
390
+ ; CHECK-EMPTY:
391
+ ; CHECK-NEXT: Expressions re-written:
392
+ ; CHECK-NEXT: [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
393
+ ; CHECK-NEXT: ((2 * (sext i32 {%j,+,(zext i16 %N to i32)}<nw><%loop> to i64))<nsw> + %B)
394
+ ; CHECK-NEXT: --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (zext i16 %N to i64))<nuw><nsw>}<%loop>
395
+ ;
396
+ entry:
397
+ %N.ext = zext i16 %N to i32 ; zero-extend the i16 count; used below as both stride and exit bound
398
+ %cmp = icmp eq i16 %N , 0 ; guard tests the original i16 %N; the loop is skipped when it is zero
399
+ br i1 %cmp , label %exit , label %loop
400
+
401
+ loop:
402
+ %iv = phi i32 [ 0 , %entry ], [ %iv.next , %loop ]
403
+ %mul = mul nuw i32 %iv , %N.ext ; index advances by %N.ext per iteration: the unknown stride
404
+ %add = add i32 %mul , %j ; offset the strided index by %j
405
+ %arrayidx = getelementptr inbounds i16 , ptr %B , i32 %add
406
+ %load = load i16 , ptr %arrayidx ; strided i16 read from %B
407
+ %sext = sext i16 %load to i32
408
+ store i32 %sext , ptr %A ; write through %A, which is not recomputed inside the loop
409
+ %iv.next = add nuw nsw i32 %iv , 1
410
+ %exitcond = icmp eq i32 %iv.next , %N.ext ; exit bound is the same %N.ext used as the stride
411
+ br i1 %exitcond , label %exit , label %loop
412
+
413
+ exit:
414
+ ret void
415
+ }
416
+
417
+ ; Check the scenario where we have an unknown Stride, which happens to also be
418
+ ; the loop iteration count, but the TC is sign-extended from a narrower type (i16 %N widened to i32 %N.ext).
419
+ define void @unknown_stride_equalto_sext_tc (i16 %N , ptr %A , ptr %B , i32 %j ) {
420
+ ; CHECK-LABEL: 'unknown_stride_equalto_sext_tc'
421
+ ; CHECK-NEXT: loop:
422
+ ; CHECK-NEXT: Memory dependences are safe with run-time checks
423
+ ; CHECK-NEXT: Dependences:
424
+ ; CHECK-NEXT: Run-time memory checks:
425
+ ; CHECK-NEXT: Check 0:
426
+ ; CHECK-NEXT: Comparing group ([[GRP9:0x[0-9a-f]+]]):
427
+ ; CHECK-NEXT: ptr %A
428
+ ; CHECK-NEXT: Against group ([[GRP10:0x[0-9a-f]+]]):
429
+ ; CHECK-NEXT: %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
430
+ ; CHECK-NEXT: Grouped accesses:
431
+ ; CHECK-NEXT: Group [[GRP9]]:
432
+ ; CHECK-NEXT: (Low: %A High: (4 + %A))
433
+ ; CHECK-NEXT: Member: %A
434
+ ; CHECK-NEXT: Group [[GRP10]]:
435
+ ; CHECK-NEXT: (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (sext i16 %N to i32))<nsw> to i64) * (sext i16 %N to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (sext i16 %N to i32))<nsw> to i64) * (sext i16 %N to i64)) + %B))))
436
+ ; CHECK-NEXT: Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i16 %N to i64))<nsw>}<%loop>
437
+ ; CHECK-EMPTY:
438
+ ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
439
+ ; CHECK-NEXT: SCEV assumptions:
440
+ ; CHECK-NEXT: {%j,+,(sext i16 %N to i32)}<nw><%loop> Added Flags: <nssw>
441
+ ; CHECK-EMPTY:
442
+ ; CHECK-NEXT: Expressions re-written:
443
+ ; CHECK-NEXT: [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
444
+ ; CHECK-NEXT: ((2 * (sext i32 {%j,+,(sext i16 %N to i32)}<nw><%loop> to i64))<nsw> + %B)
445
+ ; CHECK-NEXT: --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i16 %N to i64))<nsw>}<%loop>
446
+ ;
447
+ entry:
448
+ %N.ext = sext i16 %N to i32 ; sign-extend the i16 count; used below as both stride and exit bound
449
+ %cmp = icmp eq i16 %N , 0 ; guard tests the original i16 %N; the loop is skipped when it is zero
450
+ br i1 %cmp , label %exit , label %loop
451
+
452
+ loop:
453
+ %iv = phi i32 [ 0 , %entry ], [ %iv.next , %loop ]
454
+ %mul = mul nuw i32 %iv , %N.ext ; index advances by %N.ext per iteration: the unknown stride
455
+ %add = add i32 %mul , %j ; offset the strided index by %j
456
+ %arrayidx = getelementptr inbounds i16 , ptr %B , i32 %add
457
+ %load = load i16 , ptr %arrayidx ; strided i16 read from %B
458
+ %sext = sext i16 %load to i32
459
+ store i32 %sext , ptr %A ; write through %A, which is not recomputed inside the loop
460
+ %iv.next = add nuw nsw i32 %iv , 1
461
+ %exitcond = icmp eq i32 %iv.next , %N.ext ; exit bound is the same %N.ext used as the stride
462
+ br i1 %exitcond , label %exit , label %loop
463
+
464
+ exit:
465
+ ret void
466
+ }
467
+
468
+ ; Check the scenario where we have an unknown Stride, which happens to also be
469
+ ; the loop iteration count, but the TC is truncated from a wider type (i64 %N narrowed to i32 %N.trunc).
470
+ define void @unknown_stride_equalto_trunc_tc (i64 %N , ptr %A , ptr %B , i32 %j ) {
471
+ ; CHECK-LABEL: 'unknown_stride_equalto_trunc_tc'
472
+ ; CHECK-NEXT: loop:
473
+ ; CHECK-NEXT: Memory dependences are safe with run-time checks
474
+ ; CHECK-NEXT: Dependences:
475
+ ; CHECK-NEXT: Run-time memory checks:
476
+ ; CHECK-NEXT: Check 0:
477
+ ; CHECK-NEXT: Comparing group ([[GRP11:0x[0-9a-f]+]]):
478
+ ; CHECK-NEXT: ptr %A
479
+ ; CHECK-NEXT: Against group ([[GRP12:0x[0-9a-f]+]]):
480
+ ; CHECK-NEXT: %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
481
+ ; CHECK-NEXT: Grouped accesses:
482
+ ; CHECK-NEXT: Group [[GRP11]]:
483
+ ; CHECK-NEXT: (Low: %A High: (4 + %A))
484
+ ; CHECK-NEXT: Member: %A
485
+ ; CHECK-NEXT: Group [[GRP12]]:
486
+ ; CHECK-NEXT: (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (trunc i64 %N to i32)) to i64) * (sext i32 (trunc i64 %N to i32) to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (trunc i64 %N to i32)) to i64) * (sext i32 (trunc i64 %N to i32) to i64)) + %B))))
487
+ ; CHECK-NEXT: Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i32 (trunc i64 %N to i32) to i64))<nsw>}<%loop>
488
+ ; CHECK-EMPTY:
489
+ ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
490
+ ; CHECK-NEXT: SCEV assumptions:
491
+ ; CHECK-NEXT: {%j,+,(trunc i64 %N to i32)}<nw><%loop> Added Flags: <nssw>
492
+ ; CHECK-EMPTY:
493
+ ; CHECK-NEXT: Expressions re-written:
494
+ ; CHECK-NEXT: [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
495
+ ; CHECK-NEXT: ((2 * (sext i32 {%j,+,(trunc i64 %N to i32)}<nw><%loop> to i64))<nsw> + %B)
496
+ ; CHECK-NEXT: --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i32 (trunc i64 %N to i32) to i64))<nsw>}<%loop>
497
+ ;
498
+ entry:
499
+ %N.trunc = trunc i64 %N to i32 ; keep the low 32 bits of %N; used below as both stride and exit bound
500
+ %cmp = icmp eq i64 %N , 0 ; guard tests the full i64 %N, not %N.trunc -- NOTE(review): %N.trunc can be 0 while %N is not; confirm this is intended
501
+ br i1 %cmp , label %exit , label %loop
502
+
503
+ loop:
504
+ %iv = phi i32 [ 0 , %entry ], [ %iv.next , %loop ]
505
+ %mul = mul nuw i32 %iv , %N.trunc ; index advances by %N.trunc per iteration: the unknown stride
506
+ %add = add i32 %mul , %j ; offset the strided index by %j
507
+ %arrayidx = getelementptr inbounds i16 , ptr %B , i32 %add
508
+ %load = load i16 , ptr %arrayidx ; strided i16 read from %B
509
+ %sext = sext i16 %load to i32
510
+ store i32 %sext , ptr %A ; write through %A, which is not recomputed inside the loop
511
+ %iv.next = add nuw nsw i32 %iv , 1
512
+ %exitcond = icmp eq i32 %iv.next , %N.trunc ; exit bound is the same %N.trunc used as the stride
513
+ br i1 %exitcond , label %exit , label %loop
514
+
515
+ exit:
516
+ ret void
517
+ }
0 commit comments