Skip to content

Commit a80dd44

Browse files
authored
LAA: pre-commit tests for stride-versioning (#97570)
Add tests for when the Stride is unknown and equal to TC, with different kinds of casts. In these cases, LAA should not speculate on Stride.
1 parent 4e04286 commit a80dd44

File tree

1 file changed

+205
-0
lines changed

1 file changed

+205
-0
lines changed

llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll

Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,3 +310,208 @@ loop:
310310
exit: ; preds = %loop
311311
ret void
312312
}
313+
314+
; Check the scenario where we have an unknown Stride, which happens to also be
315+
; the loop iteration count. If we speculate Stride==1, it implies that the loop
316+
; will iterate no more than a single iteration.
;
; %N is both the multiplier of %iv in the index of %B and the bound of the exit
; compare, so the stride of the load equals the trip count. The expected
; analysis output keeps that stride symbolic: the only SCEV assumption listed
; is an <nssw> flag on the index recurrence {%j,+,%N}.
317+
define void @unknown_stride_equalto_tc(i32 %N, ptr %A, ptr %B, i32 %j) {
318+
; CHECK-LABEL: 'unknown_stride_equalto_tc'
319+
; CHECK-NEXT: loop:
320+
; CHECK-NEXT: Memory dependences are safe with run-time checks
321+
; CHECK-NEXT: Dependences:
322+
; CHECK-NEXT: Run-time memory checks:
323+
; CHECK-NEXT: Check 0:
324+
; CHECK-NEXT: Comparing group ([[GRP5:0x[0-9a-f]+]]):
325+
; CHECK-NEXT: ptr %A
326+
; CHECK-NEXT: Against group ([[GRP6:0x[0-9a-f]+]]):
327+
; CHECK-NEXT: %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
328+
; CHECK-NEXT: Grouped accesses:
329+
; CHECK-NEXT: Group [[GRP5]]:
330+
; CHECK-NEXT: (Low: %A High: (4 + %A))
331+
; CHECK-NEXT: Member: %A
332+
; CHECK-NEXT: Group [[GRP6]]:
333+
; CHECK-NEXT: (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + %N) to i64) * (sext i32 %N to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + %N) to i64) * (sext i32 %N to i64)) + %B))))
334+
; CHECK-NEXT: Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i32 %N to i64))<nsw>}<%loop>
335+
; CHECK-EMPTY:
336+
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
337+
; CHECK-NEXT: SCEV assumptions:
338+
; CHECK-NEXT: {%j,+,%N}<%loop> Added Flags: <nssw>
339+
; CHECK-EMPTY:
340+
; CHECK-NEXT: Expressions re-written:
341+
; CHECK-NEXT: [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
342+
; CHECK-NEXT: ((2 * (sext i32 {%j,+,%N}<%loop> to i64))<nsw> + %B)
343+
; CHECK-NEXT: --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i32 %N to i64))<nsw>}<%loop>
344+
;
345+
entry:
346+
; Guard: branch straight to %exit when %N is zero, so the loop body is skipped.
%cmp = icmp eq i32 %N, 0
347+
br i1 %cmp, label %exit, label %loop
348+
349+
loop:
350+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
351+
; Index into %B is %iv * %N + %j: the per-iteration stride is the symbolic %N.
%mul = mul i32 %iv, %N
352+
%add = add i32 %mul, %j
353+
%arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
354+
%load = load i16, ptr %arrayidx
355+
%sext = sext i16 %load to i32
356+
; The widened value is stored through the function argument %A on every iteration.
store i32 %sext, ptr %A
357+
%iv.next = add nuw i32 %iv, 1
358+
; Exit once %iv.next equals %N, the same value that is used as the stride above.
%exitcond = icmp eq i32 %iv.next, %N
359+
br i1 %exitcond, label %exit, label %loop
360+
361+
exit:
362+
ret void
363+
}
364+
365+
366+
; Check the scenario where we have an unknown Stride, which happens to also be
367+
; the loop iteration count, but the TC is zero-extended from a narrower type.
;
; %N is an i16 argument; its zero-extension %N.ext is both the multiplier of
; %iv in the index of %B and the bound of the exit compare. The expected
; analysis output keeps the stride symbolic as (zext i16 %N to i32): the only
; SCEV assumption listed is an <nssw> flag on the index recurrence.
368+
define void @unknown_stride_equalto_zext_tc(i16 zeroext %N, ptr %A, ptr %B, i32 %j) {
369+
; CHECK-LABEL: 'unknown_stride_equalto_zext_tc'
370+
; CHECK-NEXT: loop:
371+
; CHECK-NEXT: Memory dependences are safe with run-time checks
372+
; CHECK-NEXT: Dependences:
373+
; CHECK-NEXT: Run-time memory checks:
374+
; CHECK-NEXT: Check 0:
375+
; CHECK-NEXT: Comparing group ([[GRP7:0x[0-9a-f]+]]):
376+
; CHECK-NEXT: ptr %A
377+
; CHECK-NEXT: Against group ([[GRP8:0x[0-9a-f]+]]):
378+
; CHECK-NEXT: %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
379+
; CHECK-NEXT: Grouped accesses:
380+
; CHECK-NEXT: Group [[GRP7]]:
381+
; CHECK-NEXT: (Low: %A High: (4 + %A))
382+
; CHECK-NEXT: Member: %A
383+
; CHECK-NEXT: Group [[GRP8]]:
384+
; CHECK-NEXT: (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (zext i16 %N to i32))<nsw> to i64) * (zext i16 %N to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (zext i16 %N to i32))<nsw> to i64) * (zext i16 %N to i64)) + %B))))
385+
; CHECK-NEXT: Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (zext i16 %N to i64))<nuw><nsw>}<%loop>
386+
; CHECK-EMPTY:
387+
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
388+
; CHECK-NEXT: SCEV assumptions:
389+
; CHECK-NEXT: {%j,+,(zext i16 %N to i32)}<nw><%loop> Added Flags: <nssw>
390+
; CHECK-EMPTY:
391+
; CHECK-NEXT: Expressions re-written:
392+
; CHECK-NEXT: [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
393+
; CHECK-NEXT: ((2 * (sext i32 {%j,+,(zext i16 %N to i32)}<nw><%loop> to i64))<nsw> + %B)
394+
; CHECK-NEXT: --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (zext i16 %N to i64))<nuw><nsw>}<%loop>
395+
;
396+
entry:
397+
; Widen the i16 count to the i32 type used by the induction variable.
%N.ext = zext i16 %N to i32
398+
; Guard on the narrow value: branch straight to %exit when %N is zero.
%cmp = icmp eq i16 %N, 0
399+
br i1 %cmp, label %exit, label %loop
400+
401+
loop:
402+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
403+
; Index into %B is %iv * %N.ext + %j: the stride is the zero-extended %N.
%mul = mul nuw i32 %iv, %N.ext
404+
%add = add i32 %mul, %j
405+
%arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
406+
%load = load i16, ptr %arrayidx
407+
%sext = sext i16 %load to i32
408+
; The widened value is stored through the function argument %A on every iteration.
store i32 %sext, ptr %A
409+
%iv.next = add nuw nsw i32 %iv, 1
410+
; Exit once %iv.next equals %N.ext, the same value that is used as the stride.
%exitcond = icmp eq i32 %iv.next, %N.ext
411+
br i1 %exitcond, label %exit, label %loop
412+
413+
exit:
414+
ret void
415+
}
416+
417+
; Check the scenario where we have an unknown Stride, which happens to also be
418+
; the loop iteration count, but the TC is sign-extended from a narrower type.
;
; %N is an i16 argument; its sign-extension %N.ext is both the multiplier of
; %iv in the index of %B and the bound of the exit compare. The expected
; analysis output keeps the stride symbolic as (sext i16 %N to i32): the only
; SCEV assumption listed is an <nssw> flag on the index recurrence.
419+
define void @unknown_stride_equalto_sext_tc(i16 %N, ptr %A, ptr %B, i32 %j) {
420+
; CHECK-LABEL: 'unknown_stride_equalto_sext_tc'
421+
; CHECK-NEXT: loop:
422+
; CHECK-NEXT: Memory dependences are safe with run-time checks
423+
; CHECK-NEXT: Dependences:
424+
; CHECK-NEXT: Run-time memory checks:
425+
; CHECK-NEXT: Check 0:
426+
; CHECK-NEXT: Comparing group ([[GRP9:0x[0-9a-f]+]]):
427+
; CHECK-NEXT: ptr %A
428+
; CHECK-NEXT: Against group ([[GRP10:0x[0-9a-f]+]]):
429+
; CHECK-NEXT: %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
430+
; CHECK-NEXT: Grouped accesses:
431+
; CHECK-NEXT: Group [[GRP9]]:
432+
; CHECK-NEXT: (Low: %A High: (4 + %A))
433+
; CHECK-NEXT: Member: %A
434+
; CHECK-NEXT: Group [[GRP10]]:
435+
; CHECK-NEXT: (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (sext i16 %N to i32))<nsw> to i64) * (sext i16 %N to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (sext i16 %N to i32))<nsw> to i64) * (sext i16 %N to i64)) + %B))))
436+
; CHECK-NEXT: Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i16 %N to i64))<nsw>}<%loop>
437+
; CHECK-EMPTY:
438+
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
439+
; CHECK-NEXT: SCEV assumptions:
440+
; CHECK-NEXT: {%j,+,(sext i16 %N to i32)}<nw><%loop> Added Flags: <nssw>
441+
; CHECK-EMPTY:
442+
; CHECK-NEXT: Expressions re-written:
443+
; CHECK-NEXT: [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
444+
; CHECK-NEXT: ((2 * (sext i32 {%j,+,(sext i16 %N to i32)}<nw><%loop> to i64))<nsw> + %B)
445+
; CHECK-NEXT: --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i16 %N to i64))<nsw>}<%loop>
446+
;
447+
entry:
448+
; Widen the i16 count, preserving its sign, to the i32 type used by %iv.
%N.ext = sext i16 %N to i32
449+
; Guard on the narrow value: branch straight to %exit when %N is zero.
%cmp = icmp eq i16 %N, 0
450+
br i1 %cmp, label %exit, label %loop
451+
452+
loop:
453+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
454+
; Index into %B is %iv * %N.ext + %j: the stride is the sign-extended %N.
%mul = mul nuw i32 %iv, %N.ext
455+
%add = add i32 %mul, %j
456+
%arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
457+
%load = load i16, ptr %arrayidx
458+
%sext = sext i16 %load to i32
459+
; The widened value is stored through the function argument %A on every iteration.
store i32 %sext, ptr %A
460+
%iv.next = add nuw nsw i32 %iv, 1
461+
; Exit once %iv.next equals %N.ext, the same value that is used as the stride.
%exitcond = icmp eq i32 %iv.next, %N.ext
462+
br i1 %exitcond, label %exit, label %loop
463+
464+
exit:
465+
ret void
466+
}
467+
468+
; Check the scenario where we have an unknown Stride, which happens to also be
469+
; the loop iteration count, but the TC is truncated from a wider type.
;
; %N is an i64 argument; its truncation %N.trunc is both the multiplier of %iv
; in the index of %B and the bound of the exit compare. The expected analysis
; output keeps the stride symbolic as (trunc i64 %N to i32): the only SCEV
; assumption listed is an <nssw> flag on the index recurrence.
470+
define void @unknown_stride_equalto_trunc_tc(i64 %N, ptr %A, ptr %B, i32 %j) {
471+
; CHECK-LABEL: 'unknown_stride_equalto_trunc_tc'
472+
; CHECK-NEXT: loop:
473+
; CHECK-NEXT: Memory dependences are safe with run-time checks
474+
; CHECK-NEXT: Dependences:
475+
; CHECK-NEXT: Run-time memory checks:
476+
; CHECK-NEXT: Check 0:
477+
; CHECK-NEXT: Comparing group ([[GRP11:0x[0-9a-f]+]]):
478+
; CHECK-NEXT: ptr %A
479+
; CHECK-NEXT: Against group ([[GRP12:0x[0-9a-f]+]]):
480+
; CHECK-NEXT: %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
481+
; CHECK-NEXT: Grouped accesses:
482+
; CHECK-NEXT: Group [[GRP11]]:
483+
; CHECK-NEXT: (Low: %A High: (4 + %A))
484+
; CHECK-NEXT: Member: %A
485+
; CHECK-NEXT: Group [[GRP12]]:
486+
; CHECK-NEXT: (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (trunc i64 %N to i32)) to i64) * (sext i32 (trunc i64 %N to i32) to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (trunc i64 %N to i32)) to i64) * (sext i32 (trunc i64 %N to i32) to i64)) + %B))))
487+
; CHECK-NEXT: Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i32 (trunc i64 %N to i32) to i64))<nsw>}<%loop>
488+
; CHECK-EMPTY:
489+
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
490+
; CHECK-NEXT: SCEV assumptions:
491+
; CHECK-NEXT: {%j,+,(trunc i64 %N to i32)}<nw><%loop> Added Flags: <nssw>
492+
; CHECK-EMPTY:
493+
; CHECK-NEXT: Expressions re-written:
494+
; CHECK-NEXT: [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
495+
; CHECK-NEXT: ((2 * (sext i32 {%j,+,(trunc i64 %N to i32)}<nw><%loop> to i64))<nsw> + %B)
496+
; CHECK-NEXT: --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i32 (trunc i64 %N to i32) to i64))<nsw>}<%loop>
497+
;
498+
entry:
499+
; Narrow the i64 count to the i32 type used by the induction variable.
%N.trunc = trunc i64 %N to i32
500+
; Guard on the wide value: branch straight to %exit when the i64 %N is zero.
; NOTE(review): the guard tests the untruncated %N, not %N.trunc, so a value
; whose low 32 bits are all zero still enters the loop - confirm this is the
; intended shape of the test.
%cmp = icmp eq i64 %N, 0
501+
br i1 %cmp, label %exit, label %loop
502+
503+
loop:
504+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
505+
; Index into %B is %iv * %N.trunc + %j: the stride is the truncated %N.
%mul = mul nuw i32 %iv, %N.trunc
506+
%add = add i32 %mul, %j
507+
%arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
508+
%load = load i16, ptr %arrayidx
509+
%sext = sext i16 %load to i32
510+
; The widened value is stored through the function argument %A on every iteration.
store i32 %sext, ptr %A
511+
%iv.next = add nuw nsw i32 %iv, 1
512+
; Exit once %iv.next equals %N.trunc, the same value that is used as the stride.
%exitcond = icmp eq i32 %iv.next, %N.trunc
513+
br i1 %exitcond, label %exit, label %loop
514+
515+
exit:
516+
ret void
517+
}

0 commit comments

Comments
 (0)