Skip to content

Commit 7bec335

Browse files
committed
!fixup address comments, check that strides have the same direction.
1 parent a695f67 commit 7bec335

File tree

3 files changed

+19
-134
lines changed

3 files changed

+19
-134
lines changed

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1996,10 +1996,11 @@ getDependenceDistanceStrideAndSize(
19961996
InnermostLoop))
19971997
return MemoryDepChecker::Dependence::IndirectUnsafe;
19981998

1999-
// Need accesses with constant stride. We don't want to vectorize
2000-
// "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap
2001-
// in the address space.
2002-
if (!StrideAPtr || !StrideBPtr) {
1999+
// Need accesses with constant strides and the same direction. We don't want
2000+
// to vectorize "A[B[i]] += ..." and similar code or pointer arithmetic that
2001+
// could wrap in the address space.
2002+
if (!StrideAPtr || !StrideBPtr || (StrideAPtr > 0 && StrideBPtr < 0) ||
2003+
(StrideAPtr < 0 && StrideBPtr > 0)) {
20032004
LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
20042005
return MemoryDepChecker::Dependence::Unknown;
20052006
}
@@ -2075,7 +2076,7 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
20752076

20762077
// Negative distances are not plausible dependencies.
20772078
if (SE.isKnownNonPositive(Dist)) {
2078-
if (!SE.isKnownNegative(Dist)) {
2079+
if (SE.isKnownNonNegative(Dist)) {
20792080
if (HasSameSize) {
20802081
// Write to the same location with the same size.
20812082
return Dependence::Forward;

llvm/test/Transforms/LoopVectorize/global_alias.ll

Lines changed: 10 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -491,101 +491,22 @@ for.end: ; preds = %for.body
491491
ret i32 %1
492492
}
493493

494-
; /// Different objects, swapped induction, alias at the end
495-
; int noAlias15 (int a) {
496-
; int i;
497-
; for (i=0; i<SIZE; i++)
498-
; Foo.A[i] = Foo.B[SIZE-i-1] + a;
499-
; return Foo.A[a];
500-
; }
501-
; CHECK-LABEL: define i32 @noAlias15(
502-
; CHECK: vector.memcheck:
503-
; CHECK-NEXT: br i1 false, label %scalar.ph, label %vector.ph
504-
; CHECK: add nsw <4 x i32>
505-
; CHECK: ret
506-
507-
define i32 @noAlias15(i32 %a) nounwind {
508-
entry:
509-
br label %for.body
510-
511-
for.body: ; preds = %entry, %for.body
512-
%i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
513-
%sub1 = sub nuw nsw i32 99, %i.05
514-
%arrayidx = getelementptr inbounds %struct.anon, ptr @Foo, i32 0, i32 2, i32 %sub1
515-
%0 = load i32, ptr %arrayidx, align 4
516-
%add = add nsw i32 %0, %a
517-
%arrayidx2 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %i.05
518-
store i32 %add, ptr %arrayidx2, align 4
519-
%inc = add nuw nsw i32 %i.05, 1
520-
%exitcond.not = icmp eq i32 %inc, 100
521-
br i1 %exitcond.not, label %for.end, label %for.body
522-
523-
for.end: ; preds = %for.body
524-
%arrayidx3 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %a
525-
%1 = load i32, ptr %arrayidx3, align 4
526-
ret i32 %1
527-
}
528-
529-
; /// Different objects, swapped induction, alias at the beginning
530-
; int noAlias16 (int a) {
531-
; int i;
532-
; for (i=0; i<SIZE; i++)
533-
; Foo.A[SIZE-i-1] = Foo.B[i] + a;
534-
; return Foo.A[a];
535-
; }
536-
; CHECK-LABEL: define i32 @noAlias16(
537-
; CHECK: entry:
538-
; CHECK-NEXT: br i1 false, label %scalar.ph, label %vector.ph
539-
540-
; CHECK: add nsw <4 x i32>
541-
; CHECK: ret
542-
543-
define i32 @noAlias16(i32 %a) nounwind {
544-
entry:
545-
br label %for.body
546-
547-
for.body: ; preds = %entry, %for.body
548-
%i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
549-
%arrayidx = getelementptr inbounds %struct.anon, ptr @Foo, i32 0, i32 2, i32 %i.05
550-
%0 = load i32, ptr %arrayidx, align 4
551-
%add = add nsw i32 %0, %a
552-
%sub1 = sub nuw nsw i32 99, %i.05
553-
%arrayidx2 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %sub1
554-
store i32 %add, ptr %arrayidx2, align 4
555-
%inc = add nuw nsw i32 %i.05, 1
556-
%exitcond.not = icmp eq i32 %inc, 100
557-
br i1 %exitcond.not, label %for.end, label %for.body
558-
559-
for.end: ; preds = %for.body
560-
%arrayidx3 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %a
561-
%1 = load i32, ptr %arrayidx3, align 4
562-
ret i32 %1
563-
}
564-
565494

566495
;; === Now, the tests that we could vectorize with induction changes or run-time checks ===
567496

568497

569498
; /// Different objects, swapped induction, alias at the end
570-
; int mayAlias01 (int a, int N) {
499+
; int mayAlias01 (int a) {
571500
; int i;
572-
; for (i=0; i<N; i++)
501+
; for (i=0; i<SIZE; i++)
573502
; Foo.A[i] = Foo.B[SIZE-i-1] + a;
574503
; return Foo.A[a];
575504
; }
576505
; CHECK-LABEL: define i32 @mayAlias01(
577-
; CHECK: vector.memcheck:
578-
; CHECK-NEXT: [[MUL:%.+]] = shl i32 %N, 2
579-
; CHECK-NEXT: [[SCEVGEP0:%.+]] = getelementptr i8, ptr @Foo, i32 [[MUL]]
580-
; CHECK-NEXT: [[SUB:%.+]] = sub i32 804, [[MUL]]
581-
; CHECK-NEXT: [[SCEVGEP1:%.+]] = getelementptr i8, ptr @Foo, i32 [[SUB]]
582-
; CHECK-NEXT: [[BOUND:%.+]] = icmp ult ptr [[SCEVGEP1]], [[SCEVGEP0]]
583-
; CHECK-NEXT: br i1 [[BOUND]], label %scalar.ph, label %vector.ph
584-
585-
; CHECK: add nsw <4 x i32>
506+
; CHECK-NOT: add nsw <4 x i32>
586507
; CHECK: ret
587508

588-
define i32 @mayAlias01(i32 %a, i32 %N) nounwind {
509+
define i32 @mayAlias01(i32 %a) nounwind {
589510
entry:
590511
br label %for.body
591512

@@ -598,7 +519,7 @@ for.body: ; preds = %entry, %for.body
598519
%arrayidx2 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %i.05
599520
store i32 %add, ptr %arrayidx2, align 4
600521
%inc = add nuw nsw i32 %i.05, 1
601-
%exitcond.not = icmp eq i32 %inc, %N
522+
%exitcond.not = icmp eq i32 %inc, 100
602523
br i1 %exitcond.not, label %for.end, label %for.body
603524

604525
for.end: ; preds = %for.body
@@ -608,20 +529,17 @@ for.end: ; preds = %for.body
608529
}
609530

610531
; /// Different objects, swapped induction, alias at the beginning
611-
; int mayAlias02 (int a, int N) {
532+
; int mayAlias02 (int a) {
612533
; int i;
613-
; for (i=0; i<N; i++)
534+
; for (i=0; i<SIZE; i++)
614535
; Foo.A[SIZE-i-1] = Foo.B[i] + a;
615536
; return Foo.A[a];
616537
; }
617538
; CHECK-LABEL: define i32 @mayAlias02(
618-
; CHECK: vector.memcheck:
619-
; CHECK-NEXT: br i1 false, label %scalar.ph, label %vector.ph
620-
621-
; CHECK: add nsw <4 x i32>
539+
; CHECK-NOT: add nsw <4 x i32>
622540
; CHECK: ret
623541

624-
define i32 @mayAlias02(i32 %a, i32 %N) nounwind {
542+
define i32 @mayAlias02(i32 %a) nounwind {
625543
entry:
626544
br label %for.body
627545

@@ -634,7 +552,7 @@ for.body: ; preds = %entry, %for.body
634552
%arrayidx2 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %sub1
635553
store i32 %add, ptr %arrayidx2, align 4
636554
%inc = add nuw nsw i32 %i.05, 1
637-
%exitcond.not = icmp eq i32 %inc, %N
555+
%exitcond.not = icmp eq i32 %inc, 100
638556
br i1 %exitcond.not, label %for.end, label %for.body
639557

640558
for.end: ; preds = %for.body

llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll

Lines changed: 3 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -12,43 +12,9 @@ define i16 @helper(i16 %0, i64 %x) {
1212
; CHECK-NEXT: [[DATA:%.*]] = alloca [2 x i8], align 2
1313
; CHECK-NEXT: store i16 [[TMP0:%.*]], ptr [[DATA]], align 2
1414
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DATA]], i64 1
15-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[X:%.*]], 12
16-
; CHECK-NEXT: [[TMP9:%.*]] = sub i64 1, [[X]]
17-
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 [[TMP9]]
18-
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt ptr [[TMP3]], [[TMP1]]
19-
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[MIN_ITERS_CHECK]], i1 true, i1 [[TMP4]]
20-
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DATA]], i64 [[X]]
21-
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 2, [[X]]
22-
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DATA]], i64 [[TMP5]]
23-
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SCEVGEP1]], [[SCEVGEP]]
24-
; CHECK-NEXT: [[OR_COND7:%.*]] = or i1 [[OR_COND]], [[BOUND1]]
25-
; CHECK-NEXT: br i1 [[OR_COND7]], label [[BB6_I_I_PREHEADER:%.*]], label [[VECTOR_PH:%.*]]
26-
; CHECK: vector.ph:
27-
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[X]], -4
28-
; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[TMP1]], i64 -3
2915
; CHECK-NEXT: br label [[BB6_I_I:%.*]]
30-
; CHECK: vector.body:
31-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[BB6_I_I]] ]
32-
; CHECK-NEXT: [[TMP6:%.*]] = sub nsw i64 0, [[INDEX]]
33-
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [0 x i8], ptr [[DATA]], i64 0, i64 [[INDEX]]
34-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP7]], align 2, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
35-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [0 x i8], ptr [[INVARIANT_GEP]], i64 0, i64 [[TMP6]]
36-
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[GEP]], align 2, !alias.scope [[META3]]
37-
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD3]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
38-
; CHECK-NEXT: store <4 x i8> [[REVERSE]], ptr [[TMP7]], align 2, !alias.scope [[META0]], !noalias [[META3]]
39-
; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
40-
; CHECK-NEXT: store <4 x i8> [[REVERSE4]], ptr [[GEP]], align 2, !alias.scope [[META3]]
41-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
42-
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
43-
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[BB6_I_I]], !llvm.loop [[LOOP5:![0-9]+]]
44-
; CHECK: middle.block:
45-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[X]]
46-
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[BB6_I_I_PREHEADER]]
47-
; CHECK: bb6.i.i.preheader:
48-
; CHECK-NEXT: [[ITER_SROA_0_07_I_I_PH:%.*]] = phi i64 [ 0, [[START:%.*]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
49-
; CHECK-NEXT: br label [[BB6_I_I1:%.*]]
5016
; CHECK: bb6.i.i:
51-
; CHECK-NEXT: [[ITER_SROA_0_07_I_I:%.*]] = phi i64 [ [[TMP2:%.*]], [[BB6_I_I1]] ], [ [[ITER_SROA_0_07_I_I_PH]], [[BB6_I_I_PREHEADER]] ]
17+
; CHECK-NEXT: [[ITER_SROA_0_07_I_I:%.*]] = phi i64 [ [[TMP2:%.*]], [[BB6_I_I]] ], [ 0, [[START:%.*]] ]
5218
; CHECK-NEXT: [[_40_I_I:%.*]] = sub nsw i64 0, [[ITER_SROA_0_07_I_I]]
5319
; CHECK-NEXT: [[TMP2]] = add nuw nsw i64 [[ITER_SROA_0_07_I_I]], 1
5420
; CHECK-NEXT: [[_34_I_I:%.*]] = getelementptr inbounds [0 x i8], ptr [[DATA]], i64 0, i64 [[ITER_SROA_0_07_I_I]]
@@ -57,8 +23,8 @@ define i16 @helper(i16 %0, i64 %x) {
5723
; CHECK-NEXT: [[TMP2_0_COPYLOAD_I_I_I_I:%.*]] = load i8, ptr [[_39_I_I]], align 1
5824
; CHECK-NEXT: store i8 [[TMP2_0_COPYLOAD_I_I_I_I]], ptr [[_34_I_I]], align 1
5925
; CHECK-NEXT: store i8 [[TMP_0_COPYLOAD_I_I_I_I]], ptr [[_39_I_I]], align 1
60-
; CHECK-NEXT: [[EXITCOND_NOT_I_I:%.*]] = icmp eq i64 [[TMP2]], [[X]]
61-
; CHECK-NEXT: br i1 [[EXITCOND_NOT_I_I]], label [[EXIT]], label [[BB6_I_I1]], !llvm.loop [[LOOP8:![0-9]+]]
26+
; CHECK-NEXT: [[EXITCOND_NOT_I_I:%.*]] = icmp eq i64 [[TMP2]], [[X:%.*]]
27+
; CHECK-NEXT: br i1 [[EXITCOND_NOT_I_I]], label [[EXIT:%.*]], label [[BB6_I_I]]
6228
; CHECK: exit:
6329
; CHECK-NEXT: [[DOTSROA_0_0_COPYLOAD:%.*]] = load i16, ptr [[DATA]], align 2
6430
; CHECK-NEXT: ret i16 [[DOTSROA_0_0_COPYLOAD]]

0 commit comments

Comments
 (0)