Skip to content

Commit 296b700

Browse files
fhahn (Florian Hahn)
authored and committed
[Loads] Check loop-varying pointer in isDereferenceableAndAlignedInLoop.
If the load executes in a successor of the header, check if the loop-varying pointer is dereferenceable and aligned at the branch in the header. This is stricter than necessary; we could instead look for any block in the loop that executes unconditionally and post-dominates the block with the access. Also moves the assumption check up to make sure it is done for each pointer in the chain.
1 parent 8ac6a6b commit 296b700

File tree

2 files changed

+57
-78
lines changed

2 files changed

+57
-78
lines changed

llvm/lib/Analysis/Loads.cpp

Lines changed: 36 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,29 @@ static bool isDereferenceableAndAlignedPointer(
4747
if (!Visited.insert(V).second)
4848
return false;
4949

50+
if (CtxI) {
51+
/// Look through assumes to see if both dereferencability and alignment can
52+
/// be provent by an assume
53+
RetainedKnowledge AlignRK;
54+
RetainedKnowledge DerefRK;
55+
if (getKnowledgeForValue(
56+
V, {Attribute::Dereferenceable, Attribute::Alignment}, AC,
57+
[&](RetainedKnowledge RK, Instruction *Assume, auto) {
58+
if (!isValidAssumeForContext(Assume, CtxI, DT))
59+
return false;
60+
if (RK.AttrKind == Attribute::Alignment)
61+
AlignRK = std::max(AlignRK, RK);
62+
if (RK.AttrKind == Attribute::Dereferenceable)
63+
DerefRK = std::max(DerefRK, RK);
64+
if (AlignRK && DerefRK && AlignRK.ArgValue >= Alignment.value() &&
65+
DerefRK.ArgValue >= Size.getZExtValue())
66+
return true; // We have found what we needed so we stop looking
67+
return false; // Other assumes may have better information. so
68+
// keep looking
69+
}))
70+
return true;
71+
}
72+
5073
// Note that it is not safe to speculate into a malloc'd region because
5174
// malloc may return null.
5275

@@ -168,29 +191,6 @@ static bool isDereferenceableAndAlignedPointer(
168191
Size, DL, CtxI, AC, DT, TLI,
169192
Visited, MaxDepth);
170193

171-
if (CtxI) {
172-
/// Look through assumes to see if both dereferencability and alignment can
173-
/// be provent by an assume
174-
RetainedKnowledge AlignRK;
175-
RetainedKnowledge DerefRK;
176-
if (getKnowledgeForValue(
177-
V, {Attribute::Dereferenceable, Attribute::Alignment}, AC,
178-
[&](RetainedKnowledge RK, Instruction *Assume, auto) {
179-
if (!isValidAssumeForContext(Assume, CtxI, DT))
180-
return false;
181-
if (RK.AttrKind == Attribute::Alignment)
182-
AlignRK = std::max(AlignRK, RK);
183-
if (RK.AttrKind == Attribute::Dereferenceable)
184-
DerefRK = std::max(DerefRK, RK);
185-
if (AlignRK && DerefRK && AlignRK.ArgValue >= Alignment.value() &&
186-
DerefRK.ArgValue >= Size.getZExtValue())
187-
return true; // We have found what we needed so we stop looking
188-
return false; // Other assumes may have better information. so
189-
// keep looking
190-
}))
191-
return true;
192-
}
193-
194194
// If we don't know, assume the worst.
195195
return false;
196196
}
@@ -288,6 +288,19 @@ bool llvm::isDereferenceableAndAlignedInLoop(
288288
return isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL,
289289
HeaderFirstNonPHI, AC, &DT);
290290

291+
// If the load executes in a successor of the header, check if the
292+
// loop-varying pointer is dereferenceable and aligned at the branch in the
293+
// header. This is stricter than necessary and we could instead look for any
294+
// block in the loop that executes unconditionally and post-dominates the
295+
// block with the access.
296+
if (LI->getParent() != L->getHeader() &&
297+
L->getExitingBlock() == L->getLoopLatch() &&
298+
isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL,
299+
L->getHeader()->getTerminator(), AC,
300+
&DT)) {
301+
return true;
302+
}
303+
291304
// Otherwise, check to see if we have a repeating access pattern where we can
292305
// prove that all accesses are well aligned and dereferenceable.
293306
auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Ptr));

llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll

Lines changed: 21 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ define void @deref_assumption_in_header_constant_trip_count(ptr noalias %a, ptr
1111
; CHECK: [[VECTOR_PH]]:
1212
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
1313
; CHECK: [[VECTOR_BODY]]:
14-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
15-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2]] ]
14+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
15+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
1616
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1717
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], <2 x i64> [[VEC_IND]]
1818
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
@@ -23,25 +23,8 @@ define void @deref_assumption_in_header_constant_trip_count(ptr noalias %a, ptr
2323
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
2424
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4
2525
; CHECK-NEXT: [[TMP9:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
26-
; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true)
27-
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
28-
; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
29-
; CHECK: [[PRED_LOAD_IF]]:
30-
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
31-
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
32-
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[TMP22]], i32 0
33-
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
34-
; CHECK: [[PRED_LOAD_CONTINUE]]:
35-
; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP11]], %[[PRED_LOAD_IF]] ]
36-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
37-
; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
38-
; CHECK: [[PRED_LOAD_IF1]]:
39-
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
40-
; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
41-
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP27]], i32 1
42-
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
43-
; CHECK: [[PRED_LOAD_CONTINUE2]]:
44-
; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ]
26+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
27+
; CHECK-NEXT: [[TMP17:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4
4528
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP17]]
4629
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
4730
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i32 0
@@ -606,17 +589,17 @@ define void @deref_assumption_in_then_constant_trip_count(ptr noalias %a, ptr no
606589
; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
607590
; CHECK: [[PRED_LOAD_IF]]:
608591
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 0
609-
; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
610-
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0
592+
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP17]], align 4
593+
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0
611594
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
612595
; CHECK: [[PRED_LOAD_CONTINUE]]:
613596
; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP9]], %[[PRED_LOAD_IF]] ]
614597
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
615598
; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
616599
; CHECK: [[PRED_LOAD_IF1]]:
617-
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 1
618-
; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
619-
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP23]], i32 1
600+
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 1
601+
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
602+
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP13]], i32 1
620603
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
621604
; CHECK: [[PRED_LOAD_CONTINUE2]]:
622605
; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
@@ -703,24 +686,24 @@ define void @deref_assumption_in_latch_constant_trip_count(ptr noalias %a, ptr n
703686
; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
704687
; CHECK: [[PRED_LOAD_IF]]:
705688
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
706-
; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
707-
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0
689+
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP17]], align 4
690+
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0
708691
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
709692
; CHECK: [[PRED_LOAD_CONTINUE]]:
710693
; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP9]], %[[PRED_LOAD_IF]] ]
711694
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
712695
; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
713696
; CHECK: [[PRED_LOAD_IF1]]:
714-
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
715-
; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
716-
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP23]], i32 1
697+
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
698+
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
699+
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP13]], i32 1
717700
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
718701
; CHECK: [[PRED_LOAD_CONTINUE2]]:
719702
; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
720703
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]]
721-
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
722-
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
723-
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP28]], i64 4), "dereferenceable"(ptr [[TMP20]], i64 4) ]
704+
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
705+
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
706+
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP16]], i64 4), "dereferenceable"(ptr [[TMP18]], i64 4) ]
724707
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
725708
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
726709
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP29]], i64 4), "dereferenceable"(ptr [[TMP19]], i64 4) ]
@@ -796,8 +779,8 @@ define void @deref_assumption_in_header_variable_trip_count(ptr noalias %a, ptr
796779
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
797780
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
798781
; CHECK: [[VECTOR_BODY]]:
799-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
800-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2]] ]
782+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
783+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
801784
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
802785
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], <2 x i64> [[VEC_IND]]
803786
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
@@ -808,25 +791,8 @@ define void @deref_assumption_in_header_variable_trip_count(ptr noalias %a, ptr
808791
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
809792
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4
810793
; CHECK-NEXT: [[TMP9:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
811-
; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true)
812-
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
813-
; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
814-
; CHECK: [[PRED_LOAD_IF]]:
815-
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
816-
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
817-
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[TMP22]], i32 0
818-
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
819-
; CHECK: [[PRED_LOAD_CONTINUE]]:
820-
; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP11]], %[[PRED_LOAD_IF]] ]
821-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
822-
; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
823-
; CHECK: [[PRED_LOAD_IF1]]:
824-
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
825-
; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
826-
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP27]], i32 1
827-
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
828-
; CHECK: [[PRED_LOAD_CONTINUE2]]:
829-
; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ]
794+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
795+
; CHECK-NEXT: [[TMP17:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4
830796
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP17]]
831797
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
832798
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i32 0

0 commit comments

Comments
 (0)