Skip to content

Commit 559bc0c

Browse files
committed
[Loads] Check loop-varying pointer in isDereferenceableAndAlignedInLoop.
If the load executes in a successor of the header, check if the loop-varying pointer is dereferenceable and aligned at the branch in the header. This is stricter than necessary and we could instead look for any block in the loop that executes unconditionally and post-dominates the block with the access. Also moves up the assumption check to make sure it is done for each pointer in the chain.
1 parent a35640f commit 559bc0c

File tree

2 files changed

+57
-78
lines changed

2 files changed

+57
-78
lines changed

llvm/lib/Analysis/Loads.cpp

Lines changed: 36 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,29 @@ static bool isDereferenceableAndAlignedPointer(
4848
if (!Visited.insert(V).second)
4949
return false;
5050

51+
if (CtxI) {
52+
/// Look through assumes to see if both dereferenceability and alignment can
53+
/// be proven by an assume.
54+
RetainedKnowledge AlignRK;
55+
RetainedKnowledge DerefRK;
56+
if (getKnowledgeForValue(
57+
V, {Attribute::Dereferenceable, Attribute::Alignment}, AC,
58+
[&](RetainedKnowledge RK, Instruction *Assume, auto) {
59+
if (!isValidAssumeForContext(Assume, CtxI, DT))
60+
return false;
61+
if (RK.AttrKind == Attribute::Alignment)
62+
AlignRK = std::max(AlignRK, RK);
63+
if (RK.AttrKind == Attribute::Dereferenceable)
64+
DerefRK = std::max(DerefRK, RK);
65+
if (AlignRK && DerefRK && AlignRK.ArgValue >= Alignment.value() &&
66+
DerefRK.ArgValue >= Size.getZExtValue())
67+
return true; // We have found what we needed so we stop looking
68+
return false; // Other assumes may have better information. so
69+
// keep looking
70+
}))
71+
return true;
72+
}
73+
5174
// Note that it is not safe to speculate into a malloc'd region because
5275
// malloc may return null.
5376

@@ -171,29 +194,6 @@ static bool isDereferenceableAndAlignedPointer(
171194
Size, DL, CtxI, AC, DT, TLI,
172195
Visited, MaxDepth);
173196

174-
if (CtxI) {
175-
/// Look through assumes to see if both dereferencability and alignment can
176-
/// be provent by an assume
177-
RetainedKnowledge AlignRK;
178-
RetainedKnowledge DerefRK;
179-
if (getKnowledgeForValue(
180-
V, {Attribute::Dereferenceable, Attribute::Alignment}, AC,
181-
[&](RetainedKnowledge RK, Instruction *Assume, auto) {
182-
if (!isValidAssumeForContext(Assume, CtxI, DT))
183-
return false;
184-
if (RK.AttrKind == Attribute::Alignment)
185-
AlignRK = std::max(AlignRK, RK);
186-
if (RK.AttrKind == Attribute::Dereferenceable)
187-
DerefRK = std::max(DerefRK, RK);
188-
if (AlignRK && DerefRK && AlignRK.ArgValue >= Alignment.value() &&
189-
DerefRK.ArgValue >= Size.getZExtValue())
190-
return true; // We have found what we needed so we stop looking
191-
return false; // Other assumes may have better information. so
192-
// keep looking
193-
}))
194-
return true;
195-
}
196-
197197
// If we don't know, assume the worst.
198198
return false;
199199
}
@@ -291,6 +291,19 @@ bool llvm::isDereferenceableAndAlignedInLoop(
291291
return isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL,
292292
HeaderFirstNonPHI, AC, &DT);
293293

294+
// If the load executes in a successor of the header, check if the
295+
// loop-varying pointer is dereferenceable and aligned at the branch in the
296+
// header. This is stricter than necessary and we could instead look for any
297+
// block in the loop that executes unconditionally and post-dominates the
298+
// block with the access.
299+
if (LI->getParent() != L->getHeader() &&
300+
L->getExitingBlock() == L->getLoopLatch() &&
301+
isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL,
302+
L->getHeader()->getTerminator(), AC,
303+
&DT)) {
304+
return true;
305+
}
306+
294307
// Otherwise, check to see if we have a repeating access pattern where we can
295308
// prove that all accesses are well aligned and dereferenceable.
296309
auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Ptr));

llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll

Lines changed: 21 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ define void @deref_assumption_in_header_constant_trip_count(ptr noalias %a, ptr
1111
; CHECK: [[VECTOR_PH]]:
1212
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
1313
; CHECK: [[VECTOR_BODY]]:
14-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
15-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2]] ]
14+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
15+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
1616
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1717
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], <2 x i64> [[VEC_IND]]
1818
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
@@ -23,25 +23,8 @@ define void @deref_assumption_in_header_constant_trip_count(ptr noalias %a, ptr
2323
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
2424
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4
2525
; CHECK-NEXT: [[TMP9:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
26-
; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true)
27-
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
28-
; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
29-
; CHECK: [[PRED_LOAD_IF]]:
30-
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
31-
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
32-
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[TMP22]], i32 0
33-
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
34-
; CHECK: [[PRED_LOAD_CONTINUE]]:
35-
; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP11]], %[[PRED_LOAD_IF]] ]
36-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
37-
; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
38-
; CHECK: [[PRED_LOAD_IF1]]:
39-
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
40-
; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
41-
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP27]], i32 1
42-
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
43-
; CHECK: [[PRED_LOAD_CONTINUE2]]:
44-
; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ]
26+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
27+
; CHECK-NEXT: [[TMP17:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4
4528
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP17]]
4629
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
4730
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i32 0
@@ -529,17 +512,17 @@ define void @deref_assumption_in_then_constant_trip_count(ptr noalias %a, ptr no
529512
; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
530513
; CHECK: [[PRED_LOAD_IF]]:
531514
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 0
532-
; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
533-
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0
515+
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP17]], align 4
516+
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0
534517
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
535518
; CHECK: [[PRED_LOAD_CONTINUE]]:
536519
; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP9]], %[[PRED_LOAD_IF]] ]
537520
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
538521
; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
539522
; CHECK: [[PRED_LOAD_IF1]]:
540-
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 1
541-
; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
542-
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP23]], i32 1
523+
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 1
524+
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
525+
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP13]], i32 1
543526
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
544527
; CHECK: [[PRED_LOAD_CONTINUE2]]:
545528
; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
@@ -626,24 +609,24 @@ define void @deref_assumption_in_latch_constant_trip_count(ptr noalias %a, ptr n
626609
; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
627610
; CHECK: [[PRED_LOAD_IF]]:
628611
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
629-
; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
630-
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0
612+
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP17]], align 4
613+
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0
631614
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
632615
; CHECK: [[PRED_LOAD_CONTINUE]]:
633616
; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP9]], %[[PRED_LOAD_IF]] ]
634617
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
635618
; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
636619
; CHECK: [[PRED_LOAD_IF1]]:
637-
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
638-
; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
639-
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP23]], i32 1
620+
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
621+
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
622+
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP13]], i32 1
640623
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
641624
; CHECK: [[PRED_LOAD_CONTINUE2]]:
642625
; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
643626
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]]
644-
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
645-
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
646-
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP28]], i64 4), "dereferenceable"(ptr [[TMP20]], i64 4) ]
627+
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
628+
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
629+
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP16]], i64 4), "dereferenceable"(ptr [[TMP18]], i64 4) ]
647630
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
648631
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
649632
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP29]], i64 4), "dereferenceable"(ptr [[TMP19]], i64 4) ]
@@ -719,8 +702,8 @@ define void @deref_assumption_in_header_variable_trip_count(ptr noalias %a, ptr
719702
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
720703
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
721704
; CHECK: [[VECTOR_BODY]]:
722-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
723-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2]] ]
705+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
706+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
724707
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
725708
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], <2 x i64> [[VEC_IND]]
726709
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
@@ -731,25 +714,8 @@ define void @deref_assumption_in_header_variable_trip_count(ptr noalias %a, ptr
731714
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
732715
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4
733716
; CHECK-NEXT: [[TMP9:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
734-
; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true)
735-
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
736-
; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
737-
; CHECK: [[PRED_LOAD_IF]]:
738-
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
739-
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
740-
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[TMP22]], i32 0
741-
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
742-
; CHECK: [[PRED_LOAD_CONTINUE]]:
743-
; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP11]], %[[PRED_LOAD_IF]] ]
744-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
745-
; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
746-
; CHECK: [[PRED_LOAD_IF1]]:
747-
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1
748-
; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
749-
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP27]], i32 1
750-
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
751-
; CHECK: [[PRED_LOAD_CONTINUE2]]:
752-
; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ]
717+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
718+
; CHECK-NEXT: [[TMP17:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4
753719
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP17]]
754720
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
755721
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i32 0

0 commit comments

Comments
 (0)