Skip to content

Commit fd97dfb

Browse files
authored
[LV] Don't mark ptrs as safe to speculate if fed by UB/poison op. (llvm#143204)
Add additional checks before marking pointers safe to load speculatively. If some computations feeding the pointer may trigger UB, we cannot load the pointer speculatively, because we cannot compute the address speculatively. The UB-triggering instructions will be predicated, but if the predicated block does not execute, the result is poison. Similarly, we also cannot load the pointer speculatively if it may be poison. The patch also checks if any of the operands defined outside the loop may be poison when entering the loop. We *don't* need to check if any operation inside the loop may produce poison due to flags, as those will be dropped if needed. There are some types of instructions inside the loop that can produce poison independent of flags. Currently loads are also checked; it is not clear whether there is a convenient API to check for all such instructions. Fixes llvm#142957. PR: llvm#143204
1 parent 5835f1e commit fd97dfb

File tree

5 files changed

+202
-111
lines changed

5 files changed

+202
-111
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1491,10 +1491,51 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
14911491
SmallVector<const SCEVPredicate *, 4> Predicates;
14921492
for (Instruction &I : *BB) {
14931493
LoadInst *LI = dyn_cast<LoadInst>(&I);
1494+
1495+
// Make sure we can execute all computations feeding into Ptr in the loop
1496+
// w/o triggering UB and that none of the out-of-loop operands are poison.
1497+
// We do not need to check if operations inside the loop can produce
1498+
// poison due to flags (e.g. due to an inbounds GEP going out of bounds),
1499+
// because flags will be dropped when executing them unconditionally.
1500+
// TODO: Results could be improved by considering poison-propagation
1501+
// properties of visited ops.
1502+
auto CanSpeculatePointerOp = [this](Value *Ptr) {
1503+
SmallVector<Value *> Worklist = {Ptr};
1504+
SmallPtrSet<Value *, 4> Visited;
1505+
while (!Worklist.empty()) {
1506+
Value *CurrV = Worklist.pop_back_val();
1507+
if (!Visited.insert(CurrV).second)
1508+
continue;
1509+
1510+
auto *CurrI = dyn_cast<Instruction>(CurrV);
1511+
if (!CurrI || !TheLoop->contains(CurrI)) {
1512+
// If operands from outside the loop may be poison then Ptr may also
1513+
// be poison.
1514+
if (!isGuaranteedNotToBePoison(CurrV, AC,
1515+
TheLoop->getLoopPredecessor()
1516+
->getTerminator()
1517+
->getIterator()))
1518+
return false;
1519+
continue;
1520+
}
1521+
1522+
// A loaded value may be poison, independent of any flags.
1523+
if (isa<LoadInst>(CurrI) && !isGuaranteedNotToBePoison(CurrV, AC))
1524+
return false;
1525+
1526+
// For other ops, assume poison can only be introduced via flags,
1527+
// which can be dropped.
1528+
if (!isa<PHINode>(CurrI) && !isSafeToSpeculativelyExecute(CurrI))
1529+
return false;
1530+
append_range(Worklist, CurrI->operands());
1531+
}
1532+
return true;
1533+
};
14941534
// Pass the Predicates pointer to isDereferenceableAndAlignedInLoop so
14951535
// that it will consider loops that need guarding by SCEV checks. The
14961536
// vectoriser will generate these checks if we decide to vectorise.
14971537
if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
1538+
CanSpeculatePointerOp(LI->getPointerOperand()) &&
14981539
isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC,
14991540
&Predicates))
15001541
SafePointers.insert(LI->getPointerOperand());

llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll

Lines changed: 82 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -666,34 +666,54 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) {
666666
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
667667
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
668668
; CHECK: [[VECTOR_BODY]]:
669-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_SREM_CONTINUE6:.*]] ]
670-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_SREM_CONTINUE6]] ]
669+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6:.*]] ]
670+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6]] ]
671671
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
672672
; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
673673
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
674-
; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_SREM_IF:.*]], label %[[PRED_SREM_CONTINUE:.*]]
675-
; CHECK: [[PRED_SREM_IF]]:
676-
; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE]]
677-
; CHECK: [[PRED_SREM_CONTINUE]]:
674+
; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
675+
; CHECK: [[PRED_LOAD_IF]]:
676+
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
677+
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], poison
678+
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP4]]
679+
; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP23]], align 1
680+
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i8> poison, i8 [[TMP6]], i32 0
681+
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
682+
; CHECK: [[PRED_LOAD_CONTINUE]]:
683+
; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i8> [ poison, %[[VECTOR_BODY]] ], [ [[TMP24]], %[[PRED_LOAD_IF]] ]
678684
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
679-
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_SREM_IF1:.*]], label %[[PRED_SREM_CONTINUE2:.*]]
680-
; CHECK: [[PRED_SREM_IF1]]:
681-
; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE2]]
682-
; CHECK: [[PRED_SREM_CONTINUE2]]:
685+
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]]
686+
; CHECK: [[PRED_LOAD_IF1]]:
687+
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1
688+
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], poison
689+
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP11]]
690+
; CHECK-NEXT: [[TMP26:%.*]] = load i8, ptr [[TMP25]], align 1
691+
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i8> [[TMP8]], i8 [[TMP26]], i32 1
692+
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
693+
; CHECK: [[PRED_LOAD_CONTINUE2]]:
694+
; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i8> [ [[TMP8]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
683695
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
684-
; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_SREM_IF3:.*]], label %[[PRED_SREM_CONTINUE4:.*]]
685-
; CHECK: [[PRED_SREM_IF3]]:
686-
; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE4]]
687-
; CHECK: [[PRED_SREM_CONTINUE4]]:
696+
; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]]
697+
; CHECK: [[PRED_LOAD_IF3]]:
698+
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 2
699+
; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], poison
700+
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP18]]
701+
; CHECK-NEXT: [[TMP20:%.*]] = load i8, ptr [[TMP19]], align 1
702+
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i8> [[TMP29]], i8 [[TMP20]], i32 2
703+
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE4]]
704+
; CHECK: [[PRED_LOAD_CONTINUE4]]:
705+
; CHECK-NEXT: [[TMP22:%.*]] = phi <4 x i8> [ [[TMP29]], %[[PRED_LOAD_CONTINUE2]] ], [ [[TMP21]], %[[PRED_LOAD_IF3]] ]
688706
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
689-
; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_SREM_IF5:.*]], label %[[PRED_SREM_CONTINUE6]]
690-
; CHECK: [[PRED_SREM_IF5]]:
691-
; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE6]]
692-
; CHECK: [[PRED_SREM_CONTINUE6]]:
693-
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], poison
707+
; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6]]
708+
; CHECK: [[PRED_LOAD_IF5]]:
709+
; CHECK-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 3
710+
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP30]], poison
694711
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP12]]
695-
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i32 0
696-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP14]], align 1
712+
; CHECK-NEXT: [[TMP27:%.*]] = load i8, ptr [[TMP13]], align 1
713+
; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x i8> [[TMP22]], i8 [[TMP27]], i32 3
714+
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]]
715+
; CHECK: [[PRED_LOAD_CONTINUE6]]:
716+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = phi <4 x i8> [ [[TMP22]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP28]], %[[PRED_LOAD_IF5]] ]
697717
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i8> zeroinitializer, <4 x i8> [[WIDE_LOAD]]
698718
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
699719
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP15]], i32 0
@@ -743,34 +763,54 @@ define void @recipe_without_underlying_instr_lanes_used(i64 %n, ptr noalias %dst
743763
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
744764
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
745765
; CHECK: [[VECTOR_BODY]]:
746-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_SREM_CONTINUE6:.*]] ]
747-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_SREM_CONTINUE6]] ]
766+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6:.*]] ]
767+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6]] ]
748768
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
749769
; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
750770
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
751-
; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_SREM_IF:.*]], label %[[PRED_SREM_CONTINUE:.*]]
752-
; CHECK: [[PRED_SREM_IF]]:
753-
; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE]]
754-
; CHECK: [[PRED_SREM_CONTINUE]]:
771+
; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
772+
; CHECK: [[PRED_LOAD_IF]]:
773+
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
774+
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[TMP9]], poison
775+
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP16]]
776+
; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP23]], align 1
777+
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i8> poison, i8 [[TMP6]], i32 0
778+
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
779+
; CHECK: [[PRED_LOAD_CONTINUE]]:
780+
; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x i8> [ poison, %[[VECTOR_BODY]] ], [ [[TMP24]], %[[PRED_LOAD_IF]] ]
755781
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
756-
; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_SREM_IF1:.*]], label %[[PRED_SREM_CONTINUE2:.*]]
757-
; CHECK: [[PRED_SREM_IF1]]:
758-
; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE2]]
759-
; CHECK: [[PRED_SREM_CONTINUE2]]:
782+
; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]]
783+
; CHECK: [[PRED_LOAD_IF1]]:
784+
; CHECK-NEXT: [[TMP26:%.*]] = add i64 [[INDEX]], 1
785+
; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[TMP26]], poison
786+
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP29]]
787+
; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP30]], align 1
788+
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i8> [[TMP25]], i8 [[TMP13]], i32 1
789+
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
790+
; CHECK: [[PRED_LOAD_CONTINUE2]]:
791+
; CHECK-NEXT: [[TMP15:%.*]] = phi <4 x i8> [ [[TMP25]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
760792
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
761-
; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_SREM_IF3:.*]], label %[[PRED_SREM_CONTINUE4:.*]]
762-
; CHECK: [[PRED_SREM_IF3]]:
763-
; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE4]]
764-
; CHECK: [[PRED_SREM_CONTINUE4]]:
793+
; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]]
794+
; CHECK: [[PRED_LOAD_IF3]]:
795+
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 2
796+
; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], poison
797+
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP18]]
798+
; CHECK-NEXT: [[TMP20:%.*]] = load i8, ptr [[TMP19]], align 1
799+
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i8> [[TMP15]], i8 [[TMP20]], i32 2
800+
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE4]]
801+
; CHECK: [[PRED_LOAD_CONTINUE4]]:
802+
; CHECK-NEXT: [[TMP22:%.*]] = phi <4 x i8> [ [[TMP15]], %[[PRED_LOAD_CONTINUE2]] ], [ [[TMP21]], %[[PRED_LOAD_IF3]] ]
765803
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
766-
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_SREM_IF5:.*]], label %[[PRED_SREM_CONTINUE6]]
767-
; CHECK: [[PRED_SREM_IF5]]:
768-
; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE6]]
769-
; CHECK: [[PRED_SREM_CONTINUE6]]:
770-
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], poison
804+
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6]]
805+
; CHECK: [[PRED_LOAD_IF5]]:
806+
; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 3
807+
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP31]], poison
771808
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP7]]
772-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i32 0
773-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1
809+
; CHECK-NEXT: [[TMP27:%.*]] = load i8, ptr [[TMP8]], align 1
810+
; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x i8> [[TMP22]], i8 [[TMP27]], i32 3
811+
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]]
812+
; CHECK: [[PRED_LOAD_CONTINUE6]]:
813+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = phi <4 x i8> [ [[TMP22]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP28]], %[[PRED_LOAD_IF5]] ]
774814
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i8> zeroinitializer, <4 x i8> [[WIDE_LOAD]]
775815
; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> zeroinitializer, <4 x i64> poison
776816
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[PREDPHI7]], i32 3

0 commit comments

Comments
 (0)