Skip to content

Commit ead3556

Browse files
committed
[LoopUtils] Freeze compare results for diff checks instead of pointers.
The freezes are introduced to avoid branching on undef/poison if any of the pointers may be poison. The same can be achieved by just freezing the compare, which reduces the number of freezes needed. See https://alive2.llvm.org/ce/z/NHa_ud Note that the individual compares need to be frozen; it is not sufficient to freeze only the resulting OR: Result OR frozen only (UNSOUND): https://alive2.llvm.org/ce/z/YzFHQY Individual conds frozen (SOUND): https://alive2.llvm.org/ce/z/5L6Z3f
1 parent 6a323e7 commit ead3556

File tree

3 files changed

+20
-26
lines changed

3 files changed

+20
-26
lines changed

llvm/lib/Transforms/Utils/LoopUtils.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1795,15 +1795,12 @@ Value *llvm::addDiffRuntimeChecks(
17951795
ConstantInt::get(Ty, IC * C.AccessSize));
17961796
Value *Sink = Expander.expandCodeFor(C.SinkStart, Ty, Loc);
17971797
Value *Src = Expander.expandCodeFor(C.SrcStart, Ty, Loc);
1798-
if (C.NeedsFreeze) {
1799-
IRBuilder<> Builder(Loc);
1800-
Sink = Builder.CreateFreeze(Sink, Sink->getName() + ".fr");
1801-
Src = Builder.CreateFreeze(Src, Src->getName() + ".fr");
1802-
}
18031798
Value *Diff = ChkBuilder.CreateSub(Sink, Src);
18041799
Value *IsConflict =
18051800
ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check");
1806-
1801+
if (C.NeedsFreeze)
1802+
IsConflict =
1803+
ChkBuilder.CreateFreeze(IsConflict, IsConflict->getName() + ".fr");
18071804
if (MemoryRuntimeCheck) {
18081805
IsConflict =
18091806
ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");

llvm/test/Transforms/LoopVectorize/forked-pointers.ll

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,28 +17,27 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
1717
define dso_local void @forked_ptrs_different_base_same_offset(ptr nocapture readonly %Base1, ptr nocapture readonly %Base2, ptr nocapture %Dest, ptr nocapture readonly %Preds) {
1818
; CHECK-LABEL: @forked_ptrs_different_base_same_offset(
1919
; CHECK-NEXT: entry:
20-
; CHECK-NEXT: [[BASE1_FR:%.*]] = freeze ptr [[BASE1:%.*]]
21-
; CHECK-NEXT: [[BASE2_FR:%.*]] = freeze ptr [[BASE2:%.*]]
22-
; CHECK-NEXT: [[DEST_FR:%.*]] = freeze ptr [[DEST:%.*]]
2320
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
2421
; CHECK: vector.memcheck:
25-
; CHECK-NEXT: [[DEST1:%.*]] = ptrtoint ptr [[DEST_FR]] to i64
22+
; CHECK-NEXT: [[DEST1:%.*]] = ptrtoint ptr [[DEST:%.*]] to i64
2623
; CHECK-NEXT: [[PREDS2:%.*]] = ptrtoint ptr [[PREDS:%.*]] to i64
27-
; CHECK-NEXT: [[BASE23:%.*]] = ptrtoint ptr [[BASE2_FR]] to i64
28-
; CHECK-NEXT: [[BASE15:%.*]] = ptrtoint ptr [[BASE1_FR]] to i64
24+
; CHECK-NEXT: [[BASE23:%.*]] = ptrtoint ptr [[BASE2:%.*]] to i64
25+
; CHECK-NEXT: [[BASE15:%.*]] = ptrtoint ptr [[BASE1:%.*]] to i64
2926
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[DEST1]], [[PREDS2]]
3027
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
3128
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[DEST1]], [[BASE23]]
32-
; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 16
29+
; CHECK-NEXT: [[TMP1_FR:%.*]] = freeze i64 [[TMP1]]
30+
; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1_FR]], 16
3331
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
3432
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[DEST1]], [[BASE15]]
35-
; CHECK-NEXT: [[DIFF_CHECK7:%.*]] = icmp ult i64 [[TMP2]], 16
33+
; CHECK-NEXT: [[TMP2_FR:%.*]] = freeze i64 [[TMP2]]
34+
; CHECK-NEXT: [[DIFF_CHECK7:%.*]] = icmp ult i64 [[TMP2_FR]], 16
3635
; CHECK-NEXT: [[CONFLICT_RDX8:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK7]]
3736
; CHECK-NEXT: br i1 [[CONFLICT_RDX8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
3837
; CHECK: vector.ph:
39-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[BASE2_FR]], i64 0
38+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[BASE2]], i64 0
4039
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
41-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x ptr> poison, ptr [[BASE1_FR]], i64 0
40+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x ptr> poison, ptr [[BASE1]], i64 0
4241
; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT9]], <4 x ptr> poison, <4 x i32> zeroinitializer
4342
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
4443
; CHECK: vector.body:
@@ -66,7 +65,7 @@ define dso_local void @forked_ptrs_different_base_same_offset(ptr nocapture read
6665
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x float> [[TMP21]], float [[TMP18]], i64 1
6766
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x float> [[TMP22]], float [[TMP19]], i64 2
6867
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP23]], float [[TMP20]], i64 3
69-
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[DEST_FR]], i64 [[INDEX]]
68+
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[DEST]], i64 [[INDEX]]
7069
; CHECK-NEXT: store <4 x float> [[TMP24]], ptr [[TMP25]], align 4
7170
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
7271
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
@@ -82,10 +81,10 @@ define dso_local void @forked_ptrs_different_base_same_offset(ptr nocapture read
8281
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[PREDS]], i64 [[INDVARS_IV]]
8382
; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
8483
; CHECK-NEXT: [[CMP1_NOT:%.*]] = icmp eq i32 [[TMP27]], 0
85-
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP1_NOT]], ptr [[BASE2_FR]], ptr [[BASE1_FR]]
84+
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP1_NOT]], ptr [[BASE2]], ptr [[BASE1]]
8685
; CHECK-NEXT: [[DOTSINK_IN:%.*]] = getelementptr inbounds float, ptr [[SPEC_SELECT]], i64 [[INDVARS_IV]]
8786
; CHECK-NEXT: [[DOTSINK:%.*]] = load float, ptr [[DOTSINK_IN]], align 4
88-
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[DEST_FR]], i64 [[INDVARS_IV]]
87+
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[DEST]], i64 [[INDVARS_IV]]
8988
; CHECK-NEXT: store float [[DOTSINK]], ptr [[TMP28]], align 4
9089
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
9190
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100

llvm/test/Transforms/LoopVectorize/pointer-select-runtime-checks.ll

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -84,15 +84,13 @@ define void @test_loop_dependent_select1(ptr %src.1, ptr %src.2, ptr %dst, i1 %c
8484
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 2
8585
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
8686
; CHECK: vector.memcheck:
87-
; CHECK-NEXT: [[DST1_FR:%.*]] = freeze i64 [[DST1]]
88-
; CHECK-NEXT: [[SRC_12_FR:%.*]] = freeze i64 [[SRC_12]]
89-
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[DST1_FR]], [[SRC_12_FR]]
87+
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[DST1]], [[SRC_12]]
9088
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2
91-
; CHECK-NEXT: [[DST1_FR4:%.*]] = freeze i64 [[DST1]]
92-
; CHECK-NEXT: [[SRC_23_FR:%.*]] = freeze i64 [[SRC_23]]
93-
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[DST1_FR4]], [[SRC_23_FR]]
89+
; CHECK-NEXT: [[DIFF_CHECK_FR:%.*]] = freeze i1 [[DIFF_CHECK]]
90+
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[DST1]], [[SRC_23]]
9491
; CHECK-NEXT: [[DIFF_CHECK5:%.*]] = icmp ult i64 [[TMP4]], 2
95-
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK5]]
92+
; CHECK-NEXT: [[DIFF_CHECK5_FR:%.*]] = freeze i1 [[DIFF_CHECK5]]
93+
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK_FR]], [[DIFF_CHECK5_FR]]
9694
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
9795
; CHECK: vector.ph:
9896
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2

0 commit comments

Comments
 (0)