Skip to content

Commit 76e02af

Browse files
committed
[LoopIdiom] BCmp: loop exit count must not be wider than size_t that bcmp takes
As reported by Joerg Sonnenberger on IRC, on 32-bit systems, where pointers and size_t are 32 bits wide, using a 64-bit-wide variable in the loop can result in a loop exit count whose type is wider than size_t. Now, I'm not sure whether we can produce `bcmp` from that (just truncate?), but we certainly should not assert/miscompile. llvm-svn: 374811
1 parent b9c55e2 commit 76e02af

File tree

2 files changed

+65
-0
lines changed

2 files changed

+65
-0
lines changed

llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2080,6 +2080,10 @@ bool LoopIdiomRecognize::recognizeBCmpLoopSCEV(uint64_t BCmpTyBytes,
20802080

20812081
LLVM_DEBUG(dbgs() << "SCEV expressions for loads are acceptable.\n");
20822082

2083+
// bcmp / memcmp take length argument as size_t, so let's conservatively
2084+
// assume that the iteration count should not be wider than that.
2085+
Type *CmpFuncSizeTy = DL->getIntPtrType(SE->getContext());
2086+
20832087
// For how many iterations is loop guaranteed not to exit via LoopLatch?
20842088
// This is one less than the maximal number of comparisons, and is: n + -1
20852089
const SCEV *LoopExitCount =
@@ -2089,6 +2093,8 @@ bool LoopIdiomRecognize::recognizeBCmpLoopSCEV(uint64_t BCmpTyBytes,
20892093
// Exit count, similarly, must be loop-invariant that dominates the loop header.
20902094
if (LoopExitCount == SE->getCouldNotCompute() ||
20912095
!LoopExitCount->getType()->isIntOrPtrTy() ||
2096+
LoopExitCount->getType()->getScalarSizeInBits() >
2097+
CmpFuncSizeTy->getScalarSizeInBits() ||
20922098
!SE->isAvailableAtLoopEntry(LoopExitCount, CurLoop)) {
20932099
LLVM_DEBUG(dbgs() << "Unsupported SCEV expression for loop latch exit.\n");
20942100
return false;

llvm/test/Transforms/LoopIdiom/bcmp-basic.ll

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1758,3 +1758,62 @@ cleanup:
17581758
%res = phi i1 [ false, %for.body ], [ true, %for.cond ], [ false, %entry ]
17591759
ret i1 %res
17601760
}
1761+
1762+
; With -m32:
1763+
; int index_wider_than_pointer(int* a, int* b, long long num) {
1764+
; for(long long i = 0; i < num; ++i) {
1765+
; if(a[i] != b[i])
1766+
; return 1;
1767+
; }
1768+
; return 0;
1769+
; }
1770+
define dso_local i64 @test(i64* %a, i64* %b, i128 %num) {
1771+
; CHECK-LABEL: @test(
1772+
; CHECK-NEXT: entry:
1773+
; CHECK-NEXT: [[CMP9:%.*]] = icmp sgt i128 [[NUM:%.*]], 0
1774+
; CHECK-NEXT: br i1 [[CMP9]], label [[FOR_BODY_PREHEADER:%.*]], label [[CLEANUP:%.*]]
1775+
; CHECK: for.body.preheader:
1776+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
1777+
; CHECK: for.cond:
1778+
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i128 [[INC:%.*]], [[NUM]]
1779+
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP_LOOPEXIT:%.*]]
1780+
; CHECK: for.body:
1781+
; CHECK-NEXT: [[I_010:%.*]] = phi i128 [ [[INC]], [[FOR_COND:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
1782+
; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i128 [[I_010]] to i64
1783+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[IDXPROM]]
1784+
; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]]
1785+
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[IDXPROM]]
1786+
; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARRAYIDX2]]
1787+
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i64 [[TMP0]], [[TMP1]]
1788+
; CHECK-NEXT: [[INC]] = add nuw nsw i128 [[I_010]], 1
1789+
; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_COND]], label [[CLEANUP_LOOPEXIT]]
1790+
; CHECK: cleanup.loopexit:
1791+
; CHECK-NEXT: [[DOTPH:%.*]] = phi i64 [ 1, [[FOR_BODY]] ], [ 0, [[FOR_COND]] ]
1792+
; CHECK-NEXT: br label [[CLEANUP]]
1793+
; CHECK: cleanup:
1794+
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[DOTPH]], [[CLEANUP_LOOPEXIT]] ]
1795+
; CHECK-NEXT: ret i64 [[TMP2]]
1796+
;
1797+
entry:
1798+
%cmp9 = icmp sgt i128 %num, 0
1799+
br i1 %cmp9, label %for.body, label %cleanup
1800+
1801+
for.cond: ; preds = %for.body
1802+
%cmp = icmp slt i128 %inc, %num
1803+
br i1 %cmp, label %for.body, label %cleanup
1804+
1805+
for.body: ; preds = %entry, %for.cond
1806+
%i.010 = phi i128 [ %inc, %for.cond ], [ 0, %entry ]
1807+
%idxprom = trunc i128 %i.010 to i64
1808+
%arrayidx = getelementptr inbounds i64, i64* %a, i64 %idxprom
1809+
%0 = load i64, i64* %arrayidx
1810+
%arrayidx2 = getelementptr inbounds i64, i64* %b, i64 %idxprom
1811+
%1 = load i64, i64* %arrayidx2
1812+
%cmp3 = icmp eq i64 %0, %1
1813+
%inc = add nuw nsw i128 %i.010, 1
1814+
br i1 %cmp3, label %for.cond, label %cleanup
1815+
1816+
cleanup: ; preds = %for.body, %for.cond, %entry
1817+
%2 = phi i64 [ 0, %entry ], [ 0, %for.cond ], [ 1, %for.body ]
1818+
ret i64 %2
1819+
}

0 commit comments

Comments
 (0)