Skip to content

Commit 22f0ebb

Browse files
authored
TargetLibraryInfo: Use pointer index size to determine getSizeTSize(). (#118747)
When using non-integral pointer types, such as on CHERI targets, size_t is equivalent to the index size, which is allowed to be smaller than the size of the pointer.
1 parent a67bd94 commit 22f0ebb

File tree

6 files changed

+58
-72
lines changed

6 files changed

+58
-72
lines changed

llvm/include/llvm/Analysis/TargetLibraryInfo.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
#define LLVM_ANALYSIS_TARGETLIBRARYINFO_H
1111

1212
#include "llvm/ADT/DenseMap.h"
13+
#include "llvm/IR/Constants.h"
1314
#include "llvm/IR/InstrTypes.h"
15+
#include "llvm/IR/Module.h"
1416
#include "llvm/IR/PassManager.h"
1517
#include "llvm/Pass.h"
1618
#include "llvm/TargetParser/Triple.h"
@@ -565,6 +567,16 @@ class TargetLibraryInfo {
565567
/// \copydoc TargetLibraryInfoImpl::getSizeTSize()
566568
unsigned getSizeTSize(const Module &M) const { return Impl->getSizeTSize(M); }
567569

570+
/// Returns an IntegerType corresponding to size_t.
571+
IntegerType *getSizeTType(const Module &M) const {
572+
return IntegerType::get(M.getContext(), getSizeTSize(M));
573+
}
574+
575+
/// Returns a constant materialized as a size_t type.
576+
ConstantInt *getAsSizeT(uint64_t V, const Module &M) const {
577+
return ConstantInt::get(getSizeTType(M), V);
578+
}
579+
568580
/// \copydoc TargetLibraryInfoImpl::getIntSize()
569581
unsigned getIntSize() const {
570582
return Impl->getIntSize();

llvm/lib/Analysis/TargetLibraryInfo.cpp

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1459,19 +1459,16 @@ unsigned TargetLibraryInfoImpl::getWCharSize(const Module &M) const {
14591459
}
14601460

14611461
unsigned TargetLibraryInfoImpl::getSizeTSize(const Module &M) const {
1462-
// There is really no guarantee that sizeof(size_t) is equal to sizeof(int*).
1463-
// If that isn't true then it should be possible to derive the SizeTTy from
1464-
// the target triple here instead and do an early return.
1465-
1466-
// Historically LLVM assume that size_t has same size as intptr_t (hence
1467-
// deriving the size from sizeof(int*) in address space zero). This should
1468-
// work for most targets. For future consideration: DataLayout also implement
1469-
// getIndexSizeInBits which might map better to size_t compared to
1470-
// getPointerSizeInBits. Hard coding address space zero here might be
1471-
// unfortunate as well. Maybe getDefaultGlobalsAddressSpace() or
1472-
// getAllocaAddrSpace() is better.
1473-
unsigned AddressSpace = 0;
1474-
return M.getDataLayout().getPointerSizeInBits(AddressSpace);
1462+
// There is really no guarantee that sizeof(size_t) is equal to the index
1463+
// size of the default address space. If that isn't true then it should be
1464+
// possible to derive the SizeTTy from the target triple here instead and do
1465+
// an early return.
1466+
1467+
// Hard coding address space zero may seem unfortunate, but a number of
1468+
// configurations of common targets (i386, x86-64 x32, aarch64 ILP32, possibly
1469+
// others) have larger-than-size_t index sizes on non-default address spaces,
1470+
// making this the best default.
1471+
return M.getDataLayout().getIndexSizeInBits(/*AddressSpace=*/0);
14751472
}
14761473

14771474
TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass()

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

Lines changed: 29 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -397,9 +397,8 @@ Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
397397

398398
// We have enough information to now generate the memcpy call to do the
399399
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
400-
B.CreateMemCpy(
401-
CpyDst, Align(1), Src, Align(1),
402-
ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1));
400+
B.CreateMemCpy(CpyDst, Align(1), Src, Align(1),
401+
TLI->getAsSizeT(Len + 1, *B.GetInsertBlock()->getModule()));
403402
return Dst;
404403
}
405404

@@ -590,26 +589,21 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilderBase &B) {
590589
if (Len1 && Len2) {
591590
return copyFlags(
592591
*CI, emitMemCmp(Str1P, Str2P,
593-
ConstantInt::get(DL.getIntPtrType(CI->getContext()),
594-
std::min(Len1, Len2)),
592+
TLI->getAsSizeT(std::min(Len1, Len2), *CI->getModule()),
595593
B, DL, TLI));
596594
}
597595

598596
// strcmp to memcmp
599597
if (!HasStr1 && HasStr2) {
600598
if (canTransformToMemCmp(CI, Str1P, Len2, DL))
601-
return copyFlags(
602-
*CI,
603-
emitMemCmp(Str1P, Str2P,
604-
ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2),
605-
B, DL, TLI));
599+
return copyFlags(*CI, emitMemCmp(Str1P, Str2P,
600+
TLI->getAsSizeT(Len2, *CI->getModule()),
601+
B, DL, TLI));
606602
} else if (HasStr1 && !HasStr2) {
607603
if (canTransformToMemCmp(CI, Str2P, Len1, DL))
608-
return copyFlags(
609-
*CI,
610-
emitMemCmp(Str1P, Str2P,
611-
ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1),
612-
B, DL, TLI));
604+
return copyFlags(*CI, emitMemCmp(Str1P, Str2P,
605+
TLI->getAsSizeT(Len1, *CI->getModule()),
606+
B, DL, TLI));
613607
}
614608

615609
annotateNonNullNoUndefBasedOnAccess(CI, {0, 1});
@@ -676,19 +670,15 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilderBase &B) {
676670
if (!HasStr1 && HasStr2) {
677671
Len2 = std::min(Len2, Length);
678672
if (canTransformToMemCmp(CI, Str1P, Len2, DL))
679-
return copyFlags(
680-
*CI,
681-
emitMemCmp(Str1P, Str2P,
682-
ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len2),
683-
B, DL, TLI));
673+
return copyFlags(*CI, emitMemCmp(Str1P, Str2P,
674+
TLI->getAsSizeT(Len2, *CI->getModule()),
675+
B, DL, TLI));
684676
} else if (HasStr1 && !HasStr2) {
685677
Len1 = std::min(Len1, Length);
686678
if (canTransformToMemCmp(CI, Str2P, Len1, DL))
687-
return copyFlags(
688-
*CI,
689-
emitMemCmp(Str1P, Str2P,
690-
ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len1),
691-
B, DL, TLI));
679+
return copyFlags(*CI, emitMemCmp(Str1P, Str2P,
680+
TLI->getAsSizeT(Len1, *CI->getModule()),
681+
B, DL, TLI));
692682
}
693683

694684
return nullptr;
@@ -722,15 +712,13 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilderBase &B) {
722712

723713
// We have enough information to now generate the memcpy call to do the
724714
// copy for us. Make a memcpy to copy the nul byte with align = 1.
725-
CallInst *NewCI =
726-
B.CreateMemCpy(Dst, Align(1), Src, Align(1),
727-
ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len));
715+
CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1),
716+
TLI->getAsSizeT(Len, *CI->getModule()));
728717
mergeAttributesAndFlags(NewCI, *CI);
729718
return Dst;
730719
}
731720

732721
Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilderBase &B) {
733-
Function *Callee = CI->getCalledFunction();
734722
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
735723

736724
// stpcpy(d,s) -> strcpy(d,s) if the result is not used.
@@ -749,10 +737,9 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilderBase &B) {
749737
else
750738
return nullptr;
751739

752-
Type *PT = Callee->getFunctionType()->getParamType(0);
753-
Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len);
740+
Value *LenV = TLI->getAsSizeT(Len, *CI->getModule());
754741
Value *DstEnd = B.CreateInBoundsGEP(
755-
B.getInt8Ty(), Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
742+
B.getInt8Ty(), Dst, TLI->getAsSizeT(Len - 1, *CI->getModule()));
756743

757744
// We have enough information to now generate the memcpy call to do the
758745
// copy for us. Make a memcpy to copy the nul byte with align = 1.
@@ -819,13 +806,11 @@ Value *LibCallSimplifier::optimizeStrLCpy(CallInst *CI, IRBuilderBase &B) {
819806
return ConstantInt::get(CI->getType(), 0);
820807
}
821808

822-
Function *Callee = CI->getCalledFunction();
823-
Type *PT = Callee->getFunctionType()->getParamType(0);
824809
// Transform strlcpy(D, S, N) to memcpy(D, S, N') where N' is the lower
825810
// bound on strlen(S) + 1 and N, optionally followed by a nul store to
826811
// D[N' - 1] if necessary.
827812
CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1),
828-
ConstantInt::get(DL.getIntPtrType(PT), NBytes));
813+
TLI->getAsSizeT(NBytes, *CI->getModule()));
829814
mergeAttributesAndFlags(NewCI, *CI);
830815

831816
if (!NulTerm) {
@@ -844,7 +829,6 @@ Value *LibCallSimplifier::optimizeStrLCpy(CallInst *CI, IRBuilderBase &B) {
844829
// otherwise.
845830
Value *LibCallSimplifier::optimizeStringNCpy(CallInst *CI, bool RetEnd,
846831
IRBuilderBase &B) {
847-
Function *Callee = CI->getCalledFunction();
848832
Value *Dst = CI->getArgOperand(0);
849833
Value *Src = CI->getArgOperand(1);
850834
Value *Size = CI->getArgOperand(2);
@@ -922,11 +906,10 @@ Value *LibCallSimplifier::optimizeStringNCpy(CallInst *CI, bool RetEnd,
922906
/*M=*/nullptr, /*AddNull=*/false);
923907
}
924908

925-
Type *PT = Callee->getFunctionType()->getParamType(0);
926909
// st{p,r}ncpy(D, S, N) -> memcpy(align 1 D, align 1 S, N) when both
927910
// S and N are constant.
928911
CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1),
929-
ConstantInt::get(DL.getIntPtrType(PT), N));
912+
TLI->getAsSizeT(N, *CI->getModule()));
930913
mergeAttributesAndFlags(NewCI, *CI);
931914
if (!RetEnd)
932915
return Dst;
@@ -3438,10 +3421,9 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
34383421
return nullptr; // we found a format specifier, bail out.
34393422

34403423
// sprintf(str, fmt) -> llvm.memcpy(align 1 str, align 1 fmt, strlen(fmt)+1)
3441-
B.CreateMemCpy(
3442-
Dest, Align(1), CI->getArgOperand(1), Align(1),
3443-
ConstantInt::get(DL.getIntPtrType(CI->getContext()),
3444-
FormatStr.size() + 1)); // Copy the null byte.
3424+
B.CreateMemCpy(Dest, Align(1), CI->getArgOperand(1), Align(1),
3425+
// Copy the null byte.
3426+
TLI->getAsSizeT(FormatStr.size() + 1, *CI->getModule()));
34453427
return ConstantInt::get(CI->getType(), FormatStr.size());
34463428
}
34473429

@@ -3476,9 +3458,8 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
34763458

34773459
uint64_t SrcLen = GetStringLength(CI->getArgOperand(2));
34783460
if (SrcLen) {
3479-
B.CreateMemCpy(
3480-
Dest, Align(1), CI->getArgOperand(2), Align(1),
3481-
ConstantInt::get(DL.getIntPtrType(CI->getContext()), SrcLen));
3461+
B.CreateMemCpy(Dest, Align(1), CI->getArgOperand(2), Align(1),
3462+
TLI->getAsSizeT(SrcLen, *CI->getModule()));
34823463
// Returns total number of characters written without null-character.
34833464
return ConstantInt::get(CI->getType(), SrcLen - 1);
34843465
} else if (Value *V = emitStpCpy(Dest, CI->getArgOperand(2), B, TLI)) {
@@ -3576,11 +3557,8 @@ Value *LibCallSimplifier::emitSnPrintfMemCpy(CallInst *CI, Value *StrArg,
35763557
Value *DstArg = CI->getArgOperand(0);
35773558
if (NCopy && StrArg)
35783559
// Transform the call to llvm.memcpy(dst, fmt, N).
3579-
copyFlags(
3580-
*CI,
3581-
B.CreateMemCpy(
3582-
DstArg, Align(1), StrArg, Align(1),
3583-
ConstantInt::get(DL.getIntPtrType(CI->getContext()), NCopy)));
3560+
copyFlags(*CI, B.CreateMemCpy(DstArg, Align(1), StrArg, Align(1),
3561+
TLI->getAsSizeT(NCopy, *CI->getModule())));
35843562

35853563
if (N > Str.size())
35863564
// Return early when the whole format string, including the final nul,
@@ -3696,11 +3674,9 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI,
36963674
if (FormatStr.contains('%'))
36973675
return nullptr; // We found a format specifier.
36983676

3699-
unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule());
3700-
Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits);
37013677
return copyFlags(
37023678
*CI, emitFWrite(CI->getArgOperand(1),
3703-
ConstantInt::get(SizeTTy, FormatStr.size()),
3679+
TLI->getAsSizeT(FormatStr.size(), *CI->getModule()),
37043680
CI->getArgOperand(0), B, DL, TLI));
37053681
}
37063682

llvm/test/Transforms/InstCombine/stdio-custom-dl.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,12 @@ target datalayout = "e-m:o-p:40:64:64:32-i64:64-f80:128-n8:16:32:64-S128"
88
@.str.1 = private unnamed_addr constant [2 x i8] c"w\00", align 1
99
@.str.2 = private unnamed_addr constant [4 x i8] c"str\00", align 1
1010

11-
; Check fwrite is generated with arguments of ptr size, not index size
11+
;; Check fwrite is generated with arguments of index size, not ptr size
12+
1213
define internal void @fputs_test_custom_dl() {
1314
; CHECK-LABEL: @fputs_test_custom_dl(
1415
; CHECK-NEXT: [[CALL:%.*]] = call ptr @fopen(ptr nonnull @.str, ptr nonnull @.str.1)
15-
; CHECK-NEXT: [[TMP1:%.*]] = call i40 @fwrite(ptr nonnull @.str.2, i40 3, i40 1, ptr [[CALL]])
16+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @fwrite(ptr nonnull @.str.2, i32 3, i32 1, ptr [[CALL]])
1617
; CHECK-NEXT: ret void
1718
;
1819
%call = call ptr @fopen(ptr @.str, ptr @.str.1)

llvm/test/Transforms/InstCombine/strcpy-nonzero-as.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ define void @test_strncpy_to_memcpy(ptr addrspace(200) %dst) addrspace(200) noun
5252
; CHECK-LABEL: define {{[^@]+}}@test_strncpy_to_memcpy
5353
; CHECK-SAME: (ptr addrspace(200) [[DST:%.*]]) addrspace(200) #[[ATTR1]] {
5454
; CHECK-NEXT: entry:
55-
; CHECK-NEXT: call addrspace(200) void @llvm.memcpy.p200.p200.i128(ptr addrspace(200) noundef align 1 dereferenceable(17) [[DST]], ptr addrspace(200) noundef align 1 dereferenceable(17) @str, i128 17, i1 false)
55+
; CHECK-NEXT: call addrspace(200) void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) noundef align 1 dereferenceable(17) [[DST]], ptr addrspace(200) noundef align 1 dereferenceable(17) @str, i64 17, i1 false)
5656
; CHECK-NEXT: ret void
5757
;
5858
entry:
@@ -64,7 +64,7 @@ define void @test_stpncpy_to_memcpy(ptr addrspace(200) %dst) addrspace(200) noun
6464
; CHECK-LABEL: define {{[^@]+}}@test_stpncpy_to_memcpy
6565
; CHECK-SAME: (ptr addrspace(200) [[DST:%.*]]) addrspace(200) #[[ATTR1]] {
6666
; CHECK-NEXT: entry:
67-
; CHECK-NEXT: call addrspace(200) void @llvm.memcpy.p200.p200.i128(ptr addrspace(200) noundef align 1 dereferenceable(17) [[DST]], ptr addrspace(200) noundef align 1 dereferenceable(17) @str, i128 17, i1 false)
67+
; CHECK-NEXT: call addrspace(200) void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) noundef align 1 dereferenceable(17) [[DST]], ptr addrspace(200) noundef align 1 dereferenceable(17) @str, i64 17, i1 false)
6868
; CHECK-NEXT: ret void
6969
;
7070
entry:

llvm/test/Transforms/MergeICmps/X86/distinct-index-width-crash.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,15 @@ target triple = "x86_64"
88
target datalayout = "e-p:64:64:64:32"
99

1010
; Define a custom data layout that has index width < pointer width
11-
; and make sure that doesn't mreak anything
11+
; and make sure that doesn't break anything
1212
define void @fat_ptrs(ptr dereferenceable(16) %a, ptr dereferenceable(16) %b) {
1313
; CHECK-LABEL: @fat_ptrs(
1414
; CHECK-NEXT: bb0:
1515
; CHECK-NEXT: [[PTR_A1:%.*]] = getelementptr inbounds [2 x i64], ptr [[A:%.*]], i32 0, i32 1
1616
; CHECK-NEXT: [[PTR_B1:%.*]] = getelementptr inbounds [2 x i64], ptr [[B:%.*]], i32 0, i32 1
1717
; CHECK-NEXT: br label %"bb1+bb2"
1818
; CHECK: "bb1+bb2":
19-
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(ptr [[A]], ptr [[B]], i64 16)
19+
; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(ptr [[A]], ptr [[B]], i32 16)
2020
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0
2121
; CHECK-NEXT: br label [[BB3:%.*]]
2222
; CHECK: bb3:

0 commit comments

Comments
 (0)