Skip to content

Commit e12e02d

Browse files
Michael Benfieldgburgessiv
authored andcommitted
[clang] Evaluate strlen of strcpy argument for -Wfortify-source.
Also introduce Expr::tryEvaluateStrLen. Differential Revision: https://reviews.llvm.org/D104887
1 parent 6929bd6 commit e12e02d

File tree

6 files changed

+170
-111
lines changed

6 files changed

+170
-111
lines changed

clang/include/clang/AST/Expr.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -740,6 +740,12 @@ class Expr : public ValueStmt {
740740
bool tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx,
741741
unsigned Type) const;
742742

743+
/// If the current Expr is a pointer, this will try to statically
744+
/// determine the strlen of the string pointed to.
745+
/// Returns true if all of the above holds and we were able to figure out the
746+
/// strlen, false otherwise.
747+
bool tryEvaluateStrLen(uint64_t &Result, ASTContext &Ctx) const;
748+
743749
/// Enumeration used to describe the kind of Null pointer constant
744750
/// returned from \c isNullPointerConstant().
745751
enum NullPointerConstantKind {

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -816,6 +816,11 @@ def warn_fortify_source_size_mismatch : Warning<
816816
"'%0' size argument is too large; destination buffer has size %1,"
817817
" but size argument is %2">, InGroup<FortifySource>;
818818

819+
def warn_fortify_strlen_overflow: Warning<
820+
"'%0' will always overflow; destination buffer has size %1,"
821+
" but the source string has length %2 (including NUL byte)">,
822+
InGroup<FortifySource>;
823+
819824
def warn_fortify_source_format_overflow : Warning<
820825
"'%0' will always overflow; destination buffer has size %1,"
821826
" but format string expands to at least %2">,

clang/lib/AST/ExprConstant.cpp

Lines changed: 61 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1826,6 +1826,8 @@ static bool EvaluateComplex(const Expr *E, ComplexValue &Res, EvalInfo &Info);
18261826
static bool EvaluateAtomic(const Expr *E, const LValue *This, APValue &Result,
18271827
EvalInfo &Info);
18281828
static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result);
1829+
static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
1830+
EvalInfo &Info);
18291831

18301832
/// Evaluate an integer or fixed point expression into an APResult.
18311833
static bool EvaluateFixedPointOrInteger(const Expr *E, APFixedPoint &Result,
@@ -11836,46 +11838,10 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
1183611838
case Builtin::BI__builtin_wcslen: {
1183711839
// As an extension, we support __builtin_strlen() as a constant expression,
1183811840
// and support folding strlen() to a constant.
11839-
LValue String;
11840-
if (!EvaluatePointer(E->getArg(0), String, Info))
11841-
return false;
11842-
11843-
QualType CharTy = E->getArg(0)->getType()->getPointeeType();
11844-
11845-
// Fast path: if it's a string literal, search the string value.
11846-
if (const StringLiteral *S = dyn_cast_or_null<StringLiteral>(
11847-
String.getLValueBase().dyn_cast<const Expr *>())) {
11848-
// The string literal may have embedded null characters. Find the first
11849-
// one and truncate there.
11850-
StringRef Str = S->getBytes();
11851-
int64_t Off = String.Offset.getQuantity();
11852-
if (Off >= 0 && (uint64_t)Off <= (uint64_t)Str.size() &&
11853-
S->getCharByteWidth() == 1 &&
11854-
// FIXME: Add fast-path for wchar_t too.
11855-
Info.Ctx.hasSameUnqualifiedType(CharTy, Info.Ctx.CharTy)) {
11856-
Str = Str.substr(Off);
11857-
11858-
StringRef::size_type Pos = Str.find(0);
11859-
if (Pos != StringRef::npos)
11860-
Str = Str.substr(0, Pos);
11861-
11862-
return Success(Str.size(), E);
11863-
}
11864-
11865-
// Fall through to slow path to issue appropriate diagnostic.
11866-
}
11867-
11868-
// Slow path: scan the bytes of the string looking for the terminating 0.
11869-
for (uint64_t Strlen = 0; /**/; ++Strlen) {
11870-
APValue Char;
11871-
if (!handleLValueToRValueConversion(Info, E, CharTy, String, Char) ||
11872-
!Char.isInt())
11873-
return false;
11874-
if (!Char.getInt())
11875-
return Success(Strlen, E);
11876-
if (!HandleLValueArrayAdjustment(Info, E, String, CharTy, 1))
11877-
return false;
11878-
}
11841+
uint64_t StrLen;
11842+
if (EvaluateBuiltinStrLen(E->getArg(0), StrLen, Info))
11843+
return Success(StrLen, E);
11844+
return false;
1187911845
}
1188011846

1188111847
case Builtin::BIstrcmp:
@@ -15736,3 +15702,58 @@ bool Expr::tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx,
1573615702
EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold);
1573715703
return tryEvaluateBuiltinObjectSize(this, Type, Info, Result);
1573815704
}
15705+
15706+
static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
15707+
EvalInfo &Info) {
15708+
if (!E->getType()->hasPointerRepresentation() || !E->isPRValue())
15709+
return false;
15710+
15711+
LValue String;
15712+
15713+
if (!EvaluatePointer(E, String, Info))
15714+
return false;
15715+
15716+
QualType CharTy = E->getType()->getPointeeType();
15717+
15718+
// Fast path: if it's a string literal, search the string value.
15719+
if (const StringLiteral *S = dyn_cast_or_null<StringLiteral>(
15720+
String.getLValueBase().dyn_cast<const Expr *>())) {
15721+
StringRef Str = S->getBytes();
15722+
int64_t Off = String.Offset.getQuantity();
15723+
if (Off >= 0 && (uint64_t)Off <= (uint64_t)Str.size() &&
15724+
S->getCharByteWidth() == 1 &&
15725+
// FIXME: Add fast-path for wchar_t too.
15726+
Info.Ctx.hasSameUnqualifiedType(CharTy, Info.Ctx.CharTy)) {
15727+
Str = Str.substr(Off);
15728+
15729+
StringRef::size_type Pos = Str.find(0);
15730+
if (Pos != StringRef::npos)
15731+
Str = Str.substr(0, Pos);
15732+
15733+
Result = Str.size();
15734+
return true;
15735+
}
15736+
15737+
// Fall through to slow path.
15738+
}
15739+
15740+
// Slow path: scan the bytes of the string looking for the terminating 0.
15741+
for (uint64_t Strlen = 0; /**/; ++Strlen) {
15742+
APValue Char;
15743+
if (!handleLValueToRValueConversion(Info, E, CharTy, String, Char) ||
15744+
!Char.isInt())
15745+
return false;
15746+
if (!Char.getInt()) {
15747+
Result = Strlen;
15748+
return true;
15749+
}
15750+
if (!HandleLValueArrayAdjustment(Info, E, String, CharTy, 1))
15751+
return false;
15752+
}
15753+
}
15754+
15755+
bool Expr::tryEvaluateStrLen(uint64_t &Result, ASTContext &Ctx) const {
15756+
Expr::EvalStatus Status;
15757+
EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold);
15758+
return EvaluateBuiltinStrLen(this, Result, Info);
15759+
}

clang/lib/Sema/SemaChecking.cpp

Lines changed: 77 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -588,14 +588,8 @@ class EstimateSizeFormatHandler
588588

589589
} // namespace
590590

591-
/// Check a call to BuiltinID for buffer overflows. If BuiltinID is a
592-
/// __builtin_*_chk function, then use the object size argument specified in the
593-
/// source. Otherwise, infer the object size using __builtin_object_size.
594591
void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
595592
CallExpr *TheCall) {
596-
// FIXME: There are some more useful checks we could be doing here:
597-
// - Evaluate strlen of strcpy arguments, use as object size.
598-
599593
if (TheCall->isValueDependent() || TheCall->isTypeDependent() ||
600594
isConstantEvaluated())
601595
return;
@@ -607,13 +601,66 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
607601
const TargetInfo &TI = getASTContext().getTargetInfo();
608602
unsigned SizeTypeWidth = TI.getTypeWidth(TI.getSizeType());
609603

604+
auto ComputeExplicitObjectSizeArgument =
605+
[&](unsigned Index) -> Optional<llvm::APSInt> {
606+
Expr::EvalResult Result;
607+
Expr *SizeArg = TheCall->getArg(Index);
608+
if (!SizeArg->EvaluateAsInt(Result, getASTContext()))
609+
return llvm::None;
610+
return Result.Val.getInt();
611+
};
612+
613+
auto ComputeSizeArgument = [&](unsigned Index) -> Optional<llvm::APSInt> {
614+
// If the parameter has a pass_object_size attribute, then we should use its
615+
// (potentially) more strict checking mode. Otherwise, conservatively assume
616+
// type 0.
617+
int BOSType = 0;
618+
if (const auto *POS =
619+
FD->getParamDecl(Index)->getAttr<PassObjectSizeAttr>())
620+
BOSType = POS->getType();
621+
622+
const Expr *ObjArg = TheCall->getArg(Index);
623+
uint64_t Result;
624+
if (!ObjArg->tryEvaluateObjectSize(Result, getASTContext(), BOSType))
625+
return llvm::None;
626+
627+
// Get the object size in the target's size_t width.
628+
return llvm::APSInt::getUnsigned(Result).extOrTrunc(SizeTypeWidth);
629+
};
630+
631+
auto ComputeStrLenArgument = [&](unsigned Index) -> Optional<llvm::APSInt> {
632+
Expr *ObjArg = TheCall->getArg(Index);
633+
uint64_t Result;
634+
if (!ObjArg->tryEvaluateStrLen(Result, getASTContext()))
635+
return llvm::None;
636+
// Add 1 for null byte.
637+
return llvm::APSInt::getUnsigned(Result + 1).extOrTrunc(SizeTypeWidth);
638+
};
639+
640+
Optional<llvm::APSInt> SourceSize;
641+
Optional<llvm::APSInt> DestinationSize;
610642
unsigned DiagID = 0;
611643
bool IsChkVariant = false;
612-
Optional<llvm::APSInt> UsedSize;
613-
unsigned SizeIndex, ObjectIndex;
644+
614645
switch (BuiltinID) {
615646
default:
616647
return;
648+
case Builtin::BI__builtin_strcpy:
649+
case Builtin::BIstrcpy: {
650+
DiagID = diag::warn_fortify_strlen_overflow;
651+
SourceSize = ComputeStrLenArgument(1);
652+
DestinationSize = ComputeSizeArgument(0);
653+
break;
654+
}
655+
656+
case Builtin::BI__builtin___strcpy_chk: {
657+
DiagID = diag::warn_fortify_strlen_overflow;
658+
SourceSize = ComputeStrLenArgument(1);
659+
DestinationSize = ComputeExplicitObjectSizeArgument(2);
660+
IsChkVariant = true;
661+
break;
662+
}
663+
617664
case Builtin::BIsprintf:
618665
case Builtin::BI__builtin___sprintf_chk: {
619666
size_t FormatIndex = BuiltinID == Builtin::BIsprintf ? 1 : 3;
@@ -639,14 +686,13 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
639686
H, FormatBytes, FormatBytes + StrLen, getLangOpts(),
640687
Context.getTargetInfo(), false)) {
641688
DiagID = diag::warn_fortify_source_format_overflow;
642-
UsedSize = llvm::APSInt::getUnsigned(H.getSizeLowerBound())
643-
.extOrTrunc(SizeTypeWidth);
689+
SourceSize = llvm::APSInt::getUnsigned(H.getSizeLowerBound())
690+
.extOrTrunc(SizeTypeWidth);
644691
if (BuiltinID == Builtin::BI__builtin___sprintf_chk) {
692+
DestinationSize = ComputeExplicitObjectSizeArgument(2);
645693
IsChkVariant = true;
646-
ObjectIndex = 2;
647694
} else {
648-
IsChkVariant = false;
649-
ObjectIndex = 0;
695+
DestinationSize = ComputeSizeArgument(0);
650696
}
651697
break;
652698
}
@@ -664,18 +710,19 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
664710
case Builtin::BI__builtin___memccpy_chk:
665711
case Builtin::BI__builtin___mempcpy_chk: {
666712
DiagID = diag::warn_builtin_chk_overflow;
713+
SourceSize = ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 2);
714+
DestinationSize =
715+
ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 1);
667716
IsChkVariant = true;
668-
SizeIndex = TheCall->getNumArgs() - 2;
669-
ObjectIndex = TheCall->getNumArgs() - 1;
670717
break;
671718
}
672719

673720
case Builtin::BI__builtin___snprintf_chk:
674721
case Builtin::BI__builtin___vsnprintf_chk: {
675722
DiagID = diag::warn_builtin_chk_overflow;
723+
SourceSize = ComputeExplicitObjectSizeArgument(1);
724+
DestinationSize = ComputeExplicitObjectSizeArgument(3);
676725
IsChkVariant = true;
677-
SizeIndex = 1;
678-
ObjectIndex = 3;
679726
break;
680727
}
681728

@@ -691,8 +738,8 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
691738
// size larger than the destination buffer though; this is a runtime abort
692739
// in _FORTIFY_SOURCE mode, and is quite suspicious otherwise.
693740
DiagID = diag::warn_fortify_source_size_mismatch;
694-
SizeIndex = TheCall->getNumArgs() - 1;
695-
ObjectIndex = 0;
741+
SourceSize = ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 1);
742+
DestinationSize = ComputeSizeArgument(0);
696743
break;
697744
}
698745

@@ -705,59 +752,23 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
705752
case Builtin::BImempcpy:
706753
case Builtin::BI__builtin_mempcpy: {
707754
DiagID = diag::warn_fortify_source_overflow;
708-
SizeIndex = TheCall->getNumArgs() - 1;
709-
ObjectIndex = 0;
755+
SourceSize = ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 1);
756+
DestinationSize = ComputeSizeArgument(0);
710757
break;
711758
}
712759
case Builtin::BIsnprintf:
713760
case Builtin::BI__builtin_snprintf:
714761
case Builtin::BIvsnprintf:
715762
case Builtin::BI__builtin_vsnprintf: {
716763
DiagID = diag::warn_fortify_source_size_mismatch;
717-
SizeIndex = 1;
718-
ObjectIndex = 0;
764+
SourceSize = ComputeExplicitObjectSizeArgument(1);
765+
DestinationSize = ComputeSizeArgument(0);
719766
break;
720767
}
721768
}
722769

723-
llvm::APSInt ObjectSize;
724-
// For __builtin___*_chk, the object size is explicitly provided by the caller
725-
// (usually using __builtin_object_size). Use that value to check this call.
726-
if (IsChkVariant) {
727-
Expr::EvalResult Result;
728-
Expr *SizeArg = TheCall->getArg(ObjectIndex);
729-
if (!SizeArg->EvaluateAsInt(Result, getASTContext()))
730-
return;
731-
ObjectSize = Result.Val.getInt();
732-
733-
// Otherwise, try to evaluate an imaginary call to __builtin_object_size.
734-
} else {
735-
// If the parameter has a pass_object_size attribute, then we should use its
736-
// (potentially) more strict checking mode. Otherwise, conservatively assume
737-
// type 0.
738-
int BOSType = 0;
739-
if (const auto *POS =
740-
FD->getParamDecl(ObjectIndex)->getAttr<PassObjectSizeAttr>())
741-
BOSType = POS->getType();
742-
743-
Expr *ObjArg = TheCall->getArg(ObjectIndex);
744-
uint64_t Result;
745-
if (!ObjArg->tryEvaluateObjectSize(Result, getASTContext(), BOSType))
746-
return;
747-
// Get the object size in the target's size_t width.
748-
ObjectSize = llvm::APSInt::getUnsigned(Result).extOrTrunc(SizeTypeWidth);
749-
}
750-
751-
// Evaluate the number of bytes of the object that this call will use.
752-
if (!UsedSize) {
753-
Expr::EvalResult Result;
754-
Expr *UsedSizeArg = TheCall->getArg(SizeIndex);
755-
if (!UsedSizeArg->EvaluateAsInt(Result, getASTContext()))
756-
return;
757-
UsedSize = Result.Val.getInt().extOrTrunc(SizeTypeWidth);
758-
}
759-
760-
if (UsedSize.getValue().ule(ObjectSize))
770+
if (!SourceSize || !DestinationSize ||
771+
SourceSize.getValue().ule(DestinationSize.getValue()))
761772
return;
762773

763774
StringRef FunctionName = getASTContext().BuiltinInfo.getName(BuiltinID);
@@ -770,10 +781,13 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD,
770781
FunctionName = FunctionName.drop_front(std::strlen("__builtin_"));
771782
}
772783

784+
SmallString<16> DestinationStr;
785+
SmallString<16> SourceStr;
786+
DestinationSize->toString(DestinationStr, /*Radix=*/10);
787+
SourceSize->toString(SourceStr, /*Radix=*/10);
773788
DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall,
774789
PDiag(DiagID)
775-
<< FunctionName << toString(ObjectSize, /*Radix=*/10)
776-
<< toString(UsedSize.getValue(), /*Radix=*/10));
790+
<< FunctionName << DestinationStr << SourceStr);
777791
}
778792

779793
static bool SemaBuiltinSEHScopeCheck(Sema &SemaRef, CallExpr *TheCall,

0 commit comments

Comments
 (0)