Skip to content

Commit 1275e62

Browse files
authored
Merge pull request #41035 from beccadax/dont-take-this-literally
Don't import string macros with invalid UTF-8
2 parents d9b9afe + 4bd532a commit 1275e62

File tree

6 files changed

+33
-1
lines changed

6 files changed

+33
-1
lines changed

include/swift/Basic/Unicode.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,12 @@ bool isSingleUnicodeScalar(StringRef S);
6868

6969
unsigned extractFirstUnicodeScalar(StringRef S);
7070

71+
/// Returns true if \p S does not contain any ill-formed subsequences. This does
72+
/// not check whether all of the characters in it are actually allocated or
73+
/// used correctly; it just checks that every byte can be grouped into a code
74+
/// unit sequence that encodes a Unicode scalar value.
75+
bool isWellFormedUTF8(StringRef S);
76+
7177
} // end namespace unicode
7278
} // end namespace swift
7379

lib/Basic/Unicode.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,3 +123,8 @@ unsigned swift::unicode::extractFirstUnicodeScalar(StringRef S) {
123123
(void)Result;
124124
return Scalar;
125125
}
126+
127+
bool swift::unicode::isWellFormedUTF8(StringRef S) {
128+
const llvm::UTF8 *begin = S.bytes_begin();
129+
return llvm::isLegalUTF8String(&begin, S.bytes_end());
130+
}

lib/ClangImporter/ImportMacro.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "swift/AST/Stmt.h"
2323
#include "swift/AST/Types.h"
2424
#include "swift/Basic/PrettyStackTrace.h"
25+
#include "swift/Basic/Unicode.h"
2526
#include "swift/ClangImporter/ClangModule.h"
2627
#include "clang/AST/ASTContext.h"
2728
#include "clang/AST/Expr.h"
@@ -194,7 +195,11 @@ static ValueDecl *importStringLiteral(ClangImporter::Implementation &Impl,
194195
if (!importTy)
195196
return nullptr;
196197

197-
return Impl.createConstant(name, DC, importTy, parsed->getString(),
198+
StringRef text = parsed->getString();
199+
if (!unicode::isWellFormedUTF8(text))
200+
return nullptr;
201+
202+
return Impl.createConstant(name, DC, importTy, text,
198203
ConstantConvertKind::None, /*static*/ false,
199204
ClangN);
200205
}

lib/SIL/IR/SILInstructions.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1045,6 +1045,15 @@ StringLiteralInst::StringLiteralInst(SILDebugLocation Loc, StringRef Text,
10451045
SILNode::Bits.StringLiteralInst.TheEncoding = unsigned(encoding);
10461046
SILNode::Bits.StringLiteralInst.Length = Text.size();
10471047
memcpy(getTrailingObjects<char>(), Text.data(), Text.size());
1048+
1049+
// It is undefined behavior to feed ill-formed UTF-8 into `Swift.String`;
1050+
// however, the compiler creates string literals in many places, so there's a
1051+
// risk of a mistake. StringLiteralInsts can be optimized into
1052+
// IntegerLiteralInsts before reaching IRGen, so this constructor is the best
1053+
// chokepoint to validate *all* string literals that may eventually end up in
1054+
// a binary.
1055+
assert((encoding == Encoding::Bytes || unicode::isWellFormedUTF8(Text))
1056+
&& "Created StringLiteralInst with ill-formed UTF-8");
10481057
}
10491058

10501059
StringLiteralInst *StringLiteralInst::create(SILDebugLocation Loc,

test/ClangImporter/macros.swift

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,12 @@ func testCFString() -> String {
7474
return str
7575
}
7676

77+
func testInvalidStringLiterals() {
78+
// <rdar://67840900> - assertion/crash from importing a macro with a string
79+
// literal containing invalid UTF-8 characters
80+
_ = INVALID_UTF8_STRING // expected-error {{cannot find 'INVALID_UTF8_STRING' in scope}}
81+
}
82+
7783
func testInvalidIntegerLiterals() {
7884
var l1 = INVALID_INTEGER_LITERAL_1 // expected-error {{cannot find 'INVALID_INTEGER_LITERAL_1' in scope}}
7985
// FIXME: <rdar://problem/16445608> Swift should set up a DiagnosticConsumer for Clang

test/Inputs/clang-importer-sdk/usr/include/macros.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#define UTF8_STRING u8"Swift 🏃"
3636
#define OBJC_STRING @"Unicode! ✨"
3737
#define CF_STRING CFSTR("Swift")
38+
#define INVALID_UTF8_STRING "\xFF\xFF\xFF\xFF\xFF\xFF"
3839

3940
#define INVALID_INTEGER_LITERAL_1 10_9
4041
#define INVALID_INTEGER_LITERAL_2 10abc

0 commit comments

Comments
 (0)