Skip to content

Commit ac512d4

Browse files
committed
[libSyntax] Add a reference counted version of OwnedString
We cannot use unowned strings for the token texts of incrementally parsed syntax trees, because the source buffer that reused nodes refer to will already have been freed. Always copying the token text whenever an OwnedString is passed around is too expensive. A reference-counted copy of the string allows us to keep the token's string alive across incremental parses while eliminating unnecessary copies.
1 parent a03749a commit ac512d4

File tree

10 files changed

+127
-268
lines changed

10 files changed

+127
-268
lines changed

include/swift/Basic/OwnedString.h

Lines changed: 51 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -22,117 +22,89 @@
2222

2323
#include "llvm/ADT/IntrusiveRefCntPtr.h"
2424
#include "llvm/ADT/StringRef.h"
25+
#include "llvm/Support/TrailingObjects.h"
2526

2627
using llvm::StringRef;
2728

2829
namespace swift {
2930

30-
enum class StringOwnership {
31-
/// An OwnedString holds a weak reference to the underlying string storage
32-
/// and will never attempt to free it.
33-
Unowned,
34-
35-
/// An OwnedString has its own copy of the underlying string storage and
36-
/// will free the storage upon its destruction.
37-
Copied,
38-
};
39-
4031
/// Holds a string - either statically allocated or dynamically allocated
4132
/// and owned by this type.
4233
class OwnedString {
43-
const char *Data;
44-
size_t Length;
45-
StringOwnership Ownership = StringOwnership::Unowned;
46-
47-
void release() {
48-
if (Ownership == StringOwnership::Copied)
49-
free(const_cast<char *>(Data));
50-
}
34+
/// An owner that keeps the buffer of a ref counted \c OwnedString alive.
35+
class TextOwner final : public llvm::ThreadSafeRefCountedBase<TextOwner>,
36+
public llvm::TrailingObjects<TextOwner, char> {
37+
TextOwner(StringRef Text) {
38+
std::uninitialized_copy(Text.begin(), Text.end(),
39+
getTrailingObjects<char>());
40+
}
5141

52-
void initialize(const char* Data, size_t Length, StringOwnership Ownership) {
53-
this->Length = Length;
54-
this->Ownership = Ownership;
55-
if (Ownership == StringOwnership::Copied && Data) {
56-
char *substring = static_cast<char *>(malloc(Length + 1));
57-
assert(substring && "expected successful malloc of copy");
42+
public:
43+
static TextOwner *make(StringRef Text) {
44+
auto size = totalSizeToAlloc<char>(Text.size());
45+
void *data = ::operator new(size);
46+
return new (data) TextOwner(Text);
47+
}
5848

59-
memcpy(substring, Data, Length);
60-
substring[Length] = '\0';
49+
const char *getText() const { return getTrailingObjects<char>(); }
50+
};
6151

62-
this->Data = substring;
63-
}
64-
else
65-
this->Data = Data;
66-
}
67-
OwnedString(const char* Data, size_t Length, StringOwnership Ownership) {
68-
initialize(Data, Length, Ownership);
69-
}
70-
public:
71-
OwnedString(): OwnedString(nullptr, 0, StringOwnership::Unowned) {}
52+
/// The text this owned string represents
53+
StringRef Text;
7254

73-
OwnedString(const char *Data, size_t Length):
74-
OwnedString(Data, Length, StringOwnership::Copied) {}
55+
/// In case of a ref counted string an owner that keeps the buffer \c Text
56+
/// references alive.
57+
llvm::IntrusiveRefCntPtr<TextOwner> OwnedPtr;
7558

76-
OwnedString(StringRef Str) : OwnedString(Str.data(), Str.size()) {}
59+
OwnedString(StringRef Text, llvm::IntrusiveRefCntPtr<TextOwner> OwnedPtr)
60+
: Text(Text), OwnedPtr(OwnedPtr) {}
7761

78-
OwnedString(const char *Data) : OwnedString(StringRef(Data)) {}
62+
public:
63+
OwnedString() : OwnedString(/*Text=*/StringRef(), /*OwnedPtr=*/nullptr) {}
7964

80-
OwnedString(const OwnedString &Other):
81-
OwnedString(Other.Data, Other.Length, Other.Ownership) {}
65+
/// Create a ref counted \c OwnedString that is initialized with the text of
66+
/// the given \c StringRef.
67+
OwnedString(StringRef Str) : OwnedString(makeRefCounted(Str)) {}
8268

83-
OwnedString(OwnedString &&Other): Data(Other.Data), Length(Other.Length),
84-
Ownership(Other.Ownership) {
85-
Other.Data = nullptr;
86-
Other.Ownership = StringOwnership::Unowned;
87-
}
69+
/// Create a ref counted \c OwnedString that is initialized with the text of
70+
/// the given buffer.
71+
OwnedString(const char *Str) : OwnedString(StringRef(Str)) {}
8872

89-
OwnedString& operator=(const OwnedString &Other) {
90-
if (&Other != this) {
91-
release();
92-
initialize(Other.Data, Other.Length, Other.Ownership);
93-
}
94-
return *this;
73+
/// Create an \c OwnedString that references the given string. The
74+
/// \c OwnedString will not take ownership of that buffer and will assume that
75+
/// the buffer outlives its lifetime.
76+
static OwnedString makeUnowned(StringRef Str) {
77+
return OwnedString(Str, /*OwnedPtr=*/nullptr);
9578
}
9679

97-
OwnedString& operator=(OwnedString &&Other) {
98-
if (&Other != this) {
99-
release();
100-
this->Data = Other.Data;
101-
this->Length = Other.Length;
102-
this->Ownership = Other.Ownership;
103-
Other.Ownership = StringOwnership::Unowned;
104-
Other.Data = nullptr;
80+
/// Create an \c OwnedString that keeps its contents in a reference counted
81+
/// buffer. The contents of \p Str will be copied initially and are allowed to
82+
/// be disposed after the \c OwnedString has been created.
83+
static OwnedString makeRefCounted(StringRef Str) {
84+
if (Str.empty()) {
85+
// Copying an empty string doesn't make sense. Just create an unowned
86+
// string that points to the empty string.
87+
return makeUnowned(Str);
88+
} else {
89+
llvm::IntrusiveRefCntPtr<TextOwner> OwnedPtr(TextOwner::make(Str));
90+
return OwnedString(StringRef(OwnedPtr->getText(), Str.size()),
91+
std::move(OwnedPtr));
10592
}
106-
return *this;
107-
}
108-
109-
OwnedString copy() const {
110-
return OwnedString(Data, Length, StringOwnership::Copied);
11193
}
11294

11395
/// Returns the length of the string in bytes.
114-
size_t size() const {
115-
return Length;
116-
}
96+
size_t size() const { return Text.size(); }
11797

11898
/// Returns true if the length is 0.
119-
bool empty() const {
120-
return Length == 0;
121-
}
99+
bool empty() const { return size() == 0; }
122100

123101
/// Returns a StringRef to the underlying data. No copy is made and no
124102
/// ownership changes take place.
125-
StringRef str() const {
126-
return StringRef { Data, Length };
127-
}
103+
StringRef str() const { return Text; }
128104

129105
bool operator==(const OwnedString &Right) const {
130106
return str() == Right.str();
131107
}
132-
133-
~OwnedString() {
134-
release();
135-
}
136108
};
137109

138110
} // end namespace swift

include/swift/Syntax/RawSyntax.h

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -404,12 +404,17 @@ class RawSyntax final
404404
return static_cast<tok>(Bits.Token.TokenKind);
405405
}
406406

407-
/// Return the text of the token.
408-
StringRef getTokenText() const {
407+
/// Return the text of the token as an \c OwnedString. Keeping a reference to
408+
/// this string will keep it alive even if the syntax node gets freed.
409+
OwnedString getOwnedTokenText() const {
409410
assert(isToken());
410-
return getTrailingObjects<OwnedString>()->str();
411+
return *getTrailingObjects<OwnedString>();
411412
}
412413

414+
/// Return the text of the token as a reference. The referenced buffer may
415+
/// disappear when the syntax node gets freed.
416+
StringRef getTokenText() const { return getOwnedTokenText().str(); }
417+
413418
/// Return the leading trivia list of the token.
414419
ArrayRef<TriviaPiece> getLeadingTrivia() const {
415420
assert(isToken());
@@ -434,7 +439,7 @@ class RawSyntax final
434439
/// trivia instead.
435440
RC<RawSyntax>
436441
withLeadingTrivia(ArrayRef<TriviaPiece> NewLeadingTrivia) const {
437-
return make(getTokenKind(), getTokenText(), NewLeadingTrivia,
442+
return make(getTokenKind(), getOwnedTokenText(), NewLeadingTrivia,
438443
getTrailingTrivia(), getPresence());
439444
}
440445

@@ -446,7 +451,7 @@ class RawSyntax final
446451
/// trivia instead.
447452
RC<RawSyntax>
448453
withTrailingTrivia(ArrayRef<TriviaPiece> NewTrailingTrivia) const {
449-
return make(getTokenKind(), getTokenText(), getLeadingTrivia(),
454+
return make(getTokenKind(), getOwnedTokenText(), getLeadingTrivia(),
450455
NewTrailingTrivia, getPresence());
451456
}
452457

include/swift/Syntax/Serialization/SyntaxDeserialization.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,9 +162,9 @@ template <> struct MappingTraits<swift::RC<swift::RawSyntax>> {
162162
StringRef nodeIdString;
163163
in.mapRequired("id", nodeIdString);
164164
unsigned nodeId = std::atoi(nodeIdString.data());
165-
value =
166-
swift::RawSyntax::make(tokenKind, text, leadingTrivia, trailingTrivia,
167-
presence, /*Arena=*/nullptr, nodeId);
165+
value = swift::RawSyntax::make(
166+
tokenKind, swift::OwnedString::makeRefCounted(text), leadingTrivia,
167+
trailingTrivia, presence, /*Arena=*/nullptr, nodeId);
168168
} else {
169169
swift::SyntaxKind kind;
170170
in.mapRequired("kind", kind);

include/swift/Syntax/Trivia.h.gyb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -454,7 +454,8 @@ struct MappingTraits<swift::syntax::TriviaPiece> {
454454
% else:
455455
StringRef text;
456456
in.mapRequired("value", text);
457-
return swift::syntax::TriviaPiece(kind, text);
457+
return swift::syntax::TriviaPiece(
458+
kind, swift::OwnedString::makeRefCounted(text));
458459
% end
459460
break;
460461
}

lib/Parse/Lexer.cpp

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2182,7 +2182,8 @@ void Lexer::lexImpl() {
21822182
size_t BOMLen = ContentStart - BufferStart;
21832183
assert(BOMLen == 3 && "UTF-8 BOM is 3 bytes");
21842184
// Add UTF-8 BOM to LeadingTrivia.
2185-
LeadingTrivia.push_back(TriviaPiece::garbageText({CurPtr, BOMLen}));
2185+
auto Text = OwnedString::makeRefCounted(StringRef(CurPtr, BOMLen));
2186+
LeadingTrivia.push_back(TriviaPiece::garbageText(Text));
21862187
CurPtr += BOMLen;
21872188
}
21882189
NextToken.setAtStartOfLine(true);
@@ -2407,18 +2408,18 @@ void Lexer::lexTrivia(syntax::Trivia &Pieces, bool IsForTrailingTrivia) {
24072408
bool isDocComment = CurPtr[1] == '/';
24082409
skipSlashSlashComment(/*EatNewline=*/false);
24092410
size_t Length = CurPtr - TriviaStart;
2410-
Pieces.push_back(isDocComment
2411-
? TriviaPiece::docLineComment({TriviaStart, Length})
2412-
: TriviaPiece::lineComment({TriviaStart, Length}));
2411+
auto Text = OwnedString::makeRefCounted(StringRef(TriviaStart, Length));
2412+
Pieces.push_back(isDocComment ? TriviaPiece::docLineComment(Text)
2413+
: TriviaPiece::lineComment(Text));
24132414
goto Restart;
24142415
} else if (*CurPtr == '*') {
24152416
// '/* ... */' comment.
24162417
bool isDocComment = CurPtr[1] == '*';
24172418
skipSlashStarComment();
24182419
size_t Length = CurPtr - TriviaStart;
2419-
Pieces.push_back(isDocComment
2420-
? TriviaPiece::docBlockComment({TriviaStart, Length})
2421-
: TriviaPiece::blockComment({TriviaStart, Length}));
2420+
auto Text = OwnedString::makeRefCounted(StringRef(TriviaStart, Length));
2421+
Pieces.push_back(isDocComment ? TriviaPiece::docBlockComment(Text)
2422+
: TriviaPiece::blockComment(Text));
24222423
goto Restart;
24232424
}
24242425
break;
@@ -2430,7 +2431,8 @@ void Lexer::lexTrivia(syntax::Trivia &Pieces, bool IsForTrailingTrivia) {
24302431
diagnose(TriviaStart, diag::lex_hashbang_not_allowed);
24312432
skipHashbang(/*EatNewline=*/false);
24322433
size_t Length = CurPtr - TriviaStart;
2433-
Pieces.push_back(TriviaPiece::garbageText({TriviaStart, Length}));
2434+
auto Text = OwnedString::makeRefCounted(StringRef(TriviaStart, Length));
2435+
Pieces.push_back(TriviaPiece::garbageText(Text));
24342436
goto Restart;
24352437
}
24362438
break;
@@ -2439,7 +2441,8 @@ void Lexer::lexTrivia(syntax::Trivia &Pieces, bool IsForTrailingTrivia) {
24392441
if (tryLexConflictMarker(/*EatNewline=*/false)) {
24402442
// Conflict marker.
24412443
size_t Length = CurPtr - TriviaStart;
2442-
Pieces.push_back(TriviaPiece::garbageText({TriviaStart, Length}));
2444+
auto Text = OwnedString::makeRefCounted(StringRef(TriviaStart, Length));
2445+
Pieces.push_back(TriviaPiece::garbageText(Text));
24432446
goto Restart;
24442447
}
24452448
break;
@@ -2448,7 +2451,8 @@ void Lexer::lexTrivia(syntax::Trivia &Pieces, bool IsForTrailingTrivia) {
24482451
case NulCharacterKind::Embedded: {
24492452
diagnoseEmbeddedNul(Diags, CurPtr - 1);
24502453
size_t Length = CurPtr - TriviaStart;
2451-
Pieces.push_back(TriviaPiece::garbageText({TriviaStart, Length}));
2454+
auto Text = OwnedString::makeRefCounted(StringRef(TriviaStart, Length));
2455+
Pieces.push_back(TriviaPiece::garbageText(Text));
24522456
goto Restart;
24532457
}
24542458
case NulCharacterKind::CodeCompletion:
@@ -2494,7 +2498,8 @@ void Lexer::lexTrivia(syntax::Trivia &Pieces, bool IsForTrailingTrivia) {
24942498
}
24952499

24962500
size_t Length = CurPtr - TriviaStart;
2497-
Pieces.push_back(TriviaPiece::garbageText({TriviaStart, Length}));
2501+
auto Text = OwnedString::makeRefCounted(StringRef(TriviaStart, Length));
2502+
Pieces.push_back(TriviaPiece::garbageText(Text));
24982503
goto Restart;
24992504
}
25002505
// Reset the cursor.

lib/Parse/Parser.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -312,15 +312,15 @@ swift::tokenizeWithTrivia(const LangOptions &LangOpts, const SourceManager &SM,
312312
syntax::AbsolutePosition RunningPos;
313313

314314
tokenize(
315-
LangOpts, SM, BufferID, Offset, EndOffset,
316-
Diags,
315+
LangOpts, SM, BufferID, Offset, EndOffset, Diags,
317316
CommentRetentionMode::AttachToNextToken, TriviaRetentionMode::WithTrivia,
318317
/*TokenizeInterpolatedString=*/false,
319318
/*SplitTokens=*/ArrayRef<Token>(),
320319
[&](const Token &Tok, const Trivia &LeadingTrivia,
321320
const Trivia &TrailingTrivia) {
321+
auto Text = OwnedString::makeRefCounted(Tok.getText());
322322
auto ThisToken =
323-
RawSyntax::make(Tok.getKind(), Tok.getText(), LeadingTrivia.Pieces,
323+
RawSyntax::make(Tok.getKind(), Text, LeadingTrivia.Pieces,
324324
TrailingTrivia.Pieces, SourcePresence::Present);
325325

326326
auto ThisTokenPos = ThisToken->accumulateAbsolutePosition(RunningPos);

lib/Parse/SyntaxParsingContext.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -163,9 +163,9 @@ void SyntaxParsingContext::addToken(Token &Tok, Trivia &LeadingTrivia,
163163
return;
164164

165165
auto &Arena = getArena();
166-
addRawSyntax(RawSyntax::getToken(Arena, Tok.getKind(), Tok.getText(),
167-
LeadingTrivia.Pieces,
168-
TrailingTrivia.Pieces));
166+
auto Text = OwnedString::makeRefCounted(Tok.getText());
167+
addRawSyntax(RawSyntax::getToken(
168+
Arena, Tok.getKind(), Text, LeadingTrivia.Pieces, TrailingTrivia.Pieces));
169169
}
170170

171171
/// Add Syntax to the parts.
@@ -313,7 +313,7 @@ void finalizeSourceFile(RootContextData &RootData,
313313
}
314314

315315
if (!EOFToken)
316-
EOFToken = RawSyntax::missing(tok::eof, "");
316+
EOFToken = RawSyntax::missing(tok::eof, OwnedString::makeUnowned(""));
317317

318318
auto newRaw = SyntaxFactory::createRaw(
319319
SyntaxKind::SourceFile,
@@ -352,7 +352,8 @@ void SyntaxParsingContext::synthesize(tok Kind, StringRef Text) {
352352
return;
353353
if (Text.empty())
354354
Text = getTokenText(Kind);
355-
getStorage().push_back(RawSyntax::missing(Kind, Text));
355+
auto OwnedText = OwnedString::makeRefCounted(Text);
356+
getStorage().push_back(RawSyntax::missing(Kind, OwnedText));
356357
}
357358

358359
void SyntaxParsingContext::synthesize(SyntaxKind Kind) {

0 commit comments

Comments
 (0)