Skip to content

Commit 79e9113

Browse files
authored
Merge pull request #18677 from ahoppen/ref-counted-owned-string
[libSyntax] Add a reference counted version of OwnedString
2 parents 7a7856c + ac512d4 commit 79e9113

File tree

10 files changed

+127
-268
lines changed

10 files changed

+127
-268
lines changed

include/swift/Basic/OwnedString.h

Lines changed: 51 additions & 79 deletions
Original file line number | Diff line number | Diff line change
@@ -22,117 +22,89 @@
2222

2323
#include "llvm/ADT/IntrusiveRefCntPtr.h"
2424
#include "llvm/ADT/StringRef.h"
25+
#include "llvm/Support/TrailingObjects.h"
2526

2627
using llvm::StringRef;
2728

2829
namespace swift {
2930

30-
enum class StringOwnership {
31-
/// An OwnedString holds a weak reference to the underlying string storage
32-
/// and will never attempt to free it.
33-
Unowned,
34-
35-
/// An OwnedString has its own copy of the underlying string storage and
36-
/// will free the storage upon its destruction.
37-
Copied,
38-
};
39-
4031
/// Holds a string - either statically allocated or dynamically allocated
4132
/// and owned by this type.
4233
class OwnedString {
43-
const char *Data;
44-
size_t Length;
45-
StringOwnership Ownership = StringOwnership::Unowned;
46-
47-
void release() {
48-
if (Ownership == StringOwnership::Copied)
49-
free(const_cast<char *>(Data));
50-
}
34+
/// An owner that keeps the buffer of a ref counted \c OwnedString alive.
35+
class TextOwner final : public llvm::ThreadSafeRefCountedBase<TextOwner>,
36+
public llvm::TrailingObjects<TextOwner, char> {
37+
TextOwner(StringRef Text) {
38+
std::uninitialized_copy(Text.begin(), Text.end(),
39+
getTrailingObjects<char>());
40+
}
5141

52-
void initialize(const char* Data, size_t Length, StringOwnership Ownership) {
53-
this->Length = Length;
54-
this->Ownership = Ownership;
55-
if (Ownership == StringOwnership::Copied && Data) {
56-
char *substring = static_cast<char *>(malloc(Length + 1));
57-
assert(substring && "expected successful malloc of copy");
42+
public:
43+
static TextOwner *make(StringRef Text) {
44+
auto size = totalSizeToAlloc<char>(Text.size());
45+
void *data = ::operator new(size);
46+
return new (data) TextOwner(Text);
47+
}
5848

59-
memcpy(substring, Data, Length);
60-
substring[Length] = '\0';
49+
const char *getText() const { return getTrailingObjects<char>(); }
50+
};
6151

62-
this->Data = substring;
63-
}
64-
else
65-
this->Data = Data;
66-
}
67-
OwnedString(const char* Data, size_t Length, StringOwnership Ownership) {
68-
initialize(Data, Length, Ownership);
69-
}
70-
public:
71-
OwnedString(): OwnedString(nullptr, 0, StringOwnership::Unowned) {}
52+
/// The text this owned string represents
53+
StringRef Text;
7254

73-
OwnedString(const char *Data, size_t Length):
74-
OwnedString(Data, Length, StringOwnership::Copied) {}
55+
/// In case of a ref counted string an owner that keeps the buffer \c Text
56+
/// references alive.
57+
llvm::IntrusiveRefCntPtr<TextOwner> OwnedPtr;
7558

76-
OwnedString(StringRef Str) : OwnedString(Str.data(), Str.size()) {}
59+
OwnedString(StringRef Text, llvm::IntrusiveRefCntPtr<TextOwner> OwnedPtr)
60+
: Text(Text), OwnedPtr(OwnedPtr) {}
7761

78-
OwnedString(const char *Data) : OwnedString(StringRef(Data)) {}
62+
public:
63+
OwnedString() : OwnedString(/*Text=*/StringRef(), /*OwnedPtr=*/nullptr) {}
7964

80-
OwnedString(const OwnedString &Other):
81-
OwnedString(Other.Data, Other.Length, Other.Ownership) {}
65+
/// Create a ref counted \c OwnedString that is initialized with the text of
66+
/// the given \c StringRef.
67+
OwnedString(StringRef Str) : OwnedString(makeRefCounted(Str)) {}
8268

83-
OwnedString(OwnedString &&Other): Data(Other.Data), Length(Other.Length),
84-
Ownership(Other.Ownership) {
85-
Other.Data = nullptr;
86-
Other.Ownership = StringOwnership::Unowned;
87-
}
69+
/// Create a ref counted \c OwnedString that is initialized with the text of
70+
/// the given buffer.
71+
OwnedString(const char *Str) : OwnedString(StringRef(Str)) {}
8872

89-
OwnedString& operator=(const OwnedString &Other) {
90-
if (&Other != this) {
91-
release();
92-
initialize(Other.Data, Other.Length, Other.Ownership);
93-
}
94-
return *this;
73+
/// Create an \c OwnedString that references the given string. The
74+
/// \c OwnedString will not take ownership of that buffer and will assume that
75+
/// the buffer outlives its lifetime.
76+
static OwnedString makeUnowned(StringRef Str) {
77+
return OwnedString(Str, /*OwnedPtr=*/nullptr);
9578
}
9679

97-
OwnedString& operator=(OwnedString &&Other) {
98-
if (&Other != this) {
99-
release();
100-
this->Data = Other.Data;
101-
this->Length = Other.Length;
102-
this->Ownership = Other.Ownership;
103-
Other.Ownership = StringOwnership::Unowned;
104-
Other.Data = nullptr;
80+
/// Create an \c OwnedString that keeps its contents in a reference counted
81+
/// buffer. The contents of \p Str will be copied initially and are allowed to
82+
/// be disposed after the \c OwnedString has been created.
83+
static OwnedString makeRefCounted(StringRef Str) {
84+
if (Str.empty()) {
85+
// Copying an empty string doesn't make sense. Just create an unowned
86+
// string that points to the empty string.
87+
return makeUnowned(Str);
88+
} else {
89+
llvm::IntrusiveRefCntPtr<TextOwner> OwnedPtr(TextOwner::make(Str));
90+
return OwnedString(StringRef(OwnedPtr->getText(), Str.size()),
91+
std::move(OwnedPtr));
10592
}
106-
return *this;
107-
}
108-
109-
OwnedString copy() const {
110-
return OwnedString(Data, Length, StringOwnership::Copied);
11193
}
11294

11395
/// Returns the length of the string in bytes.
114-
size_t size() const {
115-
return Length;
116-
}
96+
size_t size() const { return Text.size(); }
11797

11898
/// Returns true if the length is 0.
119-
bool empty() const {
120-
return Length == 0;
121-
}
99+
bool empty() const { return size() == 0; }
122100

123101
/// Returns a StringRef to the underlying data. No copy is made and no
124102
/// ownership changes take place.
125-
StringRef str() const {
126-
return StringRef { Data, Length };
127-
}
103+
StringRef str() const { return Text; }
128104

129105
bool operator==(const OwnedString &Right) const {
130106
return str() == Right.str();
131107
}
132-
133-
~OwnedString() {
134-
release();
135-
}
136108
};
137109

138110
} // end namespace swift

include/swift/Syntax/RawSyntax.h

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -404,12 +404,17 @@ class RawSyntax final
404404
return static_cast<tok>(Bits.Token.TokenKind);
405405
}
406406

407-
/// Return the text of the token.
408-
StringRef getTokenText() const {
407+
/// Return the text of the token as an \c OwnedString. Keeping a reference to
408+
/// this string will keep it alive even if the syntax node gets freed.
409+
OwnedString getOwnedTokenText() const {
409410
assert(isToken());
410-
return getTrailingObjects<OwnedString>()->str();
411+
return *getTrailingObjects<OwnedString>();
411412
}
412413

414+
/// Return the text of the token as a reference. The referenced buffer may
415+
/// disappear when the syntax node gets freed.
416+
StringRef getTokenText() const { return getOwnedTokenText().str(); }
417+
413418
/// Return the leading trivia list of the token.
414419
ArrayRef<TriviaPiece> getLeadingTrivia() const {
415420
assert(isToken());
@@ -434,7 +439,7 @@ class RawSyntax final
434439
/// trivia instead.
435440
RC<RawSyntax>
436441
withLeadingTrivia(ArrayRef<TriviaPiece> NewLeadingTrivia) const {
437-
return make(getTokenKind(), getTokenText(), NewLeadingTrivia,
442+
return make(getTokenKind(), getOwnedTokenText(), NewLeadingTrivia,
438443
getTrailingTrivia(), getPresence());
439444
}
440445

@@ -446,7 +451,7 @@ class RawSyntax final
446451
/// trivia instead.
447452
RC<RawSyntax>
448453
withTrailingTrivia(ArrayRef<TriviaPiece> NewTrailingTrivia) const {
449-
return make(getTokenKind(), getTokenText(), getLeadingTrivia(),
454+
return make(getTokenKind(), getOwnedTokenText(), getLeadingTrivia(),
450455
NewTrailingTrivia, getPresence());
451456
}
452457

include/swift/Syntax/Serialization/SyntaxDeserialization.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -162,9 +162,9 @@ template <> struct MappingTraits<swift::RC<swift::RawSyntax>> {
162162
StringRef nodeIdString;
163163
in.mapRequired("id", nodeIdString);
164164
unsigned nodeId = std::atoi(nodeIdString.data());
165-
value =
166-
swift::RawSyntax::make(tokenKind, text, leadingTrivia, trailingTrivia,
167-
presence, /*Arena=*/nullptr, nodeId);
165+
value = swift::RawSyntax::make(
166+
tokenKind, swift::OwnedString::makeRefCounted(text), leadingTrivia,
167+
trailingTrivia, presence, /*Arena=*/nullptr, nodeId);
168168
} else {
169169
swift::SyntaxKind kind;
170170
in.mapRequired("kind", kind);

include/swift/Syntax/Trivia.h.gyb

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -496,7 +496,8 @@ struct MappingTraits<swift::syntax::TriviaPiece> {
496496
% else:
497497
StringRef text;
498498
in.mapRequired("value", text);
499-
return swift::syntax::TriviaPiece(kind, text);
499+
return swift::syntax::TriviaPiece(
500+
kind, swift::OwnedString::makeRefCounted(text));
500501
% end
501502
break;
502503
}

lib/Parse/Lexer.cpp

Lines changed: 16 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -2182,7 +2182,8 @@ void Lexer::lexImpl() {
21822182
size_t BOMLen = ContentStart - BufferStart;
21832183
assert(BOMLen == 3 && "UTF-8 BOM is 3 bytes");
21842184
// Add UTF-8 BOM to LeadingTrivia.
2185-
LeadingTrivia.push_back(TriviaPiece::garbageText({CurPtr, BOMLen}));
2185+
auto Text = OwnedString::makeRefCounted(StringRef(CurPtr, BOMLen));
2186+
LeadingTrivia.push_back(TriviaPiece::garbageText(Text));
21862187
CurPtr += BOMLen;
21872188
}
21882189
NextToken.setAtStartOfLine(true);
@@ -2407,18 +2408,18 @@ void Lexer::lexTrivia(syntax::Trivia &Pieces, bool IsForTrailingTrivia) {
24072408
bool isDocComment = CurPtr[1] == '/';
24082409
skipSlashSlashComment(/*EatNewline=*/false);
24092410
size_t Length = CurPtr - TriviaStart;
2410-
Pieces.push_back(isDocComment
2411-
? TriviaPiece::docLineComment({TriviaStart, Length})
2412-
: TriviaPiece::lineComment({TriviaStart, Length}));
2411+
auto Text = OwnedString::makeRefCounted(StringRef(TriviaStart, Length));
2412+
Pieces.push_back(isDocComment ? TriviaPiece::docLineComment(Text)
2413+
: TriviaPiece::lineComment(Text));
24132414
goto Restart;
24142415
} else if (*CurPtr == '*') {
24152416
// '/* ... */' comment.
24162417
bool isDocComment = CurPtr[1] == '*';
24172418
skipSlashStarComment();
24182419
size_t Length = CurPtr - TriviaStart;
2419-
Pieces.push_back(isDocComment
2420-
? TriviaPiece::docBlockComment({TriviaStart, Length})
2421-
: TriviaPiece::blockComment({TriviaStart, Length}));
2420+
auto Text = OwnedString::makeRefCounted(StringRef(TriviaStart, Length));
2421+
Pieces.push_back(isDocComment ? TriviaPiece::docBlockComment(Text)
2422+
: TriviaPiece::blockComment(Text));
24222423
goto Restart;
24232424
}
24242425
break;
@@ -2430,7 +2431,8 @@ void Lexer::lexTrivia(syntax::Trivia &Pieces, bool IsForTrailingTrivia) {
24302431
diagnose(TriviaStart, diag::lex_hashbang_not_allowed);
24312432
skipHashbang(/*EatNewline=*/false);
24322433
size_t Length = CurPtr - TriviaStart;
2433-
Pieces.push_back(TriviaPiece::garbageText({TriviaStart, Length}));
2434+
auto Text = OwnedString::makeRefCounted(StringRef(TriviaStart, Length));
2435+
Pieces.push_back(TriviaPiece::garbageText(Text));
24342436
goto Restart;
24352437
}
24362438
break;
@@ -2439,7 +2441,8 @@ void Lexer::lexTrivia(syntax::Trivia &Pieces, bool IsForTrailingTrivia) {
24392441
if (tryLexConflictMarker(/*EatNewline=*/false)) {
24402442
// Conflict marker.
24412443
size_t Length = CurPtr - TriviaStart;
2442-
Pieces.push_back(TriviaPiece::garbageText({TriviaStart, Length}));
2444+
auto Text = OwnedString::makeRefCounted(StringRef(TriviaStart, Length));
2445+
Pieces.push_back(TriviaPiece::garbageText(Text));
24432446
goto Restart;
24442447
}
24452448
break;
@@ -2448,7 +2451,8 @@ void Lexer::lexTrivia(syntax::Trivia &Pieces, bool IsForTrailingTrivia) {
24482451
case NulCharacterKind::Embedded: {
24492452
diagnoseEmbeddedNul(Diags, CurPtr - 1);
24502453
size_t Length = CurPtr - TriviaStart;
2451-
Pieces.push_back(TriviaPiece::garbageText({TriviaStart, Length}));
2454+
auto Text = OwnedString::makeRefCounted(StringRef(TriviaStart, Length));
2455+
Pieces.push_back(TriviaPiece::garbageText(Text));
24522456
goto Restart;
24532457
}
24542458
case NulCharacterKind::CodeCompletion:
@@ -2494,7 +2498,8 @@ void Lexer::lexTrivia(syntax::Trivia &Pieces, bool IsForTrailingTrivia) {
24942498
}
24952499

24962500
size_t Length = CurPtr - TriviaStart;
2497-
Pieces.push_back(TriviaPiece::garbageText({TriviaStart, Length}));
2501+
auto Text = OwnedString::makeRefCounted(StringRef(TriviaStart, Length));
2502+
Pieces.push_back(TriviaPiece::garbageText(Text));
24982503
goto Restart;
24992504
}
25002505
// Reset the cursor.

lib/Parse/Parser.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -312,15 +312,15 @@ swift::tokenizeWithTrivia(const LangOptions &LangOpts, const SourceManager &SM,
312312
syntax::AbsolutePosition RunningPos;
313313

314314
tokenize(
315-
LangOpts, SM, BufferID, Offset, EndOffset,
316-
Diags,
315+
LangOpts, SM, BufferID, Offset, EndOffset, Diags,
317316
CommentRetentionMode::AttachToNextToken, TriviaRetentionMode::WithTrivia,
318317
/*TokenizeInterpolatedString=*/false,
319318
/*SplitTokens=*/ArrayRef<Token>(),
320319
[&](const Token &Tok, const Trivia &LeadingTrivia,
321320
const Trivia &TrailingTrivia) {
321+
auto Text = OwnedString::makeRefCounted(Tok.getText());
322322
auto ThisToken =
323-
RawSyntax::make(Tok.getKind(), Tok.getText(), LeadingTrivia.Pieces,
323+
RawSyntax::make(Tok.getKind(), Text, LeadingTrivia.Pieces,
324324
TrailingTrivia.Pieces, SourcePresence::Present);
325325

326326
auto ThisTokenPos = ThisToken->accumulateAbsolutePosition(RunningPos);

lib/Parse/SyntaxParsingContext.cpp

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -163,9 +163,9 @@ void SyntaxParsingContext::addToken(Token &Tok, Trivia &LeadingTrivia,
163163
return;
164164

165165
auto &Arena = getArena();
166-
addRawSyntax(RawSyntax::getToken(Arena, Tok.getKind(), Tok.getText(),
167-
LeadingTrivia.Pieces,
168-
TrailingTrivia.Pieces));
166+
auto Text = OwnedString::makeRefCounted(Tok.getText());
167+
addRawSyntax(RawSyntax::getToken(
168+
Arena, Tok.getKind(), Text, LeadingTrivia.Pieces, TrailingTrivia.Pieces));
169169
}
170170

171171
/// Add Syntax to the parts.
@@ -313,7 +313,7 @@ void finalizeSourceFile(RootContextData &RootData,
313313
}
314314

315315
if (!EOFToken)
316-
EOFToken = RawSyntax::missing(tok::eof, "");
316+
EOFToken = RawSyntax::missing(tok::eof, OwnedString::makeUnowned(""));
317317

318318
auto newRaw = SyntaxFactory::createRaw(
319319
SyntaxKind::SourceFile,
@@ -352,7 +352,8 @@ void SyntaxParsingContext::synthesize(tok Kind, StringRef Text) {
352352
return;
353353
if (Text.empty())
354354
Text = getTokenText(Kind);
355-
getStorage().push_back(RawSyntax::missing(Kind, Text));
355+
auto OwnedText = OwnedString::makeRefCounted(Text);
356+
getStorage().push_back(RawSyntax::missing(Kind, OwnedText));
356357
}
357358

358359
void SyntaxParsingContext::synthesize(SyntaxKind Kind) {

0 commit comments

Comments
 (0)