Skip to content

[libSyntax] Add a reference counted version of OwnedString #18677

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 14, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 51 additions & 79 deletions include/swift/Basic/OwnedString.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,117 +22,89 @@

#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/TrailingObjects.h"

using llvm::StringRef;

namespace swift {

enum class StringOwnership {
/// An OwnedString holds a weak reference to the underlying string storage
/// and will never attempt to free it.
Unowned,

/// An OwnedString has its own copy of the underlying string storage and
/// will free the storage upon its destruction.
Copied,
};

/// Holds a string - either statically allocated or dynamically allocated
/// and owned by this type.
class OwnedString {
const char *Data;
size_t Length;
StringOwnership Ownership = StringOwnership::Unowned;

void release() {
if (Ownership == StringOwnership::Copied)
free(const_cast<char *>(Data));
}
/// An owner that keeps the buffer of a ref counted \c OwnedString alive.
class TextOwner final : public llvm::ThreadSafeRefCountedBase<TextOwner>,
public llvm::TrailingObjects<TextOwner, char> {
TextOwner(StringRef Text) {
std::uninitialized_copy(Text.begin(), Text.end(),
getTrailingObjects<char>());
}

void initialize(const char* Data, size_t Length, StringOwnership Ownership) {
this->Length = Length;
this->Ownership = Ownership;
if (Ownership == StringOwnership::Copied && Data) {
char *substring = static_cast<char *>(malloc(Length + 1));
assert(substring && "expected successful malloc of copy");
public:
static TextOwner *make(StringRef Text) {
auto size = totalSizeToAlloc<char>(Text.size());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should these be NUL-terminated?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since StringRef already carries a length, there is no need to NUL-terminate them.

void *data = ::operator new(size);
return new (data) TextOwner(Text);
}

memcpy(substring, Data, Length);
substring[Length] = '\0';
const char *getText() const { return getTrailingObjects<char>(); }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any specific reason to put char* as a trailing object instead of a regular member?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It allows us to allocate the storage with a single allocation instead of two, which should mean less overhead.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah, OK. The patch LGTM!

};

this->Data = substring;
}
else
this->Data = Data;
}
OwnedString(const char* Data, size_t Length, StringOwnership Ownership) {
initialize(Data, Length, Ownership);
}
public:
OwnedString(): OwnedString(nullptr, 0, StringOwnership::Unowned) {}
/// The text this owned string represents
StringRef Text;

OwnedString(const char *Data, size_t Length):
OwnedString(Data, Length, StringOwnership::Copied) {}
/// In case of a ref counted string an owner that keeps the buffer \c Text
/// references alive.
llvm::IntrusiveRefCntPtr<TextOwner> OwnedPtr;

OwnedString(StringRef Str) : OwnedString(Str.data(), Str.size()) {}
OwnedString(StringRef Text, llvm::IntrusiveRefCntPtr<TextOwner> OwnedPtr)
: Text(Text), OwnedPtr(OwnedPtr) {}

OwnedString(const char *Data) : OwnedString(StringRef(Data)) {}
public:
OwnedString() : OwnedString(/*Text=*/StringRef(), /*OwnedPtr=*/nullptr) {}

OwnedString(const OwnedString &Other):
OwnedString(Other.Data, Other.Length, Other.Ownership) {}
/// Create a ref counted \c OwnedString that is initialized with the text of
/// the given \c StringRef.
OwnedString(StringRef Str) : OwnedString(makeRefCounted(Str)) {}

OwnedString(OwnedString &&Other): Data(Other.Data), Length(Other.Length),
Ownership(Other.Ownership) {
Other.Data = nullptr;
Other.Ownership = StringOwnership::Unowned;
}
/// Create a ref counted \c OwnedString that is initialized with the text of
/// the given buffer.
OwnedString(const char *Str) : OwnedString(StringRef(Str)) {}

OwnedString& operator=(const OwnedString &Other) {
if (&Other != this) {
release();
initialize(Other.Data, Other.Length, Other.Ownership);
}
return *this;
/// Create an \c OwnedString that references the given string. The
/// \c OwnedString will not take ownership of that buffer and will assume that
/// the buffer outlives its lifetime.
static OwnedString makeUnowned(StringRef Str) {
return OwnedString(Str, /*OwnedPtr=*/nullptr);
}

OwnedString& operator=(OwnedString &&Other) {
if (&Other != this) {
release();
this->Data = Other.Data;
this->Length = Other.Length;
this->Ownership = Other.Ownership;
Other.Ownership = StringOwnership::Unowned;
Other.Data = nullptr;
/// Create an \c OwnedString that keeps its contents in a reference counted
/// buffer. The contents of \p Str will be copied initially and are allowed to
/// be disposed after the \c OwnedString has been created.
static OwnedString makeRefCounted(StringRef Str) {
if (Str.empty()) {
// Copying an empty string doesn't make sense. Just create an unowned
// string that points to the empty string.
return makeUnowned(Str);
} else {
llvm::IntrusiveRefCntPtr<TextOwner> OwnedPtr(TextOwner::make(Str));
return OwnedString(StringRef(OwnedPtr->getText(), Str.size()),
std::move(OwnedPtr));
}
return *this;
}

OwnedString copy() const {
return OwnedString(Data, Length, StringOwnership::Copied);
}

/// Returns the length of the string in bytes.
size_t size() const {
return Length;
}
size_t size() const { return Text.size(); }

/// Returns true if the length is 0.
bool empty() const {
return Length == 0;
}
bool empty() const { return size() == 0; }

/// Returns a StringRef to the underlying data. No copy is made and no
/// ownership changes take place.
StringRef str() const {
return StringRef { Data, Length };
}
StringRef str() const { return Text; }

bool operator==(const OwnedString &Right) const {
return str() == Right.str();
}

~OwnedString() {
release();
}
};

} // end namespace swift
Expand Down
15 changes: 10 additions & 5 deletions include/swift/Syntax/RawSyntax.h
Original file line number Diff line number Diff line change
Expand Up @@ -404,12 +404,17 @@ class RawSyntax final
return static_cast<tok>(Bits.Token.TokenKind);
}

/// Return the text of the token.
StringRef getTokenText() const {
/// Return the text of the token as an \c OwnedString. Keeping a reference to
/// this string will keep it alive even if the syntax node gets freed.
OwnedString getOwnedTokenText() const {
assert(isToken());
return getTrailingObjects<OwnedString>()->str();
return *getTrailingObjects<OwnedString>();
}

/// Return the text of the token as a reference. The referenced buffer may
/// disappear when the syntax node gets freed.
StringRef getTokenText() const { return getOwnedTokenText().str(); }

/// Return the leading trivia list of the token.
ArrayRef<TriviaPiece> getLeadingTrivia() const {
assert(isToken());
Expand All @@ -434,7 +439,7 @@ class RawSyntax final
/// trivia instead.
RC<RawSyntax>
withLeadingTrivia(ArrayRef<TriviaPiece> NewLeadingTrivia) const {
return make(getTokenKind(), getTokenText(), NewLeadingTrivia,
return make(getTokenKind(), getOwnedTokenText(), NewLeadingTrivia,
getTrailingTrivia(), getPresence());
}

Expand All @@ -446,7 +451,7 @@ class RawSyntax final
/// trivia instead.
RC<RawSyntax>
withTrailingTrivia(ArrayRef<TriviaPiece> NewTrailingTrivia) const {
return make(getTokenKind(), getTokenText(), getLeadingTrivia(),
return make(getTokenKind(), getOwnedTokenText(), getLeadingTrivia(),
NewTrailingTrivia, getPresence());
}

Expand Down
6 changes: 3 additions & 3 deletions include/swift/Syntax/Serialization/SyntaxDeserialization.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,9 @@ template <> struct MappingTraits<swift::RC<swift::RawSyntax>> {
StringRef nodeIdString;
in.mapRequired("id", nodeIdString);
unsigned nodeId = std::atoi(nodeIdString.data());
value =
swift::RawSyntax::make(tokenKind, text, leadingTrivia, trailingTrivia,
presence, /*Arena=*/nullptr, nodeId);
value = swift::RawSyntax::make(
tokenKind, swift::OwnedString::makeRefCounted(text), leadingTrivia,
trailingTrivia, presence, /*Arena=*/nullptr, nodeId);
Copy link
Member

@rintaro rintaro Aug 14, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about making RawSyntax::make() receive a StringRef and then construct the OwnedString inside it?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, you want to control Owned/Unowned at the call site. OK then.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that was my idea behind it.

} else {
swift::SyntaxKind kind;
in.mapRequired("kind", kind);
Expand Down
3 changes: 2 additions & 1 deletion include/swift/Syntax/Trivia.h.gyb
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,8 @@ struct MappingTraits<swift::syntax::TriviaPiece> {
% else:
StringRef text;
in.mapRequired("value", text);
return swift::syntax::TriviaPiece(kind, text);
return swift::syntax::TriviaPiece(
kind, swift::OwnedString::makeRefCounted(text));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto for TriviaPiece (have it receive a StringRef).

% end
break;
}
Expand Down
27 changes: 16 additions & 11 deletions lib/Parse/Lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2182,7 +2182,8 @@ void Lexer::lexImpl() {
size_t BOMLen = ContentStart - BufferStart;
assert(BOMLen == 3 && "UTF-8 BOM is 3 bytes");
// Add UTF-8 BOM to LeadingTrivia.
LeadingTrivia.push_back(TriviaPiece::garbageText({CurPtr, BOMLen}));
auto Text = OwnedString::makeRefCounted(StringRef(CurPtr, BOMLen));
LeadingTrivia.push_back(TriviaPiece::garbageText(Text));
CurPtr += BOMLen;
}
NextToken.setAtStartOfLine(true);
Expand Down Expand Up @@ -2407,18 +2408,18 @@ void Lexer::lexTrivia(syntax::Trivia &Pieces, bool IsForTrailingTrivia) {
bool isDocComment = CurPtr[1] == '/';
skipSlashSlashComment(/*EatNewline=*/false);
size_t Length = CurPtr - TriviaStart;
Pieces.push_back(isDocComment
? TriviaPiece::docLineComment({TriviaStart, Length})
: TriviaPiece::lineComment({TriviaStart, Length}));
auto Text = OwnedString::makeRefCounted(StringRef(TriviaStart, Length));
Pieces.push_back(isDocComment ? TriviaPiece::docLineComment(Text)
: TriviaPiece::lineComment(Text));
goto Restart;
} else if (*CurPtr == '*') {
// '/* ... */' comment.
bool isDocComment = CurPtr[1] == '*';
skipSlashStarComment();
size_t Length = CurPtr - TriviaStart;
Pieces.push_back(isDocComment
? TriviaPiece::docBlockComment({TriviaStart, Length})
: TriviaPiece::blockComment({TriviaStart, Length}));
auto Text = OwnedString::makeRefCounted(StringRef(TriviaStart, Length));
Pieces.push_back(isDocComment ? TriviaPiece::docBlockComment(Text)
: TriviaPiece::blockComment(Text));
goto Restart;
}
break;
Expand All @@ -2430,7 +2431,8 @@ void Lexer::lexTrivia(syntax::Trivia &Pieces, bool IsForTrailingTrivia) {
diagnose(TriviaStart, diag::lex_hashbang_not_allowed);
skipHashbang(/*EatNewline=*/false);
size_t Length = CurPtr - TriviaStart;
Pieces.push_back(TriviaPiece::garbageText({TriviaStart, Length}));
auto Text = OwnedString::makeRefCounted(StringRef(TriviaStart, Length));
Pieces.push_back(TriviaPiece::garbageText(Text));
goto Restart;
}
break;
Expand All @@ -2439,7 +2441,8 @@ void Lexer::lexTrivia(syntax::Trivia &Pieces, bool IsForTrailingTrivia) {
if (tryLexConflictMarker(/*EatNewline=*/false)) {
// Conflict marker.
size_t Length = CurPtr - TriviaStart;
Pieces.push_back(TriviaPiece::garbageText({TriviaStart, Length}));
auto Text = OwnedString::makeRefCounted(StringRef(TriviaStart, Length));
Pieces.push_back(TriviaPiece::garbageText(Text));
goto Restart;
}
break;
Expand All @@ -2448,7 +2451,8 @@ void Lexer::lexTrivia(syntax::Trivia &Pieces, bool IsForTrailingTrivia) {
case NulCharacterKind::Embedded: {
diagnoseEmbeddedNul(Diags, CurPtr - 1);
size_t Length = CurPtr - TriviaStart;
Pieces.push_back(TriviaPiece::garbageText({TriviaStart, Length}));
auto Text = OwnedString::makeRefCounted(StringRef(TriviaStart, Length));
Pieces.push_back(TriviaPiece::garbageText(Text));
goto Restart;
}
case NulCharacterKind::CodeCompletion:
Expand Down Expand Up @@ -2494,7 +2498,8 @@ void Lexer::lexTrivia(syntax::Trivia &Pieces, bool IsForTrailingTrivia) {
}

size_t Length = CurPtr - TriviaStart;
Pieces.push_back(TriviaPiece::garbageText({TriviaStart, Length}));
auto Text = OwnedString::makeRefCounted(StringRef(TriviaStart, Length));
Pieces.push_back(TriviaPiece::garbageText(Text));
goto Restart;
}
// Reset the cursor.
Expand Down
6 changes: 3 additions & 3 deletions lib/Parse/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -312,15 +312,15 @@ swift::tokenizeWithTrivia(const LangOptions &LangOpts, const SourceManager &SM,
syntax::AbsolutePosition RunningPos;

tokenize(
LangOpts, SM, BufferID, Offset, EndOffset,
Diags,
LangOpts, SM, BufferID, Offset, EndOffset, Diags,
CommentRetentionMode::AttachToNextToken, TriviaRetentionMode::WithTrivia,
/*TokenizeInterpolatedString=*/false,
/*SplitTokens=*/ArrayRef<Token>(),
[&](const Token &Tok, const Trivia &LeadingTrivia,
const Trivia &TrailingTrivia) {
auto Text = OwnedString::makeRefCounted(Tok.getText());
auto ThisToken =
RawSyntax::make(Tok.getKind(), Tok.getText(), LeadingTrivia.Pieces,
RawSyntax::make(Tok.getKind(), Text, LeadingTrivia.Pieces,
TrailingTrivia.Pieces, SourcePresence::Present);

auto ThisTokenPos = ThisToken->accumulateAbsolutePosition(RunningPos);
Expand Down
11 changes: 6 additions & 5 deletions lib/Parse/SyntaxParsingContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,9 +163,9 @@ void SyntaxParsingContext::addToken(Token &Tok, Trivia &LeadingTrivia,
return;

auto &Arena = getArena();
addRawSyntax(RawSyntax::getToken(Arena, Tok.getKind(), Tok.getText(),
LeadingTrivia.Pieces,
TrailingTrivia.Pieces));
auto Text = OwnedString::makeRefCounted(Tok.getText());
addRawSyntax(RawSyntax::getToken(
Arena, Tok.getKind(), Text, LeadingTrivia.Pieces, TrailingTrivia.Pieces));
}

/// Add Syntax to the parts.
Expand Down Expand Up @@ -313,7 +313,7 @@ void finalizeSourceFile(RootContextData &RootData,
}

if (!EOFToken)
EOFToken = RawSyntax::missing(tok::eof, "");
EOFToken = RawSyntax::missing(tok::eof, OwnedString::makeUnowned(""));

auto newRaw = SyntaxFactory::createRaw(
SyntaxKind::SourceFile,
Expand Down Expand Up @@ -352,7 +352,8 @@ void SyntaxParsingContext::synthesize(tok Kind, StringRef Text) {
return;
if (Text.empty())
Text = getTokenText(Kind);
getStorage().push_back(RawSyntax::missing(Kind, Text));
auto OwnedText = OwnedString::makeRefCounted(Text);
getStorage().push_back(RawSyntax::missing(Kind, OwnedText));
}

void SyntaxParsingContext::synthesize(SyntaxKind Kind) {
Expand Down
Loading