Skip to content

Commit 965c461

Browse files
committed
Add support for raw identifiers.
Raw identifiers are backtick-delimited identifiers that can contain any non-identifier character other than the backtick itself, CR, LF, or other non-printable ASCII code units, and which are also not composed entirely of operator characters.
1 parent aea6b38 commit 965c461

34 files changed

+537
-100
lines changed

include/swift/AST/ASTDemangler.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,10 @@ class ASTBuilder {
306306

307307
static GenericTypeDecl *getAcceptableTypeDeclCandidate(ValueDecl *decl,
308308
Demangle::Node::Kind kind);
309+
310+
/// Returns an identifier with the given name, automatically removing any
311+
/// surrounding backticks that are present for raw identifiers.
312+
Identifier getIdentifier(StringRef name);
309313
};
310314

311315
SWIFT_END_INLINE_NAMESPACE

include/swift/AST/ASTPrinter.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -444,9 +444,9 @@ void printWithCompatibilityFeatureChecks(ASTPrinter &printer,
444444
Decl *decl,
445445
llvm::function_ref<void()> printBody);
446446

447-
/// Determine whether we need to escape the given keyword within the
448-
/// given context, by wrapping it in backticks.
449-
bool escapeKeywordInContext(StringRef keyword, PrintNameContext context);
447+
/// Determine whether we need to escape the given name within the given
448+
/// context, by wrapping it in backticks.
449+
bool escapeIdentifierInContext(Identifier name, PrintNameContext context);
450450

451451
} // namespace swift
452452

include/swift/AST/Identifier.h

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -101,19 +101,22 @@ class Identifier {
101101

102102
/// isOperator - Return true if this identifier is an operator, false if it is
103103
/// a normal identifier.
104-
/// FIXME: We should maybe cache this.
105104
bool isOperator() const {
106105
if (empty())
107106
return false;
108107
if (isEditorPlaceholder())
109108
return false;
110-
if ((unsigned char)Pointer[0] < 0x80)
111-
return isOperatorStartCodePoint((unsigned char)Pointer[0]);
112109

113110
// Handle the high unicode case out of line.
114111
return isOperatorSlow();
115112
}
116113

114+
/// Returns true if this identifier contains non-identifier characters and
115+
/// must always be escaped with backticks, even in contexts where other
116+
/// escaped identifiers could omit backticks (like keywords as argument
117+
/// labels).
118+
bool mustAlwaysBeEscaped() const;
119+
117120
bool isArithmeticOperator() const {
118121
return is("+") || is("-") || is("*") || is("/") || is("%");
119122
}
@@ -350,6 +353,10 @@ class DeclBaseName {
350353
return !isSpecial() && getIdentifier().isOperator();
351354
}
352355

356+
bool mustAlwaysBeEscaped() const {
357+
return !isSpecial() && getIdentifier().mustAlwaysBeEscaped();
358+
}
359+
353360
bool isEditorPlaceholder() const {
354361
return !isSpecial() && getIdentifier().isEditorPlaceholder();
355362
}
@@ -571,7 +578,12 @@ class DeclName {
571578
bool isOperator() const {
572579
return getBaseName().isOperator();
573580
}
574-
581+
582+
/// True if this name's base name must always be escaped with backticks.
583+
bool mustAlwaysBeEscaped() const {
584+
return getBaseName().mustAlwaysBeEscaped();
585+
}
586+
575587
/// True if this name should be found by a decl ref or member ref under the
576588
/// name specified by 'refName'.
577589
///
@@ -728,6 +740,8 @@ class DeclNameRef {
728740
return FullName.isOperator();
729741
}
730742

743+
bool mustAlwaysBeEscaped() const { return FullName.mustAlwaysBeEscaped(); }
744+
731745
bool isCompoundName() const {
732746
return FullName.isCompoundName();
733747
}

include/swift/Basic/Mangler.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,13 @@ class Mangler {
114114
print(llvm::dbgs());
115115
}
116116

117+
/// Appends the given raw identifier to the buffer in the form required to
118+
/// mangle it. This handles the transformations needed for such identifiers
119+
/// to retain compatibility with older runtimes.
120+
static void
121+
appendRawIdentifierForRuntime(StringRef ident,
122+
llvm::SmallVectorImpl<char> &buffer);
123+
117124
protected:
118125
/// Removes the last characters of the buffer by setting its size to a
119126
/// smaller value.
@@ -143,7 +150,7 @@ class Mangler {
143150
SWIFT_DEBUG_DUMP;
144151

145152
/// Appends a mangled identifier string.
146-
void appendIdentifier(StringRef ident);
153+
void appendIdentifier(StringRef ident, bool allowRawIdentifiers = true);
147154

148155
// NOTE: the addSubstitution functions perform the value computation before
149156
// the assignment because there is no sequence point synchronising the

include/swift/Demangling/ManglingUtils.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,16 @@ inline bool isWordEnd(char ch, char prevCh) {
5858
return false;
5959
}
6060

61+
/// Returns true if \p ch is a valid character which may appear at the start
62+
/// of a symbol mangling.
63+
inline bool isValidSymbolStart(char ch) {
64+
return isLetter(ch) || ch == '_' || ch == '$';
65+
}
66+
6167
/// Returns true if \p ch is a valid character which may appear in a symbol
62-
/// mangling.
68+
/// mangling anywhere other than the first character.
6369
inline bool isValidSymbolChar(char ch) {
64-
return isLetter(ch) || isDigit(ch) || ch == '_' || ch == '$';
70+
return isValidSymbolStart(ch) || isDigit(ch);
6571
}
6672

6773
/// Returns true if \p str contains any character which may not appear in a

include/swift/IDE/CompletionLookup.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,8 @@ class CompletionLookup final : public swift::VisibleDeclConsumer {
335335
void addValueBaseName(CodeCompletionResultBuilder &Builder,
336336
DeclBaseName Name);
337337

338+
void addIdentifier(CodeCompletionResultBuilder &Builder, Identifier Name);
339+
338340
void addLeadingDot(CodeCompletionResultBuilder &Builder);
339341

340342
void addTypeAnnotation(CodeCompletionResultBuilder &Builder, Type T,

include/swift/Parse/Lexer.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,11 @@ class Lexer {
395395
/// identifier, without escaping characters.
396396
static bool isIdentifier(StringRef identifier);
397397

398+
/// Returns true if the given string is a raw identifier that must always
399+
/// be escaped by backticks when printing it back in source form or writing
400+
/// its name into runtime metadata.
401+
static bool identifierMustAlwaysBeEscaped(StringRef str);
402+
398403
/// Determine the token kind of the string, given that it is a valid
399404
/// non-operator identifier. Return tok::identifier if the string is not a
400405
/// reserved word.

lib/AST/ASTDemangler.cpp

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ TypeDecl *ASTBuilder::createTypeDecl(NodePointer node) {
9494
if (proto == nullptr)
9595
return nullptr;
9696

97-
auto name = Ctx.getIdentifier(node->getChild(1)->getText());
97+
auto name = getIdentifier(node->getChild(1)->getText());
9898
return proto->getAssociatedType(name);
9999
}
100100

@@ -110,10 +110,9 @@ ASTBuilder::createBuiltinType(StringRef builtinName,
110110

111111
StringRef strippedName =
112112
builtinName.drop_front(BUILTIN_TYPE_NAME_PREFIX.size());
113-
Ctx.TheBuiltinModule->lookupValue(Ctx.getIdentifier(strippedName),
114-
NLKind::QualifiedLookup,
115-
decls);
116-
113+
Ctx.TheBuiltinModule->lookupValue(getIdentifier(strippedName),
114+
NLKind::QualifiedLookup, decls);
115+
117116
if (decls.size() == 1 && isa<TypeDecl>(decls[0]))
118117
return cast<TypeDecl>(decls[0])->getDeclaredInterfaceType();
119118
}
@@ -348,7 +347,7 @@ Type ASTBuilder::createTupleType(ArrayRef<Type> eltTypes, ArrayRef<StringRef> la
348347
for (unsigned i : indices(eltTypes)) {
349348
Identifier label;
350349
if (!labels[i].empty())
351-
label = Ctx.getIdentifier(labels[i]);
350+
label = getIdentifier(labels[i]);
352351
elements.emplace_back(eltTypes[i], label);
353352
}
354353

@@ -408,7 +407,7 @@ Type ASTBuilder::createFunctionType(
408407
if (!type->isMaterializable())
409408
return Type();
410409

411-
auto label = Ctx.getIdentifier(param.getLabel());
410+
auto label = getIdentifier(param.getLabel());
412411
auto flags = param.getFlags();
413412
auto ownership =
414413
ParamDecl::getParameterSpecifierForValueOwnership(asValueOwnership(flags.getOwnership()));
@@ -878,7 +877,7 @@ Type ASTBuilder::createGenericTypeParameterType(unsigned depth,
878877

879878
Type ASTBuilder::createDependentMemberType(StringRef member,
880879
Type base) {
881-
auto identifier = Ctx.getIdentifier(member);
880+
auto identifier = getIdentifier(member);
882881

883882
if (auto *archetype = base->getAs<ArchetypeType>()) {
884883
if (Type memberType = archetype->getNestedTypeByName(identifier))
@@ -895,7 +894,7 @@ Type ASTBuilder::createDependentMemberType(StringRef member,
895894
Type ASTBuilder::createDependentMemberType(StringRef member,
896895
Type base,
897896
ProtocolDecl *protocol) {
898-
auto identifier = Ctx.getIdentifier(member);
897+
auto identifier = getIdentifier(member);
899898

900899
if (auto *archetype = base->getAs<ArchetypeType>()) {
901900
if (auto assocType = protocol->getAssociatedType(identifier))
@@ -1135,7 +1134,7 @@ ASTBuilder::getAcceptableTypeDeclCandidate(ValueDecl *decl,
11351134

11361135
DeclContext *ASTBuilder::getNotionalDC() {
11371136
if (!NotionalDC) {
1138-
NotionalDC = ModuleDecl::createEmpty(Ctx.getIdentifier(".RemoteAST"), Ctx);
1137+
NotionalDC = ModuleDecl::createEmpty(getIdentifier(".RemoteAST"), Ctx);
11391138
NotionalDC = new (Ctx) TopLevelCodeDecl(NotionalDC);
11401139
}
11411140
return NotionalDC;
@@ -1308,7 +1307,7 @@ ASTBuilder::findDeclContext(NodePointer node) {
13081307
Demangle::Node::Kind::PrivateDeclName) {
13091308
name = declNameNode->getChild(1)->getText();
13101309
privateDiscriminator =
1311-
Ctx.getIdentifier(declNameNode->getChild(0)->getText());
1310+
getIdentifier(declNameNode->getChild(0)->getText());
13121311

13131312
} else if (declNameNode->getKind() ==
13141313
Demangle::Node::Kind::RelatedEntityDeclName) {
@@ -1336,14 +1335,14 @@ ASTBuilder::findDeclContext(NodePointer node) {
13361335
return nullptr;
13371336

13381337
for (auto *module : potentialModules)
1339-
if (auto typeDecl = findTypeDecl(module, Ctx.getIdentifier(name),
1338+
if (auto typeDecl = findTypeDecl(module, getIdentifier(name),
13401339
privateDiscriminator, node->getKind()))
13411340
return typeDecl;
13421341
return nullptr;
13431342
}
13441343

13451344
if (auto *dc = findDeclContext(child))
1346-
if (auto typeDecl = findTypeDecl(dc, Ctx.getIdentifier(name),
1345+
if (auto typeDecl = findTypeDecl(dc, getIdentifier(name),
13471346
privateDiscriminator, node->getKind()))
13481347
return typeDecl;
13491348

@@ -1542,7 +1541,7 @@ GenericTypeDecl *ASTBuilder::findForeignTypeDecl(StringRef name,
15421541
found);
15431542
break;
15441543
}
1545-
importer->lookupValue(Ctx.getIdentifier(name), consumer);
1544+
importer->lookupValue(getIdentifier(name), consumer);
15461545
if (consumer.Result)
15471546
consumer.Result = getAcceptableTypeDeclCandidate(consumer.Result, kind);
15481547
break;
@@ -1552,3 +1551,28 @@ GenericTypeDecl *ASTBuilder::findForeignTypeDecl(StringRef name,
15521551

15531552
return consumer.Result;
15541553
}
1554+
1555+
Identifier ASTBuilder::getIdentifier(StringRef name) {
1556+
if (name.size() > 1 && name.front() == '`' && name.back() == '`') {
1557+
// Raw identifiers have backticks affixed before mangling. We need to
1558+
// remove those before creating the Identifier for the AST, which doesn't
1559+
// encode the backticks.
1560+
std::string fixedName;
1561+
for (size_t i = 1; i < name.size() - 1; ++i) {
1562+
unsigned char ch = name[i];
1563+
// Raw identifiers have the space (U+0020) replaced with a non-breaking
1564+
// space (U+00A0, UTF-8: 0xC2 0xA0) in their mangling so that parts of
1565+
// the runtime that use space as a delimiter remain compatible with
1566+
// these identifiers. Flip it back.
1567+
if (ch == 0xc2 && i < name.size() - 2 &&
1568+
(unsigned char)name[i + 1] == 0xa0) {
1569+
fixedName.push_back(' ');
1570+
++i;
1571+
} else {
1572+
fixedName.push_back(ch);
1573+
}
1574+
}
1575+
return Ctx.getIdentifier(fixedName);
1576+
}
1577+
return Ctx.getIdentifier(name);
1578+
}

lib/AST/ASTMangler.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1146,7 +1146,7 @@ void ASTMangler::appendDeclName(const ValueDecl *decl, DeclBaseName name) {
11461146
"synthesized type's original name must be a valid Swift identifier");
11471147
appendIdentifier(synthesizedTypeAttr->originalTypeName);
11481148
} else if (name.isOperator()) {
1149-
appendIdentifier(translateOperator(name.getIdentifier().str()));
1149+
appendIdentifier(translateOperator(name.getIdentifier().str()), /*allowRawIdentifiers=*/ false);
11501150
switch (decl->getAttrs().getUnaryOperatorKind()) {
11511151
case UnaryOperatorKind::Prefix:
11521152
appendOperator("op");
@@ -4719,7 +4719,7 @@ void ASTMangler::appendMacroExpansionContext(
47194719
appendIdentifier(origDC->getParentModule()->getName().str());
47204720

47214721
auto *SF = origDC->getParentSourceFile();
4722-
appendIdentifier(llvm::sys::path::filename(SF->getFilename()));
4722+
appendIdentifier(llvm::sys::path::filename(SF->getFilename()), /*allowRawIdentifiers=*/false);
47234723

47244724
auto lineColumn = sourceMgr.getLineAndColumnInBuffer(loc);
47254725
appendOperator("fMX", Index(lineColumn.first), Index(lineColumn.second));

lib/AST/ASTPrinter.cpp

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -558,10 +558,9 @@ ASTPrinter &operator<<(ASTPrinter &printer, tok keyword) {
558558
}
559559

560560
/// Determine whether to escape the given identifier in the given context.
561-
bool swift::escapeKeywordInContext(
562-
StringRef keyword,
563-
PrintNameContext context
564-
) {
561+
bool swift::escapeIdentifierInContext(Identifier name,
562+
PrintNameContext context) {
563+
StringRef keyword = name.str();
565564
bool isKeyword = llvm::StringSwitch<bool>(keyword)
566565
#define KEYWORD(KW) \
567566
.Case(#KW, true)
@@ -571,7 +570,7 @@ bool swift::escapeKeywordInContext(
571570
switch (context) {
572571
case PrintNameContext::Normal:
573572
case PrintNameContext::Attribute:
574-
return isKeyword;
573+
return isKeyword || name.mustAlwaysBeEscaped();
575574
case PrintNameContext::Keyword:
576575
case PrintNameContext::IntroducerKeyword:
577576
return false;
@@ -581,12 +580,12 @@ bool swift::escapeKeywordInContext(
581580
return isKeyword && keyword != "Self";
582581

583582
case PrintNameContext::TypeMember:
584-
return isKeyword || !canBeMemberName(keyword);
583+
return isKeyword || !canBeMemberName(keyword) || name.mustAlwaysBeEscaped();
585584

586585
case PrintNameContext::FunctionParameterExternal:
587586
case PrintNameContext::FunctionParameterLocal:
588587
case PrintNameContext::TupleElement:
589-
return !canBeArgumentLabel(keyword);
588+
return !canBeArgumentLabel(keyword) || name.mustAlwaysBeEscaped();
590589
}
591590

592591
llvm_unreachable("Unhandled PrintNameContext in switch.");
@@ -601,12 +600,12 @@ void ASTPrinter::printName(Identifier Name, PrintNameContext Context) {
601600
return;
602601
}
603602

604-
bool shouldEscapeKeyword = escapeKeywordInContext(Name.str(), Context);
603+
bool shouldEscapeIdentifier = escapeIdentifierInContext(Name, Context);
605604

606-
if (shouldEscapeKeyword)
605+
if (shouldEscapeIdentifier)
607606
*this << "`";
608607
*this << Name.str();
609-
if (shouldEscapeKeyword)
608+
if (shouldEscapeIdentifier)
610609
*this << "`";
611610

612611
printNamePost(Context);

lib/AST/Attr.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1533,7 +1533,8 @@ bool DeclAttribute::printImpl(ASTPrinter &Printer, const PrintOptions &Options,
15331533
StringRef nameText = name.getName().getString(buffer);
15341534
bool shouldEscape =
15351535
!name.getName().isSpecial() &&
1536-
(escapeKeywordInContext(nameText, PrintNameContext::Normal) ||
1536+
(escapeIdentifierInContext(name.getName().getBaseIdentifier(),
1537+
PrintNameContext::Normal) ||
15371538
nameText == "$");
15381539
Printer << "(";
15391540
if (shouldEscape)

lib/AST/Identifier.cpp

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -78,16 +78,10 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, swift::ObjCSelector S) {
7878
return OS;
7979
}
8080

81-
bool Identifier::isOperatorSlow() const {
82-
StringRef data = str();
83-
auto *s = reinterpret_cast<llvm::UTF8 const *>(data.begin()),
84-
*end = reinterpret_cast<llvm::UTF8 const *>(data.end());
85-
llvm::UTF32 codePoint;
86-
llvm::ConversionResult res =
87-
llvm::convertUTF8Sequence(&s, end, &codePoint, llvm::strictConversion);
88-
assert(res == llvm::conversionOK && "invalid UTF-8 in identifier?!");
89-
(void)res;
90-
return !empty() && isOperatorStartCodePoint(codePoint);
81+
bool Identifier::isOperatorSlow() const { return Lexer::isOperator(str()); }
82+
83+
bool Identifier::mustAlwaysBeEscaped() const {
84+
return Lexer::identifierMustAlwaysBeEscaped(str());
9185
}
9286

9387
int Identifier::compare(Identifier other) const {

0 commit comments

Comments
 (0)