Skip to content

Commit 3ee8034

Browse files
authored
Merge pull request #58997 from ahoppen/pr/regex-literal-parsing-swiftsyntax
[SwiftSyntax] Parse regex literals using a fallback lexer implemented in C++
2 parents 7a49320 + 1d750f9 commit 3ee8034

File tree

8 files changed

+302
-19
lines changed

8 files changed

+302
-19
lines changed
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
//===--- SyntaxRegexFallbackLexing.h --------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2014 - 2022 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#ifndef SWIFT_PARSE_SYNTAXREGEXFALLBACKLEXING_H
#define SWIFT_PARSE_SYNTAXREGEXFALLBACKLEXING_H

namespace swift {
/// SwiftSyntax parsing currently doesn't link against
/// SwiftExperimentalStringProcessing and is thus missing the regex lexing
/// functions defined in it. This registers a fallback regex-lexing function
/// implemented in C++ that is sufficient to generate a valid SwiftSyntax tree.
/// The regex parser registered by this function will accept all regex literals
/// and is not suited for normal compilation.
void registerSyntaxFallbackRegexParser();
} // end namespace swift

#endif // SWIFT_PARSE_SYNTAXREGEXFALLBACKLEXING_H

lib/Parse/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ add_swift_host_library(swiftParse STATIC
2424
ParseType.cpp
2525
PersistentParserState.cpp
2626
SyntaxParsingCache.cpp
27-
SyntaxParsingContext.cpp)
27+
SyntaxParsingContext.cpp
28+
SyntaxRegexFallbackLexing.cpp)
2829
_swift_gyb_target_sources(swiftParse PRIVATE
2930
ParsedSyntaxBuilders.cpp.gyb
3031
ParsedSyntaxNodes.cpp.gyb
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
//===--- SyntaxRegexFallbackLexing.cpp ------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2014 - 2022 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#include "swift/Parse/SyntaxRegexFallbackLexing.h"
14+
#include "swift/AST/DiagnosticEngine.h"
15+
#include "swift/AST/DiagnosticsParse.h"
16+
#include "swift/Parse/Lexer.h"
17+
#include "swift/Parse/RegexParserBridging.h"
18+
#include <mutex>
19+
20+
using namespace swift;
21+
22+
template <typename... DiagArgTypes, typename... ArgTypes>
23+
static void diagnose(BridgedOptionalDiagnosticEngine bridgedDiag,
24+
const char *ptr, Diag<DiagArgTypes...> DiagID,
25+
ArgTypes &&...Args) {
26+
if (auto *Diag = static_cast<DiagnosticEngine *>(bridgedDiag.object)) {
27+
Diag->diagnose(SourceLoc(llvm::SMLoc::getFromPointer(ptr)), DiagID,
28+
std::forward<ArgTypes>(Args)...);
29+
}
30+
}
31+
32+
bool syntaxparse_lexRegexLiteral(
33+
const char **InputPtr, const char *BufferEnd, bool MustBeRegex,
34+
BridgedOptionalDiagnosticEngine BridgedDiagEngine) {
35+
36+
const char *Ptr = *InputPtr;
37+
38+
// Count leading '#'.
39+
while (*Ptr == '#') {
40+
++Ptr;
41+
}
42+
if (*Ptr != '/') {
43+
// This wasn't a regex literal.
44+
return true;
45+
}
46+
47+
unsigned customDelimiterLen = Ptr - *InputPtr;
48+
49+
++Ptr;
50+
51+
// If the delimiter allows multi-line, try skipping over any whitespace to a
52+
// newline character. If we can do that, we enter multi-line mode.
53+
bool allowsMultiline = customDelimiterLen != 0;
54+
const char *firstNewline = nullptr;
55+
if (allowsMultiline) {
56+
while (Ptr != BufferEnd) {
57+
switch (*Ptr) {
58+
case ' ':
59+
case '\t':
60+
++Ptr;
61+
continue;
62+
case '\r':
63+
case '\n':
64+
firstNewline = Ptr;
65+
break;
66+
default:
67+
break;
68+
}
69+
break;
70+
}
71+
}
72+
73+
bool isMultilineLiteral = (firstNewline != nullptr);
74+
75+
while (true) {
76+
switch (*Ptr++) {
77+
case '\r':
78+
case '\n':
79+
if (!isMultilineLiteral) {
80+
diagnose(BridgedDiagEngine, Ptr, diag::lex_regex_literal_unterminated);
81+
*InputPtr = Ptr - 1;
82+
return false;
83+
}
84+
break;
85+
case '\\':
86+
if (Ptr != BufferEnd) {
87+
if (!isMultilineLiteral && (*Ptr == '\r' || *Ptr == '\n')) {
88+
diagnose(BridgedDiagEngine, Ptr, diag::lex_regex_literal_unterminated);
89+
*InputPtr = Ptr - 1;
90+
return false;
91+
}
92+
if (validateUTF8CharacterAndAdvance(Ptr, BufferEnd) == ~0U)
93+
diagnose(BridgedDiagEngine, Ptr, diag::lex_invalid_utf8);
94+
}
95+
break;
96+
case '/': {
97+
const char *AfterSlashPos = Ptr;
98+
99+
// Eat '#' up to the open delimeter length.
100+
while (*Ptr == '#' && (Ptr - AfterSlashPos) <= customDelimiterLen) {
101+
++Ptr;
102+
}
103+
104+
if ((Ptr - AfterSlashPos) != customDelimiterLen) {
105+
// '#' count didn't match. Reset the cursor after the '/' and move on.
106+
Ptr = AfterSlashPos;
107+
break;
108+
}
109+
110+
// Found the closing delimiter. Finish.
111+
*InputPtr = Ptr;
112+
return false;
113+
}
114+
case '\0': {
115+
if (Ptr - 1 == BufferEnd) {
116+
// Reached to EOF.
117+
diagnose(BridgedDiagEngine, Ptr, diag::lex_regex_literal_unterminated);
118+
// In multi-line mode, we don't want to skip over what is likely
119+
// otherwise valid Swift code, so resume from the first newline.
120+
*InputPtr = firstNewline ? firstNewline : (Ptr - 1);
121+
return false;
122+
}
123+
124+
// TODO: Warn to match the behavior of String literal lexer?
125+
// For now, just ignore them.
126+
break;
127+
}
128+
default: {
129+
--Ptr;
130+
if (validateUTF8CharacterAndAdvance(Ptr, BufferEnd) == ~0U)
131+
diagnose(BridgedDiagEngine, Ptr, diag::lex_invalid_utf8);
132+
break;
133+
}
134+
}
135+
}
136+
}
137+
138+
bool syntaxparse_parseRegexLiteral(const char *InputPtr, unsigned *VersionOut,
139+
void *CaptureStructureOut,
140+
unsigned CaptureStructureSize,
141+
BridgedSourceLoc DiagnosticBaseLoc,
142+
BridgedDiagnosticEngine BridgedDiagEngine) {
143+
*VersionOut = ~0u;
144+
return /*hasError*/ false;
145+
}
146+
147+
/// Installs the C++ fallback regex lexing and parsing callbacks.
///
/// Registration is funnelled through std::call_once, so repeated calls
/// install the callbacks only the first time.
void swift::registerSyntaxFallbackRegexParser() {
  static std::once_flag registeredOnce;

  auto installFallbacks = [] {
    Parser_registerRegexLiteralLexingFn(syntaxparse_lexRegexLiteral);
    Parser_registerRegexLiteralParsingFn(syntaxparse_parseRegexLiteral);
  };
  std::call_once(registeredOnce, installFallbacks);
}

test/Syntax/round_trip_regex.swift

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// RUN: %empty-directory(%t)
2+
// RUN: %swift-syntax-test -input-source-filename %s -parse-gen -fail-on-parse-error > %t/afterRoundtrip.swift
3+
// RUN: diff -u %s %t/afterRoundtrip.swift
4+
5+
_ = /abc/
6+
_ = #/abc/#
7+
_ = ##/abc/##
8+
9+
func foo<T>(_ x: T...) {}
10+
foo(/abc/, #/abc/#, ##/abc/##)
11+
12+
let arr = [/abc/, #/abc/#, ##/abc/##]
13+
14+
_ = /\w+/.self
15+
_ = #/\w+/#.self
16+
_ = ##/\w+/##.self
17+
18+
_ = /#\/\#\\/
19+
_ = #/#/\/\#\\/#
20+
_ = ##/#|\|\#\\/##
21+
22+
_ = #/
23+
multiline
24+
/#
25+
26+
_ = #/
27+
double
28+
multiline
29+
/#
30+
31+
_ = #/
32+
\
33+
/#
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// RUN: %empty-directory(%t)
2+
// RUN: not %swift-syntax-test -input-source-filename %s -parse-gen -fail-on-parse-error > %t/afterRoundtrip.swift 2> %t/errors.swift
3+
// RUN: diff -u %s %t/afterRoundtrip.swift
4+
// RUN: cat %t/errors.swift | %FileCheck %s
5+
6+
// Escaping newlines is not supported
7+
_ = /\
8+
/
9+
10+
// CHECK: 7:7: error: unterminated regex literal
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// RUN: %empty-directory(%t)
2+
// RUN: %swift-syntax-test -input-source-filename %s -parse-gen > %t/afterRoundtrip.swift
3+
// RUN: diff -u %s %t/afterRoundtrip.swift
4+
5+
_ = /abc
6+
_ = #/abc
7+
_ = #/abc/
8+
_ = ##/abc/#
9+
10+
_ #/x
11+
/#
12+
_ #/
13+
x/#
14+
15+
_ //#
16+
_ /x/#

tools/libSwiftSyntaxParser/libSwiftSyntaxParser.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "swift/Basic/SourceManager.h"
2222
#include "swift/Parse/Parser.h"
2323
#include "swift/Parse/SyntaxParseActions.h"
24+
#include "swift/Parse/SyntaxRegexFallbackLexing.h"
2425
#include "swift/Syntax/Serialization/SyntaxSerialization.h"
2526
#include "swift/Syntax/SyntaxNodes.h"
2627
#include "swift/Subsystems.h"
@@ -478,6 +479,7 @@ struct SynParserDiagConsumer: public DiagnosticConsumer {
478479
};
479480

480481
swiftparse_client_node_t SynParser::parse(const char *source, size_t len) {
482+
registerSyntaxFallbackRegexParser();
481483
SourceManager SM;
482484
unsigned bufID = SM.addNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(
483485
StringRef(source, len), "syntax_parse_source"));
@@ -489,6 +491,10 @@ swiftparse_client_node_t SynParser::parse(const char *source, size_t len) {
489491
// Not ready yet:
490492
// langOpts.EnableASTScopeLookup = true;
491493

494+
// Always enable bare /.../ regex literal in syntax parser.
495+
langOpts.EnableExperimentalStringProcessing = true;
496+
langOpts.EnableBareSlashRegexLiterals = true;
497+
492498
auto parseActions =
493499
std::make_shared<CLibParseActions>(*this, SM, bufID);
494500
// We have to use SourceFileKind::Main to avoid diagnostics like

0 commit comments

Comments
 (0)