|
| 1 | +//===--- SyntaxRegexFallbackLexing.cpp ------------------------------------===// |
| 2 | +// |
| 3 | +// This source file is part of the Swift.org open source project |
| 4 | +// |
| 5 | +// Copyright (c) 2014 - 2022 Apple Inc. and the Swift project authors |
| 6 | +// Licensed under Apache License v2.0 with Runtime Library Exception |
| 7 | +// |
| 8 | +// See https://swift.org/LICENSE.txt for license information |
| 9 | +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors |
| 10 | +// |
| 11 | +//===----------------------------------------------------------------------===// |
| 12 | + |
| 13 | +#include "swift/Parse/SyntaxRegexFallbackLexing.h" |
| 14 | +#include "swift/AST/DiagnosticEngine.h" |
| 15 | +#include "swift/AST/DiagnosticsParse.h" |
| 16 | +#include "swift/Parse/Lexer.h" |
| 17 | +#include "swift/Parse/RegexParserBridging.h" |
| 18 | +#include <mutex> |
| 19 | + |
| 20 | +using namespace swift; |
| 21 | + |
| 22 | +template <typename... DiagArgTypes, typename... ArgTypes> |
| 23 | +static void diagnose(BridgedOptionalDiagnosticEngine bridgedDiag, |
| 24 | + const char *ptr, Diag<DiagArgTypes...> DiagID, |
| 25 | + ArgTypes &&...Args) { |
| 26 | + if (auto *Diag = static_cast<DiagnosticEngine *>(bridgedDiag.object)) { |
| 27 | + Diag->diagnose(SourceLoc(llvm::SMLoc::getFromPointer(ptr)), DiagID, |
| 28 | + std::forward<ArgTypes>(Args)...); |
| 29 | + } |
| 30 | +} |
| 31 | + |
| 32 | +bool syntaxparse_lexRegexLiteral( |
| 33 | + const char **InputPtr, const char *BufferEnd, bool MustBeRegex, |
| 34 | + BridgedOptionalDiagnosticEngine BridgedDiagEngine) { |
| 35 | + |
| 36 | + const char *Ptr = *InputPtr; |
| 37 | + |
| 38 | + // Count leading '#'. |
| 39 | + while (*Ptr == '#') { |
| 40 | + ++Ptr; |
| 41 | + } |
| 42 | + if (*Ptr != '/') { |
| 43 | + // This wasn't a regex literal. |
| 44 | + return true; |
| 45 | + } |
| 46 | + |
| 47 | + unsigned customDelimiterLen = Ptr - *InputPtr; |
| 48 | + |
| 49 | + ++Ptr; |
| 50 | + |
| 51 | + // If the delimiter allows multi-line, try skipping over any whitespace to a |
| 52 | + // newline character. If we can do that, we enter multi-line mode. |
| 53 | + bool allowsMultiline = customDelimiterLen != 0; |
| 54 | + const char *firstNewline = nullptr; |
| 55 | + if (allowsMultiline) { |
| 56 | + while (Ptr != BufferEnd) { |
| 57 | + switch (*Ptr) { |
| 58 | + case ' ': |
| 59 | + case '\t': |
| 60 | + ++Ptr; |
| 61 | + continue; |
| 62 | + case '\r': |
| 63 | + case '\n': |
| 64 | + firstNewline = Ptr; |
| 65 | + break; |
| 66 | + default: |
| 67 | + break; |
| 68 | + } |
| 69 | + break; |
| 70 | + } |
| 71 | + } |
| 72 | + |
| 73 | + while (true) { |
| 74 | + switch (*Ptr++) { |
| 75 | + case '\r': |
| 76 | + case '\n': |
| 77 | + if (firstNewline == nullptr) { |
| 78 | + // not a multiline literal |
| 79 | + diagnose(BridgedDiagEngine, Ptr, diag::lex_regex_literal_unterminated); |
| 80 | + *InputPtr = Ptr - 1; |
| 81 | + return false; |
| 82 | + } |
| 83 | + break; |
| 84 | + case '\\': |
| 85 | + if (Ptr != BufferEnd) { |
| 86 | + if (validateUTF8CharacterAndAdvance(Ptr, BufferEnd) == ~0U) |
| 87 | + diagnose(BridgedDiagEngine, Ptr, diag::lex_invalid_utf8); |
| 88 | + } |
| 89 | + break; |
| 90 | + case '/': { |
| 91 | + const char *AfterSlashPos = Ptr; |
| 92 | + |
| 93 | + // Eat '#' up to the open delimeter length. |
| 94 | + while (*Ptr == '#' && (Ptr - AfterSlashPos) <= customDelimiterLen) { |
| 95 | + ++Ptr; |
| 96 | + } |
| 97 | + |
| 98 | + if ((Ptr - AfterSlashPos) != customDelimiterLen) { |
| 99 | + // '#' count didn't match. Reset the cursor after the '/' and move on. |
| 100 | + Ptr = AfterSlashPos; |
| 101 | + break; |
| 102 | + } |
| 103 | + |
| 104 | + // Found the closing delimiter. Finish. |
| 105 | + *InputPtr = Ptr; |
| 106 | + return false; |
| 107 | + } |
| 108 | + case '\0': { |
| 109 | + if (Ptr - 1 == BufferEnd) { |
| 110 | + // Reached to EOF. |
| 111 | + diagnose(BridgedDiagEngine, Ptr, diag::lex_regex_literal_unterminated); |
| 112 | + // In multi-line mode, we don't want to skip over what is likely |
| 113 | + // otherwise valid Swift code, so resume from the first newline. |
| 114 | + *InputPtr = firstNewline ? firstNewline : (Ptr - 1); |
| 115 | + return false; |
| 116 | + } |
| 117 | + |
| 118 | + // TODO: Warn to match the behavior of String literal lexer? |
| 119 | + // For now, just ignore them. |
| 120 | + break; |
| 121 | + } |
| 122 | + default: { |
| 123 | + --Ptr; |
| 124 | + if (validateUTF8CharacterAndAdvance(Ptr, BufferEnd) == ~0U) |
| 125 | + diagnose(BridgedDiagEngine, Ptr, diag::lex_invalid_utf8); |
| 126 | + break; |
| 127 | + } |
| 128 | + } |
| 129 | + } |
| 130 | +} |
| 131 | + |
| 132 | +bool syntaxparse_parseRegexLiteral(const char *InputPtr, unsigned *VersionOut, |
| 133 | + void *CaptureStructureOut, |
| 134 | + unsigned CaptureStructureSize, |
| 135 | + BridgedSourceLoc DiagnosticBaseLoc, |
| 136 | + BridgedDiagnosticEngine BridgedDiagEngine) { |
| 137 | + *VersionOut = ~0u; |
| 138 | + return /*hasError*/ false; |
| 139 | +} |
| 140 | + |
| 141 | +void swift::registerSyntaxFallbackRegexParser() { |
| 142 | + static std::once_flag flag; |
| 143 | + std::call_once(flag, []() { |
| 144 | + Parser_registerRegexLiteralLexingFn(syntaxparse_lexRegexLiteral); |
| 145 | + Parser_registerRegexLiteralParsingFn(syntaxparse_parseRegexLiteral); |
| 146 | + }); |
| 147 | +} |
0 commit comments