|
| 1 | +// |
| 2 | +// Hoa |
| 3 | +// |
| 4 | +// |
| 5 | +// @license |
| 6 | +// |
| 7 | +// New BSD License |
| 8 | +// |
| 9 | +// Copyright © 2007-2017, Hoa community. All rights reserved. |
| 10 | +// |
| 11 | +// Redistribution and use in source and binary forms, with or without |
| 12 | +// modification, are permitted provided that the following conditions are met: |
| 13 | +// * Redistributions of source code must retain the above copyright |
| 14 | +// notice, this list of conditions and the following disclaimer. |
| 15 | +// * Redistributions in binary form must reproduce the above copyright |
| 16 | +// notice, this list of conditions and the following disclaimer in the |
| 17 | +// documentation and/or other materials provided with the distribution. |
| 18 | +// * Neither the name of the Hoa nor the names of its contributors may be |
| 19 | +// used to endorse or promote products derived from this software without |
| 20 | +// specific prior written permission. |
| 21 | +// |
| 22 | +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 23 | +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 24 | +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 25 | +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND CONTRIBUTORS BE |
| 26 | +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 27 | +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 28 | +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 29 | +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 30 | +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 31 | +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 32 | +// POSSIBILITY OF SUCH DAMAGE. |
| 33 | +// |
| 34 | +// Grammar \Hoa\Regex\Grammar. |
| 35 | +// |
| 36 | +// Provide grammar of PCRE (Perl Compatible Regular Expressions) for the LL(k) |
| 37 | +// parser. More information at http://pcre.org/pcre.txt, sections pcrepattern & |
| 38 | +// pcresyntax. |
| 39 | +// |
| 40 | +// @copyright Copyright © 2007-2017 Hoa community. |
| 41 | +// @license New BSD License |
| 42 | +// |
| 43 | + |
| 44 | + |
| 45 | +// Skip: newline characters are discarded by the lexer and never reach the rules. |
| 46 | +%skip nl \n |
| 47 | + |
| 48 | +// Character classes: "[^" and "[" open a class, "]" closes it, "-" separates the two bounds of a range. |
| 49 | +%token negative_class_ \[\^ |
| 50 | +%token class_ \[ |
| 51 | +%token _class \] |
| 52 | +%token range \- |
| 53 | + |
| 54 | +// Internal options: exactly one flag among i, m, s, x, with an optional "-" or "+" sign, e.g. (?i) or (?-s). |
| 55 | +%token internal_option \(\?[\-+]?[imsx]\) |
| 56 | + |
| 57 | +// Lookahead and lookbehind assertions (opening delimiters only; the closing ")" is the _capturing token). |
| 58 | +%token lookahead_ \(\?= |
| 59 | +%token negative_lookahead_ \(\?! |
| 60 | +%token lookbehind_ \(\?<= |
| 61 | +%token negative_lookbehind_ \(\?<! |
| 62 | + |
| 63 | +// Conditions. "(?(<" enters namespace nc; "(?(" followed by a digit or a sign enters namespace c without consuming it (lookahead), so that c:index reads the number and returns to default. |
| 64 | +%token named_reference_ \(\?\(< -> nc |
| 65 | +%token absolute_reference_ \(\?\((?=\d) -> c |
| 66 | +%token relative_reference_ \(\?\((?=[\+\-]) -> c |
| 67 | +%token c:index [\+\-]?\d+ -> default |
| 68 | +%token assertion_reference_ \(\?\( |
| 69 | + |
| 70 | +// Comments. "(?#" enters namespace co, where the text runs up to the first ")" not preceded by a backslash, and that ")" returns to default. |
| 71 | +%token comment_ \(\?# -> co |
| 72 | +%token co:_comment \) -> default |
| 73 | +%token co:comment .*?(?=(?<!\\)\)) |
| 74 | + |
| 75 | +// Capturing group. "(?<" enters namespace nc, where the name runs up to the first ">" not preceded by a backslash; the bare "(" opener is declared after every "(?" form. |
| 76 | +%token named_capturing_ \(\?< -> nc |
| 77 | +%token nc:_named_capturing > -> default |
| 78 | +%token nc:capturing_name .+?(?=(?<!\\)>) |
| 79 | +%token non_capturing_ \(\?: |
| 80 | +%token non_capturing_reset_ \(\?\| |
| 81 | +%token atomic_group_ \(\?> |
| 82 | +%token capturing_ \( |
| 83 | +%token _capturing \) |
| 84 | + |
| 85 | +// Quantifiers (by default, greedy). The possessive ("+" suffix) and lazy ("?" suffix) variants are declared before the greedy form they extend. |
| 86 | +%token zero_or_one_possessive \?\+ |
| 87 | +%token zero_or_one_lazy \?\? |
| 88 | +%token zero_or_one \? |
| 89 | +%token zero_or_more_possessive \*\+ |
| 90 | +%token zero_or_more_lazy \*\? |
| 91 | +%token zero_or_more \* |
| 92 | +%token one_or_more_possessive \+\+ |
| 93 | +%token one_or_more_lazy \+\? |
| 94 | +%token one_or_more \+ |
| 95 | +%token exactly_n \{[0-9]+\} |
| 96 | +%token n_to_m_possessive \{[0-9]+,[0-9]+\}\+ |
| 97 | +%token n_to_m_lazy \{[0-9]+,[0-9]+\}\? |
| 98 | +%token n_to_m \{[0-9]+,[0-9]+\} |
| 99 | +%token n_or_more_possessive \{[0-9]+,\}\+ |
| 100 | +%token n_or_more_lazy \{[0-9]+,\}\? |
| 101 | +%token n_or_more \{[0-9]+,\} |
| 102 | + |
| 103 | +// Alternation: the "|" separator between branches. |
| 104 | +%token alternation \| |
| 105 | + |
| 106 | +// Literal. An anchor is a backslash followed by one letter among b, B, A, Z, z, G (hence a character class, not a group), or "^", or "$". |
| 107 | +%token character \\([aefnrt]|c[\x00-\x7f]) |
| 108 | +%token dynamic_character \\([0-7]{3}|x[0-9a-zA-Z]{2}|x{[0-9a-zA-Z]+}) |
| 109 | +// Please, see PCRESYNTAX(3), General Category properties, PCRE special category |
| 110 | +// properties and script names for \p{} and \P{}. |
| 111 | +%token character_type \\([CdDhHNRsSvVwWX]|[pP]{[^}]+}) |
| 112 | +%token anchor \\[bBAZzG]|\^|\$ |
| 113 | +%token match_point_reset \\K |
| 114 | +%token literal \\.|. |
| 115 | + |
| 116 | + |
| 117 | +// Rules. |
| 118 | +// Root rule: produces the #expression node around a single alternation. |
| 119 | +#expression: |
| 120 | + alternation() |
| 121 | +// concatenation ( "|" concatenation )*; the "|" token is dropped and the #alternation node is only built when a "|" is actually present. |
| 122 | +alternation: |
| 123 | + concatenation() ( ::alternation:: concatenation() #alternation )* |
| 124 | +// One or more of internal_options, assertion, quantification or condition; the #concatenation node is only built when there are at least two. |
| 125 | +concatenation: |
| 126 | + ( internal_options() | assertion() | quantification() | condition() ) |
| 127 | + ( ( internal_options() | assertion() | quantification() | condition() ) #concatenation )* |
| 128 | +// A single internal_option token, kept as the child of an #internal_options node. |
| 129 | +#internal_options: |
| 130 | + <internal_option> |
| 131 | +// Conditional group: a named, relative/absolute index, or assertion condition, then ")", an optional branch, an optional "|" with a second optional branch, and the closing ")". |
| 132 | +#condition: |
| 133 | + ( |
| 134 | + ::named_reference_:: <capturing_name> ::_named_capturing:: #namedcondition |
| 135 | + | ( |
| 136 | + ::relative_reference_:: #relativecondition |
| 137 | + | ::absolute_reference_:: #absolutecondition |
| 138 | + ) |
| 139 | + <index> |
| 140 | + | ::assertion_reference_:: alternation() #assertioncondition |
| 141 | + ) |
| 142 | + ::_capturing:: concatenation()? |
| 143 | + ( ::alternation:: concatenation()? )? |
| 144 | + ::_capturing:: |
| 145 | +// Lookahead or lookbehind, positive or negative: the opening token selects the node name, then come an alternation and the closing ")". |
| 146 | +assertion: |
| 147 | + ( |
| 148 | + ::lookahead_:: #lookahead |
| 149 | + | ::negative_lookahead_:: #negativelookahead |
| 150 | + | ::lookbehind_:: #lookbehind |
| 151 | + | ::negative_lookbehind_:: #negativelookbehind |
| 152 | + ) |
| 153 | + alternation() ::_capturing:: |
| 154 | +// A class or a simple element, optionally followed by a quantifier; the #quantification node is only built when a quantifier is present. |
| 155 | +quantification: |
| 156 | + ( class() | simple() ) ( quantifier() #quantification )? |
| 157 | +// Exactly one quantifier token, whichever variant it is; the token is kept in the tree. |
| 158 | +quantifier: |
| 159 | + <zero_or_one_possessive> | <zero_or_one_lazy> | <zero_or_one> |
| 160 | + | <zero_or_more_possessive> | <zero_or_more_lazy> | <zero_or_more> |
| 161 | + | <one_or_more_possessive> | <one_or_more_lazy> | <one_or_more> |
| 162 | + | <exactly_n> |
| 163 | + | <n_to_m_possessive> | <n_to_m_lazy> | <n_to_m> |
| 164 | + | <n_or_more_possessive> | <n_or_more_lazy> | <n_or_more> |
| 165 | +// Character class: "[^" (which tags the node as #negativeclass) or "[", then one or more ranges or literals, then "]". |
| 166 | +#class: |
| 167 | + ( |
| 168 | + ::negative_class_:: #negativeclass |
| 169 | + | ::class_:: |
| 170 | + ) |
| 171 | + ( range() | literal() )+ |
| 172 | + ::_class:: |
| 173 | +// Two literals separated by the range token ("-"), which is dropped from the tree. |
| 174 | +#range: |
| 175 | + literal() ::range:: literal() |
| 176 | +// Either a group or comment (the capturing rule) or a literal. |
| 177 | +simple: |
| 178 | + capturing() |
| 179 | + | literal() |
| 180 | +// Either a "(?#...)" comment, or a group (named, non-capturing, non-capturing reset, atomic or plain capturing) wrapping an alternation and closed by ")". |
| 181 | +capturing: |
| 182 | + ::comment_:: <comment>? ::_comment:: #comment |
| 183 | + | ( |
| 184 | + ::named_capturing_:: <capturing_name> ::_named_capturing:: #namedcapturing |
| 185 | + | ::non_capturing_:: #noncapturing |
| 186 | + | ::non_capturing_reset_:: #noncapturingreset |
| 187 | + | ::atomic_group_:: #atomicgroup |
| 188 | + | ::capturing_:: |
| 189 | + ) |
| 190 | + alternation() ::_capturing:: |
| 191 | +// Exactly one token among character, dynamic_character, character_type, anchor, match_point_reset and literal. |
| 192 | +literal: |
| 193 | + <character> |
| 194 | + | <dynamic_character> |
| 195 | + | <character_type> |
| 196 | + | <anchor> |
| 197 | + | <match_point_reset> |
| 198 | + | <literal> |
0 commit comments