|
| 1 | +/** |
| 2 | + * @license |
| 3 | + * Copyright Google LLC All Rights Reserved. |
| 4 | + * |
| 5 | + * Use of this source code is governed by an MIT-style license that can be |
| 6 | + * found in the LICENSE file at https://angular.io/license |
| 7 | + */ |
| 8 | + |
/**
 * Break an ICU message `text` into alternating "static text" and "placeholder name" pieces.
 *
 * ICU expressions inside `$localize` tagged messages can carry "dynamic" pieces, such as
 * interpolations or element markers. Extracted translation files must render those pieces as
 * placeholders, so the ICU has to be scanned to find them and to separate them from the
 * surrounding static text that the translation serializers emit verbatim.
 *
 * An ICU that contains interpolations:
 *
 * ```
 * {VAR_PLURAL, plural, one {{INTERPOLATION}} other {{INTERPOLATION_1} post}}
 * ```
 *
 * Here `INTERPOLATION` and `INTERPOLATION_1` are placeholders that are replaced with dynamic
 * content at runtime. A placeholder is recognized as text wrapped in curly braces that sits
 * directly inside an ICU case expression.
 *
 * ICUs may also be nested to any depth. A nested ICU is wrapped in curly braces exactly like a
 * placeholder, but it contains a comma `,` (separating the ICU value from the ICU type), which is
 * how the two are told apart. Nested ICUs can have placeholders of their own, and those are
 * extracted as well.
 *
 * A nested ICU with its own placeholders:
 *
 * ```
 * {VAR_SELECT_1, select,
 *   invoice {Invoice for {INTERPOLATION}}
 *   payment {{VAR_SELECT, select,
 *     processor {Payment gateway}
 *     other {{INTERPOLATION_1}}
 *   }}
 * }
 * ```
 *
 * @param text Text to be broken.
 * @returns an array of strings, where
 *  - entries at even indices (0, 2, 4, ...) are static strings
 *  - entries at odd indices (1, 3, 5, ...) are placeholder names
 */
export function extractIcuPlaceholders(text: string): string[] {
  const parserState = new StateStack();
  const result = new IcuPieces();
  const bracePattern = /[{}]/g;

  // Index of the first character of `text` that has not yet been copied into `result`.
  let consumed = 0;

  for (let hit: RegExpExecArray|null = bracePattern.exec(text); hit !== null;
       hit = bracePattern.exec(text)) {
    // Every brace either opens a new block or closes the current one.
    if (hit[0] === '{') {
      parserState.enterBlock();
    } else {
      parserState.leaveBlock();
    }

    // Position of the character immediately after the brace that was just matched.
    const afterBrace = bracePattern.lastIndex;

    if (parserState.getCurrent() !== 'placeholder') {
      // An ordinary brace: it is part of the static text.
      result.addText(text.substring(consumed, afterBrace));
      consumed = afterBrace;
      continue;
    }

    const placeholderName = tryParsePlaceholder(text, afterBrace);
    if (!placeholderName) {
      // No placeholder name could be read, so this block is treated as a nested ICU;
      // its opening brace is kept as part of the static text.
      result.addText(text.substring(consumed, afterBrace));
      parserState.nestedIcu();
      consumed = afterBrace;
      continue;
    }

    // A real placeholder: flush the static text in front of it, leaving out the opening brace.
    result.addText(text.substring(consumed, afterBrace - 1));
    result.addPlaceholder(placeholderName);
    // Move the scanner beyond the name and its closing brace, then close the placeholder block
    // by hand since that closing brace will never be matched by the regex.
    bracePattern.lastIndex = afterBrace + placeholderName.length + 1;
    parserState.leaveBlock();
    consumed = bracePattern.lastIndex;
  }

  // Whatever follows the final brace (if anything) is static text.
  result.addText(text.substring(consumed));
  return result.toArray();
}
| 93 | + |
/**
 * Accumulates the pieces of an ICU: "static text" entries interleaved with "placeholder name"
 * entries.
 *
 * The list always starts and ends with a static text entry (possibly the empty string), so
 * static text lives at even indices and placeholder names at odd indices.
 */
class IcuPieces {
  private pieces: string[] = [''];

  /**
   * Append `text` to the static text entry that is currently last in the list.
   *
   * Calling this several times in a row keeps extending that same entry.
   */
  addText(text: string): void {
    const tail = this.pieces.length - 1;
    this.pieces[tail] = this.pieces[tail] + text;
  }

  /**
   * Record the placeholder `name`, then open a fresh (empty) static text entry after it.
   */
  addPlaceholder(name: string): void {
    this.pieces.push(name, '');
  }

  /**
   * Get the collected pieces.
   *
   * Entries at even indices (0, 2, 4, ...) are static strings.
   * Entries at odd indices (1, 3, 5, ...) are placeholder names.
   *
   * The returned array is the internal list itself, not a copy.
   */
  toArray(): string[] {
    return this.pieces;
  }
}
| 127 | + |
/**
 * Tracks where the parser is while it scans a string for ICU placeholders.
 *
 * The state only changes when a curly brace block is entered or left. Because ICUs can be
 * nested inside each other, the states are kept on a stack.
 */
class StateStack {
  private stack: ParserState[] = [];

  /**
   * Record that a new block has been entered.
   *
   * The state pushed onto the stack is derived from the state of the enclosing block:
   * nothing -> 'icu', 'case' -> 'placeholder', and both 'icu' and 'placeholder' -> 'case'.
   */
  enterBlock(): void {
    const enclosing = this.getCurrent();
    let next: ParserState;
    if (enclosing === undefined) {
      // Not inside any block yet, so this brace opens an ICU.
      next = 'icu';
    } else if (enclosing === 'case') {
      next = 'placeholder';
    } else {
      // The enclosing state is 'icu' or 'placeholder'; either way this brace opens a case.
      next = 'case';
    }
    this.stack.push(next);
  }

  /**
   * Record that the current block has been left.
   *
   * @returns the state that was removed from the stack, or `undefined` if the stack was empty.
   */
  leaveBlock(): ParserState {
    const left = this.stack.pop();
    return left;
  }

  /**
   * Record that the block just entered is a nested ICU rather than a placeholder.
   *
   * The top of the stack is expected to be 'placeholder'; it is removed and 'icu' is stored in
   * its place. If the removed state was anything else the assertion fails and nothing is pushed.
   */
  nestedIcu(): void {
    const replaced = this.stack.pop();
    assert(
        replaced === 'placeholder', 'A nested ICU must replace a placeholder but got ' + replaced);
    this.stack.push('icu');
  }

  /**
   * Peek at the state on top of the stack; `undefined` when the stack is empty.
   */
  getCurrent() {
    const {stack} = this;
    return stack[stack.length - 1];
  }
}
type ParserState = 'icu'|'case'|'placeholder'|undefined;
| 189 | + |
/**
 * Try to read a simple placeholder name out of a curly braced block.
 *
 * The name is everything between `start` and the next closing brace `}`. If a comma `,` shows up
 * before that closing brace, the block cannot be a placeholder (it is probably a nested ICU
 * instead). If there is no closing brace at all, there is no placeholder either.
 *
 * @param text the whole string that is being parsed.
 * @param start the index in `text` of the first character after the block's opening brace.
 * @returns the placeholder name (which may be the empty string), or `null` if the block is not a
 *     placeholder.
 */
function tryParsePlaceholder(text: string, start: number): string|null {
  const closingBrace = text.indexOf('}', start);
  if (closingBrace === -1) {
    // The block is never closed.
    return null;
  }

  const candidate = text.substring(start, closingBrace);
  // Only the text ahead of the closing brace is inspected for a comma.
  return candidate.includes(',') ? null : candidate;
}
| 211 | + |
/**
 * Throw an error if `test` does not hold.
 *
 * @param test the condition that is expected to be true.
 * @param message description of the failure; it is appended to an "Assertion failure: " prefix.
 * @throws Error when `test` is false.
 */
function assert(test: boolean, message: string): void {
  if (test) {
    return;
  }
  throw new Error('Assertion failure: ' + message);
}
0 commit comments