Skip to content

Commit 633201e

Browse files
petebacondarwin authored and profanis committed
fix(localize): render ICU placeholders in extracted translation files (angular#38484)
Previously placeholders were only rendered for dynamic interpolation expressions in `$localize` tagged strings. But there are also potentially dynamic values in ICU expressions too, so we need to render these as placeholders when extracting i18n messages into translation files. PR Close angular#38484
1 parent 4451408 commit 633201e

File tree

10 files changed

+437
-32
lines changed

10 files changed

+437
-32
lines changed
Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
/**
2+
* @license
3+
* Copyright Google LLC All Rights Reserved.
4+
*
5+
* Use of this source code is governed by an MIT-style license that can be
6+
* found in the LICENSE file at https://angular.io/license
7+
*/
8+
9+
/**
 * Break `text` into alternating "static string" and ICU "placeholder name" pieces.
 *
 * ICU expressions inside `$localize` tagged messages may contain "dynamic" pieces (for example
 * interpolations or element markers). Translation files must render those as placeholders, so the
 * ICU text is scanned for them here and the serializers decide how to render each piece.
 *
 * An ICU with interpolations looks like this:
 *
 * ```
 * {VAR_PLURAL, plural, one {{INTERPOLATION}} other {{INTERPOLATION_1} post}}
 * ```
 *
 * Here `INTERPOLATION` and `INTERPOLATION_1` are placeholders: text wrapped in curly braces that
 * sits directly inside an ICU case expression.
 *
 * ICUs may also be nested inside one another. A nested ICU is wrapped in curly braces exactly like
 * a placeholder, but it contains a comma `,` (separating the ICU value from the ICU type), which
 * is how the two are told apart. Nested ICUs may carry placeholders of their own:
 *
 * ```
 * {VAR_SELECT_1, select,
 *   invoice {Invoice for {INTERPOLATION}}
 *   payment {{VAR_SELECT, select,
 *     processor {Payment gateway}
 *     other {{INTERPOLATION_1}}
 *   }}
 * }
 * ```
 *
 * @param text Text to be broken.
 * @returns an array of strings, where
 *  - even indexes hold static strings (e.g. 0, 2, 4, etc)
 *  - odd indexes hold placeholder names (e.g. 1, 3, 5, etc)
 */
export function extractIcuPlaceholders(text: string): string[] {
  const parserState = new StateStack();
  const collected = new IcuPieces();
  const bracePattern = /[{}]/g;

  // Index of the first character of `text` that has not yet been copied into `collected`.
  let consumed = 0;
  for (let found = bracePattern.exec(text); found !== null; found = bracePattern.exec(text)) {
    if (found[0] === '{') {
      parserState.enterBlock();
    } else {
      // Anything else matched by the pattern is a `}`.
      parserState.leaveBlock();
    }

    if (parserState.getCurrent() !== 'placeholder') {
      // Ordinary brace: everything up to and including it is static text.
      collected.addText(text.substring(consumed, bracePattern.lastIndex));
      consumed = bracePattern.lastIndex;
      continue;
    }

    // `lastIndex` points just past the `{` that opened the potential placeholder.
    const afterOpeningBrace = bracePattern.lastIndex;
    const placeholderName = tryParsePlaceholder(text, afterOpeningBrace);
    if (placeholderName) {
      // A real placeholder: emit the static text without the opening brace, record the name,
      // move the scanner past the name and its closing brace, and leave the placeholder block.
      collected.addText(text.substring(consumed, afterOpeningBrace - 1));
      collected.addPlaceholder(placeholderName);
      bracePattern.lastIndex = afterOpeningBrace + placeholderName.length + 1;
      parserState.leaveBlock();
    } else {
      // Not a placeholder, so treat it as a nested ICU: the opening brace stays in the static
      // text and the state is corrected from "placeholder" to "icu".
      collected.addText(text.substring(consumed, afterOpeningBrace));
      parserState.nestedIcu();
    }
    consumed = bracePattern.lastIndex;
  }

  // Whatever follows the last brace (possibly nothing) is static text too.
  collected.addText(text.substring(consumed));
  return collected.toArray();
}
93+
94+
/**
 * Accumulates the pieces of an ICU as an alternating list of "static text" and
 * "placeholder name" strings.
 *
 * The list always starts and ends with a (possibly empty) static text entry.
 */
class IcuPieces {
  private pieces: string[] = [''];

  /**
   * Append `text` to the trailing "static text" piece.
   *
   * Consecutive calls keep extending the same piece rather than creating new ones.
   */
  addText(text: string): void {
    const trailing = this.pieces.length - 1;
    this.pieces[trailing] += text;
  }

  /**
   * Record the placeholder `name`, followed by a fresh empty "static text" piece for any text
   * that comes after it.
   */
  addPlaceholder(name: string): void {
    this.pieces.push(name, '');
  }

  /**
   * Get the pieces gathered so far.
   *
   * Even indexes hold static strings (e.g. 0, 2, 4, etc)
   * Odd indexes hold placeholder names (e.g. 1, 3, 5, etc)
   */
  toArray(): string[] {
    return this.pieces;
  }
}
127+
128+
/**
 * Keeps track of what kind of curly-brace block the ICU placeholder parser is currently inside.
 *
 * The state changes every time a curly brace block is entered or left. ICUs can be nested, so the
 * states are kept on a stack.
 */
class StateStack {
  private stack: ParserState[] = [];

  /**
   * Record that a new block has been entered.
   *
   * The state pushed onto the stack is derived from the state we were in:
   * outside any block -> "icu", "icu" -> "case", "case" -> "placeholder", "placeholder" -> "case".
   */
  enterBlock(): void {
    const enclosing = this.getCurrent();
    let entered: ParserState;
    if (enclosing === 'case') {
      entered = 'placeholder';
    } else if (enclosing === 'icu' || enclosing === 'placeholder') {
      entered = 'case';
    } else {
      // Nothing on the stack yet: this is the start of a top-level ICU.
      entered = 'icu';
    }
    this.stack.push(entered);
  }

  /**
   * Record that the current block has been left.
   *
   * @returns the state that was removed from the stack (`undefined` if the stack was empty).
   */
  leaveBlock(): ParserState {
    return this.stack.pop();
  }

  /**
   * Correct the state after discovering that a supposed placeholder is really a nested ICU.
   *
   * The top state is removed and, provided it was "placeholder", replaced with "icu".
   * Throws (via `assert`) if the removed state was anything else.
   */
  nestedIcu(): void {
    const replaced = this.stack.pop();
    assert(
        replaced === 'placeholder', 'A nested ICU must replace a placeholder but got ' + replaced);
    this.stack.push('icu');
  }

  /**
   * Peek at the most recently pushed state without removing it.
   */
  getCurrent(): ParserState {
    const top = this.stack.length - 1;
    return this.stack[top];
  }
}

/** The kinds of block the parser can be in; `undefined` means it is outside any block. */
type ParserState = 'icu'|'case'|'placeholder'|undefined;
189+
190+
/**
 * Try to read a simple placeholder name out of a curly braced block.
 *
 * The name runs from `start` up to the next closing brace `}`. If a comma `,` appears before that
 * closing brace then the block cannot be a placeholder - it is probably a nested ICU instead.
 *
 * @param text the whole string that is being parsed.
 * @param start the index of the character in the `text` string where this placeholder may start.
 * @returns the placeholder name or `null` if it is not a placeholder (a comma comes first, or
 *     there is no closing brace at all).
 */
function tryParsePlaceholder(text: string, start: number): string|null {
  const closingBrace = text.indexOf('}', start);
  if (closingBrace === -1) {
    return null;
  }
  const comma = text.indexOf(',', start);
  const commaComesFirst = comma !== -1 && comma < closingBrace;
  return commaComesFirst ? null : text.substring(start, closingBrace);
}
211+
212+
/**
 * Throw an `Error` carrying `message` (prefixed with "Assertion failure: ") unless `test` holds.
 */
function assert(test: boolean, message: string): void {
  if (test) {
    return;
  }
  throw new Error('Assertion failure: ' + message);
}

packages/localize/src/tools/src/extract/translation_files/xliff1_translation_serializer.ts

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import {AbsoluteFsPath, relative} from '@angular/compiler-cli/src/ngtsc/file_system';
99
import {ɵParsedMessage, ɵSourceLocation} from '@angular/localize';
1010

11+
import {extractIcuPlaceholders} from './icu_parsing';
1112
import {TranslationSerializer} from './translation_serializer';
1213
import {XmlFile} from './xml_file';
1314

@@ -63,11 +64,22 @@ export class Xliff1TranslationSerializer implements TranslationSerializer {
6364
}
6465

6566
private serializeMessage(xml: XmlFile, message: ɵParsedMessage): void {
66-
xml.text(message.messageParts[0]);
67-
for (let i = 1; i < message.messageParts.length; i++) {
68-
xml.startTag('x', {id: message.placeholderNames[i - 1]}, {selfClosing: true});
69-
xml.text(message.messageParts[i]);
67+
const length = message.messageParts.length - 1;
68+
for (let i = 0; i < length; i++) {
69+
this.serializeTextPart(xml, message.messageParts[i]);
70+
xml.startTag('x', {id: message.placeholderNames[i]}, {selfClosing: true});
7071
}
72+
this.serializeTextPart(xml, message.messageParts[length]);
73+
}
74+
75+
/**
 * Write one static message part to `xml`, rendering any ICU placeholders found inside it as
 * self-closing `<x>` tags whose `id` is the placeholder name.
 */
private serializeTextPart(xml: XmlFile, text: string): void {
  // `parts` alternates static text (even indexes) and placeholder names (odd indexes).
  const parts = extractIcuPlaceholders(text);
  const lastIndex = parts.length - 1;
  let cursor = 0;
  while (cursor < lastIndex) {
    xml.text(parts[cursor]);
    xml.startTag('x', {id: parts[cursor + 1]}, {selfClosing: true});
    cursor += 2;
  }
  // The final entry is the static text after the last placeholder.
  xml.text(parts[lastIndex]);
}
7284

7385
private serializeNote(xml: XmlFile, name: string, value: string): void {

packages/localize/src/tools/src/extract/translation_files/xliff2_translation_serializer.ts

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import {AbsoluteFsPath, relative} from '@angular/compiler-cli/src/ngtsc/file_system';
99
import {ɵParsedMessage} from '@angular/localize';
1010

11+
import {extractIcuPlaceholders} from './icu_parsing';
1112
import {TranslationSerializer} from './translation_serializer';
1213
import {XmlFile} from './xml_file';
1314

@@ -22,6 +23,7 @@ const MAX_LEGACY_XLIFF_2_MESSAGE_LENGTH = 20;
2223
* @see Xliff2TranslationParser
2324
*/
2425
export class Xliff2TranslationSerializer implements TranslationSerializer {
26+
private currentPlaceholderId = 0;
2527
constructor(
2628
private sourceLocale: string, private basePath: AbsoluteFsPath,
2729
private useLegacyIds: boolean) {}
@@ -74,21 +76,38 @@ export class Xliff2TranslationSerializer implements TranslationSerializer {
7476
}
7577

7678
private serializeMessage(xml: XmlFile, message: ɵParsedMessage): void {
77-
xml.text(message.messageParts[0]);
78-
for (let i = 1; i < message.messageParts.length; i++) {
79-
const placeholderName = message.placeholderNames[i - 1];
80-
if (placeholderName.startsWith('START_')) {
81-
xml.startTag('pc', {
82-
id: `${i}`,
83-
equivStart: placeholderName,
84-
equivEnd: placeholderName.replace(/^START/, 'CLOSE')
85-
});
86-
} else if (placeholderName.startsWith('CLOSE_')) {
87-
xml.endTag('pc');
88-
} else {
89-
xml.startTag('ph', {id: `${i}`, equiv: placeholderName}, {selfClosing: true});
90-
}
91-
xml.text(message.messageParts[i]);
79+
this.currentPlaceholderId = 0;
80+
const length = message.messageParts.length - 1;
81+
for (let i = 0; i < length; i++) {
82+
this.serializeTextPart(xml, message.messageParts[i]);
83+
this.serializePlaceholder(xml, message.placeholderNames[i]);
84+
}
85+
this.serializeTextPart(xml, message.messageParts[length]);
86+
}
87+
88+
/**
 * Write one static message part to `xml`, passing each ICU placeholder name found inside it to
 * `serializePlaceholder()`.
 */
private serializeTextPart(xml: XmlFile, text: string): void {
  // `parts` alternates static text (even indexes) and placeholder names (odd indexes).
  const parts = extractIcuPlaceholders(text);
  const lastIndex = parts.length - 1;
  let cursor = 0;
  while (cursor < lastIndex) {
    xml.text(parts[cursor]);
    this.serializePlaceholder(xml, parts[cursor + 1]);
    cursor += 2;
  }
  // The final entry is the static text after the last placeholder.
  xml.text(parts[lastIndex]);
}
97+
98+
/**
 * Render a single placeholder into `xml`.
 *
 * - Names starting with `START_` open a `<pc>` element, using the next placeholder id.
 * - Names starting with `CLOSE_` close the current `<pc>` element.
 * - Any other name becomes a self-closing `<ph>` element, using the next placeholder id.
 */
private serializePlaceholder(xml: XmlFile, placeholderName: string): void {
  if (placeholderName.startsWith('CLOSE_')) {
    xml.endTag('pc');
    return;
  }

  // Both remaining forms consume one id from the running counter.
  const id = `${this.currentPlaceholderId++}`;
  if (placeholderName.startsWith('START_')) {
    const equivEnd = placeholderName.replace(/^START/, 'CLOSE');
    xml.startTag('pc', {id, equivStart: placeholderName, equivEnd});
    return;
  }

  xml.startTag('ph', {id, equiv: placeholderName}, {selfClosing: true});
}
94113

packages/localize/src/tools/src/extract/translation_files/xmb_translation_serializer.ts

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import {AbsoluteFsPath, relative} from '@angular/compiler-cli/src/ngtsc/file_system';
99
import {ɵParsedMessage, ɵSourceLocation} from '@angular/localize';
1010

11+
import {extractIcuPlaceholders} from './icu_parsing';
1112
import {TranslationSerializer} from './translation_serializer';
1213
import {XmlFile} from './xml_file';
1314

@@ -77,11 +78,22 @@ export class XmbTranslationSerializer implements TranslationSerializer {
7778
}
7879

7980
private serializeMessage(xml: XmlFile, message: ɵParsedMessage): void {
80-
xml.text(message.messageParts[0]);
81-
for (let i = 1; i < message.messageParts.length; i++) {
82-
xml.startTag('ph', {name: message.placeholderNames[i - 1]}, {selfClosing: true});
83-
xml.text(message.messageParts[i]);
81+
const length = message.messageParts.length - 1;
82+
for (let i = 0; i < length; i++) {
83+
this.serializeTextPart(xml, message.messageParts[i]);
84+
xml.startTag('ph', {name: message.placeholderNames[i]}, {selfClosing: true});
8485
}
86+
this.serializeTextPart(xml, message.messageParts[length]);
87+
}
88+
89+
/**
 * Write one static message part to `xml`, rendering any ICU placeholders found inside it as
 * self-closing `<ph>` tags whose `name` is the placeholder name.
 */
private serializeTextPart(xml: XmlFile, text: string): void {
  // `parts` alternates static text (even indexes) and placeholder names (odd indexes).
  const parts = extractIcuPlaceholders(text);
  const lastIndex = parts.length - 1;
  let cursor = 0;
  while (cursor < lastIndex) {
    xml.text(parts[cursor]);
    xml.startTag('ph', {name: parts[cursor + 1]}, {selfClosing: true});
    cursor += 2;
  }
  // The final entry is the static text after the last placeholder.
  xml.text(parts[lastIndex]);
}
8698

8799
/**

packages/localize/src/tools/test/extract/integration/main_spec.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,12 +175,12 @@ runInEachFileSystem(() => {
175175
` <file>`,
176176
` <unit id="3291030485717846467">`,
177177
` <segment>`,
178-
` <source>Hello, <ph id="1" equiv="PH"/>!</source>`,
178+
` <source>Hello, <ph id="0" equiv="PH"/>!</source>`,
179179
` </segment>`,
180180
` </unit>`,
181181
` <unit id="8669027859022295761">`,
182182
` <segment>`,
183-
` <source>try<ph id="1" equiv="PH"/>me</source>`,
183+
` <source>try<ph id="0" equiv="PH"/>me</source>`,
184184
` </segment>`,
185185
` </unit>`,
186186
` </file>`,

0 commit comments

Comments
 (0)