Skip to content

Commit 32628df

Browse files
authored
feat!(@formatjs/cli-lib): repurpose en-XB for bidi pseudo locale (#3978)
BREAKING CHANGE: `en-XA` is a pseudo locale for accented and lengthened English with markers, and `en-XB` is now a bidi pseudo locale.
1 parent 90cbd0a commit 32628df

File tree

4 files changed

+702
-78
lines changed

4 files changed

+702
-78
lines changed

packages/cli-lib/src/pseudo_locale.ts

Lines changed: 107 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,26 @@ import {
66
isPluralElement,
77
isSelectElement,
88
isTagElement,
9+
LiteralElement,
910
} from '@formatjs/icu-messageformat-parser'
1011

12+
/**
 * Walks a parsed message depth-first and invokes `fn` once for every literal
 * element it finds.
 *
 * Literal elements are passed to `fn` directly. Plural and select elements are
 * descended into through the `value` of each of their options, and tag
 * elements through their `children`. All other element kinds are ignored.
 *
 * @param ast - Elements to visit, in order.
 * @param fn - Callback that receives each literal element.
 */
function forEachLiteralElement(
  ast: MessageFormatElement[],
  fn: (el: LiteralElement) => void
): void {
  ast.forEach(node => {
    if (isLiteralElement(node)) {
      fn(node)
      return
    }
    if (isPluralElement(node) || isSelectElement(node)) {
      // Every option carries its own nested message.
      Object.values(node.options).forEach(option => {
        forEachLiteralElement(option.value, fn)
      })
      return
    }
    if (isTagElement(node)) {
      forEachLiteralElement(node.children, fn)
    }
  })
}
28+
1129
export function generateXXLS(
1230
msg: string | MessageFormatElement[]
1331
): MessageFormatElement[] {
@@ -24,16 +42,8 @@ export function generateXXAC(
2442
msg: string | MessageFormatElement[]
2543
): MessageFormatElement[] {
2644
const ast = typeof msg === 'string' ? parse(msg) : msg
27-
ast.forEach(el => {
28-
if (isLiteralElement(el)) {
29-
el.value = el.value.toUpperCase()
30-
} else if (isPluralElement(el) || isSelectElement(el)) {
31-
for (const opt of Object.values(el.options)) {
32-
generateXXAC(opt.value)
33-
}
34-
} else if (isTagElement(el)) {
35-
generateXXAC(el.children)
36-
}
45+
forEachLiteralElement(ast, el => {
46+
el.value = el.value.toUpperCase()
3747
})
3848
return ast
3949
}
@@ -50,64 +60,104 @@ export function generateXXHA(
5060
return [{type: TYPE.literal, value: '[javascript]'}, ...ast]
5161
}
5262

53-
const ASCII = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
54-
const ACCENTED_ASCII = 'âḃćḋèḟĝḫíĵǩĺṁńŏṗɋŕśṭůṿẘẋẏẓḀḂḈḊḔḞḠḢḬĴḴĻḾŊÕṔɊŔṠṮŨṼẄẌŸƵ'
63+
// Recursive helper for `Tuple`: keeps prepending `T` to the accumulator `R`
// until `R['length']` matches `N`, then resolves to `R`.
type _TupleOf<T, N extends number, R extends unknown[]> = R['length'] extends N
64+
? R
65+
: _TupleOf<T, N, [T, ...R]>
66+
67+
// Tuple of `N` elements of type `T`. The outer `N extends N` lets the
// conditional distribute when `N` is a union; when `N` is the wide `number`
// type (`number extends N`) the result falls back to a plain `T[]`.
type Tuple<T, N extends number> = N extends N
68+
? number extends N
69+
? T[]
70+
: _TupleOf<T, N, []>
71+
: never
72+
73+
// Replacement tables consumed by `transformString`: 26 numbers for upper-case
// letters and 26 for lower-case letters. Entries are looked up by
// (char code - 65) / (char code - 97) and passed to `String.fromCodePoint`.
type PseudoLocaleTransformMap = {
74+
caps: Tuple<number, 26>
75+
small: Tuple<number, 26>
76+
}
77+
78+
// Code points of the accented stand-in for each ASCII letter, listed in
// alphabetical order (index 0 replaces "A"/"a", index 25 replaces "Z"/"z").
// The comment above each array spells out the characters those numbers encode.
const ACCENTED_MAP: PseudoLocaleTransformMap = {
79+
// ȦƁƇḒḖƑƓĦĪĴĶĿḾȠǾƤɊŘŞŦŬṼẆẊẎẐ
80+
// prettier-ignore
81+
"caps": [550, 385, 391, 7698, 7702, 401, 403, 294, 298, 308, 310, 319, 7742, 544, 510, 420, 586, 344, 350, 358, 364, 7804, 7814, 7818, 7822, 7824],
82+
// ȧƀƈḓḗƒɠħīĵķŀḿƞǿƥɋřşŧŭṽẇẋẏẑ
83+
// prettier-ignore
84+
"small": [551, 384, 392, 7699, 7703, 402, 608, 295, 299, 309, 311, 320, 7743, 414, 511, 421, 587, 345, 351, 359, 365, 7805, 7815, 7819, 7823, 7825],
85+
}
86+
87+
// Code points of the upside-down stand-in for each ASCII letter, listed in
// alphabetical order (index 0 replaces "A"/"a", index 25 replaces "Z"/"z").
// Several entries are plain ASCII: H, I, N, O, S, X and Z map to themselves,
// and M/W are swapped with each other.
const FLIPPED_MAP: PseudoLocaleTransformMap = {
88+
// ∀ԐↃᗡƎℲ⅁HIſӼ⅂WNOԀÒᴚS⊥∩ɅMX⅄Z
89+
// prettier-ignore
90+
"caps": [8704, 1296, 8579, 5601, 398, 8498, 8513, 72, 73, 383, 1276, 8514, 87, 78, 79, 1280, 210, 7450, 83, 8869, 8745, 581, 77, 88, 8516, 90],
91+
// ɐqɔpǝɟƃɥıɾʞʅɯuodbɹsʇnʌʍxʎz
92+
// prettier-ignore
93+
"small": [592, 113, 596, 112, 477, 607, 387, 613, 305, 638, 670, 645, 623, 117, 111, 100, 98, 633, 115, 647, 110, 652, 653, 120, 654, 122],
94+
}
5595

96+
/**
 * Replaces every ASCII letter of `msg` with the character that `map` assigns
 * to it; all other characters are left untouched.
 *
 * Upper-case letters are looked up in `map.caps` and lower-case letters in
 * `map.small`, both indexed alphabetically (0 = "A"/"a").
 *
 * Based on: https://hg.mozilla.org/mozilla-central/file/a1f74e8c8fb72390d22054d6b00c28b1a32f6c43/intl/l10n/L10nRegistry.jsm#l425
 *
 * @param map - Replacement code points for the 26 upper- and lower-case letters.
 * @param elongate - When true, the replacements of lower-case "a", "e", "o"
 *   and "u" are emitted twice to emulate ~30% longer text. Defaults to false,
 *   but because `msg` follows it, callers pass it explicitly.
 * @param msg - Text to transform.
 * @returns The transformed text.
 */
function transformString(
  map: PseudoLocaleTransformMap,
  elongate = false,
  msg: string
): string {
  const upperA = 65
  const upperZ = 90
  const lowerA = 97
  // Only these lower-case vowels are doubled; "i" and all capitals are not.
  const doubledVowels = 'aeou'

  // Without the `u` flag, /[a-z]/i matches ASCII letters only, so every match
  // is either A-Z or a-z and no fallback branch is needed.
  return msg.replace(/[a-z]/gi, ch => {
    const cc = ch.charCodeAt(0)
    if (cc >= upperA && cc <= upperZ) {
      return String.fromCodePoint(map.caps[cc - upperA])
    }
    const newChar = String.fromCodePoint(map.small[cc - lowerA])
    return elongate && doubledVowels.includes(ch) ? newChar + newChar : newChar
  })
}
120+
121+
/**
122+
* accented - Ȧƈƈḗḗƞŧḗḗḓ Ḗƞɠŀīşħ
123+
* --------------------------------
124+
*
125+
* This locale replaces all Latin characters with their accented equivalents, and duplicates some
126+
* vowels to create roughly 30% longer strings. Strings are wrapped in markers (square brackets),
127+
* which help with detecting truncation.
128+
*/
56129
export function generateENXA(
57130
msg: string | MessageFormatElement[]
58131
): MessageFormatElement[] {
59132
// Accepts raw message text (parsed here) or an already-parsed AST, which is used as-is.
const ast = typeof msg === 'string' ? parse(msg) : msg
60-
ast.forEach(el => {
61-
if (isLiteralElement(el)) {
62-
el.value = el.value
63-
.split('')
64-
.map(c => {
65-
const i = ASCII.indexOf(c)
66-
if (i < 0) {
67-
return c
68-
}
69-
return ACCENTED_ASCII[i]
70-
})
71-
.join('')
72-
} else if (isPluralElement(el) || isSelectElement(el)) {
73-
for (const opt of Object.values(el.options)) {
74-
generateENXA(opt.value)
75-
}
76-
} else if (isTagElement(el)) {
77-
generateENXA(el.children)
78-
}
133+
// Overwrites each literal's text in place with its accented form; elongation is enabled.
forEachLiteralElement(ast, el => {
134+
el.value = transformString(ACCENTED_MAP, true, el.value)
79135
})
80-
return ast
136+
// Returns a new array that surrounds the message with literal '[' and ']' marker elements.
return [
137+
{type: TYPE.literal, value: '['},
138+
...ast,
139+
{type: TYPE.literal, value: ']'},
140+
]
81141
}
82142

143+
/**
144+
* bidi - ɥsıʅƃuƎ ıpıԐ
145+
* -------------------
146+
*
147+
* This strategy replaces all Latin characters with their 180 degree rotated versions and enforces
148+
* right to left text flow using Unicode UAX#9 Explicit Directional Overrides. In this mode, the UI
149+
* directionality will also be set to right-to-left.
150+
*/
83151
export function generateENXB(
84152
msg: string | MessageFormatElement[]
85153
): MessageFormatElement[] {
86154
// Accepts raw message text (parsed here) or an already-parsed AST, which is used as-is.
const ast = typeof msg === 'string' ? parse(msg) : msg
87-
ast.forEach(el => {
88-
if (isLiteralElement(el)) {
89-
const pseudoString = el.value
90-
.split('')
91-
.map((c, index) => {
92-
const i = ASCII.indexOf(c)
93-
const canPad = (index + 1) % 3 === 0
94-
95-
if (i < 0) {
96-
return c
97-
}
98-
99-
return canPad ? ACCENTED_ASCII[i].repeat(3) : ACCENTED_ASCII[i]
100-
})
101-
.join('')
102-
103-
el.value = `[!! ${pseudoString} !!]`
104-
} else if (isPluralElement(el) || isSelectElement(el)) {
105-
for (const opt of Object.values(el.options)) {
106-
generateENXB(opt.value)
107-
}
108-
} else if (isTagElement(el)) {
109-
generateENXB(el.children)
110-
}
155+
// Overwrites each literal's text in place with its flipped form; no elongation.
forEachLiteralElement(ast, el => {
156+
el.value = transformString(FLIPPED_MAP, false, el.value)
111157
})
112-
return ast
158+
// Returns a new array that wraps the message between U+202E (RIGHT-TO-LEFT OVERRIDE)
// and U+202C (POP DIRECTIONAL FORMATTING) literal elements.
return [
159+
{type: TYPE.literal, value: '\u202e'},
160+
...ast,
161+
{type: TYPE.literal, value: '\u202c'},
162+
]
113163
}

0 commit comments

Comments
 (0)