Skip to content

Commit 82eb630

Browse files
committed
Add a tool to generate fluent files
I am looking to add a benchmark for parsing large fluent files, in order to test potential performance optimisations. Originally I used an actual fluent file from my project, but that cannot be shared. This change adds a script which can be used to generate "random" fluent files, which share similar statistical properties to the actual file I used. For example, the ratio of comments to messages, or the number of elements inside a message.
1 parent 19aa241 commit 82eb630

File tree

2 files changed

+166
-0
lines changed

2 files changed

+166
-0
lines changed

tools/benchmarks/README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,12 @@ platform, you might need to use `sudo`.
2828

2929
And look at prof.svg in a browser. Note that this diagram includes the fixture
3030
setup, warmup and calibration phases which you should ignore.
31+
32+
This directory also contains generate_ftl_file.py, which can be used to generate
33+
files for benchmarking against. It can be run as a Python script:
34+
35+
$ python generate_ftl_file.py outfile.ftl
36+
37+
For full command line options, use:
38+
39+
$ python generate_ftl_file.py -h

tools/benchmarks/generate_ftl_file.py

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
import argparse
2+
import random
3+
4+
import attrs
5+
from fluent.syntax import serialize
6+
from fluent.syntax.ast import Comment, Identifier, Message, Pattern, Placeable, Resource, TextElement, VariableReference
7+
8+
9+
@attrs.frozen
class ItemRatios:
    """
    Relative frequencies of the top-level item kinds in the generated ftl file.

    The values are used as weights for a random choice, so only their
    proportion to one another matters.
    """

    # Weight given to generating a message.
    message: int
    # Weight given to generating a standalone comment.
    comment: int
17+
18+
19+
@attrs.frozen
class ElementCountRatios:
    """
    Relative frequencies of how many elements a generated pattern contains.

    Each field is the weight for a pattern with that many elements; the
    values are used as weights for a random choice.
    """

    # Weight for a pattern made of a single element.
    one: int
    # Weight for a pattern made of two elements.
    two: int
    # Weight for a pattern made of three elements.
    three: int
    # Weight for a pattern made of four elements.
    four: int
29+
30+
31+
@attrs.frozen
class Config:
    """
    Settings for one run of the generator.
    """

    # Path of the ftl file to write.
    filename: str
    # How many top-level items (messages and comments) to generate.
    num_items: int

    # Weights steering the random generation. They have defaults and are not
    # exposed on the command line.
    item_ratios: ItemRatios = ItemRatios(message=40, comment=1)
    element_ratios: ElementCountRatios = ElementCountRatios(one=400, two=10, three=10, four=1)
39+
40+
41+
def parse_config() -> Config:
    """
    Build the run configuration from the command line arguments.
    """
    cli = argparse.ArgumentParser(description="Generate a sample ftl file")
    cli.add_argument("filename", help="Filename for the generated file")
    cli.add_argument(
        "-n",
        "--num-items",
        default=100,
        type=int,
        help="The number of items in the generated file",
    )

    # argparse stores --num-items under the name num_items, so the parsed
    # namespace maps directly onto Config's fields.
    options = vars(cli.parse_args())
    return Config(**options)
55+
56+
57+
def generate_file(config: Config) -> None:
    """
    Write a randomly generated ftl file to ``config.filename``.

    Words are read from the system dictionary at /usr/share/dict/words. Only
    purely ASCII-alphabetic words are kept: the same word list is used to
    build message and variable identifiers, and entries such as "it's",
    accented words or blank lines would not form valid Fluent identifiers, so
    the entries built from them would not parse as messages.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default.
    # Undecodable bytes are replaced rather than raising; the replacement
    # character is non-ASCII, so any affected word is dropped by the filter.
    with open("/usr/share/dict/words", encoding="utf-8", errors="replace") as dictionary_file:
        all_words = [
            word
            for word in (line.strip() for line in dictionary_file)
            if word.isascii() and word.isalpha()
        ]

    # Write UTF-8 explicitly so the output does not depend on the locale.
    with open(config.filename, "w", encoding="utf-8") as outfile:
        outfile.write(serialize(_generate_resource(config, all_words)))
63+
64+
65+
def _generate_resource(config: Config, words: list[str]) -> Resource:
    """
    Generate a resource holding ``config.num_items`` random items, each one
    either a message or a comment, picked according to ``config.item_ratios``.
    """
    item_makers = (_generate_message, _generate_comment)
    item_weights = (config.item_ratios.message, config.item_ratios.comment)

    # The kind of each item is drawn immediately before the item itself is
    # generated, one item at a time.
    entries = [
        random.choices(item_makers, item_weights)[0](config, words)
        for _ in range(config.num_items)
    ]
    return Resource(body=entries)
76+
77+
78+
def _generate_message(config: Config, words: list[str]) -> Message:
    """
    Generate a message whose id is four random words joined by hyphens and
    whose value is a random pattern.
    """
    message_id = _generate_identifier(words, joiner="-", elements=4)
    message_value = _generate_pattern(config, words)
    return Message(id=message_id, value=message_value)
84+
85+
86+
def _generate_comment(config: Config, words: list[str]) -> Comment:
    """
    Generate a comment made of one to ten random words, of the form:

    # some words

    ``config`` is not used; it is accepted so that the signature matches
    _generate_message.
    """
    word_count = random.randint(1, 10)
    chosen = random.choices(words, k=word_count)
    return Comment(content=" ".join(chosen))
93+
94+
95+
def _generate_identifier(words: list[str], joiner: str, elements: int) -> Identifier:
    """
    Generate an identifier from ``elements`` random words separated by
    ``joiner``, of the form:

    correct-horse-battery-staple
    """
    parts = random.choices(words, k=elements)
    name = joiner.join(parts)
    return Identifier(name=name)
102+
103+
104+
def _generate_pattern(config: Config, words: list[str]) -> Pattern:
    """
    Generate a pattern of one to four elements, alternating between text and
    placeables and always starting with text:

    some text { identifier } some more text { other_identifier }

    The number of elements is picked according to ``config.element_ratios``.
    """
    ratios = config.element_ratios
    count = random.choices(
        (1, 2, 3, 4),
        weights=(ratios.one, ratios.two, ratios.three, ratios.four),
    )[0]

    # Even positions hold text, odd positions hold placeables.
    return Pattern(
        elements=[
            _generate_placeable(words) if position % 2 else _generate_text_element(words)
            for position in range(count)
        ]
    )
127+
128+
129+
def _generate_text_element(words: list[str]) -> TextElement:
    """
    Generate a text element made of one to ten random words separated by
    single spaces, of the form:

    some words
    """
    word_count = random.randint(1, 10)
    text = " ".join(random.choices(words, k=word_count))
    return TextElement(value=text)
136+
137+
138+
def _generate_placeable(words: list[str]) -> Placeable:
    """
    Generate a placeable wrapping a randomly named variable reference.
    """
    reference = _generate_variable_reference(words)
    return Placeable(expression=reference)
140+
141+
142+
def _generate_variable_reference(words: list[str]) -> VariableReference:
    """
    Generate a reference to a variable whose name is two random words joined
    by an underscore, for example:

    some_variable
    """
    variable_name = _generate_identifier(words, joiner="_", elements=2)
    return VariableReference(id=variable_name)
149+
150+
151+
def main():
    """
    Command line entry point: read the options, then write the ftl file.
    """
    generate_file(parse_config())


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)