Skip to content

Commit 5c56a7f

Browse files
dbortfacebook-github-bot
authored andcommitted
extract_sources.py generates cmake sources lists from buck2
Summary: The new `extract_sources.py` tool can query and filter the source files associated with sets of buck2 targets, and write them out to a file that can be included by CMake. In the future, the tool could also support other formats like Make or Ninja. This will make it much easier to keep a parallel CMake-based system in sync with the source of truth in buck2. Reviewed By: larryliu0820 Differential Revision: D47979069 fbshipit-source-id: f2ad6b52a34211878c2dd2d5c2b06b62dde73353
1 parent b21db6c commit 5c56a7f

File tree

2 files changed

+299
-0
lines changed

2 files changed

+299
-0
lines changed

build/cmake_deps.toml

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
# Inherited by all other targets. When a key already exists, the elements of the
8+
# target's value are appended to lists here.
9+
[target_base]
10+
excludes = [
11+
"^third-party",
12+
]
13+
14+
[targets.program_schema]
15+
buck_targets = [
16+
"//schema:program",
17+
]
18+
filters = [
19+
".fbs$",
20+
]
21+
22+
[targets.executorch]
23+
buck_targets = [
24+
"//runtime/executor:executor",
25+
]
26+
deps = [
27+
"program_schema",
28+
]
29+
filters = [
30+
".cpp$",
31+
]
32+
33+
[targets.portable_kernels]
34+
buck_targets = [
35+
# //kernels/portable:operators would be more appropriate, but buck2 doesn't
36+
# think it has any "inputs" since its srcs list is empty.
37+
"//kernels/portable:generated_lib_all_ops",
38+
]
39+
filters = [
40+
".cpp$",
41+
]
42+
excludes = [
43+
# Exclude the codegen templates, which are picked up because the buck target
44+
# is the generated_lib and not the unwrapped set of kernels.
45+
"^codegen/templates",
46+
]
47+
deps = [
48+
"executorch",
49+
]
50+
51+
[targets.executor_runner]
52+
buck_targets = [
53+
"//examples/executor_runner:executor_runner",
54+
]
55+
filters = [
56+
".cpp$",
57+
]
58+
excludes = [
59+
"^codegen",
60+
]
61+
deps = [
62+
"executorch",
63+
"portable_kernels",
64+
]

build/extract_sources.py

Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
#!/usr/bin/env python3
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
import argparse
9+
import copy
10+
import os
11+
import re
12+
import subprocess
13+
14+
from enum import Enum
15+
from typing import Any, Optional, Sequence
16+
17+
try:
18+
import tomllib # Standard in 3.11 and later
19+
except ModuleNotFoundError:
20+
import tomli as tomllib
21+
22+
"""Extracts source lists from the buck2 build system and writes them to a file.
23+
24+
The config file is in TOML format and should contain one or more
25+
`[targets.<target-name>]` entries, along with an optional `[target_base]` entry.
26+
27+
All of these may have the following lists of strings:
28+
- buck_targets: The list of buck targets that map to `<target-name>`.
29+
- deps: A list of other `<target-name>` entries that this target depends on.
30+
Used to prune sources that are provided by those other targets.
31+
- filters: A list of regular expressions. This tool will only emit source files
32+
whose relative paths match all entries.
33+
- excludes: A list of regular expressions. This tool will not emit source files
34+
whose relative paths match any entry.
35+
36+
The special `[target_base]` entry provides default lists that are inherited by
37+
the `[targets.<target-name>]` entries. When a `[targets.<target-name>]` entry defines
38+
a key that is already present in `[target_base]`, the target-specific entries are
39+
appended to the base list.
40+
41+
Example config:
42+
43+
[target_base]
44+
excludes = [
45+
"^third-party",
46+
]
47+
48+
[targets.schema]
49+
buck_targets = [
50+
"//schema:schema",
51+
]
52+
filters = [
53+
".fbs$",
54+
]
55+
56+
[targets.executorch]
57+
buck_targets = [
58+
"//runtime/executor:executor",
59+
]
60+
deps = [
61+
"schema",
62+
]
63+
filters = [
64+
".cpp$",
65+
]
66+
"""
67+
68+
# Working directory for every buck2 invocation: the directory that holds this
# script (with symlinks resolved), so buck2 runs inside the script's own repo.
BUCK_CWD: str = os.path.split(os.path.realpath(__file__))[0]
70+
71+
72+
class Buck2Runner:
    """Runs a buck2 executable and returns what it printed."""

    def __init__(self, tool_path: str) -> None:
        """Remembers which buck2 command to invoke.

        Args:
            tool_path: Name or path of the buck2 executable.
        """
        self._path = tool_path

    def run(self, args: Sequence[str]) -> list[str]:
        """Runs buck2 with the given args and returns its stdout as a list of lines.

        Args:
            args: Command-line arguments to pass to buck2. Any sequence of
                strings is accepted (list, tuple, ...).

        Returns:
            The lines of stdout, each decoded as UTF-8 with surrounding
            whitespace stripped.

        Raises:
            RuntimeError: If buck2 exits with a non-zero status. The message is
                buck2's stderr output.
        """
        try:
            # Unpack instead of `[path] + args` so that non-list sequences
            # (e.g. tuples) work as the type annotation promises.
            cp: subprocess.CompletedProcess = subprocess.run(
                [self._path, *args], capture_output=True, cwd=BUCK_CWD, check=True
            )
        except subprocess.CalledProcessError as ex:
            # Decode leniently: an undecodable byte in stderr must not hide the
            # real buck2 failure behind a UnicodeDecodeError.
            raise RuntimeError(ex.stderr.decode("utf-8", errors="replace")) from ex
        return [line.strip().decode("utf-8") for line in cp.stdout.splitlines()]
85+
86+
87+
class Target:
88+
"""Parsed [targets.*] entry from the TOML file.
89+
90+
Can query buck for its list of source files.
91+
"""
92+
93+
class _InitState(Enum):
94+
UNINITIALIZED = 0
95+
INITIALIZING = 1
96+
READY = 2
97+
98+
def __init__(
99+
self,
100+
name: str,
101+
target_dict: dict[str, Sequence[str]],
102+
base_dict: Optional[dict] = None,
103+
) -> None:
104+
self._state: Target._InitState = Target._InitState.UNINITIALIZED
105+
self._sources = frozenset()
106+
107+
self.name = name
108+
# Extend the base lists with the target-specific entries.
109+
self._config = copy.deepcopy(base_dict or {})
110+
for k, v in target_dict.items():
111+
if k in self._config:
112+
self._config[k].extend(v)
113+
else:
114+
self._config[k] = v
115+
116+
def get_sources(self, graph: "Graph", runner: Buck2Runner) -> frozenset[str]:
117+
if self._state == Target._InitState.READY:
118+
return self._sources
119+
# Detect cycles.
120+
assert self._state != Target._InitState.INITIALIZING
121+
122+
# Assemble the query.
123+
query = "inputs({})".format(
124+
"+".join(
125+
[
126+
"deps('{}')".format(target)
127+
for target in self._config.get("buck_targets", [])
128+
]
129+
)
130+
)
131+
132+
# Get the complete list of source files that this target depends on.
133+
sources: set[str] = set(runner.run(["cquery", query]))
134+
135+
# Keep entries that match all of the filters.
136+
filters = [re.compile(p) for p in self._config.get("filters", [])]
137+
sources = {s for s in sources if all(p.search(s) for p in filters)}
138+
139+
# Remove entries that match any of the excludes.
140+
excludes = [re.compile(p) for p in self._config.get("excludes", [])]
141+
sources = {s for s in sources if not any(p.search(s) for p in excludes)}
142+
143+
# The buck query will give us the complete list of sources that this
144+
# target depends on, but that list includes sources that are owned by
145+
# its deps. Remove entries that are already covered by the transitive
146+
# set of dependencies.
147+
for dep in self._config.get("deps", []):
148+
sources.difference_update(graph.by_name[dep].get_sources(graph, runner))
149+
150+
self._sources = frozenset(sources)
151+
self._state = Target._InitState.READY
152+
return self._sources
153+
154+
155+
class Graph:
    """All targets declared in the config, indexed by target name."""

    def __init__(self, config_dict: dict[str, Any]) -> None:
        """Creates one `Target` per entry under the config's `targets` table.

        Args:
            config_dict: The parsed TOML config. Its optional `target_base`
                table is handed to every `Target` as the shared defaults.
        """
        shared_defaults = config_dict.get("target_base", {})
        self.by_name = {
            target_name: Target(target_name, entry, shared_defaults)
            for target_name, entry in config_dict.get("targets", {}).items()
        }
165+
166+
167+
def parse_args() -> argparse.Namespace:
    """Parses this script's command-line flags from `sys.argv`.

    Returns:
        A namespace with `buck2`, `config`, `format` and `out` attributes.
        `config` is mandatory; `out` is None when the flag is not given.
    """
    cli = argparse.ArgumentParser(
        description="Extracts deps from the buck2 build system",
    )
    # One (flag, add_argument keyword options) pair per supported flag.
    option_table = (
        ("--buck2", {"default": "buck2", "help": "'buck2' command to use"}),
        (
            "--config",
            {
                "metavar": "config.toml",
                "required": True,
                "help": "Path to the input TOML configuration file",
            },
        ),
        (
            "--format",
            {"default": "cmake", "choices": ["cmake"], "help": "Format to generate."},
        ),
        ("--out", {"metavar": "file", "help": "Path to the file to generate."}),
    )
    for flag, options in option_table:
        cli.add_argument(flag, **options)
    return cli.parse_args()
194+
195+
196+
def generate_cmake(target_to_srcs: dict[str, list[str]]) -> bytes:
    """Renders the per-target source lists as CMake `set()` commands.

    Args:
        target_to_srcs: Maps each target name to its list of source paths.

    Returns:
        UTF-8 encoded text: a generated-by header line, then for every target
        a blank line followed by a `set(_<target>__srcs ...)` command with one
        source path per line.
    """
    # The marker is assembled from two pieces so the complete marker text never
    # appears verbatim in this script's own source.
    rendered: list[str] = ["# @" + f"generated by {os.path.basename(__file__)}"]
    for name, paths in target_to_srcs.items():
        rendered.extend(["", f"set(_{name}__srcs"])
        rendered.extend(f" {path}" for path in paths)
        rendered.append(")")
    return "\n".join(rendered).encode("utf-8")
206+
207+
208+
def main() -> None:
    """Reads the TOML config, queries buck2, and emits the generated file.

    The result is written to the path given by `--out`. When `--out` is not
    given, the result is written to stdout instead.

    Raises:
        ValueError: If the requested output format is not supported.
    """
    # Imported locally because only the stdout fallback below needs it.
    import sys

    args = parse_args()

    # Load and parse the TOML configuration
    with open(args.config, mode="rb") as fp:
        config_dict = tomllib.load(fp)
    graph = Graph(config_dict)

    # Run the queries and get the lists of source files. The lists are sorted
    # so the generated output is stable from run to run.
    runner: Buck2Runner = Buck2Runner(args.buck2)
    target_to_srcs: dict[str, list[str]] = {
        name: sorted(target.get_sources(graph, runner))
        for name, target in graph.by_name.items()
    }

    # Generate the requested format.
    output: bytes
    if args.format == "cmake":
        output = generate_cmake(target_to_srcs)
    else:
        raise ValueError("Unknown format: {}".format(args.format))

    # Write the output. `--out` is optional, so fall back to stdout rather than
    # passing None to open().
    if args.out is None:
        sys.stdout.buffer.write(output)
    else:
        with open(args.out, "wb") as fp:
            fp.write(output)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)