Skip to content

Commit 28ba977

Browse files
kiukchung authored and
facebook-github-bot committed
Remove rust-cli fallback in favor of a pure-Python CLI
Summary: All the working subcommands were falling back to Python anyway. Moved the (currently unimplemented) subcommand stubs `bounce` and `stop` to Python. **Note:** A couple of reasons why a Rust CLI for monarch isn't ideal: 1. It uses TorchX under the hood, and TorchX is a Python library. 2. Due to #1 we have to run a Python CLI fallback anyway, and the mechanics of this are Meta-specific (won't work for OSS). 3. Reverse-binding TorchX via pyo3 (calling Python from Rust) doesn't work internally due to the way we package Python (hermetic PAR). 4. Any material benefits (e.g. performance?) of implementing the CLI in Rust would be negated by the effort to fix/deal with #1-3. **Next:** ~~[6/n] Have kd_monarch use the default component (the custom mast.py is no longer needed). Update the README with updated instructions.~~ ~~[7/n] Remove the Rust CLI in favor of all-Python (we delegate to TorchX for most things anyway)~~ [8/n] Add an E2E unit test using the local_cwd scheduler (actually run a mini-trainer actor) [9/n] Write an OSS hyperactor mesh-worker entrypoint binary [10/n] Author a Dockerfile that sets up the environment (much like fbpkgs do for internal runs) [11/n] Author a TorchXAllocator Reviewed By: vidhyav, suo Differential Revision: D75176535 fbshipit-source-id: 29020f4032bd642af26b393ade74f40b868df973
1 parent 916e62c commit 28ba977

File tree

11 files changed

+50
-409
lines changed

11 files changed

+50
-409
lines changed

tools/Cargo.toml

Lines changed: 0 additions & 19 deletions
This file was deleted.

tools/cli.py

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,12 @@
1111

1212
from fastcli.argparse import inject_fastcli
1313
from monarch.tools.commands import (
14+
bounce,
1415
component_args_from_cli,
1516
create,
1617
info,
1718
kill,
19+
stop,
1820
torchx_runner,
1921
)
2022
from monarch.tools.config import ( # @manual=//monarch/tools/config/meta:defaults
@@ -89,14 +91,20 @@ def run(self, args: argparse.Namespace) -> None:
8991
print(handle)
9092

9193

92-
class InfoCmd:
93-
def add_arguments(self, subparser: argparse.ArgumentParser) -> None:
94+
class CommonArguments:
95+
@staticmethod
96+
def add_server_handle(subparser: argparse.ArgumentParser) -> None:
9497
subparser.add_argument(
9598
"server_handle",
9699
type=str,
97-
help="monarch server handle (e.g. mast:///job_id)",
100+
help="monarch server handle (e.g. slurm:///job_id)",
98101
)
99102

103+
104+
class InfoCmd:
105+
def add_arguments(self, subparser: argparse.ArgumentParser) -> None:
106+
CommonArguments.add_server_handle(subparser)
107+
100108
def run(self, args: argparse.Namespace) -> None:
101109
server_spec = info(args.server_handle)
102110
if server_spec is None:
@@ -110,16 +118,28 @@ def run(self, args: argparse.Namespace) -> None:
110118

111119
class KillCmd:
112120
def add_arguments(self, subparser: argparse.ArgumentParser) -> None:
113-
subparser.add_argument(
114-
"server_handle",
115-
type=str,
116-
help="monarch server handle (e.g. mast:///job_id)",
117-
)
121+
CommonArguments.add_server_handle(subparser)
118122

119123
def run(self, args: argparse.Namespace) -> None:
120124
kill(args.server_handle)
121125

122126

127+
class BounceCmd:
128+
def add_arguments(self, subparser: argparse.ArgumentParser) -> None:
129+
CommonArguments.add_server_handle(subparser)
130+
131+
def run(self, args: argparse.Namespace) -> None:
132+
bounce(args.server_handle)
133+
134+
135+
class StopCmd:
136+
def add_arguments(self, subparser: argparse.ArgumentParser) -> None:
137+
CommonArguments.add_server_handle(subparser)
138+
139+
def run(self, args: argparse.Namespace) -> None:
140+
stop(args.server_handle)
141+
142+
123143
def get_parser() -> argparse.ArgumentParser:
124144
parser = argparse.ArgumentParser(description="Monarch CLI")
125145
subparser = parser.add_subparsers(title="COMMANDS")
@@ -128,6 +148,9 @@ def get_parser() -> argparse.ArgumentParser:
128148
"create": CreateCmd(),
129149
"info": InfoCmd(),
130150
"kill": KillCmd(),
151+
# --- placeholder subcommands (not yet implemented) ---
152+
"bounce": BounceCmd(),
153+
"stop": StopCmd(),
131154
}.items():
132155
cmd_parser = subparser.add_parser(cmd_name)
133156
cmd.add_arguments(cmd_parser)

tools/commands.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,3 +177,13 @@ def info(server_handle: str) -> Optional[ServerSpec]:
177177
def kill(server_handle: str) -> None:
178178
with torchx_runner() as runner:
179179
runner.cancel(server_handle)
180+
181+
182+
def bounce(server_handle: str) -> None:
183+
"""(re)starts the server's processes without tearing down the server's job."""
184+
raise NotImplementedError("`bounce` is not yet implemented")
185+
186+
187+
def stop(server_handle: str) -> None:
188+
"""Stops the server's unix processes without tearing down the server's job."""
189+
raise NotImplementedError("`stop` is not yet implemented")

tools/src/args.rs

Lines changed: 0 additions & 140 deletions
This file was deleted.

tools/src/commands.rs

Lines changed: 0 additions & 39 deletions
This file was deleted.

tools/src/commands/bounce.rs

Lines changed: 0 additions & 50 deletions
This file was deleted.

0 commit comments

Comments
 (0)