Skip to content

Commit 203d23e

Browse files
got e2b working on simpy simple examples
1 parent 70ccde4 commit 203d23e

File tree

6 files changed

+103
-16
lines changed

6 files changed

+103
-16
lines changed

commit0/harness/build.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from datasets import load_dataset
55
from typing import Iterator, Union
66

7-
from commit0.harness.constants import RepoInstance, SimpleInstance, SPLIT
7+
from commit0.harness.constants import RepoInstance, SimpleInstance, SPLIT, ABSOLUTE_REPO_DIR
88
from commit0.harness.docker_build import build_repo_images
99
from commit0.harness.spec import make_spec
1010

@@ -45,7 +45,7 @@ def main(
4545
repo_name = example["repo"].split("/")[-1]
4646
if split != "all" and repo_name not in SPLIT[split]:
4747
continue
48-
spec = make_spec(example, dataset_type)
48+
spec = make_spec(example, dataset_type, DOCKER_REPO_DIR)
4949
specs.append(spec)
5050

5151
client = docker.from_env()

commit0/harness/constants.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@ def items(self) -> ItemsView[str, object]:
6565

6666
# Evaluation backends
6767
EVAL_BACKENDS = ["local", "modal", "e2b"]
68+
# Use an absolute repo path for Docker and Modal, which are backends with sudo access.
69+
ABSOLUTE_REPO_DIR = "/testbed"
70+
# Use a relative repo path for E2B, which has no sudo access.
71+
RELATIVE_REPO_DIR = "testbed"
6872

6973
# available commands
7074
COMMANDS = [

commit0/harness/execution_context.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@
1010
import modal
1111
import modal.io_streams
1212
from enum import auto
13+
from e2b_code_interpreter import Sandbox
1314
from strenum import StrEnum
1415
from pathlib import Path
16+
import tempfile
1517
import time
1618
from typing import Optional, Type
1719
from types import TracebackType
@@ -220,3 +222,62 @@ def __exit__(
220222
exctb: Optional[TracebackType],
221223
) -> None:
222224
close_logger(self.logger)
225+
226+
227+
class E2B(ExecutionContext):
    """Execution context that runs the evaluation inside an E2B sandbox.

    On construction a sandbox is created, ``uv`` is installed in it, the
    spec's setup script is executed, and every file in ``files_to_copy`` is
    written into the sandbox under the basename of its destination path.
    """

    def __init__(
        self,
        spec: Spec,
        logger: logging.Logger,
        timeout: int,
        num_cpus: int,
        log_dir: Path,
        files_to_copy: Optional[Files] = None,
        files_to_collect: Optional[list[str]] = None,
        rebuild_image: bool = False,
    ):
        """Create the sandbox and prepare it for evaluation.

        Args:
            spec: Test specification; its ``setup_script`` is run in the
                sandbox and its ``repo_directory`` is where collected files
                are read from.
            logger: Logger closed again in ``__exit__``.
            timeout: Forwarded to the base class and to ``Sandbox(timeout=...)``.
            num_cpus: Forwarded to the base class only.
            log_dir: Local directory that collected files are written to.
            files_to_copy: Files whose ``src`` is read locally and written to
                the sandbox as ``dest.name``.
            files_to_collect: File names (relative to the repo directory) to
                fetch from the sandbox after a command has run.
            rebuild_image: Accepted for signature parity with the other
                execution contexts; not used by this backend.

        """
        super().__init__(
            spec,
            logger,
            timeout,
            num_cpus,
            log_dir,
            files_to_copy=files_to_copy,
            files_to_collect=files_to_collect,
        )
        # The eval script writes its outputs inside the repo directory, so
        # remember it instead of hard-coding "testbed" when collecting files.
        self._repo_directory = spec.repo_directory

        self.sb = Sandbox(timeout=timeout)
        try:
            self.sb.commands.run("curl -LsSf https://astral.sh/uv/install.sh | sh")

            # setup sandbox env
            self.sb.files.write("setup.sh", spec.setup_script)
            self.sb.commands.run("bash setup.sh")

            # prepare for eval
            if files_to_copy:
                for f in files_to_copy.values():
                    with open(f["src"], "r") as fp:
                        content = fp.read()
                    self.sb.files.write(f["dest"].name, content)  # type: ignore
        except BaseException:
            # __exit__ is never called when __init__ fails, so release the
            # sandbox here rather than leaking it until its own timeout.
            self.sb.kill()
            raise

    def exec_run_with_timeout(self, command: str) -> tuple[str, bool, float]:
        """Execute command on E2B sandbox.

        After the command finishes, every file in ``files_to_collect`` is read
        from the sandbox's repo directory and written into ``log_dir``.

        Returns:
            A tuple ``(stderr, timed_out, elapsed_seconds)``. ``timed_out`` is
            currently always ``False`` because no timeout is enforced here.
            The elapsed time includes the file collection.

        """
        # TODO: setup timeout
        start_time = time.time()
        result = self.sb.commands.run(command)
        # files_to_collect defaults to None; treat that as "nothing to collect".
        for fname in self.files_to_collect or []:
            with (self.log_dir / fname).open("w") as f:
                f.write(self.sb.files.read(f"{self._repo_directory}/{fname}"))
        timed_out = False  # TODO: figure this out
        end_time = time.time()
        return result.stderr, timed_out, end_time - start_time

    def __exit__(
        self,
        exctype: Optional[Type[BaseException]],
        excinst: Optional[BaseException],
        exctb: Optional[TracebackType],
    ) -> None:
        """Kill the sandbox and close the logger, even if the kill fails."""
        try:
            self.sb.kill()
        finally:
            close_logger(self.logger)

commit0/harness/run_pytest_ids.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@
77

88
from typing import Iterator, Union
99
from commit0.harness.constants import (
10+
ABSOLUTE_REPO_DIR,
1011
EVAL_BACKENDS,
1112
Files,
13+
RELATIVE_REPO_DIR,
1214
RUN_PYTEST_LOG_DIR,
1315
RepoInstance,
1416
SimpleInstance,
@@ -53,6 +55,7 @@ def main(
5355
dataset_name, split=dataset_split
5456
) # type: ignore
5557
dataset_name = dataset_name.lower()
58+
absolute = backend != "e2b"
5659
spec = None
5760
example = None
5861
repo_name = None
@@ -77,7 +80,7 @@ def main(
7780
if repo_name in os.path.basename(repo_or_repo_dir) or repo_or_repo_dir.endswith(
7881
repo_name
7982
):
80-
spec = make_spec(example, dataset_type)
83+
spec = make_spec(example, dataset_type, absolute)
8184
break
8285
assert spec is not None, "No spec available"
8386
assert example is not None, "No example available"
@@ -188,22 +191,22 @@ def main(
188191

189192
backend = backend.upper()
190193
if ExecutionBackend(backend) == ExecutionBackend.MODAL:
191-
logger.info("Runnning on Modal")
194+
logger.info("Running on Modal")
192195
execution_context = Modal
193196
elif ExecutionBackend(backend) == ExecutionBackend.LOCAL:
194-
logger.info("Runnning locally")
197+
logger.info("Running locally")
195198
execution_context = Docker
196199
elif ExecutionBackend(backend) == ExecutionBackend.E2B:
197-
logger.info("Runnning E2B")
200+
logger.info("Running E2B")
198201
execution_context = E2B
199202
else:
200203
raise ValueError(
201204
f"Evaluation must be from {', '.join(EVAL_BACKENDS)}, but {backend} is provided."
202205
)
203206

204207
files_to_copy = Files(
205-
eval_script={"src": eval_file, "dest": Path("/eval.sh")},
206-
patch={"src": patch_file, "dest": Path("/patch.diff")},
208+
eval_script={"src": eval_file, "dest": Path("/eval.sh" if absolute else "eval.sh")},
209+
patch={"src": patch_file, "dest": Path("/patch.diff" if absolute else "patch.diff")},
207210
)
208211
files_to_collect = [
209212
"report.json",
@@ -213,6 +216,8 @@ def main(
213216
if coverage:
214217
files_to_collect.append("coverage.json")
215218

219+
220+
eval_command = "/bin/bash /eval.sh" if ExecutionBackend(backend) != ExecutionBackend.E2B else "/bin/bash eval.sh"
216221
try:
217222
with execution_context(
218223
spec,
@@ -225,7 +230,7 @@ def main(
225230
rebuild_image,
226231
) as context:
227232
output, timed_out, total_runtime = context.exec_run_with_timeout(
228-
"/bin/bash /eval.sh"
233+
eval_command
229234
)
230235
logger.info(output)
231236
if timed_out:

commit0/harness/spec.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
from typing import Union, cast, Optional
55

66
from commit0.harness.constants import (
7+
ABSOLUTE_REPO_DIR,
8+
RELATIVE_REPO_DIR,
79
RepoInstance,
810
SimpleInstance,
911
)
@@ -17,6 +19,7 @@
1719
class Spec(ABC):
1820
"""A dataclass that represents a test specification for a single instance of SWE-bench."""
1921

22+
absolute: bool
2023
repo: str
2124
# repo dir on docker
2225
repo_directory: str
@@ -164,11 +167,12 @@ def make_repo_script_list(self) -> list[str]:
164167

165168
def make_eval_script_list(self) -> list[str]:
166169
"""Run the tests."""
170+
diff_path = "/patch.diff" if self.absolute else "../patch.diff"
167171
eval_script_list = [
168172
f"cd {self.repo_directory}",
169173
"source .venv/bin/activate",
170174
f"git reset --hard {self.instance['base_commit']}",
171-
"git apply --allow-empty -v /patch.diff",
175+
f"git apply --allow-empty -v {diff_path}",
172176
"git status",
173177
f"{self.instance['test']['test_cmd']} --json-report --json-report-file=report.json --continue-on-collection-errors{{coverage}} {{test_ids}} > test_output.txt 2>&1",
174178
"echo $? > pytest_exit_code.txt",
@@ -306,39 +310,43 @@ def make_eval_script_list(self) -> list[str]:
306310
def get_specs_from_dataset(
    dataset: Union[list[Union[RepoInstance, SimpleInstance]], list[Spec]],
    dataset_type: str,
    absolute: bool = True,
) -> list[Spec]:
    """Idempotent function that converts a list of RepoInstance objects to a list of Spec objects.

    Args:
        dataset: Either raw instances or already-built ``Spec`` objects. The
            first element decides which: if it is a ``Spec`` the list is
            returned unchanged.
        dataset_type: Forwarded to ``make_spec`` to select the spec class.
        absolute: Forwarded to ``make_spec``. Defaults to ``True`` so callers
            that predate this parameter keep working with the absolute repo
            directory they used before.

    Returns:
        A list of ``Spec`` objects; empty when ``dataset`` is empty.

    """
    # An empty dataset has no first element to inspect; nothing to convert.
    if not dataset:
        return []
    if isinstance(dataset[0], Spec):
        return cast(list[Spec], dataset)
    return [
        make_spec(instance, dataset_type, absolute)
        for instance in cast(list["RepoInstance"], dataset)
    ]
319324

320325

321-
def make_spec(instance: Union[RepoInstance, SimpleInstance], dataset_type: str) -> Spec:
326+
def make_spec(instance: Union[RepoInstance, SimpleInstance], dataset_type: str, absolute: bool) -> Spec:
327+
repo_directory = ABSOLUTE_REPO_DIR if absolute else RELATIVE_REPO_DIR
322328
if isinstance(instance, Spec):
323329
return instance
324-
repo_directory = "/testbed"
325330
if dataset_type == "commit0":
326331
return Commit0Spec(
327332
repo=instance["instance_id"],
328333
repo_directory=repo_directory,
329334
instance=instance,
335+
absolute=absolute,
330336
)
331337
elif dataset_type == "swebench":
332338
return SWEBenchSpec(
333339
repo=instance["instance_id"],
334340
repo_directory=repo_directory,
335341
instance=instance,
342+
absolute=absolute,
336343
)
337344
elif dataset_type == "simple":
338345
return SimpleSpec(
339346
repo="simple", # all benchmarks with mere function writing will share the simple docker image
340347
repo_directory=repo_directory,
341348
instance=instance,
349+
absolute=absolute,
342350
)
343351
else:
344352
raise NotImplementedError(

test_e2b.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,16 @@
33
from e2b_code_interpreter import Sandbox
44

55
sb = Sandbox()
6-
sb.commands.run("pip install commit0")
7-
sb.commands.run("commit0 setup tinydb")
6+
# install uv
7+
sb.commands.run("curl -LsSf https://astral.sh/uv/install.sh | sh")
8+
sb.commands.run("pip install git+https://github.com/commit-0/commit0.git@justin/e2b")
9+
# run setup script
10+
# copy diff
11+
# run eval script
12+
execution = sb.commands.run("commit0 setup tinydb")
13+
print(execution.stdout)
14+
execution = sb.commands.run("commit0 test simpy tests/test_event.py::test_succeed --reference --backend e2b")
15+
print(execution.stdout)
16+
execution = sb.commands.run("commit0 test simpy tests/test_event.py::test_succeed --backend e2b")
17+
print(execution.stdout)
818
import pdb; pdb.set_trace()
9-
k

0 commit comments

Comments
 (0)