Skip to content

Commit f5565e6

Browse files
feat: agent run snapshots (#622)
# Motivation
<!-- Why is this change necessary? -->

# Content
<!-- Please include a summary of the change -->

# Testing
<!-- How was the change tested? -->

# Please check the following before marking your PR as ready for review
- [ ] I have added tests for my changes
- [ ] I have updated the documentation or added new documentation as needed
1 parent 208a521 commit f5565e6

File tree

4 files changed

+42
-6
lines changed

4 files changed

+42
-6
lines changed

codegen-examples/examples/swebench_agent_run/entry_point.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from codegen.extensions.swebench.utils import SweBenchExample
22
from codegen.extensions.swebench.harness import run_agent_on_entry
33
import modal
4+
import sys
5+
from codegen.sdk.core.codebase import Codebase
46

57
image = (
68
modal.Image.debian_slim(python_version="3.13")
@@ -17,3 +19,22 @@
1719
async def run_agent_modal(entry: SweBenchExample):
    """Run the SWE-bench agent on one dataset example from inside Modal.

    Args:
        entry: The SWE-bench example to process.

    Returns:
        Whatever ``run_agent_on_entry`` returns for ``entry``.
    """
    # No codebase is supplied here; the call passes only the entry itself.
    outcome = run_agent_on_entry(entry)
    return outcome
22+
23+
24+
@app.cls(image=image, secrets=[modal.Secret.from_dotenv()], enable_memory_snapshot=True)
class SwebenchAgentRun:
    """Modal class that runs the SWE-bench agent against a pre-loaded codebase.

    Instances are parameterized by repository and commit. The codebase is
    built in the ``snap=True`` enter hook and then handed to every ``run``
    call, so the agent does not have to build it per example.
    """

    # Modal parameters identifying which repository state to load.
    repo_full_name: str = modal.parameter()
    commit: str = modal.parameter()
    # Populated by ``load``; stays ``None`` until the enter hook has run.
    codebase: Codebase | None = None

    @modal.enter(snap=True)
    def load(self):
        """Build the ``Codebase`` for this instance's repository and commit."""
        # NOTE(review): registered with snap=True on a class that enables
        # memory snapshots -- presumably so the parsed codebase is captured
        # in the snapshot; confirm against Modal's snapshot documentation.
        parsed_repo = Codebase.from_repo(
            repo_full_name=self.repo_full_name,
            commit=self.commit,
            language="python",
        )
        self.codebase = parsed_repo

    @modal.exit()
    def exit(self):
        """Terminate the process with status 0 when the container exits."""
        sys.exit(0)

    @modal.method()
    async def run(self, entry: SweBenchExample):
        """Run the agent on ``entry`` using the codebase held by this instance.

        Args:
            entry: The SWE-bench example to process.

        Returns:
            Whatever ``run_agent_on_entry`` returns for ``entry``.
        """
        preloaded = self.codebase
        return run_agent_on_entry(entry, codebase=preloaded)

codegen-examples/examples/swebench_agent_run/run_eval.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,16 @@
66
import modal
77
import click
88
from datetime import datetime
9-
from codegen.extensions.swebench.utils import SWEBenchDataset, get_swe_bench_example, get_swe_bench_examples
9+
from codegen.extensions.swebench.utils import SWEBenchDataset, SweBenchExample, get_swe_bench_example, get_swe_bench_examples
1010
from codegen.extensions.swebench.report import generate_report
1111

1212
PREDS_DNAME = Path(__file__).parent / "predictions"
1313
LOG_DIR = Path(__file__).parent / "logs"
1414

15-
run_agent_modal = modal.Function.lookup("swebench-agent-run", "run_agent_modal")
15+
SwebenchAgentRun = modal.Cls.from_name(app_name="swebench-agent-run", name="SwebenchAgentRun")
1616

1717

18-
async def process_batch(examples, batch_size=10):
18+
async def process_batch(examples: list[SweBenchExample], batch_size=10):
1919
"""Process a batch of examples concurrently.
2020
2121
Args:
@@ -31,7 +31,7 @@ async def process_batch(examples, batch_size=10):
3131
batch = examples[i : i + batch_size]
3232

3333
# Create tasks for this batch
34-
batch_tasks = [run_agent_modal.remote.aio(example) for example in batch]
34+
batch_tasks = [SwebenchAgentRun(repo_full_name=example.repo, commit=example.base_commit).run.remote.aio(example) for example in batch]
3535

3636
# Wait for all tasks in this batch to complete
3737
print(f"Processing batch {i // batch_size + 1}/{len(examples) // batch_size + 1} (examples {i + 1}-{min(i + batch_size, len(examples))})")
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from codegen import Codebase
2+
import modal
3+
4+
# Container image for the example app: Debian slim with Python 3.13, plus git,
# fastapi[standard] and codegen.
image = (
    modal.Image.debian_slim(python_version="3.13")
    .apt_install("git")
    .pip_install("fastapi[standard]")
    .run_commands("pip install codegen")
)

# Modal app that uses the image above and takes its secrets from dotenv.
app = modal.App(
    name="codegen-examples",
    image=image,
    secrets=[modal.Secret.from_dotenv()],
)
7+
8+
9+
@app.function()
def run_agent(AgentClass):
    """Instantiate ``AgentClass`` on the pallets/flask codebase and run one prompt.

    Args:
        AgentClass: Callable that accepts a ``Codebase`` and returns an object
            exposing ``run(prompt=...)``.

    Returns:
        Always ``True`` once the agent's ``run`` call has returned.
    """
    flask_codebase = Codebase.from_repo(repo_full_name="pallets/flask")
    # The agent's own return value is discarded; only completion is reported.
    AgentClass(flask_codebase).run(prompt="Tell me about the codebase and the files in it.")
    return True

src/codegen/extensions/swebench/harness.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def show_problems(dataset):
4848
print(f"{inst}: {problem}")
4949

5050

51-
def run_agent_on_entry(entry: SweBenchExample):
51+
def run_agent_on_entry(entry: SweBenchExample, codebase: Codebase | None = None):
5252
"""Process one `entry` from SWE Bench using the LLM `models` at the
5353
given `temperature`. Set `model_name_or_path` in the result json.
5454
"""
@@ -63,7 +63,8 @@ def run_agent_on_entry(entry: SweBenchExample):
6363

6464
gold_files = files_in_patch(entry.patch)
6565

66-
codebase = Codebase.from_repo(repo_full_name=entry.repo, commit=base_commit, language="python") # check out the repo
66+
if codebase is None:
67+
codebase = Codebase.from_repo(repo_full_name=entry.repo, commit=base_commit, language="python") # check out the repo
6768

6869
agent = CodeAgent(codebase=codebase)
6970

0 commit comments

Comments
 (0)