Skip to content

feat: agent run snapshots #622

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions codegen-examples/examples/swebench_agent_run/entry_point.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from codegen.extensions.swebench.utils import SweBenchExample
from codegen.extensions.swebench.harness import run_agent_on_entry
import modal
import sys
from codegen.sdk.core.codebase import Codebase

image = (
modal.Image.debian_slim(python_version="3.13")
Expand All @@ -17,3 +19,22 @@
async def run_agent_modal(entry: SweBenchExample):
    """Process one SWE-bench example as a Modal function.

    Args:
        entry: The SWE-bench example handed to the harness.

    Returns:
        The value produced by `run_agent_on_entry` for `entry`, unchanged.
    """
    outcome = run_agent_on_entry(entry)
    return outcome


@app.cls(
    image=image,
    secrets=[modal.Secret.from_dotenv()],
    enable_memory_snapshot=True,
)
class SwebenchAgentRun:
    """Modal class that runs the SWE-bench agent against a codebase loaded up front.

    An instance is parameterized by `repo_full_name` and `commit`. The
    codebase for that pair is built once in `load` and then passed to
    `run_agent_on_entry` on every `run` call.
    """

    # Repository to load, forwarded to `Codebase.from_repo` as `repo_full_name`.
    repo_full_name: str = modal.parameter()
    # Commit to load, forwarded to `Codebase.from_repo` as `commit`.
    commit: str = modal.parameter()
    # Populated by `load`; stays None until that hook has run.
    codebase: Codebase | None = None

    @modal.enter(snap=True)
    def load(self):
        """Build the Python codebase for this instance's repo and commit.

        NOTE(review): registered with `snap=True` on a class that sets
        `enable_memory_snapshot=True`, presumably so the loaded codebase is
        captured in the memory snapshot -- confirm against the Modal docs.
        """
        self.codebase = Codebase.from_repo(
            repo_full_name=self.repo_full_name,
            commit=self.commit,
            language="python",
        )

    @modal.exit()
    def exit(self):
        """Terminate the process with status 0 when the exit hook fires.

        NOTE(review): the reason for the explicit `sys.exit(0)` is not visible
        in this file -- confirm it is still needed.
        """
        sys.exit(0)

    @modal.method()
    async def run(self, entry: SweBenchExample):
        """Run the agent on `entry`, reusing the codebase stored by `load`.

        Args:
            entry: The SWE-bench example to process.

        Returns:
            The value produced by `run_agent_on_entry`.
        """
        preloaded = self.codebase
        return run_agent_on_entry(entry, codebase=preloaded)
8 changes: 4 additions & 4 deletions codegen-examples/examples/swebench_agent_run/run_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@
import modal
import click
from datetime import datetime
from codegen.extensions.swebench.utils import SWEBenchDataset, get_swe_bench_example, get_swe_bench_examples
from codegen.extensions.swebench.utils import SWEBenchDataset, SweBenchExample, get_swe_bench_example, get_swe_bench_examples
from codegen.extensions.swebench.report import generate_report

# Output locations, both resolved relative to the directory containing this file.
PREDS_DNAME = Path(__file__).parent / "predictions"
LOG_DIR = Path(__file__).parent / "logs"

# Handle to the `run_agent_modal` function of the "swebench-agent-run" Modal app.
run_agent_modal = modal.Function.lookup("swebench-agent-run", "run_agent_modal")
# Handle to the `SwebenchAgentRun` class of the same "swebench-agent-run" app.
SwebenchAgentRun = modal.Cls.from_name(app_name="swebench-agent-run", name="SwebenchAgentRun")


async def process_batch(examples, batch_size=10):
async def process_batch(examples: list[SweBenchExample], batch_size=10):
"""Process a batch of examples concurrently.

Args:
Expand All @@ -31,7 +31,7 @@ async def process_batch(examples, batch_size=10):
batch = examples[i : i + batch_size]

# Create tasks for this batch
batch_tasks = [run_agent_modal.remote.aio(example) for example in batch]
batch_tasks = [SwebenchAgentRun(repo_full_name=example.repo, commit=example.base_commit).run.remote.aio(example) for example in batch]

# Wait for all tasks in this batch to complete
print(f"Processing batch {i // batch_size + 1}/{len(examples) // batch_size + 1} (examples {i + 1}-{min(i + batch_size, len(examples))})")
Expand Down
14 changes: 14 additions & 0 deletions codegen-examples/examples/swebench_agent_run/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from codegen import Codebase
import modal

# Container image: Debian slim with Python 3.13, git from apt, fastapi[standard]
# from pip, then codegen installed through a plain `pip install` command.
image = (
    modal.Image.debian_slim(python_version="3.13")
    .apt_install("git")
    .pip_install("fastapi[standard]")
    .run_commands("pip install codegen")
)

# Modal app for the examples; uses the image above and secrets from `from_dotenv()`.
app = modal.App(
    name="codegen-examples",
    image=image,
    secrets=[modal.Secret.from_dotenv()],
)


@app.function()
def run_agent(AgentClass):
    """Load the pallets/flask codebase and have an agent describe it.

    Args:
        AgentClass: Callable invoked with the loaded codebase as its single
            positional argument; the object it returns must provide
            `run(prompt=...)`.

    Returns:
        True, once the agent's `run` call has returned.
    """
    flask_codebase = Codebase.from_repo(repo_full_name="pallets/flask")
    AgentClass(flask_codebase).run(prompt="Tell me about the codebase and the files in it.")
    return True
5 changes: 3 additions & 2 deletions src/codegen/extensions/swebench/harness.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def show_problems(dataset):
print(f"{inst}: {problem}")


def run_agent_on_entry(entry: SweBenchExample):
def run_agent_on_entry(entry: SweBenchExample, codebase: Codebase | None = None):
"""Process one `entry` from SWE Bench using the LLM `models` at the
given `temperature`. Set `model_name_or_path` in the result json.
"""
Expand All @@ -63,7 +63,8 @@ def run_agent_on_entry(entry: SweBenchExample):

gold_files = files_in_patch(entry.patch)

codebase = Codebase.from_repo(repo_full_name=entry.repo, commit=base_commit, language="python") # check out the repo
if codebase is None:
codebase = Codebase.from_repo(repo_full_name=entry.repo, commit=base_commit, language="python") # check out the repo

agent = CodeAgent(codebase=codebase)

Expand Down
Loading