feat: agent run snapshots (#622)

jemeza-codegen · web-flow · commit f5565e69eadd · 2025-02-26T10:37:16.000-08:00
# Motivation

<!-- Why is this change necessary? -->

# Content

<!-- Please include a summary of the change -->

# Testing

<!-- How was the change tested? -->

# Please check the following before marking your PR as ready for review

- [ ] I have added tests for my changes
- [ ] I have updated the documentation or added new documentation as
needed
diff --git a/codegen-examples/examples/swebench_agent_run/entry_point.py b/codegen-examples/examples/swebench_agent_run/entry_point.py
@@ -1,6 +1,8 @@
 from codegen.extensions.swebench.utils import SweBenchExample
 from codegen.extensions.swebench.harness import run_agent_on_entry
 import modal
+import sys
+from codegen.sdk.core.codebase import Codebase
 
 image = (
     modal.Image.debian_slim(python_version="3.13")
@@ -17,3 +19,22 @@
 async def run_agent_modal(entry: SweBenchExample):
     """Modal function to process a single example from the SWE-bench dataset."""
     return run_agent_on_entry(entry)
+
+
+@app.cls(image=image, secrets=[modal.Secret.from_dotenv()], enable_memory_snapshot=True)
+class SwebenchAgentRun:  # Modal class: load() builds one Codebase, run() hands it to the harness
+    repo_full_name: str = modal.parameter()  # repo passed to Codebase.from_repo in load()
+    commit: str = modal.parameter()  # commit passed to Codebase.from_repo in load()
+    codebase: Codebase | None = None  # stays None until load() assigns it
+
+    @modal.enter(snap=True)  # NOTE(review): per Modal docs, snap=True hooks run before the memory snapshot is taken - confirm
+    def load(self):  # populate self.codebase for later use by run()
+        self.codebase = Codebase.from_repo(repo_full_name=self.repo_full_name, commit=self.commit, language="python")
+
+    @modal.exit()
+    def exit(self):  # NOTE(review): force-exits the process with status 0; the reason is not visible in this diff
+        sys.exit(0)
+
+    @modal.method()
+    async def run(self, entry: SweBenchExample):  # passes self.codebase so the harness can skip its own Codebase.from_repo
+        return run_agent_on_entry(entry, codebase=self.codebase)
diff --git a/codegen-examples/examples/swebench_agent_run/run_eval.py b/codegen-examples/examples/swebench_agent_run/run_eval.py
@@ -6,16 +6,16 @@
 import modal
 import click
 from datetime import datetime
-from codegen.extensions.swebench.utils import SWEBenchDataset, get_swe_bench_example, get_swe_bench_examples
+from codegen.extensions.swebench.utils import SWEBenchDataset, SweBenchExample, get_swe_bench_example, get_swe_bench_examples
 from codegen.extensions.swebench.report import generate_report
 
 PREDS_DNAME = Path(__file__).parent / "predictions"
 LOG_DIR = Path(__file__).parent / "logs"
 
-run_agent_modal = modal.Function.lookup("swebench-agent-run", "run_agent_modal")
+SwebenchAgentRun = modal.Cls.from_name(app_name="swebench-agent-run", name="SwebenchAgentRun")
 
 
-async def process_batch(examples, batch_size=10):
+async def process_batch(examples: list[SweBenchExample], batch_size=10):
     """Process a batch of examples concurrently.
 
     Args:
@@ -31,7 +31,7 @@ async def process_batch(examples, batch_size=10):
         batch = examples[i : i + batch_size]
 
         # Create tasks for this batch
-        batch_tasks = [run_agent_modal.remote.aio(example) for example in batch]
+        batch_tasks = [SwebenchAgentRun(repo_full_name=example.repo, commit=example.base_commit).run.remote.aio(example) for example in batch]
 
         # Wait for all tasks in this batch to complete
         print(f"Processing batch {i // batch_size + 1}/{len(examples) // batch_size + 1} (examples {i + 1}-{min(i + batch_size, len(examples))})")
diff --git a/codegen-examples/examples/swebench_agent_run/test.py b/codegen-examples/examples/swebench_agent_run/test.py
@@ -0,0 +1,14 @@
+from codegen import Codebase
+import modal
+
+image = modal.Image.debian_slim(python_version="3.13").apt_install("git").pip_install("fastapi[standard]").run_commands("pip install codegen")  # Debian slim / Python 3.13 with git, fastapi[standard] and codegen installed
+
+app = modal.App(name="codegen-examples", image=image, secrets=[modal.Secret.from_dotenv()])  # app is given the image above and secrets from modal.Secret.from_dotenv()
+
+
+@app.function()
+def run_agent(AgentClass):  # AgentClass is called with a Codebase and must return an object exposing run(prompt=...)
+    codebase = Codebase.from_repo(repo_full_name="pallets/flask")  # repository is hard-coded; no commit is specified
+    agent = AgentClass(codebase)
+    agent.run(prompt="Tell me about the codebase and the files in it.")  # return value of agent.run is discarded
+    return True  # unconditional; says nothing about what the agent produced
diff --git a/src/codegen/extensions/swebench/harness.py b/src/codegen/extensions/swebench/harness.py
@@ -48,7 +48,7 @@ def show_problems(dataset):
         print(f"{inst}: {problem}")
 
 
-def run_agent_on_entry(entry: SweBenchExample):
+def run_agent_on_entry(entry: SweBenchExample, codebase: Codebase | None = None):
     """Process one `entry` from SWE Bench using the LLM `models` at the
     given `temperature`.  Set `model_name_or_path` in the result json.
     """
@@ -63,7 +63,8 @@ def run_agent_on_entry(entry: SweBenchExample):
 
     gold_files = files_in_patch(entry.patch)
 
-    codebase = Codebase.from_repo(repo_full_name=entry.repo, commit=base_commit, language="python")  # check out the repo
+    if codebase is None:
+        codebase = Codebase.from_repo(repo_full_name=entry.repo, commit=base_commit, language="python")  # check out the repo
 
     agent = CodeAgent(codebase=codebase)