Reduce test time (#706)

jemeza-codegen · web-flow · commit 92be9215ce93 · 2025-02-28T18:17:40.000-08:00
# Motivation

Modified the validation flow to build the codebase only partially (file
parsing and graph construction are disabled), which reduces test time
substantially.

# Content

<!-- Please include a summary of the change -->

# Testing

<!-- How was the change tested? -->

# Please check the following before marking your PR as ready for review

- [ ] I have added tests for my changes
- [ ] I have updated the documentation or added new documentation as
needed
diff --git a/codegen-examples/examples/swebench_agent_run/run_eval.py b/codegen-examples/examples/swebench_agent_run/run_eval.py
@@ -17,7 +17,7 @@
 run_agent_modal = modal.Function.from_name(app_name="swebench-agent-run", name="run_agent_modal")
 
 
-async def process_batch(examples: list[SweBenchExample], batch_size=10):
+async def process_batch_modal(examples: list[SweBenchExample], batch_size=10):
     """Process a batch of examples concurrently.
 
     Args:
@@ -90,7 +90,7 @@ async def process_batch(examples: list[SweBenchExample], batch_size=10):
     return results
 
 
-def process_batch_sync(examples: list[SweBenchExample], batch_size=10, codebases: dict[str, Codebase] = {}):
+def process_batch_local(examples: list[SweBenchExample], batch_size=10, codebases: dict[str, Codebase] = {}):
     """Process a batch of examples synchronously.
 
     Args:
@@ -160,9 +160,9 @@ async def run_eval(use_existing_preds: str | None, dataset: str, length: int, in
 
             # Process all examples in parallel batches
             if local:
-                results = process_batch_sync(examples, codebases=codebases)
+                results = process_batch_local(examples, codebases=codebases)
             else:
-                results = await process_batch(examples)
+                results = await process_batch_modal(examples)
 
             # Save individual results
             for result in results:
diff --git a/src/codegen/extensions/swebench/harness.py b/src/codegen/extensions/swebench/harness.py
@@ -12,6 +12,7 @@
 
 from codegen import Codebase
 from codegen.agents.code_agent import CodeAgent
+from codegen.configs.models.codebase import CodebaseConfig
 from codegen.extensions.swebench.utils import (
     SweBenchExample,
     get_swe_bench_examples,
@@ -64,7 +65,10 @@ def run_agent_on_entry(entry: SweBenchExample, codebase: Codebase | None = None)
     gold_files = files_in_patch(entry.patch)
 
     if codebase is None:
-        codebase = Codebase.from_repo(repo_full_name=entry.repo, commit=base_commit, language="python")  # check out the repo
+        config = CodebaseConfig(
+            disable_file_parse=True,  # Disable the graph AND disable file parsing (file.edit only)
+        )
+        codebase = Codebase.from_repo(repo_full_name=entry.repo, commit=base_commit, language="python", config=config)  # check out the repo
 
     agent = CodeAgent(codebase=codebase)
 
@@ -117,8 +121,9 @@ def run_agent_on_entry(entry: SweBenchExample, codebase: Codebase | None = None)
 
     # Did we get a successful patch?
     if not model_patch:
-        msg = "Failed to generate a patch"
-        raise ValueError(msg)
+        pprint.pprint("=" * 60)
+        pprint.pprint("Failed to generate a patch")
+        pprint.pprint("=" * 60)
 
     return result