Skip to content

Reduce test time #706

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged 1 commit on Mar 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions codegen-examples/examples/swebench_agent_run/run_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
run_agent_modal = modal.Function.from_name(app_name="swebench-agent-run", name="run_agent_modal")


async def process_batch(examples: list[SweBenchExample], batch_size=10):
async def process_batch_modal(examples: list[SweBenchExample], batch_size=10):
"""Process a batch of examples concurrently.

Args:
Expand Down Expand Up @@ -90,7 +90,7 @@ async def process_batch(examples: list[SweBenchExample], batch_size=10):
return results


def process_batch_sync(examples: list[SweBenchExample], batch_size=10, codebases: dict[str, Codebase] = {}):
def process_batch_local(examples: list[SweBenchExample], batch_size=10, codebases: dict[str, Codebase] = {}):
"""Process a batch of examples synchronously.

Args:
Expand Down Expand Up @@ -160,9 +160,9 @@ async def run_eval(use_existing_preds: str | None, dataset: str, length: int, in

# Process all examples in parallel batches
if local:
results = process_batch_sync(examples, codebases=codebases)
results = process_batch_local(examples, codebases=codebases)
else:
results = await process_batch(examples)
results = await process_batch_modal(examples)

# Save individual results
for result in results:
Expand Down
11 changes: 8 additions & 3 deletions src/codegen/extensions/swebench/harness.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from codegen import Codebase
from codegen.agents.code_agent import CodeAgent
from codegen.configs.models.codebase import CodebaseConfig
from codegen.extensions.swebench.utils import (
SweBenchExample,
get_swe_bench_examples,
Expand Down Expand Up @@ -64,7 +65,10 @@ def run_agent_on_entry(entry: SweBenchExample, codebase: Codebase | None = None)
gold_files = files_in_patch(entry.patch)

if codebase is None:
codebase = Codebase.from_repo(repo_full_name=entry.repo, commit=base_commit, language="python") # check out the repo
config = CodebaseConfig(
disable_file_parse=True, # Disable the graph AND disable file parsing (file.edit only)
)
codebase = Codebase.from_repo(repo_full_name=entry.repo, commit=base_commit, language="python", config=config) # check out the repo

agent = CodeAgent(codebase=codebase)

Expand Down Expand Up @@ -117,8 +121,9 @@ def run_agent_on_entry(entry: SweBenchExample, codebase: Codebase | None = None)

# Did we get a successful patch?
if not model_patch:
msg = "Failed to generate a patch"
raise ValueError(msg)
pprint.pprint("=" * 60)
pprint.pprint("Failed to generate a patch")
pprint.pprint("=" * 60)

return result

Expand Down
Loading