Commit 61f6145
CG-10883: Improves dependency installation instructions (#608)

# Motivation

Improves the installation instructions.

# Content

Modifies how dependencies are installed.

# Testing

Tested by running locally.

# Please check the following before marking your PR as ready for review

- [ ] I have added tests for my changes
- [ ] I have updated the documentation or added new documentation as needed
1 parent 63be53c commit 61f6145

7 files changed: +290 −50 lines changed

codegen-examples/examples/swebench_agent_run/README.md

Lines changed: 10 additions & 9 deletions
@@ -1,32 +1,33 @@
 # INSTRUCTIONS
 
-1. Create a `.env` file in the root directory and add your API keys.
+1. Create a `.env` file in the `swebench_agent_run` directory (codegen-examples/examples/swebench_agent_run) and add your API keys.
 
 1. cd into the `codegen-examples/examples/swebench_agent_run` directory
 
 1. Create a `.venv` with `uv venv` and activate it with `source .venv/bin/activate`
 
+1. Install the dependencies with `uv pip install .`
+
 1. Install the codegen dependencies with `uv add codegen`
 
-   - Note: If you'd like to install the dependencies in the global environment, you can use `uv pip install -e ../../../`. This will allow you to test modifications to the codegen codebase. You will need to run `uv pip install -e ../../../` each time you make changes to the codebase.
+   - Note: If you'd like to install the dependencies using the global environment, use `uv pip install -e ../../../` instead of `uv pip install .`. This will allow you to test modifications to the codegen codebase. You will need to run `uv pip install -e ../../../` each time you make changes to the codebase.
 
-5. Ensure that you have a modal account and profile set up. If you don't have one, you can create one at https://modal.com/
+6. Ensure that you have a modal account and profile set up. If you don't have one, you can create one at https://modal.com/
 
-1. Activate the appropriate modal profile `uv modal profile activate <profile_name>`
+1. Activate the appropriate modal profile `python -m modal profile activate <profile_name>`
 
-1. Launch the modal app with `uv run modal deploy --env=<env_name> entry_point.py`
+1. Launch the modal app with `python -m modal deploy --env=<env_name> entry_point.py`
 
-1. Run the evaluation with `python run_eval.py` with the desired options:
+1. Run the evaluation with `python -m run_eval` with the desired options:
 
    ```bash
    $ python run_eval.py --help
    Usage: run_eval.py [OPTIONS]
 
    Options:
-     --use-existing-preds        Use existing predictions instead of
-                                 generating new ones.
+     --use-existing-preds TEXT   The run ID of the existing predictions to use.
      --dataset [princeton-nlp/SWE-bench_Lite|princeton-nlp/SWE-bench|princeton-nlp/SWE-bench-verified]
-                                 The dataset to use.
+                                 The dataset to use.
      --length INTEGER            The number of examples to process.
      --instance-id TEXT          The instance ID of the example to process.
      --help                      Show this message and exit.
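The updated steps above condense to the following shell session. This is a sketch assembled from the README instructions rather than an official script; `<profile_name>` and `<env_name>` are placeholders, and the final command assumes you run it from the example directory.

```bash
# From the repository root, as described in the updated README
cd codegen-examples/examples/swebench_agent_run

# The .env file with your API keys now lives in this directory
uv venv && source .venv/bin/activate

# Install the example's own dependencies (swebench, modal), then codegen
uv pip install .
uv add codegen
# or, to test local changes to the codegen codebase:
# uv pip install -e ../../../

# Set up Modal and deploy the entry point
python -m modal profile activate <profile_name>
python -m modal deploy --env=<env_name> entry_point.py

# Run the evaluation (see the --help output above for all options)
python run_eval.py --dataset princeton-nlp/SWE-bench_Lite --length 10
```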

codegen-examples/examples/swebench_agent_run/pyproject.toml

Lines changed: 4 additions & 1 deletion
@@ -4,4 +4,7 @@ version = "0.1.0"
 description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.12, <3.14"
-dependencies = []
+dependencies = ["swebench>=3.0.0", "modal>=0.73.25"]
+
+[tool.setuptools]
+py-modules = ["entry_point", "run_eval"]
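Declaring the dependencies plus the two top-level modules means `uv pip install .` should now pull in swebench and modal and install `entry_point` and `run_eval` as importable modules. A hypothetical smoke test of that assumption, run from the activated virtualenv:

```bash
# Assumed check: dependencies resolve and the declared py-modules import
uv pip install .
python -c "import swebench, modal; print('dependencies ok')"
python -c "import entry_point, run_eval; print('modules ok')"
```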

codegen-examples/examples/swebench_agent_run/run_eval.py

Lines changed: 13 additions & 11 deletions
@@ -2,6 +2,7 @@
 import json
 import traceback
 from pathlib import Path
+import uuid
 import modal
 import click
 from datetime import datetime
@@ -87,21 +88,21 @@ async def process_batch(examples, batch_size=10):
     return results
 
 
-async def run_eval(use_existing_preds, dataset, length, instance_id=None):
+async def run_eval(use_existing_preds: str | None, dataset: str, length: int, instance_id: str | None = None):
+    run_id = use_existing_preds or str(uuid.uuid4())
+    predictions_dir = PREDS_DNAME / f"results_{run_id}"
     dataset = SWEBenchDataset(dataset)
     if instance_id:
         examples = [get_swe_bench_example(instance_id, dataset=dataset)]
     else:
         examples = get_swe_bench_examples(dataset=dataset, length=length)
 
     try:
-        if not use_existing_preds:
+        if use_existing_preds is None:
             print(f"Processing {len(examples)} examples...")
 
             # Create output directory if it doesn't exist
-            PREDS_DNAME.mkdir(exist_ok=True)
-            results_dir = PREDS_DNAME / "results"
-            results_dir.mkdir(exist_ok=True)
+            predictions_dir.mkdir(exist_ok=True, parents=True)
 
             # Create a timestamp for this run
             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
@@ -113,12 +114,13 @@ async def run_eval(use_existing_preds, dataset, length, instance_id=None):
             for result in results:
                 if result and "instance_id" in result:
                     instance_id = result["instance_id"]
-                    output_file = results_dir / f"{instance_id}.json"
+                    output_file = predictions_dir / f"{instance_id}.json"
+                    output_file.parent.mkdir(exist_ok=True, parents=True)
                     with open(output_file, "w") as f:
                         json.dump(result, f, indent=4)
 
             # Save summary file
-            summary_file = results_dir / f"summary_{timestamp}.json"
+            summary_file = predictions_dir / f"summary_{timestamp}.json"
             summary = {
                 "timestamp": timestamp,
                 "total_examples": len(examples),
@@ -138,7 +140,7 @@ async def run_eval(use_existing_preds, dataset, length, instance_id=None):
                 json.dump(summary, f, indent=4)
 
             print("\nProcessing complete!")
-            print(f"Results saved to: {results_dir}")
+            print(f"Results saved to: {predictions_dir}")
             print(f"Summary saved to: {summary_file}")
             print(f"Successful: {summary['successful']}/{summary['total_examples']}")
             print(f"Failed: {summary['failed']}/{summary['total_examples']}")
@@ -148,18 +150,18 @@ async def run_eval(use_existing_preds, dataset, length, instance_id=None):
                 print(f"  {error_type}: {count}")
 
         # Generate Report on Modal
-        generate_report(PREDS_DNAME, LOG_DIR, dataset)
+        generate_report(predictions_dir, LOG_DIR, dataset, run_id)
     except Exception:
         print("Fatal error in run_eval:")
         traceback.print_exc()
         raise
 
 
 @click.command()
-@click.option("--use-existing-preds", is_flag=True, help="Use existing predictions instead of generating new ones.")
+@click.option("--use-existing-preds", help="The run ID of the existing predictions to use.", type=str, default=None)
 @click.option("--dataset", help="The dataset to use.", type=click.Choice([dataset.value for dataset in SWEBenchDataset]), default=SWEBenchDataset.LITE.value)
 @click.option("--length", help="The number of examples to process.", type=int, default=10)
-@click.option("--instance-id", help="The instance ID of the example to process.")
+@click.option("--instance-id", help="The instance ID of the example to process.", type=str, default=None)
 def run_eval_command(use_existing_preds, dataset, length, instance_id):
     asyncio.run(run_eval(use_existing_preds, dataset, length, instance_id))
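The net effect is that `--use-existing-preds` changes from a boolean flag to a run ID: a fresh run generates a UUID and writes predictions into a `results_<run_id>` directory, while passing a previous run's ID reuses that directory and regenerates the report. A sketch of the two invocations, where `<run-id>` stands for the UUID suffix of an existing `results_<run-id>` folder and the parent directory is whatever `PREDS_DNAME` points to:

```bash
# Fresh run: a new UUID is generated and results land in results_<new-uuid>
python run_eval.py --dataset princeton-nlp/SWE-bench_Lite --length 5

# Reuse existing predictions from an earlier run and regenerate the report
python run_eval.py --use-existing-preds <run-id>
```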
