Skip to content

Commit 46612ab

Browse files
committed
Merge branch 'main' into aider
2 parents 60b1a92 + a17d06b commit 46612ab

File tree

15 files changed

+449
-279
lines changed

15 files changed

+449
-279
lines changed

.github/workflows/pre-commit.yml

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,13 @@ jobs:
1010
runs-on: ubuntu-latest
1111
steps:
1212
- uses: actions/checkout@v3
13-
- uses: actions/setup-python@v3
14-
- uses: pre-commit/[email protected]
13+
- name: Install uv
14+
uses: astral-sh/setup-uv@v2
15+
- name: Set up Python
16+
run: uv venv --python 3.12
17+
- name: Install the project
18+
run: uv sync
19+
- name: Install pre-commit
20+
run: uv pip install pre-commit
21+
- name: PreCommit
22+
run: uv run pre-commit run --show-diff-on-failure --color=always --all-files

.github/workflows/system.yml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
name: system
2+
3+
on:
4+
pull_request:
5+
push:
6+
branches: [main]
7+
8+
jobs:
9+
system:
10+
runs-on: ubuntu-latest
11+
steps:
12+
- uses: actions/checkout@v3
13+
- name: Install uv
14+
uses: astral-sh/setup-uv@v2
15+
- name: Set up Python
16+
run: uv venv --python 3.12
17+
- name: Set up Docker
18+
uses: docker/setup-buildx-action@v3
19+
- name: Install the project
20+
run: uv sync
21+
- name: Clone
22+
run: uv run commit0 clone simpy
23+
- name: Setup
24+
run: uv run commit0 build simpy
25+
- name: Test
26+
run: uv run commit0 test-reference simpy tests/test_event.py::test_succeed

commit0/__main__.py

Lines changed: 60 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,70 @@
1-
import argparse
21
import commit0.harness.run_pytest_ids
32
import commit0.harness.build
43
import commit0.harness.setup
4+
import copy
5+
import sys
6+
import os
7+
import hydra
8+
from hydra.core.config_store import ConfigStore
9+
from commit0.configs.config_class import Commit0Config
10+
from commit0.harness.constants import COMMANDS, SPLIT
511

612

713
def main() -> None:
    """Entry point of the ``commit0`` command-line interface.

    Reads the sub-command from ``sys.argv[1]``, composes the ``user`` hydra
    config (registered against ``Commit0Config`` for type checking), applies
    the command-line overrides, and dispatches to the matching harness
    ``main`` function.

    Command-line shapes handled here:
        commit0 clone [repo_split]
        commit0 build [repo_split]
        commit0 test <repo> <test_ids>
        commit0 test-reference <repo> <test_ids>

    Raises:
        ValueError: if no command is given, the command is not in
            ``COMMANDS``, the repo split is not a key of ``SPLIT``, or
            ``test`` / ``test-reference`` lacks its repo or test-ids argument.
    """
    # Guard first: indexing sys.argv[1] on a bare invocation would otherwise
    # surface as an unhelpful IndexError.
    if len(sys.argv) < 2:
        raise ValueError("no command provided; usage: commit0 <command> [args]")
    command = sys.argv[1]
    if command not in COMMANDS:
        raise ValueError(
            f"command must be from {', '.join(COMMANDS)}, but you provided {command}"
        )
    # type check config values
    cs = ConfigStore.instance()
    cs.store(name="user", node=Commit0Config)
    # make hydra ignore all command-line arguments
    sys_argv = copy.deepcopy(sys.argv)
    sys.argv = [sys.argv[0]]
    try:
        hydra.initialize(version_base=None, config_path="configs")
        config = hydra.compose(config_name="user")
    finally:
        # put the command-line arguments back even if hydra fails
        sys.argv = sys_argv
    # repo_split: split from command line has a higher priority than split in hydra
    if command in ["clone", "build"]:
        if len(sys.argv) == 3:
            if sys.argv[2] not in SPLIT:
                raise ValueError(
                    f"repo split must be from {', '.join(SPLIT.keys())}, but you provided {sys.argv[2]}"
                )
            config.repo_split = sys.argv[2]
    config.base_dir = os.path.abspath(config.base_dir)

    if command == "clone":
        commit0.harness.setup.main(
            config.dataset_name,
            config.dataset_split,
            config.repo_split,
            config.base_dir,
        )
    elif command == "build":
        commit0.harness.build.main(
            config.dataset_name,
            config.dataset_split,
            config.repo_split,
            config.num_workers,
        )
    elif command == "test" or command == "test-reference":
        # Both positional arguments are required; fail with a usage hint
        # instead of an IndexError.
        if len(sys.argv) < 4:
            raise ValueError(
                f"{command} requires a repo and test ids; usage: commit0 {command} <repo> <test_ids>"
            )
        repo = sys.argv[2]
        test_ids = sys.argv[3]
        if command == "test-reference":
            # test-reference always runs against the "reference" branch
            config.branch = "reference"
        commit0.harness.run_pytest_ids.main(
            config.dataset_name,
            config.dataset_split,
            config.base_dir,
            repo,
            config.branch,
            test_ids,
            config.backend,
            config.timeout,
        )
2568

2669

2770
if __name__ == "__main__":

commit0/configs/base.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
defaults:
2+
- _self_
3+
4+
# shared in all steps
5+
dataset_name: wentingzhao/commit0_docstring
6+
dataset_split: test
7+
8+
# clone related
9+
base_dir: repos/
10+
repo_split: all
11+
12+
# build related
13+
num_workers: 8
14+
15+
# test related
16+
backend: local
17+
branch: ai
18+
timeout: 1_800

commit0/configs/config_class.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
from dataclasses import dataclass
2+
3+
4+
@dataclass
class Commit0Config:
    """Typed schema for the commit0 hydra configuration.

    The fields mirror the keys of ``configs/base.yaml``; the entry point
    registers this class with hydra's ``ConfigStore`` so that config values
    are type checked.
    """

    # shared in all steps
    dataset_name: str
    dataset_split: str

    # clone related
    base_dir: str
    # which repos to work on: "all" or a named split
    # NOTE(review): this note originally sat above num_workers under
    # "build related"; it appears to describe repo_split — confirm.
    repo_split: str

    # build related
    # passed through to build_repo_images as the worker count
    num_workers: int

    # test related
    backend: str
    # which branch to work on
    branch: str
    # timeout for running pytest
    timeout: int

commit0/configs/user.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
defaults:
2+
- base
3+
- _self_

commit0/harness/build.py

Lines changed: 8 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
import argparse
21
import logging
32

43
import docker
54
from datasets import load_dataset
65
from typing import Iterator
6+
77
from commit0.harness.docker_build import build_repo_images
88
from commit0.harness.spec import make_spec
9+
from commit0.harness.constants import RepoInstance, SPLIT
910

1011
logging.basicConfig(
1112
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
@@ -14,49 +15,19 @@
1415

1516

1617
def main(
    dataset_name: str, dataset_split: str, repo_split: str, num_workers: int
) -> None:
    """Build docker images for the dataset repos selected by ``repo_split``.

    Args:
        dataset_name: dataset identifier handed to ``load_dataset``.
        dataset_split: dataset split handed to ``load_dataset``.
        repo_split: ``"all"`` to keep every repo, otherwise a key of ``SPLIT``
            whose list names the repos to keep.
        num_workers: forwarded to ``build_repo_images``.
    """
    dataset: Iterator[RepoInstance] = load_dataset(dataset_name, split=dataset_split)  # type: ignore

    def _selected(instance: RepoInstance) -> bool:
        # The repo name is the last path component of the "repo" field.
        short_name = instance["repo"].split("/")[-1]
        # "all" short-circuits, so SPLIT is only consulted for named splits.
        return repo_split == "all" or short_name in SPLIT[repo_split]

    specs = [make_spec(instance) for instance in dataset if _selected(instance)]

    docker_client = docker.from_env()
    build_repo_images(docker_client, specs, num_workers)
6031

6132

6233
__all__ = []

commit0/harness/constants.py

Lines changed: 84 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
from enum import Enum
22
from pathlib import Path
3-
from typing import TypedDict
3+
from typing import Dict, TypedDict
44

55

66
class RepoInstance(TypedDict):
    """Typed shape of one dataset row describing a repository."""

    # repository identifier; the build step uses the text after the last "/"
    # as the repo name
    repo: str
    # presumably the commit the work starts from — confirm against the dataset
    base_commit: str
    # presumably the commit holding the reference solution — confirm
    reference_commit: str
    # setup information consumed elsewhere; schema not visible here
    setup: dict
    # string-to-string mapping of test settings; keys not visible here
    test: Dict[str, str]
1212

1313

1414
# Constants - Evaluation Log Directories
@@ -25,6 +25,88 @@ class RepoInstance(TypedDict):
2525
# Evaluation backends
2626
EVAL_BACKENDS = ["local", "modal"]
2727

28+
# available commands
COMMANDS = ["clone", "build", "test", "test-reference"]
# repo splits: each list names the repos (by short name) that belong to a split
SPLIT_MINITORCH = ["minitorch"]
SPLIT_SIMPY = ["simpy"]
SPLIT_LITE = [
    "tinydb",
    "simpy",
    "deprecated",
    "wcwidth",
    "voluptuous",
    "cachetools",
    "imapclient",
    "marshmallow",
    "jinja",
    "cookiecutter",
]
SPLIT_ALL = [
    "statsmodels",
    "python-progressbar",
    "xarray",
    "imbalanced-learn",
    "web3.py",
    "scrapy",
    "seaborn",
    "pypdf",
    "pexpect",
    "pytest",
    "pylint",
    "joblib",
    "dulwich",
    "virtualenv",
    "minitorch",
    "networkx",
    "requests",
    "sphinx",
    "jedi",
    "moviepy",
    "loguru",
    "paramiko",
    "geopandas",
    "bitstring",
    "fastapi",
    "chardet",
    "tornado",
    "python-prompt-toolkit",
    "attrs",
    "PyBoy",
    "pydantic",
    "filesystem_spec",
    "tlslite-ng",
    "graphene",
    "mimesis",
    "babel",
    "dnspython",
    # These two were previously written as `"portalocker," "cookiecutter"`,
    # which Python concatenates into the single entry
    # "portalocker,cookiecutter"; they must be separate items.
    "portalocker",
    "cookiecutter",
    "pyjwt",
    "python-rsa",
    "more-itertools",
    "simpy",
    "click",
    "fabric",
    "jinja",
    "flask",
    "sqlparse",
    "marshmallow",
    "imapclient",
    "tinydb",
    "cachetools",
    "voluptuous",
    "parsel",
    "wcwidth",
    "deprecated",
]

# split name -> list of repo names in that split
SPLIT = {
    "all": SPLIT_ALL,
    "minitorch": SPLIT_MINITORCH,
    "simpy": SPLIT_SIMPY,
    "lite": SPLIT_LITE,
}
109+
28110

29111
class ResolvedStatus(Enum):
30112
NO = "RESOLVED_NO"

0 commit comments

Comments
 (0)