Skip to content

Commit 881d931

Browse files
authored
Merge pull request #27 from commit-0/patch
Patch
2 parents 79b0363 + 8b3464e commit 881d931

File tree

13 files changed

+50
-366
lines changed

13 files changed

+50
-366
lines changed

.github/workflows/system.yml

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -22,8 +22,6 @@ jobs:
2222
run: uv run commit0 clone simpy
2323
- name: Build docker images
2424
run: uv run commit0 build simpy
25-
- name: Set up git user
26-
run: sudo "$(which uv)" run commit0 setup-git-user simpy
2725
- name: Get tests
2826
run: uv run commit0 get-tests simpy
2927
- name: Test

commit0/__main__.py

Lines changed: 0 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22
import commit0.harness.get_pytest_ids
33
import commit0.harness.build
44
import commit0.harness.setup
5-
import commit0.harness.setup_git_user
65
import commit0.harness.evaluate
76
import commit0.harness.save
87
import copy
@@ -34,7 +33,6 @@ def main() -> None:
3433
if command in [
3534
"clone",
3635
"build",
37-
"setup-git-user",
3836
"evaluate",
3937
"evaluate-reference",
4038
"save",
@@ -62,16 +60,6 @@ def main() -> None:
6260
config.repo_split,
6361
config.num_workers,
6462
config.backend,
65-
config.key_path,
66-
)
67-
elif command == "setup-git-user":
68-
commit0.harness.setup_git_user.main(
69-
config.dataset_name,
70-
config.dataset_split,
71-
config.repo_split,
72-
config.base_dir,
73-
config.git_user,
74-
config.key_path,
7563
)
7664
elif command == "get-tests":
7765
repo = sys.argv[2]

commit0/configs/base.yaml

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -11,10 +11,6 @@ repo_split: all
1111

1212
# build related
1313
num_workers: 8
14-
key_path: commit0/configs/public_keys.json
15-
16-
# set up git user
17-
git_user: git # by default, git user is called git
1814

1915
# test related
2016
backend: local

commit0/configs/config_class.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,6 @@ class Commit0Config:
1515
# build related
1616
# which repo to build, all or one repo
1717
num_workers: int
18-
# path to store public keys from docker images
19-
key_path: str
2018

2119
# test related
2220
backend: str

commit0/harness/build.py

Lines changed: 2 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,11 @@
1-
import json
21
import logging
3-
import traceback
42

53
import docker
64
from datasets import load_dataset
7-
from tqdm import tqdm
85
from typing import Iterator
96

10-
from commit0.harness.constants import EVAL_BACKENDS, RepoInstance, SPLIT
7+
from commit0.harness.constants import RepoInstance, SPLIT
118
from commit0.harness.docker_build import build_repo_images
12-
from commit0.harness.execution_context import (
13-
ExecutionBackend,
14-
Docker,
15-
Modal,
16-
)
179
from commit0.harness.spec import make_spec
1810

1911
logging.basicConfig(
@@ -28,7 +20,6 @@ def main(
2820
repo_split: str,
2921
num_workers: int,
3022
backend: str,
31-
key_path: str,
3223
) -> None:
3324
dataset: Iterator[RepoInstance] = load_dataset(dataset_name, split=dataset_split) # type: ignore
3425
specs = []
@@ -39,29 +30,9 @@ def main(
3930
spec = make_spec(example)
4031
specs.append(spec)
4132

42-
if ExecutionBackend(backend) == ExecutionBackend.MODAL:
43-
execution_context = Modal
44-
elif ExecutionBackend(backend) == ExecutionBackend.LOCAL:
33+
if backend == "local":
4534
client = docker.from_env()
4635
build_repo_images(client, specs, num_workers)
47-
execution_context = Docker
48-
else:
49-
raise ValueError(
50-
f"Evaluation must be from {', '.join(EVAL_BACKENDS)}, but {backend} is provided."
51-
)
52-
53-
# get ssh key from each docker image
54-
img2key = dict()
55-
for spec in tqdm(specs, desc="Retrieving public keys from docker images"):
56-
try:
57-
with execution_context(spec, logger, timeout=60) as context:
58-
key = context.get_ssh_pubkey_from_remote(user="root")
59-
img2key[spec.repo_image_key] = key
60-
except Exception as e:
61-
error_msg = f"General error: {e}\n" f"{traceback.format_exc()}\n"
62-
raise RuntimeError(error_msg)
63-
with open(key_path, "w") as json_file:
64-
json.dump(img2key, json_file, indent=4)
6536

6637

6738
__all__ = []

commit0/harness/constants.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ class RepoInstance(TypedDict):
1313

1414
class Files(TypedDict):
1515
eval_script: Dict[str, Path]
16+
patch: Dict[str, Path]
1617

1718

1819
# Constants - Evaluation Log Directories
@@ -33,7 +34,6 @@ class Files(TypedDict):
3334
COMMANDS = [
3435
"clone",
3536
"build",
36-
"setup-git-user",
3737
"test",
3838
"test-reference",
3939
"get-tests",

commit0/harness/docker_utils.py

Lines changed: 0 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -140,33 +140,6 @@ def delete_file_from_container(container: Container, file_path: str) -> None:
140140
raise Exception(f"General Error: {str(e)}")
141141

142142

143-
def get_ssh_pubkey_from_container(container: Container, user: str) -> str:
144-
"""Copy the SSH public key from a Docker container to the local authorized_keys file.
145-
146-
Args:
147-
----
148-
container (Container): Docker container to copy the key from.
149-
user (str): to get public key of which user
150-
151-
Returns:
152-
-------
153-
public_key (str): public key from docker container
154-
155-
Raises:
156-
------
157-
docker.errors.APIError: If there is an error calling the Docker API.
158-
159-
"""
160-
try:
161-
exit_code, output = container.exec_run("cat /root/.ssh/id_rsa.pub")
162-
if exit_code != 0:
163-
raise Exception(f"Error reading file: {output.decode('utf-8').strip()}")
164-
public_key = output.decode("utf-8").strip()
165-
return public_key
166-
except docker.errors.APIError as e:
167-
raise docker.errors.APIError(f"Docker API Error: {str(e)}")
168-
169-
170143
def write_to_container(container: Container, data: str, dst: Path) -> None:
171144
"""Write a string to a file in a docker container"""
172145
# echo with heredoc to file

commit0/harness/dockerfiles.py

Lines changed: 4 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,6 @@
77
88
RUN apt update && apt install -y \
99
wget \
10-
git \
1110
build-essential \
1211
libffi-dev \
1312
libtiff-dev \
@@ -21,16 +20,10 @@
2120
tzdata \
2221
&& rm -rf /var/lib/apt/lists/*
2322
24-
# Define arguments for SSH key parameters
25-
ARG SSH_KEY_PATH="/root/.ssh"
26-
ARG SSH_KEY_NAME="id_rsa"
27-
ARG SSH_KEY_PASSPHRASE=""
28-
29-
# Create the .ssh directory
30-
RUN mkdir -p ${{SSH_KEY_PATH}}
31-
32-
# Generate SSH keys
33-
RUN ssh-keygen -t rsa -b 4096 -f ${{SSH_KEY_PATH}}/${{SSH_KEY_NAME}} -N "${{SSH_KEY_PASSPHRASE}}"
23+
# Install the latest version of Git
24+
RUN apt-get update && apt-get install software-properties-common -y
25+
RUN add-apt-repository ppa:git-core/ppa -y
26+
RUN apt-get update && apt-get install git -y
3427
3528
# Set up uv
3629
# The installer requires curl (and certificates) to download the release archive

commit0/harness/execution_context.py

Lines changed: 0 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,6 @@
2727
create_container,
2828
copy_from_container,
2929
copy_to_container,
30-
get_ssh_pubkey_from_container,
3130
delete_file_from_container,
3231
exec_run_with_timeout,
3332
)
@@ -62,11 +61,6 @@ def __init__(
6261
self.logger = logger
6362
self.timeout = timeout
6463

65-
@abstractmethod
66-
def get_ssh_pubkey_from_remote(self) -> None:
67-
"""Copy"""
68-
raise NotImplementedError
69-
7064
@abstractmethod
7165
def exec_run_with_timeout(
7266
self, command: str, timeout: int
@@ -148,10 +142,6 @@ def __init__(
148142
for _, f in files_to_copy.items():
149143
copy_to_container(self.container, f["src"], f["dest"]) # type: ignore
150144

151-
def get_ssh_pubkey_from_remote(self, user: str) -> str:
152-
"""Copy"""
153-
return get_ssh_pubkey_from_container(self.container, user)
154-
155145
def exec_run_with_timeout(
156146
self, command: str, timeout: int
157147
) -> tuple[str, bool, float]:
@@ -206,12 +196,6 @@ def __init__(
206196
timeout=timeout,
207197
)
208198

209-
def get_ssh_pubkey_from_remote(self, user: str) -> str:
210-
"""Copy ssh pubkey"""
211-
process = self.sandbox.exec("bash", "-c", f"cat /{user}/.ssh/id_rsa.pub")
212-
public_key = "".join([line for line in process.stdout]).strip()
213-
return public_key
214-
215199
def exec_run_with_timeout(
216200
self, command: str, timeout: int
217201
) -> tuple[str, bool, float]:

commit0/harness/run_pytest_ids.py

Lines changed: 12 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
EvaluationError,
1919
extract_test_output,
2020
get_hash_string,
21-
get_ip,
21+
generate_patch_between_commits,
2222
)
2323
from commit0.harness.execution_context import (
2424
ExecutionBackend,
@@ -60,20 +60,20 @@ def main(
6060
log_file = log_dir / "run_pytest.log"
6161
logger = setup_logger(repo, log_file)
6262

63+
local_repo = Repo(f"{base_dir}/{repo}")
6364
if branch == "reference":
6465
commit_id = example["reference_commit"]
6566
else:
66-
local_repo = Repo(f"{base_dir}/{repo}")
6767
local_branch = local_repo.branches[branch]
6868
commit_id = local_branch.commit.hexsha
69+
patch = generate_patch_between_commits(
70+
local_repo, example["base_commit"], commit_id
71+
)
72+
patch_file = Path(log_dir / "patch.diff")
73+
patch_file.write_text(patch)
6974

7075
# make eval file
71-
eval_script = spec.eval_script.format(
72-
local_repo=f"{base_dir}/{repo}",
73-
commit_id=commit_id,
74-
test_ids=test_ids,
75-
ip=get_ip(backend),
76-
)
76+
eval_script = spec.eval_script.format(test_ids=test_ids)
7777
eval_file = Path(log_dir / "eval.sh")
7878
eval_file.write_text(eval_script)
7979

@@ -86,7 +86,10 @@ def main(
8686
f"Evaluation must be from {', '.join(EVAL_BACKENDS)}, but {backend} is provided."
8787
)
8888

89-
files_to_copy = Files(eval_script={"src": eval_file, "dest": Path("/eval.sh")})
89+
files_to_copy = Files(
90+
eval_script={"src": eval_file, "dest": Path("/eval.sh")},
91+
patch={"src": patch_file, "dest": Path("/patch.diff")},
92+
)
9093

9194
try:
9295
with execution_context(spec, logger, timeout, files_to_copy) as context:

commit0/harness/setup_git_user.py

Lines changed: 0 additions & 49 deletions
This file was deleted.

commit0/harness/spec.py

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -147,18 +147,14 @@ def make_repo_script_list(instance: RepoInstance, repo_directory: str) -> list[s
147147

148148
def make_eval_script_list(instance: RepoInstance, repo_directory: str) -> list[str]:
149149
"""Run the tests."""
150-
origin_name = "tmp-test"
151150
eval_script_list = [
152-
"ssh-keyscan {ip} >> ~/.ssh/known_hosts",
153151
f"cd {repo_directory}",
154152
"source .venv/bin/activate",
155-
f"git remote add {origin_name} ssh://git@{{ip}}:{{local_repo}}",
156-
f"git fetch {origin_name}",
157-
"git checkout {commit_id}",
153+
f"git reset --hard {instance['base_commit']}",
154+
"git apply --allow-empty -v /patch.diff",
158155
"git status",
159156
f"{instance['test']['test_cmd']} --json-report --json-report-file=report.json {{test_ids}}",
160-
f"git checkout {instance['base_commit']}",
161-
f"git remote remove {origin_name}",
157+
f"git reset --hard {instance['base_commit']}",
162158
"git status",
163159
]
164160
return eval_script_list

0 commit comments

Comments
 (0)