Skip to content

Commit 2ba0357

Browse files
authored
Merge pull request #25 from commit-0/restrict-git
Restrict git
2 parents f9727de + 877e1e8 commit 2ba0357

File tree

9 files changed

+131
-29
lines changed

9 files changed

+131
-29
lines changed

.github/workflows/system.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@ jobs:
1818
uses: docker/setup-buildx-action@v3
1919
- name: Install the project
2020
run: uv sync
21-
- name: Clone
22-
run: uv run commit0 clone simpy
23-
- name: Setup
21+
- name: Set up commit0
22+
run: uv run commit0 setup simpy
23+
- name: Build docker images
2424
run: uv run commit0 build simpy
2525
- name: Get tests
2626
run: uv run commit0 get-tests simpy

commit0/__main__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def main() -> None:
3030
# after hydra gets all configs, put command-line arguments back
3131
sys.argv = sys_argv
3232
# repo_split: split from command line has a higher priority than split in hydra
33-
if command in ["clone", "build", "evaluate", "evaluate-reference", "save"]:
33+
if command in ["setup", "build", "evaluate", "evaluate-reference", "save"]:
3434
if len(sys.argv) >= 3:
3535
if sys.argv[2] not in SPLIT:
3636
raise ValueError(
@@ -39,7 +39,7 @@ def main() -> None:
3939
config.repo_split = sys.argv[2]
4040
config.base_dir = os.path.abspath(config.base_dir)
4141

42-
if command == "clone":
42+
if command == "setup":
4343
commit0.harness.setup.main(
4444
config.dataset_name,
4545
config.dataset_split,

commit0/harness/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ class RepoInstance(TypedDict):
2727

2828
# available commands
2929
COMMANDS = [
30-
"clone",
30+
"setup",
3131
"build",
3232
"test",
3333
"test-reference",

commit0/harness/docker_utils.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import threading
99
import time
1010
import traceback
11+
import pwd
1112
from pathlib import Path
1213
from io import BytesIO
1314
from typing import Optional, List, Union
@@ -158,23 +159,26 @@ def copy_ssh_pubkey_from_container(container: Container) -> None:
158159
if exit_code != 0:
159160
raise Exception(f"Error reading file: {output.decode('utf-8').strip()}")
160161
public_key = output.decode("utf-8").strip()
162+
public_key = f"no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty {public_key}"
161163

162-
local_authorized_keys_path = os.path.expanduser("~/.ssh/authorized_keys")
163-
os.makedirs(os.path.dirname(local_authorized_keys_path), exist_ok=True)
164-
if not os.path.exists(local_authorized_keys_path):
164+
user_info = pwd.getpwnam("git")
165+
home_directory = user_info.pw_dir
166+
authorized_keys_path = os.path.join(home_directory, ".ssh", "authorized_keys")
167+
os.makedirs(os.path.dirname(authorized_keys_path), exist_ok=True)
168+
if not os.path.exists(authorized_keys_path):
165169
# Since the file does not exist, create it
166-
open(local_authorized_keys_path, "a").close()
170+
open(authorized_keys_path, "a").close()
167171
write = True
168172
else:
169-
with open(local_authorized_keys_path, "r") as authorized_keys_file:
173+
with open(authorized_keys_path, "r") as authorized_keys_file:
170174
content = authorized_keys_file.read()
171175
if public_key not in content:
172176
write = True
173177
else:
174178
write = False
175179

176180
if write:
177-
with open(local_authorized_keys_path, "a") as authorized_keys_file:
181+
with open(authorized_keys_path, "a") as authorized_keys_file:
178182
authorized_keys_file.write(public_key + "\n")
179183

180184
except docker.errors.APIError as e:

commit0/harness/evaluate.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,18 +72,19 @@ def main(
7272
for name in tqdm(log_dirs):
7373
report_file = os.path.join(name, "report.json")
7474
name = name.split("/")[2]
75+
test_ids = get_tests(name, stdout=False)
7576
if not os.path.exists(report_file):
7677
out.append(
7778
{
7879
"name": name,
7980
"sum": 0,
8081
"passed": 0,
8182
"num_passed": 0,
83+
"num_tests": len(test_ids),
8284
}
8385
)
8486
continue
8587
report = load_dataset("json", data_files=report_file, split="train") # type: ignore
86-
test_ids = get_tests(name, stdout=False)
8788
tests = {x["nodeid"]: x["call"] for x in report["tests"][0]} # type: ignore
8889
status = []
8990
runtimes = []
@@ -110,7 +111,7 @@ def main(
110111
"sum": total,
111112
"passed": passed,
112113
"num_passed": status["passed"] + status["xfail"],
113-
"num_tests": sum(status.values()),
114+
"num_tests": len(test_ids),
114115
}
115116
)
116117
print("repo,runtime,num_passed/num_tests")

commit0/harness/run_pytest_ids.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
extract_test_output,
1616
get_hash_string,
1717
get_ip,
18-
get_user,
1918
)
2019
from commit0.harness.execution_context import (
2120
Docker,
@@ -74,7 +73,6 @@ def main(
7473
commit_id=commit_id,
7574
test_ids=test_ids,
7675
ip=get_ip(backend),
77-
user=get_user(),
7876
)
7977
eval_file = Path(log_dir / "eval.sh")
8078
eval_file.write_text(eval_script)
@@ -96,18 +94,21 @@ def main(
9694
output, "--json-report --json-report-file=report.json"
9795
)
9896
context.write_test_output(test_output, timed_out)
97+
print(test_output)
9998
except EvaluationError as e:
100-
error_msg = traceback.format_exc()
101-
logger.info(error_msg)
102-
print(e)
99+
error_msg = (
100+
f"Error in running pytest for {repo}: {e}\n"
101+
f"{traceback.format_exc()}\n"
102+
f"Check ({log_file}) for more information."
103+
)
104+
raise EvaluationError(repo, error_msg, logger)
103105
except Exception as e:
104106
error_msg = (
105-
f"Error in running pytest for {spec.repo}: {e}\n"
107+
f"General error: {e}\n"
106108
f"{traceback.format_exc()}\n"
107-
# f"Check ({logger.log_file}) for more information."
109+
f"Check ({log_file}) for more information."
108110
)
109-
logger.error(error_msg)
110-
111+
raise RuntimeError(error_msg)
111112
return str(log_dir)
112113

113114

commit0/harness/setup.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,12 @@
44
from datasets import load_dataset
55

66
from typing import Iterator
7-
from commit0.harness.utils import clone_repo, create_branch
7+
from commit0.harness.utils import (
8+
clone_repo,
9+
create_branch,
10+
setup_git,
11+
add_safe_directory,
12+
)
813
from commit0.harness.constants import RepoInstance, SPLIT
914

1015

@@ -18,6 +23,7 @@ def main(
1823
dataset_name: str, dataset_split: str, repo_split: str, base_dir: str, branch: str
1924
) -> None:
2025
dataset: Iterator[RepoInstance] = load_dataset(dataset_name, split=dataset_split) # type: ignore
26+
setup_git(logger)
2127
for example in dataset:
2228
repo_name = example["repo"].split("/")[-1]
2329
if repo_split != "all" and repo_name not in SPLIT[repo_split]:
@@ -26,6 +32,7 @@ def main(
2632
clone_dir = os.path.abspath(os.path.join(base_dir, repo_name))
2733
local_repo = clone_repo(clone_url, clone_dir, example["base_commit"], logger)
2834
create_branch(local_repo, branch, logger)
35+
add_safe_directory(clone_dir, logger)
2936

3037

3138
__all__ = []

commit0/harness/spec.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ def make_eval_script_list(instance: RepoInstance, repo_directory: str) -> list[s
152152
"ssh-keyscan {ip} >> ~/.ssh/known_hosts",
153153
f"cd {repo_directory}",
154154
"source .venv/bin/activate",
155-
f"git remote add {origin_name} ssh://{{user}}@{{ip}}:{{local_repo}}",
155+
f"git remote add {origin_name} ssh://git@{{ip}}:{{local_repo}}",
156156
f"git fetch {origin_name}",
157157
"git checkout {commit_id}",
158158
"git status",

commit0/harness/utils.py

Lines changed: 93 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import getpass
21
import git
32
import git.exc
43
import hashlib
@@ -7,7 +6,8 @@
76
import os
87
import time
98
import requests
10-
from typing import Optional
9+
import subprocess
10+
from typing import Optional, Tuple
1111

1212
from fastcore.net import HTTP404NotFoundError, HTTP403ForbiddenError # type: ignore
1313
from ghapi.core import GhApi
@@ -58,8 +58,97 @@ def get_ip(backend: str) -> str:
5858
return ip
5959

6060

61-
def get_user() -> str:
62-
return getpass.getuser()
61+
def run_command(command: str) -> Tuple[str, str, int]:
    """Run a shell command and return its stdout, stderr, and exit code.

    The command string is interpreted by the shell (``shell=True``), so
    pipes, redirections, and ``$(...)`` substitutions work. A non-zero exit
    status is not raised as an exception; it is reported through the
    returned exit code so callers can decide how to react.

    Args:
        command: The shell command line to execute.

    Returns:
        A ``(stdout, stderr, exit_code)`` tuple. Both streams are decoded as
        UTF-8; bytes that are not valid UTF-8 are replaced with U+FFFD rather
        than raising ``UnicodeDecodeError``.

    """
    # check=False: failures are signalled by the returned exit code, which is
    # what the previous check=True + CalledProcessError handler amounted to.
    result = subprocess.run(
        command,
        shell=True,
        check=False,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    return (
        # errors="replace": tool output is not guaranteed to be valid UTF-8,
        # and a decoding error here would mask the command's real result.
        result.stdout.decode("utf-8", errors="replace"),
        result.stderr.decode("utf-8", errors="replace"),
        result.returncode,
    )
78+
79+
80+
def handle_command(command: str, description: str, logger: logging.Logger) -> None:
    """Run a shell command and log whether it succeeded or failed.

    A failure is only logged (at error level, together with the command's
    stderr); no exception is raised.

    Args:
        command: The shell command line passed to ``run_command``.
        description: Short phrase describing what the command does; it is
            embedded in the log message after the word "which".
        logger: Logger that receives the success or failure message.

    """
    _, err_text, status = run_command(command)
    if status == 0:
        logger.info(f"Succeeded in running '{command}' which {description}")
        return
    logger.error(f"Error running '{command}' which {description}:\n{err_text}")
87+
88+
89+
def setup_git(logger: logging.Logger) -> None:
    """Create and configure the restricted 'git' system user.

    Steps, all executed through ``sudo`` shell commands:

    1. Add the ``git`` user (skipped when the account already exists).
    2. Make its home directory and ``.ssh`` directory readable by others and
       create ``.ssh/authorized_keys``.
    3. Register ``git-shell`` in ``/etc/shells`` (once) and make it the login
       shell of the ``git`` user.

    Individual command failures are logged by ``handle_command`` and do not
    abort the setup.

    Args:
        logger: Logger that receives the outcome of every command.

    Raises:
        RuntimeError: If the home directory of the ``git`` user cannot be
            determined (for example because the user could not be created).

    """
    import shlex  # local import so this function does not rely on a file-level import

    # Only create the account when it is missing, so that re-running setup
    # does not log a spurious "user already exists" failure.
    _, _, lookup_code = run_command("getent passwd git")
    if lookup_code != 0:
        handle_command(
            'sudo adduser --disabled-password --gecos "" git', "adds git user", logger
        )

    # Read the passwd entry directly instead of piping through `cut`: the exit
    # status of a pipeline is that of its last command, so a failed lookup
    # would otherwise go unnoticed and yield an empty home directory.
    stdout, stderr, exit_code = run_command("getent passwd git")
    passwd_fields = stdout.strip().split(":")
    git_home = passwd_fields[5] if len(passwd_fields) > 5 else ""
    if exit_code != 0 or not git_home:
        # Without this guard the commands below would operate on "/.ssh".
        raise RuntimeError(f"Error getting git user's home directory: {stderr}")

    ssh_dir = shlex.quote(os.path.join(git_home, ".ssh"))
    keys_file = shlex.quote(os.path.join(git_home, ".ssh", "authorized_keys"))
    # NOTE(review): authorized_keys is left world-writable (666), presumably so
    # the harness can append container keys without being the git user. This
    # lets any local user grant themselves access, and sshd's StrictModes
    # check may reject such a file -- confirm this is intended.
    ssh_setup = (
        f"mkdir -p {ssh_dir} && chmod 755 {ssh_dir} "
        f"&& touch {keys_file} && chmod 666 {keys_file}"
    )
    # Append git-shell to /etc/shells only when it is not listed yet, so
    # repeated setups do not accumulate duplicate lines.
    register_shell = (
        'shell=$(which git-shell) && '
        '{ grep -qxF "$shell" /etc/shells || echo "$shell" >> /etc/shells; }'
    )

    # Commands to be executed, each with the description used in log messages
    commands = [
        (
            f"sudo chmod 755 {shlex.quote(git_home)}",
            "make home of git viewable by others",
        ),
        (
            f"sudo sh -c {shlex.quote(ssh_setup)}",
            "sets up .ssh directory for git",
        ),
        ("sudo touch /etc/shells", "creates /etc/shells if it doesn't exist yet"),
        ("cat /etc/shells", "views available shells"),
        (
            f"sudo sh -c {shlex.quote(register_shell)}",
            "adds git-shell to /etc/shells",
        ),
        (
            'sudo chsh git -s "$(which git-shell)"',
            "changes shell for git user to git-shell",
        ),
    ]

    # Execute each command; failures are logged and the remaining steps still run
    for command, description in commands:
        handle_command(command, description, logger)
124+
125+
126+
def is_safe_directory_added(safe_directory: str) -> bool:
    """Report whether a path is listed in git's system-level ``safe.directory``.

    Args:
        safe_directory: The exact path to look for; it must match one full
            line of the ``git config`` output.

    Returns:
        ``True`` if the query succeeded and the path is one of the listed
        entries, ``False`` otherwise (including when the query fails).

    """
    listing, _, status = run_command(
        "sudo git config --system --get-all safe.directory"
    )
    if status != 0:
        return False
    return safe_directory in listing.splitlines()
136+
137+
138+
def add_safe_directory(safe_directory: str, logger: logging.Logger) -> None:
    """Add a repository's ``.git`` directory to git's system ``safe.directory``.

    The entry is only added when it is not already present. The outcome is
    logged; a failing ``git config`` call is logged as an error and not raised.

    Args:
        safe_directory: Path of the repository working tree; ``.git`` is
            appended to it before it is registered.
        logger: Logger that receives the outcome.

    """
    import shlex  # local import so this function does not rely on a file-level import

    safe_directory = os.path.join(safe_directory, ".git")
    if is_safe_directory_added(safe_directory):
        logger.info(f"Directory '{safe_directory}' is already in the list.")
        return

    # Quote the path: the command goes through a shell, so spaces or shell
    # metacharacters in the directory name would otherwise split or alter it.
    command = (
        "sudo git config --system --add safe.directory "
        f"{shlex.quote(safe_directory)}"
    )
    _, stderr, exit_code = run_command(command)

    if exit_code == 0:
        logger.info(f"Directory '{safe_directory}' added to safe.directory.")
    else:
        logger.error(f"Error adding directory: {stderr}")
63152

64153

65154
def get_hash_string(input_string: str) -> str:

0 commit comments

Comments
 (0)