Merge pull request #32 from commit-0/aider_reorg

wenting-zhao · web-flow · commit fb742535d3fd · 2024-09-17T12:10:40.000-04:00
fix aider and commit0 compatibility issues
diff --git a/commit0/__main__.py b/commit0/__main__.py
@@ -65,15 +65,16 @@ def main() -> None:
         repo = sys.argv[2]
         commit0.harness.get_pytest_ids.main(repo, stdout=True)
     elif command == "test" or command == "test-reference":
-        repo = sys.argv[2]
+        # this command assumes execution in an arbitrary working directory
+        repo_or_repo_path = sys.argv[2]
         test_ids = sys.argv[3]
         if command == "test-reference":
             config.branch = "reference"
         commit0.harness.run_pytest_ids.main(
             config.dataset_name,
             config.dataset_split,
             config.base_dir,
-            repo,
+            repo_or_repo_path,
             config.branch,
             test_ids,
             config.backend,
diff --git a/commit0/harness/evaluate.py b/commit0/harness/evaluate.py
@@ -1,6 +1,5 @@
 import logging
 import os
-import traceback
 from collections import Counter
 
 from concurrent.futures import ThreadPoolExecutor, as_completed
@@ -10,7 +9,8 @@
 
 from commit0.harness.run_pytest_ids import main as run_tests
 from commit0.harness.get_pytest_ids import main as get_tests
-from commit0.harness.constants import RepoInstance, SPLIT
+from commit0.harness.constants import RepoInstance, SPLIT, RUN_PYTEST_LOG_DIR
+from commit0.harness.utils import get_hash_string
 
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
@@ -32,13 +32,16 @@ def main(
     dataset: Iterator[RepoInstance] = load_dataset(dataset_name, split=dataset_split)  # type: ignore
     repos = SPLIT[repo_split]
     pairs = []
+    log_dirs = []
     for example in dataset:
         repo_name = example["repo"].split("/")[-1]
         if repo_split != "all" and repo_name not in SPLIT[repo_split]:
             continue
         pairs.append((repo_name, example["test"]["test_dir"]))
+        hashed_test_ids = get_hash_string(example["test"]["test_dir"])
+        log_dir = RUN_PYTEST_LOG_DIR / repo_name / branch / hashed_test_ids
+        log_dirs.append(str(log_dir))
 
-    log_dirs = []
     with tqdm(total=len(repos), smoothing=0, desc="Evaluating repos") as pbar:
         with ThreadPoolExecutor(max_workers=num_workers) as executor:
             # Create a future for running each instance
@@ -61,13 +64,6 @@ def main(
             # Wait for each future to complete
             for future in as_completed(futures):
                 pbar.update(1)
-                try:
-                    # Update progress bar, check if instance ran successfully
-                    result = future.result()
-                    log_dirs.append(result)
-                except Exception:
-                    traceback.print_exc()
-                    continue
 
     # get numbers
     out = []
diff --git a/commit0/harness/run_pytest_ids.py b/commit0/harness/run_pytest_ids.py
@@ -1,9 +1,11 @@
-from datasets import load_dataset
+import git
+import os
+import sys
 import traceback
+from datasets import load_dataset
 from pathlib import Path
 
 from typing import Iterator
-from git import Repo
 from commit0.harness.constants import (
     EVAL_BACKENDS,
     Files,
@@ -31,14 +33,14 @@ def main(
     dataset_name: str,
     dataset_split: str,
     base_dir: str,
-    repo: str,
+    repo_or_repo_dir: str,
     branch: str,
     test_ids: str,
     backend: str,
     timeout: int,
     num_cpus: int,
     stdout: bool,
-) -> str:
+) -> None:
     """Runs the pytests for repos in a dataset.
 
     Tests are run either locally through docker
@@ -47,21 +49,36 @@ def main(
     dataset: Iterator[RepoInstance] = load_dataset(dataset_name, split=dataset_split)  # type: ignore
     spec = None
     example = None
+    repo_name = None
     for example in dataset:
-        if example["repo"].endswith(repo):
+        repo_name = example["repo"].split("/")[-1]
+        if repo_name in os.path.basename(repo_or_repo_dir):
             spec = make_spec(example)
             break
     assert spec is not None, "No spec available"
     assert example is not None, "No example available"
+    assert repo_name is not None, "No repo available"
 
     hashed_test_ids = get_hash_string(test_ids)
     # set up logging
-    log_dir = RUN_PYTEST_LOG_DIR / repo / branch / hashed_test_ids
+    log_dir = RUN_PYTEST_LOG_DIR / repo_name / branch / hashed_test_ids
     log_dir.mkdir(parents=True, exist_ok=True)
     log_file = log_dir / "run_pytest.log"
-    logger = setup_logger(repo, log_file)
+    logger = setup_logger(repo_name, log_file)
 
-    local_repo = Repo(f"{base_dir}/{repo}")
+    try:
+        local_repo = git.Repo(repo_or_repo_dir)
+    except git.exc.NoSuchPathError:  # type: ignore
+        repo_dir = os.path.join(base_dir, repo_name)
+        logger.error(f"{repo_or_repo_dir} is not a git dir, trying {repo_dir} again")
+        try:
+            local_repo = git.Repo(repo_dir)
+        except git.exc.NoSuchPathError:  # type: ignore
+            raise Exception(
+                f"{repo_dir} and {repo_or_repo_dir} are not git directories.\nUsage: commit0 test {{repo_dir}} {test_ids}"
+            )
+        except Exception as e:
+            raise e
     if branch == "reference":
         commit_id = example["reference_commit"]
     else:
@@ -106,21 +123,28 @@ def main(
             context.write_test_output(test_output, timed_out)
             if stdout:
                 print(test_output)
+            pytest_exit_code = extract_test_output(output, "echo ")
+            try:
+                pytest_exit_code = int(pytest_exit_code)
+            except Exception:
+                raise Exception(
+                    f"Fail to convert pytest_exit_code {pytest_exit_code} into an integer."
+                )
+        sys.exit(pytest_exit_code)
     except EvaluationError as e:
         error_msg = (
-            f"Error in running pytest for {repo}: {e}\n"
+            f"Error in running pytest for {repo_name}: {e}\n"
             f"{traceback.format_exc()}\n"
             f"Check ({log_file}) for more information."
         )
-        raise EvaluationError(repo, error_msg, logger)
+        raise EvaluationError(repo_name, error_msg, logger)
     except Exception as e:
         error_msg = (
             f"General error: {e}\n"
             f"{traceback.format_exc()}\n"
             f"Check ({log_file}) for more information."
         )
         raise RuntimeError(error_msg)
-    return str(log_dir)
 
 
 __all__ = []
diff --git a/commit0/harness/spec.py b/commit0/harness/spec.py
@@ -154,6 +154,7 @@ def make_eval_script_list(instance: RepoInstance, repo_directory: str) -> list[s
         "git apply --allow-empty -v /patch.diff",
         "git status",
         f"{instance['test']['test_cmd']} --json-report --json-report-file=report.json {{test_ids}}",
+        "echo $?",
         f"git reset --hard {instance['base_commit']}",
         "git status",
     ]
diff --git a/commit0/harness/utils.py b/commit0/harness/utils.py
@@ -44,7 +44,9 @@ def extract_test_output(ss: str, pattern: str) -> str:
             append = True
         # the next command started here, so we finished reading test output
         elif append and one.startswith("+"):
-            return "\n".join(out)
+            # remove the first element "+ {command}"
+            out = out[1:]
+            return "\n".join(out).strip()
         if append:
             out.append(one)
     return ""

Original file line number	Diff line number	Diff line change
`@@ -154,6 +154,7 @@ def make_eval_script_list(instance: RepoInstance, repo_directory: str) -> list[s`
`154`	`154`	`"git apply --allow-empty -v /patch.diff",`
`155`	`155`	`"git status",`
`156`	`156`	`f"{instance['test']['test_cmd']} --json-report --json-report-file=report.json {{test_ids}}",`
	`157`	`+ "echo $?",`
`157`	`158`	`f"git reset --hard {instance['base_commit']}",`
`158`	`159`	`"git status",`
`159`	`160`	`]`