Merge pull request #39 from commit-0/coverage

wenting-zhao · web-flow · commit 68cfc3a6eb15 · 2024-09-19T22:27:17.000-04:00
Coverage
diff --git a/commit0/harness/docker_build.py b/commit0/harness/docker_build.py
@@ -7,13 +7,13 @@
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
 from typing import Any
-import sys
 
 from commit0.harness.constants import (
     BASE_IMAGE_BUILD_DIR,
     REPO_IMAGE_BUILD_DIR,
 )
 from commit0.harness.spec import get_specs_from_dataset
+from commit0.harness.utils import setup_logger, close_logger
 
 ansi_escape = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]")
 
@@ -33,32 +33,6 @@ def __str__(self):
         )
 
 
-def setup_logger(repo: str, log_file: Path, mode: str = "w") -> logging.Logger:
-    """Used for logging the build process of images and running containers.
-    It writes logs to the log file.
-    """
-    log_file.parent.mkdir(parents=True, exist_ok=True)
-    logger = logging.getLogger(f"{repo}.{log_file.name}")
-    handler = logging.FileHandler(log_file, mode=mode)
-    stdout_handler = logging.StreamHandler(sys.stdout)
-    logger.addHandler(stdout_handler)
-    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
-    handler.setFormatter(formatter)
-    logger.addHandler(handler)
-    logger.setLevel(logging.INFO)
-    logger.propagate = False
-    setattr(logger, "log_file", log_file)
-    return logger
-
-
-def close_logger(logger: logging.Logger) -> None:
-    """Closes all handlers associated with the given logger to prevent too many open files."""
-    # To avoid too many open files
-    for handler in logger.handlers:
-        handler.close()
-        logger.removeHandler(handler)
-
-
 def build_image(
     image_name: str,
     setup_scripts: dict,
diff --git a/commit0/harness/execution_context.py b/commit0/harness/execution_context.py
@@ -17,9 +17,6 @@
 
 from commit0.harness.constants import Files
 from commit0.harness.spec import Spec
-from commit0.harness.utils import (
-    EvaluationError,
-)
 from commit0.harness.docker_build import (
     close_logger,
 )
@@ -32,14 +29,6 @@
 )
 
 
-def read_stream(stream: modal.io_streams.StreamReader) -> str:
-    """Read stream"""
-    strings = []
-    for line in stream:
-        strings.append(line)
-    return "\n".join(strings)
-
-
 class ExecutionBackend(StrEnum):
     LOCAL = auto()
     MODAL = auto()
@@ -54,6 +43,7 @@ def __init__(
         num_cpus: int,
         log_dir: Path,
         files_to_copy: Optional[Files] = None,
+        files_to_collect: Optional[list[str]] = None,
     ):
         """Create the remote execution context
 
@@ -65,25 +55,13 @@ def __init__(
         self.timeout = timeout
         self.num_cpus = num_cpus
         self.log_dir = log_dir
+        self.files_to_collect = files_to_collect
 
     @abstractmethod
     def exec_run_with_timeout(self, command: str) -> tuple[str, bool, float]:
         """Execute a test command"""
         raise NotImplementedError
 
-    def write_test_output(self, test_output: str, timed_out: bool) -> None:
-        """Write test output"""
-        test_output_path = self.log_dir / "test_output.txt"
-        with open(test_output_path, "w") as f:
-            f.write(test_output)
-            if timed_out:
-                f.write(f"\n\nTimeout error: {self.timeout} seconds exceeded.")
-                raise EvaluationError(
-                    self.spec.repo,
-                    f"Test timed out after {self.timeout} seconds.",
-                    self.logger,
-                )
-
     def __enter__(self):
         return self
 
@@ -106,8 +84,17 @@ def __init__(
         num_cpus: int,
         log_dir: Path,
         files_to_copy: Optional[Files] = None,
+        files_to_collect: Optional[list[str]] = None,
     ):
-        super().__init__(spec, logger, timeout, num_cpus, log_dir)
+        super().__init__(
+            spec,
+            logger,
+            timeout,
+            num_cpus,
+            log_dir,
+            files_to_copy=files_to_copy,
+            files_to_collect=files_to_collect,
+        )
 
         self.client = docker.from_env()
         self.container = create_container(
@@ -126,17 +113,16 @@ def exec_run_with_timeout(self, command: str) -> tuple[str, bool, float]:
         """Exec"""
         output = exec_run_with_timeout(self.container, command, self.timeout)
 
-        # copy back report.json if there is any
-        report_file = Path(self.spec.repo_directory) / "report.json"
-        # Run the test command inside the container to check if the file exists
-        exit_code, test_output = self.container.exec_run(
-            f"test -e {report_file}", demux=True
-        )
-        # Check the exit code of the command
-        if exit_code == 0:
-            copy_from_container(
-                self.container, report_file, self.log_dir / "report.json"
-            )
+        if self.files_to_collect:
+            for fname in self.files_to_collect:
+                file = Path(self.spec.repo_directory) / fname
+                # Run the test command inside the container to check if the file exists
+                exit_code, test_output = self.container.exec_run(
+                    f"test -e {file}", demux=True
+                )
+                # Check the exit code of the command
+                if exit_code == 0:
+                    copy_from_container(self.container, file, self.log_dir / fname)
         return output
 
     def __exit__(
@@ -158,8 +144,17 @@ def __init__(
         num_cpus: int,
         log_dir: Path,
         files_to_copy: Optional[Files] = None,
+        files_to_collect: Optional[list[str]] = None,
     ):
-        super().__init__(spec, logger, timeout, num_cpus, log_dir)
+        super().__init__(
+            spec,
+            logger,
+            timeout,
+            num_cpus,
+            log_dir,
+            files_to_copy=files_to_copy,
+            files_to_collect=files_to_collect,
+        )
 
         self.app = modal.App()
 
@@ -176,13 +171,18 @@ def exec_run_with_timeout(self, command: str) -> tuple[str, bool, float]:
         """Execute command on modal sandbox"""
         start_time = time.time()
         with modal.Volume.ephemeral() as vol:
-            # copy back report.json if there is any
-            report_file = Path(self.spec.repo_directory) / "report.json"
-
+            cp_cmd = ""
+            if self.files_to_collect:
+                for fname in self.files_to_collect:
+                    remote_file = Path(self.spec.repo_directory) / fname
+                    curr_cp_cmd = f" && cp {str(remote_file)} /vol/{fname} 2>/dev/null"
+                    cp_cmd += curr_cp_cmd
+
+            command += cp_cmd
             self.sandbox = modal.Sandbox.create(
                 "bash",
                 "-c",
-                f"{command} && cp {str(report_file)} /vol/report.json",
+                command,
                 image=self.image,
                 cpu=self.num_cpus,
                 timeout=self.timeout,
@@ -191,26 +191,22 @@ def exec_run_with_timeout(self, command: str) -> tuple[str, bool, float]:
             )
             self.sandbox.wait()
 
-            # stdout has been redirected to stderr
-            stdout = read_stream(self.sandbox.stderr)
-
             return_code = self.sandbox.returncode
             # https://github.com/modal-labs/modal-client/blob/d577b2916b5c3bf4ebbcb58fadced84d85e1cf8c/modal/sandbox.py#L413
             if return_code == 124:
                 timed_out = True
             else:
                 timed_out = False
 
-            # copy over report.json from mount
-            with (self.log_dir / "report.json").open("wb") as f:
-                for data in vol.read_file("report.json"):
-                    f.write(data)
+            if self.files_to_collect:
+                for fname in self.files_to_collect:
+                    with (self.log_dir / fname).open("wb") as f:
+                        for data in vol.read_file(fname):
+                            f.write(data)
 
             self.sandbox.terminate()
-
             end_time = time.time()
-
-            return stdout, timed_out, end_time - start_time
+            return self.sandbox.stderr.read(), timed_out, end_time - start_time
 
     def __exit__(
         self,
diff --git a/commit0/harness/run_pytest_ids.py b/commit0/harness/run_pytest_ids.py
@@ -12,15 +12,13 @@
     RUN_PYTEST_LOG_DIR,
     RepoInstance,
 )
-from commit0.harness.docker_build import (
-    setup_logger,
-)
 from commit0.harness.spec import make_spec
 from commit0.harness.utils import (
     EvaluationError,
-    extract_test_output,
     get_hash_string,
     generate_patch_between_commits,
+    setup_logger,
+    close_logger,
 )
 from commit0.harness.execution_context import (
     ExecutionBackend,
@@ -114,29 +112,29 @@ def main(
         eval_script={"src": eval_file, "dest": Path("/eval.sh")},
         patch={"src": patch_file, "dest": Path("/patch.diff")},
     )
+    files_to_collect = [
+        "report.json",
+        "coverage.json",
+        "pytest_exit_code.txt",
+        "test_output.txt",
+    ]
 
     try:
         with execution_context(
-            spec, logger, timeout, num_cpus, log_dir, files_to_copy
+            spec, logger, timeout, num_cpus, log_dir, files_to_copy, files_to_collect
         ) as context:
             output, timed_out, total_runtime = context.exec_run_with_timeout(
                 "/bin/bash /eval.sh"
             )
-            logger.info(output)
-            test_output = extract_test_output(
-                output, "--json-report --json-report-file=report.json"
-            )
-            context.write_test_output(test_output, timed_out)
-            if stdout:
-                print(test_output)
-            pytest_exit_code = extract_test_output(output, "echo ")
-            try:
-                pytest_exit_code = int(pytest_exit_code)
-            except Exception:
-                raise Exception(
-                    f"Fail to convert pytest_exit_code {pytest_exit_code} into an integer."
+            if timed_out:
+                raise EvaluationError(
+                    repo_name,
+                    f"Test timed out after {timeout} seconds.",
+                    logger,
                 )
-        sys.exit(pytest_exit_code)
+        close_logger(logger)
+        pytest_exit_code = Path(log_dir / "pytest_exit_code.txt").read_text().strip()
+        sys.exit(int(pytest_exit_code))
     except EvaluationError as e:
         error_msg = (
             f"Error in running pytest for {repo_name}: {e}\n"
diff --git a/commit0/harness/spec.py b/commit0/harness/spec.py
@@ -153,13 +153,11 @@ def make_eval_script_list(instance: RepoInstance, repo_directory: str) -> list[s
         f"git reset --hard {instance['base_commit']}",
         "git apply --allow-empty -v /patch.diff",
         "git status",
-        f"{instance['test']['test_cmd']} --json-report --json-report-file=report.json --continue-on-collection-errors {{test_ids}}",
-        "echo $?",
+        f"{instance['test']['test_cmd']} --json-report --json-report-file=report.json --continue-on-collection-errors --cov=. --cov-branch --cov-report json {{test_ids}} > test_output.txt 2>&1",
+        "echo $? > pytest_exit_code.txt",
         f"git reset --hard {instance['base_commit']}",
         "git status",
     ]
-    for i in range(len(eval_script_list)):
-        eval_script_list[i] = f"{eval_script_list[i]} 1>&2"
     return eval_script_list
 
 
diff --git a/commit0/harness/utils.py b/commit0/harness/utils.py
@@ -4,6 +4,8 @@
 import logging
 import os
 import time
+import sys
+from pathlib import Path
 from typing import Optional
 
 from fastcore.net import HTTP404NotFoundError, HTTP403ForbiddenError  # type: ignore
@@ -25,6 +27,32 @@ def __str__(self):
         )
 
 
+def setup_logger(repo: str, log_file: Path, mode: str = "w") -> logging.Logger:
+    """Create the logger ``{repo}.{log_file.name}``: INFO+ records go to stdout and, with a
+    timestamp, to ``log_file`` (opened with ``mode``; missing parent directories are created).
+    """
+    log_file.parent.mkdir(parents=True, exist_ok=True)
+    logger = logging.getLogger(f"{repo}.{log_file.name}")
+    handler = logging.FileHandler(log_file, mode=mode)
+    stdout_handler = logging.StreamHandler(sys.stdout)
+    logger.addHandler(stdout_handler)
+    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+    logger.setLevel(logging.INFO)
+    logger.propagate = False
+    setattr(logger, "log_file", log_file)
+    return logger
+
+
+def close_logger(logger: logging.Logger) -> None:
+    """Closes and detaches all handlers of the given logger to prevent too many open files."""
+    # Iterate over a copy: removeHandler() mutates logger.handlers and would skip handlers.
+    for handler in list(logger.handlers):
+        handler.close()
+        logger.removeHandler(handler)
+
+
 def get_hash_string(input_string: str) -> str:
     # Create a new SHA-256 hash object
     sha256 = hashlib.sha256()