update

nanjiangwill · nanjiangwill · commit 1439727a1e72 · 2024-10-30T13:47:45.000-04:00
diff --git a/agent/README.md b/agent/README.md
@@ -38,6 +38,8 @@ Available options include:
 `--max-lint-info-length: int`: Maximum length of the lint information to use. [Default: `10000`]
 `--pre-commit-config-path: str`: Path to the pre-commit config file. This is needed for running `lint`. [Default: `.pre-commit-config.yaml`]
 `--agent-config-file: str`: Path to write the agent config. [Default: `.agent.yaml`]
+`--add-import-module-to-context: bool`: Add import module to context. [Default: `False`]
+`--record-test-for-each-commit: bool`: Record test results for each commit. If set to `True`, the test results are saved in `experiment_log_dir/eval_results.json`. [Default: `False`]
 
 ## Running Agent
 Use `agent run [OPTIONS] BRANCH` to execute an agent on a specific branch.
diff --git a/agent/run_agent.py b/agent/run_agent.py
@@ -12,6 +12,7 @@
     get_lint_cmd,
     read_yaml_config,
 )
+import json
 import subprocess
 from agent.agents import AiderAgents
 from typing import Optional, Type, cast
@@ -46,20 +47,18 @@ def __exit__(
 
 
 def run_eval_after_each_commit(
-    branch: str, backend: str, commit0_config_file: str, file_log_dir: Path
-) -> None:
+    branch: str, backend: str, commit0_config_file: str
+) -> str:
     """Run the eval command after each commit."""
     eval_cmd = f"python -m commit0 evaluate --branch {branch} --backend {backend} --commit0-config-file {commit0_config_file} --timeout 100"
     try:
         result = subprocess.run(
             eval_cmd, shell=True, capture_output=True, text=True, check=True
         )
-        with open(file_log_dir / "current_commit_eval_result.txt", "w") as f:
-            f.write(result.stdout)
+        return result.stdout
     except subprocess.CalledProcessError as e:
         print(f"Error running eval command: {e}")
-        with open(file_log_dir / "current_commit_eval_result.txt", "w") as f:
-            f.write(e.stdout if e.stdout else str(e))
+        return e.stdout if e.stdout else str(e)
 
 
 def run_agent_for_repo(
@@ -147,6 +146,7 @@ def run_agent_for_repo(
     )
     experiment_log_dir.mkdir(parents=True, exist_ok=True)
 
+    eval_results = {}
     # write agent_config to .agent.yaml in the log_dir for record
     agent_config_log_file = experiment_log_dir / ".agent.yaml"
     with open(agent_config_log_file, "w") as agent_config_file:
@@ -179,8 +179,9 @@ def run_agent_for_repo(
                     test_first=True,
                 )
                 if agent_config.record_test_for_each_commit:
-                    run_eval_after_each_commit(
-                        branch, backend, commit0_config_file, test_log_dir
+                    current_commit = local_repo.head.commit.hexsha
+                    eval_results[current_commit] = run_eval_after_each_commit(
+                        branch, backend, commit0_config_file
                     )
 
                 # after running the agent, update the money display
@@ -211,8 +212,9 @@ def run_agent_for_repo(
                     lint_first=True,
                 )
                 if agent_config.record_test_for_each_commit:
-                    run_eval_after_each_commit(
-                        branch, backend, commit0_config_file, lint_log_dir
+                    current_commit = local_repo.head.commit.hexsha
+                    eval_results[current_commit] = run_eval_after_each_commit(
+                        branch, backend, commit0_config_file
                     )
 
                 # after running the agent, update the money display
@@ -239,8 +241,9 @@ def run_agent_for_repo(
                 )
                 agent_return = agent.run(message, "", lint_cmd, [f], file_log_dir)
                 if agent_config.record_test_for_each_commit:
-                    run_eval_after_each_commit(
-                        branch, backend, commit0_config_file, file_log_dir
+                    current_commit = local_repo.head.commit.hexsha
+                    eval_results[current_commit] = run_eval_after_each_commit(
+                        branch, backend, commit0_config_file
                     )
 
                 update_queue.put(
@@ -249,6 +252,10 @@ def run_agent_for_repo(
                         (repo_name, file_name, agent_return.last_cost),
                     )
                 )
+    if agent_config.record_test_for_each_commit:
+        with open(experiment_log_dir / "eval_results.json", "w") as f:
+            json.dump(eval_results, f)
+
     update_queue.put(("finish_repo", repo_name))
 
 
diff --git a/agent/run_agent_no_rich.py b/agent/run_agent_no_rich.py
@@ -14,6 +14,7 @@
     read_yaml_config,
 )
 import subprocess
+import json
 from agent.agents import AiderAgents
 from typing import cast
 from agent.class_types import AgentConfig
@@ -106,6 +107,7 @@ def run_agent_for_repo(
         / datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
     )
     experiment_log_dir.mkdir(parents=True, exist_ok=True)
+    eval_results = {}
 
     # write agent_config to .agent.yaml in the log_dir for record
     agent_config_log_file = experiment_log_dir / ".agent.yaml"
@@ -137,8 +139,9 @@ def run_agent_for_repo(
                     test_first=True,
                 )
                 if agent_config.record_test_for_each_commit:
-                    run_eval_after_each_commit(
-                        branch, backend, commit0_config_file, test_log_dir
+                    current_commit = local_repo.head.commit.hexsha
+                    eval_results[current_commit] = run_eval_after_each_commit(
+                        branch, backend, commit0_config_file
                     )
         elif agent_config.run_entire_dir_lint:
             # when unit test feedback is available, iterate over test files
@@ -159,8 +162,9 @@ def run_agent_for_repo(
                     lint_first=True,
                 )
                 if agent_config.record_test_for_each_commit:
-                    run_eval_after_each_commit(
-                        branch, backend, commit0_config_file, lint_log_dir
+                    current_commit = local_repo.head.commit.hexsha
+                    eval_results[current_commit] = run_eval_after_each_commit(
+                        branch, backend, commit0_config_file
                     )
         else:
             # when unit test feedback is not available, iterate over target files to edit
@@ -177,9 +181,13 @@ def run_agent_for_repo(
                 )
                 _ = agent.run(message, "", lint_cmd, [f], file_log_dir)
                 if agent_config.record_test_for_each_commit:
-                    run_eval_after_each_commit(
-                        branch, backend, commit0_config_file, file_log_dir
+                    current_commit = local_repo.head.commit.hexsha
+                    eval_results[current_commit] = run_eval_after_each_commit(
+                        branch, backend, commit0_config_file
                     )
+    if agent_config.record_test_for_each_commit:
+        with open(experiment_log_dir / "eval_results.json", "w") as f:
+            json.dump(eval_results, f)
 
 
 def run_agent(