Skip to content

Commit f2d4627

Browse files
jemeza-codegenvictorxhengtomcodgen
authored
Swe bench treehacks codegen (#552)
# Motivation <!-- Why is this change necessary? --> # Content <!-- Please include a summary of the change --> # Testing <!-- How was the change tested? --> # Please check the following before marking your PR as ready for review - [ ] I have added tests for my changes - [ ] I have updated the documentation or added new documentation as needed --------- Co-authored-by: Victor Cheng <[email protected]> Co-authored-by: tomcodgen <[email protected]>
1 parent d8278f0 commit f2d4627

File tree

14 files changed

+1453
-0
lines changed

14 files changed

+1453
-0
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ dependencies = [
7272
"neo4j",
7373
"modal>=0.73.45",
7474
"slack-sdk",
75+
"datasets",
7576
]
7677

7778
license = { text = "Apache-2.0" }

src/codegen/extensions/swe_bench/__init__.py

Whitespace-only changes.
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import shutil
2+
from collections.abc import Generator
3+
from typing import Any
4+
5+
from datasets import load_dataset
6+
7+
from codegen.extensions.swe_bench.utils import NO_ENV_SETUP, SWEBenchEntry, SWEBenchEnvSetup, SWEBenchSplit, construct_codebase
8+
from codegen.sdk.core.codebase import Codebase
9+
10+
11+
class SWEBenchWrapper:
    """Iterate over SWE-bench entries grouped by repository and environment commit.

    The dataset is loaded once at construction time and indexed as
    ``split -> repo -> environment_setup_commit -> [entries]`` so that each
    repository only has to be constructed once per split.
    """

    def __init__(self, remove_after_run: bool = False):
        """Load the SWE-bench dataset and pre-group its entries.

        Args:
            remove_after_run: When true, the local checkout of a repository is
                deleted once all of its entries have been yielded.
        """
        print("Loading SWE-bench dataset...")
        self.ds = load_dataset("princeton-nlp/SWE-bench")
        print("SWE-bench dataset loaded.")
        self.remove_after_run = remove_after_run
        self.repo_groups = self.create_repo_groups()

    def create_repo_groups(self) -> dict:
        """Group every dataset entry by split, repo and environment setup commit.

        Returns:
            A nested dict ``{split: {repo: {environment_setup_commit: [entry, ...]}}}``.
            Every split in ("train", "dev", "test") is present as a key, even
            when it holds no entries. Entries keep their dataset order.
        """
        splits: list[SWEBenchSplit] = ["train", "dev", "test"]
        repo_groups: dict[SWEBenchSplit, dict[str, dict[str, list[Any]]]] = {}

        for split in splits:
            by_repo = repo_groups.setdefault(split, {})
            for entry in self.ds[split]:
                by_commit = by_repo.setdefault(entry["repo"], {})
                by_commit.setdefault(entry["environment_setup_commit"], []).append(entry)

        return repo_groups

    def get_entries_for_split(self, split: SWEBenchSplit) -> Generator[tuple[SWEBenchEnvSetup | SWEBenchEntry, Codebase], None, None]:
        """Yield environment-setup markers and test entries for one split.

        For each repository a codebase is constructed once. For each of its
        environment setup commits a ``SWEBenchEnvSetup`` is yielded first
        (checked out when the commit is non-empty, otherwise carrying
        ``NO_ENV_SETUP``), followed by one ``SWEBenchEntry`` per dataset entry
        with the codebase checked out at that entry's ``base_commit``.

        Args:
            split: Which dataset split to iterate.

        Yields:
            ``(SWEBenchEnvSetup | SWEBenchEntry, codebase)`` tuples. The same
            codebase object is reused for every item of a repository.
        """
        # ===== [ For each repo in the split ] =====
        for repo, commit_groups in self.repo_groups[split].items():
            # construct the codebase for the repo
            codebase = construct_codebase(repo_full_name=repo)

            # ===== [ For each environment setup commit ] =====
            for environment_setup_commit, entries in commit_groups.items():
                if environment_setup_commit:
                    # no need to parse the codebase on the environment commit
                    codebase.checkout(commit=environment_setup_commit, remote=True)
                    yield SWEBenchEnvSetup(split=split, environment_setup_commit=environment_setup_commit), codebase
                else:
                    yield SWEBenchEnvSetup(split=split, environment_setup_commit=NO_ENV_SETUP), codebase

                # ===== [ For each test setup commit ] =====
                for entry in entries:
                    codebase.checkout(commit=entry["base_commit"], remote=True)
                    # yield the test entry with a parsed codebase object
                    yield SWEBenchEntry(entry=entry, split=split), codebase

            if self.remove_after_run:
                # construct_codebase configures the clone with only the last
                # segment of "owner/name" under /tmp/codegen, so the cleanup
                # must target that directory rather than the full repo name.
                # NOTE(review): assumes the operator clones into
                # <base_dir>/<name> -- confirm against RepoConfig.
                repo_name = repo.split("/")[-1]
                shutil.rmtree(f"/tmp/codegen/{repo_name}")
67+
68+
if __name__ == "__main__":
    # Manual smoke run: walk the "train" split and print what each yielded
    # item is, followed by the size of the codebase it was yielded with.
    wrapper = SWEBenchWrapper()
    train_items = wrapper.get_entries_for_split("train")
    for entry, codebase in train_items:
        if isinstance(entry, SWEBenchEnvSetup):
            print(f"Environment setup commit: {entry.environment_setup_commit}")
            # dependencies for this environment would be installed here
        elif isinstance(entry, SWEBenchEntry):
            print(f"Entry: {entry.entry['instance_id']}")
            problem_statement = entry.entry["problem_statement"]
            # only the first 20 characters of the task text are shown
            print(f"Task: {problem_statement[:20]}")
            # an agent would be sent off to solve the task here

        print(f"Number of files: {len(codebase.files)}")
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
from typing import Literal
2+
3+
from pydantic import BaseModel
4+
5+
from codegen.git.repo_operator.remote_repo_operator import RemoteRepoOperator
6+
from codegen.git.schemas.repo_config import RepoConfig
7+
from codegen.sdk.codebase.config import ProjectConfig
8+
from codegen.sdk.core.codebase import Codebase, PyCodebaseType
9+
10+
# Define the SWEBenchSplit type using Literal: the three split names this
# package accepts wherever a split is required.
SWEBenchSplit = Literal["train", "dev", "test"]
# Placeholder commit value; it is the default of
# SWEBenchEnvSetup.environment_setup_commit.
NO_ENV_SETUP = "NO_ENV_SETUP"
13+
14+
15+
# Pydantic model describing an environment-setup step for one split.
class SWEBenchEnvSetup(BaseModel):
    # Dataset split this setup step belongs to.
    split: SWEBenchSplit
    # Commit to set the environment up on; falls back to the NO_ENV_SETUP
    # placeholder when no commit is supplied.
    environment_setup_commit: str = NO_ENV_SETUP
18+
19+
20+
# Pydantic model wrapping a single dataset entry together with its split.
class SWEBenchEntry(BaseModel):
    # Dataset split the entry came from.
    split: SWEBenchSplit
    # The raw dataset row as a dict.
    # NOTE(review): key schema is not defined here -- presumably the SWE-bench
    # columns; confirm against the dataset.
    entry: dict
23+
24+
25+
def construct_codebase(repo_full_name: str) -> PyCodebaseType:
    """Build a ``Codebase`` for the given repository.

    Args:
        repo_full_name: Repository identifier; the text after the last "/"
            is used as the short repo name.

    Returns:
        The ``Codebase`` built from a single project backed by a
        ``RemoteRepoOperator`` whose config uses "/tmp/codegen" as base dir.
    """
    short_name = repo_full_name.rsplit("/", 1)[-1]
    config = RepoConfig(name=short_name, full_name=repo_full_name, base_dir="/tmp/codegen")

    # Creating the operator is what fetches the repository (per the progress
    # messages below -- NOTE(review): confirm clone-vs-pull behaviour).
    print(f"Cloning or pulling {repo_full_name}...")
    operator = RemoteRepoOperator(repo_config=config, bot_commit=False)
    print(f"Cloned or pulled {repo_full_name}.")

    # A single project spanning the whole repo: no base path, no subdirectories.
    project = ProjectConfig(repo_operator=operator, base_path=None, subdirectories=None)

    print("Parsing codebase...")
    parsed = Codebase(projects=[project])
    print("Codebase parsed.")

    return parsed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
## Codegen Harness and Evaluator for SWE Bench Development Tool
2+
3+
This folder contains a harness and evaluator for the SWE Bench leaderboard, and enables developers to test and evaluate their codegen models on the SWE Bench leaderboard.
4+
5+
It integrates directly into the Codegen agentic framework and can be built on top of.
6+
7+
### Setup
8+
9+
Remember to install all the dependencies for the environment.
10+
11+
### Usage
12+
13+
#### Edit agent.py, your codegen agent
14+
15+
This file contains the main logic for the agent.
16+
17+
The agent taps into the tree sitter using codegen. You can modify this by adding additional tools, extending its capabilities, prompts, and more.
18+
19+
It is invoked in the harness script.
20+
21+
#### Run harness.py to run the agent
22+
23+
This script will gather the correct dataset, run the agent, and save the results.
24+
25+
#### Run report.py to generate a report
26+
27+
This script will generate a report from the results. It will loop through all the results and generate a report to evaluate each. Currently, there is an error in the docker image.
28+
29+
There are currently example predictions in the `predictions/results` folder.
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
from langchain_openai import ChatOpenAI
2+
from codegen import Codebase
3+
4+
"""Demo implementation of an agent with Codegen tools."""
5+
6+
from langchain.agents import AgentExecutor
7+
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
8+
from langchain.hub import pull
9+
from langchain.tools import BaseTool
10+
from langchain_core.chat_history import InMemoryChatMessageHistory
11+
from langchain_core.messages import BaseMessage
12+
from langchain_core.runnables.history import RunnableWithMessageHistory
13+
from langchain_openai import ChatOpenAI
14+
15+
from codegen import Codebase
16+
17+
from codegen.extensions.langchain.tools import (
18+
CommitTool,
19+
CreateFileTool,
20+
DeleteFileTool,
21+
EditFileTool,
22+
GithubCreatePRCommentTool,
23+
GithubCreatePRReviewCommentTool,
24+
GithubCreatePRTool,
25+
GithubViewPRTool,
26+
ListDirectoryTool,
27+
MoveSymbolTool,
28+
RenameFileTool,
29+
RevealSymbolTool,
30+
SearchTool,
31+
SemanticEditTool,
32+
SemanticSearchTool,
33+
ViewFileTool,
34+
)
35+
36+
37+
def create_codebase_agent(
    codebase: Codebase,
    model_name: str = "gpt-4o",
    temperature: float = 0,
    verbose: bool = True,
    chat_history: list[BaseMessage] | None = None,
) -> RunnableWithMessageHistory:
    """Create an agent with all codebase tools.

    Args:
        codebase: The codebase to operate on
        model_name: Name of the model to use (default: gpt-4o)
        temperature: Model temperature (default: 0)
        verbose: Whether to print agent's thought process (default: True)
        chat_history: Messages to seed the conversation history with
            (default: None, meaning an empty history)

    Returns:
        Initialized agent with message history
    """
    # A fresh list per call: a literal ``[]`` default would be one list object
    # shared by every call that omits the argument.
    initial_messages = [] if chat_history is None else chat_history

    # Initialize language model
    llm = ChatOpenAI(
        model_name=model_name,
        temperature=temperature,
    )

    # Get all codebase tools
    tools = [
        ViewFileTool(codebase),
        ListDirectoryTool(codebase),
        SearchTool(codebase),
        EditFileTool(codebase),
        CreateFileTool(codebase),
        DeleteFileTool(codebase),
        RenameFileTool(codebase),
        MoveSymbolTool(codebase),
        # RevealSymbolTool(codebase),
        SemanticEditTool(codebase),
        SemanticSearchTool(codebase),
        CommitTool(codebase),
        GithubCreatePRTool(codebase),
        GithubViewPRTool(codebase),
        GithubCreatePRCommentTool(codebase),
        GithubCreatePRReviewCommentTool(codebase),
    ]

    # Get the prompt to use
    prompt = pull("hwchase17/openai-functions-agent")

    # Create the agent
    agent = OpenAIFunctionsAgent(
        llm=llm,
        tools=tools,
        prompt=prompt,
    )

    # Create the agent executor
    agent_executor = AgentExecutor(
        agent=agent,
        tools=tools,
        verbose=verbose,
    )

    # Create message history handler
    message_history = InMemoryChatMessageHistory(messages=initial_messages)

    # Wrap with message history; every session id maps to the same history
    # object created above.
    return RunnableWithMessageHistory(
        agent_executor,
        lambda session_id: message_history,
        input_messages_key="input",
        history_messages_key="chat_history",
    )
108+
109+
110+
# Demo run against the FastAPI repository.
# NOTE(review): these statements execute at import time, so importing this
# module builds the codebase and invokes the model -- consider moving them
# behind an `if __name__ == "__main__":` guard.
codebase = Codebase.from_repo("fastapi/fastapi")

# Create the agent backed by the "gpt-4o" model
agent = create_codebase_agent(codebase=codebase, model_name="gpt-4o", temperature=0, verbose=True)

# Ask one question under the "demo" session id and print the agent's answer
result = agent.invoke(
    {"input": "What are the dependencies of the FastAPI class?"},
    config={"configurable": {"session_id": "demo"}},
)
print(result["output"])
129+

0 commit comments

Comments
 (0)