Zeeeepa · Zeeeepa · Feb 19, 2025 · korbit-ai · Apr 22, 2025 · korbit-ai
diff --git a/pyproject.toml b/pyproject.toml
@@ -72,6 +72,7 @@ dependencies = [
   "neo4j",
   "modal>=0.73.45",
   "slack-sdk",
+  "datasets",
 ]
 
 license = { text = "Apache-2.0" }

diff --git a/src/codegen/extensions/swe_bench/__init__.py b/src/codegen/extensions/swe_bench/__init__.py
diff --git a/src/codegen/extensions/swe_bench/swe_bench_wrapper.py b/src/codegen/extensions/swe_bench/swe_bench_wrapper.py
@@ -0,0 +1,80 @@
+import shutil
+from collections.abc import Generator
+from typing import Any
+
+from datasets import load_dataset
+
+from codegen.extensions.swe_bench.utils import NO_ENV_SETUP, SWEBenchEntry, SWEBenchEnvSetup, SWEBenchSplit, construct_codebase
+from codegen.sdk.core.codebase import Codebase
+
+
+class SWEBenchWrapper:
+    """Iterate SWE-bench entries grouped by repository and environment setup commit.
+
+    On construction the ``princeton-nlp/SWE-bench`` dataset is loaded and every
+    entry of the ``train``, ``dev`` and ``test`` splits is indexed as
+    ``split -> repo -> environment_setup_commit -> [entries]``.
+    """
+
+    def __init__(self, remove_after_run: bool = False):
+        """Load the dataset and build the grouping index.
+
+        Args:
+            remove_after_run: When true, ``get_entries_for_split`` deletes a
+                repository's directory under ``/tmp/codegen`` once all of its
+                entries have been yielded.
+        """
+        print("Loading SWE-bench dataset...")
+        self.ds = load_dataset("princeton-nlp/SWE-bench")
+        print("SWE-bench dataset loaded.")
+        self.remove_after_run = remove_after_run
+        self.repo_groups = self.create_repo_groups()
+
+    def create_repo_groups(self) -> dict:
+        """Group the dataset entries by split, repository and environment setup commit.
+
+        Returns:
+            A nested dict ``{split: {repo: {environment_setup_commit: [entry, ...]}}}``.
+            Entries keep the order in which the dataset yields them.
+        """
+        splits: list[SWEBenchSplit] = ["train", "dev", "test"]
+        repo_groups: dict[SWEBenchSplit, dict[str, dict[str, list[Any]]]] = {}
+
+        for split in splits:
+            split_groups = repo_groups[split] = {}
+            for entry in self.ds[split]:
+                # setdefault creates the intermediate containers the first time
+                # a repo / environment setup commit is seen.
+                commit_groups = split_groups.setdefault(entry["repo"], {})
+                commit_groups.setdefault(entry["environment_setup_commit"], []).append(entry)
+
+        return repo_groups
+
+    def get_entries_for_split(self, split: SWEBenchSplit) -> Generator[tuple[SWEBenchEnvSetup | SWEBenchEntry, Codebase], None, None]:
+        """Yield every environment setup step and entry of ``split`` with its codebase.
+
+        One codebase is constructed per repository and the same object is yielded
+        each time; it is checked out to a different commit before each yield, so
+        consumers should finish with it before advancing the generator.
+
+        For each environment setup commit a ``SWEBenchEnvSetup`` is yielded first
+        (carrying ``NO_ENV_SETUP`` and skipping the checkout when the dataset
+        value is empty), followed by one ``SWEBenchEntry`` per entry with the
+        codebase checked out at the entry's ``base_commit``.
+
+        Args:
+            split: Which dataset split to iterate.
+
+        Yields:
+            ``(SWEBenchEnvSetup | SWEBenchEntry, Codebase)`` tuples.
+        """
+        # ===== [ For each repo in the split ] =====
+        for repo, commit_groups in self.repo_groups[split].items():
+            codebase = construct_codebase(repo_full_name=repo)
+            # ===== [ For each environment setup commit ] =====
+            for environment_setup_commit, entries in commit_groups.items():
+                if environment_setup_commit:
+                    # no need to parse the codebase on the environment commit
+                    codebase.checkout(commit=environment_setup_commit, remote=True)
+                    yield SWEBenchEnvSetup(split=split, environment_setup_commit=environment_setup_commit), codebase
+                else:
+                    yield SWEBenchEnvSetup(split=split, environment_setup_commit=NO_ENV_SETUP), codebase
+                # ===== [ For each test setup commit ] =====
+                for entry in entries:
+                    codebase.checkout(commit=entry["base_commit"], remote=True)
+                    yield SWEBenchEntry(entry=entry, split=split), codebase
+
+            # Clean up per repository. Doing this after the outer loop would only
+            # remove the last repo and would reference an unbound ``codebase``
+            # when the split contains no repositories.
+            if self.remove_after_run:
+                # NOTE(review): this path uses the full ``owner/name`` string;
+                # confirm it matches the directory construct_codebase clones into.
+                shutil.rmtree(f"/tmp/codegen/{repo}")
+
+
+if __name__ == "__main__":
+    # Demo: walk the "train" split and print what an agent harness would receive.
+    wrapper = SWEBenchWrapper()
+    for item, repo_codebase in wrapper.get_entries_for_split("train"):
+        if isinstance(item, SWEBenchEnvSetup):
+            print(f"Environment setup commit: {item.environment_setup_commit}")
+            # install dependencies...
+        elif isinstance(item, SWEBenchEntry):
+            record = item.entry
+            print(f"Entry: {record['instance_id']}")
+            # Only the first 20 characters of the problem statement are shown.
+            print(f"Task: {record['problem_statement'][:20]}")
+            # send off agent to solve tasks....
+
+        file_count = len(repo_codebase.files)
+        print(f"Number of files: {file_count}")
diff --git a/src/codegen/extensions/swe_bench/utils.py b/src/codegen/extensions/swe_bench/utils.py
@@ -0,0 +1,42 @@
+from typing import Literal
+
+from pydantic import BaseModel
+
+from codegen.git.repo_operator.remote_repo_operator import RemoteRepoOperator
+from codegen.git.schemas.repo_config import RepoConfig
+from codegen.sdk.codebase.config import ProjectConfig
+from codegen.sdk.core.codebase import Codebase, PyCodebaseType
+
+# Names of the dataset splits accepted by the SWE-bench helpers, as a Literal type.
+SWEBenchSplit = Literal["train", "dev", "test"]
+# Placeholder string used as the default ``environment_setup_commit`` of SWEBenchEnvSetup.
+NO_ENV_SETUP = "NO_ENV_SETUP"
+
+
+class SWEBenchEnvSetup(BaseModel):
+    """Marker for an environment setup step within a SWE-bench split."""
+
+    # Dataset split this setup step belongs to.
+    split: SWEBenchSplit
+    # Environment setup commit; defaults to the NO_ENV_SETUP placeholder.
+    environment_setup_commit: str = NO_ENV_SETUP
+
+
+class SWEBenchEntry(BaseModel):
+    """A single SWE-bench dataset entry tagged with its split."""
+
+    # Dataset split the entry belongs to.
+    split: SWEBenchSplit
+    # Raw dataset record as a dict; its schema is not defined in this module.
+    entry: dict
+
+
+def construct_codebase(repo_full_name: str) -> PyCodebaseType:
+    """Build a ``Codebase`` for the repository named ``repo_full_name``.
+
+    The repo config uses ``/tmp/codegen`` as its base directory and the last
+    ``/``-separated segment of ``repo_full_name`` as the repository name.
+
+    Args:
+        repo_full_name: Repository identifier, e.g. ``"owner/name"``.
+
+    Returns:
+        The ``Codebase`` built from a single project backed by a
+        ``RemoteRepoOperator`` for that repository.
+    """
+    config = RepoConfig(
+        name=repo_full_name.rsplit("/", 1)[-1],
+        full_name=repo_full_name,
+        base_dir="/tmp/codegen",
+    )
+
+    # Constructing the operator is bracketed by the clone/pull progress messages.
+    print(f"Cloning or pulling {repo_full_name}...")
+    operator = RemoteRepoOperator(repo_config=config, bot_commit=False)
+    print(f"Cloned or pulled {repo_full_name}.")
+
+    # Single project with base_path and subdirectories left unset.
+    project = ProjectConfig(repo_operator=operator, base_path=None, subdirectories=None)
+
+    print("Parsing codebase...")
+    parsed = Codebase(projects=[project])
+    print("Codebase parsed.")
+
+    return parsed
diff --git a/src/codegen/extensions/swebench/README.md b/src/codegen/extensions/swebench/README.md
@@ -0,0 +1,29 @@
+## Codegen Harness and Evaluator for SWE Bennch Development Tool
-## Codegen Harness and Evaluator for SWE Bennch Development Tool
+## Codegen Harness and Evaluator for SWE Bench Development Tool
-## Codegen Harness and Evaluator for SWE Bennch Development Tool
+## Codegen Harness and Evaluator for SWE Bench Development Tool
+
+This folder contains a harness and evaluator for the SWE Bench leaderboard, and enables developers to test and evaluate their codegen models on the SWE Bench leaderboard.
+
+It integrates directly into the Codegen agentic framework and can be built on top of.
+
+### Setup
+
+Remember to install all the dependencies for the environment.
+
+### Usage
+
+#### Edit agent.py, your codegen agent
+
+This file contains the main logic for the agent.
+
+The agent taps into tree-sitter through Codegen. You can modify it by adding tools, extending its capabilities, changing its prompts, and more.
+
+It is invoked in the harness script.
+
+#### Run harness.py to run the agent
+
+This script will gather the correct dataset, run the agent, and save the results.
+
+#### Run report.py to generate a report
+
+This script generates a report from the results: it loops through all the results and evaluates each one. Note: there is currently a known error in the Docker image.
+
+There are currently example predictions in the `predictions/results` folder.
diff --git a/src/codegen/extensions/swebench/agent.py b/src/codegen/extensions/swebench/agent.py
@@ -0,0 +1,129 @@
+from langchain_openai import ChatOpenAI
+from codegen import Codebase
+
+"""Demo implementation of an agent with Codegen tools."""
+
+from langchain.agents import AgentExecutor
+from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
+from langchain.hub import pull
+from langchain.tools import BaseTool
+from langchain_core.chat_history import InMemoryChatMessageHistory
+from langchain_core.messages import BaseMessage
+from langchain_core.runnables.history import RunnableWithMessageHistory
+from langchain_openai import ChatOpenAI
+
+from codegen import Codebase
+
+from codegen.extensions.langchain.tools import (
+    CommitTool,
+    CreateFileTool,
+    DeleteFileTool,
+    EditFileTool,
+    GithubCreatePRCommentTool,
+    GithubCreatePRReviewCommentTool,
+    GithubCreatePRTool,
+    GithubViewPRTool,
+    ListDirectoryTool,
+    MoveSymbolTool,
+    RenameFileTool,
+    RevealSymbolTool,
+    SearchTool,
+    SemanticEditTool,
+    SemanticSearchTool,
+    ViewFileTool,
+)
+
+
+def create_codebase_agent(
+    codebase: Codebase,
+    model_name: str = "gpt-4o",
+    temperature: float = 0,
+    verbose: bool = True,
+    chat_history: list[BaseMessage] | None = None,
+) -> RunnableWithMessageHistory:
+    """Create an agent with all codebase tools.
+
+    Args:
+        codebase: The codebase to operate on
+        model_name: Name of the model to use (default: gpt-4o)
+        temperature: Model temperature (default: 0)
+        verbose: Whether to print agent's thought process (default: True)
+        chat_history: Messages used to seed the in-memory message history
+            (default: None, meaning an empty history)
+
+    Returns:
+        Initialized agent with message history
+    """
+    # A fresh list per call avoids sharing one mutable default between calls;
+    # a caller-supplied list is passed through unchanged.
+    initial_messages = chat_history if chat_history is not None else []
+
+    # Initialize language model
+    llm = ChatOpenAI(
+        model_name=model_name,
+        temperature=temperature,
+    )
+
+    # Get all codebase tools
+    tools = [
+        ViewFileTool(codebase),
+        ListDirectoryTool(codebase),
+        SearchTool(codebase),
+        EditFileTool(codebase),
+        CreateFileTool(codebase),
+        DeleteFileTool(codebase),
+        RenameFileTool(codebase),
+        MoveSymbolTool(codebase),
+        # RevealSymbolTool(codebase),
+        SemanticEditTool(codebase),
+        SemanticSearchTool(codebase),
+        CommitTool(codebase),
+        GithubCreatePRTool(codebase),
+        GithubViewPRTool(codebase),
+        GithubCreatePRCommentTool(codebase),
+        GithubCreatePRReviewCommentTool(codebase),
+    ]
+
+    # Get the prompt to use
+    prompt = pull("hwchase17/openai-functions-agent")
+
+    # Create the agent
+    agent = OpenAIFunctionsAgent(
+        llm=llm,
+        tools=tools,
+        prompt=prompt,
+    )
+
+    # Create the agent executor
+    agent_executor = AgentExecutor(
+        agent=agent,
+        tools=tools,
+        verbose=verbose,
+    )
+
+    # Create message history handler
+    message_history = InMemoryChatMessageHistory(messages=initial_messages)
+
+    # Wrap with message history. The factory ignores session_id, so every
+    # session shares the single history object created above.
+    return RunnableWithMessageHistory(
+        agent_executor,
+        lambda session_id: message_history,
+        input_messages_key="input",
+        history_messages_key="chat_history",
+    )
+
+
+# Demo script: build a codebase for fastapi/fastapi, create an agent, ask one question.
+# NOTE(review): these statements are at module top level, so they run whenever
+# this module is imported, not only when it is executed as a script.
+# Initialize codebase
+codebase = Codebase.from_repo("fastapi/fastapi")
+
+# Create the agent using the "gpt-4o" model
+agent = create_codebase_agent(
+    codebase=codebase,
+    model_name="gpt-4o",
+    temperature=0,
+    verbose=True
+)
+
+
+
+# Analyze dependencies
+# The session_id is required by the message-history wrapper's config.
+result = agent.invoke(
+    {"input": "What are the dependencies of the FastAPI class?"},
+    config={"configurable": {"session_id": "demo"}}
+)
+# The agent's answer is read from the "output" key of the result.
+print(result["output"])
+