Skip to content

Commit f56edb5

Browse files
authored
fix: deep code research (#512)
1 parent b09df25 commit f56edb5

File tree

4 files changed

+283
-0
lines changed

4 files changed

+283
-0
lines changed
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# Deep Code Research Example
2+
3+
This example demonstrates how to use Codegen to build a CLI tool for deep code research. The tool allows you to:
4+
5+
- Clone and analyze any GitHub repository
6+
- Ask questions about the codebase
7+
- Explore dependencies and relationships
8+
- Search for patterns and implementations
9+
10+
## Setup
11+
12+
1. Install the requirements:
13+
14+
```bash
15+
uv venv
16+
source .venv/bin/activate
17+
uv sync
18+
```
19+
20+
2. Set up your OpenAI API key in a `.env` file:
21+
22+
```bash
23+
OPENAI_API_KEY=your-api-key
24+
```
25+
26+
## Usage
27+
28+
Run the CLI tool by providing a GitHub repository:
29+
30+
```bash
31+
python run.py research "owner/repo"
32+
```
33+
34+
For example:
35+
36+
```bash
37+
python run.py research "fastapi/fastapi"
38+
```
39+
40+
You can also provide an initial query:
41+
42+
```bash
43+
python run.py research "fastapi/fastapi" -q "Explain the main components"
44+
```
45+
46+
## Example Queries
47+
48+
- "Explain the main components and their relationships"
49+
- "Find all usages of the FastAPI class"
50+
- "Show me the dependency graph for the routing module"
51+
- "What design patterns are used in this codebase?"
52+
- "How is dependency injection implemented?"
53+
54+
## Features
55+
56+
The research agent has access to several powerful tools:
57+
58+
- Semantic code search
59+
- Symbol relationship analysis
60+
- Directory structure exploration
61+
- Code viewing and analysis
62+
63+
The agent maintains conversation history, so you can ask follow-up questions and build on previous findings.
64+
65+
## Exit
66+
67+
Type "exit" or "quit" to end the research session.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
click>=8.0.0
2+
rich>=10.0.0
3+
rich-click>=1.7.0
4+
langchain-core>=0.1.0
5+
langchain-openai>=0.0.5
6+
langchain>=0.1.0
7+
codegen-sdk>=0.1.0
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
"""CLI program for deep code research using Codegen."""
2+
3+
import sys
4+
import warnings
5+
from pathlib import Path
6+
from typing import Optional
7+
8+
import rich_click as click
9+
from codegen import Codebase
10+
from codegen.extensions.langchain.agent import create_agent_with_tools
11+
from codegen.extensions.langchain.tools import (
12+
ListDirectoryTool,
13+
RevealSymbolTool,
14+
SearchTool,
15+
SemanticSearchTool,
16+
ViewFileTool,
17+
)
18+
from langchain_core.messages import SystemMessage
19+
from rich.console import Console
20+
from rich.markdown import Markdown
21+
from rich.prompt import Prompt
22+
23+
# LangSmith warns when no API key is configured; this tool does not need one,
# so silence that specific warning.
warnings.filterwarnings("ignore", message="API key must be provided when using hosted LangSmith API")

# Put the directory three levels above this file on the import path.
project_root = str(Path(__file__).parent.parent.parent)
sys.path.append(project_root)

# rich-click presentation settings (help rendering and error hints).
_rich_click_settings = click.rich_click
_rich_click_settings.USE_RICH_MARKUP = True
_rich_click_settings.USE_MARKDOWN = True
_rich_click_settings.SHOW_ARGUMENTS = True
_rich_click_settings.GROUP_ARGUMENTS_OPTIONS = True
_rich_click_settings.STYLE_ERRORS_SUGGESTION = "yellow italic"
_rich_click_settings.ERRORS_SUGGESTION = "Try running the command with --help for more information"

# Single shared console used for all terminal output in this module.
console = Console()
39+
40+
# System prompt for the research agent, assembled line by line so each
# instruction is easy to locate and edit.
RESEARCH_AGENT_PROMPT = (
    "You are a code research expert. Your goal is to help users understand codebases by:\n"
    "1. Finding relevant code through semantic and text search\n"
    "2. Analyzing symbol relationships and dependencies\n"
    "3. Exploring directory structures\n"
    "4. Reading and explaining code\n"
    "\n"
    "Always explain your findings in detail and provide context about how different parts of the code relate to each other.\n"
    "When analyzing code, consider:\n"
    "- The purpose and functionality of each component\n"
    "- How different parts interact\n"
    "- Key patterns and design decisions\n"
    "- Potential areas for improvement\n"
    "\n"
    "Break down complex concepts into understandable pieces and use examples when helpful."
)
54+
55+
56+
def initialize_codebase(repo_name: str) -> Optional[Codebase]:
    """Clone and load a repository, showing a spinner while it runs.

    Args:
        repo_name: Repository identifier handed unchanged to
            ``Codebase.from_repo`` (the CLI documents the ``owner/repo`` form).

    Returns:
        The loaded ``Codebase``, or ``None`` if loading raised. Failures are
        reported on the console instead of being propagated to the caller.
    """
    # Local import keeps this function independent of the module's import block.
    from rich.markup import escape

    try:
        # Escape the name: it is user input and is rendered as Rich markup.
        with console.status(f"[bold blue]Cloning {escape(repo_name)}...[/bold blue]"):
            codebase = Codebase.from_repo(repo_name)
    except Exception as e:
        # CLI boundary: any failure becomes a readable message. The exception
        # text is escaped so brackets in it are not parsed as markup tags.
        console.print(f"[bold red]Error initializing codebase:[/bold red] {escape(str(e))}")
        return None

    # Printed after the spinner has closed; a message set on the status line
    # itself disappears as soon as the status context exits.
    console.print("[bold green]✓ Repository cloned successfully![/bold green]")
    return codebase
68+
69+
70+
@click.group()
def cli():
    """[bold blue]🔍 Codegen Code Research CLI[/bold blue]

    A powerful tool for deep code analysis and research.
    """
    # Intentionally empty: this group only serves as the parent that
    # sub-commands attach to; the docstring above is its help text.
77+
78+
79+
@cli.command()
@click.argument("repo_name", required=False)
@click.option("--query", "-q", default=None, help="Initial research query to start with.")
def research(repo_name: Optional[str] = None, query: Optional[str] = None):
    """[bold green]Start a code research session[/bold green]

    [blue]Arguments:[/blue]
        [yellow]REPO_NAME[/yellow]: GitHub repository in format 'owner/repo' (optional, will prompt if not provided)
    """
    # NOTE: the docstring above is the command's user-facing help text, so
    # maintainer notes are kept in comments.
    #
    # Flow: resolve the repository name (prompting if it was not given), load
    # the codebase, build an agent over the research tools, then answer
    # queries in a loop until the user types "exit" or "quit".

    # Local import keeps this function independent of the module's import block.
    from rich.markup import escape

    # If no repo name provided, prompt for it
    if not repo_name:
        console.print("\n[bold]Welcome to the Code Research CLI![/bold]")
        console.print("\nEnter a GitHub repository to analyze (format: owner/repo)\nExamples:\n • fastapi/fastapi\n • pytorch/pytorch\n • microsoft/TypeScript")
        repo_name = Prompt.ask("\n[bold cyan]Repository name[/bold cyan]")

    # initialize_codebase signals failure with None (and has already reported
    # the error). Compare against None explicitly instead of relying on the
    # truthiness of the Codebase object.
    codebase = initialize_codebase(repo_name)
    if codebase is None:
        return

    # Tools the agent may call, all bound to the loaded codebase.
    tools = [
        ViewFileTool(codebase),
        ListDirectoryTool(codebase),
        SearchTool(codebase),
        SemanticSearchTool(codebase),
        RevealSymbolTool(codebase),
    ]

    # Build the agent, seeding its history with the research system prompt.
    with console.status("[bold blue]Initializing research agent...[/bold blue]"):
        agent = create_agent_with_tools(codebase=codebase, tools=tools, chat_history=[SystemMessage(content=RESEARCH_AGENT_PROMPT)], verbose=True)
    # Printed after the spinner has closed so the confirmation stays visible.
    console.print("[bold green]✓ Research agent ready![/bold green]")

    # Get initial query if not provided
    if not query:
        console.print(
            "\n[bold]What would you like to research?[/bold]"
            "\n[dim]Example queries:[/dim]"
            "\n• [italic]Explain the main components and their relationships[/italic]"
            "\n• [italic]Find all usages of X function/class[/italic]"
            "\n• [italic]Show me the dependency graph for Y module[/italic]"
            "\n• [italic]What design patterns are used in this codebase?[/italic]"
        )
        query = Prompt.ask("\n[bold cyan]Research query[/bold cyan]")

    # Main research loop
    while True:
        if not query:
            query = Prompt.ask("\n[bold cyan]Research query[/bold cyan]")

        # Normalize once so " exit " still exits and blank input is detected.
        query = query.strip()
        if not query:
            # Nothing to research; ask again instead of invoking the agent
            # with an empty input.
            continue

        if query.lower() in ("exit", "quit"):
            console.print("\n[bold green]Thanks for using the Code Research CLI! Goodbye![/bold green]")
            break

        try:
            # Keep the spinner only around the agent call so it has stopped
            # before the findings are rendered.
            with console.status("[bold blue]Researching...[/bold blue]", spinner="dots"):
                result = agent.invoke(
                    {"input": query},
                    config={"configurable": {"session_id": "research"}},
                )
            console.print("\n[bold blue]📊 Research Findings:[/bold blue]")
            console.print(Markdown(result["output"]))
        except Exception as e:
            # CLI boundary: report the failure and keep the session alive. The
            # text is escaped so brackets in it are not parsed as markup tags.
            console.print(f"\n[bold red]Error during research:[/bold red] {escape(str(e))}")

        # Clear query for next iteration
        query = None
149+
150+
151+
# Script entry point: hand control to the click command group.
if __name__ == "__main__":
    cli()

src/codegen/extensions/langchain/agent.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from langchain.agents import AgentExecutor
44
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
55
from langchain.hub import pull
6+
from langchain.tools import BaseTool
67
from langchain_core.chat_history import InMemoryChatMessageHistory
78
from langchain_core.messages import BaseMessage
89
from langchain_core.runnables.history import RunnableWithMessageHistory
@@ -163,3 +164,59 @@ def create_codebase_inspector_agent(
163164
input_messages_key="input",
164165
history_messages_key="chat_history",
165166
)
167+
168+
169+
def create_agent_with_tools(
    codebase: Codebase,
    tools: list[BaseTool],
    model_name: str = "gpt-4o",
    temperature: float = 0,
    verbose: bool = True,
    # Quoted so the union syntax is never evaluated on older interpreters.
    chat_history: "list[BaseMessage] | None" = None,
) -> RunnableWithMessageHistory:
    """Create an agent with a specific set of tools.

    Args:
        codebase: The codebase to operate on. Not referenced in this function
            body; the entries in ``tools`` are what the agent actually uses.
        tools: List of tools to provide to the agent
        model_name: Name of the model to use (default: gpt-4o)
        temperature: Model temperature (default: 0)
        verbose: Whether to print agent's thought process (default: True)
        chat_history: Optional list of messages to initialize chat history
            with. ``None`` (the default) starts with an empty history. The
            list is copied, so the caller's list is not shared with the agent.

    Returns:
        The agent executor wrapped in ``RunnableWithMessageHistory``. A single
        in-memory history is returned for every ``session_id``, so all
        sessions of one returned agent share the same conversation.
    """
    # Initialize language model
    llm = ChatOpenAI(
        model_name=model_name,
        temperature=temperature,
    )

    # Get the prompt to use (pulled from the LangChain hub by name)
    prompt = pull("hwchase17/openai-functions-agent")

    # Create the agent
    agent = OpenAIFunctionsAgent(
        llm=llm,
        tools=tools,
        prompt=prompt,
    )

    # Create the agent executor
    agent_executor = AgentExecutor(
        agent=agent,
        tools=tools,
        verbose=verbose,
    )

    # A fresh list per call: avoids a shared mutable default argument and
    # keeps the history independent of the caller's list.
    seed_messages = list(chat_history) if chat_history is not None else []
    message_history = InMemoryChatMessageHistory(messages=seed_messages)

    # Wrap with message history; session_id is deliberately ignored so every
    # session reads and writes the one history created above.
    return RunnableWithMessageHistory(
        agent_executor,
        lambda session_id: message_history,
        input_messages_key="input",
        history_messages_key="chat_history",
    )

0 commit comments

Comments
 (0)