Skip to content

Reflection step for agent + claude 3.7 + extender recursion #689

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/codegen/agents/code_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
class CodeAgent:
"""Agent for interacting with a codebase."""

def __init__(self, codebase: "Codebase", model_provider: str = "anthropic", model_name: str = "claude-3-5-sonnet-latest", memory: bool = True, tools: Optional[list[BaseTool]] = None, **kwargs):
def __init__(self, codebase: "Codebase", model_provider: str = "anthropic", model_name: str = "claude-3-7-sonnet-latest", memory: bool = True, tools: Optional[list[BaseTool]] = None, **kwargs):
"""Initialize a CodeAgent.

Args:
Expand Down Expand Up @@ -49,7 +49,7 @@ def run(self, prompt: str, thread_id: Optional[str] = None) -> str:
input = {"messages": [("user", prompt)]}

# we stream the steps instead of invoke because it allows us to access intermediate nodes
stream = self.agent.stream(input, config={"configurable": {"thread_id": thread_id}}, stream_mode="values")
stream = self.agent.stream(input, config={"configurable": {"thread_id": thread_id}, "recursion_limit": 100}, stream_mode="values")

for s in stream:
message = s["messages"][-1]
Expand Down
4 changes: 3 additions & 1 deletion src/codegen/extensions/langchain/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
DeleteFileTool,
ListDirectoryTool,
MoveSymbolTool,
ReflectionTool,
RelaceEditTool,
RenameFileTool,
ReplacementEditTool,
Expand All @@ -31,7 +32,7 @@
def create_codebase_agent(
codebase: "Codebase",
model_provider: str = "anthropic",
model_name: str = "claude-3-5-sonnet-latest",
model_name: str = "claude-3-7-sonnet-latest",
system_message: SystemMessage = SystemMessage(REASONER_SYSTEM_MESSAGE),
memory: bool = True,
debug: bool = False,
Expand Down Expand Up @@ -71,6 +72,7 @@ def create_codebase_agent(
# SemanticEditTool(codebase),
ReplacementEditTool(codebase),
RelaceEditTool(codebase),
ReflectionTool(codebase),
# SemanticSearchTool(codebase),
# =====[ Github Integration ]=====
# Enable Github integration
Expand Down
38 changes: 38 additions & 0 deletions src/codegen/extensions/langchain/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
linear_search_issues_tool,
)
from codegen.extensions.tools.link_annotation import add_links_to_message
from codegen.extensions.tools.reflection import perform_reflection
from codegen.extensions.tools.relace_edit import relace_edit
from codegen.extensions.tools.replacement_edit import replacement_edit
from codegen.extensions.tools.reveal_symbol import reveal_symbol
Expand Down Expand Up @@ -742,6 +743,7 @@ def get_workspace_tools(codebase: Codebase) -> list["BaseTool"]:
SemanticSearchTool(codebase),
ViewFileTool(codebase),
RelaceEditTool(codebase),
ReflectionTool(codebase),
# Github
GithubCreatePRTool(codebase),
GithubCreatePRCommentTool(codebase),
Expand Down Expand Up @@ -842,3 +844,39 @@ def __init__(self, codebase: Codebase) -> None:
def _run(self, filepath: str, edit_snippet: str) -> str:
result = relace_edit(self.codebase, filepath, edit_snippet)
return result.render()


class ReflectionInput(BaseModel):
    """Input for agent reflection."""

    # Free-text recap of the task the agent is working on; becomes the
    # "Context Summary" block of the reflection prompt.
    context_summary: str = Field(..., description="Summary of the current context and problem being solved")
    # Accumulated insights so far; becomes the "Key Findings" block.
    findings_so_far: str = Field(..., description="Key information and insights gathered so far")
    # Optional: open questions/obstacles; when non-empty the reflection adds a
    # "Knowledge Gaps" section (see REFLECTION_SYSTEM_PROMPT).
    current_challenges: str = Field(default="", description="Current obstacles or questions that need to be addressed")
    # Optional: narrows the reflection to one aspect and is echoed in the
    # rendered header ("# Reflection on: ...").
    reflection_focus: Optional[str] = Field(default=None, description="Optional specific aspect to focus reflection on (e.g., 'architecture', 'performance', 'next steps')")


class ReflectionTool(BaseTool):
    """LangChain tool that lets the agent pause, reflect, and plan.

    Thin wrapper around :func:`perform_reflection`; the heavy lifting
    (LLM call and section parsing) happens there.
    """

    name: ClassVar[str] = "reflect"
    description: ClassVar[str] = """
    Reflect on current understanding and plan next steps.
    This tool helps organize thoughts, identify knowledge gaps, and create a strategic plan.
    Use this when you need to consolidate information or when facing complex decisions.
    """
    args_schema: ClassVar[type[BaseModel]] = ReflectionInput
    codebase: Codebase = Field(exclude=True)

    def __init__(self, codebase: Codebase) -> None:
        super().__init__(codebase=codebase)

    def _run(
        self,
        context_summary: str,
        findings_so_far: str,
        current_challenges: str = "",
        reflection_focus: Optional[str] = None,
    ) -> str:
        """Run the reflection and return it rendered as markdown."""
        observation = perform_reflection(
            context_summary=context_summary,
            findings_so_far=findings_so_far,
            current_challenges=current_challenges,
            reflection_focus=reflection_focus,
            codebase=self.codebase,
        )
        return observation.render()
6 changes: 6 additions & 0 deletions src/codegen/extensions/swebench/harness.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,13 @@ def run_agent_on_entry(entry: SweBenchExample, codebase: Codebase | None = None)
Filenames, directory names, file contents, etc may be different than what you're used to.

Propose changes to update the repo to fix the problem below.
*** IMPORTANT: *** DO NOT MODIFY ANY TESTS!
*** IMPORTANT: *** DO NOT ADD ANY TESTS!

Before committing to any modifications, double-check your work with the Reflection tool.
You can also use that tool to check your work after you think you are done.
If you ever get stuck using other tools, use the Reflection tool to reassess your situation.
After every file edit, use the Reflection tool to check your work and sanity-check yourself.
"""
message += problem_statement

Expand Down
3 changes: 3 additions & 0 deletions src/codegen/extensions/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
)
from .list_directory import list_directory
from .move_symbol import move_symbol
from .reflection import perform_reflection
from .rename_file import rename_file
from .replacement_edit import replacement_edit
from .reveal_symbol import reveal_symbol
Expand Down Expand Up @@ -43,6 +44,8 @@
"list_directory",
# Symbol operations
"move_symbol",
# Reflection
"perform_reflection",
"rename_file",
"replacement_edit",
"reveal_symbol",
Expand Down
217 changes: 217 additions & 0 deletions src/codegen/extensions/tools/reflection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
"""Tool for agent self-reflection and planning."""

from typing import ClassVar, Optional

from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from pydantic import Field

from codegen.extensions.langchain.llm import LLM
from codegen.sdk.core.codebase import Codebase

from .observation import Observation


class ReflectionSection(Observation):
    """A section of the reflection output."""

    # Section heading, e.g. "Current Understanding" or "Action Plan";
    # extracted from a markdown "#"/"##" header line of the LLM response.
    title: str = Field(description="Title of the section")
    # Body text accumulated under that header (whitespace-stripped).
    content: str = Field(description="Content of the section")

    str_template: ClassVar[str] = "{title}:\n{content}"


class ReflectionObservation(Observation):
    """Structured result of an agent reflection.

    Echoes the reflection inputs alongside the parsed sections so the
    full exchange can be inspected or logged later.
    """

    context_summary: str = Field(description="Summary of the current context")
    findings: str = Field(description="Key information and insights gathered")
    challenges: Optional[str] = Field(None, description="Current obstacles or questions")
    focus: Optional[str] = Field(None, description="Specific aspect focused on")
    sections: list[ReflectionSection] = Field(description="Structured reflection sections")

    str_template: ClassVar[str] = "Reflection on: {focus}"

    def _get_details(self) -> dict[str, str]:
        """Get details for string representation."""
        # Fall back to a generic label when no explicit focus was given.
        return {"focus": self.focus or "current understanding and next steps"}

    def render(self) -> str:
        """Render the reflection as a markdown-formatted string."""
        # Header: name the focus when one was provided, otherwise generic.
        header = f"# Reflection on: {self.focus}" if self.focus else "# Agent Reflection"
        parts = [header]

        # Each parsed section becomes an H2 heading followed by its body.
        for section in self.sections:
            parts.append(f"\n## {section.title}")
            parts.append(section.content)

        return "\n".join(parts)


# System prompt for the reflection LLM.
# NOTE: the "## Section"-style headings this prompt requests are what
# parse_reflection_response() splits on — keep the two in sync.
REFLECTION_SYSTEM_PROMPT = """You are an expert AI assistant specialized in reflection and strategic planning.
Your task is to help organize thoughts, identify knowledge gaps, and create a strategic plan based on the information provided.

You will be given:
1. A summary of the current context and problem being solved
2. Key information and insights gathered so far
3. Current obstacles or questions that need to be addressed (if any)
4. A specific aspect to focus the reflection on (if any)

Your response should be structured into the following sections:
1. Current Understanding - Summarize what you understand about the problem and context
2. Key Insights - Highlight the most important findings and their implications
3. Knowledge Gaps (if challenges are provided) - Identify what information is still missing
4. Action Plan - Recommend specific next steps to move forward
5. Alternative Approaches - Suggest other ways to tackle the problem

Your reflection should be clear, insightful, and actionable. Focus on helping the agent make progress and double check its own work.
You will not suggest the agent writes new tests or modifies existing tests.
"""


def parse_reflection_response(response: str) -> list[ReflectionSection]:
    """Parse the LLM response into structured reflection sections.

    A line starting with "## " or "# " opens a new section (the top-level
    "# Reflection" banner is ignored); subsequent lines are collected as
    that section's body until the next header.

    Args:
        response: Raw LLM response text

    Returns:
        List of ReflectionSection objects
    """
    sections: list[ReflectionSection] = []
    title = None
    body: list[str] = []

    for raw_line in response.strip().split("\n"):
        # A header is "## ..." or "# ..." — except the overall reflection banner.
        is_header = raw_line.startswith("## ") or (raw_line.startswith("# ") and not raw_line.startswith("# Reflection"))
        if is_header:
            # Flush the section in progress (empty bodies are kept here).
            if title:
                sections.append(ReflectionSection(title=title, content="\n".join(body).strip()))
                body = []
            title = raw_line.lstrip("#").strip()
        elif title:
            body.append(raw_line)

    # Flush the trailing section — only when it actually has body lines.
    if title and body:
        sections.append(ReflectionSection(title=title, content="\n".join(body).strip()))

    return sections


def perform_reflection(
    context_summary: str,
    findings_so_far: str,
    current_challenges: str = "",
    reflection_focus: Optional[str] = None,
    codebase: Optional[Codebase] = None,
) -> ReflectionObservation:
    """Perform agent reflection to organize thoughts and plan next steps.

    This function helps the agent consolidate its understanding, identify knowledge gaps,
    and create a strategic plan for moving forward.

    Args:
        context_summary: Summary of the current context and problem being solved
        findings_so_far: Key information and insights gathered so far
        current_challenges: Current obstacles or questions that need to be addressed
        reflection_focus: Optional specific aspect to focus reflection on
        codebase: Optional codebase context for code-specific reflections

    Returns:
        ReflectionObservation containing structured reflection sections
        (status="error" with an error message if the LLM call fails).
    """
    try:
        # Create the prompt for the LLM
        system_message = SystemMessage(content=REFLECTION_SYSTEM_PROMPT)

        # Construct the human message with all the context
        human_message_content = f"""
Context Summary:
{context_summary}

Key Findings:
{findings_so_far}
"""

        # Add challenges if provided
        if current_challenges:
            human_message_content += f"""
Current Challenges:
{current_challenges}
"""

        # Add reflection focus if provided
        if reflection_focus:
            human_message_content += f"""
Reflection Focus:
{reflection_focus}
"""

        # Add codebase context if available and relevant
        if codebase and (reflection_focus and "code" in reflection_focus.lower()):
            # In a real implementation, you might add relevant codebase context here
            # For example, listing key files or symbols related to the reflection focus
            human_message_content += f"""
Codebase Context:
- Working with codebase at: {codebase.root}
"""

        human_message = HumanMessage(content=human_message_content)

        # Initialize the LLM.
        # Model kept consistent with the agent defaults elsewhere in this
        # change set (code_agent.py / langchain/agent.py use claude-3-7).
        llm = LLM(
            model_provider="anthropic",
            model_name="claude-3-7-sonnet-latest",
            temperature=0.2,  # Slightly higher temperature for more creative reflection
            max_tokens=4000,
        )

        # Invoke the model with the message objects directly instead of going
        # through ChatPromptTemplate: the template engine treats literal "{...}"
        # in message content as input variables, so any braces in the
        # user-supplied summaries/findings would raise a KeyError at invoke time.
        # StrOutputParser normalizes the AIMessage content into a plain string.
        response = StrOutputParser().invoke(llm.invoke([system_message, human_message]))

        # Parse the response into sections
        sections = parse_reflection_response(response)

        # If no sections were parsed, create a default section with the full response
        if not sections:
            sections = [ReflectionSection(title="Reflection", content=response)]

        return ReflectionObservation(
            status="success",
            context_summary=context_summary,
            findings=findings_so_far,
            challenges=current_challenges,
            focus=reflection_focus,
            sections=sections,
        )

    except Exception as e:
        # Best-effort: surface the failure as an error observation rather than
        # crashing the agent loop.
        return ReflectionObservation(
            status="error",
            error=f"Failed to perform reflection: {e!s}",
            context_summary=context_summary,
            findings=findings_so_far,
            challenges=current_challenges,
            focus=reflection_focus,
            sections=[],
        )
Loading