Skip to content

feat: adds in replace tool #556

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/codegen/extensions/langchain/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
ListDirectoryTool,
MoveSymbolTool,
RenameFileTool,
ReplacementEditTool,
RevealSymbolTool,
SearchTool,
SemanticEditTool,
Expand Down Expand Up @@ -70,6 +71,7 @@ def create_codebase_agent(
RevealSymbolTool(codebase),
SemanticEditTool(codebase),
SemanticSearchTool(codebase),
ReplacementEditTool(codebase),
# =====[ Github Integration ]=====
# Enable Github integration
# GithubCreatePRTool(codebase),
Expand Down
49 changes: 47 additions & 2 deletions src/codegen/extensions/langchain/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
linear_search_issues_tool,
)
from codegen.extensions.tools.link_annotation import add_links_to_message
from codegen.extensions.tools.replacement_edit import replacement_edit
from codegen.extensions.tools.reveal_symbol import reveal_symbol
from codegen.extensions.tools.search import search
from codegen.extensions.tools.semantic_edit import semantic_edit
Expand All @@ -37,7 +38,7 @@
view_file,
view_pr,
)
from ..tools.tool_prompts import _FILE_EDIT_DESCRIPTION
from ..tools.semantic_edit_prompts import FILE_EDIT_PROMPT


class ViewFileInput(BaseModel):
Expand All @@ -49,9 +50,9 @@
class ViewFileTool(BaseTool):
"""Tool for viewing file contents and metadata."""

name: ClassVar[str] = "view_file"

Check failure on line 53 in src/codegen/extensions/langchain/tools.py

View workflow job for this annotation

GitHub Actions / mypy

error: Cannot override instance variable (previously declared on base class "BaseTool") with class variable [misc]
description: ClassVar[str] = "View the contents and metadata of a file in the codebase"

Check failure on line 54 in src/codegen/extensions/langchain/tools.py

View workflow job for this annotation

GitHub Actions / mypy

error: Cannot override instance variable (previously declared on base class "BaseTool") with class variable [misc]
args_schema: ClassVar[type[BaseModel]] = ViewFileInput

Check failure on line 55 in src/codegen/extensions/langchain/tools.py

View workflow job for this annotation

GitHub Actions / mypy

error: Cannot override instance variable (previously declared on base class "BaseTool") with class variable [misc]
codebase: Codebase = Field(exclude=True)

def __init__(self, codebase: Codebase) -> None:
Expand All @@ -72,9 +73,9 @@
class ListDirectoryTool(BaseTool):
"""Tool for listing directory contents."""

name: ClassVar[str] = "list_directory"

Check failure on line 76 in src/codegen/extensions/langchain/tools.py

View workflow job for this annotation

GitHub Actions / mypy

error: Cannot override instance variable (previously declared on base class "BaseTool") with class variable [misc]
description: ClassVar[str] = "List contents of a directory in the codebase"

Check failure on line 77 in src/codegen/extensions/langchain/tools.py

View workflow job for this annotation

GitHub Actions / mypy

error: Cannot override instance variable (previously declared on base class "BaseTool") with class variable [misc]
args_schema: ClassVar[type[BaseModel]] = ListDirectoryInput

Check failure on line 78 in src/codegen/extensions/langchain/tools.py

View workflow job for this annotation

GitHub Actions / mypy

error: Cannot override instance variable (previously declared on base class "BaseTool") with class variable [misc]
codebase: Codebase = Field(exclude=True)

def __init__(self, codebase: Codebase) -> None:
Expand All @@ -95,8 +96,8 @@
class SearchTool(BaseTool):
"""Tool for searching the codebase."""

name: ClassVar[str] = "search"

Check failure on line 99 in src/codegen/extensions/langchain/tools.py

View workflow job for this annotation

GitHub Actions / mypy

error: Cannot override instance variable (previously declared on base class "BaseTool") with class variable [misc]
description: ClassVar[str] = "Search the codebase using text search"

Check failure on line 100 in src/codegen/extensions/langchain/tools.py

View workflow job for this annotation

GitHub Actions / mypy

error: Cannot override instance variable (previously declared on base class "BaseTool") with class variable [misc]
args_schema: ClassVar[type[BaseModel]] = SearchInput
codebase: Codebase = Field(exclude=True)

Expand Down Expand Up @@ -257,7 +258,7 @@
"""Input for semantic editing."""

filepath: str = Field(..., description="Path of the file relative to workspace root")
edit_content: str = Field(..., description=_FILE_EDIT_DESCRIPTION)
edit_content: str = Field(..., description=FILE_EDIT_PROMPT)
start: int = Field(default=1, description="Starting line number (1-indexed, inclusive). Default is 1.")
end: int = Field(default=-1, description="Ending line number (1-indexed, inclusive). Default is -1 (end of file).")

Expand Down Expand Up @@ -706,6 +707,7 @@
ListDirectoryTool(codebase),
MoveSymbolTool(codebase),
RenameFileTool(codebase),
ReplacementEditTool(codebase),
RevealSymbolTool(codebase),
RunBashCommandTool(), # Note: This tool doesn't need the codebase
SearchTool(codebase),
Expand All @@ -725,3 +727,46 @@
LinearCreateIssueTool(codebase),
LinearGetTeamsTool(codebase),
]


class ReplacementEditInput(BaseModel):
    """Argument schema for the regex-based ``replace`` tool.

    ``start`` and ``end`` select an inclusive, 1-indexed line range of the
    file; ``count`` caps how many matches are replaced.
    """

    filepath: str = Field(
        ...,
        description="Path to the file to edit",
    )
    pattern: str = Field(
        ...,
        description="Regex pattern to match",
    )
    replacement: str = Field(
        ...,
        description="Replacement text (can include regex groups)",
    )
    start: int = Field(
        default=1,
        description="Starting line number (1-indexed, inclusive). Default is 1.",
    )
    end: int = Field(
        default=-1,
        description="Ending line number (1-indexed, inclusive). Default is -1 (end of file).",
    )
    count: Optional[int] = Field(
        default=None,
        description="Maximum number of replacements. Default is None (replace all).",
    )


class ReplacementEditTool(BaseTool):
    """LangChain tool that applies a regex replacement to a file in the codebase."""

    name: ClassVar[str] = "replace"
    description: ClassVar[str] = "Replace text in a file using regex pattern matching. For files over 300 lines, specify a line range."
    args_schema: ClassVar[type[BaseModel]] = ReplacementEditInput
    codebase: Codebase = Field(exclude=True)

    def __init__(self, codebase: Codebase) -> None:
        super().__init__(codebase=codebase)

    def _run(
        self,
        filepath: str,
        pattern: str,
        replacement: str,
        start: int = 1,
        end: int = -1,
        count: Optional[int] = None,
    ) -> str:
        """Delegate to ``replacement_edit`` and return its result as indented JSON."""
        edit_args = {
            "filepath": filepath,
            "pattern": pattern,
            "replacement": replacement,
            "start": start,
            "end": end,
            "count": count,
        }
        outcome = replacement_edit(self.codebase, **edit_args)
        return json.dumps(outcome, indent=2)
2 changes: 2 additions & 0 deletions src/codegen/extensions/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from .list_directory import list_directory
from .move_symbol import move_symbol
from .rename_file import rename_file
from .replacement_edit import replacement_edit
from .reveal_symbol import reveal_symbol
from .search import search
from .semantic_edit import semantic_edit
Expand All @@ -42,6 +43,7 @@
# Symbol operations
"move_symbol",
"rename_file",
"replacement_edit",
"reveal_symbol",
# Search operations
"search",
Expand Down
148 changes: 148 additions & 0 deletions src/codegen/extensions/tools/replacement_edit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
"""Tool for making regex-based replacements in files."""

import difflib
import re
from typing import Optional

from codegen import Codebase

from .view_file import add_line_numbers


def generate_diff(original: str, modified: str) -> str:
    """Generate a unified diff between two strings.

    Both inputs are split into lines without their terminators, diffed, and
    the resulting diff lines are joined with a newline. Splitting with
    ``keepends=True`` while passing ``lineterm=""`` left the ``---``, ``+++``
    and ``@@`` header lines unterminated, so they were fused together; a last
    line lacking a trailing newline was fused with the following diff line.

    Args:
        original: Original content
        modified: Modified content

    Returns:
        Unified diff as a string (empty when the inputs have the same lines)
    """
    diff_lines = difflib.unified_diff(
        original.splitlines(),
        modified.splitlines(),
        fromfile="original",
        tofile="modified",
        # Lines carry no terminator, so the headers must not either;
        # every line is terminated uniformly by the join below.
        lineterm="",
    )

    return "\n".join(diff_lines)


def _merge_content(original_content: str, edited_content: str, start: int, end: int) -> str:
"""Merge edited content with original content, preserving content outside the edit range.

Args:
original_content: Original file content
edited_content: New content for the specified range
start: Start line (1-indexed)
end: End line (1-indexed or -1 for end of file)

Returns:
Merged content
"""
original_lines = original_content.split("\n")
edited_lines = edited_content.split("\n")

if start == -1 and end == -1: # Append mode
return original_content + "\n" + edited_content

Check warning on line 52 in src/codegen/extensions/tools/replacement_edit.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/extensions/tools/replacement_edit.py#L52

Added line #L52 was not covered by tests

# Convert to 0-indexed
start_idx = start - 1
end_idx = end - 1 if end != -1 else len(original_lines)

# Merge the content
result_lines = original_lines[:start_idx] + edited_lines + original_lines[end_idx + 1 :]

return "\n".join(result_lines)


def replacement_edit(
    codebase: Codebase,
    filepath: str,
    pattern: str,
    replacement: str,
    start: int = 1,
    end: int = -1,
    count: Optional[int] = None,
    flags: re.RegexFlag = re.MULTILINE,
) -> dict[str, str]:
    """Replace text in a file using regex pattern matching.

    Only the lines in the inclusive range ``start``..``end`` are searched;
    content outside that range is carried over untouched. When the
    substitution changes something, the file is edited, the codebase is
    committed, and a diff plus the line-numbered new content are returned.

    Args:
        codebase: The codebase to operate on
        filepath: Path to the file to edit
        pattern: Regex pattern to match
        replacement: Replacement text (can include regex groups)
        start: Start line (1-indexed, default: 1)
        end: End line (1-indexed, -1 for end of file)
        count: Maximum number of replacements (None for all)
        flags: Regex flags (default: re.MULTILINE)

    Returns:
        Dict containing edit results and status. ``status`` is ``"unchanged"``
        when the substitution left the section identical, otherwise
        ``"success"`` together with ``diff`` and ``new_content``.

    Raises:
        FileNotFoundError: If file not found
        ValueError: If invalid line range or regex pattern
    """
    try:
        file = codebase.get_file(filepath)
    except ValueError as e:
        msg = f"File not found: {filepath}"
        raise FileNotFoundError(msg) from e

    # Get the original content
    original_content = file.content
    original_lines = original_content.split("\n")
    total_lines = len(original_lines)

    # Reject ranges that would otherwise be silently misread by slicing:
    # a start below 1 becomes a negative index (counting from the end of the
    # file) and a start past the last line turns the edit into an append.
    if start < 1 or start > total_lines:
        msg = f"Invalid start line {start}: must be between 1 and {total_lines}"
        raise ValueError(msg)
    if end != -1 and end < start:
        msg = f"Invalid end line {end}: must be -1 or >= start ({start})"
        raise ValueError(msg)

    try:
        # Compile pattern for better error messages
        regex = re.compile(pattern, flags)
    except re.error as e:
        msg = f"Invalid regex pattern: {e}"
        raise ValueError(msg) from e

    # Get the section to edit (0-indexed; an end past the file is clamped by slicing)
    start_idx = start - 1
    end_idx = end - 1 if end != -1 else total_lines
    section_content = "\n".join(original_lines[start_idx : end_idx + 1])

    # Perform the replacement
    if count is None:
        new_section = regex.sub(replacement, section_content)
    else:
        new_section = regex.sub(replacement, section_content, count=count)

    # If no changes were made, return early without touching the file
    if new_section == section_content:
        return {
            "filepath": filepath,
            "status": "unchanged",
            "message": "No matches found for the given pattern",
        }

    # Merge the edited content with the original
    new_content = _merge_content(original_content, new_section, start, end)

    # Generate diff before applying the edit
    diff = generate_diff(original_content, new_content)

    # Apply the edit
    file.edit(new_content)
    codebase.commit()

    return {
        "filepath": filepath,
        "diff": diff,
        "status": "success",
        "new_content": add_line_numbers(new_content),
    }
4 changes: 2 additions & 2 deletions src/codegen/extensions/tools/semantic_edit.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from codegen import Codebase

from .tool_prompts import _HUMAN_PROMPT_DRAFT_EDITOR, _SYSTEM_PROMPT_DRAFT_EDITOR
from .semantic_edit_prompts import _HUMAN_PROMPT_DRAFT_EDITOR, COMMANDER_SYSTEM_PROMPT
from .view_file import add_line_numbers


Expand Down Expand Up @@ -128,7 +128,7 @@
"You may need to make multiple targeted edits."
),
"status": "error",
"line_count": len(original_lines),

Check failure on line 131 in src/codegen/extensions/tools/semantic_edit.py

View workflow job for this annotation

GitHub Actions / mypy

error: Dict entry 2 has incompatible type "str": "int"; expected "str": "str" [dict-item]
}

# Handle append mode
Expand All @@ -152,7 +152,7 @@
original_file_section = "\n".join(context_lines)

# =====[ Get the LLM ]=====
system_message = _SYSTEM_PROMPT_DRAFT_EDITOR
system_message = COMMANDER_SYSTEM_PROMPT

Check warning on line 155 in src/codegen/extensions/tools/semantic_edit.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/extensions/tools/semantic_edit.py#L155

Added line #L155 was not covered by tests
human_message = _HUMAN_PROMPT_DRAFT_EDITOR
prompt = ChatPromptTemplate.from_messages([system_message, human_message])
llm = ChatAnthropic(
Expand All @@ -165,7 +165,7 @@

# Extract code from markdown code block
try:
modified_segment = _extract_code_block(response.content)

Check failure on line 168 in src/codegen/extensions/tools/semantic_edit.py

View workflow job for this annotation

GitHub Actions / mypy

error: Argument 1 to "_extract_code_block" has incompatible type "str | list[str | dict[Any, Any]]"; expected "str" [arg-type]
except ValueError as e:
msg = f"Failed to parse LLM response: {e!s}"
raise ValueError(msg)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
_FILE_EDIT_DESCRIPTION = (
FILE_EDIT_PROMPT = (
"""Edit a file in plain-text format.
* The assistant can edit files by specifying the file path and providing a draft of the new file content.
* The draft content doesn't need to be exactly the same as the existing file; the assistant may skip unchanged lines using comments like `# unchanged` to indicate unchanged sections.
Expand Down Expand Up @@ -274,7 +274,7 @@ def helper():
)


_SYSTEM_PROMPT_DRAFT_EDITOR = """You are an expert code editor.
COMMANDER_SYSTEM_PROMPT = """You are an expert code editor.

Another agent has determined an edit needs to be made to this file.

Expand Down
61 changes: 61 additions & 0 deletions tests/unit/codegen/extensions/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
list_directory,
move_symbol,
rename_file,
replacement_edit,
reveal_symbol,
search,
semantic_edit,
Expand Down Expand Up @@ -179,3 +180,63 @@ def test_create_pr_review_comment(codebase):
assert "error" not in result
assert result["status"] == "success"
assert result["message"] == "Review comment created successfully"


def test_replacement_edit(codebase):
    """Exercise replacement_edit: plain, ranged, grouped, counted and no-match edits.

    The cases run in order against the same file, so each one sees the edits
    made by the cases before it.
    """
    target = "src/main.py"

    # Plain literal replacement over the whole file
    basic = replacement_edit(
        codebase,
        filepath=target,
        pattern=r'print\("Hello, world!"\)',
        replacement='print("Goodbye, world!")',
    )
    assert "error" not in basic
    assert basic["status"] == "success"
    assert 'print("Goodbye, world!")' in basic["new_content"]

    # Replacement restricted to a line range (starts at the class definition line)
    ranged = replacement_edit(
        codebase,
        filepath=target,
        pattern=r"Greeter",
        replacement="Welcomer",
        start=5,
        end=7,
    )
    assert "error" not in ranged
    assert ranged["status"] == "success"
    assert "class Welcomer" in ranged["new_content"]

    # Replacement that refers back to a captured group
    grouped = replacement_edit(
        codebase,
        filepath=target,
        pattern=r"def (\w+)\(\):",
        replacement=r"def \1_function():",
    )
    assert "error" not in grouped
    assert grouped["status"] == "success"
    assert "def hello_function():" in grouped["new_content"]

    # count=1 must touch only the first occurrence
    limited = replacement_edit(
        codebase,
        filepath=target,
        pattern=r"def",
        replacement="async def",
        count=1,
    )
    assert "error" not in limited
    assert limited["status"] == "success"
    assert limited["new_content"].count("async def") == 1

    # A pattern with no matches leaves the file unchanged
    missing = replacement_edit(
        codebase,
        filepath=target,
        pattern=r"nonexistent_pattern",
        replacement="replacement",
    )
    assert missing["status"] == "unchanged"
    assert "No matches found" in missing["message"]