Skip to content

fix: semantic edit tool fixes #568

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Feb 20, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
198 changes: 119 additions & 79 deletions src/codegen/extensions/tools/semantic_edit.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,32 +85,30 @@
return matches[-1]


def _merge_content(original_content: str, edited_content: str, start: int, end: int) -> str:
    """Merge edited content with original content, preserving content outside the edit range.

    Args:
        original_content: Original file content
        edited_content: New content for the specified range
        start: Start line (1-indexed)
        end: End line (1-indexed or -1 for end of file)

    Returns:
        Merged content
    """
    original_lines = original_content.split("\n")
    edited_lines = edited_content.split("\n")

    if start == -1 and end == -1:  # Append mode
        return original_content + "\n" + edited_content

    # Convert to 0-indexed
    start_idx = start - 1
    end_idx = end - 1 if end != -1 else len(original_lines)

    # Merge the content: untouched prefix + edited lines + untouched suffix
    result_lines = original_lines[:start_idx] + edited_lines + original_lines[end_idx + 1 :]

    return "\n".join(result_lines)


def _content_to_text(content: object) -> str:
    """Flatten a chat-message ``content`` value into a plain string.

    The message content is typed as either a string or a list whose items are
    strings or dicts (see the mypy report for ``get_llm_edit``). Strings are
    returned unchanged; for a list, string items and the ``"text"`` field of
    dict items tagged ``"type": "text"`` are concatenated in order. Any other
    item is ignored.

    Args:
        content: The ``content`` attribute of an LLM response message

    Returns:
        The textual part of the content as a single string
    """
    if isinstance(content, str):
        return content

    if not isinstance(content, list):
        # Unexpected shape: fall back to its string form rather than failing.
        return str(content)

    text_parts: list[str] = []
    for part in content:
        if isinstance(part, str):
            text_parts.append(part)
        elif isinstance(part, dict) and part.get("type") == "text":
            text_parts.append(str(part.get("text", "")))
    return "".join(text_parts)


def get_llm_edit(original_file_section: str, edit_content: str) -> str:
    """Get edited content from LLM.

    Builds a chat prompt from ``COMMANDER_SYSTEM_PROMPT`` and
    ``_HUMAN_PROMPT_DRAFT_EDITOR``, pipes it into a ``ChatAnthropic`` model and
    invokes the chain with the section to edit and the edit instructions.

    Args:
        original_file_section: Original content to edit
        edit_content: Edit specification/instructions

    Returns:
        LLM response with edited content, always as a plain string
    """
    system_message = COMMANDER_SYSTEM_PROMPT
    human_message = _HUMAN_PROMPT_DRAFT_EDITOR
    prompt = ChatPromptTemplate.from_messages([system_message, human_message])

    llm = ChatAnthropic(
        model="claude-3-5-sonnet-latest",
        temperature=0,
        max_tokens=5000,
    )

    chain = prompt | llm
    response = chain.invoke({"original_file_section": original_file_section, "edit_content": edit_content})

    # response.content may be a list of content blocks rather than a str;
    # normalize it so the declared ``-> str`` return type actually holds.
    return _content_to_text(response.content)

Check failure on line 111 in src/codegen/extensions/tools/semantic_edit.py

View workflow job for this annotation

GitHub Actions / mypy

error: Incompatible return value type (got "str | list[str | dict[Any, Any]]", expected "str") [return-value]


def _validate_edit_boundaries(original_lines: list[str], modified_lines: list[str], start_idx: int, end_idx: int) -> None:
Expand All @@ -126,14 +124,108 @@
ValueError: If changes were made outside the specified range
"""
# Check lines before start_idx
for i in range(start_idx):
if i >= len(original_lines) or i >= len(modified_lines) or original_lines[i] != modified_lines[i]:
for i in range(min(start_idx, len(original_lines), len(modified_lines))):
if original_lines[i] != modified_lines[i]:
msg = f"Edit modified line {i + 1} which is before the specified start line {start_idx + 1}"
raise ValueError(msg)

# Check lines after end_idx
remaining_lines = len(original_lines) - (end_idx + 1)
if remaining_lines > 0:
orig_suffix = original_lines[-remaining_lines:]
if len(modified_lines) >= remaining_lines:
mod_suffix = modified_lines[-remaining_lines:]
if orig_suffix != mod_suffix:
msg = f"Edit modified content after the specified end line {end_idx + 1}"
raise ValueError(msg)


def extract_file_window(file_content: str, start: int = 1, end: int = -1) -> tuple[str, int, int]:
    """Return an inclusive line range of ``file_content`` plus its 0-based bounds.

    Lines are obtained by splitting on ``"\\n"``, so a trailing newline in the
    input yields a final empty line that counts toward the total.

    Args:
        file_content: Content of the file
        start: First line of the window (1-indexed, default: 1)
        end: Last line of the window (1-indexed), or -1 for the last line of
            the file (default: -1)

    Returns:
        Tuple of (extracted_content, start_idx, end_idx) where the indices are
        the 0-indexed equivalents of ``start`` and ``end``
    """
    all_lines = file_content.split("\n")

    # Translate the 1-indexed, inclusive bounds into 0-indexed ones.
    first = start - 1
    if end == -1:
        last = len(all_lines) - 1
    else:
        last = end - 1

    # Slice end is exclusive, hence the +1 to keep ``last`` in the window.
    return "\n".join(all_lines[first : last + 1]), first, last


def apply_semantic_edit(codebase: Codebase, filepath: str, edited_content: str, start: int = 1, end: int = -1) -> tuple[str, str]:
    """Apply a semantic edit to a section of content.

    Replaces lines ``start``..``end`` (inclusive) of the file with
    ``edited_content``, writes the result through the codebase and commits it.
    Passing ``start == -1`` and ``end == -1`` appends ``edited_content`` to the
    end of the file instead.

    Args:
        codebase: Codebase object
        filepath: Path to the file to edit
        edited_content: New content for the specified range
        start: Start line (1-indexed, default: 1)
        end: End line (1-indexed or -1 for end of file, default: -1)

    Returns:
        Tuple of (new_content, diff). In append mode the diff compares the
        whole original file with the new content; otherwise it compares the
        original section with ``edited_content``.

    Raises:
        ValueError: If the line range is invalid, or if boundary validation
            finds changes outside the specified range
    """
    # Get the original content
    file = codebase.get_file(filepath)
    original_content = file.content

    # Handle append mode
    if start == -1 and end == -1:
        new_content = original_content + "\n" + edited_content
        diff = generate_diff(original_content, new_content)
        file.edit(new_content)
        codebase.commit()
        return new_content, diff

    # Reject ranges that would turn into negative or overlapping slice indices
    # below: those silently drop or duplicate original lines, and the boundary
    # validation cannot notice because new_lines is built from the same slices.
    if start < 1:
        msg = f"Invalid start line {start}: line numbers are 1-indexed"
        raise ValueError(msg)
    # end == start - 1 is still accepted: it selects an empty range, i.e. a
    # pure insertion before ``start`` with nothing replaced.
    if end != -1 and end < start - 1:
        msg = f"Invalid line range: end line {end} is before start line {start}"
        raise ValueError(msg)

    # Split content into lines
    original_lines = original_content.splitlines()
    edited_lines = edited_content.splitlines()

    # Convert to 0-indexed
    start_idx = start - 1
    end_idx = end - 1 if end != -1 else len(original_lines) - 1

    # Splice together: prefix + edited content + suffix
    new_lines = (
        original_lines[:start_idx]  # Prefix
        + edited_lines  # Edited section
        + original_lines[end_idx + 1 :]  # Suffix
    )

    # splitlines() drops the final newline, so restore it if the file had one
    new_content = "\n".join(new_lines) + ("\n" if original_content.endswith("\n") else "")

    # Validate the edit boundaries before anything is written
    _validate_edit_boundaries(original_lines, new_lines, start_idx, end_idx)

    # Apply the edit
    file.edit(new_content)
    codebase.commit()
    # NOTE(review): this direct write comes after file.edit() + commit() and
    # looks redundant; kept as-is because its purpose is not visible here —
    # confirm whether the codebase commit already persists the file.
    with open(file.path, "w") as f:
        f.write(new_content)

    # Generate diff from the original section to the edited section
    original_section, _, _ = extract_file_window(original_content, start, end)
    diff = generate_diff(original_section, edited_content)

    return new_content, diff


def semantic_edit(codebase: Codebase, filepath: str, edit_content: str, start: int = 1, end: int = -1) -> SemanticEditObservation:
"""Edit a file using semantic editing with line range support. This is an internal api and should not be called by the LLM."""
"""Edit a file using semantic editing with line range support."""
try:
file = codebase.get_file(filepath)
except ValueError:
Expand All @@ -158,81 +250,29 @@
line_count=len(original_lines),
)

# Handle append mode
if start == -1 and end == -1:
try:
file.add_symbol_from_source(edit_content)
codebase.commit()

return SemanticEditObservation(
status="success",
filepath=filepath,
new_content=file.content,
diff=generate_diff(original_content, file.content),
)
except Exception as e:
msg = f"Failed to append content: {e!s}"
raise ValueError(msg)

# For range edits, get the context for the draft editor
total_lines = len(original_lines)
start_idx = start - 1
end_idx = end - 1 if end != -1 else total_lines

# Get the context for the edit
context_lines = original_lines[start_idx : end_idx + 1]
original_file_section = "\n".join(context_lines)
# Extract the window of content to edit
original_file_section, start_idx, end_idx = extract_file_window(original_content, start, end)

# =====[ Get the LLM ]=====
system_message = COMMANDER_SYSTEM_PROMPT
human_message = _HUMAN_PROMPT_DRAFT_EDITOR
prompt = ChatPromptTemplate.from_messages([system_message, human_message])
llm = ChatAnthropic(
model="claude-3-5-sonnet-latest",
temperature=0,
max_tokens=5000,
)
chain = prompt | llm
response = chain.invoke({"original_file_section": original_file_section, "edit_content": edit_content})

# Extract code from markdown code block
# Get edited content from LLM
try:
modified_segment = _extract_code_block(response.content)
modified_segment = _extract_code_block(get_llm_edit(original_file_section, edit_content))
except ValueError as e:
return SemanticEditObservation(
status="error",
error=f"Failed to parse LLM response: {e!s}",
filepath=filepath,
)

# Merge the edited content with the original
new_content = _merge_content(original_content, modified_segment, start, end)
new_lines = new_content.splitlines()

# Validate that no changes were made before the start line
# Apply the semantic edit
try:
_validate_edit_boundaries(original_lines, new_lines, start_idx, end_idx)
new_content, diff = apply_semantic_edit(codebase, filepath, modified_segment, start, end)
except ValueError as e:
return SemanticEditObservation(
status="error",
error=str(e),
filepath=filepath,
)

# Generate diff
diff = generate_diff(original_content, new_content)

# Apply the edit
try:
file.edit(new_content)
codebase.commit()
except Exception as e:
return SemanticEditObservation(
status="error",
error=f"Failed to apply edit: {e!s}",
filepath=filepath,
)

return SemanticEditObservation(
status="success",
filepath=filepath,
Expand Down
Loading