fix: semantic edit tool fixes (#568)

jayhack · web-flow · commit 21a36958aa74 · 2025-02-19T18:59:43.000-08:00
diff --git a/src/codegen/extensions/tools/semantic_edit.py b/src/codegen/extensions/tools/semantic_edit.py
@@ -85,32 +85,30 @@ def _extract_code_block(llm_response: str) -> str:
     return matches[-1]
 
 
-def _merge_content(original_content: str, edited_content: str, start: int, end: int) -> str:
-    """Merge edited content with original content, preserving content outside the edit range.
+def get_llm_edit(original_file_section: str, edit_content: str) -> str:
+    """Get edited content from LLM.
 
     Args:
-        original_content: Original file content
-        edited_content: New content for the specified range
-        start: Start line (1-indexed)
-        end: End line (1-indexed or -1 for end of file)
+        original_file_section: Original content to edit
+        edit_content: Edit specification/instructions
 
     Returns:
-        Merged content
+        LLM response with edited content
     """
-    original_lines = original_content.split("\n")
-    edited_lines = edited_content.split("\n")
-
-    if start == -1 and end == -1:  # Append mode
-        return original_content + "\n" + edited_content
+    system_message = COMMANDER_SYSTEM_PROMPT
+    human_message = _HUMAN_PROMPT_DRAFT_EDITOR
+    prompt = ChatPromptTemplate.from_messages([system_message, human_message])
 
-    # Convert to 0-indexed
-    start_idx = start - 1
-    end_idx = end - 1 if end != -1 else len(original_lines)
+    llm = ChatAnthropic(
+        model="claude-3-5-sonnet-latest",
+        temperature=0,
+        max_tokens=5000,
+    )
 
-    # Merge the content
-    result_lines = original_lines[:start_idx] + edited_lines + original_lines[end_idx + 1 :]
+    chain = prompt | llm
+    response = chain.invoke({"original_file_section": original_file_section, "edit_content": edit_content})
 
-    return "\n".join(result_lines)
+    return response.content
 
 
 def _validate_edit_boundaries(original_lines: list[str], modified_lines: list[str], start_idx: int, end_idx: int) -> None:
@@ -126,14 +124,108 @@ def _validate_edit_boundaries(original_lines: list[str], modified_lines: list[st
         ValueError: If changes were made outside the specified range
     """
     # Check lines before start_idx
-    for i in range(start_idx):
-        if i >= len(original_lines) or i >= len(modified_lines) or original_lines[i] != modified_lines[i]:
+    for i in range(min(start_idx, len(original_lines), len(modified_lines))):
+        if original_lines[i] != modified_lines[i]:
             msg = f"Edit modified line {i + 1} which is before the specified start line {start_idx + 1}"
             raise ValueError(msg)
 
+    # Check lines after end_idx
+    remaining_lines = len(original_lines) - (end_idx + 1)
+    if remaining_lines > 0:
+        orig_suffix = original_lines[-remaining_lines:]
+        if len(modified_lines) >= remaining_lines:
+            mod_suffix = modified_lines[-remaining_lines:]
+            if orig_suffix != mod_suffix:
+                msg = f"Edit modified content after the specified end line {end_idx + 1}"
+                raise ValueError(msg)
+
+
+def extract_file_window(file_content: str, start: int = 1, end: int = -1) -> tuple[str, int, int]:
+    """Extract a window of content from a file.
+
+    Args:
+        file_content: Content of the file
+        start: Start line (1-indexed, default: 1)
+        end: End line (1-indexed or -1 for end of file, default: -1)
+
+    Returns:
+        Tuple of (extracted_content, start_idx, end_idx); both indices are 0-indexed and end_idx is inclusive
+    """
+    # Split on "\n"; a trailing newline yields a final empty line that counts toward total_lines
+    lines = file_content.split("\n")
+    total_lines = len(lines)
+
+    # Convert to 0-indexed
+    start_idx = start - 1
+    end_idx = end - 1 if end != -1 else total_lines - 1
+
+    # Get the content window
+    window_lines = lines[start_idx : end_idx + 1]
+    window_content = "\n".join(window_lines)
+
+    return window_content, start_idx, end_idx
+
+
+def apply_semantic_edit(codebase: Codebase, filepath: str, edited_content: str, start: int = 1, end: int = -1) -> tuple[str, str]:
+    """Apply a semantic edit to a section of content.
+
+    Args:
+        codebase: Codebase object
+        filepath: Path to the file to edit
+        edited_content: New content for the specified range
+        start: Start line (1-indexed, default: 1); start=-1 together with end=-1 appends to the file
+        end: End line (1-indexed or -1 for end of file, default: -1)
+
+    Returns:
+        Tuple of (new_content, diff); the diff covers the whole file in append mode and only the edited section otherwise
+    """
+    # Get the original content
+    file = codebase.get_file(filepath)
+    original_content = file.content
+
+    # Handle append mode
+    if start == -1 and end == -1:
+        new_content = original_content + "\n" + edited_content
+        diff = generate_diff(original_content, new_content)
+        file.edit(new_content)
+        codebase.commit()
+        return new_content, diff
+
+    # Split content into lines
+    original_lines = original_content.splitlines()
+    edited_lines = edited_content.splitlines()
+
+    # Convert to 0-indexed
+    start_idx = start - 1
+    end_idx = end - 1 if end != -1 else len(original_lines) - 1
+
+    # Splice together: prefix + edited content + suffix
+    new_lines = (
+        original_lines[:start_idx]  # Prefix
+        + edited_lines  # Edited section
+        + original_lines[end_idx + 1 :]  # Suffix
+    )
+
+    # Preserve the original file's trailing newline if it had one
+    new_content = "\n".join(new_lines) + ("\n" if original_content.endswith("\n") else "")
+    # Validate the edit boundaries
+    _validate_edit_boundaries(original_lines, new_lines, start_idx, end_idx)
+
+    # Apply the edit
+    file.edit(new_content)
+    codebase.commit()
+    with open(file.path, "w") as f:
+        f.write(new_content)
+
+    # Generate diff from the original section to the edited section
+    original_section, _, _ = extract_file_window(original_content, start, end)
+    diff = generate_diff(original_section, edited_content)
+
+    return new_content, diff
+
 
 def semantic_edit(codebase: Codebase, filepath: str, edit_content: str, start: int = 1, end: int = -1) -> SemanticEditObservation:
-    """Edit a file using semantic editing with line range support. This is an internal api and should not be called by the LLM."""
+    """Edit a file using semantic editing with line range support."""
     try:
         file = codebase.get_file(filepath)
     except ValueError:
@@ -158,81 +250,29 @@ def semantic_edit(codebase: Codebase, filepath: str, edit_content: str, start: i
             line_count=len(original_lines),
         )
 
-    # Handle append mode
-    if start == -1 and end == -1:
-        try:
-            file.add_symbol_from_source(edit_content)
-            codebase.commit()
-
-            return SemanticEditObservation(
-                status="success",
-                filepath=filepath,
-                new_content=file.content,
-                diff=generate_diff(original_content, file.content),
-            )
-        except Exception as e:
-            msg = f"Failed to append content: {e!s}"
-            raise ValueError(msg)
-
-    # For range edits, get the context for the draft editor
-    total_lines = len(original_lines)
-    start_idx = start - 1
-    end_idx = end - 1 if end != -1 else total_lines
-
-    # Get the context for the edit
-    context_lines = original_lines[start_idx : end_idx + 1]
-    original_file_section = "\n".join(context_lines)
+    # Extract the window of content to edit
+    original_file_section, start_idx, end_idx = extract_file_window(original_content, start, end)
 
-    # =====[ Get the LLM ]=====
-    system_message = COMMANDER_SYSTEM_PROMPT
-    human_message = _HUMAN_PROMPT_DRAFT_EDITOR
-    prompt = ChatPromptTemplate.from_messages([system_message, human_message])
-    llm = ChatAnthropic(
-        model="claude-3-5-sonnet-latest",
-        temperature=0,
-        max_tokens=5000,
-    )
-    chain = prompt | llm
-    response = chain.invoke({"original_file_section": original_file_section, "edit_content": edit_content})
-
-    # Extract code from markdown code block
+    # Get edited content from LLM
     try:
-        modified_segment = _extract_code_block(response.content)
+        modified_segment = _extract_code_block(get_llm_edit(original_file_section, edit_content))
     except ValueError as e:
         return SemanticEditObservation(
             status="error",
             error=f"Failed to parse LLM response: {e!s}",
             filepath=filepath,
         )
 
-    # Merge the edited content with the original
-    new_content = _merge_content(original_content, modified_segment, start, end)
-    new_lines = new_content.splitlines()
-
-    # Validate that no changes were made before the start line
+    # Apply the semantic edit
     try:
-        _validate_edit_boundaries(original_lines, new_lines, start_idx, end_idx)
+        new_content, diff = apply_semantic_edit(codebase, filepath, modified_segment, start, end)
     except ValueError as e:
         return SemanticEditObservation(
             status="error",
             error=str(e),
             filepath=filepath,
         )
 
-    # Generate diff
-    diff = generate_diff(original_content, new_content)
-
-    # Apply the edit
-    try:
-        file.edit(new_content)
-        codebase.commit()
-    except Exception as e:
-        return SemanticEditObservation(
-            status="error",
-            error=f"Failed to apply edit: {e!s}",
-            filepath=filepath,
-        )
-
     return SemanticEditObservation(
         status="success",
         filepath=filepath,