Skip to content

Commit 21a3695

Browse files
authored
fix: semantic edit tool fixes (#568)
1 parent a8b5aca commit 21a3695

File tree

1 file changed

+119
-79
lines changed

1 file changed

+119
-79
lines changed

src/codegen/extensions/tools/semantic_edit.py

Lines changed: 119 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -85,32 +85,30 @@ def _extract_code_block(llm_response: str) -> str:
8585
return matches[-1]
8686

8787

88-
def _merge_content(original_content: str, edited_content: str, start: int, end: int) -> str:
89-
"""Merge edited content with original content, preserving content outside the edit range.
88+
def get_llm_edit(original_file_section: str, edit_content: str) -> str:
89+
"""Get edited content from LLM.
9090
9191
Args:
92-
original_content: Original file content
93-
edited_content: New content for the specified range
94-
start: Start line (1-indexed)
95-
end: End line (1-indexed or -1 for end of file)
92+
original_file_section: Original content to edit
93+
edit_content: Edit specification/instructions
9694
9795
Returns:
98-
Merged content
96+
LLM response with edited content
9997
"""
100-
original_lines = original_content.split("\n")
101-
edited_lines = edited_content.split("\n")
102-
103-
if start == -1 and end == -1: # Append mode
104-
return original_content + "\n" + edited_content
98+
system_message = COMMANDER_SYSTEM_PROMPT
99+
human_message = _HUMAN_PROMPT_DRAFT_EDITOR
100+
prompt = ChatPromptTemplate.from_messages([system_message, human_message])
105101

106-
# Convert to 0-indexed
107-
start_idx = start - 1
108-
end_idx = end - 1 if end != -1 else len(original_lines)
102+
llm = ChatAnthropic(
103+
model="claude-3-5-sonnet-latest",
104+
temperature=0,
105+
max_tokens=5000,
106+
)
109107

110-
# Merge the content
111-
result_lines = original_lines[:start_idx] + edited_lines + original_lines[end_idx + 1 :]
108+
chain = prompt | llm
109+
response = chain.invoke({"original_file_section": original_file_section, "edit_content": edit_content})
112110

113-
return "\n".join(result_lines)
111+
return response.content
114112

115113

116114
def _validate_edit_boundaries(original_lines: list[str], modified_lines: list[str], start_idx: int, end_idx: int) -> None:
@@ -126,14 +124,108 @@ def _validate_edit_boundaries(original_lines: list[str], modified_lines: list[st
126124
ValueError: If changes were made outside the specified range
127125
"""
128126
# Check lines before start_idx
129-
for i in range(start_idx):
130-
if i >= len(original_lines) or i >= len(modified_lines) or original_lines[i] != modified_lines[i]:
127+
for i in range(min(start_idx, len(original_lines), len(modified_lines))):
128+
if original_lines[i] != modified_lines[i]:
131129
msg = f"Edit modified line {i + 1} which is before the specified start line {start_idx + 1}"
132130
raise ValueError(msg)
133131

132+
# Check lines after end_idx
133+
remaining_lines = len(original_lines) - (end_idx + 1)
134+
if remaining_lines > 0:
135+
orig_suffix = original_lines[-remaining_lines:]
136+
if len(modified_lines) >= remaining_lines:
137+
mod_suffix = modified_lines[-remaining_lines:]
138+
if orig_suffix != mod_suffix:
139+
msg = f"Edit modified content after the specified end line {end_idx + 1}"
140+
raise ValueError(msg)
141+
142+
143+
def extract_file_window(file_content: str, start: int = 1, end: int = -1) -> tuple[str, int, int]:
    """Extract a window of lines from file content.

    Out-of-range bounds are clamped to the file instead of being handed to the
    slice as-is: a start below 1 would otherwise become a negative index and
    silently select lines counted from the end of the file.

    Args:
        file_content: Content of the file
        start: Start line (1-indexed, default: 1)
        end: End line (1-indexed, inclusive, or -1 for end of file, default: -1)

    Returns:
        Tuple of (extracted_content, start_idx, end_idx) where both indices are
        0-indexed and end_idx is inclusive. An empty window is reported with
        end_idx == start_idx - 1.
    """
    # Split on "\n" (not splitlines) so a trailing newline survives the round trip
    lines = file_content.split("\n")
    total_lines = len(lines)

    # start == end == -1 is the append sentinel: there is no existing section
    # to show, so return an empty window positioned just past the last line.
    if start == -1 and end == -1:
        return "", total_lines, total_lines - 1

    # Convert to 0-indexed and clamp into the file
    start_idx = min(max(start - 1, 0), total_lines)
    end_idx = total_lines - 1 if end == -1 else min(end - 1, total_lines - 1)
    # Never let the end fall more than one step before the start; that keeps
    # the slice below from ever receiving a negative stop index.
    end_idx = max(end_idx, start_idx - 1)

    # Get the content window
    window_content = "\n".join(lines[start_idx : end_idx + 1])

    return window_content, start_idx, end_idx
167+
168+
169+
def apply_semantic_edit(codebase: Codebase, filepath: str, edited_content: str, start: int = 1, end: int = -1) -> tuple[str, str]:
    """Splice edited content into a line range of a file and persist the result.

    Args:
        codebase: Codebase object used to look up the file and commit the change
        filepath: Path to the file to edit
        edited_content: New content for the specified range
        start: Start line (1-indexed, default: 1); start == end == -1 selects append mode
        end: End line (1-indexed, inclusive, or -1 for end of file, default: -1)

    Returns:
        Tuple of (new_content, diff). In append mode the diff compares the whole
        file before and after; for range edits it compares the original section
        with edited_content.

    Raises:
        ValueError: If the line range is invalid, or if the boundary validation
            detects changes outside the specified range
    """
    # Get the original content
    file = codebase.get_file(filepath)
    original_content = file.content

    # Handle append mode
    if start == -1 and end == -1:
        new_content = original_content + "\n" + edited_content
        diff = generate_diff(original_content, new_content)
        file.edit(new_content)
        codebase.commit()
        return new_content, diff

    # Reject ranges that would corrupt the file: start < 1 turns the prefix
    # slice into a negative index (wrapping around the end of the file), and
    # end < start - 1 makes prefix and suffix overlap so lines are duplicated.
    # end == start - 1 is still accepted and acts as a pure insertion.
    if start < 1 or (end != -1 and end < start - 1):
        msg = f"Invalid line range: start={start}, end={end}"
        raise ValueError(msg)

    # Split content into lines
    original_lines = original_content.splitlines()
    edited_lines = edited_content.splitlines()

    # Convert to 0-indexed
    start_idx = start - 1
    end_idx = end - 1 if end != -1 else len(original_lines) - 1

    # Splice together: prefix + edited content + suffix
    new_lines = original_lines[:start_idx] + edited_lines + original_lines[end_idx + 1 :]

    # Preserve original file's newline if it had one
    trailing_newline = "\n" if original_content.endswith("\n") else ""
    new_content = "\n".join(new_lines) + trailing_newline

    # Validate the edit boundaries before anything is written
    _validate_edit_boundaries(original_lines, new_lines, start_idx, end_idx)

    # Apply the edit
    file.edit(new_content)
    codebase.commit()
    # NOTE(review): the file is also written directly after the commit, and only
    # on this path (append mode does not do it) - confirm whether it is needed.
    with open(file.path, "w") as f:
        f.write(new_content)

    # Generate diff from the original section to the edited section
    original_section, _, _ = extract_file_window(original_content, start, end)
    diff = generate_diff(original_section, edited_content)

    return new_content, diff
225+
134226

135227
def semantic_edit(codebase: Codebase, filepath: str, edit_content: str, start: int = 1, end: int = -1) -> SemanticEditObservation:
136-
"""Edit a file using semantic editing with line range support. This is an internal api and should not be called by the LLM."""
228+
"""Edit a file using semantic editing with line range support."""
137229
try:
138230
file = codebase.get_file(filepath)
139231
except ValueError:
@@ -158,81 +250,29 @@ def semantic_edit(codebase: Codebase, filepath: str, edit_content: str, start: i
158250
line_count=len(original_lines),
159251
)
160252

161-
# Handle append mode
162-
if start == -1 and end == -1:
163-
try:
164-
file.add_symbol_from_source(edit_content)
165-
codebase.commit()
166-
167-
return SemanticEditObservation(
168-
status="success",
169-
filepath=filepath,
170-
new_content=file.content,
171-
diff=generate_diff(original_content, file.content),
172-
)
173-
except Exception as e:
174-
msg = f"Failed to append content: {e!s}"
175-
raise ValueError(msg)
176-
177-
# For range edits, get the context for the draft editor
178-
total_lines = len(original_lines)
179-
start_idx = start - 1
180-
end_idx = end - 1 if end != -1 else total_lines
181-
182-
# Get the context for the edit
183-
context_lines = original_lines[start_idx : end_idx + 1]
184-
original_file_section = "\n".join(context_lines)
253+
# Extract the window of content to edit
254+
original_file_section, start_idx, end_idx = extract_file_window(original_content, start, end)
185255

186-
# =====[ Get the LLM ]=====
187-
system_message = COMMANDER_SYSTEM_PROMPT
188-
human_message = _HUMAN_PROMPT_DRAFT_EDITOR
189-
prompt = ChatPromptTemplate.from_messages([system_message, human_message])
190-
llm = ChatAnthropic(
191-
model="claude-3-5-sonnet-latest",
192-
temperature=0,
193-
max_tokens=5000,
194-
)
195-
chain = prompt | llm
196-
response = chain.invoke({"original_file_section": original_file_section, "edit_content": edit_content})
197-
198-
# Extract code from markdown code block
256+
# Get edited content from LLM
199257
try:
200-
modified_segment = _extract_code_block(response.content)
258+
modified_segment = _extract_code_block(get_llm_edit(original_file_section, edit_content))
201259
except ValueError as e:
202260
return SemanticEditObservation(
203261
status="error",
204262
error=f"Failed to parse LLM response: {e!s}",
205263
filepath=filepath,
206264
)
207265

208-
# Merge the edited content with the original
209-
new_content = _merge_content(original_content, modified_segment, start, end)
210-
new_lines = new_content.splitlines()
211-
212-
# Validate that no changes were made before the start line
266+
# Apply the semantic edit
213267
try:
214-
_validate_edit_boundaries(original_lines, new_lines, start_idx, end_idx)
268+
new_content, diff = apply_semantic_edit(codebase, filepath, modified_segment, start, end)
215269
except ValueError as e:
216270
return SemanticEditObservation(
217271
status="error",
218272
error=str(e),
219273
filepath=filepath,
220274
)
221275

222-
# Generate diff
223-
diff = generate_diff(original_content, new_content)
224-
225-
# Apply the edit
226-
try:
227-
file.edit(new_content)
228-
codebase.commit()
229-
except Exception as e:
230-
return SemanticEditObservation(
231-
status="error",
232-
error=f"Failed to apply edit: {e!s}",
233-
filepath=filepath,
234-
)
235-
236276
return SemanticEditObservation(
237277
status="success",
238278
filepath=filepath,

0 commit comments

Comments
 (0)