Skip to content

CG-10465 raw text edit #170

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions src/codegen/sdk/core/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,73 @@ def parse(self, G: "CodebaseGraph") -> None:
def _compute_dependencies(self, *args, **kwargs) -> None:
pass

@writer
def edit(self, new_src: str, fix_indentation: bool = False, priority: int = 0, dedupe: bool = True) -> None:
    """Overwrite this file's source with ``new_src``.

    A file that has no TreeSitter node, or that is not a ``SourceFile``, has its whole byte
    range (0 to ``len(self.content_bytes)``) replaced in one edit. Any other file is handed
    to the parent class's ``edit`` implementation.

    Args:
        new_src (str): Text that should become the file's content.
        fix_indentation (bool): Forwarded to the parent implementation only; the
            whole-file path does not use it. Defaults to False.
        priority (int): Priority forwarded to the underlying edit. Defaults to 0.
        dedupe (bool): Deduplication flag forwarded to the underlying edit. Defaults to True.

    Raises:
        ValueError: If the file is binary.

    Returns:
        None
    """
    if self.is_binary:
        error_message = "Cannot replace content in binary files"
        raise ValueError(error_message)

    has_parsed_source = self.ts_node is not None and isinstance(self, SourceFile)
    if has_parsed_source:
        super().edit(new_src, fix_indentation, priority, dedupe)
        return

    # No usable syntax tree: swap out every byte of the file in a single edit.
    whole_file_end = len(self.content_bytes)
    self._edit_byte_range(new_src, 0, whole_file_end, priority, dedupe)

@writer
def replace(self, old: str, new: str, count: int = -1, is_regex: bool = False, priority: int = 0) -> int:
    """Replace occurrences of text in the file.

    A file that has a TreeSitter node and is a ``SourceFile`` is delegated to the parent
    class's ``replace`` implementation. Any other file has the replacement computed on its
    full text, and the whole byte range is then rewritten in a single edit.

    Args:
        old (str): The text (or, when ``is_regex`` is True, the regular expression) to find.
        new (str): The text to replace each match with. On the non-source path it is
            inserted literally; backreferences such as ``\\1`` are not expanded.
        count (int): Maximum number of replacements to make. A negative value replaces all
            occurrences; 0 replaces nothing. Defaults to -1.
        is_regex (bool): If True, treat ``old`` as a regular expression pattern.
            Defaults to False.
        priority (int): Priority forwarded to the underlying edit. Defaults to 0.

    Raises:
        ValueError: If attempting to replace content in a binary file.

    Returns:
        int: On the non-source path, the number of replacements made (0 when nothing
            matched or ``count`` is 0). On the source path, whatever the parent
            implementation returns.
    """
    if self.is_binary:
        msg = "Cannot replace content in binary files"
        raise ValueError(msg)

    if self.ts_node is not None and isinstance(self, SourceFile):
        return super().replace(old, new, count, is_regex, priority)

    # Function-scope import so this block does not depend on the file's import list.
    import re

    # re.subn treats count=0 as "unlimited", so an explicit zero must be handled here.
    if count == 0:
        return 0

    pattern = old if is_regex else re.escape(old)
    # A callable replacement keeps `new` literal even if it contains backslashes.
    # NOTE(review): confirm the parent implementation also inserts `new` literally.
    updated_content, replacements = re.subn(pattern, lambda _match: new, self.content, count=max(count, 0))
    if replacements == 0:
        return 0

    self._edit_byte_range(updated_content, 0, len(self.content_bytes), priority)
    return replacements


TImport = TypeVar("TImport", bound="Import")
TFunction = TypeVar("TFunction", bound="Function")
Expand Down
22 changes: 0 additions & 22 deletions tests/unit/codegen/sdk/codebase/file/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,28 +59,6 @@ def test_codebase_files(tmpdir) -> None:
assert {f for f in codebase.files(extensions=[".bin"])} == {file3}


@pytest.mark.skip("MDX editing is broken")
def test_codebase_edit_mdx(tmpdir) -> None:
    """Editing MDx seems broken currently - it will just prepend to the file"""
    initial_files = {"file1.mdx": "# Header", "file2.tsx": "console.log('hello, world!')"}
    with get_codebase_session(tmpdir=tmpdir, files=initial_files) as codebase:
        codebase.get_file("file1.mdx").edit("NEW TEXT")
        codebase.commit()
        # Look the file up again after the commit before asserting.
        refreshed = codebase.get_file("file1.mdx")
        assert refreshed.content == "NEW TEXT"


@pytest.mark.skip("MDX replacing is broken")
def test_codebase_replace_mdx(tmpdir) -> None:
    """Editing MDx seems broken currently - it will just prepend to the file"""
    initial_files = {"file1.mdx": "# Header"}
    with get_codebase_session(tmpdir=tmpdir, files=initial_files) as codebase:
        codebase.get_file("file1.mdx").replace("# Header", "NEW TEXT")
        codebase.commit()
        # Look the file up again after the commit before asserting.
        refreshed = codebase.get_file("file1.mdx")
        assert refreshed.content == "NEW TEXT"


@pytest.mark.skipif(sys.platform == "darwin", reason="macOS is case-insensitive")
def test_file_extensions_ignore_case(tmpdir) -> None:
with get_codebase_session(tmpdir=tmpdir, files={"file1.py": "print(123)", "file2.py": "print(456)", "file3.bin": b"\x89PNG", "file4": "Hello world!"}) as codebase:
Expand Down
119 changes: 119 additions & 0 deletions tests/unit/codegen/sdk/codebase/file/test_file_edit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import pytest

from codegen.sdk.codebase.factory.get_session import get_codebase_session
from codegen.sdk.core.file import SourceFile


def test_codebase_edit_mdx(tmpdir) -> None:
    """Check that ``edit`` on an MDX file replaces its whole content once committed."""
    initial_files = {"file1.mdx": "# Header", "file2.tsx": "console.log('hello, world!')"}
    with get_codebase_session(tmpdir=tmpdir, files=initial_files) as codebase:
        codebase.get_file("file1.mdx").edit("NEW TEXT")
        codebase.commit()
        # Look the file up again after the commit before asserting.
        refreshed = codebase.get_file("file1.mdx")
        assert refreshed.content == "NEW TEXT"


def test_edit_json_file(tmpdir) -> None:
    """Check that successive ``edit`` calls each replace a JSON file's full content."""
    original_json = '{"key": "value", "nested": {"foo": "bar"}}'
    replacements = (
        '{"newKey": "newValue"}',
        '{"newKey": "newValue", "extra": true}',
    )
    with get_codebase_session(tmpdir=tmpdir, files={"config.json": original_json}) as codebase:
        file = codebase.get_file("config.json")

        # Each edit is committed on its own and verified before the next one is applied.
        for replacement in replacements:
            file.edit(replacement)
            codebase.commit()
            assert file.content == replacement


def test_edit_txt_file(tmpdir) -> None:
    """Check that ``edit`` replaces a plain-text file with single-line and multi-line content."""
    replacements = (
        "New World",
        "Line 1\nLine 2\nLine 3",
    )
    with get_codebase_session(tmpdir=tmpdir, files={"data.txt": "Hello\nWorld\nTest"}) as codebase:
        file = codebase.get_file("data.txt")

        # Each edit is committed on its own and verified before the next one is applied.
        for replacement in replacements:
            file.edit(replacement)
            codebase.commit()
            assert file.content == replacement


def test_codebase_replace_mdx(tmpdir) -> None:
    """Check that ``replace`` on an MDX file swaps the matched text once committed."""
    initial_files = {"file1.mdx": "# Header"}
    with get_codebase_session(tmpdir=tmpdir, files=initial_files) as codebase:
        codebase.get_file("file1.mdx").replace("# Header", "NEW TEXT")
        codebase.commit()
        # Look the file up again after the commit before asserting.
        refreshed = codebase.get_file("file1.mdx")
        assert refreshed.content == "NEW TEXT"


def test_replace_non_source_file(tmpdir) -> None:
    """Check ``replace`` on non-source files with one and with several occurrences."""
    initial_files = {
        "doc.mdx": "# Header\nThis is a test\nMore content",
        "config.json": '{"test": "value", "other": "test"}',
    }
    # doc.mdx contains "test" once; config.json contains it twice.
    expected_after_replace = {
        "doc.mdx": "# Header\nThis is a demo\nMore content",
        "config.json": '{"demo": "value", "other": "demo"}',
    }
    with get_codebase_session(tmpdir=tmpdir, files=initial_files) as codebase:
        for path, expected_content in expected_after_replace.items():
            target = codebase.get_file(path)
            target.replace("test", "demo")
            codebase.commit()
            assert target.content == expected_content


def test_edit_binary_file_fails(tmpdir) -> None:
    """Check that both ``edit`` and ``replace`` raise ``ValueError`` on a binary file."""
    png_header = b"\x89PNG"
    expected_error = "Cannot replace content in binary files"
    with get_codebase_session(tmpdir=tmpdir, files={"image.png": png_header}) as codebase:
        file = codebase.get_file("image.png")

        with pytest.raises(ValueError, match=expected_error):
            file.edit("new content")

        with pytest.raises(ValueError, match=expected_error):
            file.replace("old", "new")


def test_edit_source_file_preserves_behavior(tmpdir) -> None:
    """Check that editing a Python file yields the new content and a file that still parses."""
    before = "def test():\n print('hello')"
    after = "def test():\n print('world')"
    with get_codebase_session(tmpdir=tmpdir, files={"script.py": before}) as codebase:
        file = codebase.get_file("script.py")

        file.edit(after)
        codebase.commit()
        assert file.content == after

        # The edited file must still be a SourceFile exposing the `test` function.
        assert isinstance(file, SourceFile)
        assert file.get_function("test") is not None


def test_transaction_ordering_non_source_files(tmpdir) -> None:
    """Check sequential, separately committed edits with differing priorities on a Markdown file."""
    # (content, priority) pairs applied in this order, one commit per edit.
    steps = (
        ("# New Header\nContent\nFooter", 1),
        ("# New Header\nNew Content\nFooter", 2),
        ("# New Header\nNew Content\nNew Footer", 0),
    )
    with get_codebase_session(tmpdir=tmpdir, files={"doc.md": "# Header\nContent\nFooter"}) as codebase:
        file = codebase.get_file("doc.md")

        # Committing after every edit keeps the transactions from conflicting.
        for content, priority in steps:
            file.edit(content, priority=priority)
            codebase.commit()

        # The last committed edit determines the final content.
        assert file.content == "# New Header\nNew Content\nNew Footer"