Skip to content

Commit 815d063

Browse files
tomcodgencodegen-bot
tomcodgen
and
codegen-bot
authored
CG-10465 raw text edit (#170)
# Motivation

`File(name="doc.mdx").edit(...)` and `.replace(...)` should edit the file in place. Right now they append the new text instead of replacing the existing content.

# Content

New `File` methods.

# Testing

Unit tests and manual testing.

# Please check the following before marking your PR as ready for review

- [x] I have added tests for my changes
- [x] I have updated the documentation or added new documentation as needed
- [x] I have read and agree to the [Contributor License Agreement](../CLA.md)

Co-authored-by: codegen-bot <[email protected]>
1 parent eb523a0 commit 815d063

File tree

3 files changed

+186
-22
lines changed

3 files changed

+186
-22
lines changed

src/codegen/sdk/core/file.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,73 @@ def parse(self, G: "CodebaseGraph") -> None:
344344
def _compute_dependencies(self, *args, **kwargs) -> None:
345345
pass
346346

347+
@writer
def edit(self, new_src: str, fix_indentation: bool = False, priority: int = 0, dedupe: bool = True) -> None:
    """Overwrite the content of this file with new_src.

    A file that has a TreeSitter node and is a SourceFile is handed to the parent Editable
    implementation. Every other (non-binary) file has its full byte range replaced directly.

    Args:
        new_src (str): The text that becomes the file's new content.
        fix_indentation (bool): Forwarded to the parent implementation for source files;
            unused on the raw-text path. Defaults to False.
        priority (int): The priority of the edit transaction. Higher priority edits are
            applied first. Defaults to 0.
        dedupe (bool): If True, deduplicates identical transactions. Defaults to True.

    Raises:
        ValueError: If the file is binary.

    Returns:
        None
    """
    if self.is_binary:
        msg = "Cannot replace content in binary files"
        raise ValueError(msg)

    is_parsed_source = self.ts_node is not None and isinstance(self, SourceFile)
    if is_parsed_source:
        super().edit(new_src, fix_indentation, priority, dedupe)
        return

    # Raw-text path: a single edit spanning byte 0 through the end of the current content.
    self._edit_byte_range(new_src, 0, len(self.content_bytes), priority, dedupe)
377+
@writer
def replace(self, old: str, new: str, count: int = -1, is_regex: bool = False, priority: int = 0) -> int:
    """Replace occurrences of text in the file.

    For non-source files, performs the replacement on the raw text and rewrites the whole file
    in a single edit. For source files, delegates to the parent Editable implementation which
    uses TreeSitter nodes for precise replacements.

    Args:
        old (str): The text (or, with is_regex, the pattern) to be replaced.
        new (str): The text to replace with.
        count (int): Maximum number of replacements to make. A negative value means replace
            all occurrences; 0 means replace nothing. Defaults to -1.
        is_regex (bool): If True, treat 'old' as a regular expression pattern.
            Defaults to False.
        priority (int): The priority of the edit transaction. Higher priority edits are
            applied first. Defaults to 0.

    Raises:
        ValueError: If attempting to replace content in a binary file.

    Returns:
        int: For non-source files, the number of replacements actually made (0 when nothing
            matched, in which case no edit is queued). For source files, the value returned
            by the parent implementation.
    """
    if self.is_binary:
        msg = "Cannot replace content in binary files"
        raise ValueError(msg)

    if self.ts_node is not None and isinstance(self, SourceFile):
        return super().replace(old, new, count, is_regex, priority)

    # count == 0 asks for no replacements; handle it up front because re.subn
    # interprets count=0 as "replace everything".
    if count == 0:
        return 0

    content = self.content
    if is_regex:
        import re  # local import: the file-level import block is not visible from this block

        updated, replaced = re.subn(old, new, content, count=max(count, 0))
    else:
        occurrences = content.count(old)
        replaced = occurrences if count < 0 else min(occurrences, count)
        updated = content.replace(old, new, count)

    if replaced == 0:
        return 0

    # Rewrite the entire file in one transaction covering the current byte range.
    self._edit_byte_range(updated, 0, len(self.content_bytes), priority)
    return replaced
413+
347414

348415
TImport = TypeVar("TImport", bound="Import")
349416
TFunction = TypeVar("TFunction", bound="Function")

tests/unit/codegen/sdk/codebase/file/test_file.py

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -59,28 +59,6 @@ def test_codebase_files(tmpdir) -> None:
5959
assert {f for f in codebase.files(extensions=[".bin"])} == {file3}
6060

6161

62-
@pytest.mark.skip("MDX editing is broken")
63-
def test_codebase_edit_mdx(tmpdir) -> None:
64-
"""Editing MDx seems broken currently - it will just prepend to the file"""
65-
with get_codebase_session(tmpdir=tmpdir, files={"file1.mdx": "# Header", "file2.tsx": "console.log('hello, world!')"}) as codebase:
66-
file = codebase.get_file("file1.mdx")
67-
file.edit("NEW TEXT")
68-
codebase.commit()
69-
file = codebase.get_file("file1.mdx")
70-
assert file.content == "NEW TEXT"
71-
72-
73-
@pytest.mark.skip("MDX replacing is broken")
74-
def test_codebase_replace_mdx(tmpdir) -> None:
75-
"""Editing MDx seems broken currently - it will just prepend to the file"""
76-
with get_codebase_session(tmpdir=tmpdir, files={"file1.mdx": "# Header"}) as codebase:
77-
file = codebase.get_file("file1.mdx")
78-
file.replace("# Header", "NEW TEXT")
79-
codebase.commit()
80-
file = codebase.get_file("file1.mdx")
81-
assert file.content == "NEW TEXT"
82-
83-
8462
@pytest.mark.skipif(sys.platform == "darwin", reason="macOS is case-insensitive")
8563
def test_file_extensions_ignore_case(tmpdir) -> None:
8664
with get_codebase_session(tmpdir=tmpdir, files={"file1.py": "print(123)", "file2.py": "print(456)", "file3.bin": b"\x89PNG", "file4": "Hello world!"}) as codebase:
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
import pytest
2+
3+
from codegen.sdk.codebase.factory.get_session import get_codebase_session
4+
from codegen.sdk.core.file import SourceFile
5+
6+
7+
def test_codebase_edit_mdx(tmpdir) -> None:
    """Editing an MDX file replaces its content with the new text."""
    initial_files = {"file1.mdx": "# Header", "file2.tsx": "console.log('hello, world!')"}
    with get_codebase_session(tmpdir=tmpdir, files=initial_files) as codebase:
        mdx = codebase.get_file("file1.mdx")
        mdx.edit("NEW TEXT")
        codebase.commit()

        # Look the file up again after the commit before checking its content.
        reloaded = codebase.get_file("file1.mdx")
        assert reloaded.content == "NEW TEXT"
15+
16+
17+
def test_edit_json_file(tmpdir) -> None:
    """Successive edits each overwrite the full content of a JSON file."""
    seed_files = {"config.json": '{"key": "value", "nested": {"foo": "bar"}}'}
    with get_codebase_session(tmpdir=tmpdir, files=seed_files) as codebase:
        config = codebase.get_file("config.json")

        # Every edit passes a complete document; commit after each one and check
        # that the file now holds exactly that document.
        payloads = (
            '{"newKey": "newValue"}',
            '{"newKey": "newValue", "extra": true}',
        )
        for payload in payloads:
            config.edit(payload)
            codebase.commit()
            assert config.content == payload
31+
32+
33+
def test_edit_txt_file(tmpdir) -> None:
    """Plain text files can be overwritten with single-line and multi-line content."""
    seed_files = {"data.txt": "Hello\nWorld\nTest"}
    with get_codebase_session(tmpdir=tmpdir, files=seed_files) as codebase:
        text_file = codebase.get_file("data.txt")

        # First a one-line replacement of the whole file, then a multi-line one.
        replacements = (
            "New World",
            "Line 1\nLine 2\nLine 3",
        )
        for replacement in replacements:
            text_file.edit(replacement)
            codebase.commit()
            assert text_file.content == replacement
47+
48+
49+
def test_codebase_replace_mdx(tmpdir) -> None:
    """Replacing the only text in an MDX file leaves just the replacement."""
    seed_files = {"file1.mdx": "# Header"}
    with get_codebase_session(tmpdir=tmpdir, files=seed_files) as codebase:
        mdx = codebase.get_file("file1.mdx")
        mdx.replace("# Header", "NEW TEXT")
        codebase.commit()

        # Look the file up again after the commit before checking its content.
        reloaded = codebase.get_file("file1.mdx")
        assert reloaded.content == "NEW TEXT"
57+
58+
59+
def test_replace_non_source_file(tmpdir) -> None:
    """replace() rewrites matching text in MDX and JSON files."""
    seed_files = {
        "doc.mdx": "# Header\nThis is a test\nMore content",
        "config.json": '{"test": "value", "other": "test"}',
    }
    with get_codebase_session(tmpdir=tmpdir, files=seed_files) as codebase:
        # (filename, expected content): the MDX file has one occurrence of "test",
        # the JSON file has two.
        scenarios = (
            ("doc.mdx", "# Header\nThis is a demo\nMore content"),
            ("config.json", '{"demo": "value", "other": "demo"}'),
        )
        for filename, expected in scenarios:
            target = codebase.get_file(filename)
            target.replace("test", "demo")
            codebase.commit()
            assert target.content == expected
73+
74+
75+
def test_edit_binary_file_fails(tmpdir) -> None:
    """edit() and replace() both raise ValueError on a binary file."""
    png_header = b"\x89PNG"  # the bytes 0x89 0x50 0x4E 0x47
    expected_message = "Cannot replace content in binary files"
    with get_codebase_session(tmpdir=tmpdir, files={"image.png": png_header}) as codebase:
        image = codebase.get_file("image.png")

        with pytest.raises(ValueError, match=expected_message):
            image.edit("new content")

        with pytest.raises(ValueError, match=expected_message):
            image.replace("old", "new")
86+
87+
88+
def test_edit_source_file_preserves_behavior(tmpdir) -> None:
    """Editing a Python source file updates its content and keeps it a SourceFile."""
    original_src = "def test():\n print('hello')"
    updated_src = "def test():\n print('world')"
    with get_codebase_session(tmpdir=tmpdir, files={"script.py": original_src}) as codebase:
        script = codebase.get_file("script.py")

        # Replace the whole source and commit it.
        script.edit(updated_src)
        codebase.commit()
        assert script.content == updated_src

        # The file must still be a SourceFile whose function can be looked up.
        assert isinstance(script, SourceFile)
        assert script.get_function("test") is not None
101+
102+
103+
def test_transaction_ordering_non_source_files(tmpdir) -> None:
    """Edits with differing priorities, committed one at a time, leave the last content in place."""
    seed_files = {"doc.md": "# Header\nContent\nFooter"}
    with get_codebase_session(tmpdir=tmpdir, files=seed_files) as codebase:
        doc = codebase.get_file("doc.md")

        # Each edit is committed before the next is issued, to avoid transaction conflicts.
        steps = (
            ("# New Header\nContent\nFooter", 1),
            ("# New Header\nNew Content\nFooter", 2),
            ("# New Header\nNew Content\nNew Footer", 0),
        )
        for text, edit_priority in steps:
            doc.edit(text, priority=edit_priority)
            codebase.commit()

        # The content written by the final edit is what remains.
        assert doc.content == "# New Header\nNew Content\nNew Footer"

0 commit comments

Comments
 (0)