Skip to content

feat: Paginated view file tool #566

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions src/codegen/extensions/langchain/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,21 +45,41 @@
"""Input for viewing a file."""

filepath: str = Field(..., description="Path to the file relative to workspace root")
start_line: Optional[int] = Field(None, description="Starting line number to view (1-indexed, inclusive)")
end_line: Optional[int] = Field(None, description="Ending line number to view (1-indexed, inclusive)")
max_lines: Optional[int] = Field(None, description="Maximum number of lines to view at once, defaults to 250")
line_numbers: Optional[bool] = Field(True, description="If True, add line numbers to the content (1-indexed)")


class ViewFileTool(BaseTool):
"""Tool for viewing file contents and metadata."""

name: ClassVar[str] = "view_file"

Check failure on line 57 in src/codegen/extensions/langchain/tools.py

View workflow job for this annotation

GitHub Actions / mypy

error: Cannot override instance variable (previously declared on base class "BaseTool") with class variable [misc]
description: ClassVar[str] = "View the contents and metadata of a file in the codebase"
description: ClassVar[str] = """View the contents and metadata of a file in the codebase.

Check failure on line 58 in src/codegen/extensions/langchain/tools.py

View workflow job for this annotation

GitHub Actions / mypy

error: Cannot override instance variable (previously declared on base class "BaseTool") with class variable [misc]
For large files (>250 lines), content will be paginated. Use start_line and end_line to navigate through the file.
The response will indicate if there are more lines available to view."""
args_schema: ClassVar[type[BaseModel]] = ViewFileInput

Check failure on line 61 in src/codegen/extensions/langchain/tools.py

View workflow job for this annotation

GitHub Actions / mypy

error: Cannot override instance variable (previously declared on base class "BaseTool") with class variable [misc]
codebase: Codebase = Field(exclude=True)

def __init__(self, codebase: Codebase) -> None:
super().__init__(codebase=codebase)

def _run(self, filepath: str) -> str:
result = view_file(self.codebase, filepath)
def _run(
self,
filepath: str,
start_line: Optional[int] = None,
end_line: Optional[int] = None,
max_lines: Optional[int] = None,
line_numbers: Optional[bool] = True,
) -> str:
result = view_file(
self.codebase,
filepath,
line_numbers=line_numbers if line_numbers is not None else True,
start_line=start_line,
end_line=end_line,
max_lines=max_lines if max_lines is not None else 250,
)
return result.render()


Expand All @@ -73,9 +93,9 @@
class ListDirectoryTool(BaseTool):
"""Tool for listing directory contents."""

name: ClassVar[str] = "list_directory"

Check failure on line 96 in src/codegen/extensions/langchain/tools.py

View workflow job for this annotation

GitHub Actions / mypy

error: Cannot override instance variable (previously declared on base class "BaseTool") with class variable [misc]
description: ClassVar[str] = "List contents of a directory in the codebase"

Check failure on line 97 in src/codegen/extensions/langchain/tools.py

View workflow job for this annotation

GitHub Actions / mypy

error: Cannot override instance variable (previously declared on base class "BaseTool") with class variable [misc]
args_schema: ClassVar[type[BaseModel]] = ListDirectoryInput

Check failure on line 98 in src/codegen/extensions/langchain/tools.py

View workflow job for this annotation

GitHub Actions / mypy

error: Cannot override instance variable (previously declared on base class "BaseTool") with class variable [misc]
codebase: Codebase = Field(exclude=True)

def __init__(self, codebase: Codebase) -> None:
Expand All @@ -96,9 +116,9 @@
class SearchTool(BaseTool):
"""Tool for searching the codebase."""

name: ClassVar[str] = "search"

Check failure on line 119 in src/codegen/extensions/langchain/tools.py

View workflow job for this annotation

GitHub Actions / mypy

error: Cannot override instance variable (previously declared on base class "BaseTool") with class variable [misc]
description: ClassVar[str] = "Search the codebase using text search"

Check failure on line 120 in src/codegen/extensions/langchain/tools.py

View workflow job for this annotation

GitHub Actions / mypy

error: Cannot override instance variable (previously declared on base class "BaseTool") with class variable [misc]
args_schema: ClassVar[type[BaseModel]] = SearchInput

Check failure on line 121 in src/codegen/extensions/langchain/tools.py

View workflow job for this annotation

GitHub Actions / mypy

error: Cannot override instance variable (previously declared on base class "BaseTool") with class variable [misc]
codebase: Codebase = Field(exclude=True)

def __init__(self, codebase: Codebase) -> None:
Expand All @@ -119,7 +139,7 @@
class EditFileTool(BaseTool):
"""Tool for editing files."""

name: ClassVar[str] = "edit_file"

Check failure on line 142 in src/codegen/extensions/langchain/tools.py

View workflow job for this annotation

GitHub Actions / mypy

error: Cannot override instance variable (previously declared on base class "BaseTool") with class variable [misc]
description: ClassVar[str] = "Edit a file by replacing its entire content. This tool should only be used for replacing entire file contents."
args_schema: ClassVar[type[BaseModel]] = EditFileInput
codebase: Codebase = Field(exclude=True)
Expand Down
98 changes: 88 additions & 10 deletions src/codegen/extensions/tools/view_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,40 @@ class ViewFileObservation(Observation):
default=None,
description="Number of lines in the file",
)
start_line: Optional[int] = Field(
default=None,
description="Starting line number of the content (1-indexed)",
)
end_line: Optional[int] = Field(
default=None,
description="Ending line number of the content (1-indexed)",
)
has_more: Optional[bool] = Field(
default=None,
description="Whether there are more lines after end_line",
)
max_lines_per_page: Optional[int] = Field(
default=None,
description="Maximum number of lines that can be viewed at once",
)

str_template: ClassVar[str] = "File {filepath} ({line_count} lines)"
str_template: ClassVar[str] = "File {filepath} (showing lines {start_line}-{end_line} of {line_count})"

def render(self) -> str:
return f"""[VIEW FILE]: {self.filepath} ({self.line_count} lines)
{self.content}
"""
"""Render the file view with pagination information if applicable."""
header = f"[VIEW FILE]: {self.filepath}"
if self.line_count is not None:
header += f" ({self.line_count} lines total)"

if self.start_line is not None and self.end_line is not None:
header += f"\nShowing lines {self.start_line}-{self.end_line}"
if self.has_more:
header += f" (more lines available, max {self.max_lines_per_page} lines per page)"

if not self.content:
return f"{header}\n<empty content>"

return f"{header}\n\n{self.content}"


def add_line_numbers(content: str) -> str:
Expand All @@ -45,13 +72,23 @@ def add_line_numbers(content: str) -> str:
return "\n".join(f"{i + 1:>{width}}|{line}" for i, line in enumerate(lines))


def view_file(codebase: Codebase, filepath: str, line_numbers: bool = True) -> ViewFileObservation:
def view_file(
codebase: Codebase,
filepath: str,
line_numbers: bool = True,
start_line: Optional[int] = None,
end_line: Optional[int] = None,
max_lines: int = 250,
) -> ViewFileObservation:
"""View the contents and metadata of a file.

Args:
codebase: The codebase to operate on
filepath: Path to the file relative to workspace root
line_numbers: If True, add line numbers to the content (1-indexed)
start_line: Starting line number to view (1-indexed, inclusive)
end_line: Ending line number to view (1-indexed, inclusive)
max_lines: Maximum number of lines to view at once, defaults to 250
"""
try:
file = codebase.get_file(filepath)
Expand All @@ -62,15 +99,56 @@ def view_file(codebase: Codebase, filepath: str, line_numbers: bool = True) -> V
filepath=filepath,
content="",
line_count=0,
start_line=start_line,
end_line=end_line,
has_more=False,
max_lines_per_page=max_lines,
)

content = file.content
if line_numbers:
content = add_line_numbers(content)
# Split content into lines and get total line count
lines = file.content.splitlines()
total_lines = len(lines)

# If no start_line specified, start from beginning
if start_line is None:
start_line = 1

# Ensure start_line is within bounds
start_line = max(1, min(start_line, total_lines))

# If no end_line specified, show up to max_lines from start
if end_line is None:
end_line = min(start_line + max_lines - 1, total_lines)
else:
# Ensure end_line is within bounds and doesn't exceed max_lines from start
end_line = min(end_line, total_lines, start_line + max_lines - 1)

return ViewFileObservation(
# Extract the requested lines (convert to 0-based indexing)
content_lines = lines[start_line - 1 : end_line]
content = "\n".join(content_lines)

# Add line numbers if requested
if line_numbers:
# Pass the actual line numbers for proper numbering
numbered_lines = []
width = len(str(total_lines)) # Use total_lines for consistent width
for i, line in enumerate(content_lines, start=start_line):
numbered_lines.append(f"{i:>{width}}|{line}")
content = "\n".join(numbered_lines)

# Create base observation with common fields
observation = ViewFileObservation(
status="success",
filepath=file.filepath,
content=content,
line_count=len(content.splitlines()),
line_count=total_lines,
)

# Only include pagination fields if file exceeds max_lines
if total_lines > max_lines:
observation.start_line = start_line
observation.end_line = end_line
observation.has_more = end_line < total_lines
observation.max_lines_per_page = max_lines

return observation
148 changes: 148 additions & 0 deletions tests/unit/codegen/extensions/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,160 @@ def greet(self):
yield codebase


@pytest.fixture
def large_codebase(tmpdir):
    """Create a codebase with a large, predictable file for pagination testing.

    ``src/large_file.py`` is an 11-line header followed by 400 five-line
    method stanzas (four code lines plus a blank separator). Joined with
    newlines, ``str.splitlines()`` reports 2010 lines: the final blank
    separator only contributes the trailing newline. Every 20th stanza is a
    classmethod, so stanza ``i`` always begins on line ``11 + 5 * (i - 1) + 1``.

    The generated text is indented as real Python (4 spaces for members,
    8 for bodies) so the file handed to the codebase is syntactically valid.

    Yields:
        The codebase produced by ``get_codebase_session`` for these files.
    """
    # Header: imports, constants and the class statement (11 lines).
    large_file_lines = [
        "from __future__ import annotations",
        "import sys",
        "import os",
        "from typing import List, Optional, Dict",
        "",
        "# Constants",
        "MAX_ITEMS = 100",
        "DEBUG = False",
        "",
        "# Main class definition",
        "class LargeClass:",
    ]

    # 400 stanzas x 5 lines each -> roughly 2000 further lines.
    for i in range(1, 401):
        if i % 20 == 0:
            # Periodically emit a classmethod so pages contain both shapes.
            large_file_lines.extend(
                [
                    "    @classmethod",
                    f"    def class_method_{i}(cls) -> None:",
                    f"        print('Class method {i}')",
                    "        return None",
                    "",
                ]
            )
        else:
            large_file_lines.extend(
                [
                    f"    def method_{i}(self, param_{i}: int) -> str:",
                    f"        # Method {i} does something interesting",
                    f"        value = param_{i} * {i}",
                    f"        return f'Method {i} computed: {{value}}'",
                    "",
                ]
            )

    large_file_content = "\n".join(large_file_lines)

    files = {
        "src/main.py": """
def hello():
    print("Hello, world!")
""",
        "src/large_file.py": large_file_content,
    }

    with get_codebase_session(tmpdir=tmpdir, files=files) as codebase:
        yield codebase


def test_view_file(codebase):
    """A small file is returned successfully with no pagination metadata."""
    observation = view_file(codebase, "src/main.py")

    assert observation.status == "success"
    assert observation.filepath == "src/main.py"
    assert "hello()" in observation.content

    # A file that fits on one page leaves every pagination field unset.
    for field_name in ("start_line", "end_line", "has_more", "max_lines_per_page"):
        assert getattr(observation, field_name) is None


def test_view_file_pagination(large_codebase):
    """Check page windows, custom page sizes and line numbering on a large file."""
    path = "src/large_file.py"

    # No range given: the first page of 250 lines (the default max_lines).
    first_page = view_file(large_codebase, path)
    assert first_page.status == "success"
    assert first_page.start_line == 1
    assert first_page.end_line == 250
    assert first_page.has_more is True
    assert first_page.max_lines_per_page == 250
    for present in ("from __future__ import annotations", "def method_1"):
        assert present in first_page.content
    assert "def method_251" not in first_page.content  # lives beyond page 1

    # Explicit start and end inside the file.
    window = view_file(large_codebase, path, start_line=200, end_line=250)
    assert window.status == "success"
    assert window.start_line == 200
    assert window.end_line == 250
    assert window.has_more is True
    # Regular methods on either side of the classmethod emitted at 40.
    for present in ("def method_39", "def class_method_40", "def method_41"):
        assert present in window.content
    # Content before and after the requested window must be excluded.
    for absent in ("from __future__ import annotations", "def method_251"):
        assert absent not in window.content

    # Only start_line given: a full page is served from that line onwards.
    open_ended = view_file(large_codebase, path, start_line=350)
    assert open_ended.status == "success"
    assert open_ended.start_line == 350
    assert open_ended.has_more is True  # File has 2010 lines, so more content follows
    assert "def method_69" in open_ended.content
    assert "def class_method_80" in open_ended.content
    assert open_ended.end_line == 599  # 250 lines counted from 350

    # Caller-supplied page size.
    small_page = view_file(large_codebase, path, max_lines=100)
    assert small_page.status == "success"
    assert small_page.start_line == 1
    assert small_page.end_line == 100
    assert small_page.has_more is True
    assert small_page.max_lines_per_page == 100
    assert "from __future__ import annotations" in small_page.content
    assert len(small_page.content.splitlines()) <= 100

    # With line numbers, each line carries its position in the whole file.
    numbered = view_file(large_codebase, path, start_line=198, end_line=202, line_numbers=True)
    assert numbered.status == "success"
    for line_no in range(198, 203):
        assert f"{line_no}|" in numbered.content

    # Without line numbers, no numeric prefixes appear.
    plain = view_file(large_codebase, path, start_line=198, end_line=202, line_numbers=False)
    assert plain.status == "success"
    for line_no in (198, 199):
        assert f"{line_no}|" not in plain.content


def test_view_file_pagination_edge_cases(large_codebase):
    """Test edge cases for file pagination.

    The fixture file has 2010 lines, so "beyond the end of the file" cases
    must use line numbers greater than 2010 to exercise the clamping logic.
    """
    path = "src/large_file.py"

    # Inverted range (start_line > end_line): both values are kept and the
    # resulting slice is empty.
    result = view_file(large_codebase, path, start_line=200, end_line=100)
    assert result.status == "success"
    assert result.start_line == 200
    assert result.end_line == 100  # Should respect provided end_line
    assert result.content == ""  # No content since end_line < start_line

    # start_line close to (but inside) the end of the file: the page is cut
    # short at the last line and nothing further remains.
    result = view_file(large_codebase, path, start_line=2000)
    assert result.status == "success"
    assert result.start_line == 2000  # Should use provided start_line
    assert result.end_line == 2010  # Should adjust to total lines
    assert result.has_more is False

    # start_line genuinely beyond the end of the file: clamped to the last line.
    result = view_file(large_codebase, path, start_line=5000)
    assert result.status == "success"
    assert result.start_line == 2010
    assert result.end_line == 2010
    assert result.has_more is False

    # end_line further away than one page: capped at max_lines from start_line.
    result = view_file(large_codebase, path, start_line=200, end_line=2000)
    assert result.status == "success"
    assert result.start_line == 200
    assert result.end_line == min(200 + 250 - 1, 2010)  # Should respect max_lines and file length

    # end_line genuinely beyond the end of the file: truncated to the file length.
    result = view_file(large_codebase, path, start_line=1900, end_line=5000)
    assert result.status == "success"
    assert result.start_line == 1900
    assert result.end_line == 2010
    assert result.has_more is False

    # Negative start_line falls back to the first line.
    result = view_file(large_codebase, path, start_line=-10)
    assert result.status == "success"
    assert result.start_line == 1
    assert result.end_line == 250

def test_list_directory(codebase):
Expand Down
Loading