Skip to content

feat: message highlighting for links #483

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Feb 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 125 additions & 0 deletions src/codegen/extensions/tools/link_annotation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
"""Tool for annotating messages with links to files and symbols in a codebase."""

import html
import re
from enum import StrEnum
from typing import Callable

from codegen import Codebase


class MessageChannel(StrEnum):
    """Output channels a message can be rendered for.

    Each member selects the link syntax used when a link is formatted; being a
    ``StrEnum``, every member also compares equal to its plain string value.
    """

    # Linear: rendered with Markdown-style ``[name](url)`` links.
    LINEAR = "linear"
    # Generic Markdown: ``[name](url)``.
    MARKDOWN = "markdown"
    # HTML: ``<a href='url'>name</a>``.
    HTML = "html"
    # Slack: ``<url|name>``.
    SLACK = "slack"


def format_link_linear(name: str, url: str) -> str:
    """Render a link for Linear, which uses ``[name](url)`` syntax."""
    return "[{}]({})".format(name, url)


def format_link_markdown(name: str, url: str) -> str:
    """Render a standard Markdown inline link: ``[name](url)``."""
    return "[%s](%s)" % (name, url)


def format_link_html(name: str, url: str) -> str:
    """Render an HTML anchor element pointing at ``url`` with ``name`` as its text.

    Both values are HTML-escaped so that characters such as ``<``, ``&`` or a
    single quote in a symbol name or URL cannot break out of the element or of
    the single-quoted ``href`` attribute.

    Args:
        name: The visible link text.
        url: The link target.

    Returns:
        The anchor markup, e.g. ``<a href='http://example.com'>test</a>``.
    """
    # quote=True also escapes "'" which delimits the href attribute below.
    safe_url = html.escape(url, quote=True)
    # Quotes are harmless in element text, so only &, < and > are escaped here.
    safe_name = html.escape(name, quote=False)
    return f"<a href='{safe_url}'>{safe_name}</a>"


def format_link_slack(name: str, url: str) -> str:
    """Render a Slack link, written as ``<url|name>``."""
    return "<{target}|{label}>".format(target=url, label=name)


# Dispatch table: message channel -> function that renders (name, url) as a link.
# Keys are the enum members themselves so the mapping matches its declared type;
# because MessageChannel is a StrEnum, lookups by the plain string value
# (e.g. LINK_FORMATS["slack"]) resolve to the same entries.
LINK_FORMATS: dict[MessageChannel, Callable[[str, str], str]] = {
    MessageChannel.LINEAR: format_link_linear,
    MessageChannel.MARKDOWN: format_link_markdown,
    MessageChannel.HTML: format_link_html,
    MessageChannel.SLACK: format_link_slack,
}


def clean_github_url(url: str) -> str:
    """Strip embedded credentials from a GitHub URL and normalise its prefix.

    Args:
        url: A URL containing ``github.com``, optionally carrying credentials
            (``https://<token>@github.com/...``).

    Returns:
        The URL without credentials, rewritten so that it starts with
        ``https://github.com``.
    """
    # Credentials sit between the scheme and the host, so they can never
    # contain "/". Excluding "/" prevents an "@" that appears later in the
    # path (e.g. ".../pkg@v2/file.py") from being treated as the end of a
    # token, which would delete the host and part of the path.
    url = re.sub(r"https://[^@/]+@", "https://", url)

    # Ensure it starts with standard github.com
    if not url.startswith("https://github.com"):
        # Split on the FIRST "github.com" only, so a path segment that happens
        # to contain the host name (e.g. "site.github.com") is not truncated.
        url = "https://github.com" + url.split("github.com", 1)[-1]

    return url


def format_link(name: str, url: str | None, format: MessageChannel = MessageChannel.SLACK) -> str:
    """Render ``name`` as a link to ``url`` using the syntax of the given channel.

    Args:
        name: The visible link text.
        url: The link target; ``None`` is treated as an empty string.
        format: The channel whose link syntax should be used.

    Returns:
        The formatted link string.
    """
    target = "" if url is None else url
    # GitHub URLs are cleaned (credentials removed, prefix normalised) first.
    if "github.com" in target:
        target = clean_github_url(target)
    render = LINK_FORMATS[format]
    return render(name, target)


def extract_code_snippets(message: str) -> list[str]:
    """Find all text wrapped in single backticks, excluding content in code blocks.

    Args:
        message: The message to process

    Returns:
        List of strings found between single backticks, excluding those in code blocks
    """
    # First remove all fenced code blocks (text between ``` pairs). The match is
    # non-greedy and spans newlines, so a block that itself contains backticks
    # (shell command substitution, nested inline code, ...) is still removed as
    # a whole instead of leaking its contents as inline snippets.
    message_without_blocks = re.sub(r"```.*?```", "", message, flags=re.DOTALL)

    # Then find all text wrapped in single backticks
    return re.findall(r"`([^`]+)`", message_without_blocks)


def is_likely_filepath(text: str) -> bool:
    """Heuristically decide whether ``text`` names a file.

    A string qualifies if it contains a path separator (``/``) or ends with one
    of a fixed set of common source/config file extensions.
    """
    known_suffixes = (
        ".py",
        ".ts",
        ".tsx",
        ".jsx",
        ".js",
        ".json",
        ".mdx",
        ".md",
        ".yaml",
        ".yml",
        ".toml",
    )
    # str.endswith accepts a tuple and checks every suffix in one call.
    return "/" in text or text.endswith(known_suffixes)


def add_links_to_message(message: str, codebase: Codebase, channel: MessageChannel = MessageChannel.SLACK) -> str:
    """Add links to symbols and files in a message.

    This function:
    1. Links code snippets that match symbol names
    2. Links anything that looks like a filepath

    Args:
        message: The message to process
        codebase: The codebase to look up symbols and files in
        channel: The message channel format to use

    Returns:
        The message with appropriate links added
    """
    # str.replace below rewrites every occurrence of a snippet at once, so each
    # distinct snippet only needs one codebase lookup. dict.fromkeys removes
    # duplicates while keeping first-seen order.
    for snippet in dict.fromkeys(extract_code_snippets(message)):
        backticked = f"`{snippet}`"

        # Filepaths
        if is_likely_filepath(snippet):
            file = codebase.get_file(snippet, optional=True)
            if file:
                message = message.replace(backticked, format_link(snippet, file.github_url, channel))
            continue

        # Symbols: only link if there's exactly one symbol, otherwise the
        # reference is ambiguous (or unknown) and is left untouched.
        symbols = codebase.get_symbols(snippet)
        if len(symbols) == 1:
            symbol = symbols[0]
            message = message.replace(backticked, format_link(symbol.name, symbol.github_url, channel))

    return message
6 changes: 5 additions & 1 deletion src/codegen/sdk/core/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@


@apidoc
class File(Editable[None]):

Check failure on line 50 in src/codegen/sdk/core/file.py

View workflow job for this annotation

GitHub Actions / mypy

error: Type argument "None" of "Editable" must be a subtype of "Editable[Any]" [type-var]
"""Represents a generic file.

Could represent a source file or a non-code file such as a markdown file or image file.
Expand All @@ -74,8 +74,8 @@
parser = get_parser_by_filepath_or_extension(".py")
ts_node = parser.parse(bytes("", "utf-8")).root_node
self._range_index = RangeIndex()
super().__init__(ts_node, getattr(self, "node_id", None), ctx, None)

Check failure on line 77 in src/codegen/sdk/core/file.py

View workflow job for this annotation

GitHub Actions / mypy

error: Argument 2 to "__init__" of "Editable" has incompatible type "Any | None"; expected "int" [arg-type]
self.path = self.ctx.to_absolute(filepath)

Check failure on line 78 in src/codegen/sdk/core/file.py

View workflow job for this annotation

GitHub Actions / mypy

error: Argument 1 to "__call__" of "_lru_cache_wrapper" has incompatible type "PathLike[Any]"; expected "Hashable" [arg-type]
self.file_path = str(self.ctx.to_relative(self.path))
self.name = self.path.stem
self._directory = None
Expand All @@ -92,7 +92,7 @@
return self.content

@property
def file(self) -> Self:

Check failure on line 95 in src/codegen/sdk/core/file.py

View workflow job for this annotation

GitHub Actions / mypy

error: Signature of "file" incompatible with supertype "Editable" [override]
"""A property that returns the file object for non-source files.

This is used by Editable.file to work with non-source files, allowing consistent interface usage across both source and non-source files.
Expand All @@ -116,7 +116,7 @@
ctx.io.write_file(path, content)
ctx.io.save_files({path})

new_file = cls(filepath, ctx, ts_node=None, binary=binary)

Check failure on line 119 in src/codegen/sdk/core/file.py

View workflow job for this annotation

GitHub Actions / mypy

error: Argument 1 to "File" has incompatible type "str | Path"; expected "PathLike[Any]" [arg-type]
return new_file

@property
Expand Down Expand Up @@ -221,9 +221,13 @@

@cached_property
@noapidoc
def github_url(self) -> str | None:

Check failure on line 224 in src/codegen/sdk/core/file.py

View workflow job for this annotation

GitHub Actions / mypy

error: Missing return statement [return]
if self.ctx.base_url:
return self.ctx.base_url + "/" + self.file_path
if self.ctx.base_url.endswith(".git"):
print("HERE")
return self.ctx.base_url.replace(".git", "/blob/develop/") + self.file_path

Check warning on line 228 in src/codegen/sdk/core/file.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/sdk/core/file.py#L226-L228

Added lines #L226 - L228 were not covered by tests
else:
return self.ctx.base_url + "/" + self.file_path

Check warning on line 230 in src/codegen/sdk/core/file.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/sdk/core/file.py#L230

Added line #L230 was not covered by tests

@property
@reader
Expand Down Expand Up @@ -405,7 +409,7 @@


@apidoc
class SourceFile(

Check failure on line 412 in src/codegen/sdk/core/file.py

View workflow job for this annotation

GitHub Actions / mypy

error: Definition of "node_type" in base class "File" is incompatible with definition in base class "Expression" [misc]
File,
HasBlock,
Usable,
Expand Down Expand Up @@ -453,7 +457,7 @@
def parse(self, ctx: CodebaseContext) -> None:
self.__dict__.pop("_source", None)
# Add self to the graph
self.code_block = self._parse_code_block(self.ts_node)

Check failure on line 460 in src/codegen/sdk/core/file.py

View workflow job for this annotation

GitHub Actions / mypy

error: Incompatible types in assignment (expression has type "Any | None", variable has type "TCodeBlock") [assignment]

self.code_block.parse()
self._parse_imports()
Expand All @@ -470,7 +474,7 @@
def remove_internal_edges(self) -> None:
"""Removes all its direct nodes and edges for each of its internal symbols and imports."""
# ==== [ Classes, Assignments, Function, Interfaces ] ====
for symbol in self.symbols(nested=True):

Check failure on line 477 in src/codegen/sdk/core/file.py

View workflow job for this annotation

GitHub Actions / mypy

error: Missing positional argument "self" in call to "__call__" of "ProxyProperty" [call-arg]
symbol._remove_internal_edges()

# ==== [ Exports ] ====
Expand All @@ -489,7 +493,7 @@

Returns a list of external import node ids that need to be re-resolved
"""
external_edges_to_resolve = []

Check failure on line 496 in src/codegen/sdk/core/file.py

View workflow job for this annotation

GitHub Actions / mypy

error: Need type annotation for "external_edges_to_resolve" (hint: "external_edges_to_resolve: list[<type>] = ...") [var-annotated]

# Collect node ids of all the file's nested children and itself to remove
node_ids_to_remove = set()
Expand Down
Empty file.
Empty file.
194 changes: 194 additions & 0 deletions tests/unit/codegen/extensions/test_message_annotation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
"""Tests for message annotation functionality."""

import pytest

from codegen.extensions.tools.link_annotation import (
MessageChannel,
add_links_to_message,
extract_code_snippets,
format_link,
is_likely_filepath,
)
from codegen.sdk.codebase.factory.get_session import get_codebase_session


@pytest.fixture
def codebase(tmpdir):
    """Create a simple codebase for testing.

    The codebase holds four files. ``src/main.py`` defines one ``hello``
    function, one ``Greeter`` class, two functions both named
    ``hello_duplicate`` and one ``Greeter_duplicate`` class; the other three
    files exist so file linking can be exercised.
    """
    # language=python
    content = """
def hello():
    print("Hello, world!")

class Greeter:
    def greet(self):
        hello()

def hello_duplicate():
    pass

def hello_duplicate():
    pass

class Greeter_duplicate:
    pass
"""
    # Create multiple files to test file linking
    files = {
        "src/main.py": content,
        "src/utils/helpers.py": "# Helper functions",
        "docs/README.md": "# Documentation",
        "tsconfig.json": "{}",
    }
    # Yield from inside the context manager so the session stays open for the test.
    with get_codebase_session(tmpdir=tmpdir, files=files) as codebase:
        yield codebase


def test_format_link_linear():
    """Linear links are rendered as ``[name](url)``."""
    rendered = format_link("test", "http://example.com", MessageChannel.LINEAR)
    assert rendered == "[test](http://example.com)"


def test_format_link_markdown():
    """Markdown links are rendered as ``[name](url)``."""
    rendered = format_link("test", "http://example.com", MessageChannel.MARKDOWN)
    assert rendered == "[test](http://example.com)"


def test_format_link_html():
    """HTML links are rendered as a single-quoted anchor element."""
    rendered = format_link("test", "http://example.com", MessageChannel.HTML)
    assert rendered == "<a href='http://example.com'>test</a>"


def test_format_link_slack():
    """Slack links are rendered as ``<url|name>``."""
    rendered = format_link("test", "http://example.com", MessageChannel.SLACK)
    assert rendered == "<http://example.com|test>"


def test_extract_code_snippets():
    """Inline backtick spans are extracted; the fenced block is skipped."""
    text = "Here is some `code` and `more code` and ```a code block``` and `final code`"
    assert extract_code_snippets(text) == ["code", "more code", "final code"]


def test_is_likely_filepath():
    """Paths and known extensions are detected; bare identifiers are not."""
    # Anything containing a slash counts as a path
    for with_slash in ("src/file.py", "path/to/file"):
        assert is_likely_filepath(with_slash)

    # Bare file names with a common extension count too
    for with_extension in ("file.py", "component.tsx", "config.json", "README.md"):
        assert is_likely_filepath(with_extension)

    # Ordinary words must be rejected
    for plain_word in ("hello", "Greeter", "function"):
        assert not is_likely_filepath(plain_word)


def test_add_links_single_symbol(codebase):
    """A uniquely named function gets a Slack link."""
    annotated = add_links_to_message(
        "Here is the `hello` function",
        codebase,
        channel=MessageChannel.SLACK,
    )
    assert "|hello>" in annotated


def test_add_links_class(codebase):
    """A uniquely named class gets exactly one link."""
    annotated = add_links_to_message("The `Greeter` class", codebase)
    assert "Greeter" in annotated
    # One link should be created
    assert annotated.count("<") == 1


def test_add_links_filepath(codebase):
    """Existing file paths in backticks are turned into links."""
    annotated = add_links_to_message(
        "Check out `src/main.py` and `src/utils/helpers.py`",
        codebase,
    )
    for expected in ("|src/main.py>", "|src/utils/helpers.py>"):
        assert expected in annotated


def test_add_links_filepath_with_extension(codebase):
    """Files recognised by extension or by path are both linked."""
    annotated = add_links_to_message("See `tsconfig.json` and `docs/README.md`", codebase)
    for expected in ("|tsconfig.json>", "|docs/README.md>"):
        assert expected in annotated


def test_nonexistent_filepath(codebase):
    """A path that is not in the codebase is left untouched."""
    original = "This `src/nonexistent.py` should not be linked"
    # Message should remain unchanged
    assert add_links_to_message(original, codebase) == original


def test_ignore_code_blocks(codebase):
    """Test that code blocks are ignored.

    The message mentions ``hello`` twice: once inside a fenced block (which
    must survive verbatim) and once as an inline snippet (which must be linked).
    """
    message = """Here's a code block:
```python
def hello():
    print("Hello!")
```
And here's an inline `hello` reference."""

    result = add_links_to_message(message, codebase)
    # The inline reference should be linked
    assert "<" in result
    # But the code block should remain unchanged
    assert "```python" in result
    assert "def hello():" in result


def test_nonexistent_symbol(codebase):
    """A snippet that matches no symbol is left untouched."""
    original = "This `nonexistent_function` should not be linked"
    # Message should remain unchanged
    assert add_links_to_message(original, codebase) == original


def test_duplicate_symbols(codebase):
    """A name defined more than once is ambiguous and is left untouched."""
    original = "This `hello_duplicate` should not be linked"
    # Message should remain unchanged
    assert add_links_to_message(original, codebase) == original


def test_mixed_content(codebase):
    """Test message with mixed content types.

    Combines linkable symbols and files, unknown symbols and files, a
    duplicated symbol name and a fenced code block in one message, and checks
    that each is handled independently of the others.
    """
    message = """Here's a complex message:
- Valid symbol: `hello`
- Valid file: `src/main.py`
- Invalid symbol: `nonexistent`
- Invalid file: `src/nonexistent.py`
- Code block:
```python
def hello():
    pass
```
- Duplicate symbol: `hello_duplicate`
- Another valid symbol: `Greeter`
- Another valid file: `docs/README.md`
"""
    result = add_links_to_message(message, codebase)

    # Valid symbols should be linked
    assert "|hello>" in result
    assert "|Greeter>" in result

    # Valid files should be linked
    assert "|src/main.py>" in result
    assert "|docs/README.md>" in result

    # Invalid symbols and files should remain as-is (still wrapped in backticks)
    assert "`nonexistent`" in result
    assert "`src/nonexistent.py`" in result
    assert "`hello_duplicate`" in result

    # Code block should be preserved
    assert "```python" in result
    assert "def hello():" in result