Skip to content

Commit 9de8877

Browse files
authored
feat: message highlighting for links (#483)
1 parent bcaba53 commit 9de8877

File tree

5 files changed

+324
-1
lines changed

5 files changed

+324
-1
lines changed
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
"""Tool for viewing PR contents and modified symbols."""
2+
3+
import re
4+
from enum import StrEnum
5+
from typing import Callable
6+
7+
from codegen import Codebase
8+
9+
10+
class MessageChannel(StrEnum):
11+
LINEAR = "linear"
12+
MARKDOWN = "markdown"
13+
HTML = "html"
14+
SLACK = "slack"
15+
16+
17+
def format_link_linear(name: str, url: str) -> str:
    """Render a link for Linear using ``[name](url)`` syntax."""
    return "[{}]({})".format(name, url)
19+
20+
21+
def format_link_markdown(name: str, url: str) -> str:
    """Render a Markdown inline link: ``[name](url)``."""
    label = f"[{name}]"
    target = f"({url})"
    return label + target
23+
24+
25+
def format_link_html(name: str, url: str) -> str:
    """Render a link as an HTML anchor element.

    Both values are HTML-escaped before interpolation so that a quote in
    ``url`` cannot terminate the single-quoted ``href`` attribute and markup
    characters in ``name`` are shown literally instead of being parsed.

    Args:
        name: The visible link text
        url: The link target

    Returns:
        ``<a href='url'>name</a>`` with both parts escaped
    """
    # Local import keeps this block self-contained without touching file-level imports.
    from html import escape

    safe_url = escape(url, quote=True)
    safe_name = escape(name)
    return f"<a href='{safe_url}'>{safe_name}</a>"
27+
28+
29+
def format_link_slack(name: str, url: str) -> str:
    """Render a link in Slack syntax: ``<url|name>``."""
    return "<{}|{}>".format(url, name)
31+
32+
33+
# Dispatch table mapping each channel to the function that renders a
# (name, url) pair in that channel's link syntax.
# Keys are the enum members themselves so the table agrees with its
# annotation; MessageChannel is a StrEnum, so lookups by the plain string
# value (e.g. "slack") resolve to the same entries.
LINK_FORMATS: dict[MessageChannel, Callable[[str, str], str]] = {
    MessageChannel.LINEAR: format_link_linear,
    MessageChannel.MARKDOWN: format_link_markdown,
    MessageChannel.HTML: format_link_html,
    MessageChannel.SLACK: format_link_slack,
}
39+
40+
41+
def clean_github_url(url: str) -> str:
    """Clean a GitHub URL by removing access tokens and standardizing format.

    Args:
        url: A URL expected to contain ``github.com``

    Returns:
        The URL without embedded credentials, rewritten to start with
        ``https://github.com`` if it did not already
    """
    # Remove access token if present. Credentials sit between the scheme and
    # the host and cannot contain "/", so "/" is excluded from the match;
    # otherwise an "@" later in the path (e.g. "pkg@1.0") would make the
    # pattern swallow the host and everything before that "@".
    url = re.sub(r"https://[^@/]+@", "https://", url)

    # Ensure it starts with standard github.com: keep whatever follows the
    # last "github.com" and re-prefix it.
    if not url.startswith("https://github.com"):
        url = "https://github.com" + url.split("github.com")[-1]

    return url
51+
52+
53+
def format_link(name: str, url: str | None, format: MessageChannel = MessageChannel.SLACK) -> str:
    """Render ``name`` as a link to ``url`` in the given channel's syntax.

    Args:
        name: The visible link text
        url: The link target; ``None`` is treated as an empty string
        format: The channel whose link syntax to use

    Returns:
        The formatted link string
    """
    target = "" if url is None else url

    # GitHub URLs are cleaned (credentials stripped, prefix normalized) first
    if "github.com" in target:
        target = clean_github_url(target)

    render = LINK_FORMATS[format]
    return render(name, target)
60+
61+
62+
def extract_code_snippets(message: str) -> list[str]:
    """Find all text wrapped in single backticks, excluding content in code blocks.

    Args:
        message: The message to process

    Returns:
        List of strings found between single backticks, excluding those in code blocks
    """
    # First remove all code blocks (text between ```). The match is non-greedy
    # with DOTALL so a block may span lines and may itself contain single
    # backticks; a "[^`]*" body would fail to match such a block and leave
    # its contents to be mis-read as inline snippets.
    message_without_blocks = re.sub(r"```.*?```", "", message, flags=re.DOTALL)

    # Then find all text wrapped in single backticks
    return re.findall(r"`([^`]+)`", message_without_blocks)
78+
79+
80+
def is_likely_filepath(text: str) -> bool:
    """Return whether ``text`` looks like a filepath.

    A string qualifies if it contains a path separator or ends with one of
    the file extensions we want to link.
    """
    # Any slash is treated as a path separator
    if "/" in text:
        return True

    # Otherwise fall back to the known-extension check
    linkable_suffixes = (".py", ".ts", ".tsx", ".jsx", ".js", ".json", ".mdx", ".md", ".yaml", ".yml", ".toml")
    return text.endswith(linkable_suffixes)
91+
92+
93+
def add_links_to_message(message: str, codebase: Codebase, channel: MessageChannel = MessageChannel.SLACK) -> str:
    """Replace backticked file and symbol references in a message with links.

    For every inline code snippet in the message:
    1. If it looks like a filepath and the codebase has that file, link it
    2. Otherwise, if exactly one symbol matches the snippet, link that symbol

    Args:
        message: The message to process
        codebase: The codebase to look up symbols and files in
        channel: The message channel format to use

    Returns:
        The message with appropriate links added
    """
    for candidate in extract_code_snippets(message):
        quoted = f"`{candidate}`"

        # Filepaths
        if is_likely_filepath(candidate):
            matched_file = codebase.get_file(candidate, optional=True)
            if not matched_file:
                # No such file: leave the snippet untouched
                continue
            file_link = format_link(candidate, matched_file.github_url, channel)
            message = message.replace(quoted, file_link)
            continue

        # Symbols: only link when the lookup returns exactly one symbol
        matches = codebase.get_symbols(candidate)
        if len(matches) != 1:
            continue
        symbol_link = format_link(matches[0].name, matches[0].github_url, channel)
        message = message.replace(quoted, symbol_link)

    return message

src/codegen/sdk/core/file.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,11 @@ def owners(self) -> set[str]:
223223
@noapidoc
224224
def github_url(self) -> str | None:
225225
if self.ctx.base_url:
226-
return self.ctx.base_url + "/" + self.file_path
226+
if self.ctx.base_url.endswith(".git"):
227+
print("HERE")
228+
return self.ctx.base_url.replace(".git", "/blob/develop/") + self.file_path
229+
else:
230+
return self.ctx.base_url + "/" + self.file_path
227231

228232
@property
229233
@reader

tests/unit/codegen/extensions/langchain/__init__.py

Whitespace-only changes.

tests/unit/codegen/extensions/langchain/test_agent.py

Whitespace-only changes.
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
"""Tests for message annotation functionality."""
2+
3+
import pytest
4+
5+
from codegen.extensions.tools.link_annotation import (
6+
MessageChannel,
7+
add_links_to_message,
8+
extract_code_snippets,
9+
format_link,
10+
is_likely_filepath,
11+
)
12+
from codegen.sdk.codebase.factory.get_session import get_codebase_session
13+
14+
15+
@pytest.fixture
def codebase(tmpdir):
    """Yield a small multi-file codebase session for the link tests.

    ``src/main.py`` defines a single ``hello`` and ``Greeter``, plus a
    ``hello_duplicate`` function that is defined twice.
    """
    # language=python
    main_source = """
def hello():
    print("Hello, world!")

class Greeter:
    def greet(self):
        hello()

def hello_duplicate():
    pass

def hello_duplicate():
    pass

class Greeter_duplicate:
    pass
"""
    # Several files so that file linking can be exercised as well
    fixture_files = {
        "src/main.py": main_source,
        "src/utils/helpers.py": "# Helper functions",
        "docs/README.md": "# Documentation",
        "tsconfig.json": "{}",
    }
    with get_codebase_session(tmpdir=tmpdir, files=fixture_files) as session:
        yield session
45+
46+
47+
def test_format_link_linear():
    """Linear links render as ``[name](url)``."""
    rendered = format_link("test", "http://example.com", MessageChannel.LINEAR)
    assert rendered == "[test](http://example.com)"
50+
51+
52+
def test_format_link_markdown():
    """Markdown links render as ``[name](url)``."""
    rendered = format_link("test", "http://example.com", MessageChannel.MARKDOWN)
    assert rendered == "[test](http://example.com)"
55+
56+
57+
def test_format_link_html():
    """HTML links render as a single-quoted anchor element."""
    rendered = format_link("test", "http://example.com", MessageChannel.HTML)
    assert rendered == "<a href='http://example.com'>test</a>"
60+
61+
62+
def test_format_link_slack():
    """Slack links render as ``<url|name>``."""
    rendered = format_link("test", "http://example.com", MessageChannel.SLACK)
    assert rendered == "<http://example.com|test>"
65+
66+
67+
def test_extract_code_snippets():
    """Inline snippets are returned in order; triple-backtick content is skipped."""
    text = "Here is some `code` and `more code` and ```a code block``` and `final code`"
    expected = ["code", "more code", "final code"]
    assert extract_code_snippets(text) == expected
72+
73+
74+
def test_is_likely_filepath():
    """Check which strings are classified as filepaths."""
    # Anything containing a slash counts as a path
    for candidate in ("src/file.py", "path/to/file"):
        assert is_likely_filepath(candidate)

    # Bare names with a common extension count too
    for candidate in ("file.py", "component.tsx", "config.json", "README.md"):
        assert is_likely_filepath(candidate)

    # Regular words are not paths
    for candidate in ("hello", "Greeter", "function"):
        assert not is_likely_filepath(candidate)
90+
91+
92+
def test_add_links_single_symbol(codebase):
    """A uniquely named function is turned into a Slack link."""
    linked = add_links_to_message("Here is the `hello` function", codebase, channel=MessageChannel.SLACK)
    assert "|hello>" in linked
97+
98+
99+
def test_add_links_class(codebase):
    """A uniquely named class is linked exactly once."""
    linked = add_links_to_message("The `Greeter` class", codebase)
    assert "Greeter" in linked
    # A single "<" means exactly one link was created
    assert linked.count("<") == 1
105+
106+
107+
def test_add_links_filepath(codebase):
    """Existing filepaths in the message are linked."""
    linked = add_links_to_message("Check out `src/main.py` and `src/utils/helpers.py`", codebase)
    for path in ("src/main.py", "src/utils/helpers.py"):
        assert f"|{path}>" in linked
113+
114+
115+
def test_add_links_filepath_with_extension(codebase):
    """Files recognised by extension (with or without a directory) are linked."""
    linked = add_links_to_message("See `tsconfig.json` and `docs/README.md`", codebase)
    for path in ("tsconfig.json", "docs/README.md"):
        assert f"|{path}>" in linked
120+
assert "|docs/README.md>" in result
121+
122+
123+
def test_nonexistent_filepath(codebase):
    """A path that is not in the codebase leaves the message untouched."""
    original = "This `src/nonexistent.py` should not be linked"
    assert add_links_to_message(original, codebase) == original
128+
129+
130+
def test_ignore_code_blocks(codebase):
    """Fenced code blocks are preserved while inline references are linked."""
    message = "\n".join(
        [
            "Here's a code block:",
            "```python",
            "def hello():",
            '    print("Hello!")',
            "```",
            "And here's an inline `hello` reference.",
        ]
    )

    linked = add_links_to_message(message, codebase)

    # The inline reference should be linked
    assert "<" in linked
    # But the code block should remain unchanged
    for fragment in ("```python", "def hello():"):
        assert fragment in linked
145+
146+
147+
def test_nonexistent_symbol(codebase):
    """A snippet matching no symbol leaves the message untouched."""
    original = "This `nonexistent_function` should not be linked"
    assert add_links_to_message(original, codebase) == original
152+
153+
154+
def test_duplicate_symbols(codebase):
    """A snippet matching more than one symbol leaves the message untouched."""
    original = "This `hello_duplicate` should not be linked"
    assert add_links_to_message(original, codebase) == original
159+
160+
161+
def test_mixed_content(codebase):
    """A message mixing valid, invalid and duplicate references plus a code block."""
    message = "\n".join(
        [
            "Here's a complex message:",
            "- Valid symbol: `hello`",
            "- Valid file: `src/main.py`",
            "- Invalid symbol: `nonexistent`",
            "- Invalid file: `src/nonexistent.py`",
            "- Code block:",
            "```python",
            "def hello():",
            "    pass",
            "```",
            "- Duplicate symbol: `hello_duplicate`",
            "- Another valid symbol: `Greeter`",
            "- Another valid file: `docs/README.md`",
            "",  # the message ends with a trailing newline
        ]
    )
    linked = add_links_to_message(message, codebase)

    # Valid symbols and valid files should be linked
    for target in ("hello", "Greeter", "src/main.py", "docs/README.md"):
        assert f"|{target}>" in linked

    # Invalid symbols and files should remain as-is
    for untouched in ("`nonexistent`", "`src/nonexistent.py`", "`hello_duplicate`"):
        assert untouched in linked

    # Code block should be preserved
    for fragment in ("```python", "def hello():"):
        assert fragment in linked

0 commit comments

Comments
 (0)