Skip to content

feat: Tooling fixes #598

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Feb 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions src/codegen/extensions/langchain/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,8 +493,7 @@ class GithubCreatePRReviewCommentInput(BaseModel):
body: str = Field(..., description="The comment text")
commit_sha: str = Field(..., description="The commit SHA to attach the comment to")
path: str = Field(..., description="The file path to comment on")
line: int | None = Field(None, description="The line number to comment on")
side: str | None = Field(None, description="Which version of the file to comment on ('LEFT' or 'RIGHT')")
line: int = Field(..., description="The line number to comment on use the indices from the diff")
start_line: int | None = Field(None, description="For multi-line comments, the starting line")


Expand All @@ -515,8 +514,7 @@ def _run(
body: str,
commit_sha: str,
path: str,
line: int | None = None,
side: str | None = None,
line: int,
start_line: int | None = None,
) -> str:
result = create_pr_review_comment(
Expand All @@ -526,8 +524,6 @@ def _run(
commit_sha=commit_sha,
path=path,
line=line,
side=side,
start_line=start_line,
)
return result.render()

Expand Down
23 changes: 11 additions & 12 deletions src/codegen/extensions/tools/github/create_pr_review_comment.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,18 @@ class PRReviewCommentObservation(Observation):
pr_number: int = Field(
description="PR number the comment was added to",
)
body: str = Field(
description="Content of the comment",
)
commit_sha: str = Field(
description="Commit SHA the comment was added to",
)
path: str = Field(
description="File path the comment was added to",
)
line: Optional[int] = Field(
default=None,
line: int = Field(
description="Line number the comment was added to",
)
body: str = Field(
description="Content of the comment",
)

str_template: ClassVar[str] = "Added review comment to PR #{pr_number} at {path}:{line}"


Expand All @@ -35,8 +36,7 @@ def create_pr_review_comment(
body: str,
commit_sha: str,
path: str,
line: Optional[int] = None,
side: Optional[str] = None,
line: int,
start_line: Optional[int] = None,
) -> PRReviewCommentObservation:
"""Create an inline review comment on a specific line in a pull request.
Expand All @@ -48,8 +48,6 @@ def create_pr_review_comment(
commit_sha: The commit SHA to attach the comment to
path: The file path to comment on
line: The line number to comment on
side: Which version of the file to comment on ('LEFT' or 'RIGHT')
start_line: For multi-line comments, the starting line
"""
try:
codebase.create_pr_review_comment(
Expand All @@ -58,15 +56,15 @@ def create_pr_review_comment(
commit_sha=commit_sha,
path=path,
line=line,
side=side,
start_line=start_line,
side="RIGHT",
)
return PRReviewCommentObservation(
status="success",
pr_number=pr_number,
path=path,
line=line,
body=body,
commit_sha=commit_sha,
)
except Exception as e:
return PRReviewCommentObservation(
Expand All @@ -76,4 +74,5 @@ def create_pr_review_comment(
path=path,
line=line,
body=body,
commit_sha=commit_sha,
)
7 changes: 6 additions & 1 deletion src/codegen/extensions/tools/github/view_pr.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ class ViewPRObservation(Observation):
patch: str = Field(
description="The PR's patch/diff content",
)
file_commit_sha: dict[str, str] = Field(
description="Commit SHAs for each file in the PR",
)

str_template: ClassVar[str] = "PR #{pr_id}"

Expand All @@ -30,12 +33,13 @@ def view_pr(codebase: Codebase, pr_id: int) -> ViewPRObservation:
pr_id: Number of the PR to get the contents for
"""
try:
modified_symbols, patch = codebase.get_modified_symbols_in_pr(pr_id)
patch, file_commit_sha = codebase.get_modified_symbols_in_pr(pr_id)

return ViewPRObservation(
status="success",
pr_id=pr_id,
patch=patch,
file_commit_sha=file_commit_sha,
)

except Exception as e:
Expand All @@ -44,4 +48,5 @@ def view_pr(codebase: Codebase, pr_id: int) -> ViewPRObservation:
error=f"Failed to view PR: {e!s}",
pr_id=pr_id,
patch="",
file_commit_sha={},
)
3 changes: 1 addition & 2 deletions src/codegen/git/repo_operator/repo_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@
email_level = None
levels = ["system", "global", "user", "repository"]
for level in levels:
with git_cli.config_reader(level) as reader:

Check failure on line 145 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Argument 1 to "config_reader" of "Repo" has incompatible type "str"; expected "Literal['system', 'global', 'user', 'repository'] | None" [arg-type]
if reader.has_option("user", "name") and not username:
username = reader.get("user", "name")
user_level = level
Expand Down Expand Up @@ -554,7 +554,7 @@
return content
except UnicodeDecodeError:
print(f"Warning: Unable to decode file {file_path}. Skipping.")
return None

Check failure on line 557 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Incompatible return value type (got "None", expected "str") [return-value]

def write_file(self, relpath: str, content: str) -> None:
"""Writes file content to disk"""
Expand Down Expand Up @@ -621,7 +621,7 @@
filepaths = self.get_filepaths_for_repo(ignore_list)
# Iterate through files and yield contents
for rel_filepath in filepaths:
rel_filepath: str

Check failure on line 624 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Name "rel_filepath" already defined on line 623 [no-redef]
filepath = os.path.join(self.repo_path, rel_filepath)

# Filter by subdirectory (includes full filenames)
Expand Down Expand Up @@ -652,7 +652,7 @@
list_files = []

for rel_filepath in self.git_cli.git.ls_files().split("\n"):
rel_filepath: str

Check failure on line 655 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Name "rel_filepath" already defined on line 654 [no-redef]
if subdirs and not any(d in rel_filepath for d in subdirs):
continue
if extensions is None or any(rel_filepath.endswith(e) for e in extensions):
Expand All @@ -676,7 +676,7 @@

def get_modified_files_in_last_n_days(self, days: int = 1) -> tuple[list[str], list[str]]:
"""Returns a list of files modified and deleted in the last n days"""
modified_files = []

Check failure on line 679 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Need type annotation for "modified_files" (hint: "modified_files: list[<type>] = ...") [var-annotated]
deleted_files = []
allowed_extensions = [".py"]

Expand All @@ -692,9 +692,9 @@
if file in modified_files:
modified_files.remove(file)
else:
if file not in modified_files and file[-3:] in allowed_extensions:

Check failure on line 695 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Value of type "str | PathLike[str]" is not indexable [index]
modified_files.append(file)
return modified_files, deleted_files

Check failure on line 697 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Incompatible return value type (got "tuple[list[str | PathLike[str]], list[str | PathLike[str]]]", expected "tuple[list[str], list[str]]") [return-value]

@cached_property
def base_url(self) -> str | None:
Expand All @@ -715,7 +715,7 @@

def get_pr_data(self, pr_number: int) -> dict:
"""Returns the data associated with a PR"""
return self.remote_git_repo.get_pr_data(pr_number)

Check failure on line 718 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: "GitRepoClient" has no attribute "get_pr_data" [attr-defined]

def create_pr_comment(self, pr_number: int, body: str) -> None:
"""Create a general comment on a pull request.
Expand All @@ -735,7 +735,7 @@
commit_sha: str,
path: str,
line: int | None = None,
side: str | None = None,
side: str = "RIGHT",
start_line: int | None = None,
) -> None:
"""Create an inline review comment on a specific line in a pull request.
Expand All @@ -758,9 +758,8 @@
body=body,
commit=commit,
path=path,
line=line,

Check failure on line 761 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Argument "line" to "create_review_comment" of "GitRepoClient" has incompatible type "int | None"; expected "int | _NotSetType" [arg-type]
side=side,
start_line=start_line,
)

def get_pull_request(self, pr_number: int) -> PullRequest | None:
Expand Down
47 changes: 47 additions & 0 deletions src/codegen/git/utils/pr_review.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from codegen.git.repo_operator.repo_operator import RepoOperator

if TYPE_CHECKING:
from codegen.sdk.core.codebase import Codebase, Editable, File, Symbol

Check failure on line 12 in src/codegen/git/utils/pr_review.py

View workflow job for this annotation

GitHub Actions / mypy

error: Module "codegen.sdk.core.codebase" has no attribute "Codebase" [attr-defined]


def get_merge_base(git_repo_client: Repository, pull: PullRequest | PullRequestContext) -> str:
Expand Down Expand Up @@ -73,6 +73,45 @@
return max(range1.start, range2.start) < min(range1.stop, range2.stop)


def get_file_to_commit_sha(op: RepoOperator, pull: PullRequest) -> dict[str, str]:
    """Map every file changed in a pull request to the latest PR commit that touched it.

    The PR's commits are walked once, newest first, so the first commit seen
    for a file is the most recent one that modified it. The walk stops as soon
    as every changed file has been resolved, so older commits are not
    inspected unnecessarily.

    Args:
        op (RepoOperator): The repository operator; its ``remote_git_repo`` must be set.
        pull (PullRequest): The pull request object.

    Returns:
        dict[str, str]: A dictionary mapping file paths to their latest commit SHA,
        keyed in the order the files are listed by ``pull.get_files()``. A file that
        is not found in any PR commit falls back to ``pull.head.sha``.

    Raises:
        ValueError: If ``op.remote_git_repo`` is not available.
    """
    if not op.remote_git_repo:
        msg = "GitHub API client is required to get PR commit information"
        raise ValueError(msg)

    # Materialise both listings once: commits must be reversible, and the file
    # order is needed again when building the result.
    commits = list(pull.get_commits())
    filenames = [changed.filename for changed in pull.get_files()]

    unresolved = set(filenames)
    latest_sha: dict[str, str] = {}

    # Newest commit first: the first hit for a file is its latest modification.
    for commit in reversed(commits):
        if not unresolved:
            break
        for touched in commit.files:
            if touched.filename in unresolved:
                latest_sha[touched.filename] = commit.sha
                unresolved.discard(touched.filename)

    # If we didn't find a commit (shouldn't happen), use the head SHA.
    if unresolved:
        head_sha = pull.head.sha
        for name in unresolved:
            latest_sha[name] = head_sha

    # Rebuild in the PR's own file order so callers see a stable layout.
    return {name: latest_sha[name] for name in filenames}


class CodegenPR:
"""Wrapper around PRs - enables codemods to interact with them"""

Expand Down Expand Up @@ -143,3 +182,11 @@
else:
# If diff_url not available, get the patch directly
return self._gh_pr.get_patch()

def get_commit_sha(self) -> str:
    """Return the SHA recorded on the head of the wrapped pull request."""
    head = self._gh_pr.head
    return head.sha

def get_file_commit_shas(self) -> dict[str, str]:
    """Return a mapping of changed file paths to their latest commit SHA in the PR.

    Delegates to ``get_file_to_commit_sha`` using this wrapper's repo operator
    and underlying pull request.
    """
    operator, pull_request = self._op, self._gh_pr
    return get_file_to_commit_sha(op=operator, pull=pull_request)
22 changes: 20 additions & 2 deletions src/codegen/sdk/core/codebase.py
Original file line number Diff line number Diff line change
Expand Up @@ -1311,17 +1311,35 @@ def from_repo(
logger.exception(f"Failed to initialize codebase: {e}")
raise

def get_modified_symbols_in_pr(self, pr_id: int) -> tuple[list[Symbol], str]:
def get_modified_symbols_in_pr(self, pr_id: int) -> tuple[str, dict[str, str]]:
"""Get all modified symbols in a pull request"""
pr = self._op.get_pull_request(pr_id)
cg_pr = CodegenPR(self._op, self, pr)
patch = cg_pr.get_pr_diff()
return cg_pr.modified_symbols, patch
commit_sha = cg_pr.get_file_commit_shas()
return patch, commit_sha

def create_pr_comment(self, pr_number: int, body: str) -> None:
"""Create a comment on a pull request"""
return self._op.create_pr_comment(pr_number, body)

def create_pr_review_comment(self, pr_number: int, body: str, commit_sha: str, path: str, line: int | None = None, side: str = "RIGHT", start_line: int | None = None) -> None:
"""Create a review comment on a pull request.

Args:
pr_number: The number of the pull request
body: The body of the comment
commit_sha: The SHA of the commit to comment on
path: The path of the file to comment on
line: The line number to comment on
side: The side of the comment to create
start_line: The start line number to comment on

Returns:
None
"""
return self._op.create_pr_review_comment(pr_number, body, commit_sha, path, line, side, start_line)


# The last 2 lines of code are added to the runner. See codegen-backend/cli/generate/utils.py
# Type Aliases
Expand Down