Skip to content

Foundations for PR BOT static analysis #343

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Feb 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/mint.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"og:locale": "en_US",
"og:logo": "https://i.imgur.com/f4OVOqI.png",
"article:publisher": "Codegen, Inc.",
"twitter:site": "@codegen",
"twitter:site": "@codegen"
},
"favicon": "/favicon.svg",
"colors": {
Expand Down
86 changes: 79 additions & 7 deletions src/codegen/git/repo_operator/local_repo_operator.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import os
from functools import cached_property
from typing import Self, override
Expand All @@ -6,13 +7,19 @@
from git import Remote
from git import Repo as GitCLI
from git.remote import PushInfoList
from github import Github
from github.PullRequest import PullRequest

from codegen.git.clients.git_repo_client import GitRepoClient
from codegen.git.repo_operator.repo_operator import RepoOperator
from codegen.git.schemas.enums import FetchResult
from codegen.git.schemas.github import GithubType
from codegen.git.schemas.repo_config import BaseRepoConfig
from codegen.git.utils.clone_url import url_to_github
from codegen.git.utils.file_utils import create_files

logger = logging.getLogger(__name__)


class OperatorIsLocal(Exception):
"""Error raised while trying to do a remote operation on a local operator"""
Expand All @@ -29,20 +36,54 @@
_repo_name: str
_git_cli: GitCLI
repo_config: BaseRepoConfig
_github_api_key: str | None
_remote_git_repo: GitRepoClient | None = None

def __init__(
self,
repo_path: str, # full path to the repo
github_api_key: str | None = None,
repo_config: BaseRepoConfig | None = None,
bot_commit: bool = False,
) -> None:
self._repo_path = repo_path
self._repo_name = os.path.basename(repo_path)
self._github_api_key = github_api_key
self.github_type = GithubType.Github
self._remote_git_repo = None
os.makedirs(self.repo_path, exist_ok=True)
GitCLI.init(self.repo_path)
repo_config = repo_config or BaseRepoConfig()
super().__init__(repo_config, self.repo_path, bot_commit)

####################################################################################################################
# PROPERTIES
####################################################################################################################

@property
def remote_git_repo(self) -> GitRepoClient:
if self._remote_git_repo is None:
if not self._github_api_key:
return None

Check failure on line 67 in src/codegen/git/repo_operator/local_repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Incompatible return value type (got "None", expected "GitRepoClient") [return-value]

Check warning on line 67 in src/codegen/git/repo_operator/local_repo_operator.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/repo_operator/local_repo_operator.py#L65-L67

Added lines #L65 - L67 were not covered by tests

if not (base_url := self.base_url):
msg = "Could not determine GitHub URL from remotes"
raise ValueError(msg)

Check warning on line 71 in src/codegen/git/repo_operator/local_repo_operator.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/repo_operator/local_repo_operator.py#L69-L71

Added lines #L69 - L71 were not covered by tests

# Extract owner and repo from the base URL
# Format: https://github.com/owner/repo
parts = base_url.split("/")
if len(parts) < 2:
msg = f"Invalid GitHub URL format: {base_url}"
raise ValueError(msg)

Check warning on line 78 in src/codegen/git/repo_operator/local_repo_operator.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/repo_operator/local_repo_operator.py#L75-L78

Added lines #L75 - L78 were not covered by tests

owner = parts[-4]
repo = parts[-3]

Check warning on line 81 in src/codegen/git/repo_operator/local_repo_operator.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/repo_operator/local_repo_operator.py#L80-L81

Added lines #L80 - L81 were not covered by tests

github = Github(self._github_api_key)
self._remote_git_repo = github.get_repo(f"{owner}/{repo}")

Check failure on line 84 in src/codegen/git/repo_operator/local_repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Incompatible types in assignment (expression has type "Repository", variable has type "GitRepoClient | None") [assignment]
return self._remote_git_repo

Check failure on line 85 in src/codegen/git/repo_operator/local_repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Incompatible return value type (got "GitRepoClient | None", expected "GitRepoClient") [return-value]

Check warning on line 85 in src/codegen/git/repo_operator/local_repo_operator.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/repo_operator/local_repo_operator.py#L83-L85

Added lines #L83 - L85 were not covered by tests

####################################################################################################################
# CLASS METHODS
####################################################################################################################
Expand Down Expand Up @@ -70,9 +111,16 @@
return op

@classmethod
def create_from_commit(cls, repo_path: str, commit: str, url: str) -> Self:
"""Do a shallow checkout of a particular commit to get a repository from a given remote URL."""
op = cls(repo_config=BaseRepoConfig(), repo_path=repo_path, bot_commit=False)
def create_from_commit(cls, repo_path: str, commit: str, url: str, github_api_key: str | None = None) -> Self:
"""Do a shallow checkout of a particular commit to get a repository from a given remote URL.

Args:
repo_path (str): Path where the repo should be cloned
commit (str): The commit hash to checkout
url (str): Git URL of the repository
github_api_key (str | None): Optional GitHub API key for operations that need GitHub access
"""
op = cls(repo_path=repo_path, bot_commit=False, github_api_key=github_api_key)

Check warning on line 123 in src/codegen/git/repo_operator/local_repo_operator.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/repo_operator/local_repo_operator.py#L123

Added line #L123 was not covered by tests
op.discard_changes()
if op.get_active_branch_or_commit() != commit:
op.create_remote("origin", url)
Expand All @@ -81,12 +129,13 @@
return op

@classmethod
def create_from_repo(cls, repo_path: str, url: str) -> Self:
def create_from_repo(cls, repo_path: str, url: str, github_api_key: str | None = None) -> Self:
"""Create a fresh clone of a repository or use existing one if up to date.

Args:
repo_path (str): Path where the repo should be cloned
url (str): Git URL of the repository
github_api_key (str | None): Optional GitHub API key for operations that need GitHub access
"""
# Check if repo already exists
if os.path.exists(repo_path):
Expand All @@ -102,7 +151,7 @@
remote_head = git_cli.remotes.origin.refs[git_cli.active_branch.name].commit
# If up to date, use existing repo
if local_head.hexsha == remote_head.hexsha:
return cls(repo_config=BaseRepoConfig(), repo_path=repo_path, bot_commit=False)
return cls(repo_path=repo_path, bot_commit=False, github_api_key=github_api_key)

Check warning on line 154 in src/codegen/git/repo_operator/local_repo_operator.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/repo_operator/local_repo_operator.py#L154

Added line #L154 was not covered by tests
except Exception:
# If any git operations fail, fallback to fresh clone
pass
Expand All @@ -113,13 +162,13 @@

shutil.rmtree(repo_path)

# Do a fresh clone with depth=1 to get latest commit
# Clone the repository
GitCLI.clone_from(url=url, to_path=repo_path, depth=1)

# Initialize with the cloned repo
git_cli = GitCLI(repo_path)

return cls(repo_config=BaseRepoConfig(), repo_path=repo_path, bot_commit=False)
return cls(repo_path=repo_path, bot_commit=False, github_api_key=github_api_key)

Check warning on line 171 in src/codegen/git/repo_operator/local_repo_operator.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/repo_operator/local_repo_operator.py#L171

Added line #L171 was not covered by tests

####################################################################################################################
# PROPERTIES
Expand All @@ -134,11 +183,11 @@
return self._repo_path

@property
def codeowners_parser(self) -> CodeOwnersParser | None:

Check failure on line 186 in src/codegen/git/repo_operator/local_repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Signature of "codeowners_parser" incompatible with supertype "RepoOperator" [override]
return None

@cached_property
def base_url(self) -> str | None:

Check failure on line 190 in src/codegen/git/repo_operator/local_repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Missing return statement [return]

Check failure on line 190 in src/codegen/git/repo_operator/local_repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Signature of "base_url" incompatible with supertype "RepoOperator" [override]
if remote := next(iter(self.git_cli.remotes), None):
return url_to_github(remote.url, self.get_active_branch_or_commit())

Expand All @@ -153,3 +202,26 @@

def fetch_remote(self, remote_name: str = "origin", refspec: str | None = None, force: bool = True) -> FetchResult:
raise OperatorIsLocal()

def get_pull_request(self, pr_number: int) -> PullRequest | None:
"""Get a GitHub Pull Request object for the given PR number.

Args:
pr_number (int): The PR number to fetch

Returns:
PullRequest | None: The PyGitHub PullRequest object if found, None otherwise

Note:
This requires a GitHub API key to be set when creating the LocalRepoOperator
"""
try:

Check warning on line 218 in src/codegen/git/repo_operator/local_repo_operator.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/repo_operator/local_repo_operator.py#L218

Added line #L218 was not covered by tests
# Create GitHub client and get the PR
repo = self.remote_git_repo
if repo is None:
logger.warning("GitHub API key is required to fetch pull requests")
return None
return repo.get_pull(pr_number)

Check failure on line 224 in src/codegen/git/repo_operator/local_repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: "GitRepoClient" has no attribute "get_pull"; maybe "edit_pull" or "get_pull_safe"? [attr-defined]
except Exception as e:
logger.warning(f"Failed to get PR {pr_number}: {e!s}")
return None

Check warning on line 227 in src/codegen/git/repo_operator/local_repo_operator.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/repo_operator/local_repo_operator.py#L220-L227

Added lines #L220 - L227 were not covered by tests
129 changes: 129 additions & 0 deletions src/codegen/git/utils/pr_review.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
from typing import TYPE_CHECKING

import requests
from github import Repository
from github.PullRequest import PullRequest
from unidiff import PatchSet

from codegen.git.models.pull_request_context import PullRequestContext
from codegen.git.repo_operator.local_repo_operator import LocalRepoOperator
from codegen.git.repo_operator.remote_repo_operator import RemoteRepoOperator

if TYPE_CHECKING:
from codegen.sdk.core.codebase import Codebase, Editable, File, Symbol


def get_merge_base(git_repo_client: Repository, pull: PullRequest | PullRequestContext) -> str:

Check failure on line 16 in src/codegen/git/utils/pr_review.py

View workflow job for this annotation

GitHub Actions / mypy

error: Module "github.Repository" is not valid as a type [valid-type]
"""Gets the merge base of a pull request using a remote GitHub API client.

Args:
git_repo_client (GitRepoClient): The GitHub repository client.
pull (PullRequest): The pull request object.

Returns:
str: The SHA of the merge base commit.
"""
comparison = git_repo_client.compare(pull.base.sha, pull.head.sha)

Check failure on line 26 in src/codegen/git/utils/pr_review.py

View workflow job for this annotation

GitHub Actions / mypy

error: Repository? has no attribute "compare" [attr-defined]
return comparison.merge_base_commit.sha

Check warning on line 27 in src/codegen/git/utils/pr_review.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/utils/pr_review.py#L26-L27

Added lines #L26 - L27 were not covered by tests


def get_file_to_changed_ranges(pull_patch_set: PatchSet) -> dict[str, list]:
file_to_changed_ranges = {}
for patched_file in pull_patch_set:

Check warning on line 32 in src/codegen/git/utils/pr_review.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/utils/pr_review.py#L31-L32

Added lines #L31 - L32 were not covered by tests
# TODO: skip is deleted
if patched_file.is_removed_file:
continue
changed_ranges = [] # list of changed lines for the file
for hunk in patched_file:
changed_ranges.append(range(hunk.target_start, hunk.target_start + hunk.target_length))
file_to_changed_ranges[patched_file.path] = changed_ranges
return file_to_changed_ranges

Check warning on line 40 in src/codegen/git/utils/pr_review.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/utils/pr_review.py#L34-L40

Added lines #L34 - L40 were not covered by tests


def get_pull_patch_set(op: LocalRepoOperator | RemoteRepoOperator, pull: PullRequestContext) -> PatchSet:
# Get the diff directly from GitHub's API
if not op.remote_git_repo:
msg = "GitHub API client is required to get PR diffs"
raise ValueError(msg)

Check warning on line 47 in src/codegen/git/utils/pr_review.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/utils/pr_review.py#L45-L47

Added lines #L45 - L47 were not covered by tests

# Get the diff directly from the PR
diff_url = pull.raw_data.get("diff_url")

Check failure on line 50 in src/codegen/git/utils/pr_review.py

View workflow job for this annotation

GitHub Actions / mypy

error: "PullRequestContext" has no attribute "raw_data" [attr-defined]
if diff_url:

Check warning on line 51 in src/codegen/git/utils/pr_review.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/utils/pr_review.py#L50-L51

Added lines #L50 - L51 were not covered by tests
# Fetch the diff content from the URL
response = requests.get(diff_url)
response.raise_for_status()
diff = response.text

Check warning on line 55 in src/codegen/git/utils/pr_review.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/utils/pr_review.py#L53-L55

Added lines #L53 - L55 were not covered by tests
else:
# If diff_url not available, get the patch directly
diff = pull.get_patch()

Check warning on line 58 in src/codegen/git/utils/pr_review.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/utils/pr_review.py#L58

Added line #L58 was not covered by tests

# Parse the diff into a PatchSet
pull_patch_set = PatchSet(diff)
return pull_patch_set

Check warning on line 62 in src/codegen/git/utils/pr_review.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/utils/pr_review.py#L61-L62

Added lines #L61 - L62 were not covered by tests


def to_1_indexed(zero_indexed_range: range) -> range:
    """Shift a range up by one position.

    Primarily used to convert a 0-indexed range into the equivalent
    1-indexed range, but works for any n-indexed -> n+1-indexed shift.

    Args:
        zero_indexed_range: The range to shift.

    Returns:
        A new range whose ``start`` and ``stop`` are each one greater than
        the input's. The input's ``step`` is carried over unchanged.
    """
    # Forward the step explicitly: omitting it would silently turn a strided
    # range into a dense one and change which positions it contains.
    return range(
        zero_indexed_range.start + 1,
        zero_indexed_range.stop + 1,
        zero_indexed_range.step,
    )

Check warning on line 69 in src/codegen/git/utils/pr_review.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/utils/pr_review.py#L69

Added line #L69 was not covered by tests


def overlaps(range1: range, range2: range) -> bool:
    """Report whether two half-open ranges share at least one position.

    Only ``start`` and ``stop`` are compared; ``step`` is not consulted.
    Ranges that merely touch (one stops where the other starts) do not
    count as overlapping.
    """
    latest_start = max(range1.start, range2.start)
    earliest_stop = min(range1.stop, range2.stop)
    # A non-empty intersection exists only if it starts before it stops.
    return latest_start < earliest_stop

Check warning on line 74 in src/codegen/git/utils/pr_review.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/utils/pr_review.py#L74

Added line #L74 was not covered by tests


class CodegenPR:
"""Wrapper around PRs - enables codemods to interact with them"""

_gh_pr: PullRequest
_codebase: "Codebase"
_op: LocalRepoOperator | RemoteRepoOperator

# =====[ Computed ]=====
_modified_file_ranges: dict[str, list[tuple[int, int]]] = None

def __init__(self, op: LocalRepoOperator, codebase: "Codebase", pr: PullRequest):
    """Bind this wrapper to a repo operator, a codebase and a pull request.

    Args:
        op: Repo operator stored as ``_op``.
        codebase: Codebase stored as ``_codebase``.
        pr: Pull request object stored as ``_gh_pr``.
    """
    self._codebase = codebase
    self._gh_pr = pr
    self._op = op

Check warning on line 90 in src/codegen/git/utils/pr_review.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/utils/pr_review.py#L88-L90

Added lines #L88 - L90 were not covered by tests

@property
def modified_file_ranges(self) -> dict[str, list[tuple[int, int]]]:
"""Files and the ranges within that are modified"""
if not self._modified_file_ranges:
pull_patch_set = get_pull_patch_set(op=self._op, pull=self._gh_pr)
self._modified_file_ranges = get_file_to_changed_ranges(pull_patch_set)
return self._modified_file_ranges

Check warning on line 98 in src/codegen/git/utils/pr_review.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/utils/pr_review.py#L95-L98

Added lines #L95 - L98 were not covered by tests

@property
def modified_files(self) -> list["File"]:
filenames = self.modified_file_ranges.keys()
return [self._codebase.get_file(f, optional=True) for f in filenames]

Check warning on line 103 in src/codegen/git/utils/pr_review.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/utils/pr_review.py#L102-L103

Added lines #L102 - L103 were not covered by tests

def is_modified(self, editable: "Editable") -> bool:
"""Returns True if the Editable's range contains any modified lines"""
filepath = editable.filepath
changed_ranges = self._modified_file_ranges.get(filepath, [])
symbol_range = to_1_indexed(editable.line_range)
if any(overlaps(symbol_range, changed_range) for changed_range in changed_ranges):
return True
return False

Check warning on line 112 in src/codegen/git/utils/pr_review.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/utils/pr_review.py#L107-L112

Added lines #L107 - L112 were not covered by tests

@property
def modified_symbols(self) -> list["Symbol"]:
# Import SourceFile locally to avoid circular dependencies
from codegen.sdk.core.file import SourceFile

Check warning on line 117 in src/codegen/git/utils/pr_review.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/utils/pr_review.py#L117

Added line #L117 was not covered by tests

all_modified = []
for file in self.modified_files:
if file is None:
print("Warning: File is None")
continue
if not isinstance(file, SourceFile):
continue
for symbol in file.symbols:
if self.is_modified(symbol):
all_modified.append(symbol)
return all_modified

Check warning on line 129 in src/codegen/git/utils/pr_review.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/git/utils/pr_review.py#L119-L129

Added lines #L119 - L129 were not covered by tests
31 changes: 22 additions & 9 deletions src/codegen/sdk/core/codebase.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from codegen.git.repo_operator.remote_repo_operator import RemoteRepoOperator
from codegen.git.repo_operator.repo_operator import RepoOperator
from codegen.git.schemas.enums import CheckoutResult
from codegen.git.utils.pr_review import CodegenPR
from codegen.sdk._proxy import proxy_property
from codegen.sdk.ai.helpers import AbstractAIHelper, MultiProviderAIHelper
from codegen.sdk.codebase.codebase_ai import generate_system_prompt, generate_tools
Expand Down Expand Up @@ -112,7 +113,7 @@
console: Manages console output for the codebase.
"""

_op: RepoOperator | RemoteRepoOperator
_op: RepoOperator | RemoteRepoOperator | LocalRepoOperator
viz: VisualizationManager
repo_path: Path
console: Console
Expand Down Expand Up @@ -1162,7 +1163,16 @@
self.G.transaction_manager.reset_stopwatch(self.G.session_options.max_seconds)

@classmethod
def from_repo(cls, repo_name: str, *, tmp_dir: str | None = None, commit: str | None = None, shallow: bool = True, programming_language: ProgrammingLanguage | None = None) -> "Codebase":
def from_repo(
cls,
repo_name: str,
*,
tmp_dir: str | None = None,
commit: str | None = None,
shallow: bool = True,
programming_language: ProgrammingLanguage | None = None,
config: CodebaseConfig = DefaultConfig,
) -> "Codebase":
"""Fetches a codebase from GitHub and returns a Codebase instance.

Args:
Expand All @@ -1171,6 +1181,7 @@
commit (Optional[str]): The specific commit hash to clone. Defaults to HEAD
shallow (bool): Whether to do a shallow clone. Defaults to True
programming_language (ProgrammingLanguage | None): The programming language of the repo. Defaults to None.
config (CodebaseConfig): Configuration for the codebase. Defaults to DefaultConfig.

Returns:
Codebase: A Codebase instance initialized with the cloned repository
Expand Down Expand Up @@ -1198,26 +1209,28 @@
# Use LocalRepoOperator to fetch the repository
logger.info("Cloning repository...")
if commit is None:
repo_operator = LocalRepoOperator.create_from_repo(repo_path=repo_path, url=repo_url)
repo_operator = LocalRepoOperator.create_from_repo(repo_path=repo_path, url=repo_url, github_api_key=config.secrets.github_api_key if config.secrets else None)

Check warning on line 1212 in src/codegen/sdk/core/codebase.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/sdk/core/codebase.py#L1212

Added line #L1212 was not covered by tests
else:
# Ensure the operator can handle remote operations
repo_operator = LocalRepoOperator.create_from_commit(
repo_path=repo_path,
commit=commit,
url=repo_url,
)
repo_operator = LocalRepoOperator.create_from_commit(repo_path=repo_path, commit=commit, url=repo_url, github_api_key=config.secrets.github_api_key if config.secrets else None)

Check warning on line 1215 in src/codegen/sdk/core/codebase.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/sdk/core/codebase.py#L1215

Added line #L1215 was not covered by tests
logger.info("Clone completed successfully")

# Initialize and return codebase with proper context
logger.info("Initializing Codebase...")
project = ProjectConfig.from_repo_operator(repo_operator=repo_operator, programming_language=programming_language)
codebase = Codebase(projects=[project], config=DefaultConfig)
codebase = Codebase(projects=[project], config=config)

Check warning on line 1221 in src/codegen/sdk/core/codebase.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/sdk/core/codebase.py#L1221

Added line #L1221 was not covered by tests
logger.info("Codebase initialization complete")
return codebase
except Exception as e:
logger.exception(f"Failed to initialize codebase: {e}")
raise

def get_modified_symbols_in_pr(self, pr_id: int) -> list[Symbol]:
    """Get all modified symbols in a pull request.

    Args:
        pr_id: Number of the pull request to inspect.

    Returns:
        The symbols that ``CodegenPR`` reports as modified for that PR.

    Raises:
        ValueError: If the repo operator returns no pull request for
            ``pr_id`` (for example when no GitHub API key was configured
            or the lookup failed).
    """
    pr = self._op.get_pull_request(pr_id)
    # The operator signals failure by returning None; stop here with a clear
    # message instead of letting the diff-fetching code fail on a None PR.
    if pr is None:
        msg = f"Could not fetch pull request {pr_id}; check that a GitHub API key is configured and the PR exists"
        raise ValueError(msg)
    return CodegenPR(self._op, self, pr).modified_symbols

Check warning on line 1232 in src/codegen/sdk/core/codebase.py

View check run for this annotation

Codecov / codecov/patch

src/codegen/sdk/core/codebase.py#L1230-L1232

Added lines #L1230 - L1232 were not covered by tests


# The last 2 lines of code are added to the runner. See codegen-backend/cli/generate/utils.py
# Type Aliases
Expand Down
1 change: 1 addition & 0 deletions src/codegen/sdk/secrets.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
@dataclass
class Secrets:
    """Container for optional credentials; every field defaults to None (unset)."""

    # presumably the OpenAI API key used by AI helpers -- confirm against consumers
    openai_key: str | None = None
    # GitHub API key; Codebase.from_repo forwards it to LocalRepoOperator for GitHub access
    github_api_key: str | None = None
Loading