Skip to content

Add string compilation + grouping utils #48

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/codegen/cli/codemod/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ def convert_to_cli(input: str, language: str, name: str) -> str:
# from app.codemod.compilation.models.context import CodemodContext
#from app.codemod.compilation.models.pr_options import PROptions

from graph_sitter import {codebase_type}
from codegen.sdk import {codebase_type}

context: Any

Expand Down
2 changes: 1 addition & 1 deletion src/codegen/cli/utils/count_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

# from app.codemod.compilation.models.context import CodemodContext
# from app.codemod.compilation.models.pr_options import PROptions
# from graph_sitter import PyCodebaseType
# from codegen.sdk import PyCodebaseType

# context: CodemodContext

Expand Down
Empty file added src/codegen/git/__init__.py
Empty file.
2 changes: 2 additions & 0 deletions src/codegen/git/configs/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@
CODEGEN_BOT_NAME = "codegen-bot"
CODEGEN_BOT_EMAIL = "[email protected]"
CODEOWNERS_FILEPATHS = [".github/CODEOWNERS", "CODEOWNERS", "docs/CODEOWNERS"]
HIGHSIDE_REMOTE_NAME = "highside"
LOWSIDE_REMOTE_NAME = "lowside"
35 changes: 35 additions & 0 deletions src/codegen/git/models/codemod_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from __future__ import annotations

import logging
from typing import Any

from pydantic import BaseModel, Field

from codegen.git.models.pull_request_context import PullRequestContext

logger = logging.getLogger(__name__)


class CodemodContext(BaseModel):
    """Context values attached to a codemod run.

    Every field is optional; ``TEMPLATE_ARGS`` defaults to a fresh empty dict
    per instance.
    """

    CODEMOD_ID: int | None = None
    CODEMOD_LINK: str | None = None
    CODEMOD_AUTHOR: str | None = None
    TEMPLATE_ARGS: dict[str, Any] = Field(default_factory=dict)

    # TODO: add fields for version
    # CODEMOD_VERSION_ID: int | None = None
    # CODEMOD_VERSION_AUTHOR: str | None = None

    PULL_REQUEST: PullRequestContext | None = None

    @classmethod
    def _render_template(cls, template_schema: dict[str, str], template_values: dict[str, Any]) -> dict[str, Any]:
        """Coerce raw template values into the shapes declared by the schema.

        Args:
            template_schema: Maps a variable name to its declared type name.
                Only the type name ``"list"`` is treated specially.
            template_values: Maps a variable name to its raw value.

        Returns:
            A new dict with one entry per key of ``template_values``. Variables
            declared as ``"list"`` become a list of stripped strings; every
            other variable becomes ``str(value)``.
        """
        template_data: dict[str, Any] = {}
        for var_name, var_value in template_values.items():
            if template_schema.get(var_name) != "list":
                template_data[var_name] = str(var_value)
                continue

            if isinstance(var_value, (list, tuple)):
                # Already split by the caller; calling ``.split`` on it would
                # raise AttributeError, so just normalise the elements.
                items = var_value
            else:
                # Comma-separated text. Non-string scalars are stringified
                # first, mirroring the ``str()`` coercion of the scalar branch.
                items = str(var_value).split(",")
            template_data[var_name] = [str(item).strip() for item in items]
        return template_data
12 changes: 12 additions & 0 deletions src/codegen/git/models/github_named_user_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from pydantic import BaseModel


class GithubNamedUserContext(BaseModel):
    """A GitHub user, identified by login, as parsed from a webhook payload."""

    # ``login`` has no default; ``email`` defaults to None.
    login: str
    email: str | None = None

    @classmethod
    def from_payload(cls, payload: dict) -> "GithubNamedUserContext":
        """Create an instance from the ``login`` and ``email`` keys of ``payload``.

        A key that is absent from ``payload`` is forwarded to the model as ``None``.
        """
        login, email = (payload.get(key) for key in ("login", "email"))
        return cls(login=login, email=email)
13 changes: 13 additions & 0 deletions src/codegen/git/models/pr_options.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from pydantic import BaseModel

from codegen.utils.decorators.docs import apidoc


@apidoc
class PROptions(BaseModel):
    """Optional settings applied when a pull request is generated.

    Every field defaults to ``None``.
    """

    # Pull request title and description text.
    title: str | None = None
    body: str | None = None
    # TODO: not used until we add labels to GithubPullRequestModel
    labels: list[str] | None = None
    # NOTE(review): presumably controls force-pushing the PR head branch; confirm against the consumer.
    force_push_head_branch: bool | None = None
12 changes: 12 additions & 0 deletions src/codegen/git/models/pr_part_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from pydantic import BaseModel


class PRPartContext(BaseModel):
    """One part of a GitHub pull request (a ``ref`` plus a ``sha``) parsed from a webhook payload."""

    # Both fields are declared without defaults.
    ref: str
    sha: str

    @classmethod
    def from_payload(cls, payload: dict) -> "PRPartContext":
        """Create an instance from the ``ref`` and ``sha`` keys of ``payload``.

        A key that is absent from ``payload`` is forwarded to the model as ``None``.
        """
        ref, sha = (payload.get(key) for key in ("ref", "sha"))
        return cls(ref=ref, sha=sha)
52 changes: 52 additions & 0 deletions src/codegen/git/models/pull_request_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from pydantic import BaseModel

from codegen.git.models.github_named_user_context import GithubNamedUserContext
from codegen.git.models.pr_part_context import PRPartContext
from codegen.git.schemas.github import GithubType


class PullRequestContext(BaseModel):
    """A GitHub pull request, built from the ``pull_request`` entry of a webhook payload."""

    id: int
    url: str
    html_url: str
    number: int
    state: str
    title: str
    user: GithubNamedUserContext
    body: str
    draft: bool
    # Parsed from the payload's ``head`` and ``base`` entries respectively.
    head: PRPartContext
    base: PRPartContext
    merged: bool | None
    merged_by: dict | None
    additions: int | None
    deletions: int | None
    changed_files: int | None
    github_type: GithubType | None = None
    # The ``pull_request`` dict this instance was built from.
    webhook_data: dict | None = None

    @classmethod
    def from_payload(cls, webhook_payload: dict) -> "PullRequestContext":
        """Create an instance from a webhook payload.

        Reads ``webhook_payload["pull_request"]`` (an empty dict when absent),
        copies the scalar keys through with ``dict.get``, wraps ``user``,
        ``head`` and ``base`` in their own models, and derives ``github_type``
        from ``html_url``.
        """
        pr_data = webhook_payload.get("pull_request", {})

        # Keys copied through unchanged; a key absent from the payload becomes None.
        passthrough_keys = (
            "id",
            "url",
            "html_url",
            "number",
            "state",
            "title",
            "body",
            "draft",
            "merged",
            "additions",
            "deletions",
            "changed_files",
        )
        fields = {key: pr_data.get(key) for key in passthrough_keys}

        # Nested models are built in the same order as the keyword list they replace.
        fields["user"] = GithubNamedUserContext.from_payload(pr_data.get("user", {}))
        fields["head"] = PRPartContext.from_payload(pr_data.get("head", {}))
        fields["base"] = PRPartContext.from_payload(pr_data.get("base", {}))
        fields["merged_by"] = pr_data.get("merged_by", {})
        fields["github_type"] = GithubType.from_url(pr_data.get("html_url"))
        fields["webhook_data"] = pr_data
        return cls(**fields)
2 changes: 1 addition & 1 deletion src/codegen/git/repo_operator/repo_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from codegen.git.schemas.enums import CheckoutResult, FetchResult
from codegen.git.schemas.repo_config import BaseRepoConfig
from codegen.utils.performance.stopwatch_utils import stopwatch
from codegen.utils.time_utils import humanize_duration
from codegen.utils.performance.time_utils import humanize_duration

logger = logging.getLogger(__name__)

Expand Down
8 changes: 4 additions & 4 deletions src/codegen/gscli/generate/runner_imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
import plotly
""".strip()
CODEGEN_IMPORTS = """
from app.codemod.compilation.models.context import CodemodContext
from app.codemod.compilation.models.github_named_user_context import GithubNamedUserContext
from app.codemod.compilation.models.pr_part_context import PRPartContext
from app.codemod.compilation.models.pull_request_context import PullRequestContext
from codegen.git.models.codemod_context import CodemodContext
from codegen.git.models.github_named_user_context import GithubNamedUserContext
from codegen.git.models.pr_part_context import PRPartContext
from codegen.git.models.pull_request_context import PullRequestContext
"""
# TODO: these should also be made public (i.e. included in the docs site)
GS_PRIVATE_IMPORTS = """
Expand Down
10 changes: 5 additions & 5 deletions src/codegen/gscli/generate/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@ def generate_builtins_file(path_to_builtins: str, language_type: LanguageType):
# This file is auto-generated, do not modify manually

{{all_imports}}
from app.codemod.compilation.models.context import CodemodContext
from app.codemod.compilation.models.pr_options import PROptions
from app.codemod.compilation.models.github_named_user_context import GithubNamedUserContext
from app.codemod.compilation.models.pr_part_context import PRPartContext
from app.codemod.compilation.models.pull_request_context import PullRequestContext
from codegen.git.models.codemod_context import CodemodContext
from codegen.git.models.pr_options import PROptions
from codegen.git.models.github_named_user_context import GithubNamedUserContext
from codegen.git.models.pr_part_context import PRPartContext
from codegen.git.models.pull_request_context import PullRequestContext
from codegen.sdk.codebase.flagging.code_flag import MessageType as MessageType

{"\n".join(inspect.getsource(codebase).splitlines()[-2:])}
Expand Down
7 changes: 2 additions & 5 deletions src/codegen/sdk/codebase/flagging/flags.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
from dataclasses import dataclass, field
from typing import TYPE_CHECKING

from codegen.sdk.codebase.flagging.code_flag import CodeFlag
from codegen.sdk.codebase.flagging.enums import MessageType
from codegen.sdk.codebase.flagging.group import Group
from codegen.sdk.core.interfaces.editable import Editable
from codegen.utils.decorators.docs import noapidoc

if TYPE_CHECKING:
from app.codemod.types import Group


@dataclass
class Flags:
Expand Down Expand Up @@ -69,7 +66,7 @@ def set_find_mode(self, find_mode: bool) -> None:
self._find_mode = find_mode

@noapidoc
def set_active_group(self, group: "Group") -> None:
def set_active_group(self, group: Group) -> None:
"""Will only fix these flags."""
# TODO - flesh this out more with Group datatype and GroupBy
self._active_group = group.flags
Expand Down
17 changes: 17 additions & 0 deletions src/codegen/sdk/codebase/flagging/group.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from dataclasses import dataclass

from dataclasses_json import dataclass_json

from codegen.sdk.codebase.flagging.code_flag import CodeFlag
from codegen.sdk.codebase.flagging.groupers.enums import GroupBy

DEFAULT_GROUP_ID = 0


@dataclass_json
@dataclass
class Group:
    """A set of code flags that share one segment under a given grouping strategy."""

    # Grouping strategy that produced this group.
    group_by: GroupBy
    # Segment value identifying the group within that strategy.
    segment: str
    # Flags belonging to the group; None when not provided.
    flags: list[CodeFlag] | None = None
    # Identifier of the group; falls back to DEFAULT_GROUP_ID.
    id: int = DEFAULT_GROUP_ID
21 changes: 21 additions & 0 deletions src/codegen/sdk/codebase/flagging/groupers/all_grouper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from codegen.git.repo_operator.remote_repo_operator import RemoteRepoOperator
from codegen.sdk.codebase.flagging.code_flag import CodeFlag
from codegen.sdk.codebase.flagging.group import Group
from codegen.sdk.codebase.flagging.groupers.base_grouper import BaseGrouper
from codegen.sdk.codebase.flagging.groupers.enums import GroupBy


class AllGrouper(BaseGrouper):
    """Put every flag into a single group.

    The only valid segment is the literal string "all".
    """

    type: GroupBy = GroupBy.ALL

    @staticmethod
    def create_all_groups(flags: list[CodeFlag], repo_operator: RemoteRepoOperator | None = None) -> list[Group]:
        """Return a one-element list holding the "all" group, or an empty list when ``flags`` is empty."""
        if not flags:
            return []
        return [Group(group_by=GroupBy.ALL, segment="all", flags=flags)]

    @staticmethod
    def create_single_group(flags: list[CodeFlag], segment: str, repo_operator: RemoteRepoOperator | None = None) -> Group:
        """Return the "all" group for ``flags``.

        Raises:
            ValueError: If ``segment`` is anything other than "all".
        """
        if segment == "all":
            return Group(group_by=GroupBy.ALL, segment=segment, flags=flags)
        raise ValueError(f"❌ Invalid segment for AllGrouper: {segment}. Only 'all' is a valid segment.")
34 changes: 34 additions & 0 deletions src/codegen/sdk/codebase/flagging/groupers/app_grouper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import logging

from codegen.git.repo_operator.remote_repo_operator import RemoteRepoOperator
from codegen.sdk.codebase.flagging.code_flag import CodeFlag
from codegen.sdk.codebase.flagging.group import Group
from codegen.sdk.codebase.flagging.groupers.base_grouper import BaseGrouper
from codegen.sdk.codebase.flagging.groupers.enums import GroupBy

logger = logging.getLogger(__name__)


class AppGrouper(BaseGrouper):
    """Group flags by segment=app.

    In ``create_all_groups`` the app of a flag is the first three "/"-separated
    components of its filepath.
    Ex: apps/profile.
    """

    type: GroupBy = GroupBy.APP

    @staticmethod
    def create_all_groups(flags: list[CodeFlag], repo_operator: RemoteRepoOperator | None = None) -> list[Group]:
        """Create one group per distinct app found in ``flags``.

        Each flag is placed in exactly one group, keyed by its own app prefix.
        Groups are returned in order of first appearance in ``flags`` and
        their ids follow that order, so the result is the same on every run.

        Args:
            flags: Flags to partition.
            repo_operator: Unused by this grouper.

        Returns:
            The groups, or an empty list when ``flags`` is empty.
        """
        # Bucket each flag by its own key instead of re-matching with a string
        # prefix: prefix matching would also put "a/b/cd/x.py" in the "a/b/c" group.
        flags_by_app: dict[str, list[CodeFlag]] = {}
        for flag in flags:
            app = "/".join(flag.filepath.split("/")[:3])
            flags_by_app.setdefault(app, []).append(flag)

        return [
            Group(id=idx, group_by=GroupBy.APP, segment=app, flags=app_flags)
            for idx, (app, app_flags) in enumerate(flags_by_app.items())
        ]

    @staticmethod
    def create_single_group(flags: list[CodeFlag], segment: str, repo_operator: RemoteRepoOperator | None = None) -> Group:
        """Create the group of flags whose filepath starts with ``segment``.

        Matching is a plain string-prefix test. When nothing matches, a warning
        is logged and a group with an empty flag list is still returned.
        """
        segment_flags = [f for f in flags if f.filepath.startswith(segment)]
        if not segment_flags:
            logger.warning(f"🤷‍♀️ No flags found for APP segment: {segment}")
        return Group(group_by=GroupBy.APP, segment=segment, flags=segment_flags)
26 changes: 26 additions & 0 deletions src/codegen/sdk/codebase/flagging/groupers/base_grouper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from codegen.git.repo_operator.remote_repo_operator import RemoteRepoOperator
from codegen.sdk.codebase.flagging.code_flag import CodeFlag
from codegen.sdk.codebase.flagging.group import Group
from codegen.sdk.codebase.flagging.groupers.enums import GroupBy


class BaseGrouper:
    """Base class of all groupers.

    Children of this class should include in their doc string:
    - a short desc of what the segment format is. ex: for FileGrouper the segment is a filename

    Children must also assign the class attribute ``type``.
    """

    type: GroupBy

    def __init__(self) -> None:
        """Validate that the concrete grouper declared its ``type``.

        Raises:
            ValueError: If ``type`` is missing or ``None`` on the instance's class.
        """
        # ``type`` is only annotated above, so a subclass that never assigns it has
        # no such attribute. Look it up on the instance: the bare name ``type``
        # inside a method is the builtin, which is never None.
        if getattr(self, "type", None) is None:
            raise ValueError("Must set type in BaseGrouper")

    @staticmethod
    def create_all_groups(flags: list[CodeFlag], repo_operator: RemoteRepoOperator | None = None) -> list[Group]:
        """Partition ``flags`` into groups. Must be overridden by subclasses.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError("Must implement create_all_groups in BaseGrouper")

    @staticmethod
    def create_single_group(flags: list[CodeFlag], segment: str, repo_operator: RemoteRepoOperator | None = None) -> Group:
        """Build the single group identified by ``segment``. Must be overridden by subclasses.

        TODO: handle the case when 0 flags are passed in

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError("Must implement create_single_group in BaseGrouper")
40 changes: 40 additions & 0 deletions src/codegen/sdk/codebase/flagging/groupers/codeowner_grouper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from codegen.git.repo_operator.remote_repo_operator import RemoteRepoOperator
from codegen.sdk.codebase.flagging.code_flag import CodeFlag
from codegen.sdk.codebase.flagging.group import Group
from codegen.sdk.codebase.flagging.groupers.base_grouper import BaseGrouper
from codegen.sdk.codebase.flagging.groupers.enums import GroupBy

DEFAULT_CHUNK_SIZE = 5


class CodeownerGrouper(BaseGrouper):
    """Group flags by CODEOWNERS.

    Parses .github/CODEOWNERS and groups by each possible codeowners

    Segment should be either a github username or github team name.
    """

    type: GroupBy = GroupBy.CODEOWNER

    @staticmethod
    def create_all_groups(flags: list[CodeFlag], repo_operator: RemoteRepoOperator | None = None) -> list[Group]:
        """Create one group per code owner, plus a trailing "@no-owner" group.

        Owner groups are returned in order of first appearance and numbered
        0..k-1; the "@no-owner" group is always last with id k, so ids are
        unique. The "@no-owner" group is returned even when it is empty.

        Args:
            flags: Flags to partition.
            repo_operator: Supplies ``codeowners_parser``. When it, or its
                parser, is ``None``, every flag lands in the "@no-owner" group.

        Returns:
            The owner groups followed by the "@no-owner" group.
        """
        # Tolerate a missing operator or parser instead of raising AttributeError.
        parser = getattr(repo_operator, "codeowners_parser", None)

        owner_to_group: dict[str, Group] = {}
        no_owner_group = Group(group_by=GroupBy.CODEOWNER, segment="@no-owner", flags=[])
        for flag in flags:
            flag_owners = parser.of(flag.filepath) if parser is not None else None
            if not flag_owners:
                no_owner_group.flags.append(flag)
                continue

            # NOTE: always use the first owner. ex if the line is /dir @team1 @team2 then use team1
            # Index [1] selects the owner name; presumably each entry is a (kind, name) pair (confirm against the parser).
            flag_owner = flag_owners[0][1]
            group = owner_to_group.get(flag_owner)
            if group is None:
                # Number groups by creation order, not by flag index, so the
                # "@no-owner" id assigned below can never collide with an owner id.
                group = Group(id=len(owner_to_group), group_by=GroupBy.CODEOWNER, segment=flag_owner, flags=[])
                owner_to_group[flag_owner] = group
            group.flags.append(flag)

        no_owner_group.id = len(owner_to_group)
        return [*owner_to_group.values(), no_owner_group]

    @staticmethod
    def create_single_group(flags: list[CodeFlag], segment: str, repo_operator: RemoteRepoOperator | None = None) -> Group:
        """Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("TODO: implement single group creation")
15 changes: 15 additions & 0 deletions src/codegen/sdk/codebase/flagging/groupers/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from codegen.sdk.codebase.flagging.groupers.all_grouper import AllGrouper
from codegen.sdk.codebase.flagging.groupers.app_grouper import AppGrouper
from codegen.sdk.codebase.flagging.groupers.codeowner_grouper import CodeownerGrouper
from codegen.sdk.codebase.flagging.groupers.file_chunk_grouper import FileChunkGrouper
from codegen.sdk.codebase.flagging.groupers.file_grouper import FileGrouper
from codegen.sdk.codebase.flagging.groupers.instance_grouper import InstanceGrouper

# Every grouper class imported by this module, listed as classes (not instances).
ALL_GROUPERS = [
AllGrouper,
AppGrouper,
CodeownerGrouper,
FileChunkGrouper,
FileGrouper,
InstanceGrouper,
]
11 changes: 11 additions & 0 deletions src/codegen/sdk/codebase/flagging/groupers/enums.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from enum import StrEnum


class GroupBy(StrEnum):
ALL = "all"
APP = "app"
CODEOWNER = "codeowner"
FILE = "file"
FILE_CHUNK = "file_chunk"
HOT_COLD = "hot_cold"
INSTANCE = "instance"
Loading
Loading