Skip to content

Commit 52396cc

Browse files
authored
refactor: Clean up RepoConfig + LocalRepoOperator (#418)
1 parent 9accce9 commit 52396cc

File tree

25 files changed

+86
-123
lines changed

25 files changed

+86
-123
lines changed

.github/workflows/unit-tests.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -141,7 +141,7 @@ jobs:
141141
timeout-minutes: 5
142142
env:
143143
GITHUB_WORKSPACE: $GITHUB_WORKSPACE
144-
GITHUB_TOKEN: ${{ secrets.GHA_PAT }}
144+
CODEGEN_SECRETS__GITHUB_TOKEN: ${{ secrets.GHA_PAT }}
145145
run: |
146146
uv run pytest \
147147
-n auto \

ruff.toml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,8 @@ ignore = [
4747
exclude = [
4848
"*.ipynb",
4949
] # disable just linting for notebooks (allow for formatting)
50+
[lint.per-file-ignores]
51+
"src/codegen/cli/mcp/resources/system_prompt.py" = ["E501"]
5052
[lint.pydocstyle]
5153
convention = "google"
5254
[lint.pyflakes]

src/codegen/cli/commands/init/main.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ def init_command(repo_name: str | None = None, organization_name: str | None = N
2323
"""Initialize or update the Codegen folder."""
2424
# Print a message if not in a git repo
2525
try:
26-
subprocess.run(["git", "rev-parse", "--is-inside-work-tree"], capture_output=True, check=True, text=True)
26+
output = subprocess.run(["git", "rev-parse", "--show-toplevel"], capture_output=True, check=True, text=True)
2727
except (subprocess.CalledProcessError, FileNotFoundError):
2828
rich.print("\n[bold red]Error:[/bold red] Not in a git repository")
2929
rich.print("[white]Please run this command from within a git repository.[/white]")

src/codegen/cli/mcp/resources/system_prompt.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1421,16 +1421,15 @@ def baz():
14211421
```python
14221422
from codegen import Codebase
14231423
from codegen.git.repo_operator.local_repo_operator import LocalRepoOperator
1424-
from codegen.git.schemas.repo_config import BaseRepoConfig
1424+
from codegen.git.schemas.repo_config import RepoConfig
14251425
from codegen.sdk.codebase.config import ProjectConfig
14261426
from codegen.sdk.enums import ProgrammingLanguage
14271427
14281428
codebase = Codebase(
14291429
projects = [
14301430
ProjectConfig(
14311431
repo_operator=LocalRepoOperator(
1432-
repo_path="/tmp/codegen-sdk",
1433-
repo_config=BaseRepoConfig(),
1432+
repo_config=RepoConfig(name="codegen-sdk"),
14341433
bot_commit=True
14351434
),
14361435
programming_language=ProgrammingLanguage.TYPESCRIPT,

src/codegen/git/clients/git_repo_client.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -51,10 +51,6 @@ def repo(self) -> Repository:
5151
# PROPERTIES
5252
####################################################################################################################
5353

54-
@property
55-
def id(self) -> int:
56-
return self.repo_config.id
57-
5854
@property
5955
def default_branch(self) -> str:
6056
return self.repo.default_branch

src/codegen/git/configs/config.py

Lines changed: 0 additions & 18 deletions
This file was deleted.

src/codegen/git/repo_operator/local_repo_operator.py

Lines changed: 8 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
from codegen.git.clients.git_repo_client import GitRepoClient
1414
from codegen.git.repo_operator.repo_operator import RepoOperator
1515
from codegen.git.schemas.enums import FetchResult
16-
from codegen.git.schemas.repo_config import BaseRepoConfig
16+
from codegen.git.schemas.repo_config import RepoConfig
1717
from codegen.git.utils.clone_url import url_to_github
1818
from codegen.git.utils.file_utils import create_files
1919

@@ -31,28 +31,21 @@ class LocalRepoOperator(RepoOperator):
3131
- Creating "fake" repos from a dictionary of files contents
3232
"""
3333

34-
_repo_path: str
35-
_repo_name: str
3634
_git_cli: GitCLI
37-
repo_config: BaseRepoConfig
3835
_github_api_key: str | None
3936
_remote_git_repo: GitRepoClient | None = None
4037

4138
def __init__(
4239
self,
43-
repo_path: str, # full path to the repo
40+
repo_config: RepoConfig,
4441
github_api_key: str | None = None,
45-
repo_config: BaseRepoConfig | None = None,
4642
bot_commit: bool = False,
4743
) -> None:
48-
self._repo_path = repo_path
49-
self._repo_name = os.path.basename(repo_path)
5044
self._github_api_key = github_api_key
5145
self._remote_git_repo = None
46+
super().__init__(repo_config, bot_commit)
5247
os.makedirs(self.repo_path, exist_ok=True)
5348
GitCLI.init(self.repo_path)
54-
repo_config = repo_config or BaseRepoConfig()
55-
super().__init__(repo_config, self.repo_path, bot_commit)
5649

5750
####################################################################################################################
5851
# PROPERTIES
@@ -86,7 +79,7 @@ def remote_git_repo(self) -> GitRepoClient:
8679
# CLASS METHODS
8780
####################################################################################################################
8881
@classmethod
89-
def create_from_files(cls, repo_path: str, files: dict[str, str], bot_commit: bool = True, repo_config: BaseRepoConfig = BaseRepoConfig()) -> "LocalRepoOperator":
82+
def create_from_files(cls, repo_path: str, files: dict[str, str], bot_commit: bool = True) -> "LocalRepoOperator":
9083
"""Used when you want to create a directory from a set of files and then create a LocalRepoOperator that points to that directory.
9184
Use cases:
9285
- Unit testing
@@ -96,14 +89,13 @@ def create_from_files(cls, repo_path: str, files: dict[str, str], bot_commit: bo
9689
Args:
9790
repo_path (str): The path to the directory to create.
9891
files (dict[str, str]): A dictionary of file names and contents to create in the directory.
99-
repo_config (BaseRepoConfig): The configuration of the repo.
10092
"""
10193
# Step 1: Create dir (if not exists) + files
10294
os.makedirs(repo_path, exist_ok=True)
10395
create_files(base_dir=repo_path, files=files)
10496

10597
# Step 2: Init git repo
106-
op = cls(repo_path=repo_path, bot_commit=bot_commit, repo_config=repo_config)
98+
op = cls(repo_config=RepoConfig.from_repo_path(repo_path), bot_commit=bot_commit)
10799
if op.stage_and_commit_all_changes("[Codegen] initial commit"):
108100
op.checkout_branch(None, create_if_missing=True)
109101
return op
@@ -118,7 +110,7 @@ def create_from_commit(cls, repo_path: str, commit: str, url: str, github_api_ke
118110
url (str): Git URL of the repository
119111
github_api_key (str | None): Optional GitHub API key for operations that need GitHub access
120112
"""
121-
op = cls(repo_path=repo_path, bot_commit=False, github_api_key=github_api_key)
113+
op = cls(repo_config=RepoConfig.from_repo_path(repo_path), bot_commit=False, github_api_key=github_api_key)
122114
op.discard_changes()
123115
if op.get_active_branch_or_commit() != commit:
124116
op.create_remote("origin", url)
@@ -149,7 +141,7 @@ def create_from_repo(cls, repo_path: str, url: str, github_api_key: str | None =
149141
remote_head = git_cli.remotes.origin.refs[git_cli.active_branch.name].commit
150142
# If up to date, use existing repo
151143
if local_head.hexsha == remote_head.hexsha:
152-
return cls(repo_path=repo_path, bot_commit=False, github_api_key=github_api_key)
144+
return cls(repo_config=RepoConfig.from_repo_path(repo_path), bot_commit=False, github_api_key=github_api_key)
153145
except Exception:
154146
# If any git operations fail, fallback to fresh clone
155147
pass
@@ -166,20 +158,12 @@ def create_from_repo(cls, repo_path: str, url: str, github_api_key: str | None =
166158
# Initialize with the cloned repo
167159
git_cli = GitCLI(repo_path)
168160

169-
return cls(repo_path=repo_path, bot_commit=False, github_api_key=github_api_key)
161+
return cls(repo_config=RepoConfig.from_repo_path(repo_path), bot_commit=False, github_api_key=github_api_key)
170162

171163
####################################################################################################################
172164
# PROPERTIES
173165
####################################################################################################################
174166

175-
@property
176-
def repo_name(self) -> str:
177-
return self._repo_name
178-
179-
@property
180-
def repo_path(self) -> str:
181-
return self._repo_path
182-
183167
@property
184168
def codeowners_parser(self) -> CodeOwnersParser | None:
185169
return None

src/codegen/git/repo_operator/remote_repo_operator.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -37,13 +37,12 @@ class RemoteRepoOperator(RepoOperator):
3737
def __init__(
3838
self,
3939
repo_config: RepoConfig,
40-
base_dir: str = "/tmp",
4140
setup_option: SetupOption = SetupOption.PULL_OR_CLONE,
4241
shallow: bool = True,
4342
bot_commit: bool = True,
4443
access_token: str | None = None,
4544
) -> None:
46-
super().__init__(repo_config=repo_config, base_dir=base_dir, bot_commit=bot_commit)
45+
super().__init__(repo_config=repo_config, bot_commit=bot_commit)
4746
self.access_token = access_token
4847
self.setup_repo_dir(setup_option=setup_option, shallow=shallow)
4948

src/codegen/git/repo_operator/repo_operator.py

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616

1717
from codegen.git.configs.constants import CODEGEN_BOT_EMAIL, CODEGEN_BOT_NAME
1818
from codegen.git.schemas.enums import CheckoutResult, FetchResult
19-
from codegen.git.schemas.repo_config import BaseRepoConfig
19+
from codegen.git.schemas.repo_config import RepoConfig
2020
from codegen.shared.performance.stopwatch_utils import stopwatch
2121
from codegen.shared.performance.time_utils import humanize_duration
2222

@@ -26,21 +26,20 @@
2626
class RepoOperator(ABC):
2727
"""A wrapper around GitPython to make it easier to interact with a repo."""
2828

29-
repo_config: BaseRepoConfig
29+
repo_config: RepoConfig
3030
base_dir: str
3131
bot_commit: bool = True
3232
_codeowners_parser: CodeOwnersParser | None = None
3333
_default_branch: str | None = None
3434

3535
def __init__(
3636
self,
37-
repo_config: BaseRepoConfig,
38-
base_dir: str = "/tmp",
37+
repo_config: RepoConfig,
3938
bot_commit: bool = True,
4039
) -> None:
4140
assert repo_config is not None
4241
self.repo_config = repo_config
43-
self.base_dir = base_dir
42+
self.base_dir = repo_config.base_dir
4443
self.bot_commit = bot_commit
4544

4645
####################################################################################################################

src/codegen/git/schemas/repo_config.py

Lines changed: 17 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
import base64
22
import logging
3+
import os.path
34

45
from pydantic import BaseModel
56

@@ -8,35 +9,36 @@
89
logger = logging.getLogger(__name__)
910

1011

11-
class BaseRepoConfig(BaseModel):
12-
"""Base version of RepoConfig that does not depend on the db."""
13-
14-
name: str = ""
15-
respect_gitignore: bool = True
16-
17-
1812
class RepoConfig(BaseModel):
1913
"""All the information about the repo needed to build a codebase"""
2014

21-
id: int
2215
name: str
23-
full_name: str
16+
full_name: str | None = None
17+
organization_name: str | None = None
2418
visibility: RepoVisibility | None = None
2519

26-
# Org fields
27-
organization_id: int
28-
organization_name: str
29-
3020
# Codebase fields
31-
base_dir: str = "/tmp"
32-
base_path: str | None = None
21+
base_dir: str = "/tmp" # parent directory of the git repo
22+
base_path: str | None = None # root directory of the codebase within the repo
3323
language: str | None = "PYTHON"
3424
subdirectories: list[str] | None = None
3525
respect_gitignore: bool = True
3626

27+
@property
28+
def repo_path(self) -> str:
29+
return f"{self.base_dir}/{self.name}"
30+
31+
@classmethod
32+
def from_repo_path(cls, repo_path: str, **kwargs) -> "RepoConfig":
33+
name = os.path.basename(repo_path)
34+
base_dir = os.path.dirname(repo_path)
35+
return cls(name=name, base_dir=base_dir, **kwargs)
36+
37+
# TODO: remove
3738
def encoded_json(self):
3839
return base64.b64encode(self.model_dump_json().encode("utf-8")).decode("utf-8")
3940

41+
# TODO: remove, read from shared config instead
4042
@staticmethod
4143
def from_encoded_json(encoded_json: str) -> "RepoConfig":
4244
decoded = base64.b64decode(encoded_json).decode("utf-8")

src/codegen/git/utils/codeowner_utils.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31,14 +31,14 @@ def create_codeowners_parser_for_repo(py_github_repo: GitRepoClient) -> CodeOwne
3131
return codeowners
3232
except Exception as e:
3333
continue
34-
logger.info(f"Failed to create CODEOWNERS parser for repo: {py_github_repo.repo_config.id}. Returning None.")
34+
logger.info(f"Failed to create CODEOWNERS parser for repo: {py_github_repo.repo_config.name}. Returning None.")
3535
return None
3636

3737

3838
def get_codeowners_for_pull(repo: GitRepoClient, pull: PullRequest) -> list[str]:
3939
codeowners_parser = create_codeowners_parser_for_repo(repo)
4040
if not codeowners_parser:
41-
logger.warning(f"Failed to create codeowners parser for repo: {repo.repo_config.id}. Returning empty list.")
41+
logger.warning(f"Failed to create codeowners parser for repo: {repo.repo_config.name}. Returning empty list.")
4242
return []
4343
codeowners_for_pull_set = set()
4444
pull_files = pull.get_files()

src/codegen/runner/sandbox/runner.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@
33

44
from git import Commit as GitCommit
55

6-
from codegen.git.configs.config import config
76
from codegen.git.repo_operator.remote_repo_operator import RemoteRepoOperator
87
from codegen.git.schemas.repo_config import RepoConfig
98
from codegen.runner.models.apis import CreateBranchRequest, CreateBranchResponse, GetDiffRequest, GetDiffResponse
@@ -14,6 +13,7 @@
1413
from codegen.sdk.core.codebase import Codebase
1514
from codegen.sdk.enums import ProgrammingLanguage
1615
from codegen.shared.compilation.string_to_code import create_execute_function_from_codeblock
16+
from codegen.shared.configs.config import config
1717
from codegen.shared.performance.stopwatch_utils import stopwatch
1818

1919
logger = logging.getLogger(__name__)
@@ -36,12 +36,12 @@ def __init__(
3636
repo_config: RepoConfig,
3737
) -> None:
3838
self.repo = repo_config
39-
self.op = RemoteRepoOperator(repo_config=repo_config, base_dir=repo_config.base_dir, access_token=config.GITHUB_TOKEN)
39+
self.op = RemoteRepoOperator(repo_config=repo_config, access_token=config.secrets.github_token)
4040
self.commit = self.op.git_cli.head.commit
4141

4242
async def warmup(self) -> None:
4343
"""Warms up this runner by cloning the repo and parsing the graph."""
44-
logger.info(f"===== Warming runner for {self.repo.full_name} (ID={self.repo.id}) =====")
44+
logger.info(f"===== Warming runner for {self.repo.full_name or self.repo.name} =====")
4545
sys.setrecursionlimit(10000) # for graph parsing
4646

4747
self.codebase = await self._build_graph()

src/codegen/sdk/code_generation/current_code_codebase.py

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
from typing import TypedDict
66

77
from codegen.git.repo_operator.local_repo_operator import LocalRepoOperator
8-
from codegen.git.schemas.repo_config import BaseRepoConfig
8+
from codegen.git.schemas.repo_config import RepoConfig
99
from codegen.sdk.codebase.config import CodebaseConfig, DefaultConfig, ProjectConfig
1010
from codegen.sdk.core.codebase import Codebase, CodebaseType
1111
from codegen.sdk.enums import ProgrammingLanguage
@@ -35,10 +35,14 @@ def get_codegen_codebase_base_path() -> str:
3535
def get_current_code_codebase(config: CodebaseConfig = DefaultConfig, subdirectories: list[str] | None = None) -> CodebaseType:
3636
"""Returns a Codebase for the code that is *currently running* (i.e. the Codegen repo)"""
3737
codegen_repo_path = get_graphsitter_repo_path()
38-
logger.info(f"Creating codebase from repo at: {codegen_repo_path} with base_path {get_codegen_codebase_base_path()}")
39-
op = LocalRepoOperator(repo_path=codegen_repo_path, bot_commit=False, repo_config=BaseRepoConfig(respect_gitignore=False))
40-
config = config.model_copy(update={"base_path": get_codegen_codebase_base_path()})
41-
projects = [ProjectConfig(repo_operator=op, programming_language=ProgrammingLanguage.PYTHON, subdirectories=subdirectories, base_path=get_codegen_codebase_base_path())]
38+
base_dir = get_codegen_codebase_base_path()
39+
logger.info(f"Creating codebase from repo at: {codegen_repo_path} with base_path {base_dir}")
40+
41+
repo_config = RepoConfig.from_repo_path(codegen_repo_path, respect_gitignore=False)
42+
op = LocalRepoOperator(repo_config=repo_config, bot_commit=False)
43+
44+
config = config.model_copy(update={"base_path": base_dir})
45+
projects = [ProjectConfig(repo_operator=op, programming_language=ProgrammingLanguage.PYTHON, subdirectories=subdirectories, base_path=base_dir)]
4246
codebase = Codebase(projects=projects, config=config)
4347
return codebase
4448

0 commit comments

Comments
 (0)