Skip to content

Add node_modules to GLOBAL_FILE_IGNORE_LIST & Fix "git_most_common" to follow GLOBAL_FILE_IGNORE_LIST #579

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/codegen/git/repo_operator/repo_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
else:
os.makedirs(self.repo_path, exist_ok=True)
GitCLI.init(self.repo_path)
self._local_git_repo = LocalGitRepo(repo_path=repo_config.repo_path)

Check failure on line 74 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Argument "repo_path" to "LocalGitRepo" has incompatible type "str"; expected "Path" [arg-type]

if repo_config.full_name is None:
repo_config.full_name = self._local_git_repo.full_name
Expand Down Expand Up @@ -143,7 +143,7 @@
email_level = None
levels = ["system", "global", "user", "repository"]
for level in levels:
with git_cli.config_reader(level) as reader:

Check failure on line 146 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Argument 1 to "config_reader" of "Repo" has incompatible type "str"; expected "Literal['system', 'global', 'user', 'repository'] | None" [arg-type]
if reader.has_option("user", "name") and not username:
username = reader.get("user", "name")
user_level = level
Expand Down Expand Up @@ -555,7 +555,7 @@
return content
except UnicodeDecodeError:
print(f"Warning: Unable to decode file {file_path}. Skipping.")
return None

Check failure on line 558 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Incompatible return value type (got "None", expected "str") [return-value]

def write_file(self, relpath: str, content: str) -> None:
"""Writes file content to disk"""
Expand Down Expand Up @@ -622,7 +622,7 @@
filepaths = self.get_filepaths_for_repo(ignore_list)
# Iterate through files and yield contents
for rel_filepath in filepaths:
rel_filepath: str

Check failure on line 625 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Name "rel_filepath" already defined on line 624 [no-redef]
filepath = os.path.join(self.repo_path, rel_filepath)

# Filter by subdirectory (includes full filenames)
Expand All @@ -634,7 +634,7 @@
content = self.get_file(filepath)
yield rel_filepath, content
except Exception as e:
print(f"Error reading file {filepath}: {e}")
logger.warning(f"Error reading file {filepath}: {e}")

def list_files(self, subdirs: list[str] | None = None, extensions: list[str] | None = None) -> list[str]:
"""List files matching subdirs + extensions in a repo.
Expand All @@ -653,7 +653,7 @@
list_files = []

for rel_filepath in self.git_cli.git.ls_files().split("\n"):
rel_filepath: str

Check failure on line 656 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Name "rel_filepath" already defined on line 655 [no-redef]
if subdirs and not any(d in rel_filepath for d in subdirs):
continue
if extensions is None or any(rel_filepath.endswith(e) for e in extensions):
Expand All @@ -677,7 +677,7 @@

def get_modified_files_in_last_n_days(self, days: int = 1) -> tuple[list[str], list[str]]:
"""Returns a list of files modified and deleted in the last n days"""
modified_files = []

Check failure on line 680 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Need type annotation for "modified_files" (hint: "modified_files: list[<type>] = ...") [var-annotated]
deleted_files = []
allowed_extensions = [".py"]

Expand All @@ -693,9 +693,9 @@
if file in modified_files:
modified_files.remove(file)
else:
if file not in modified_files and file[-3:] in allowed_extensions:

Check failure on line 696 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Value of type "str | PathLike[str]" is not indexable [index]
modified_files.append(file)
return modified_files, deleted_files

Check failure on line 698 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Incompatible return value type (got "tuple[list[str | PathLike[str]], list[str | PathLike[str]]]", expected "tuple[list[str], list[str]]") [return-value]

@cached_property
def base_url(self) -> str | None:
Expand Down
3 changes: 2 additions & 1 deletion src/codegen/git/utils/language.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
raise ValueError(msg)

# Initialize counters for each language
language_counts = Counter()

Check failure on line 63 in src/codegen/git/utils/language.py

View workflow job for this annotation

GitHub Actions / mypy

error: Need type annotation for "language_counts" [var-annotated]
total_files = 0

# Walk through the directory
Expand Down Expand Up @@ -109,6 +109,7 @@
"""
from codegen.git.repo_operator.repo_operator import RepoOperator
from codegen.git.schemas.repo_config import RepoConfig
from codegen.sdk.codebase.codebase_context import GLOBAL_FILE_IGNORE_LIST
from codegen.sdk.python import PyFile
from codegen.sdk.typescript.file import TSFile

Expand All @@ -123,7 +124,7 @@
raise ValueError(msg)

# Initialize counters for each language
language_counts = Counter()

Check failure on line 127 in src/codegen/git/utils/language.py

View workflow job for this annotation

GitHub Actions / mypy

error: Need type annotation for "language_counts" [var-annotated]
total_files = 0

# Initiate RepoOperator
Expand All @@ -132,7 +133,7 @@
repo_operator = RepoOperator(repo_config=repo_config)

# Walk through the directory
for rel_path, _ in repo_operator.iter_files(subdirs=[base_path] if base_path else None):
for rel_path, _ in repo_operator.iter_files(subdirs=[base_path] if base_path else None, ignore_list=GLOBAL_FILE_IGNORE_LIST):
# Convert to Path object
file_path = Path(git_root) / Path(rel_path)

Expand Down
11 changes: 10 additions & 1 deletion src/codegen/sdk/codebase/codebase_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,16 @@


# src/vs/platform/contextview/browser/contextMenuService.ts is ignored as there is a parsing error with tree-sitter
GLOBAL_FILE_IGNORE_LIST = [".git/*", ".yarn/releases/*", ".*/tests/static/chunk-.*.js", ".*/ace/.*.js", "src/vs/platform/contextview/browser/contextMenuService.ts"]
# File path patterns to ignore; callers pass this list as an `ignore_list`
# (e.g. to RepoOperator.iter_files when counting files per language).
# NOTE(review): entries mix glob-looking forms ("*/node_modules/*") with
# regex-looking forms (".*/ace/.*.js") — confirm which matcher consumers apply.
GLOBAL_FILE_IGNORE_LIST = [
# Git metadata: top-level entry plus a "*/"-prefixed variant (presumably for nested repos — verify).
".git/*",
"*/.git/*",
# Installed JS dependencies: top-level entry plus a "*/"-prefixed variant (presumably nested packages — verify).
"node_modules/*",
"*/node_modules/*",
".yarn/releases/*",
".*/tests/static/chunk-.*.js",
".*/ace/.*.js",
# Single file excluded because of a tree-sitter parsing error.
"src/vs/platform/contextview/browser/contextMenuService.ts",
]


@unique
Expand Down