Skip to content

Add package.json-based repo language detection #186

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 44 additions & 9 deletions src/codegen/sdk/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from collections.abc import Iterable
from contextlib import contextmanager
from pathlib import Path
from typing import Literal
from xml.dom.minidom import parseString

import dicttoxml
Expand Down Expand Up @@ -242,25 +243,59 @@ def get_language_file_extensions(language: ProgrammingLanguage):
return set(TSFile.get_extensions())


def determine_project_language(folder_path: str):
from codegen.sdk.python import PyFile
from codegen.sdk.typescript.file import TSFile
def determine_project_language(folder_path: str, strategy: Literal["most_common", "package_json"] = "package_json") -> ProgrammingLanguage:
    """Determine the primary programming language of a project.

    Args:
        folder_path (str): Path to the folder to analyze.
        strategy (Literal["most_common", "package_json"]): Strategy to use for determining language.
            "most_common" delegates to ``_determine_language_by_file_count`` and
            "package_json" delegates to ``_determine_language_by_package_json``.

    Returns:
        ProgrammingLanguage: The language reported by the selected strategy.

    Raises:
        ValueError: If ``strategy`` is not one of the supported values.
    """
    # TODO: Create a new strategy that follows gitignore
    if strategy == "most_common":
        return _determine_language_by_file_count(folder_path)
    if strategy == "package_json":
        return _determine_language_by_package_json(folder_path)

    # Literal is only enforced by static type checkers; without this guard an
    # unrecognised strategy would fall through and silently return None.
    msg = f"Unknown language detection strategy: {strategy!r}"
    raise ValueError(msg)


def _determine_language_by_package_json(folder_path: str) -> ProgrammingLanguage:
    """Pick the project language from the presence of a package.json.

    Only checks whether ``package.json`` exists directly under ``folder_path``.
    Faster but less accurate than the file count strategy.

    Args:
        folder_path (str): Path to the folder to analyze

    Returns:
        ProgrammingLanguage: TYPESCRIPT if package.json exists, otherwise PYTHON
    """
    has_package_json = (Path(folder_path) / "package.json").exists()
    return ProgrammingLanguage.TYPESCRIPT if has_package_json else ProgrammingLanguage.PYTHON


def _determine_language_by_file_count(folder_path: str) -> ProgrammingLanguage:
"""Analyzes a folder to determine the primary programming language based on file extensions.
Returns the language with the most matching files.

Args:
folder_path (str): Path to the folder to analyze

Returns:
Optional[ProgrammingLanguage]: The dominant programming language, or None if no matching files found
ProgrammingLanguage: The dominant programming language, or UNSUPPORTED if no matching files found
"""
from codegen.sdk.python import PyFile
from codegen.sdk.typescript.file import TSFile

EXTENSIONS = {
ProgrammingLanguage.PYTHON: PyFile.get_extensions(),
ProgrammingLanguage.TYPESCRIPT: TSFile.get_extensions(),
}

folder = Path(folder_path)
if not folder.exists() or not folder.is_dir():
msg = f"Invalid folder path: {folder_path}"
Expand Down