Skip to content

test: switch rest of language detection tests to parameters #683

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 27, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 60 additions & 42 deletions tests/unit/codegen/git/utils/test_language_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,44 +5,56 @@
from codegen.shared.enums.programming_language import ProgrammingLanguage


@pytest.mark.parametrize(
    "strategy, expected_language",
    [
        ("package_json", ProgrammingLanguage.PYTHON),  # Check for package.json -> False, therefore return PYTHON
        ("git_most_common", ProgrammingLanguage.PYTHON),  # Check for git_most_common -> PYTHON
        ("most_common", ProgrammingLanguage.PYTHON),  # Check for most_common -> PYTHON
    ],
)
def test_determine_language_python(tmpdir, strategy, expected_language) -> None:
    """Check language detection for a project containing only ``.py`` files.

    Each detection strategy runs as its own parametrized case, so a failure
    in one strategy is reported independently of the others. All three
    strategies are expected to return PYTHON here.
    """
    files = {"file1.py": "", "file2.py": "", "file3.py": ""}
    with get_codebase_session(tmpdir=tmpdir, files=files, programming_language=ProgrammingLanguage.PYTHON):
        assert determine_project_language(tmpdir, strategy=strategy) == expected_language


@pytest.mark.parametrize(
    "strategy, expected_language",
    [
        # Check for package.json -> False, therefore return PYTHON
        # (THIS IS EXPECTED, even if it's a TS project)
        ("package_json", ProgrammingLanguage.PYTHON),
        ("git_most_common", ProgrammingLanguage.TYPESCRIPT),  # Check for git_most_common -> TYPESCRIPT
        ("most_common", ProgrammingLanguage.TYPESCRIPT),  # Check for most_common -> TYPESCRIPT
    ],
)
def test_determine_language_typescript(tmpdir, strategy, expected_language) -> None:
    """Check language detection for a project containing only ``.ts`` files.

    Each detection strategy runs as its own parametrized case. The
    ``package_json`` strategy is expected to return PYTHON because the
    project has no ``package.json``; the file-counting strategies are
    expected to return TYPESCRIPT.
    """
    files = {"file1.ts": "", "file2.ts": "", "file3.ts": ""}
    with get_codebase_session(tmpdir=tmpdir, files=files, programming_language=ProgrammingLanguage.TYPESCRIPT):
        assert determine_project_language(tmpdir, strategy=strategy) == expected_language


@pytest.mark.parametrize(
    "strategy, expected_language",
    [
        ("package_json", ProgrammingLanguage.PYTHON),  # Check for package.json -> False, therefore return PYTHON (THIS IS EXPECTED)
        ("git_most_common", ProgrammingLanguage.OTHER),  # Check for git_most_common -> OTHER
        ("most_common", ProgrammingLanguage.OTHER),  # Check for most_common -> OTHER
    ],
)
def test_determine_language_other(tmpdir, strategy, expected_language) -> None:
    """Check language detection for a project containing only ``.txt`` files.

    Each detection strategy runs as its own parametrized case. The
    ``package_json`` strategy is expected to return PYTHON because the
    project has no ``package.json``; the file-counting strategies are
    expected to return OTHER.
    """
    files = {"file1.txt": "", "file2.txt": "", "file3.txt": ""}
    with get_codebase_session(tmpdir=tmpdir, files=files, programming_language=ProgrammingLanguage.OTHER):
        assert determine_project_language(tmpdir, strategy=strategy) == expected_language


@pytest.mark.parametrize(
    "strategy, expected_language",
    [
        ("package_json", ProgrammingLanguage.TYPESCRIPT),  # Check for package.json -> True, therefore return Typescript
        ("git_most_common", ProgrammingLanguage.OTHER),  # Check for git_most_common -> OTHER
        ("most_common", ProgrammingLanguage.OTHER),  # Check for most_common -> OTHER
    ],
)
def test_determine_language_package_json(tmpdir, strategy, expected_language) -> None:
    """Check language detection for a project whose only file is ``package.json``.

    Each detection strategy runs as its own parametrized case. The
    ``package_json`` strategy is expected to return TYPESCRIPT because the
    file exists; the file-counting strategies are expected to return OTHER.
    """
    files = {"package.json": ""}
    with get_codebase_session(tmpdir=tmpdir, files=files, programming_language=ProgrammingLanguage.TYPESCRIPT):
        assert determine_project_language(tmpdir, strategy=strategy) == expected_language


@pytest.mark.parametrize(
Expand All @@ -67,21 +79,27 @@ def test_determine_language_mixed(tmpdir, strategy, expected_language) -> None:
assert determine_project_language(tmpdir, strategy=strategy) == expected_language


@pytest.mark.parametrize(
    "strategy, expected_language",
    [
        ("package_json", ProgrammingLanguage.PYTHON),  # Check for package.json -> False, therefore return PYTHON
        ("git_most_common", ProgrammingLanguage.OTHER),  # Check for git_most_common -> OTHER
        ("most_common", ProgrammingLanguage.OTHER),  # Check for most_common -> OTHER
    ],
)
def test_determine_language_threshold(tmpdir, strategy, expected_language) -> None:
    """Check language detection when Python files are a small minority.

    The project holds one ``.py`` file and nineteen ``.txt`` files. Each
    detection strategy runs as its own parametrized case: ``package_json``
    is expected to return PYTHON (no ``package.json`` present), while the
    file-counting strategies are expected to return OTHER.
    """
    # One Python file (file0.py) plus file1.txt .. file19.txt.
    files = {"file0.py": ""} | {f"file{i}.txt": "" for i in range(1, 20)}
    with get_codebase_session(tmpdir=tmpdir, files=files, programming_language=ProgrammingLanguage.PYTHON):
        assert determine_project_language(tmpdir, strategy=strategy) == expected_language


@pytest.mark.parametrize(
    "strategy, expected_language",
    [
        ("package_json", ProgrammingLanguage.PYTHON),  # Check for package.json -> False, therefore return PYTHON
        ("git_most_common", ProgrammingLanguage.OTHER),  # Check for git_most_common -> OTHER (follows gitignore, therefore finds no files)
        ("most_common", ProgrammingLanguage.PYTHON),  # Check for most_common -> PYTHON (ignores gitignore)
    ],
)
def test_determine_language_gitignore(tmpdir, strategy, expected_language) -> None:
    """Check how each detection strategy treats git-ignored files.

    All ``.py`` files live under ``dir/``, which ``.gitignore`` excludes.
    Each detection strategy runs as its own parametrized case; the expected
    results differ between ``git_most_common`` (OTHER) and ``most_common``
    (PYTHON), as listed in the parameter table.
    """
    files = {
        "dir/file1.py": "",
        "dir/file2.py": "",
        "dir/file3.py": "",
        ".gitignore": "dir",
    }
    with get_codebase_session(tmpdir=tmpdir, files=files, programming_language=ProgrammingLanguage.PYTHON):
        assert determine_project_language(tmpdir, strategy=strategy) == expected_language
Loading