Skip to content

Tawsif add support for codebase exports #117

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jan 27, 2025
36 changes: 33 additions & 3 deletions src/codegen/sdk/core/codebase.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
from codegen.sdk.core.detached_symbols.code_block import CodeBlock
from codegen.sdk.core.detached_symbols.parameter import Parameter
from codegen.sdk.core.directory import Directory
from codegen.sdk.core.export import Export
from codegen.sdk.core.external_module import ExternalModule
from codegen.sdk.core.file import File, SourceFile
from codegen.sdk.core.function import Function
Expand All @@ -59,19 +60,22 @@
from codegen.sdk.python.file import PyFile
from codegen.sdk.python.function import PyFunction
from codegen.sdk.python.import_resolution import PyImport
from codegen.sdk.python.statements.import_statement import PyImportStatement
from codegen.sdk.python.symbol import PySymbol
from codegen.sdk.typescript.assignment import TSAssignment
from codegen.sdk.typescript.class_definition import TSClass
from codegen.sdk.typescript.detached_symbols.code_block import TSCodeBlock
from codegen.sdk.typescript.detached_symbols.parameter import TSParameter
from codegen.sdk.typescript.export import TSExport
from codegen.sdk.typescript.file import TSFile
from codegen.sdk.typescript.function import TSFunction
from codegen.sdk.typescript.import_resolution import TSImport
from codegen.sdk.typescript.interface import TSInterface
from codegen.sdk.typescript.statements.import_statement import TSImportStatement
from codegen.sdk.typescript.symbol import TSSymbol
from codegen.sdk.typescript.type_alias import TSTypeAlias
from codegen.sdk.utils import determine_project_language
from codegen.shared.decorators.docs import apidoc, noapidoc
from codegen.shared.decorators.docs import apidoc, noapidoc, py_noapidoc
from codegen.shared.exceptions.control_flow import MaxAIRequestsError
from codegen.shared.performance.stopwatch_utils import stopwatch
from codegen.visualizations.visualization_manager import VisualizationManager
Expand All @@ -91,6 +95,11 @@
TTypeAlias = TypeVar("TTypeAlias", bound="TypeAlias")
TParameter = TypeVar("TParameter", bound="Parameter")
TCodeBlock = TypeVar("TCodeBlock", bound="CodeBlock")
# Type variable for export nodes, bounded by the core Export class.
TExport = TypeVar("TExport", bound="Export")
# Per-language type variables for global variables; both are bounded by Assignment.
TSGlobalVar = TypeVar("TSGlobalVar", bound="Assignment")
PyGlobalVar = TypeVar("PyGlobalVar", bound="Assignment")
# Language-specific Directory aliases. Each one fixes the file, symbol, import-statement,
# class, function and import parameters to that language's classes and leaves the
# global-variable slot as the type variable declared above.
TSDirectory = Directory[TSFile, TSSymbol, TSImportStatement, TSGlobalVar, TSClass, TSFunction, TSImport]
PyDirectory = Directory[PyFile, PySymbol, PyImportStatement, PyGlobalVar, PyClass, PyFunction, PyImport]


@apidoc
Expand Down Expand Up @@ -263,6 +272,27 @@ def imports(self) -> list[TImport]:
"""
return self.G.get_nodes(NodeType.IMPORT)

@property
@py_noapidoc
def exports(self: "TSCodebaseType") -> list[TSExport]:
    """Return every export node recorded in the codebase graph.

    Fetches all nodes of type ``NodeType.EXPORT`` from ``self.G``. The
    property is typed for TypeScript codebases and is rejected outright for
    Python codebases.

    Returns:
        list[TSExport]: The export nodes found in the codebase graph.

    Raises:
        NotImplementedError: If the codebase language is Python.
    """
    language_is_python = self.language == ProgrammingLanguage.PYTHON
    if language_is_python:
        raise NotImplementedError("Exports are not supported for Python codebases since Python does not have an export mechanism.")

    export_nodes = self.G.get_nodes(NodeType.EXPORT)
    return export_nodes

@property
def external_modules(self) -> list[ExternalModule]:
"""Returns a list of all external modules in the codebase.
Expand Down Expand Up @@ -1145,5 +1175,5 @@ def from_repo(cls, repo_name: str, *, tmp_dir: str | None = None, commit: str |
# The last 2 lines of code are added to the runner. See codegen-backend/cli/generate/utils.py
# Type Aliases
# Language-agnostic alias: every type parameter is the core (base) class.
CodebaseType = Codebase[SourceFile, Directory, Symbol, Class, Function, Import, Assignment, Interface, TypeAlias, Parameter, CodeBlock]
PyCodebaseType = Codebase[PyFile, Directory, PySymbol, PyClass, PyFunction, PyImport, PyAssignment, Interface, TypeAlias, PyParameter, PyCodeBlock]
TSCodebaseType = Codebase[TSFile, Directory, TSSymbol, TSClass, TSFunction, TSImport, TSAssignment, TSInterface, TSTypeAlias, TSParameter, TSCodeBlock]
# NOTE(review): the two assignments below rebind PyCodebaseType and TSCodebaseType, swapping the
# plain Directory argument for PyDirectory / TSDirectory. This looks like the old and new sides
# of a diff shown together — confirm that only this final pair is intended to remain.
PyCodebaseType = Codebase[PyFile, PyDirectory, PySymbol, PyClass, PyFunction, PyImport, PyAssignment, Interface, TypeAlias, PyParameter, PyCodeBlock]
TSCodebaseType = Codebase[TSFile, TSDirectory, TSSymbol, TSClass, TSFunction, TSImport, TSAssignment, TSInterface, TSTypeAlias, TSParameter, TSCodeBlock]
36 changes: 33 additions & 3 deletions src/codegen/sdk/core/directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,22 @@
from pathlib import Path
from typing import TYPE_CHECKING, Generic, Self, TypeVar

from codegen.shared.decorators.docs import apidoc
from codegen.shared.decorators.docs import apidoc, py_noapidoc

if TYPE_CHECKING:
from codegen.sdk.core.assignment import Assignment
from codegen.sdk.core.class_definition import Class
from codegen.sdk.core.file import File
from codegen.sdk.core.function import Function
from codegen.sdk.core.import_resolution import ImportStatement
from codegen.sdk.core.import_resolution import Import, ImportStatement
from codegen.sdk.core.symbol import Symbol
from codegen.sdk.typescript.class_definition import TSClass
from codegen.sdk.typescript.export import TSExport
from codegen.sdk.typescript.file import TSFile
from codegen.sdk.typescript.function import TSFunction
from codegen.sdk.typescript.import_resolution import TSImport
from codegen.sdk.typescript.statements.import_statement import TSImportStatement
from codegen.sdk.typescript.symbol import TSSymbol

import logging

Expand All @@ -24,10 +31,13 @@
TGlobalVar = TypeVar("TGlobalVar", bound="Assignment")
TClass = TypeVar("TClass", bound="Class")
TFunction = TypeVar("TFunction", bound="Function")
# Type variable for the import nodes a directory exposes, bounded by Import.
TImport = TypeVar("TImport", bound="Import")

# Type variable used for the global-variable slot in the TypeScript-only ``self`` annotations.
TSGlobalVar = TypeVar("TSGlobalVar", bound="Assignment")


@apidoc
class Directory(Generic[TFile, TSymbol, TImportStatement, TGlobalVar, TClass, TFunction]):
class Directory(Generic[TFile, TSymbol, TImportStatement, TGlobalVar, TClass, TFunction, TImport]):
"""Directory representation for codebase.
GraphSitter abstraction of a file directory that can be used to look for files and symbols within a specific directory.
"""
Expand Down Expand Up @@ -133,6 +143,17 @@ def functions(self) -> list[TFunction]:
"""Get a recursive list of all functions in the directory and its subdirectories."""
return list(chain.from_iterable(f.functions for f in self.files))

@property
@py_noapidoc
def exports(self: "Directory[TSFile, TSSymbol, TSImportStatement, TSGlobalVar, TSClass, TSFunction, TSImport]") -> "list[TSExport]":
    """Return the exports of every file in ``self.files`` as one flat list.

    Files are visited in the order ``self.files`` yields them and each file's
    ``exports`` are appended in order. The ``self`` annotation restricts this
    property to TypeScript directories.
    """
    # NOTE(review): presumably ``self.files`` already includes files from subdirectories — confirm.
    collected = []
    for file in self.files:
        collected.extend(file.exports)
    return collected

@property
def imports(self) -> list[TImport]:
    """Return the imports of every file in ``self.files`` as one flat list.

    Files are visited in the order ``self.files`` yields them and each file's
    ``imports`` are appended in order.
    """
    # NOTE(review): presumably ``self.files`` already includes files from subdirectories — confirm.
    collected = []
    for file in self.files:
        collected.extend(file.imports)
    return collected

def get_symbol(self, name: str) -> TSymbol | None:
"""Get a symbol by name in the directory and its subdirectories."""
return next((s for s in self.symbols if s.name == name), None)
Expand Down Expand Up @@ -176,6 +197,15 @@ def get_file(self, filename: str, ignore_case: bool = False) -> TFile | None:
return next((f for name, f in self.items.items() if name.lower() == filename.lower() and isinstance(f, File)), None)
return self.items.get(filename, None)

@py_noapidoc
def get_export(self: "Directory[TSFile, TSSymbol, TSImportStatement, TSGlobalVar, TSClass, TSFunction, TSImport]", name: str) -> "TSExport | None":
    """Return the first export in ``self.exports`` named *name* (TypeScript only).

    Args:
        name: The export name to look for; compared with ``==``.

    Returns:
        The first matching export, or ``None`` when no export has that name.
    """
    for candidate in self.exports:
        if candidate.name == name:
            return candidate
    return None

def get_import(self, name: str) -> TImport | None:
    """Return the first import in ``self.imports`` named *name*.

    Args:
        name: The import name to look for; compared with ``==``.

    Returns:
        The first matching import, or ``None`` when no import has that name.
    """
    for candidate in self.imports:
        if candidate.name == name:
            return candidate
    return None

def add_subdirectory(self, subdirectory: Self) -> None:
"""Add a subdirectory to the directory."""
rel_path = os.path.relpath(subdirectory.dirpath, self.dirpath)
Expand Down
14 changes: 14 additions & 0 deletions src/codegen/shared/decorators/docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,20 @@ def noapidoc(obj: T) -> T:
return obj


# Documented objects registered by @py_noapidoc; insertion goes through bisect.insort.
py_no_apidoc_objects: list[DocumentedObject] = []
# Signatures of the documented objects registered by @py_noapidoc.
py_no_apidoc_signatures: set[str] = set()


def py_noapidoc(obj: T) -> T:
    """Decorator that hides *obj* from the Python API documentation for AI-agent prompts.

    Tags the object with ``_py_apidoc = False`` and ``_api_doc_lang = "python"``.
    When ``get_documented_object`` returns a truthy value for it, that value is
    inserted into ``py_no_apidoc_objects`` and its signature is added to
    ``py_no_apidoc_signatures``.

    Args:
        obj: The object being decorated.

    Returns:
        The same object that was passed in.
    """
    obj._py_apidoc = False
    obj._api_doc_lang = "python"
    documented = get_documented_object(obj)
    if documented:
        # Register the object first, then its signature, as two separate lookups.
        bisect.insort(py_no_apidoc_objects, documented)
        py_no_apidoc_signatures.add(documented.signature())
    return obj


def get_documented_object(obj) -> DocumentedObject | None:
module = inspect.getmodule(obj)
module_name = module.__name__ if module else ""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import pytest

from codegen.sdk.codebase.factory.get_session import get_codebase_session
from codegen.sdk.enums import ProgrammingLanguage


def test_python_exports_not_supported(tmpdir):
    """Accessing ``codebase.exports`` on a Python codebase raises NotImplementedError."""
    # language=python
    # The function body is indented so the fixture file is syntactically valid Python.
    content = """
def hello():
    pass
"""
    # Create a Python codebase with a simple Python file
    with get_codebase_session(tmpdir=tmpdir, files={"test.py": content}, programming_language=ProgrammingLanguage.PYTHON) as codebase:
        # Verify that accessing exports raises NotImplementedError
        with pytest.raises(NotImplementedError):
            _ = codebase.exports
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from codegen.sdk.codebase.factory.get_session import get_codebase_session
from codegen.sdk.enums import ProgrammingLanguage


def test_codebase_exports(tmpdir) -> None:
    """Each kind of export statement in a single file shows up in ``codebase.exports``."""
    # language=typescript
    source = """
export const a = 1;
export let b = 2;
export var c = 3;
export function foo() {}
export class Bar {}
export interface IFoo {}
export type MyType = string;
export { foo as default };
"""
    files = {"file.ts": source}
    with get_codebase_session(tmpdir=tmpdir, files=files, programming_language=ProgrammingLanguage.TYPESCRIPT) as codebase:
        # One export per statement in the fixture.
        assert len(codebase.exports) == 8
        seen_names = {export.name for export in codebase.exports}
        assert seen_names == {"a", "b", "c", "foo", "Bar", "IFoo", "MyType", "default"}


def test_codebase_reexports(tmpdir) -> None:
    """Re-exports in a second file are counted alongside the original exports."""
    # language=typescript
    original_source = """
export const x = 1;
export const y = 2;
"""
    reexport_source = """
export { x } from './file1';
export { y as z } from './file1';
"""
    files = {"file1.ts": original_source, "file2.ts": reexport_source}
    with get_codebase_session(tmpdir=tmpdir, files=files, programming_language=ProgrammingLanguage.TYPESCRIPT) as codebase:
        # Four exports are expected but only three distinct names.
        assert len(codebase.exports) == 4
        seen_names = {export.name for export in codebase.exports}
        assert seen_names == {"x", "y", "z"}


def test_codebase_default_exports(tmpdir) -> None:
    """A lone ``export default`` statement yields exactly one export, named after its value."""
    # language=typescript
    source = """
const value = 42;
export default value;
"""
    files = {"file.ts": source}
    with get_codebase_session(tmpdir=tmpdir, files=files, programming_language=ProgrammingLanguage.TYPESCRIPT) as codebase:
        assert len(codebase.exports) == 1
        default_export = codebase.exports[0]
        assert default_export.name == "value"
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from codegen.sdk.codebase.factory.get_session import get_codebase_session
from codegen.sdk.enums import ProgrammingLanguage


def test_codebase_exports(tmpdir) -> None:
    """Each kind of export statement in a single file shows up in ``codebase.exports``."""
    # language=typescript
    source = """
export const a = 1;
export let b = 2;
export var c = 3;
export function foo() {}
export class Bar {}
export interface IFoo {}
export type MyType = string;
export { foo as default };
"""
    files = {"file.ts": source}
    with get_codebase_session(tmpdir=tmpdir, files=files, programming_language=ProgrammingLanguage.TYPESCRIPT) as codebase:
        # One export per statement in the fixture.
        assert len(codebase.exports) == 8
        seen_names = {export.name for export in codebase.exports}
        assert seen_names == {"a", "b", "c", "foo", "Bar", "IFoo", "MyType", "default"}


def test_codebase_reexports(tmpdir) -> None:
    """Re-exports in a second file are counted alongside the original exports."""
    # language=typescript
    original_source = """
export const x = 1;
export const y = 2;
"""
    reexport_source = """
export { x } from './file1';
export { y as z } from './file1';
"""
    files = {"file1.ts": original_source, "file2.ts": reexport_source}
    with get_codebase_session(tmpdir=tmpdir, files=files, programming_language=ProgrammingLanguage.TYPESCRIPT) as codebase:
        # Four exports are expected but only three distinct names.
        assert len(codebase.exports) == 4
        seen_names = {export.name for export in codebase.exports}
        assert seen_names == {"x", "y", "z"}


def test_codebase_default_exports(tmpdir) -> None:
    """A lone ``export default`` statement yields exactly one export, named after its value."""
    # language=typescript
    source = """
const value = 42;
export default value;
"""
    files = {"file.ts": source}
    with get_codebase_session(tmpdir=tmpdir, files=files, programming_language=ProgrammingLanguage.TYPESCRIPT) as codebase:
        assert len(codebase.exports) == 1
        default_export = codebase.exports[0]
        assert default_export.name == "value"
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from codegen.sdk.codebase.factory.get_session import get_codebase_session
from codegen.sdk.enums import ProgrammingLanguage


def test_directory_imports(tmpdir) -> None:
    """``Directory.imports`` and ``get_import`` only see imports from that directory's files."""
    # language=typescript
    first_source = """
import { a, b } from '../shared';
import type { IFoo } from './types';
"""
    second_source = """
import { c } from '../shared';
import defaultExport from './module';
"""
    files = {
        "dir1/file1.ts": first_source,
        "dir1/file2.ts": second_source,
        "dir2/file3.ts": "import { d } from '../shared';",
    }
    with get_codebase_session(tmpdir=tmpdir, files=files, programming_language=ProgrammingLanguage.TYPESCRIPT) as codebase:
        first_dir = codebase.get_directory("dir1")
        second_dir = codebase.get_directory("dir2")

        # dir1 holds two files with five imported names between them.
        assert len(first_dir.imports) == 5
        first_dir_names = {imported.name for imported in first_dir.imports}
        assert first_dir_names == {"a", "b", "IFoo", "c", "defaultExport"}

        # dir2 holds a single file with a single import.
        assert len(second_dir.imports) == 1
        assert second_dir.imports[0].name == "d"

        # Lookup by name is scoped to the directory it is called on.
        assert first_dir.get_import("a") is not None
        assert first_dir.get_import("d") is None
        assert second_dir.get_import("d") is not None


def test_directory_nested_imports(tmpdir) -> None:
    """A directory's ``imports`` include those of files in nested subdirectories."""
    # language=typescript
    top_source = """
import { a } from './module1';
"""
    middle_source = """
import { b } from '../module2';
"""
    deep_source = """
import { c } from '../../module3';
"""
    files = {
        "dir1/file1.ts": top_source,
        "dir1/subdir/file2.ts": middle_source,
        "dir1/subdir/deepdir/file3.ts": deep_source,
    }
    with get_codebase_session(tmpdir=tmpdir, files=files, programming_language=ProgrammingLanguage.TYPESCRIPT) as codebase:
        top_dir = codebase.get_directory("dir1")
        middle_dir = codebase.get_directory("dir1/subdir")
        deep_dir = codebase.get_directory("dir1/subdir/deepdir")

        # Each level sees its own import plus everything beneath it.
        assert len(top_dir.imports) == 3  # own file + subdir + deepdir
        assert len(middle_dir.imports) == 2  # own file + deepdir
        assert len(deep_dir.imports) == 1  # own file only
Loading