Skip to content

Import Parsing #664

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Feb 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/codegen/sdk/codebase/node_classes/node_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from codegen.sdk.core.file import SourceFile
from codegen.sdk.core.function import Function
from codegen.sdk.core.import_resolution import Import
from codegen.sdk.core.interfaces.editable import Editable
from codegen.sdk.core.statements.comment import Comment
from codegen.sdk.core.symbol import Symbol

Expand All @@ -33,7 +34,7 @@ class NodeClasses:
function_call_cls: type[FunctionCall]
comment_cls: type[Comment]
bool_conversion: dict[bool, str]
dynamic_import_parent_types: set[str]
dynamic_import_parent_types: set[type[Editable]]
symbol_map: dict[str, type[Symbol]] = field(default_factory=dict)
expression_map: dict[str, type[Expression]] = field(default_factory=dict)
type_map: dict[str, type[Type] | dict[str, type[Type]]] = field(default_factory=dict)
Expand Down
27 changes: 16 additions & 11 deletions src/codegen/sdk/codebase/node_classes/py_node_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,13 @@
from codegen.sdk.core.expressions.subscript_expression import SubscriptExpression
from codegen.sdk.core.expressions.unary_expression import UnaryExpression
from codegen.sdk.core.expressions.unpack import Unpack
from codegen.sdk.core.function import Function
from codegen.sdk.core.statements.comment import Comment
from codegen.sdk.core.statements.for_loop_statement import ForLoopStatement
from codegen.sdk.core.statements.if_block_statement import IfBlockStatement
from codegen.sdk.core.statements.switch_statement import SwitchStatement
from codegen.sdk.core.statements.try_catch_statement import TryCatchStatement
from codegen.sdk.core.statements.while_statement import WhileStatement
from codegen.sdk.core.symbol_groups.dict import Dict
from codegen.sdk.core.symbol_groups.list import List
from codegen.sdk.core.symbol_groups.tuple import Tuple
Expand All @@ -29,6 +35,8 @@
from codegen.sdk.python.expressions.string import PyString
from codegen.sdk.python.expressions.union_type import PyUnionType
from codegen.sdk.python.statements.import_statement import PyImportStatement
from codegen.sdk.python.statements.match_case import PyMatchCase
from codegen.sdk.python.statements.with_statement import WithStatement


def parse_subscript(node: TSNode, file_node_id, ctx, parent):
Expand Down Expand Up @@ -110,16 +118,13 @@ def parse_subscript(node: TSNode, file_node_id, ctx, parent):
False: "False",
},
dynamic_import_parent_types={
"function_definition",
"if_statement",
"try_statement",
"with_statement",
"else_clause",
"for_statement",
"except_clause",
"while_statement",
"match_statement",
"case_clause",
"finally_clause",
Function,
IfBlockStatement,
TryCatchStatement,
WithStatement,
ForLoopStatement,
WhileStatement,
SwitchStatement,
PyMatchCase,
},
)
27 changes: 14 additions & 13 deletions src/codegen/sdk/codebase/node_classes/ts_node_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,14 @@
from codegen.sdk.core.expressions.unary_expression import UnaryExpression
from codegen.sdk.core.expressions.unpack import Unpack
from codegen.sdk.core.expressions.value import Value
from codegen.sdk.core.function import Function
from codegen.sdk.core.statements.comment import Comment
from codegen.sdk.core.statements.for_loop_statement import ForLoopStatement
from codegen.sdk.core.statements.if_block_statement import IfBlockStatement
from codegen.sdk.core.statements.switch_case import SwitchCase
from codegen.sdk.core.statements.switch_statement import SwitchStatement
from codegen.sdk.core.statements.try_catch_statement import TryCatchStatement
from codegen.sdk.core.statements.while_statement import WhileStatement
from codegen.sdk.core.symbol_groups.list import List
from codegen.sdk.core.symbol_groups.type_parameters import TypeParameters
from codegen.sdk.typescript.class_definition import TSClass
Expand Down Expand Up @@ -166,18 +173,12 @@ def parse_new(node: TSNode, *args):
False: "false",
},
dynamic_import_parent_types={
"function_declaration",
"method_definition",
"arrow_function",
"if_statement",
"try_statement",
"else_clause",
"catch_clause",
"finally_clause",
"while_statement",
"for_statement",
"do_statement",
"switch_case",
"switch_statement",
Function,
IfBlockStatement,
TryCatchStatement,
ForLoopStatement,
WhileStatement,
SwitchStatement,
SwitchCase,
},
)
5 changes: 0 additions & 5 deletions src/codegen/sdk/core/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,15 +459,10 @@ def parse(self, ctx: CodebaseContext) -> None:
self.code_block = self._parse_code_block(self.ts_node)

self.code_block.parse()
self._parse_imports()
# We need to clear the valid symbol/import names before we start resolving exports since these can be outdated.
self.invalidate()
sort_editables(self._nodes)

# Abstract hook invoked by parse() right after the code block has been parsed.
# Each language-specific file subclass supplies the body; nothing is returned.
@abstractmethod
@commiter
def _parse_imports(self) -> None: ...

@noapidoc
@commiter
def remove_internal_edges(self) -> None:
Expand Down
10 changes: 1 addition & 9 deletions src/codegen/sdk/core/import_resolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,15 +428,7 @@ def my_function():
bool: True if the import is dynamic (within a control flow or scope block),
False if it's a top-level import.
"""
curr = self.ts_node

# always traverses upto the module level
while curr:
if curr.type in self.ctx.node_classes.dynamic_import_parent_types:
return True
curr = curr.parent

return False
return self.parent_of_types(self.ctx.node_classes.dynamic_import_parent_types) is not None

####################################################################################################################
# MANIPULATIONS
Expand Down
10 changes: 9 additions & 1 deletion src/codegen/sdk/core/interfaces/editable.py
Original file line number Diff line number Diff line change
Expand Up @@ -823,7 +823,7 @@ def children_by_field_types(self, field_types: str | Iterable[str]) -> Generator
@reader
@noapidoc
def child_by_field_types(self, field_types: str | Iterable[str]) -> Expression[Self] | None:
    """Get child by field types.

    Returns the first item produced by ``children_by_field_types`` for the
    given field type(s), or ``None`` when that generator yields nothing.
    """
    return next(self.children_by_field_types(field_types), None)

@property
Expand Down Expand Up @@ -1097,6 +1097,14 @@ def parent_of_type(self, type: type[T]) -> T | None:
return self.parent.parent_of_type(type)
return None

def parent_of_types(self, types: set[type[T]]) -> T | None:
    """Find the nearest ancestor that is an instance of any of the given types.

    The walk starts at this node's ``parent`` and follows ``parent`` links
    upwards. It stops when a match is found, when a node has no parent, or
    when a node is its own parent.

    Args:
        types: Candidate classes. An ancestor matches when it is an instance
            of at least one of them. An empty set never matches.

    Returns:
        The closest matching ancestor, or ``None`` if the chain ends first.
    """
    # isinstance() accepts a tuple of classes; build it once instead of
    # re-running a generator over `types` at every level of the tree.
    accepted = tuple(types)
    node = self
    # Iterative walk: same result as recursing on the parent, but not limited
    # by the interpreter's recursion depth on deeply nested trees.
    while True:
        parent = node.parent
        if parent and isinstance(parent, accepted):
            return parent
        # A missing parent or a self-referencing parent ends the chain.
        if parent is None or parent is node:
            return None
        node = parent

@reader
def ancestors(self, type: type[T]) -> list[T]:
"""Find all ancestors of the node of the given type. Does not return itself"""
Expand Down
29 changes: 17 additions & 12 deletions src/codegen/sdk/core/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from codegen.sdk.core.expressions.placeholder_type import PlaceholderType
from codegen.sdk.core.expressions.value import Value
from codegen.sdk.core.statements.symbol_statement import SymbolStatement
from codegen.sdk.utils import find_first_function_descendant
from codegen.sdk.utils import find_first_function_descendant, find_import_node

if TYPE_CHECKING:
from tree_sitter import Node as TSNode
Expand Down Expand Up @@ -108,6 +108,7 @@ def parse_ts_statements(self, node: TSNode, file_node_id: NodeId, ctx: CodebaseC
from codegen.sdk.typescript.statements.comment import TSComment
from codegen.sdk.typescript.statements.for_loop_statement import TSForLoopStatement
from codegen.sdk.typescript.statements.if_block_statement import TSIfBlockStatement
from codegen.sdk.typescript.statements.import_statement import TSImportStatement
from codegen.sdk.typescript.statements.labeled_statement import TSLabeledStatement
from codegen.sdk.typescript.statements.switch_statement import TSSwitchStatement
from codegen.sdk.typescript.statements.try_catch_statement import TSTryCatchStatement
Expand All @@ -117,11 +118,13 @@ def parse_ts_statements(self, node: TSNode, file_node_id: NodeId, ctx: CodebaseC

if node.type in self.expressions or node.type == "expression_statement":
return [ExpressionStatement(node, file_node_id, ctx, parent, 0, expression_node=node)]

for child in node.named_children:
# =====[ Functions + Methods ]=====
if child.type in _VALID_TYPE_NAMES:
statements.append(SymbolStatement(child, file_node_id, ctx, parent, len(statements)))

elif child.type == "import_statement":
statements.append(TSImportStatement(child, file_node_id, ctx, parent, len(statements)))
# =====[ Classes ]=====
elif child.type in ("class_declaration", "abstract_class_declaration"):
statements.append(SymbolStatement(child, file_node_id, ctx, parent, len(statements)))
Expand All @@ -132,7 +135,10 @@ def parse_ts_statements(self, node: TSNode, file_node_id: NodeId, ctx: CodebaseC

# =====[ Type Alias Declarations ]=====
elif child.type == "type_alias_declaration":
statements.append(SymbolStatement(child, file_node_id, ctx, parent, len(statements)))
if import_node := find_import_node(child):
statements.append(TSImportStatement(child, file_node_id, ctx, parent, len(statements), source_node=import_node))
else:
statements.append(SymbolStatement(child, file_node_id, ctx, parent, len(statements)))

# =====[ Enum Declarations ]=====
elif child.type == "enum_declaration":
Expand All @@ -142,11 +148,6 @@ def parse_ts_statements(self, node: TSNode, file_node_id: NodeId, ctx: CodebaseC
elif child.type == "export_statement" or child.text.decode("utf-8") == "export *;":
statements.append(ExportStatement(child, file_node_id, ctx, parent, len(statements)))

# =====[ Imports ] =====
elif child.type == "import_statement":
# statements.append(TSImportStatement(child, file_node_id, ctx, parent, len(statements)))
pass # Temporarily opting to identify all imports using find_all_descendants

# =====[ Non-symbol statements ] =====
elif child.type == "comment":
statements.append(TSComment.from_code_block(child, parent, pos=len(statements)))
Expand All @@ -167,6 +168,8 @@ def parse_ts_statements(self, node: TSNode, file_node_id: NodeId, ctx: CodebaseC
elif child.type in ["lexical_declaration", "variable_declaration"]:
if function_node := find_first_function_descendant(child):
statements.append(SymbolStatement(child, file_node_id, ctx, parent, len(statements), function_node))
elif import_node := find_import_node(child):
statements.append(TSImportStatement(child, file_node_id, ctx, parent, len(statements), source_node=import_node))
else:
statements.append(
TSAssignmentStatement.from_assignment(
Expand All @@ -176,6 +179,10 @@ def parse_ts_statements(self, node: TSNode, file_node_id: NodeId, ctx: CodebaseC
elif child.type in ["public_field_definition", "property_signature", "enum_assignment"]:
statements.append(TSAttribute(child, file_node_id, ctx, parent, pos=len(statements)))
elif child.type == "expression_statement":
if import_node := find_import_node(child):
statements.append(TSImportStatement(child, file_node_id, ctx, parent, pos=len(statements), source_node=import_node))
continue

for var in child.named_children:
if var.type == "string":
statements.append(TSComment.from_code_block(var, parent, pos=len(statements)))
Expand All @@ -185,7 +192,6 @@ def parse_ts_statements(self, node: TSNode, file_node_id: NodeId, ctx: CodebaseC
statements.append(ExpressionStatement(child, file_node_id, ctx, parent, pos=len(statements), expression_node=var))
elif child.type in self.expressions:
statements.append(ExpressionStatement(child, file_node_id, ctx, parent, len(statements), expression_node=child))

else:
self.log("Couldn't parse statement with type: %s", child.type)
statements.append(Statement.from_code_block(child, parent, pos=len(statements)))
Expand All @@ -204,6 +210,7 @@ def parse_py_statements(self, node: TSNode, file_node_id: NodeId, ctx: CodebaseC
from codegen.sdk.python.statements.comment import PyComment
from codegen.sdk.python.statements.for_loop_statement import PyForLoopStatement
from codegen.sdk.python.statements.if_block_statement import PyIfBlockStatement
from codegen.sdk.python.statements.import_statement import PyImportStatement
from codegen.sdk.python.statements.match_statement import PyMatchStatement
from codegen.sdk.python.statements.pass_statement import PyPassStatement
from codegen.sdk.python.statements.try_catch_statement import PyTryCatchStatement
Expand Down Expand Up @@ -237,9 +244,7 @@ def parse_py_statements(self, node: TSNode, file_node_id: NodeId, ctx: CodebaseC

# =====[ Imports ] =====
elif child.type in ["import_statement", "import_from_statement", "future_import_statement"]:
# statements.append(PyImportStatement(child, file_node_id, ctx, parent, len(statements)))
pass # Temporarily opting to identify all imports using find_all_descendants

statements.append(PyImportStatement(child, file_node_id, ctx, parent, len(statements)))
# =====[ Non-symbol statements ] =====
elif child.type == "comment":
statements.append(PyComment.from_code_block(child, parent, pos=len(statements)))
Expand Down
1 change: 1 addition & 0 deletions src/codegen/sdk/extensions/utils.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def find_all_descendants(
type_names: Iterable[str] | str,
max_depth: int | None = None,
nested: bool = True,
stop_at_first: str | None = None,
) -> list[TSNode]: ...
def find_line_start_and_end_nodes(node: TSNode) -> list[tuple[TSNode, TSNode]]:
"""Returns a list of tuples of the start and end nodes of each line in the node"""
Expand Down
5 changes: 4 additions & 1 deletion src/codegen/sdk/extensions/utils.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def get_all_identifiers(node: TSNode) -> list[TSNode]:
return sorted(dict.fromkeys(identifiers), key=lambda x: x.start_byte)


def find_all_descendants(node: TSNode, type_names: Iterable[str] | str, max_depth: int | None = None, nested: bool = True) -> list[TSNode]:
def find_all_descendants(node: TSNode, type_names: Iterable[str] | str, max_depth: int | None = None, nested: bool = True, stop_at_first: str | None = None) -> list[TSNode]:
if isinstance(type_names, str):
type_names = [type_names]
descendants = []
Expand All @@ -45,6 +45,9 @@ def find_all_descendants(node: TSNode, type_names: Iterable[str] | str, max_dept
if not nested and current_node != node:
return

if stop_at_first and current_node.type == stop_at_first:
return

for child in current_node.children:
traverse(child, depth + 1)

Expand Down
11 changes: 2 additions & 9 deletions src/codegen/sdk/python/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@

from typing import TYPE_CHECKING

from codegen.sdk.core.autocommit import commiter, reader, writer
from codegen.sdk.core.autocommit import reader, writer
from codegen.sdk.core.file import SourceFile
from codegen.sdk.core.interface import Interface
from codegen.sdk.enums import ImportType
from codegen.sdk.extensions.utils import cached_property, iter_all_descendants
from codegen.sdk.extensions.utils import cached_property
from codegen.sdk.python import PyAssignment
from codegen.sdk.python.class_definition import PyClass
from codegen.sdk.python.detached_symbols.code_block import PyCodeBlock
Expand All @@ -15,7 +15,6 @@
from codegen.sdk.python.import_resolution import PyImport
from codegen.sdk.python.interfaces.has_block import PyHasBlock
from codegen.sdk.python.statements.attribute import PyAttribute
from codegen.sdk.python.statements.import_statement import PyImportStatement
from codegen.shared.decorators.docs import noapidoc, py_apidoc
from codegen.shared.enums.programming_language import ProgrammingLanguage

Expand Down Expand Up @@ -59,12 +58,6 @@ def symbol_can_be_added(self, symbol: PySymbol) -> bool:
"""
return True

@noapidoc
@commiter
def _parse_imports(self) -> None:
    """Build a PyImportStatement for every import node found under this file's tree-sitter node."""
    # Node types treated as imports when scanning the descendants.
    import_node_types = frozenset({"import_statement", "import_from_statement", "future_import_statement"})
    for found in iter_all_descendants(self.ts_node, import_node_types):
        # Constructed for its side effects only; the instance is not kept here.
        PyImportStatement(found, self.node_id, self.ctx, self.code_block, 0)

####################################################################################################################
# GETTERS
####################################################################################################################
Expand Down
17 changes: 2 additions & 15 deletions src/codegen/sdk/typescript/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os
from typing import TYPE_CHECKING

from codegen.sdk.core.autocommit import commiter, mover, reader, writer
from codegen.sdk.core.autocommit import mover, reader, writer
from codegen.sdk.core.file import SourceFile
from codegen.sdk.core.interfaces.exportable import Exportable
from codegen.sdk.enums import ImportType, NodeType, SymbolType
Expand All @@ -18,8 +18,7 @@
from codegen.sdk.typescript.interface import TSInterface
from codegen.sdk.typescript.interfaces.has_block import TSHasBlock
from codegen.sdk.typescript.namespace import TSNamespace
from codegen.sdk.typescript.statements.import_statement import TSImportStatement
from codegen.sdk.utils import calculate_base_path, find_all_descendants
from codegen.sdk.utils import calculate_base_path
from codegen.shared.decorators.docs import noapidoc, ts_apidoc
from codegen.shared.enums.programming_language import ProgrammingLanguage

Expand Down Expand Up @@ -228,18 +227,6 @@ def add_export_to_symbol(self, symbol: TSSymbol) -> None:
# TODO: this should be in symbol.py class. Rename as `add_export`
symbol.add_keyword("export")

@noapidoc
@commiter
def _parse_imports(self) -> None:
    """Build a TSImportStatement for each import found under this file's tree-sitter node.

    Handles static ``import_statement`` nodes as well as ``call_expression``
    nodes whose callee is ``import`` or the identifier ``require``.
    """
    for candidate in find_all_descendants(self.ts_node, {"import_statement", "call_expression"}):
        kind = candidate.type
        if kind == "call_expression":
            callee = candidate.child_by_field_name("function")
            calls_import = callee.type == "import"
            calls_require = callee.type == "identifier" and callee.text.decode("utf-8") == "require"
            if not (calls_import or calls_require):
                # Ordinary function call, not an import.
                continue
        elif kind != "import_statement":
            continue
        # Constructed for its side effects only; the instance is not kept here.
        TSImportStatement(candidate, self.node_id, self.ctx, self.code_block, 0)

@writer
def remove_unused_exports(self) -> None:
"""Removes unused exports from the file.
Expand Down
5 changes: 4 additions & 1 deletion src/codegen/sdk/typescript/import_resolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,10 @@ def from_dynamic_import_statement(cls, import_call_node: TSNode, module_node: TS
return imports

# If import statement is a variable declaration, capture the variable scoping keyword (const, let, var, etc)
statement_node = import_statement_node.parent if import_statement_node.type in ["variable_declarator", "assignment_expression"] else import_statement_node
if import_statement_node.type == "lexical_declaration":
statement_node = import_statement_node
else:
statement_node = import_statement_node.parent if import_statement_node.type in ["variable_declarator", "assignment_expression"] else import_statement_node

# ==== [ Named dynamic import ] ====
if name_node.type == "property_identifier":
Expand Down
Loading
Loading