Skip to content

[CG-8871] fix: removing dead code removes code that should stay #355

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 22 commits into from
Closed
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/codegen/sdk/core/dataclasses/usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ class UsageKind(IntEnum):
IMPORTED: Imported with an import statement.
IMPORTED_WILDCARD: Imported with a wildcard import statement.
DEFAULT_VALUE: Represents a default value in a function/method parameter.
ASSIGNMENT_SIBLING: Usage within the assigment where it is necessary for the assigment of another.
"""

SUBCLASS = auto()
Expand All @@ -90,3 +91,4 @@ class UsageKind(IntEnum):
IMPORTED = auto()
IMPORTED_WILDCARD = auto()
DEFAULT_VALUE = auto()
ASSIGNMENT_SIBLING = auto()
2 changes: 2 additions & 0 deletions src/codegen/sdk/core/import_resolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,8 @@ def names(self) -> Generator[tuple[str, Self | WildcardImport[Self]], None, None
imp.file.invalidate()

return
elif self.resolved_symbol is None:
self._resolving_wildcards = False
yield self.name, self

@property
Expand Down
60 changes: 59 additions & 1 deletion src/codegen/sdk/python/assignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@

from typing import TYPE_CHECKING

from codegen.sdk._proxy import proxy_property
from codegen.sdk.core.assignment import Assignment
from codegen.sdk.core.dataclasses.usage import Usage, UsageKind, UsageType
from codegen.sdk.core.expressions.multi_expression import MultiExpression
from codegen.sdk.extensions.autocommit import reader
from codegen.sdk.enums import EdgeType
from codegen.sdk.extensions.autocommit import commiter, reader
from codegen.sdk.python.symbol import PySymbol
from codegen.sdk.python.symbol_groups.comment_group import PyCommentGroup
from codegen.shared.decorators.docs import noapidoc, py_apidoc
Expand All @@ -13,6 +16,7 @@
from tree_sitter import Node as TSNode

from codegen.sdk.codebase.codebase_context import CodebaseContext
from codegen.sdk.core.interfaces.has_name import HasName
from codegen.sdk.core.node_id_factory import NodeId
from codegen.sdk.python.statements.assignment_statement import PyAssignmentStatement

Expand All @@ -33,8 +37,8 @@

left_node = ts_node.child_by_field_name("left")
right_node = ts_node.child_by_field_name("right")
assignments = cls._from_left_and_right_nodes(ts_node, file_node_id, ctx, parent, left_node, right_node)

Check failure on line 40 in src/codegen/sdk/python/assignment.py

View workflow job for this annotation

GitHub Actions / mypy

error: Argument 5 to "_from_left_and_right_nodes" of "Assignment" has incompatible type "Node | None"; expected "Node" [arg-type]

Check failure on line 40 in src/codegen/sdk/python/assignment.py

View workflow job for this annotation

GitHub Actions / mypy

error: Argument 6 to "_from_left_and_right_nodes" of "Assignment" has incompatible type "Node | None"; expected "Node" [arg-type]
return MultiExpression(ts_node, file_node_id, ctx, parent, assignments)

Check failure on line 41 in src/codegen/sdk/python/assignment.py

View workflow job for this annotation

GitHub Actions / mypy

error: Argument 5 to "MultiExpression" has incompatible type "list[Assignment[Any]]"; expected "list[PyAssignment]" [arg-type]

@classmethod
def from_named_expression(cls, ts_node: TSNode, file_node_id: NodeId, ctx: CodebaseContext, parent: PyAssignmentStatement) -> MultiExpression[PyAssignmentStatement, PyAssignment]:
Expand All @@ -60,8 +64,8 @@

left_node = ts_node.child_by_field_name("name")
right_node = ts_node.child_by_field_name("value")
assignments = cls._from_left_and_right_nodes(ts_node, file_node_id, ctx, parent, left_node, right_node)

Check failure on line 67 in src/codegen/sdk/python/assignment.py

View workflow job for this annotation

GitHub Actions / mypy

error: Argument 5 to "_from_left_and_right_nodes" of "Assignment" has incompatible type "Node | None"; expected "Node" [arg-type]

Check failure on line 67 in src/codegen/sdk/python/assignment.py

View workflow job for this annotation

GitHub Actions / mypy

error: Argument 6 to "_from_left_and_right_nodes" of "Assignment" has incompatible type "Node | None"; expected "Node" [arg-type]
return MultiExpression(ts_node, file_node_id, ctx, parent, assignments)

Check failure on line 68 in src/codegen/sdk/python/assignment.py

View workflow job for this annotation

GitHub Actions / mypy

error: Argument 5 to "MultiExpression" has incompatible type "list[Assignment[Any]]"; expected "list[PyAssignment]" [arg-type]

@property
@reader
Expand Down Expand Up @@ -96,3 +100,57 @@
"""
# HACK: This is a temporary solution until comments are fixed
return PyCommentGroup.from_symbol_inline_comments(self, self.ts_node.parent)

@noapidoc
@commiter
def _compute_dependencies(self, usage_type: UsageKind | None = None, dest: HasName | None = None) -> None:
super()._compute_dependencies(usage_type, dest)
if len(self.parent.assignments) >= 2:

Check failure on line 108 in src/codegen/sdk/python/assignment.py

View workflow job for this annotation

GitHub Actions / mypy

error: "Statement[CodeBlock[PyAssignmentStatement, Any]]" has no attribute "assignments" [attr-defined]
for assigment_sibling in self.parent.assignments:

Check failure on line 109 in src/codegen/sdk/python/assignment.py

View workflow job for this annotation

GitHub Actions / mypy

error: "Statement[CodeBlock[PyAssignmentStatement, Any]]" has no attribute "assignments" [attr-defined]
if assigment_sibling.name != self.name:
self.ctx.add_edge(
self.node_id,
assigment_sibling.node_id,
EdgeType.SYMBOL_USAGE,
usage=Usage(match=self.get_name(), kind=UsageKind.ASSIGNMENT_SIBLING, usage_type=UsageType.DIRECT, usage_symbol=self, imported_by=None),

Check failure on line 115 in src/codegen/sdk/python/assignment.py

View workflow job for this annotation

GitHub Actions / mypy

error: Argument "match" to "Usage" has incompatible type "Name[Any] | ChainedAttribute[Any, Any, Any] | None"; expected "Name[Any] | ChainedAttribute[Any, Any, Any] | FunctionCall[Any]" [arg-type]
)

@proxy_property
@reader(cache=False)
def usages(self, usage_types: UsageType | None = None) -> list[Usage]:
"""Returns a list of usages of the PyAssignment object.

Retrieves all locations where the exportable object is used in the codebase. By default, returns all usages, such as imports or references within the same file.

Args:
usage_types (UsageType | None): Specifies which types of usages to include in the results. Default is any usages.

Returns:
list[Usage]: A sorted list of Usage objects representing where this exportable is used, ordered by source location in reverse.

Raises:
ValueError: If no usage types are specified or if only ALIASED and DIRECT types are specified together.

Note:
This method can be called as both a property or a method. If used as a property, it is equivalent to invoking it without arguments.
"""
if usage_types == UsageType.DIRECT | UsageType.ALIASED:
msg = "Combination of only Aliased and Direct usages makes no sense"
raise ValueError(msg)

assert self.node_id is not None
usages_to_return = []
in_edges = self.ctx.in_edges(self.node_id)
for edge in in_edges:
meta_data = edge[2]
if meta_data.type == EdgeType.SYMBOL_USAGE:
if usage := meta_data.usage:
if usage.kind == UsageKind.ASSIGNMENT_SIBLING:
sibling = self.ctx.get_node(edge[0])
for s_edge in self.ctx.in_edges(sibling.node_id):
if s_edge[2].usage.kind != UsageKind.ASSIGNMENT_SIBLING:

Check failure on line 151 in src/codegen/sdk/python/assignment.py

View workflow job for this annotation

GitHub Actions / mypy

error: Item "None" of "Usage | None" has no attribute "kind" [union-attr]
usages_to_return.append(usage)
break
elif usage_types is None or usage.usage_type in usage_types:
usages_to_return.append(usage)
return sorted(dict.fromkeys(usages_to_return), key=lambda x: x.match.ts_node.start_byte if x.match else x.usage_symbol.ts_node.start_byte, reverse=True)
25 changes: 25 additions & 0 deletions src/codegen/sdk/python/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,3 +196,28 @@ def valid_import_names(self) -> dict[str, PySymbol | PyImport | WildcardImport[P
ret[file.name] = file
return ret
return super().valid_import_names

def get_node_from_wildcard_chain(self, symbol_name: str):
node = None
if node := self.get_node_by_name(symbol_name):
return node

if wildcard_imports := {imp for imp in self.imports if imp.is_wildcard_import()}:
for wildcard_import in wildcard_imports:
if imp_resolution := wildcard_import.resolve_import():
node = imp_resolution.from_file.get_node_from_wildcard_chain(symbol_name=symbol_name)

return node

def get_node_wildcard_resolves_for(self, symbol_name: str):
node = None
if node := self.get_node_by_name(symbol_name):
return node

if wildcard_imports := {imp for imp in self.imports if imp.is_wildcard_import()}:
for wildcard_import in wildcard_imports:
if imp_resolution := wildcard_import.resolve_import():
if imp_resolution.from_file.get_node_from_wildcard_chain(symbol_name=symbol_name):
node = wildcard_import

return node
45 changes: 42 additions & 3 deletions src/codegen/sdk/python/import_resolution.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
from __future__ import annotations

import os
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Self, override

from codegen.sdk.core.autocommit import reader
from codegen.sdk.core.expressions import Name
from codegen.sdk.core.import_resolution import ExternalImportResolver, Import, ImportResolution
from codegen.sdk.enums import ImportType, NodeType
from codegen.sdk.extensions.resolution import ResolutionStack
from codegen.shared.decorators.docs import noapidoc, py_apidoc

if TYPE_CHECKING:
from collections.abc import Generator

from tree_sitter import Node as TSNode

from codegen.sdk.codebase.codebase_context import CodebaseContext
Expand All @@ -28,6 +31,10 @@
class PyImport(Import["PyFile"]):
"""Extends Import for Python codebases."""

def __init__(self, ts_node, file_node_id, G, parent, module_node, name_node, alias_node, import_type=ImportType.UNKNOWN):
super().__init__(ts_node, file_node_id, G, parent, module_node, name_node, alias_node, import_type)
self.requesting_names = set()

@reader
def is_module_import(self) -> bool:
"""Determines if the import is a module-level or wildcard import.
Expand Down Expand Up @@ -117,13 +124,13 @@ def resolve_import(self, base_path: str | None = None, *, add_module_name: str |
filepath = module_source.replace(".", "/") + ".py"
filepath = os.path.join(base_path, filepath)
if file := self.ctx.get_file(filepath):
symbol = file.get_node_by_name(symbol_name)
symbol = file.get_node_wildcard_resolves_for(symbol_name)
return ImportResolution(from_file=file, symbol=symbol)

# =====[ Check if `module/__init__.py` file exists in the graph ]=====
filepath = filepath.replace(".py", "/__init__.py")
if from_file := self.ctx.get_file(filepath):
symbol = from_file.get_node_by_name(symbol_name)
symbol = from_file.get_node_wildcard_resolves_for(symbol_name)
return ImportResolution(from_file=from_file, symbol=symbol)

# =====[ Case: Can't resolve the import ]=====
Expand All @@ -133,6 +140,11 @@ def resolve_import(self, base_path: str | None = None, *, add_module_name: str |
if base_path == "src":
# Try "test" next
return self.resolve_import(base_path="test", add_module_name=add_module_name)
if base_path == "test" and module_source:
# Try to resolve assuming package nested in repo
possible_package_base_path = module_source.split(".")[0]
if possible_package_base_path not in ("test", "src"):
return self.resolve_import(base_path=possible_package_base_path, add_module_name=add_module_name)

# if not G_override:
# for resolver in ctx.import_resolvers:
Expand Down Expand Up @@ -232,6 +244,26 @@ def from_future_import_statement(cls, import_statement: TSNode, file_node_id: No
imports.append(imp)
return imports

@reader
@noapidoc
@override
def _resolved_types(self) -> Generator[ResolutionStack[Self], None, None]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is the right approach. It will create duplicated edges. For example

def foo():
     ...
def bar():
     ...
from filea import *
foo() # will be a usage of foo and bar

For some context, we regular imports are resolved as follows.

  1. We add the import to valid_import_names
  2. When resolving, we check valid_symbol_names for the name
  3. We call _resolved_types on the import

Since it's impossible to know which symbol you're importing in a wildcard, wildcards follow the following pattern

  1. We check the imported file and get all valid_import_names (this is the same as valid_symbol_names in python) from that file
  2. For each imported symbol, we created a wilcardImport for that name and put it in the valid imports
  3. When resolving, we call _resolved_types on the wildcard import. We create an edge through the import but we point it at the specific imported symbol.

For example, in the above case, we'd call _resolved_types on the wildcard for foo. But it'll create edges from the import to foo and from the call site to foo

Copy link
Contributor Author

@tkfoss tkfoss Feb 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seemed to work as expected, I ve added additional UTs, have I missed anything?

"""Resolve the types used by this import."""
ix_seen = set()

aliased = self.is_aliased_import()
if imported := self._imported_symbol(resolve_exports=True):
if getattr(imported, "is_wildcard_import", False):
imported.set_requesting_names(self)
yield from self.with_resolution_frame(imported, direct=False, aliased=aliased)
else:
yield ResolutionStack(self, aliased=aliased)

if self.is_wildcard_import():
for name, wildcard_import in self.names:
if name in self.requesting_names:
yield from [frame.parent_frame for frame in wildcard_import.resolved_type_frames]

@property
@reader
def import_specifier(self) -> Editable:
Expand All @@ -254,6 +286,13 @@ def import_specifier(self) -> Editable:
if is_match:
return Name(import_specifier, self.file_node_id, self.ctx, self)

@noapidoc
def set_requesting_names(self, requester: PyImport):
if requester.is_wildcard_import():
self.requesting_names.update(requester.requesting_names)
else:
self.requesting_names.add(requester.name)

@reader
def get_import_string(
self,
Expand Down
49 changes: 49 additions & 0 deletions tests/unit/codegen/sdk/python/expressions/test_unpacking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from typing import TYPE_CHECKING

from codegen.sdk.codebase.factory.get_session import get_codebase_session
from codegen.sdk.core.dataclasses.usage import UsageKind

if TYPE_CHECKING:
from codegen.sdk.core.symbol_groups.tuple import Tuple


def test_unpacking_tuple(tmpdir) -> None:
file = "test.py"
# language=python
content = """
_,symbol,_ = (a, b, c)
"""
with get_codebase_session(tmpdir=tmpdir, files={"test.py": content}) as codebase:
file = codebase.get_file(file)

symbol = file.get_symbol("symbol")

len(symbol.usages) == 0


def test_unpacking_tuple_with_use(tmpdir) -> None:
"""Tests that unused variables that are part of unpacking are in use"""
file = "test.py"
# language=python
content = """
foo,symbol,bar = (a, b, c)

test=symbol
"""
with get_codebase_session(tmpdir=tmpdir, files={"test.py": content}) as codebase:
file = codebase.get_file(file)

symbol = file.get_symbol("symbol")
foo = file.get_symbol("foo")
bar = file.get_symbol("bar")

symbol_list: Tuple = symbol.value

len(symbol.usages) == 1
symbol.usages[0].kind == UsageKind.BODY

len(foo.usages) == 1
foo.usages[0].kind == UsageKind.ASSIGNMENT_SIBLING

len(bar.usages) == 1
bar.usages[0].kind == UsageKind.ASSIGNMENT_SIBLING
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,78 @@ def func_1():
assert call_site.file == consumer_file


def test_import_resolution_init_wildcard(tmpdir: str) -> None:
"""Tests that named import from a file with wildcard resolves properly"""
# language=python
content1 = """TEST_CONST=2
foo=9
"""
content2 = """from testdir.test1 import *
bar=foo
test=TEST_CONST"""
content3 = """from testdir import TEST_CONST
test3=TEST_CONST"""
with get_codebase_session(tmpdir=tmpdir, files={"testdir/test1.py": content1, "testdir/__init__.py": content2, "test3.py": content3}) as codebase:
file1: SourceFile = codebase.get_file("testdir/test1.py")
file2: SourceFile = codebase.get_file("testdir/__init__.py")
file3: SourceFile = codebase.get_file("test3.py")

symb = file1.get_symbol("TEST_CONST")
test = file2.get_symbol("test")
test3 = file3.get_symbol("test3")
test3_import = file3.get_import("TEST_CONST")

assert len(symb.usages) == 3
assert symb.symbol_usages == [test, test3, test3_import]


def test_import_resolution_chaining_wildcards(tmpdir: str) -> None:
"""Tests that chaining wildcard imports resolves properly"""
# language=python
content1 = """TEST_CONST=2
foo=9
"""
content2 = """from testdir.test1 import *
bar=foo
test=TEST_CONST"""
content3 = """from testdir import *
test3=TEST_CONST"""
with get_codebase_session(tmpdir=tmpdir, files={"testdir/test1.py": content1, "testdir/__init__.py": content2, "test3.py": content3}) as codebase:
file1: SourceFile = codebase.get_file("testdir/test1.py")
file2: SourceFile = codebase.get_file("testdir/__init__.py")
file3: SourceFile = codebase.get_file("test3.py")

symb = file1.get_symbol("TEST_CONST")
test = file2.get_symbol("test")
bar = file2.get_symbol("bar")
mid_import = file2.get_import("testdir.test1")
test3 = file3.get_symbol("test3")

assert len(symb.usages) == 2
assert symb.symbol_usages == [test, test3]
assert mid_import.symbol_usages == [test, bar, test3]


def test_import_nested_installable_resolution(tmpdir: str) -> None:
"""Tests that a nested installable resolves internally instead of as external"""
# language=python
content1 = """
TEST_CONST=5
"""
content2 = """from test_pack.test import TEST_CONST
test=TEST_CONST"""
with get_codebase_session(tmpdir=tmpdir, files={"test_pack/test_pack/test.py": content1, "test1.py": content2}) as codebase:
file1: SourceFile = codebase.get_file("test_pack/test_pack/test.py")
file2: SourceFile = codebase.get_file("test1.py")

symb = file1.get_symbol("TEST_CONST")
test = file2.get_symbol("test")
test_import = file2.get_import("TEST_CONST")

assert len(symb.usages) == 2
assert symb.symbol_usages == [test, test_import]


def test_import_resolution_circular(tmpdir: str) -> None:
"""Tests function.usages returns usages from file imports"""
# language=python
Expand Down
Loading