Skip to content

[executorch] Add logs for helping debug address space overflow issue #5035

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions exir/emit/_emitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
TensorShapeDynamism,
)
from executorch.exir.tensor import (
AddressSpaceOverflowException,
layout_enum,
make_allocation_info,
make_tensor_value,
Expand Down Expand Up @@ -349,7 +350,20 @@ def _tensor_spec_to_evalue(self, spec: TensorSpec) -> EValue:
self.node,
f"Non-const tensor should be an activation tensor: mem_offset {spec.mem_offset}",
)
allocation_info = make_allocation_info(spec.mem_id, spec.mem_offset)
try:
allocation_info = make_allocation_info(spec.mem_id, spec.mem_offset)
except AddressSpaceOverflowException as e:
raise InternalError(
self._emit_node_specific_error(
self.node,
(
f"{e}\nHint: If you are using a memory pass based on dynamic shape bounds, "
f"such as ConstraintBasedSymShapeEvalPass, this may be the cause of an "
f"unbacked SymInt with its upper bound lazily set to 2^64-1 (uint64 max) "
"during torch.export()."
),
)
)

if spec.const:
# Tensor with a blob we need to serialize. May not actually be constant at runtime
Expand Down Expand Up @@ -1527,7 +1541,6 @@ def placeholder(
is_user_input = True

if isinstance(target, str) and isinstance(spec, TensorSpec):

fqn, is_mutable_buffer = self._find_fqn_for_placeholder(target, spec)

# From the fqn find the corresponding tensor
Expand Down
4 changes: 2 additions & 2 deletions exir/passes/sym_shape_eval_pass.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ class HintBasedSymShapeEvalPass(PassBase):

Warning: if you're using torch.export with constrain API, this method doesn't respect the input constraints.

Not inherit from ExportPass since we simply need a way to iterate thru
Not inherited from ExportPass since we simply need a way to iterate thru
every node's output. PassBase is easier for that purpose.
"""

Expand Down Expand Up @@ -245,7 +245,7 @@ class ConstraintBasedSymShapeEvalPass(PassBase):
formula. We should convert those symbolic formula to concrete value for
static/upperbound tensors so we can properly do memory planning for them.

Not inherit from ExportPass since we simply need a way to iterate thru
Not inherited from ExportPass since we simply need a way to iterate through
every node's output. PassBase is easier for that purpose.
"""

Expand Down
8 changes: 7 additions & 1 deletion exir/tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@
from executorch.exir.sym_util import eval_shape


class AddressSpaceOverflowException(Exception):
    """Raised when a planned memory offset cannot be encoded in 64 bits.

    Emitted by ``make_allocation_info`` when ``mem_offset`` exceeds the
    64-bit range that ``AllocationDetails`` can represent (its high word
    must fit in 32 bits).
    """


def num_bytes_from_shape_and_dtype(shape: torch.Size, dtype: torch.dtype) -> int:
"""
Assume the tensor is a contiguous one.
Expand Down Expand Up @@ -297,7 +301,9 @@ def make_allocation_info(mem_id: int, mem_offset: int) -> schema.AllocationDetai
memory_offset_low = mem_offset & ((1 << 32) - 1)
memory_offset_high = mem_offset >> 32
if memory_offset_high >= 1 << 32:
raise ValueError(f"mem_offset {mem_offset} does not fit in 64 bits")
raise AddressSpaceOverflowException(
f"mem_offset {mem_offset} does not fit in 64 bits"
)

allocation_info = schema.AllocationDetails(
memory_id=mem_id,
Expand Down
2 changes: 1 addition & 1 deletion exir/tests/test_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ def test_allocation_info_fails(self) -> None:
)
for test_case in test_cases:
kwargs = test_case[0]
with self.assertRaisesRegex(ValueError, test_case[1], msg=f"{kwargs}"):
with self.assertRaisesRegex(Exception, test_case[1], msg=f"{kwargs}"):
make_allocation_info(**kwargs)

def test_contiguous_stride_from_shape(self) -> None:
Expand Down
Loading