Skip to content

Commit 0570294

Browse files
lucylq authored and facebook-github-bot committed
Add cord data structure (#2273)
Summary: Pull Request resolved: #2273 Introduce cord data structure to store bytes/bytearrays during serialization. This allows us to manipulate bytes/bytearrays without copying data. bypass-github-export-checks Reviewed By: dbort Differential Revision: D54514244 fbshipit-source-id: 65397dcdea93054d54feea1b9f3ebfb0940c8513
1 parent 12fcfcf commit 0570294

File tree

4 files changed

+123
-0
lines changed

4 files changed

+123
-0
lines changed

exir/_serialize/TARGETS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ runtime.python_library(
2929
name = "lib",
3030
srcs = [
3131
"__init__.py",
32+
"_cord.py",
3233
"_dataclass.py",
3334
"_flatbuffer.py",
3435
"_program.py",

exir/_serialize/_cord.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
import io
8+
from typing import List, Optional, Union
9+
10+
11+
class Cord:
    """Byte sequence held as an ordered list of separate `bytes` chunks.

    Instead of concatenating data into one contiguous `bytes`/`bytearray`,
    a Cord keeps references to the chunks it was given. Large files and
    blobs can therefore be assembled piece by piece; a contiguous copy is
    produced only when `bytes(cord)` is requested.
    """

    def __init__(self, data: Optional[Union[bytes, "Cord"]] = None) -> None:
        """Create a Cord, optionally seeded with `data`.

        Args:
            data: Initial contents (a `bytes` object or another Cord), or
                None to start empty.
        """
        # Chunks in logical order; their concatenation is the Cord's value.
        self._buffers: List[bytes] = []
        # Running total of the chunk lengths, maintained by append().
        self._byte_size: int = 0

        if data is None:
            return
        self.append(data)

    def __len__(self):
        """Return the total number of bytes held across all chunks."""
        return self._byte_size

    def __bytes__(self) -> bytes:
        """Join every chunk into one contiguous `bytes` object."""
        joined = b"".join(self._buffers)
        return joined

    def append(self, data: Union[bytes, "Cord"]) -> None:
        """Add `data` to the end of this Cord.

        A `bytes` argument is stored by reference as one new chunk. A Cord
        argument contributes references to each of its chunks.

        Args:
            data: The `bytes` or Cord to append.

        Raises:
            TypeError: If `data` is neither `bytes` nor a Cord.
        """
        if isinstance(data, bytes):
            new_chunks = [data]
        elif isinstance(data, Cord):
            new_chunks = data._buffers
        else:
            raise TypeError(f"Can only append bytes or Cords, received {type(data)}")

        self._buffers.extend(new_chunks)
        self._byte_size += len(data)

    def write_to_file(self, outfile: io.BufferedIOBase) -> None:
        """Write each chunk, in order, to `outfile` via `outfile.write`."""
        for chunk in self._buffers:
            outfile.write(chunk)

exir/_serialize/test/TARGETS

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,13 @@ python_unittest(
2323
"//executorch/exir/_serialize:lib",
2424
],
2525
)
26+
27+
# Unit-test target for test_cord.py; depends on the _serialize library target.
python_unittest(
    name = "cord",
    srcs = ["test_cord.py"],
    deps = ["//executorch/exir/_serialize:lib"],
)

exir/_serialize/test/test_cord.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
8+
import io
9+
import unittest
10+
11+
from executorch.exir._serialize._cord import Cord
12+
13+
14+
class TestCord(unittest.TestCase):
    """Unit tests for Cord: construction, appending, and file output."""

    def test_cord_init(self) -> None:
        """A Cord can be built empty, from bytes, or from another Cord."""
        cord_empty = Cord()
        self.assertEqual(0, len(cord_empty))
        self.assertEqual(b"", bytes(cord_empty))

        cord = Cord(b"HelloWorld")
        self.assertEqual(10, len(cord))
        self.assertEqual(b"HelloWorld", bytes(cord))

        cord2 = Cord(cord)
        self.assertEqual(10, len(cord2))
        # Check the Cord built from `cord`, not `cord` itself again.
        self.assertEqual(b"HelloWorld", bytes(cord2))

        # Confirm no copies were made: both Cords hold the same bytes object.
        self.assertIs(cord._buffers[0], cord2._buffers[0])

    def test_cord_append(self) -> None:
        """Appending bytes grows the length and the joined contents."""
        cord = Cord()
        cord.append(b"Hello")
        self.assertEqual(5, len(cord))
        self.assertEqual(b"Hello", bytes(cord))

        cord.append(b"World")
        self.assertEqual(10, len(cord))
        self.assertEqual(b"HelloWorld", bytes(cord))

    def test_cord_append_cord(self) -> None:
        """Appending a Cord adds its chunks in order, by reference."""
        cord = Cord()
        cord.append(b"Hello")
        cord.append(b"World")

        cord2 = Cord()
        cord2.append(b"Prefix")
        cord2.append(cord)

        self.assertEqual(16, len(cord2))
        self.assertEqual(b"PrefixHelloWorld", bytes(cord2))

        # Confirm that no copies were made when appending a Cord.
        self.assertIs(cord2._buffers[1], cord._buffers[0])
        self.assertIs(cord2._buffers[2], cord._buffers[1])

    def test_cord_append_invalid_type(self) -> None:
        """Appending anything other than bytes or a Cord raises TypeError."""
        cord = Cord(b"Hello")
        with self.assertRaises(TypeError):
            cord.append("World")  # pyre-ignore[6]: deliberately wrong type.

        # The failed append must leave the Cord untouched.
        self.assertEqual(5, len(cord))
        self.assertEqual(b"Hello", bytes(cord))

    def test_cord_write_to_file(self) -> None:
        """write_to_file emits every chunk, in order, to the output stream."""
        cord = Cord()
        cord.append(b"Hello")
        cord.append(b"World")

        outfile = io.BytesIO()
        cord.write_to_file(outfile)
        self.assertEqual(b"HelloWorld", outfile.getvalue())

0 commit comments

Comments
 (0)