Skip to content

Commit 78d081b

Browse files
committed
Introduce data schema to store raw tensors
ghstack-source-id: da15b0f Pull Request resolved: #6540
1 parent 3b25b05 commit 78d081b

File tree

2 files changed

+107
-0
lines changed

2 files changed

+107
-0
lines changed

exir/schema_data.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
# pyre-strict
8+
9+
from dataclasses import dataclass
10+
from typing import List, Optional
11+
12+
from executorch.exir.scalar_type import ScalarType
13+
14+
# Note: check executorch/schema/data.fbs for explanations of these fields.
15+
16+
17+
# Python counterpart of the `TensorMetadata` table in schema/data.fbs: describes
# a single tensor and where its raw data sits inside a tensor segment.
@dataclass
18+
class TensorMetadata:
19+
# Unique id used to connect the data and the program.
fully_qualified_name: str
20+
scalar_type: ScalarType
21+
dim_sizes: List[int]
22+
# NOTE(review): schema/data.fbs declares dim_order as `[ubyte]`; `List[int]`
# may be the more accurate annotation than `List[bytes]` — confirm.
dim_order: List[bytes]
23+
24+
# Offset of this tensor, relative to its TensorSegment (see schema/data.fbs).
offset: int
25+
# Size of the tensor data (presumably in bytes — TODO confirm).
size: int
26+
27+
28+
# Python counterpart of the `TensorSegment` table in schema/data.fbs: groups the
# metadata of the tensors stored in one data segment.
@dataclass
29+
class TensorSegment:
30+
# Index of the data segment that holds these tensors.
segment_index: int
31+
# Per-tensor information, including each tensor's offset and size.
tensor_metadata: List[TensorMetadata]
32+
33+
34+
# Python counterpart of the `DataSegment` table in schema/data.fbs: locates one
# segment of raw data in the file.
@dataclass
35+
class DataSegment:
36+
# Offset of the segment, relative to the segment base offset provided in the
# extended file header (see schema/data.fbs).
offset: int
37+
# Size in bytes of the valid data starting at `offset`.
size: int
38+
39+
40+
# Python counterpart of the root `Data` table in schema/data.fbs.
@dataclass
41+
class Data:
42+
# Schema version.
version: int
43+
# Alignment for each tensor.
tensor_alignment: int
44+
# Tensor information, one entry per group of tensors sharing a data segment.
tensor_segments: List[TensorSegment]
45+
# NOTE(review): schema/data.fbs names the corresponding field `segments`, not
# `data_segments` — confirm the serializer handles this name difference.
data_segments: List[DataSegment]

schema/data.fbs

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
include "scalar_type.fbs";
2+
namespace executorch_flatbuffer;
3+
4+
// Update this identifier after any backward-compatibility (BC) breaking change.
5+
file_identifier "DT01";
6+
file_extension "data";
7+
8+
table TensorMetadata {
9+
// The unique id used to connect the data and the program.
10+
fully_qualified_name:string;
11+
scalar_type:ScalarType;
12+
dim_sizes:[int];
13+
dim_order:[ubyte];
14+
15+
// Tensor offsets are relative to the segment of their enclosing TensorSegment.
16+
// To locate a given tensor, add up the following three offsets:
17+
// 1. segment_base_offset: from the file header.
18+
// 2. segment offset: segments[tensor_segments[i].segment_index].offset
19+
// This is likely to be 0 (all the tensors in one segment).
20+
// 3. tensor offset: tensor_segments[i].tensor_metadata[j].offset
21+
// May need to binary search over tensor_metadata to find the matching
22+
// tensor using its fully_qualified_name (fqn).
23+
offset: uint64;
24+
size: uint64;
25+
}
26+
27+
table TensorSegment {
28+
// Index into Data.segments of the segment that holds these tensors.
29+
segment_index: uint;
30+
31+
// Per-tensor information, including each tensor's offset and size.
32+
tensor_metadata:[TensorMetadata];
33+
}
34+
35+
table DataSegment {
36+
// Segment offsets are relative to the segment base offset provided in
37+
// the extended file header. Segments will typically be aligned so that
38+
// they can be loaded with mmap().
39+
offset: uint64;
40+
41+
// The size in bytes of valid data starting at the offset. The segment
42+
// data may be followed by padding before the next segment begins,
43+
// to make it easier to use mmap().
44+
size: uint64;
45+
}
46+
47+
table Data {
48+
// Schema version.
49+
version:uint;
50+
51+
// Alignment for each tensor (presumably in bytes; TODO confirm).
52+
tensor_alignment: uint32;
53+
54+
// Tensor information, grouped by the data segment that holds the tensors.
55+
tensor_segments:[TensorSegment];
56+
57+
// Data segments; indexed by TensorSegment.segment_index.
58+
segments:[DataSegment];
59+
60+
}
61+
62+
root_type Data;

0 commit comments

Comments
 (0)