Skip to content

Commit 2a9dc7a

Browse files
committed
erofs: introduce chunk-based file on-disk format
Currently, uncompressed data except for tail-packing inline is consecutive on disk. In order to support chunk-based data deduplication, add a new corresponding inode data layout. In the future, the data source of chunks can be either (un)compressed. Link: https://lore.kernel.org/r/[email protected] Reviewed-by: Liu Bo <[email protected]> Reviewed-by: Chao Yu <[email protected]> Signed-off-by: Gao Xiang <[email protected]>
1 parent eadcd6b commit 2a9dc7a

File tree

2 files changed

+59
-4
lines changed

2 files changed

+59
-4
lines changed

Documentation/filesystems/erofs.rst

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,13 +156,14 @@ may not. All metadatas can be now observed in two different spaces (views):
156156

157157
Xattrs, extents, data inline are followed by the corresponding inode with
158158
proper alignment, and they could be optional for different data mappings.
159-
_currently_ total 4 valid data mappings are supported:
159+
_currently_ a total of 5 data layouts are supported:
160160

161161
== ====================================================================
162162
0 flat file data without data inline (no extent);
163163
1 fixed-sized output data compression (with non-compacted indexes);
164164
2 flat file data with tail packing data inline (no extent);
165-
3 fixed-sized output data compression (with compacted indexes, v5.3+).
165+
3 fixed-sized output data compression (with compacted indexes, v5.3+);
166+
4 chunk-based file (v5.15+).
166167
== ====================================================================
167168

168169
The size of the optional xattrs is indicated by i_xattr_count in inode
@@ -213,6 +214,17 @@ Note that apart from the offset of the first filename, nameoff0 also indicates
213214
the total number of directory entries in this block since there is no need to
214215
introduce another on-disk field at all.
215216

217+
Chunk-based file
218+
----------------
219+
In order to support chunk-based data deduplication, a new inode data layout has
220+
been supported since Linux v5.15: Files are split into equal-sized data chunks
221+
with the ``extents`` area of the inode metadata indicating how to get the chunk
222+
data: this can be either a simple 4-byte block address array or the 8-byte
223+
chunk index form (see struct erofs_inode_chunk_index in erofs_fs.h for more
224+
details).
225+
226+
By the way, chunk-based files are all uncompressed for now.
227+
216228
Data compression
217229
----------------
218230
EROFS implements LZ4 fixed-sized output compression which generates fixed-sized

fs/erofs/erofs_fs.h

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
*
55
* Copyright (C) 2017-2018 HUAWEI, Inc.
66
* https://www.huawei.com/
7+
* Copyright (C) 2021, Alibaba Cloud
78
*/
89
#ifndef __EROFS_FS_H
910
#define __EROFS_FS_H
@@ -19,10 +20,12 @@
1920
#define EROFS_FEATURE_INCOMPAT_LZ4_0PADDING 0x00000001
2021
#define EROFS_FEATURE_INCOMPAT_COMPR_CFGS 0x00000002
2122
#define EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER 0x00000002
23+
#define EROFS_FEATURE_INCOMPAT_CHUNKED_FILE 0x00000004
2224
#define EROFS_ALL_FEATURE_INCOMPAT \
2325
(EROFS_FEATURE_INCOMPAT_LZ4_0PADDING | \
2426
EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
25-
EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER)
27+
EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \
28+
EROFS_FEATURE_INCOMPAT_CHUNKED_FILE)
2629

2730
#define EROFS_SB_EXTSLOT_SIZE 16
2831

@@ -64,13 +67,16 @@ struct erofs_super_block {
6467
* inode, [xattrs], last_inline_data, ... | ... | no-holed data
6568
* 3 - inode compression D:
6669
* inode, [xattrs], map_header, extents ... | ...
67-
* 4~7 - reserved
70+
* 4 - inode chunk-based E:
71+
* inode, [xattrs], chunk indexes ... | ...
72+
* 5~7 - reserved
6873
*/
6974
enum {
7075
EROFS_INODE_FLAT_PLAIN = 0,
7176
EROFS_INODE_FLAT_COMPRESSION_LEGACY = 1,
7277
EROFS_INODE_FLAT_INLINE = 2,
7378
EROFS_INODE_FLAT_COMPRESSION = 3,
79+
EROFS_INODE_CHUNK_BASED = 4,
7480
EROFS_INODE_DATALAYOUT_MAX
7581
};
7682

@@ -90,6 +96,19 @@ static inline bool erofs_inode_is_data_compressed(unsigned int datamode)
9096
#define EROFS_I_ALL \
9197
((1 << (EROFS_I_DATALAYOUT_BIT + EROFS_I_DATALAYOUT_BITS)) - 1)
9298

99+
/* indicate chunk blkbits, thus 'chunksize = blocksize << chunk blkbits' */
100+
#define EROFS_CHUNK_FORMAT_BLKBITS_MASK 0x001F
101+
/* with chunk indexes or just a 4-byte blkaddr array */
102+
#define EROFS_CHUNK_FORMAT_INDEXES 0x0020
103+
104+
#define EROFS_CHUNK_FORMAT_ALL \
105+
(EROFS_CHUNK_FORMAT_BLKBITS_MASK | EROFS_CHUNK_FORMAT_INDEXES)
106+
107+
struct erofs_inode_chunk_info {
108+
__le16 format; /* chunk blkbits, etc. */
109+
__le16 reserved;
110+
};
111+
93112
/* 32-byte reduced form of an ondisk inode */
94113
struct erofs_inode_compact {
95114
__le16 i_format; /* inode format hints */
@@ -107,6 +126,9 @@ struct erofs_inode_compact {
107126

108127
/* for device files, used to indicate old/new device # */
109128
__le32 rdev;
129+
130+
/* for chunk-based files, it contains the summary info */
131+
struct erofs_inode_chunk_info c;
110132
} i_u;
111133
__le32 i_ino; /* only used for 32-bit stat compatibility */
112134
__le16 i_uid;
@@ -135,6 +157,9 @@ struct erofs_inode_extended {
135157

136158
/* for device files, used to indicate old/new device # */
137159
__le32 rdev;
160+
161+
/* for chunk-based files, it contains the summary info */
162+
struct erofs_inode_chunk_info c;
138163
} i_u;
139164

140165
/* only used for 32-bit stat compatibility */
@@ -204,6 +229,19 @@ static inline unsigned int erofs_xattr_entry_size(struct erofs_xattr_entry *e)
204229
e->e_name_len + le16_to_cpu(e->e_value_size));
205230
}
206231

232+
/* represent a zeroed chunk (hole) */
233+
#define EROFS_NULL_ADDR -1
234+
235+
/* 4-byte block address array */
236+
#define EROFS_BLOCK_MAP_ENTRY_SIZE sizeof(__le32)
237+
238+
/* 8-byte inode chunk indexes */
239+
struct erofs_inode_chunk_index {
240+
__le16 advise; /* always 0, don't care for now */
241+
__le16 device_id; /* back-end storage id, always 0 for now */
242+
__le32 blkaddr; /* start block address of this inode chunk */
243+
};
244+
207245
/* maximum supported size of a physical compression cluster */
208246
#define Z_EROFS_PCLUSTER_MAX_SIZE (1024 * 1024)
209247

@@ -338,9 +376,14 @@ static inline void erofs_check_ondisk_layout_definitions(void)
338376
BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64);
339377
BUILD_BUG_ON(sizeof(struct erofs_xattr_ibody_header) != 12);
340378
BUILD_BUG_ON(sizeof(struct erofs_xattr_entry) != 4);
379+
BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_info) != 4);
380+
BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) != 8);
341381
BUILD_BUG_ON(sizeof(struct z_erofs_map_header) != 8);
342382
BUILD_BUG_ON(sizeof(struct z_erofs_vle_decompressed_index) != 8);
343383
BUILD_BUG_ON(sizeof(struct erofs_dirent) != 12);
384+
/* keep in sync between 2 index structures for better extendibility */
385+
BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) !=
386+
sizeof(struct z_erofs_vle_decompressed_index));
344387

345388
BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) <
346389
Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1);

0 commit comments

Comments
 (0)