Skip to content

Commit 7a7d17b

Browse files
koverstreet (Kent Overstreet)
authored and committed
bcachefs: Whiteouts for snapshots
This patch adds KEY_TYPE_whiteout, a new type of whiteout for snapshots, which is emitted when we're deleting a key that exists in an ancestor snapshot, and updates the transaction update/commit path to use it.
Signed-off-by: Kent Overstreet <[email protected]>
1 parent 8c6d298 commit 7a7d17b

File tree

4 files changed

+127
-18
lines changed

4 files changed

+127
-18
lines changed

fs/bcachefs/bcachefs_format.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,7 @@ static inline void bkey_init(struct bkey *k)
327327
*/
328328
#define BCH_BKEY_TYPES() \
329329
x(deleted, 0) \
330-
x(discard, 1) \
330+
x(whiteout, 1) \
331331
x(error, 2) \
332332
x(cookie, 3) \
333333
x(hash_whiteout, 4) \
@@ -361,7 +361,7 @@ struct bch_deleted {
361361
struct bch_val v;
362362
};
363363

364-
struct bch_discard {
364+
struct bch_whiteout {
365365
struct bch_val v;
366366
};
367367

fs/bcachefs/bkey.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes)
6363
#define bkey_deleted(_k) ((_k)->type == KEY_TYPE_deleted)
6464

6565
#define bkey_whiteout(_k) \
66-
((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_discard)
66+
((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_whiteout)
6767

6868
enum bkey_lr_packed {
6969
BKEY_PACKED_BOTH,

fs/bcachefs/bkey_methods.c

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ static const char *deleted_key_invalid(const struct bch_fs *c,
3131
.key_invalid = deleted_key_invalid, \
3232
}
3333

34-
#define bch2_bkey_ops_discard (struct bkey_ops) { \
34+
#define bch2_bkey_ops_whiteout (struct bkey_ops) { \
3535
.key_invalid = deleted_key_invalid, \
3636
}
3737

@@ -101,59 +101,71 @@ const char *bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k)
101101

102102
/*
 * Per-btree-node-type bitmask of the key types accepted by
 * __bch2_bkey_invalid(): the table is indexed by the btree node type, and a
 * key of type T is allowed when bit (1U << T) is set in that entry.
 *
 * KEY_TYPE_deleted is listed explicitly in every entry (the validity check
 * no longer ORs it in separately). KEY_TYPE_whiteout is listed only for the
 * extents, inodes, dirents and xattrs entries.
 *
 * NOTE(review): an unsigned bitmask assumes every KEY_TYPE_* value used here
 * is below the bit width of unsigned - confirm against BCH_BKEY_TYPES().
 */
static unsigned bch2_key_types_allowed[] = {
103103
[BKEY_TYPE_extents] =
104+
(1U << KEY_TYPE_deleted)|
105+
(1U << KEY_TYPE_whiteout)|
104106
(1U << KEY_TYPE_error)|
105107
(1U << KEY_TYPE_cookie)|
106108
(1U << KEY_TYPE_extent)|
107109
(1U << KEY_TYPE_reservation)|
108110
(1U << KEY_TYPE_reflink_p)|
109111
(1U << KEY_TYPE_inline_data),
110112
[BKEY_TYPE_inodes] =
113+
(1U << KEY_TYPE_deleted)|
114+
(1U << KEY_TYPE_whiteout)|
111115
(1U << KEY_TYPE_inode)|
112116
(1U << KEY_TYPE_inode_generation),
113117
[BKEY_TYPE_dirents] =
118+
(1U << KEY_TYPE_deleted)|
119+
(1U << KEY_TYPE_whiteout)|
114120
(1U << KEY_TYPE_hash_whiteout)|
115121
(1U << KEY_TYPE_dirent),
116122
[BKEY_TYPE_xattrs] =
123+
(1U << KEY_TYPE_deleted)|
124+
(1U << KEY_TYPE_whiteout)|
117125
(1U << KEY_TYPE_cookie)|
118126
(1U << KEY_TYPE_hash_whiteout)|
119127
(1U << KEY_TYPE_xattr),
120128
[BKEY_TYPE_alloc] =
129+
(1U << KEY_TYPE_deleted)|
121130
(1U << KEY_TYPE_alloc)|
122131
(1U << KEY_TYPE_alloc_v2),
123132
[BKEY_TYPE_quotas] =
133+
(1U << KEY_TYPE_deleted)|
124134
(1U << KEY_TYPE_quota),
125135
[BKEY_TYPE_stripes] =
136+
(1U << KEY_TYPE_deleted)|
126137
(1U << KEY_TYPE_stripe),
127138
[BKEY_TYPE_reflink] =
139+
(1U << KEY_TYPE_deleted)|
128140
(1U << KEY_TYPE_reflink_v)|
129141
(1U << KEY_TYPE_indirect_inline_data),
130142
[BKEY_TYPE_subvolumes] =
143+
(1U << KEY_TYPE_deleted)|
131144
(1U << KEY_TYPE_subvolume),
132145
[BKEY_TYPE_snapshots] =
146+
(1U << KEY_TYPE_deleted)|
133147
(1U << KEY_TYPE_snapshot),
134148
[BKEY_TYPE_btree] =
149+
(1U << KEY_TYPE_deleted)|
135150
(1U << KEY_TYPE_btree_ptr)|
136151
(1U << KEY_TYPE_btree_ptr_v2),
137152
};
138153

139154
const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
140155
enum btree_node_type type)
141156
{
142-
unsigned key_types_allowed = (1U << KEY_TYPE_deleted)|
143-
bch2_key_types_allowed[type] ;
144-
145157
if (k.k->u64s < BKEY_U64s)
146158
return "u64s too small";
147159

148-
if (!(key_types_allowed & (1U << k.k->type)))
160+
if (!(bch2_key_types_allowed[type] & (1U << k.k->type)))
149161
return "invalid key type for this btree";
150162

151163
if (type == BKEY_TYPE_btree &&
152164
bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX)
153165
return "value too big";
154166

155-
if (btree_node_type_is_extents(type)) {
156-
if ((k.k->size == 0) != bkey_deleted(k.k))
167+
if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) {
168+
if (k.k->size == 0)
157169
return "bad size field";
158170

159171
if (k.k->size > k.k->p.offset)

fs/bcachefs/btree_update_leaf.c

Lines changed: 105 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1002,21 +1002,24 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
10021002
goto next;
10031003
}
10041004

1005-
if (!bkey_cmp(k.k->p, bkey_start_pos(&insert->k)))
1005+
if (!bkey_cmp(k.k->p, start))
10061006
goto next;
10071007

10081008
while (bkey_cmp(insert->k.p, bkey_start_pos(k.k)) > 0) {
1009+
bool front_split = bkey_cmp(bkey_start_pos(k.k), start) < 0;
1010+
bool back_split = bkey_cmp(k.k->p, insert->k.p) > 0;
1011+
10091012
/*
10101013
* If we're going to be splitting a compressed extent, note it
10111014
* so that __bch2_trans_commit() can increase our disk
10121015
* reservation:
10131016
*/
1014-
if (bkey_cmp(bkey_start_pos(k.k), start) < 0 &&
1015-
bkey_cmp(k.k->p, insert->k.p) > 0 &&
1017+
if (((front_split && back_split) ||
1018+
((front_split || back_split) && k.k->p.snapshot != insert->k.p.snapshot)) &&
10161019
(compressed_sectors = bch2_bkey_sectors_compressed(k)))
10171020
trans->extra_journal_res += compressed_sectors;
10181021

1019-
if (bkey_cmp(bkey_start_pos(k.k), start) < 0) {
1022+
if (front_split) {
10201023
update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
10211024
if ((ret = PTR_ERR_OR_ZERO(update)))
10221025
goto err;
@@ -1027,6 +1030,32 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
10271030

10281031
bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p,
10291032
BTREE_ITER_NOT_EXTENTS|
1033+
BTREE_ITER_ALL_SNAPSHOTS|
1034+
BTREE_ITER_INTENT);
1035+
ret = bch2_btree_iter_traverse(&update_iter) ?:
1036+
bch2_trans_update(trans, &update_iter, update,
1037+
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
1038+
flags);
1039+
bch2_trans_iter_exit(trans, &update_iter);
1040+
1041+
if (ret)
1042+
goto err;
1043+
}
1044+
1045+
if (k.k->p.snapshot != insert->k.p.snapshot &&
1046+
(front_split || back_split)) {
1047+
update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
1048+
if ((ret = PTR_ERR_OR_ZERO(update)))
1049+
goto err;
1050+
1051+
bkey_reassemble(update, k);
1052+
1053+
bch2_cut_front(start, update);
1054+
bch2_cut_back(insert->k.p, update);
1055+
1056+
bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p,
1057+
BTREE_ITER_NOT_EXTENTS|
1058+
BTREE_ITER_ALL_SNAPSHOTS|
10301059
BTREE_ITER_INTENT);
10311060
ret = bch2_btree_iter_traverse(&update_iter) ?:
10321061
bch2_trans_update(trans, &update_iter, update,
@@ -1038,23 +1067,48 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
10381067
}
10391068

10401069
if (bkey_cmp(k.k->p, insert->k.p) <= 0) {
1041-
ret = bch2_btree_delete_at(trans, &iter, flags);
1070+
update = bch2_trans_kmalloc(trans, sizeof(*update));
1071+
if ((ret = PTR_ERR_OR_ZERO(update)))
1072+
goto err;
1073+
1074+
bkey_init(&update->k);
1075+
update->k.p = k.k->p;
1076+
1077+
if (insert->k.p.snapshot != k.k->p.snapshot) {
1078+
update->k.p.snapshot = insert->k.p.snapshot;
1079+
update->k.type = KEY_TYPE_whiteout;
1080+
}
1081+
1082+
bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p,
1083+
BTREE_ITER_NOT_EXTENTS|
1084+
BTREE_ITER_INTENT);
1085+
ret = bch2_btree_iter_traverse(&update_iter) ?:
1086+
bch2_trans_update(trans, &update_iter, update,
1087+
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
1088+
flags);
1089+
bch2_trans_iter_exit(trans, &update_iter);
1090+
10421091
if (ret)
10431092
goto err;
10441093
}
10451094

1046-
if (bkey_cmp(k.k->p, insert->k.p) > 0) {
1095+
if (back_split) {
10471096
update = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
10481097
if ((ret = PTR_ERR_OR_ZERO(update)))
10491098
goto err;
10501099

10511100
bkey_reassemble(update, k);
10521101
bch2_cut_front(insert->k.p, update);
10531102

1054-
ret = bch2_trans_update(trans, &iter, update, flags);
1103+
bch2_trans_copy_iter(&update_iter, &iter);
1104+
update_iter.pos = update->k.p;
1105+
ret = bch2_trans_update(trans, &update_iter, update,
1106+
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
1107+
flags);
1108+
bch2_trans_iter_exit(trans, &update_iter);
1109+
10551110
if (ret)
10561111
goto err;
1057-
10581112
goto out;
10591113
}
10601114
next:
@@ -1086,6 +1140,39 @@ static int bch2_trans_update_extent(struct btree_trans *trans,
10861140
return ret;
10871141
}
10881142

1143+
/*
1144+
* When deleting, check if we need to emit a whiteout (because we're overwriting
1145+
* something in an ancestor snapshot)
1146+
*
* @trans:    transaction used for the lookup
* @btree_id: btree to search
* @pos:      position (including snapshot ID) of the key being deleted; passed
*            by value, so the adjustment made below is local to this function
*
* Returns 0 when no whiteout is needed and 1 when a live (non-whiteout) key
* exists at this position in an ancestor snapshot. The caller in
* bch2_trans_update() treats a negative return as an error and any other
* nonzero return as "turn the deletion into KEY_TYPE_whiteout".
*
* NOTE(review): ret is only assigned explicitly inside the loop; it is
* presumably initialised by for_each_btree_key() (macro not shown here) -
* confirm.
*/
1147+
static int need_whiteout_for_snapshot(struct btree_trans *trans,
1148+
enum btree_id btree_id, struct bpos pos)
1149+
{
1150+
struct btree_iter iter;
1151+
struct bkey_s_c k;
1152+
/* Remember the snapshot we are deleting in; pos.snapshot is modified below. */
u32 snapshot = pos.snapshot;
1153+
int ret;
1154+
1155+
/* Presumably: this snapshot has no parent, so no ancestor key can exist. */
if (!bch2_snapshot_parent(trans->c, pos.snapshot))
1156+
return 0;
1157+
1158+
/*
 * Start the scan at the next snapshot ID so the key in our own snapshot is
 * not examined. NOTE(review): this assumes ancestor snapshots sort after
 * pos.snapshot in iteration order - confirm.
 */
pos.snapshot++;
1159+
1160+
for_each_btree_key(trans, iter, btree_id, pos,
1161+
BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
1162+
/*
 * Stop as soon as the key no longer compares equal to pos.
 * NOTE(review): pos.snapshot was incremented above, so this is a scan
 * over all snapshots at one position only if bkey_cmp() ignores the
 * snapshot field - confirm.
 */
if (bkey_cmp(k.k->p, pos))
1163+
break;
1164+
1165+
/*
 * The first key whose snapshot passes the ancestor check against our
 * original snapshot decides the result: a whiteout is needed unless
 * that key is itself deleted or a whiteout.
 */
if (bch2_snapshot_is_ancestor(trans->c, snapshot,
1166+
k.k->p.snapshot)) {
1167+
ret = !bkey_whiteout(k.k);
1168+
break;
1169+
}
1170+
}
1171+
/* Release the iterator on every path out of the loop. */
bch2_trans_iter_exit(trans, &iter);
1172+
1173+
return ret;
1174+
}
1175+
10891176
int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
10901177
struct bkey_i *k, enum btree_update_flags flags)
10911178
{
@@ -1118,6 +1205,16 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
11181205
btree_insert_entry_cmp(i - 1, i) >= 0);
11191206
#endif
11201207

1208+
if (bkey_deleted(&n.k->k) &&
1209+
(iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)) {
1210+
int ret = need_whiteout_for_snapshot(trans, n.btree_id, n.k->k.p);
1211+
if (unlikely(ret < 0))
1212+
return ret;
1213+
1214+
if (ret)
1215+
n.k->k.type = KEY_TYPE_whiteout;
1216+
}
1217+
11211218
/*
11221219
* Pending updates are kept sorted: first, find position of new update,
11231220
* then delete/trim any updates the new update overwrites:

0 commit comments

Comments
 (0)