Skip to content

Commit c075ff7

Browse files
koverstreet (Kent Overstreet)
authored and committed
bcachefs: BTREE_ITER_FILTER_SNAPSHOTS
For snapshots, we need to implement btree lookups that return the first key that's an ancestor of the snapshot ID the lookup is being done in - and filter out keys in unrelated snapshots. This patch adds the btree iterator flag BTREE_ITER_FILTER_SNAPSHOTS which does that filtering. Signed-off-by: Kent Overstreet <[email protected]>
1 parent 284ae18 commit c075ff7

File tree

4 files changed

+166
-15
lines changed

4 files changed

+166
-15
lines changed

fs/bcachefs/btree_iter.c

Lines changed: 154 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "extents.h"
1414
#include "journal.h"
1515
#include "replicas.h"
16+
#include "subvolume.h"
1617
#include "trace.h"
1718

1819
#include <linux/prefetch.h>
@@ -683,6 +684,55 @@ static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
683684
bkey_cmp(iter->pos, iter->k.p) > 0);
684685
}
685686

687+
static int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k)
688+
{
689+
struct btree_trans *trans = iter->trans;
690+
struct btree_iter copy;
691+
struct bkey_s_c prev;
692+
int ret = 0;
693+
694+
if (!bch2_debug_check_iterators)
695+
return 0;
696+
697+
if (!(iter->flags & BTREE_ITER_FILTER_SNAPSHOTS))
698+
return 0;
699+
700+
if (bkey_err(k) || !k.k)
701+
return 0;
702+
703+
BUG_ON(!bch2_snapshot_is_ancestor(trans->c,
704+
iter->snapshot,
705+
k.k->p.snapshot));
706+
707+
bch2_trans_iter_init(trans, &copy, iter->btree_id, iter->pos,
708+
BTREE_ITER_ALL_SNAPSHOTS);
709+
prev = bch2_btree_iter_prev(&copy);
710+
if (!prev.k)
711+
goto out;
712+
713+
ret = bkey_err(prev);
714+
if (ret)
715+
goto out;
716+
717+
if (!bkey_cmp(prev.k->p, k.k->p) &&
718+
bch2_snapshot_is_ancestor(trans->c, iter->snapshot,
719+
prev.k->p.snapshot) > 0) {
720+
char buf1[100], buf2[200];
721+
722+
bch2_bkey_to_text(&PBUF(buf1), k.k);
723+
bch2_bkey_to_text(&PBUF(buf2), prev.k);
724+
725+
panic("iter snap %u\n"
726+
"k %s\n"
727+
"prev %s\n",
728+
iter->snapshot,
729+
buf1, buf2);
730+
}
731+
out:
732+
bch2_trans_iter_exit(trans, &copy);
733+
return ret;
734+
}
735+
686736
#else
687737

688738
static inline void bch2_btree_path_verify_level(struct btree_trans *trans,
@@ -691,6 +741,7 @@ static inline void bch2_btree_path_verify(struct btree_trans *trans,
691741
struct btree_path *path) {}
692742
static inline void bch2_btree_iter_verify(struct btree_iter *iter) {}
693743
static inline void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) {}
744+
static inline int bch2_btree_iter_verify_ret(struct btree_iter *iter, struct bkey_s_c k) { return 0; }
694745

695746
#endif
696747

@@ -2004,11 +2055,25 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
20042055
}
20052056

20062057
if (likely(k.k)) {
2007-
if (likely(!bkey_deleted(k.k)))
2008-
break;
2058+
/*
2059+
* We can never have a key in a leaf node at POS_MAX, so
2060+
* we don't have to check these successor() calls:
2061+
*/
2062+
if ((iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) &&
2063+
!bch2_snapshot_is_ancestor(trans->c,
2064+
iter->snapshot,
2065+
k.k->p.snapshot)) {
2066+
search_key = bpos_successor(k.k->p);
2067+
continue;
2068+
}
20092069

2010-
/* Advance to next key: */
2011-
search_key = bkey_successor(iter, k.k->p);
2070+
if (bkey_whiteout(k.k) &&
2071+
!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) {
2072+
search_key = bkey_successor(iter, k.k->p);
2073+
continue;
2074+
}
2075+
2076+
break;
20122077
} else if (likely(bpos_cmp(iter->path->l[0].b->key.k.p, SPOS_MAX))) {
20132078
/* Advance to next leaf node: */
20142079
search_key = bpos_successor(iter->path->l[0].b->key.k.p);
@@ -2029,6 +2094,9 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
20292094
else if (bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
20302095
iter->pos = bkey_start_pos(k.k);
20312096

2097+
if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
2098+
iter->pos.snapshot = iter->snapshot;
2099+
20322100
cmp = bpos_cmp(k.k->p, iter->path->pos);
20332101
if (cmp) {
20342102
iter->path = bch2_btree_path_make_mut(trans, iter->path,
@@ -2041,6 +2109,10 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
20412109

20422110
bch2_btree_iter_verify_entry_exit(iter);
20432111
bch2_btree_iter_verify(iter);
2112+
ret = bch2_btree_iter_verify_ret(iter, k);
2113+
if (unlikely(ret))
2114+
return bkey_s_c_err(ret);
2115+
20442116
return k;
20452117
}
20462118

@@ -2064,14 +2136,20 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
20642136
{
20652137
struct btree_trans *trans = iter->trans;
20662138
struct bpos search_key = iter->pos;
2139+
struct btree_path *saved_path = NULL;
20672140
struct bkey_s_c k;
2141+
struct bkey saved_k;
2142+
const struct bch_val *saved_v;
20682143
int ret;
20692144

20702145
EBUG_ON(iter->path->cached || iter->path->level);
20712146
EBUG_ON(iter->flags & BTREE_ITER_WITH_UPDATES);
20722147
bch2_btree_iter_verify(iter);
20732148
bch2_btree_iter_verify_entry_exit(iter);
20742149

2150+
if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
2151+
search_key.snapshot = U32_MAX;
2152+
20752153
while (1) {
20762154
iter->path = btree_path_set_pos(trans, iter->path, search_key,
20772155
iter->flags & BTREE_ITER_INTENT);
@@ -2088,12 +2166,55 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
20882166
&iter->path->l[0], &iter->k);
20892167
if (!k.k ||
20902168
((iter->flags & BTREE_ITER_IS_EXTENTS)
2091-
? bkey_cmp(bkey_start_pos(k.k), iter->pos) >= 0
2092-
: bkey_cmp(k.k->p, iter->pos) > 0))
2169+
? bpos_cmp(bkey_start_pos(k.k), search_key) >= 0
2170+
: bpos_cmp(k.k->p, search_key) > 0))
20932171
k = btree_path_level_prev(trans, iter->path,
20942172
&iter->path->l[0], &iter->k);
20952173

20962174
if (likely(k.k)) {
2175+
if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS) {
2176+
if (k.k->p.snapshot == iter->snapshot)
2177+
goto got_key;
2178+
2179+
/*
2180+
* If we have a saved candidate, and we're no
2181+
* longer at the same _key_ (not pos), return
2182+
* that candidate
2183+
*/
2184+
if (saved_path && bkey_cmp(k.k->p, saved_k.p)) {
2185+
bch2_path_put(trans, iter->path,
2186+
iter->flags & BTREE_ITER_INTENT);
2187+
iter->path = saved_path;
2188+
saved_path = NULL;
2189+
iter->k = saved_k;
2190+
k.v = saved_v;
2191+
goto got_key;
2192+
}
2193+
2194+
if (bch2_snapshot_is_ancestor(iter->trans->c,
2195+
iter->snapshot,
2196+
k.k->p.snapshot)) {
2197+
if (saved_path)
2198+
bch2_path_put(trans, saved_path,
2199+
iter->flags & BTREE_ITER_INTENT);
2200+
saved_path = btree_path_clone(trans, iter->path,
2201+
iter->flags & BTREE_ITER_INTENT);
2202+
saved_k = *k.k;
2203+
saved_v = k.v;
2204+
}
2205+
2206+
search_key = bpos_predecessor(k.k->p);
2207+
continue;
2208+
}
2209+
got_key:
2210+
if (bkey_whiteout(k.k) &&
2211+
!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) {
2212+
search_key = bkey_predecessor(iter, k.k->p);
2213+
if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
2214+
search_key.snapshot = U32_MAX;
2215+
continue;
2216+
}
2217+
20972218
break;
20982219
} else if (likely(bpos_cmp(iter->path->l[0].b->data->min_key, POS_MIN))) {
20992220
/* Advance to previous leaf node: */
@@ -2111,7 +2232,12 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
21112232
/* Extents can straddle iter->pos: */
21122233
if (bkey_cmp(k.k->p, iter->pos) < 0)
21132234
iter->pos = k.k->p;
2235+
2236+
if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
2237+
iter->pos.snapshot = iter->snapshot;
21142238
out:
2239+
if (saved_path)
2240+
bch2_path_put(trans, saved_path, iter->flags & BTREE_ITER_INTENT);
21152241
iter->path->should_be_locked = true;
21162242

21172243
bch2_btree_iter_verify_entry_exit(iter);
@@ -2160,7 +2286,8 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
21602286
if (unlikely(ret))
21612287
return bkey_s_c_err(ret);
21622288

2163-
if (!(iter->flags & BTREE_ITER_IS_EXTENTS)) {
2289+
if ((iter->flags & BTREE_ITER_CACHED) ||
2290+
!(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) {
21642291
struct bkey_i *next_update;
21652292

21662293
next_update = btree_trans_peek_updates(iter);
@@ -2209,22 +2336,28 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
22092336
if (bkey_cmp(iter->pos, next) < 0) {
22102337
bkey_init(&iter->k);
22112338
iter->k.p = iter->pos;
2212-
bch2_key_resize(&iter->k,
2213-
min_t(u64, KEY_SIZE_MAX,
2214-
(next.inode == iter->pos.inode
2215-
? next.offset
2216-
: KEY_OFFSET_MAX) -
2217-
iter->pos.offset));
2339+
2340+
if (iter->flags & BTREE_ITER_IS_EXTENTS) {
2341+
bch2_key_resize(&iter->k,
2342+
min_t(u64, KEY_SIZE_MAX,
2343+
(next.inode == iter->pos.inode
2344+
? next.offset
2345+
: KEY_OFFSET_MAX) -
2346+
iter->pos.offset));
2347+
EBUG_ON(!iter->k.size);
2348+
}
22182349

22192350
k = (struct bkey_s_c) { &iter->k, NULL };
2220-
EBUG_ON(!k.k->size);
22212351
}
22222352
}
22232353

22242354
iter->path->should_be_locked = true;
22252355

22262356
bch2_btree_iter_verify_entry_exit(iter);
22272357
bch2_btree_iter_verify(iter);
2358+
ret = bch2_btree_iter_verify_ret(iter, k);
2359+
if (unlikely(ret))
2360+
return bkey_s_c_err(ret);
22282361

22292362
return k;
22302363
}
@@ -2392,6 +2525,13 @@ static void __bch2_trans_iter_init(struct btree_trans *trans,
23922525
if (!btree_type_has_snapshots(btree_id) &&
23932526
!(flags & __BTREE_ITER_ALL_SNAPSHOTS))
23942527
flags &= ~BTREE_ITER_ALL_SNAPSHOTS;
2528+
#if 0
2529+
/* let's have this be explicitly set: */
2530+
if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES &&
2531+
btree_type_has_snapshots(btree_id) &&
2532+
!(flags & BTREE_ITER_ALL_SNAPSHOTS))
2533+
flags |= BTREE_ITER_FILTER_SNAPSHOTS;
2534+
#endif
23952535

23962536
if (!(flags & BTREE_ITER_ALL_SNAPSHOTS))
23972537
pos.snapshot = btree_type_has_snapshots(btree_id)

fs/bcachefs/btree_iter.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,15 @@ static inline void bch2_btree_iter_set_pos_to_extent_start(struct btree_iter *it
260260
iter->pos = bkey_start_pos(&iter->k);
261261
}
262262

263+
static inline void bch2_btree_iter_set_snapshot(struct btree_iter *iter, u32 snapshot)
264+
{
265+
struct bpos pos = iter->pos;
266+
267+
iter->snapshot = snapshot;
268+
pos.snapshot = snapshot;
269+
bch2_btree_iter_set_pos(iter, pos);
270+
}
271+
263272
/*
264273
* Unlocks before scheduling
265274
* Note: does not revalidate iterator

fs/bcachefs/btree_key_cache.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,8 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
372372

373373
bch2_trans_iter_init(trans, &b_iter, key.btree_id, key.pos,
374374
BTREE_ITER_SLOTS|
375-
BTREE_ITER_INTENT);
375+
BTREE_ITER_INTENT|
376+
BTREE_ITER_ALL_SNAPSHOTS);
376377
bch2_trans_iter_init(trans, &c_iter, key.btree_id, key.pos,
377378
BTREE_ITER_CACHED|
378379
BTREE_ITER_CACHED_NOFILL|

fs/bcachefs/btree_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ struct btree_node_iter {
209209
#define BTREE_ITER_WITH_UPDATES (1 << 10)
210210
#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 11)
211211
#define BTREE_ITER_ALL_SNAPSHOTS (1 << 12)
212+
#define BTREE_ITER_FILTER_SNAPSHOTS (1 << 13)
212213

213214
enum btree_path_uptodate {
214215
BTREE_ITER_UPTODATE = 0,

0 commit comments

Comments
 (0)