Skip to content

Commit

Permalink
bcachefs: Use a heap for handling overwrites in btree node scan
Browse files Browse the repository at this point in the history
Fix an O(n^2) issue when we find many overlapping (overwritten) btree
nodes - especially when one node overwrites many smaller nodes.

This was discovered to be an issue with the bcachefs
merge_torture_flakey test - if we had a large btree that was then
emptied, the number of difficult overwrites can be unbounded.

Cc: Kuan-Wei Chiu <[email protected]>
Signed-off-by: Kent Overstreet <[email protected]>
  • Loading branch information
Kent Overstreet committed Dec 8, 2024
1 parent 51d1eda commit f22a971
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 48 deletions.
133 changes: 86 additions & 47 deletions fs/bcachefs/btree_node_scan.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "recovery_passes.h"

#include <linux/kthread.h>
#include <linux/min_heap.h>
#include <linux/sort.h>

struct find_btree_nodes_worker {
Expand All @@ -31,8 +32,6 @@ static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, con

if (n->range_updated)
prt_str(out, " range updated");
if (n->overwritten)
prt_str(out, " overwritten");

for (unsigned i = 0; i < n->nr_ptrs; i++) {
prt_char(out, ' ');
Expand Down Expand Up @@ -140,6 +139,24 @@ static int found_btree_node_cmp_pos(const void *_l, const void *_r)
-found_btree_node_cmp_time(l, r);
}

static inline bool found_btree_node_cmp_pos_less(const void *l, const void *r, void *arg)
{
return found_btree_node_cmp_pos(l, r) < 0;
}

static inline void found_btree_node_swap(void *_l, void *_r, void *arg)
{
struct found_btree_node *l = _l;
struct found_btree_node *r = _r;

swap(*l, *r);
}

const struct min_heap_callbacks found_btree_node_heap_cbs = {
.less = found_btree_node_cmp_pos_less,
.swp = found_btree_node_swap,
};

static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
struct bio *bio, struct btree_node *bn, u64 offset)
{
Expand Down Expand Up @@ -295,55 +312,48 @@ static int read_btree_nodes(struct find_btree_nodes *f)
return f->ret ?: ret;
}

static void bubble_up(struct found_btree_node *n, struct found_btree_node *end)
static bool nodes_overlap(const struct found_btree_node *l,
const struct found_btree_node *r)
{
while (n + 1 < end &&
found_btree_node_cmp_pos(n, n + 1) > 0) {
swap(n[0], n[1]);
n++;
}
return (l->btree_id == r->btree_id &&
l->level == r->level &&
bpos_gt(l->max_key, r->min_key));
}

static int handle_overwrites(struct bch_fs *c,
struct found_btree_node *start,
struct found_btree_node *end)
struct found_btree_node *l,
found_btree_nodes *nodes_heap)
{
struct found_btree_node *n;
again:
for (n = start + 1;
n < end &&
n->btree_id == start->btree_id &&
n->level == start->level &&
bpos_lt(n->min_key, start->max_key);
n++) {
int cmp = found_btree_node_cmp_time(start, n);
struct found_btree_node *r;

while ((r = min_heap_peek(nodes_heap)) &&
nodes_overlap(l, r)) {
int cmp = found_btree_node_cmp_time(l, r);

if (cmp > 0) {
if (bpos_cmp(start->max_key, n->max_key) >= 0)
n->overwritten = true;
if (bpos_cmp(l->max_key, r->max_key) >= 0)
min_heap_pop(nodes_heap, &found_btree_node_heap_cbs, NULL);
else {
n->range_updated = true;
n->min_key = bpos_successor(start->max_key);
n->range_updated = true;
bubble_up(n, end);
goto again;
r->range_updated = true;
r->min_key = bpos_successor(l->max_key);
r->range_updated = true;
min_heap_sift_down(nodes_heap, 0, &found_btree_node_heap_cbs, NULL);
}
} else if (cmp < 0) {
BUG_ON(bpos_cmp(n->min_key, start->min_key) <= 0);
BUG_ON(bpos_eq(l->min_key, r->min_key));

start->max_key = bpos_predecessor(n->min_key);
start->range_updated = true;
} else if (n->level) {
n->overwritten = true;
l->max_key = bpos_predecessor(r->min_key);
l->range_updated = true;
} else if (r->level) {
min_heap_pop(nodes_heap, &found_btree_node_heap_cbs, NULL);
} else {
if (bpos_cmp(start->max_key, n->max_key) >= 0)
n->overwritten = true;
if (bpos_cmp(l->max_key, r->max_key) >= 0)
min_heap_pop(nodes_heap, &found_btree_node_heap_cbs, NULL);
else {
n->range_updated = true;
n->min_key = bpos_successor(start->max_key);
n->range_updated = true;
bubble_up(n, end);
goto again;
r->range_updated = true;
r->min_key = bpos_successor(l->max_key);
r->range_updated = true;
min_heap_sift_down(nodes_heap, 0, &found_btree_node_heap_cbs, NULL);
}
}
}
Expand All @@ -355,6 +365,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c)
{
struct find_btree_nodes *f = &c->found_btree_nodes;
struct printbuf buf = PRINTBUF;
found_btree_nodes nodes_heap = {};
size_t dst;
int ret = 0;

Expand Down Expand Up @@ -409,29 +420,57 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c)
bch2_print_string_as_lines(KERN_INFO, buf.buf);
}

dst = 0;
darray_for_each(f->nodes, i) {
if (i->overwritten)
continue;
swap(nodes_heap, f->nodes);

{
/* darray must have same layout as a heap */
min_heap_char real_heap;
BUILD_BUG_ON(sizeof(nodes_heap.nr) != sizeof(real_heap.nr));
BUILD_BUG_ON(sizeof(nodes_heap.size) != sizeof(real_heap.size));
BUILD_BUG_ON(offsetof(found_btree_nodes, nr) != offsetof(min_heap_char, nr));
BUILD_BUG_ON(offsetof(found_btree_nodes, size) != offsetof(min_heap_char, size));
}

ret = handle_overwrites(c, i, &darray_top(f->nodes));
min_heapify_all(&nodes_heap, &found_btree_node_heap_cbs, NULL);

if (nodes_heap.nr) {
ret = darray_push(&f->nodes, *min_heap_peek(&nodes_heap));
if (ret)
goto err;

BUG_ON(i->overwritten);
f->nodes.data[dst++] = *i;
min_heap_pop(&nodes_heap, &found_btree_node_heap_cbs, NULL);
}
f->nodes.nr = dst;

if (c->opts.verbose) {
while (true) {
ret = handle_overwrites(c, &darray_last(f->nodes), &nodes_heap);
if (ret)
goto err;

if (!nodes_heap.nr)
break;

ret = darray_push(&f->nodes, *min_heap_peek(&nodes_heap));
if (ret)
goto err;

min_heap_pop(&nodes_heap, &found_btree_node_heap_cbs, NULL);
}

for (struct found_btree_node *n = f->nodes.data; n < &darray_last(f->nodes); n++)
BUG_ON(nodes_overlap(n, n + 1));

if (0 && c->opts.verbose) {
printbuf_reset(&buf);
prt_printf(&buf, "%s: nodes found after overwrites:\n", __func__);
found_btree_nodes_to_text(&buf, c, f->nodes);
bch2_print_string_as_lines(KERN_INFO, buf.buf);
} else {
bch_info(c, "btree node scan found %zu nodes after overwrites", f->nodes.nr);
}

eytzinger0_sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL);
err:
darray_exit(&nodes_heap);
printbuf_exit(&buf);
return ret;
}
Expand Down
1 change: 0 additions & 1 deletion fs/bcachefs/btree_node_scan_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

struct found_btree_node {
bool range_updated:1;
bool overwritten:1;
u8 btree_id;
u8 level;
unsigned sectors_written;
Expand Down

0 comments on commit f22a971

Please sign in to comment.