Skip to content

Commit

Permalink
bcachefs: bcachefs_metadata_version_inode_depth
Browse files Browse the repository at this point in the history
This adds a new inode field, bi_depth, for directory inodes: this allows
us to make the check_directory_structure pass much more efficient.

Currently, to ensure the filesystem is fully connect and has no loops,
for every directory we follow backpointers until we find the root. But
by adding a depth counter, it sufficies to only check the parent of each
directory, and check that the parent's bi_depth is smaller.

(fsck doesn't require that bi_depth = parent->bi_depth + 1; if a rename
causes bi_depth off, but the chain to the root is still strictly
decreasing, then the algorithm still works and there's no need for fsck
to fixup the bi_depth fields).

We've already checked backpointers, so we know that every directory
(excluding the root)has a valid parent: if bi_depth is always
decreasing, every chain must terminate, and terminate at the root
directory.

bi_depth will not necessarily be correct when fsck runs, due to
directory renames - we can't change bi_depth on every child directory
when renaming a directory. That's ok; fsck will silently fix the
bi_depth field as needed, and future fsck runs will be much faster.

Signed-off-by: Kent Overstreet <[email protected]>
  • Loading branch information
Kent Overstreet committed Dec 4, 2024
1 parent 32f34f1 commit 4a32728
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 20 deletions.
3 changes: 2 additions & 1 deletion fs/bcachefs/bcachefs_format.h
Original file line number Diff line number Diff line change
Expand Up @@ -681,7 +681,8 @@ struct bch_sb_field_ext {
x(inode_has_child_snapshots, BCH_VERSION(1, 13)) \
x(backpointer_bucket_gen, BCH_VERSION(1, 14)) \
x(disk_accounting_big_endian, BCH_VERSION(1, 15)) \
x(reflink_p_may_update_opts, BCH_VERSION(1, 16))
x(reflink_p_may_update_opts, BCH_VERSION(1, 16)) \
x(inode_depth, BCH_VERSION(1, 17))

enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
Expand Down
13 changes: 13 additions & 0 deletions fs/bcachefs/fs-common.c
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,10 @@ int bch2_create_trans(struct btree_trans *trans,
new_inode->bi_dir_offset = dir_offset;
}

if (S_ISDIR(mode) &&
!new_inode->bi_subvol)
new_inode->bi_depth = dir_u->bi_depth + 1;

inode_iter.flags &= ~BTREE_ITER_all_snapshots;
bch2_btree_iter_set_snapshot(&inode_iter, snapshot);

Expand Down Expand Up @@ -512,6 +516,15 @@ int bch2_rename_trans(struct btree_trans *trans,
dst_dir_u->bi_nlink++;
}

if (S_ISDIR(src_inode_u->bi_mode) &&
!src_inode_u->bi_subvol)
src_inode_u->bi_depth = dst_dir_u->bi_depth + 1;

if (mode == BCH_RENAME_EXCHANGE &&
S_ISDIR(dst_inode_u->bi_mode) &&
!dst_inode_u->bi_subvol)
dst_inode_u->bi_depth = src_dir_u->bi_depth + 1;

if (dst_inum.inum && is_subdir_for_nlink(dst_inode_u)) {
dst_dir_u->bi_nlink--;
src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE;
Expand Down
91 changes: 73 additions & 18 deletions fs/bcachefs/fsck.c
Original file line number Diff line number Diff line change
Expand Up @@ -2791,6 +2791,48 @@ struct pathbuf_entry {

typedef DARRAY(struct pathbuf_entry) pathbuf;

static int bch2_bi_depth_renumber_one(struct btree_trans *trans, struct pathbuf_entry *p,
u32 new_depth)
{
struct btree_iter iter;
struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
SPOS(0, p->inum, p->snapshot), 0);

struct bch_inode_unpacked inode;
int ret = bkey_err(k) ?:
!bkey_is_inode(k.k) ? -BCH_ERR_ENOENT_inode
: bch2_inode_unpack(k, &inode);
if (ret)
goto err;

if (inode.bi_depth != new_depth) {
inode.bi_depth = new_depth;
ret = __bch2_fsck_write_inode(trans, &inode) ?:
bch2_trans_commit(trans, NULL, NULL, 0);
}
err:
bch2_trans_iter_exit(trans, &iter);
return ret;
}

static int bch2_bi_depth_renumber(struct btree_trans *trans, pathbuf *path, u32 new_bi_depth)
{
u32 restart_count = trans->restart_count;
int ret = 0;

darray_for_each_reverse(*path, i) {
ret = nested_lockrestart_do(trans,
bch2_bi_depth_renumber_one(trans, i, new_bi_depth));
bch_err_fn(trans->c, ret);
if (ret)
break;

new_bi_depth++;
}

return ret ?: trans_was_restarted(trans, restart_count);
}

static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot)
{
darray_for_each(*p, i)
Expand All @@ -2800,22 +2842,20 @@ static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot)
return false;
}

static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c inode_k)
static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
{
struct bch_fs *c = trans->c;
struct btree_iter inode_iter = {};
struct bch_inode_unpacked inode;
pathbuf path = {};
struct printbuf buf = PRINTBUF;
u32 snapshot = inode_k.k->p.snapshot;
bool redo_bi_depth = false;
u32 min_bi_depth = U32_MAX;
int ret = 0;

p->nr = 0;

struct bch_inode_unpacked inode;
BUG_ON(bch2_inode_unpack(inode_k, &inode));

if (!S_ISDIR(inode.bi_mode))
return 0;

while (!inode.bi_subvol) {
struct btree_iter dirent_iter;
struct bkey_s_c_dirent d;
Expand All @@ -2839,7 +2879,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino

bch2_trans_iter_exit(trans, &dirent_iter);

ret = darray_push(p, ((struct pathbuf_entry) {
ret = darray_push(&path, ((struct pathbuf_entry) {
.inum = inode.bi_inum,
.snapshot = snapshot,
}));
Expand All @@ -2851,22 +2891,32 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
bch2_trans_iter_exit(trans, &inode_iter);
inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes,
SPOS(0, inode.bi_dir, snapshot), 0);

struct bch_inode_unpacked parent_inode;
ret = bkey_err(inode_k) ?:
!bkey_is_inode(inode_k.k) ? -BCH_ERR_ENOENT_inode
: bch2_inode_unpack(inode_k, &inode);
: bch2_inode_unpack(inode_k, &parent_inode);
if (ret) {
/* Should have been caught in dirents pass */
bch_err_msg(c, ret, "error looking up parent directory");
break;
}

min_bi_depth = parent_inode.bi_depth;

if (parent_inode.bi_depth < inode.bi_depth &&
min_bi_depth < U16_MAX)
break;

inode = parent_inode;
snapshot = inode_k.k->p.snapshot;
redo_bi_depth = true;

if (path_is_dup(p, inode.bi_inum, snapshot)) {
if (path_is_dup(&path, inode.bi_inum, snapshot)) {
/* XXX print path */
bch_err(c, "directory structure loop");

darray_for_each(*p, i)
darray_for_each(path, i)
pr_err("%llu:%u", i->inum, i->snapshot);
pr_err("%llu:%u", inode.bi_inum, snapshot);

Expand All @@ -2879,12 +2929,21 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
ret = reattach_inode(trans, &inode);
bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
}

redo_bi_depth = false;
break;
}
}

if (inode.bi_subvol)
min_bi_depth = 0;

if (redo_bi_depth)
ret = bch2_bi_depth_renumber(trans, &path, min_bi_depth);
out:
fsck_err:
bch2_trans_iter_exit(trans, &inode_iter);
darray_exit(&path);
printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
Expand All @@ -2896,24 +2955,20 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
*/
int bch2_check_directory_structure(struct bch_fs *c)
{
pathbuf path = { 0, };
int ret;

ret = bch2_trans_run(c,
int ret = bch2_trans_run(c,
for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN,
BTREE_ITER_intent|
BTREE_ITER_prefetch|
BTREE_ITER_all_snapshots, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
if (!bkey_is_inode(k.k))
if (!S_ISDIR(bkey_inode_mode(k)))
continue;

if (bch2_inode_flags(k) & BCH_INODE_unlinked)
continue;

check_path(trans, &path, k);
check_path_loop(trans, k);
})));
darray_exit(&path);

bch_err_fn(c, ret);
return ret;
Expand Down
14 changes: 14 additions & 0 deletions fs/bcachefs/inode.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,20 @@ static inline u32 bch2_inode_flags(struct bkey_s_c k)
}
}

static inline unsigned bkey_inode_mode(struct bkey_s_c k)
{
switch (k.k->type) {
case KEY_TYPE_inode:
return le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode);
case KEY_TYPE_inode_v2:
return le16_to_cpu(bkey_s_c_to_inode_v2(k).v->bi_mode);
case KEY_TYPE_inode_v3:
return INODEv3_MODE(bkey_s_c_to_inode_v3(k).v);
default:
return 0;
}
}

/* i_nlink: */

static inline unsigned nlink_bias(umode_t mode)
Expand Down
3 changes: 2 additions & 1 deletion fs/bcachefs/inode_format.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ struct bch_inode_generation {
x(bi_dir_offset, 64) \
x(bi_subvol, 32) \
x(bi_parent_subvol, 32) \
x(bi_nocow, 8)
x(bi_nocow, 8) \
x(bi_depth, 32)

/* subset of BCH_INODE_FIELDS */
#define BCH_INODE_OPTS() \
Expand Down

0 comments on commit 4a32728

Please sign in to comment.