Skip to content

Commit

Permalink
bcachefs: bch2_ec_stripe_head_get() now checks for change in rw devices
Browse files Browse the repository at this point in the history
This factors out ec_stripe_head_devs_update(), which initializes the
bitmap of devices we're allocating from, and runs it every time
c->rw_devs_change_count changes.

We also cancel pending, not allocated stripes, since they may refer to
devices that are no longer available.

Signed-off-by: Kent Overstreet <[email protected]>
  • Loading branch information
Kent Overstreet committed Sep 7, 2024
1 parent 3b673d5 commit cf391af
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 28 deletions.
81 changes: 53 additions & 28 deletions fs/bcachefs/ec.c
Original file line number Diff line number Diff line change
Expand Up @@ -1568,10 +1568,12 @@ void bch2_ec_do_stripe_creates(struct bch_fs *c)
bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
}

static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
static void ec_stripe_new_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
{
struct ec_stripe_new *s = h->s;

lockdep_assert_held(&h->lock);

BUG_ON(!s->allocated && !s->err);

h->s = NULL;
Expand All @@ -1584,6 +1586,12 @@ static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
ec_stripe_new_put(c, s, STRIPE_REF_io);
}

static void ec_stripe_new_cancel(struct bch_fs *c, struct ec_stripe_head *h, int err)
{
h->s->err = err;
ec_stripe_new_set_pending(c, h);
}

void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob)
{
struct ec_stripe_new *s = ob->ec;
Expand Down Expand Up @@ -1707,27 +1715,12 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
return 0;
}

static struct ec_stripe_head *
ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
unsigned algo, unsigned redundancy,
enum bch_watermark watermark)
static void ec_stripe_head_devs_update(struct bch_fs *c, struct ec_stripe_head *h)
{
struct ec_stripe_head *h;

h = kzalloc(sizeof(*h), GFP_KERNEL);
if (!h)
return NULL;

mutex_init(&h->lock);
BUG_ON(!mutex_trylock(&h->lock));

h->disk_label = disk_label;
h->algo = algo;
h->redundancy = redundancy;
h->watermark = watermark;

rcu_read_lock();
h->devs = target_rw_devs(c, BCH_DATA_user, disk_label ? group_to_target(disk_label - 1) : 0);
h->devs = target_rw_devs(c, BCH_DATA_user, h->disk_label
? group_to_target(h->disk_label - 1)
: 0);
unsigned nr_devs = dev_mask_nr(&h->devs);

for_each_member_device_rcu(c, ca, &h->devs)
Expand All @@ -1737,6 +1730,7 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,

h->blocksize = pick_blocksize(c, &h->devs);

h->nr_active_devs = 0;
for_each_member_device_rcu(c, ca, &h->devs)
if (ca->mi.bucket_size == h->blocksize)
h->nr_active_devs++;
Expand All @@ -1747,7 +1741,9 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
* If we only have redundancy + 1 devices, we're better off with just
* replication:
*/
if (h->nr_active_devs < h->redundancy + 2) {
h->insufficient_devs = h->nr_active_devs < h->redundancy + 2;

if (h->insufficient_devs) {
const char *err;

if (nr_devs < h->redundancy + 2)
Expand All @@ -1762,6 +1758,31 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
h->nr_active_devs, h->redundancy + 2, err);
}

if (h->s && !h->s->allocated)
ec_stripe_new_cancel(c, h, -EINTR);

h->rw_devs_change_count = c->rw_devs_change_count;
}

/*
 * Allocate a zeroed stripe head for the given (disk_label, algo, redundancy,
 * watermark) tuple and add it to c->ec_stripe_head_list.
 *
 * Returns the new head with head->lock already taken, or NULL if the
 * allocation failed.
 *
 * Only the identifying fields are filled in here; everything else is left
 * zeroed by kzalloc(). Device-dependent state is computed separately by
 * ec_stripe_head_devs_update().
 *
 * NOTE(review): the list is modified without taking a lock here, so this is
 * presumably called with c->ec_stripe_head_lock held - confirm against callers.
 */
static struct ec_stripe_head *
ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
			 unsigned algo, unsigned redundancy,
			 enum bch_watermark watermark)
{
	struct ec_stripe_head *head = kzalloc(sizeof(*head), GFP_KERNEL);

	if (!head)
		return NULL;

	head->disk_label	= disk_label;
	head->algo		= algo;
	head->redundancy	= redundancy;
	head->watermark		= watermark;

	/* Freshly initialised and not yet published, so trylock cannot fail */
	mutex_init(&head->lock);
	BUG_ON(!mutex_trylock(&head->lock));

	list_add(&head->list, &c->ec_stripe_head_list);

	return head;
}
Expand All @@ -1772,7 +1793,7 @@ void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h)
h->s->allocated &&
bitmap_weight(h->s->blocks_allocated,
h->s->nr_data) == h->s->nr_data)
ec_stripe_set_pending(c, h);
ec_stripe_new_set_pending(c, h);

mutex_unlock(&h->lock);
}
Expand All @@ -1797,7 +1818,7 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,

if (test_bit(BCH_FS_going_ro, &c->flags)) {
h = ERR_PTR(-BCH_ERR_erofs_no_writes);
goto found;
goto err;
}

list_for_each_entry(h, &c->ec_stripe_head_list, list)
Expand All @@ -1806,18 +1827,23 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
h->redundancy == redundancy &&
h->watermark == watermark) {
ret = bch2_trans_mutex_lock(trans, &h->lock);
if (ret)
if (ret) {
h = ERR_PTR(ret);
goto err;
}
goto found;
}

h = ec_new_stripe_head_alloc(c, disk_label, algo, redundancy, watermark);
found:
if (!IS_ERR_OR_NULL(h) &&
h->nr_active_devs < h->redundancy + 2) {
if (h->rw_devs_change_count != c->rw_devs_change_count)
ec_stripe_head_devs_update(c, h);

if (h->insufficient_devs) {
mutex_unlock(&h->lock);
h = NULL;
}
err:
mutex_unlock(&c->ec_stripe_head_lock);
return h;
}
Expand Down Expand Up @@ -2267,8 +2293,7 @@ static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca)
}
goto unlock;
found:
h->s->err = -BCH_ERR_erofs_no_writes;
ec_stripe_set_pending(c, h);
ec_stripe_new_cancel(c, h, -BCH_ERR_erofs_no_writes);
unlock:
mutex_unlock(&h->lock);
}
Expand Down
3 changes: 3 additions & 0 deletions fs/bcachefs/ec.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,9 @@ struct ec_stripe_head {
unsigned algo;
unsigned redundancy;
enum bch_watermark watermark;
bool insufficient_devs;

unsigned long rw_devs_change_count;

u64 nr_created;

Expand Down

0 comments on commit cf391af

Please sign in to comment.