Skip to content
This repository has been archived by the owner on Mar 4, 2024. It is now read-only.

Commit

Permalink
membership: Ensure configuration rollback is possible.
Browse files Browse the repository at this point in the history
Signed-off-by: Mathieu Borderé <[email protected]>
  • Loading branch information
Mathieu Borderé committed Jan 5, 2023
1 parent 01b7a57 commit d070881
Show file tree
Hide file tree
Showing 5 changed files with 254 additions and 60 deletions.
27 changes: 15 additions & 12 deletions src/membership.c
Original file line number Diff line number Diff line change
Expand Up @@ -156,22 +156,25 @@ int membershipRollback(struct raft *r)
assert(r->configuration_index != 0);

entry = logGet(r->log, r->configuration_index);

assert(entry != NULL);

/* Replace the current configuration with the last committed one. */
raft_configuration_close(&r->configuration);
raft_configuration_init(&r->configuration);

rv = configurationDecode(&entry->buf, &r->configuration);
if (rv != 0) {
return rv;
if (entry != NULL) {
/* Replace the current configuration with the last committed one. */
raft_configuration_close(&r->configuration);
raft_configuration_init(&r->configuration);

rv = configurationDecode(&entry->buf, &r->configuration);
if (rv != 0) {
return rv;
}
} else {
/* Configuration was truncated from log. */
rv = configurationRestorePrevious(r);
if (rv != 0) {
return rv;
}
}

configurationTrace(r, &r->configuration, "roll back config");

r->configuration_uncommitted_index = 0;

return 0;
}

Expand Down
43 changes: 40 additions & 3 deletions src/replication.c
Original file line number Diff line number Diff line change
Expand Up @@ -1237,10 +1237,17 @@ static void installSnapshotCb(struct raft_io_snapshot_put *req, int status)
goto discard;
}

tracef("restored snapshot with last index %llu", snapshot->index);
/* Enable configuration rollback if the next configuration after installing
* this snapshot needs to be rolled back. */
rv = configurationBackupCurrent(r);
if (rv != 0) {
/* Don't make this a hard fault, configuration rollback is a low
* probability event. */
tracef("failed to backup current configuration.");
}

tracef("restored snapshot with last index %llu", snapshot->index);
result.rejected = 0;

goto respond;

discard:
Expand Down Expand Up @@ -1468,6 +1475,7 @@ static void takeSnapshotCb(struct raft_io_snapshot_put *req, int status)
{
struct raft *r = req->data;
struct raft_snapshot *snapshot;
int rv;

r->snapshot.put.data = NULL;
snapshot = &r->snapshot.pending;
Expand All @@ -1478,6 +1486,16 @@ static void takeSnapshotCb(struct raft_io_snapshot_put *req, int status)
goto out;
}

/* Copy the (committed) configuration from the snapshot for rollback
* purposes, unless there's already a newer committed configuration. */
if (r->configuration_index == snapshot->configuration_index) {
rv = configurationBackupCurrent(r);
if (rv != 0) {
/* Don't make this a hard fault, configuration rollback is a low
* probability event. */
tracef("failed to backup last committed configuration.");
}
}
logSnapshot(r->log, snapshot->index, r->snapshot.trailing);
out:
takeSnapshotClose(r, snapshot);
Expand Down Expand Up @@ -1531,6 +1549,24 @@ static int takeSnapshotAsync(struct raft_io_async_work *take)
return r->fsm->snapshot_async(r->fsm, &snapshot->bufs, &snapshot->n_bufs);
}

/* Fill @dst with the last committed configuration.
 *
 * The log entry at r->configuration_index is looked up first. If it is still
 * present in the in-memory log, its payload is decoded into @dst. If logGet()
 * returns NULL (presumably because the entry was truncated from the log after
 * a snapshot -- confirm against logGet), r->configuration_previous is copied
 * into @dst instead.
 *
 * @dst is passed straight to configurationDecode()/configurationCopy(); the
 * caller is expected to have initialized it beforehand.
 *
 * Returns 0 on success, or the non-zero code reported by
 * configurationDecode()/configurationCopy() on failure. */
static int copyLastCommittedConfiguration(const struct raft *r,
                                          struct raft_configuration *dst)
{
    const struct raft_entry *entry;
    int rv;

    entry = logGet(r->log, r->configuration_index);
    if (entry != NULL) {
        tracef("entry != NULL index:%llu", r->configuration_index);
        rv = configurationDecode(&entry->buf, dst);
    } else {
        tracef("entry == NULL index:%llu", r->configuration_index);
        rv = configurationCopy(&r->configuration_previous, dst);
    }
    if (rv != 0) {
        /* Propagate the failure before looking at @dst: asserting on a
         * destination that was never filled in would abort the process
         * instead of letting the caller handle the error. */
        return rv;
    }

    /* A successfully restored configuration must contain at least one
     * server. */
    assert(dst->n != 0);
    return 0;
}

static int takeSnapshot(struct raft *r)
{
struct raft_snapshot *snapshot;
Expand All @@ -1544,7 +1580,8 @@ static int takeSnapshot(struct raft *r)
snapshot->bufs = NULL;
snapshot->n_bufs = 0;

rv = configurationCopy(&r->configuration, &snapshot->configuration);
configurationInit(&snapshot->configuration);
rv = copyLastCommittedConfiguration(r, &snapshot->configuration);
if (rv != 0) {
goto abort;
}
Expand Down
1 change: 1 addition & 0 deletions src/snapshot.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ int snapshotRestore(struct raft *r, struct raft_snapshot *snapshot)
configurationClose(&r->configuration);
r->configuration = snapshot->configuration;
r->configuration_index = snapshot->configuration_index;
r->configuration_uncommitted_index = 0;
configurationTrace(r, &r->configuration, "configuration restore from snapshot");

r->commit_index = snapshot->index;
Expand Down
108 changes: 63 additions & 45 deletions src/start.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,60 +12,69 @@

#define tracef(...) Tracef(r->tracer, __VA_ARGS__)

/* Restore the most recent configuration. */
static int restoreMostRecentConfiguration(struct raft *r,
struct raft_entry *entry,
raft_index index)
/* Restore the most recent configurations. */
static int restoreConfigurations(struct raft *r, raft_index prev_index,
raft_index last_index, struct raft_entry *last)
{
struct raft_configuration configuration;
struct raft_configuration last_conf;
int rv;
raft_configuration_init(&configuration);
rv = configurationDecode(&entry->buf, &configuration);
if (rv != 0) {
raft_configuration_close(&configuration);
return rv;

/* No configuration entry loaded, nothing to do */
if (last == NULL) {
assert(prev_index == 0);
return 0;
} else {
/* There is a latest configuration, we can't know if it's
* committed or not. Backup the configuration restored from the snapshot
* or noop in case there was no snapshot. */
configurationBackupCurrent(r);
raft_configuration_init(&last_conf);
rv = configurationDecode(&last->buf, &last_conf);
if (rv != 0) {
raft_configuration_close(&last_conf);
return rv;
}
configurationClose(&r->configuration);
r->configuration = last_conf;
r->configuration_uncommitted_index = last_index;

/* If the last configuration is the first entry in the log, we know it's
* the bootstrap configuration and it's committed by default. */
if (last_index == 1) {
assert(prev_index == 0);
r->configuration_index = 1;
r->configuration_uncommitted_index = 0;
}

/* If there is a previous configuration it must have been committed as
* we don't allow multiple uncommitted configurations. */
if (prev_index != 0) {
r->configuration_index = prev_index;
}
}
configurationTrace(r, &configuration, "restore most recent configuration");
raft_configuration_close(&r->configuration);
r->configuration = configuration;
r->configuration_index = index;

configurationTrace(r, &r->configuration, "restore most recent configuration");
return 0;
}

/* Restore the entries that were loaded from persistent storage. The most recent
* configuration entry will be restored as well, if any.
*
* Note that we don't care whether the most recent configuration entry was
* actually committed or not. We don't allow more than one pending uncommitted
* configuration change at a time, plus
*
* when adding or removing just a single server, it is safe to switch directly
* to the new configuration.
*
* and
*
* The new configuration takes effect on each server as soon as it is added to
* that server's log: the C_new entry is replicated to the C_new servers, and
* a majority of the new configuration is used to determine the C_new entry's
* commitment. This means that servers do not wait for configuration entries to
* be committed, and each server always uses the latest configuration found in
* its log.
*
* as explained in section 4.1.
*
* TODO: we should probably set configuration_uncommitted_index as well, since we
* can't be sure a configuration change has been committed and we need to be
* ready to roll back to the last committed configuration.
*/
* Note that we cannot know if the last configuration in the log was committed
* or not, therefore we also need to track the second-to-last configuration
* entry. This second-to-last entry is committed by default as raft doesn't
* allow multiple uncommitted configuration entries and is used in case of
* configuration rollback scenarios. */
static int restoreEntries(struct raft *r,
raft_index snapshot_index,
raft_term snapshot_term,
raft_index start_index,
struct raft_entry *entries,
size_t n)
{
struct raft_entry *conf = NULL;
raft_index conf_index = 0;
struct raft_entry *last_conf = NULL;
raft_index last_conf_index = 0;
raft_index prev_conf_index = 0;
size_t i;
int rv;
logStart(r->log, snapshot_index, snapshot_term, start_index);
Expand All @@ -79,16 +88,17 @@ static int restoreEntries(struct raft *r,
}
r->last_stored++;
if (entry->type == RAFT_CHANGE) {
conf = entry;
conf_index = r->last_stored;
prev_conf_index = last_conf_index;
last_conf = entry;
last_conf_index = r->last_stored;
}
}
if (conf != NULL) {
rv = restoreMostRecentConfiguration(r, conf, conf_index);
if (rv != 0) {
goto err;
}

rv = restoreConfigurations(r, prev_conf_index, last_conf_index, last_conf);
if (rv != 0) {
goto err;
}

raft_free(entries);
return 0;

Expand Down Expand Up @@ -164,6 +174,14 @@ int raft_start(struct raft *r)
snapshot_index = snapshot->index;
snapshot_term = snapshot->term;
raft_free(snapshot);

/* Enable configuration rollback if the next configuration after installing
* this snapshot needs to be rolled back. */
rv = configurationBackupCurrent(r);
if (rv != 0) {
tracef("failed to backup current configuration.");
return rv;
}
} else if (n_entries > 0) {
/* If we don't have a snapshot and the on-disk log is not empty, then
* the first entry must be a configuration entry. */
Expand Down
Loading

0 comments on commit d070881

Please sign in to comment.