Skip to content
This repository has been archived by the owner on Mar 4, 2024. It is now read-only.

Config rollback #354

Merged
merged 5 commits into from
Jan 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions include/raft.h
Original file line number Diff line number Diff line change
Expand Up @@ -634,7 +634,8 @@ struct raft
* 1. #configuration_index and #configuration_uncommitted_index are both
* zero. This should only happen when a brand new server starts joining a
* cluster and is waiting to receive log entries from the current
* leader. In this case #configuration must be empty and have no servers.
* leader. In this case #configuration and #configuration_previous
* must be empty and have no servers.
*
* 2. #configuration_index is non-zero while #configuration_uncommitted_index
* is zero. In this case the content of #configuration must match the one
Expand All @@ -645,11 +646,12 @@ struct raft
* the content of #configuration must match the one of the log entry at
* #configuration_uncommitted_index.
*
* TODO previous_configuration will always contain a copy of the previous
* configuration, if any, and is used in configuration rollback scenarios.
* 4. In case the previous - committed - configuration can no longer be found
* in the log e.g. after truncating the log when taking or installing a
* snapshot, `configuration_previous` will contain a copy of it.
*/
struct raft_configuration configuration;
struct raft_configuration configuration_previous; //currently not used.
struct raft_configuration configuration_previous;
raft_index configuration_index;
raft_index configuration_uncommitted_index;

Expand Down
40 changes: 40 additions & 0 deletions src/configuration.c
Original file line number Diff line number Diff line change
Expand Up @@ -349,4 +349,44 @@ void configurationTrace(const struct raft *r, struct raft_configuration *c, cons
}
tracef("=== CONFIG END ===");
}

int configurationBackup(struct raft *r, struct raft_configuration *src)
{
int rv;
struct raft_configuration dst;

/* Copy the configuration to an intermediate configuration because the copy
* can fail and we don't want to be left without the previous configuration. */
configurationInit(&dst);
rv = configurationCopy(src, &dst);
if (rv != 0) {
return rv;
}
configurationClose(&r->configuration_previous);
r->configuration_previous = dst;
return 0;
}

int configurationRestorePrevious(struct raft *r)
{
int rv;
struct raft_configuration prev;

/* There should always be a previous config. */
assert(r->configuration_previous.n != 0);
if (r->configuration_previous.n == 0) {
return RAFT_INVALID;
}

/* Copy the configuration to an intermediate configuration because the copy
* can fail and we don't want to be left without a configuration. */
configurationInit(&prev);
rv = configurationCopy(&r->configuration_previous, &prev);
if (rv != 0) {
return rv;
}
configurationClose(&r->configuration);
r->configuration = prev;
return 0;
}
#undef tracef
7 changes: 7 additions & 0 deletions src/configuration.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,11 @@ int configurationDecode(const struct raft_buffer *buf,

/* Output the configuration to the raft tracer */
void configurationTrace(const struct raft *r, struct raft_configuration *c, const char *msg);

/* Replaces the previous configuration with a copy of the configuration */
int configurationBackup(struct raft *r, struct raft_configuration *src);

/* Replaces the current configuration with a copy of the previous configuration. */
int configurationRestorePrevious(struct raft *r);

#endif /* CONFIGURATION_H_ */
27 changes: 15 additions & 12 deletions src/membership.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,23 +155,26 @@ int membershipRollback(struct raft *r)
/* Fetch the last committed configuration entry. */
assert(r->configuration_index != 0);

entry = logGet(r->log, r->configuration_index);

assert(entry != NULL);

/* Replace the current configuration with the last committed one. */
raft_configuration_close(&r->configuration);
raft_configuration_init(&r->configuration);

rv = configurationDecode(&entry->buf, &r->configuration);
if (rv != 0) {
return rv;
entry = logGet(r->log, r->configuration_index);
if (entry != NULL) {
raft_configuration_close(&r->configuration);
raft_configuration_init(&r->configuration);

rv = configurationDecode(&entry->buf, &r->configuration);
if (rv != 0) {
return rv;
}
} else {
/* Configuration was truncated from log. */
rv = configurationRestorePrevious(r);
if (rv != 0) {
return rv;
}
}

configurationTrace(r, &r->configuration, "roll back config");

r->configuration_uncommitted_index = 0;

return 0;
}

Expand Down
2 changes: 2 additions & 0 deletions src/raft.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ int raft_init(struct raft *r,
}

raft_configuration_init(&r->configuration);
raft_configuration_init(&r->configuration_previous);
r->configuration_index = 0;
r->configuration_uncommitted_index = 0;
r->election_timeout = DEFAULT_ELECTION_TIMEOUT;
Expand Down Expand Up @@ -110,6 +111,7 @@ static void ioCloseCb(struct raft_io *io)
raft_free(r->address);
logClose(r->log);
raft_configuration_close(&r->configuration);
raft_configuration_close(&r->configuration_previous);
if (r->close_cb != NULL) {
r->close_cb(r);
}
Expand Down
47 changes: 44 additions & 3 deletions src/replication.c
Original file line number Diff line number Diff line change
Expand Up @@ -1240,10 +1240,17 @@ static void installSnapshotCb(struct raft_io_snapshot_put *req, int status)
goto discard;
}

tracef("restored snapshot with last index %llu", snapshot->index);
/* Enable configuration rollback if the next configuration after installing
* this snapshot needs to be rolled back. */
rv = configurationBackup(r, &r->configuration);
if (rv != 0) {
/* Don't make this a hard fault, configuration rollback is a low
* probability event. */
tracef("failed to backup current configuration.");
}

tracef("restored snapshot with last index %llu", snapshot->index);
result.rejected = 0;

goto respond;

discard:
Expand Down Expand Up @@ -1468,6 +1475,7 @@ static void takeSnapshotCb(struct raft_io_snapshot_put *req, int status)
{
struct raft *r = req->data;
struct raft_snapshot *snapshot;
int rv;

r->snapshot.put.data = NULL;
snapshot = &r->snapshot.pending;
Expand All @@ -1478,6 +1486,20 @@ static void takeSnapshotCb(struct raft_io_snapshot_put *req, int status)
goto out;
}

/* While the snapshot was written, configuration changes could have
* occurred, these changes will not be purged from the log by this snapshot
* write. Therefore, we only need to backup a configuration in case
* configuration_index == snapshot->configuration_index, i.e. the last
* committed configuration is the configuration in the snapshot. (for
* simplicity this doesn't take into account the snapshot trailing parameter)*/
if (r->configuration_index == snapshot->configuration_index) {
rv = configurationBackup(r, &snapshot->configuration);
if (rv != 0) {
/* Don't make this a hard fault, configuration rollback is a low
* probability event. */
tracef("failed to backup last committed configuration.");
}
}
logSnapshot(r->log, snapshot->index, r->snapshot.trailing);
out:
takeSnapshotClose(r, snapshot);
Expand Down Expand Up @@ -1531,6 +1553,24 @@ static int takeSnapshotAsync(struct raft_io_async_work *take)
return r->fsm->snapshot_async(r->fsm, &snapshot->bufs, &snapshot->n_bufs);
}

/* Fill @dst with the configuration stored at r->configuration_index.
 *
 * If the log still holds the entry at that index, the configuration is decoded
 * from the entry's buffer. Otherwise the configuration is copied from
 * r->configuration_previous.
 *
 * Returns 0 on success, or the error code reported by configurationDecode() or
 * configurationCopy(). On success @dst contains at least one server. */
static int copyLastCommittedConfiguration(const struct raft *r,
                                          struct raft_configuration *dst)
{
    const struct raft_entry *entry;
    int rv;

    entry = logGet(r->log, r->configuration_index);
    if (entry != NULL) {
        tracef("entry != NULL index:%llu", r->configuration_index);
        rv = configurationDecode(&entry->buf, dst);
    } else {
        tracef("entry == NULL index:%llu", r->configuration_index);
        rv = configurationCopy(&r->configuration_previous, dst);
    }
    if (rv != 0) {
        /* Report the failure to the caller. The non-empty invariant below is
         * only meaningful for a successfully populated configuration, so it
         * must not be evaluated on this path. */
        return rv;
    }

    assert(dst->n != 0);
    return 0;
}

static int takeSnapshot(struct raft *r)
{
struct raft_snapshot *snapshot;
Expand All @@ -1544,7 +1584,8 @@ static int takeSnapshot(struct raft *r)
snapshot->bufs = NULL;
snapshot->n_bufs = 0;

rv = configurationCopy(&r->configuration, &snapshot->configuration);
configurationInit(&snapshot->configuration);
rv = copyLastCommittedConfiguration(r, &snapshot->configuration);
if (rv != 0) {
goto abort;
}
Expand Down
2 changes: 2 additions & 0 deletions src/snapshot.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ int snapshotRestore(struct raft *r, struct raft_snapshot *snapshot)
configurationClose(&r->configuration);
r->configuration = snapshot->configuration;
r->configuration_index = snapshot->configuration_index;
r->configuration_uncommitted_index = 0;
configurationTrace(r, &r->configuration, "configuration restore from snapshot");

r->commit_index = snapshot->index;
Expand All @@ -65,6 +66,7 @@ int snapshotCopy(const struct raft_snapshot *src, struct raft_snapshot *dst)

dst->term = src->term;
dst->index = src->index;
dst->configuration_index = src->configuration_index;

rv = configurationCopy(&src->configuration, &dst->configuration);
if (rv != 0) {
Expand Down
112 changes: 66 additions & 46 deletions src/start.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,60 +12,69 @@

#define tracef(...) Tracef(r->tracer, __VA_ARGS__)

/* Restore the most recent configuration. */
static int restoreMostRecentConfiguration(struct raft *r,
struct raft_entry *entry,
raft_index index)
/* Restore the most recent configurations. */
static int restoreConfigurations(struct raft *r, raft_index prev_index,
raft_index last_index, struct raft_entry *last)
{
struct raft_configuration configuration;
struct raft_configuration last_conf;
int rv;
raft_configuration_init(&configuration);
rv = configurationDecode(&entry->buf, &configuration);
if (rv != 0) {
raft_configuration_close(&configuration);
return rv;

/* No configuration entry loaded, nothing to do */
if (last == NULL) {
assert(prev_index == 0);
return 0;
} else {
/* There is a latest configuration, we can't know if it's
* committed or not. Backup the configuration restored from the snapshot
* or noop in case there was no snapshot. */
configurationBackup(r, &r->configuration);
raft_configuration_init(&last_conf);
rv = configurationDecode(&last->buf, &last_conf);
if (rv != 0) {
raft_configuration_close(&last_conf);
return rv;
}
configurationClose(&r->configuration);
r->configuration = last_conf;
r->configuration_uncommitted_index = last_index;

/* If the last configuration is the first entry in the log, we know it's
* the bootstrap configuration and it's committed by default. */
if (last_index == 1) {
assert(prev_index == 0);
r->configuration_index = 1;
r->configuration_uncommitted_index = 0;
}

/* If there is a previous configuration it must have been committed as
* we don't allow multiple uncommitted configurations. */
if (prev_index != 0) {
r->configuration_index = prev_index;
}
}
configurationTrace(r, &configuration, "restore most recent configuration");
raft_configuration_close(&r->configuration);
r->configuration = configuration;
r->configuration_index = index;

configurationTrace(r, &r->configuration, "restore most recent configuration");
return 0;
}

/* Restore the entries that were loaded from persistent storage. The most recent
* configuration entry will be restored as well, if any.
*
* Note that we don't care whether the most recent configuration entry was
* actually committed or not. We don't allow more than one pending uncommitted
* configuration change at a time, plus
*
* when adding or removing just a single server, it is safe to switch directly
* to the new configuration.
*
* and
*
* The new configuration takes effect on each server as soon as it is added to
* that server's log: the C_new entry is replicated to the C_new servers, and
* a majority of the new configuration is used to determine the C_new entry's
* commitment. This means that servers do not wait for configuration entries to
* be committed, and each server always uses the latest configuration found in
* its log.
*
* as explained in section 4.1.
*
* TODO: we should probably set configuration_uncommitted_index as well, since we
* can't be sure a configuration change has been committed and we need to be
* ready to roll back to the last committed configuration.
*/
* Note that we cannot know if the last configuration in the log was committed
* or not, therefore we also need to track the second-to-last configuration
* entry. This second-to-last entry is committed by default as raft doesn't
* allow multiple uncommitted configuration entries and is used in case of
* configuration rollback scenarios. */
static int restoreEntries(struct raft *r,
raft_index snapshot_index,
raft_term snapshot_term,
raft_index start_index,
struct raft_entry *entries,
size_t n)
{
struct raft_entry *conf = NULL;
raft_index conf_index = 0;
struct raft_entry *last_conf = NULL;
raft_index last_conf_index = 0;
raft_index prev_conf_index = 0;
size_t i;
int rv;
logStart(r->log, snapshot_index, snapshot_term, start_index);
Expand All @@ -78,17 +87,20 @@ static int restoreEntries(struct raft *r,
goto err;
}
r->last_stored++;
if (entry->type == RAFT_CHANGE) {
conf = entry;
conf_index = r->last_stored;
/* Only take configurations into account that are newer than the
* configuration restored from the snapshot. */
if (entry->type == RAFT_CHANGE && r->last_stored > r->configuration_index) {
prev_conf_index = last_conf_index;
last_conf = entry;
last_conf_index = r->last_stored;
}
}
if (conf != NULL) {
rv = restoreMostRecentConfiguration(r, conf, conf_index);
if (rv != 0) {
goto err;
}

rv = restoreConfigurations(r, prev_conf_index, last_conf_index, last_conf);
if (rv != 0) {
goto err;
}

raft_free(entries);
return 0;

Expand Down Expand Up @@ -164,6 +176,14 @@ int raft_start(struct raft *r)
snapshot_index = snapshot->index;
snapshot_term = snapshot->term;
raft_free(snapshot);

/* Enable configuration rollback if the next configuration after installing
* this snapshot needs to be rolled back. */
rv = configurationBackup(r, &r->configuration);
if (rv != 0) {
tracef("failed to backup current configuration.");
return rv;
}
} else if (n_entries > 0) {
/* If we don't have a snapshot and the on-disk log is not empty, then
* the first entry must be a configuration entry. */
Expand Down
Loading