From b38bdae3a2e5b7cfe862580368b996b8d7dfa50f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 4 Jan 2022 16:36:33 +0000 Subject: [PATCH] Fix AssertionErrors after purging events (#11642) * Fix AssertionErrors after purging events If you purged a bunch of events from your database, and then restarted synapse without receiving more events, then you would get a bunch of AssertionErrors on restart. This fixes the situation by rewinding the stream processors. * `check-newsfragment`: ignore deleted newsfiles --- changelog.d/11536.bugfix | 1 + changelog.d/11536.misc | 1 - changelog.d/11642.bugfix | 1 + scripts-dev/check-newsfragment | 4 ++-- synapse/handlers/stats.py | 11 +++++++++++ synapse/handlers/user_directory.py | 18 +++++++++++++++--- 6 files changed, 30 insertions(+), 6 deletions(-) create mode 100644 changelog.d/11536.bugfix delete mode 100644 changelog.d/11536.misc create mode 100644 changelog.d/11642.bugfix diff --git a/changelog.d/11536.bugfix b/changelog.d/11536.bugfix new file mode 100644 index 000000000000..4a1b00725479 --- /dev/null +++ b/changelog.d/11536.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug which could cause `AssertionError`s to be written to the log when Synapse was restarted after purging events from the database. diff --git a/changelog.d/11536.misc b/changelog.d/11536.misc deleted file mode 100644 index b9191c111b18..000000000000 --- a/changelog.d/11536.misc +++ /dev/null @@ -1 +0,0 @@ -Improvements to log messages around handling stream ids. diff --git a/changelog.d/11642.bugfix b/changelog.d/11642.bugfix new file mode 100644 index 000000000000..4a1b00725479 --- /dev/null +++ b/changelog.d/11642.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug which could cause `AssertionError`s to be written to the log when Synapse was restarted after purging events from the database. diff --git a/scripts-dev/check-newsfragment b/scripts-dev/check-newsfragment index af4de345df57..c764011d6adb 100755 --- a/scripts-dev/check-newsfragment +++ b/scripts-dev/check-newsfragment @@ -42,8 +42,8 @@ echo "--------------------------" echo matched=0 -for f in $(git diff --name-only FETCH_HEAD... -- changelog.d); do - # check that any modified newsfiles on this branch end with a full stop. +for f in $(git diff --diff-filter=d --name-only FETCH_HEAD... -- changelog.d); do + # check that any added newsfiles on this branch end with a full stop. lastchar=$(tr -d '\n' < "$f" | tail -c 1) if [ "$lastchar" != '.' ] && [ "$lastchar" != '!' ]; then echo -e "\e[31mERROR: newsfragment $f does not end with a '.' or '!'\e[39m" >&2 diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py index bd3e6f2ec77b..29e41a4c796c 100644 --- a/synapse/handlers/stats.py +++ b/synapse/handlers/stats.py @@ -80,6 +80,17 @@ async def _unsafe_process(self) -> None: # If self.pos is None then means we haven't fetched it from DB if self.pos is None: self.pos = await self.store.get_stats_positions() + room_max_stream_ordering = self.store.get_room_max_stream_ordering() + if self.pos > room_max_stream_ordering: + # apparently, we've processed more events than exist in the database! + # this can happen if events are removed with history purge or similar. + logger.warning( + "Event stream ordering appears to have gone backwards (%i -> %i): " + "rewinding stats processor", + self.pos, + room_max_stream_ordering, + ) + self.pos = room_max_stream_ordering # Loop round handling deltas until we're up to date diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index a0eb45446f56..1565e034cb25 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -148,9 +148,21 @@ async def _unsafe_process(self) -> None: if self.pos is None: self.pos = await self.store.get_user_directory_stream_pos() - # If still None then the initial background update hasn't happened yet. - if self.pos is None: - return None + # If still None then the initial background update hasn't happened yet. + if self.pos is None: + return None + + room_max_stream_ordering = self.store.get_room_max_stream_ordering() + if self.pos > room_max_stream_ordering: + # apparently, we've processed more events than exist in the database! + # this can happen if events are removed with history purge or similar. + logger.warning( + "Event stream ordering appears to have gone backwards (%i -> %i): " + "rewinding user directory processor", + self.pos, + room_max_stream_ordering, + ) + self.pos = room_max_stream_ordering # Loop round handling deltas until we're up to date while True: