From bc739a444fb5bffc6dd3b57399a223bc5b5b4a15 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 1 Feb 2023 15:19:15 +0000 Subject: [PATCH 1/5] Fetch fewer events when getting hosts in room This should hopefully improve the response time to serve a faster join, to persist an outgoing event, and probably other things too. --- synapse/storage/databases/main/roommember.py | 26 ++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 8e2ba7b7b470..b4298d33a6cb 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging +from itertools import chain from typing import ( TYPE_CHECKING, AbstractSet, @@ -94,6 +95,7 @@ def __init__( self._joined_host_linearizer = Linearizer("_JoinedHostsCache") self._server_notices_mxid = hs.config.servernotices.server_notices_mxid + self._storage_controllers = hs.get_storage_controllers() if ( self.hs.config.worker.run_background_tasks @@ -1131,12 +1133,32 @@ async def _get_joined_hosts( else: # The cache doesn't match the state group or prev state group, # so we calculate the result from first principles. + # + # We need to fetch all hosts joined to the room according to `state` by + # inspecting all join memberships in `state`. However, if the `state` is + # relatively recent then many of its events are likely to be held in + # the current state of the room, which is easily available and likely + # cached. We therefore compute the set of `state` events not in the + # current state and only fetch those. + current_state = await self._storage_controllers.state.get_current_state( + room_id + ) + unknown_state_events = {} + joined_users_in_current_state = [] + + for (type, state_key), event_id in state.items(): + current_event = current_state.get((type, state_key)) + if current_event is None or current_event.event_id != event_id: + unknown_state_events[type, state_key] = event_id + elif current_event.membership == Membership.JOIN: + joined_users_in_current_state.append(state_key) + joined_user_ids = await self.get_joined_user_ids_from_state( - room_id, state + room_id, unknown_state_events ) cache.hosts_to_joined_users = {} - for user_id in joined_user_ids: + for user_id in chain(joined_user_ids, joined_users_in_current_state): host = intern_string(get_domain_from_id(user_id)) cache.hosts_to_joined_users.setdefault(host, set()).add(user_id) From 70947443798e995ff026819cf5dd6823134ac81a Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 1 Feb 2023 15:29:27 +0000 Subject: [PATCH 2/5] Changelog --- changelog.d/14962.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/14962.feature diff --git a/changelog.d/14962.feature b/changelog.d/14962.feature new file mode 100644 index 000000000000..38f26012f23c --- /dev/null +++ b/changelog.d/14962.feature @@ -0,0 +1 @@ +Improve performance when joining or sending an event large rooms. From f781304e1882fd60cdef84d3405767773237629d Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 1 Feb 2023 18:03:39 +0000 Subject: [PATCH 3/5] Just fetch current membership --- synapse/storage/databases/main/roommember.py | 41 ++++++++++++++++---- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index b4298d33a6cb..255e3f8d1215 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -95,7 +95,6 @@ def __init__( self._joined_host_linearizer = Linearizer("_JoinedHostsCache") self._server_notices_mxid = hs.config.servernotices.server_notices_mxid - self._storage_controllers = hs.get_storage_controllers() if ( self.hs.config.worker.run_background_tasks @@ -1138,19 +1137,20 @@ async def _get_joined_hosts( # inspecting all join memberships in `state`. However, if the `state` is # relatively recent then many of its events are likely to be held in # the current state of the room, which is easily available and likely - # cached. We therefore compute the set of `state` events not in the + # cached. + # + # We therefore compute the set of `state` events not in the # current state and only fetch those. - current_state = await self._storage_controllers.state.get_current_state( - room_id + current_memberships = ( + await self._get_approximate_current_memberships_in_room(room_id) ) unknown_state_events = {} joined_users_in_current_state = [] for (type, state_key), event_id in state.items(): - current_event = current_state.get((type, state_key)) - if current_event is None or current_event.event_id != event_id: + if event_id not in current_memberships: unknown_state_events[type, state_key] = event_id - elif current_event.membership == Membership.JOIN: + elif current_memberships[event_id] == Membership.JOIN: joined_users_in_current_state.append(state_key) joined_user_ids = await self.get_joined_user_ids_from_state( @@ -1169,6 +1169,33 @@ async def _get_joined_hosts( return frozenset(cache.hosts_to_joined_users) + # TODO: this _might_ turn out to need caching, let's see + async def _get_approximate_current_memberships_in_room( + self, room_id: str + ) -> Mapping[str, Optional[str]]: + """Build a map from event id to membership, for all events in the current state. + + The event ids of non-memberships events (e.g. `m.room.power_levels`) are present + in the result, mapped to values of `None`. + + The result is approximate for partially-joined rooms. It is fully accurate + for fully-joined rooms. + """ + + def f(txn: LoggingTransaction) -> List[Tuple[str, str]]: + sql = """ + SELECT event_id, membership + FROM current_state_events + WHERE room_id = ?; + """ + txn.execute(sql, (room_id,)) + return txn.fetchall() # type: ignore[return-value] + + rows = await self.db_pool.runInteraction( + "_get_approimate_current_memberships_in_room", f + ) + return {row[0]: row[1] for row in rows} + @cached(max_entries=10000) def _get_joined_hosts_cache(self, room_id: str) -> "_JoinedHostsCache": return _JoinedHostsCache() From 7ee3976debf925fc2f79381c0c51003588e31e32 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 1 Feb 2023 18:39:07 +0000 Subject: [PATCH 4/5] Use helper function oh, that's what they exist for --- synapse/storage/databases/main/roommember.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 255e3f8d1215..49dbd34767e2 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -1182,19 +1182,13 @@ async def _get_approximate_current_memberships_in_room( for fully-joined rooms. """ - def f(txn: LoggingTransaction) -> List[Tuple[str, str]]: - sql = """ - SELECT event_id, membership - FROM current_state_events - WHERE room_id = ?; - """ - txn.execute(sql, (room_id,)) - return txn.fetchall() # type: ignore[return-value] - - rows = await self.db_pool.runInteraction( - "_get_approimate_current_memberships_in_room", f + rows = await self.db_pool.simple_select_list( + "current_state_events", + keyvalues={"room_id": room_id}, + retcols=("event_id", "membership"), + desc="has_completed_background_updates", ) - return {row[0]: row[1] for row in rows} + return {row["event_id"]: row["membership"] for row in rows} @cached(max_entries=10000) def _get_joined_hosts_cache(self, room_id: str) -> "_JoinedHostsCache": From 60e3e2a5cbfb514d13346318dd48cf977098e7bc Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 2 Feb 2023 16:05:14 +0000 Subject: [PATCH 5/5] Remove TODO --- synapse/storage/databases/main/roommember.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 49dbd34767e2..ea6a5e2f3495 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -1169,7 +1169,6 @@ async def _get_joined_hosts( return frozenset(cache.hosts_to_joined_users) - # TODO: this _might_ turn out to need caching, let's see async def _get_approximate_current_memberships_in_room( self, room_id: str ) -> Mapping[str, Optional[str]]: