From db73726a7ddc8144f45ca897b1722860801ad5a7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 30 Sep 2022 10:29:18 +0100 Subject: [PATCH 1/6] Fix performance regression in `get_users_in_room` --- synapse/handlers/federation.py | 4 +- synapse/handlers/room.py | 4 +- synapse/storage/controllers/state.py | 16 +++- synapse/storage/databases/main/roommember.py | 89 +++++++++++--------- 4 files changed, 70 insertions(+), 43 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index b86625829897..986ffed3d592 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -412,7 +412,9 @@ async def _maybe_backfill_inner( # First we try hosts that are already in the room. # TODO: HEURISTIC ALERT. likely_domains = ( - await self._storage_controllers.state.get_current_hosts_in_room(room_id) + await self._storage_controllers.state.get_current_hosts_in_room_ordered( + room_id + ) ) async def try_backfill(domains: Collection[str]) -> bool: diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index b220238e5597..57ab05ad25d9 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1540,7 +1540,9 @@ async def get_event_for_timestamp( ) likely_domains = ( - await self._storage_controllers.state.get_current_hosts_in_room(room_id) + await self._storage_controllers.state.get_current_hosts_in_room_ordered( + room_id + ) ) # Loop through each homeserver candidate until we get a succesful response diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index bb60130afed7..b02399157809 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -24,6 +24,7 @@ Mapping, Optional, Sequence, + Set, Tuple, ) @@ -529,7 +530,18 @@ async def get_current_state_event( ) return state_map.get(key) - async def get_current_hosts_in_room(self, room_id: str) -> List[str]: + async def get_current_hosts_in_room(self, room_id: str) -> Set[str]: + """Get current hosts in room based on current state. + + Blocks until we have full state for the given room. This only happens for rooms + with partial state. + """ + + await self._partial_state_room_tracker.await_full_state(room_id) + + return await self.stores.main.get_current_hosts_in_room(room_id) + + async def get_current_hosts_in_room_ordered(self, room_id: str) -> List[str]: """Get current hosts in room based on current state. Blocks until we have full state for the given room. This only happens for rooms @@ -542,7 +554,7 @@ async def get_current_hosts_in_room(self, room_id: str) -> List[str]: await self._partial_state_room_tracker.await_full_state(room_id) - return await self.stores.main.get_current_hosts_in_room(room_id) + return await self.stores.main.get_current_hosts_in_room_ordered(room_id) async def get_current_hosts_in_room_or_partial_state_approximation( self, room_id: str diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 982e1f08e30b..b5206c0b84bd 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -146,14 +146,7 @@ def _transact(txn: LoggingTransaction) -> int: @cached(max_entries=100000, iterable=True) async def get_users_in_room(self, room_id: str) -> List[str]: - """ - Returns a list of users in the room sorted by longest in the room first - (aka. with the lowest depth). This is done to match the sort in - `get_current_hosts_in_room()` and so we can re-use the cache but it's - not horrible to have here either. - - Uses `m.room.member`s in the room state at the current forward extremities to - determine which users are in the room. + """Returns a list of users in the room. Will return inaccurate results for rooms with partial state, since the state for the forward extremities of those rooms will exclude most members. We may also @@ -165,19 +158,10 @@ async def get_users_in_room(self, room_id: str) -> List[str]: ) def get_users_in_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[str]: - """ - Returns a list of users in the room sorted by longest in the room first - (aka. with the lowest depth). This is done to match the sort in - `get_current_hosts_in_room()` and so we can re-use the cache but it's - not horrible to have here either. - """ + """Returns a list of users in the room.""" sql = """ - SELECT c.state_key FROM current_state_events as c - /* Get the depth of the event from the events table */ - INNER JOIN events AS e USING (event_id) - WHERE c.type = 'm.room.member' AND c.room_id = ? AND membership = ? - /* Sorted by lowest depth first */ - ORDER BY e.depth ASC; + SELECT c.state_key FROM current_state_events + WHERE type = 'm.room.member' AND room_id = ? AND membership = ? """ txn.execute(sql, (room_id, Membership.JOIN)) @@ -931,7 +915,44 @@ async def _check_host_room_membership( return True @cached(iterable=True, max_entries=10000) - async def get_current_hosts_in_room(self, room_id: str) -> List[str]: + async def get_current_hosts_in_room(self, room_id: str) -> Set[str]: + """Get current hosts in room based on current state.""" + + # First we check if we already have `get_users_in_room` in the cache, as + # we can just calculate result from that + users = self.get_users_in_room.cache.get_immediate( + (room_id,), None, update_metrics=False + ) + if users is not None: + return {get_domain_from_id(u) for u in users} + + if isinstance(self.database_engine, Sqlite3Engine): + # If we're using SQLite then let's just always use + # `get_users_in_room` rather than funky SQL. + users = await self.get_users_in_room(room_id) + return {get_domain_from_id(u) for u in users} + + # For PostgreSQL we can use a regex to pull out the domains from the + # joined users in `current_state_events` via regex. + + def get_current_hosts_in_room_txn(txn: LoggingTransaction) -> Set[str]: + sql = """ + SELECT DISTINCT substring(state_key FROM '@[^:]*:(.*)$') + FROM current_state_events + WHERE + type = 'm.room.member' + AND membership = 'join' + AND room_id = ? + """ + txn.execute(sql, (room_id,)) + return {d for d, in txn} + + return await self.db_pool.runInteraction( + "get_current_hosts_in_room", get_current_hosts_in_room_txn + ) + + @cached(iterable=True, max_entries=10000) + async def get_current_hosts_in_room_ordered(self, room_id: str) -> List[str]: """ Get current hosts in room based on current state. @@ -939,6 +960,8 @@ async def get_current_hosts_in_room(self, room_id: str) -> List[str]: longest is good because they're most likely to have anything we ask about. + For SQLite the returned list is not ordered (for performance reasons). + Uses `m.room.member`s in the room state at the current forward extremities to determine which hosts are in the room. @@ -952,35 +975,23 @@ async def get_current_hosts_in_room(self, room_id: str) -> List[str]: sorted by join with the lowest depth first). """ - # First we check if we already have `get_users_in_room` in the cache, as - # we can just calculate result from that - users = self.get_users_in_room.cache.get_immediate( - (room_id,), None, update_metrics=False - ) - if users is None and isinstance(self.database_engine, Sqlite3Engine): + if isinstance(self.database_engine, Sqlite3Engine): # If we're using SQLite then let's just always use # `get_users_in_room` rather than funky SQL. users = await self.get_users_in_room(room_id) - if users is not None: - # Because `users` is sorted from lowest -> highest depth, the list - # of domains will also be sorted that way. - domains: List[str] = [] - # We use a `Set` just for fast lookups - domain_set: Set[str] = set() + domains: Set[str] = set() for u in users: if ":" not in u: continue domain = get_domain_from_id(u) - if domain not in domain_set: - domain_set.add(domain) - domains.append(domain) - return domains + domains.add(domain) + return list(domains) # For PostgreSQL we can use a regex to pull out the domains from the # joined users in `current_state_events` via regex. - def get_current_hosts_in_room_txn(txn: LoggingTransaction) -> List[str]: + def get_current_hosts_in_room_ordered_txn(txn: LoggingTransaction) -> List[str]: # Returns a list of servers currently joined in the room sorted by # longest in the room first (aka. with the lowest depth). The # heuristic of sorting by servers who have been in the room the @@ -1008,7 +1019,7 @@ def get_current_hosts_in_room_txn(txn: LoggingTransaction) -> List[str]: return [d for d, in txn if d is not None] return await self.db_pool.runInteraction( - "get_current_hosts_in_room", get_current_hosts_in_room_txn + "get_current_hosts_in_room_ordered", get_current_hosts_in_room_ordered_txn ) async def get_joined_hosts( From 1f400a5a78b27fc1dd88973d7d989150ed1bc2a9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 30 Sep 2022 10:34:50 +0100 Subject: [PATCH 2/6] Newsfile --- changelog.d/13972.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/13972.bugfix diff --git a/changelog.d/13972.bugfix b/changelog.d/13972.bugfix new file mode 100644 index 000000000000..760fe48fa62f --- /dev/null +++ b/changelog.d/13972.bugfix @@ -0,0 +1 @@ +Fix performance regression in `get_users_in_room` database query. Introduced in v1.67.0. From 0dca91d2ff28e274005f12f7f402601bff21746a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 30 Sep 2022 10:43:06 +0100 Subject: [PATCH 3/6] Fix query --- synapse/storage/databases/main/roommember.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index b5206c0b84bd..3158e0a75fc4 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -160,7 +160,7 @@ async def get_users_in_room(self, room_id: str) -> List[str]: def get_users_in_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[str]: """Returns a list of users in the room.""" sql = """ - SELECT c.state_key FROM current_state_events + SELECT state_key FROM current_state_events WHERE type = 'm.room.member' AND room_id = ? AND membership = ? """ From 586ceaef61366843c567428b6d77ce39bfcf8b92 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 30 Sep 2022 11:51:30 +0100 Subject: [PATCH 4/6] Update changelog.d/13972.bugfix Co-authored-by: David Robertson --- changelog.d/13972.bugfix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/13972.bugfix b/changelog.d/13972.bugfix index 760fe48fa62f..4c1e19ef8c57 100644 --- a/changelog.d/13972.bugfix +++ b/changelog.d/13972.bugfix @@ -1 +1 @@ -Fix performance regression in `get_users_in_room` database query. Introduced in v1.67.0. +Fix a performance regression in the `get_users_in_room` database query. Introduced in v1.67.0. From 99d4bdc2a5aef0ce7ec27470802af8c09fac68ff Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 30 Sep 2022 11:51:42 +0100 Subject: [PATCH 5/6] Simplify approximation --- synapse/storage/controllers/state.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index b02399157809..2b31ce54bb75 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -23,7 +23,6 @@ List, Mapping, Optional, - Sequence, Set, Tuple, ) @@ -558,7 +557,7 @@ async def get_current_hosts_in_room_ordered(self, room_id: str) -> List[str]: async def get_current_hosts_in_room_or_partial_state_approximation( self, room_id: str - ) -> Sequence[str]: + ) -> Collection[str]: """Get approximation of current hosts in room based on current state. For rooms with full state, this is equivalent to `get_current_hosts_in_room`, @@ -578,14 +577,9 @@ async def get_current_hosts_in_room_or_partial_state_approximation( ) hosts_from_state = await self.stores.main.get_current_hosts_in_room(room_id) - hosts_from_state_set = set(hosts_from_state) - - # First take the list of hosts based on the current state. - # For rooms with partial state, this will be missing most hosts. - hosts = list(hosts_from_state) - # Then add in the list of hosts in the room at the time we joined. - # This will be an empty list for rooms with full state. - hosts.extend(host for host in hosts_at_join if host not in hosts_from_state_set) + + hosts = set(hosts_at_join) + hosts.update(hosts_from_state) return hosts From 01fd9bc21287b05fe2e11c18c932b02caa972d64 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 30 Sep 2022 11:56:25 +0100 Subject: [PATCH 6/6] Review comments --- synapse/storage/databases/main/roommember.py | 50 +++++++++++--------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 3158e0a75fc4..2337289d8828 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -153,19 +153,30 @@ async def get_users_in_room(self, room_id: str) -> List[str]: calculate room state incorrectly for such rooms and believe that a member is or is not in the room when the opposite is true. """ - return await self.db_pool.runInteraction( - "get_users_in_room", self.get_users_in_room_txn, room_id + return await self.db_pool.simple_select_onecol( + table="current_state_events", + keyvalues={ + "type": EventTypes.Member, + "room_id": room_id, + "membership": Membership.JOIN, + }, + retcol="state_key", + desc="get_users_in_room", ) def get_users_in_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[str]: """Returns a list of users in the room.""" - sql = """ - SELECT state_key FROM current_state_events - WHERE type = 'm.room.member' AND room_id = ? AND membership = ? - """ - txn.execute(sql, (room_id, Membership.JOIN)) - return [r[0] for r in txn] + return self.db_pool.simple_select_onecol_txn( + txn, + table="current_state_events", + keyvalues={ + "type": EventTypes.Member, + "room_id": room_id, + "membership": Membership.JOIN, + }, + retcol="state_key", + ) @cached() def get_user_in_room_with_profile( @@ -960,15 +971,16 @@ async def get_current_hosts_in_room_ordered(self, room_id: str) -> List[str]: longest is good because they're most likely to have anything we ask about. - For SQLite the returned list is not ordered (for performance reasons). + For SQLite the returned list is not ordered, as SQLite doesn't support + the appropriate SQL. - Uses `m.room.member`s in the room state at the current forward extremities to - determine which hosts are in the room. + Uses `m.room.member`s in the room state at the current forward + extremities to determine which hosts are in the room. - Will return inaccurate results for rooms with partial state, since the state for - the forward extremities of those rooms will exclude most members. We may also - calculate room state incorrectly for such rooms and believe that a host is or - is not in the room when the opposite is true. + Will return inaccurate results for rooms with partial state, since the + state for the forward extremities of those rooms will exclude most + members. We may also calculate room state incorrectly for such rooms and + believe that a host is or is not in the room when the opposite is true. Returns: Returns a list of servers sorted by longest in the room first. (aka. @@ -978,14 +990,8 @@ async def get_current_hosts_in_room_ordered(self, room_id: str) -> List[str]: if isinstance(self.database_engine, Sqlite3Engine): # If we're using SQLite then let's just always use # `get_users_in_room` rather than funky SQL. - users = await self.get_users_in_room(room_id) - domains: Set[str] = set() - for u in users: - if ":" not in u: - continue - domain = get_domain_from_id(u) - domains.add(domain) + domains = await self.get_current_hosts_in_room(room_id) return list(domains) # For PostgreSQL we can use a regex to pull out the domains from the