From 4191f5615f42ad720161941b80f68f0bcdc3a797 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 21 Oct 2021 03:44:27 -0500 Subject: [PATCH] Remove fake prev events from historical state chain Fix https://github.com/matrix-org/synapse/issues/11091 We have to allow creation of events with no prev_events but that do have auth_events. And since the historical member events are outliers with no prev_events to resolve them, we want to avoid putting them as backward extremities. --- synapse/federation/federation_server.py | 2 + synapse/handlers/federation.py | 1 + synapse/handlers/federation_event.py | 3 +- synapse/handlers/message.py | 6 +- synapse/handlers/room_batch.py | 11 +++- synapse/handlers/room_member.py | 2 +- .../databases/main/event_federation.py | 2 +- synapse/storage/databases/main/events.py | 65 ++++++++++++++++++- 8 files changed, 83 insertions(+), 9 deletions(-) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 015b61bf63a0..36bcc1d81470 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -200,6 +200,8 @@ async def on_backfill_request( res = self._transaction_dict_from_pdus(pdus) + logger.info("on_backfill_request res=%s", res) + return 200, res async def on_incoming_transaction( diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 98feca5e24c3..d915f0b35ccf 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1061,6 +1061,7 @@ async def on_backfill_request( events = await self.store.get_backfill_events(room_id, pdu_list, limit) events = await filter_events_for_server(self.storage, origin, events) + logger.info("on_backfill_request resultant events(%d)=%s", len(events), events) return events diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 177352f8320d..c6eb7d088e41 100644 --- a/synapse/handlers/federation_event.py +++ 
b/synapse/handlers/federation_event.py @@ -417,7 +417,8 @@ async def backfill( dest, room_id, limit=limit, extremities=extremities ) logger.info( - "from remote server: got backfill response events=%s", + "from remote server: got backfill response events(%d)=%s", + len(events), [ { "event_id": ev.event_id, diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 2e024b551f99..f4ae4a392ce7 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -955,8 +955,10 @@ async def create_new_client_event( # event and then try to auth it (which fails with a somewhat confusing "No # create event in auth events") assert ( - builder.type == EventTypes.Create or len(prev_event_ids) > 0 - ), "Attempting to create an event with no prev_events" + builder.type == EventTypes.Create + or len(prev_event_ids) > 0 + or len(auth_event_ids) > 0 + ), "Attempting to create an event with no prev_events or auth_event_ids" event = await builder.build( prev_event_ids=prev_event_ids, diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py index c7ee6836e221..a2b2257d6fe4 100644 --- a/synapse/handlers/room_batch.py +++ b/synapse/handlers/room_batch.py @@ -184,7 +184,7 @@ async def persist_state_events_at_start( # Make the state events float off on their own so we don't have a # bunch of `@mxid joined the room` noise between each batch - prev_event_ids_for_state_chain = [generate_fake_event_id()] + prev_event_ids_for_state_chain = [] # generate_fake_event_id() for state_event in state_events_at_start: assert_params_in_dict( @@ -227,6 +227,15 @@ async def persist_state_events_at_start( # reference and also update in the event when we append later. 
auth_event_ids=auth_event_ids.copy(), ) + + mem_event = await self.store.get_event(event_id) + logger.info( + "room_batch mem_event_id=%s depth=%s stream_ordering=%s prev_event_ids=%s", + mem_event.event_id, + mem_event.depth, + mem_event.internal_metadata.stream_ordering, + mem_event.prev_event_ids(), + ) else: # TODO: Add some complement tests that adds state that is not member joins # and will use this code path. Maybe we only want to support join state events diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 74e6c7eca6b1..3ff82ab229d3 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -644,7 +644,7 @@ async def update_membership_locked( if block_invite: raise SynapseError(403, "Invites have been disabled on this server") - if prev_event_ids: + if prev_event_ids is not None: return await self._local_membership_update( requester=requester, target=target, diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 145ba7b59347..7f5d1b263fa1 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1127,7 +1127,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): ev["type"], ev["depth"], event_lookup_result["stream_ordering"], - ev["content"].get("body", None), + ev["content"].get("body", ev["content"]), ) else: logger.info( diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 56d265213290..21f69f578735 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2139,6 +2139,38 @@ def _update_backward_extremeties(self, txn, events): Forward extremities are handled when we first start persisting the events. 
""" + logger.info( + "_update_backward_extremeties events=%s", + [ + { + "event_id": ev.event_id, + "prev_events": ev.prev_event_ids(), + "outlier": ev.internal_metadata.is_outlier(), + } + for ev in events + ], + ) + + for ev in events: + for e_id in ev.prev_event_ids(): + query = """ + SELECT 1 FROM event_edges + INNER JOIN events AS e USING (event_id, room_id) + WHERE event_id = ? AND room_id = ? AND e.outlier = TRUE + """ + + txn.execute( + query, + (e_id, ev.room_id), + ) + result = txn.fetchall() + logger.info( + "_update_backward_extremeties test ev=%s prev_event_id=%s result=%s", + ev.event_id, + e_id, + result, + ) + # From the events passed in, add all of the prev events as backwards extremities. # Ignore any events that are already backwards extrems or outliers. query = ( @@ -2147,22 +2179,45 @@ def _update_backward_extremeties(self, txn, events): " SELECT 1 FROM event_backward_extremities" " WHERE event_id = ? AND room_id = ?" " )" + # 1. Don't add an event as a extremity again if we already persisted it + # as a non-outlier. + # 2. Don't add an outlier as an extremity if it has no prev_events " AND NOT EXISTS (" - " SELECT 1 FROM events WHERE event_id = ? AND room_id = ? " - " AND outlier = ?" + " SELECT 1 FROM events" + " LEFT JOIN event_edges edge" + " ON edge.event_id = events.event_id" + " WHERE events.event_id = ? AND events.room_id = ? AND (events.outlier = FALSE OR edge.event_id IS NULL)" " )" ) txn.execute_batch( query, [ - (e_id, ev.room_id, e_id, ev.room_id, e_id, ev.room_id, False) + (e_id, ev.room_id, e_id, ev.room_id, e_id, ev.room_id) for ev in events for e_id in ev.prev_event_ids() if not ev.internal_metadata.is_outlier() ], ) + for ev in events: + for e_id in ev.prev_event_ids(): + query = """ + SELECT * FROM event_backward_extremities + WHERE event_id = ? AND room_id = ? 
+ """ + + txn.execute( + query, + (e_id, ev.room_id), + ) + result = txn.fetchall() + logger.info( + "_update_backward_extremeties ended up as prev_event_id=%s result=%s", + e_id, + result, + ) + # Delete all these events that we've already fetched and now know that their # prev events are the new backwards extremeties. query = ( @@ -2175,6 +2230,10 @@ def _update_backward_extremeties(self, txn, events): (ev.event_id, ev.room_id) for ev in events if not ev.internal_metadata.is_outlier() + # If we encountered an event with no prev_events, then we might + # as well remove it now because it won't ever have anything else + # to backfill from. + or len(ev.prev_event_ids()) == 0 ], )