From 80828eda06f8e3d6a930c9fa45204ad6fef1d411 Mon Sep 17 00:00:00 2001
From: David Teller
Date: Wed, 22 Sep 2021 15:09:43 +0200
Subject: [PATCH] =?UTF-8?q?Extend=20ModuleApi=20with=20the=20methods=20we'?=
 =?UTF-8?q?ll=20need=20to=20reject=20spam=20based=20on=20=E2=80=A6IP=20-?=
 =?UTF-8?q?=20resolves=20#10832=20(#10833)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extend ModuleApi with the methods we'll need to reject spam based on IP - resolves #10832

Signed-off-by: David Teller
---
 changelog.d/10833.misc                       |  1 +
 synapse/module_api/__init__.py               | 82 +++++++++++++++++++-
 synapse/storage/databases/main/client_ips.py | 27 +++++--
 tests/module_api/test_api.py                 | 72 +++++++++++++++++
 4 files changed, 174 insertions(+), 8 deletions(-)
 create mode 100644 changelog.d/10833.misc

diff --git a/changelog.d/10833.misc b/changelog.d/10833.misc
new file mode 100644
index 000000000000..f23c0a1a023a
--- /dev/null
+++ b/changelog.d/10833.misc
@@ -0,0 +1 @@
+Extend the ModuleApi to let plug-ins check whether an ID is local and to access IP + User Agent data.
diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 3196c2bec65e..174e6934a8be 100644
--- a/synapse/module_api/__init__.py
+++ b/synapse/module_api/__init__.py
@@ -24,8 +24,10 @@
     List,
     Optional,
     Tuple,
+    Union,
 )
 
+import attr
 import jinja2
 
 from twisted.internet import defer
@@ -46,7 +48,14 @@
 from synapse.storage.database import DatabasePool, LoggingTransaction
 from synapse.storage.databases.main.roommember import ProfileInfo
 from synapse.storage.state import StateFilter
-from synapse.types import JsonDict, Requester, UserID, UserInfo, create_requester
+from synapse.types import (
+    DomainSpecificString,
+    JsonDict,
+    Requester,
+    UserID,
+    UserInfo,
+    create_requester,
+)
 from synapse.util import Clock
 from synapse.util.caches.descriptors import cached
 
@@ -79,6 +88,18 @@
 logger = logging.getLogger(__name__)
 
 
+@attr.s(auto_attribs=True)
+class UserIpAndAgent:
+    """
+    An IP address and user agent used by a user to connect to this homeserver.
+    """
+
+    ip: str
+    user_agent: str
+    # The time at which this user agent/ip was last seen.
+    last_seen: int
+
+
 class ModuleApi:
     """A proxy object that gets passed to various plugin modules so they can
     register new users etc if necessary.
@@ -700,6 +721,65 @@ def read_templates(
             (td for td in (self.custom_template_dir, custom_template_directory) if td),
         )
 
+    def is_mine(self, id: Union[str, DomainSpecificString]) -> bool:
+        """
+        Checks whether an ID (user id, room, ...) comes from this homeserver.
+
+        Args:
+            id: any Matrix id (e.g. user id, room id, ...), either as a raw id,
+                e.g. string "@user:example.com" or as a parsed UserID, RoomID, ...
+        Returns:
+            True if id comes from this homeserver, False otherwise.
+
+        Added in Synapse v1.44.0.
+        """
+        if isinstance(id, DomainSpecificString):
+            return self._hs.is_mine(id)
+        else:
+            return self._hs.is_mine_id(id)
+
+    async def get_user_ip_and_agents(
+        self, user_id: str, since_ts: int = 0
+    ) -> List[UserIpAndAgent]:
+        """
+        Return the list of user IPs and agents for a user.
+
+        Args:
+            user_id: the id of a user, local or remote
+            since_ts: a timestamp in seconds since the epoch,
+                or the epoch itself if not specified.
+        Returns:
+            The list of all UserIpAndAgent that the user has
+            used to connect to this homeserver since `since_ts`.
+            If the user is remote, this list is empty.
+
+        Added in Synapse v1.44.0.
+        """
+        # Don't hit the db if this is not a local user.
+        is_mine = False
+        try:
+            # Let's be defensive against ill-formed strings.
+            if self.is_mine(user_id):
+                is_mine = True
+        except Exception:
+            pass
+
+        if is_mine:
+            raw_data = await self._store.get_user_ip_and_agents(
+                UserID.from_string(user_id), since_ts
+            )
+            # Sanitize some of the data. We don't want to return tokens.
+            return [
+                UserIpAndAgent(
+                    ip=str(data["ip"]),
+                    user_agent=str(data["user_agent"]),
+                    last_seen=int(data["last_seen"]),
+                )
+                for data in raw_data
+            ]
+        else:
+            return []
+
 
 class PublicRoomListManager:
     """Contains methods for adding to, removing from and querying whether a room
diff --git a/synapse/storage/databases/main/client_ips.py b/synapse/storage/databases/main/client_ips.py
index 7a98275d927f..7e33ae578c7b 100644
--- a/synapse/storage/databases/main/client_ips.py
+++ b/synapse/storage/databases/main/client_ips.py
@@ -555,8 +555,11 @@ async def get_last_client_ip_by_device(
         return ret
 
     async def get_user_ip_and_agents(
-        self, user: UserID
+        self, user: UserID, since_ts: int = 0
     ) -> List[Dict[str, Union[str, int]]]:
+        """
+        Fetch IP/User Agent connections since a given timestamp.
+        """
         user_id = user.to_string()
 
         results = {}
@@ -568,13 +571,23 @@ async def get_user_ip_and_agents(
             ) = key
             if uid == user_id:
                 user_agent, _, last_seen = self._batch_row_update[key]
-                results[(access_token, ip)] = (user_agent, last_seen)
+                if last_seen >= since_ts:
+                    results[(access_token, ip)] = (user_agent, last_seen)
 
-        rows = await self.db_pool.simple_select_list(
-            table="user_ips",
-            keyvalues={"user_id": user_id},
-            retcols=["access_token", "ip", "user_agent", "last_seen"],
-            desc="get_user_ip_and_agents",
+        def get_recent(txn):
+            txn.execute(
+                """
+                SELECT access_token, ip, user_agent, last_seen FROM user_ips
+                WHERE last_seen >= ? AND user_id = ?
+                ORDER BY last_seen
+                DESC
+                """,
+                (since_ts, user_id),
+            )
+            return txn.fetchall()
+
+        rows = await self.db_pool.runInteraction(
+            desc="get_user_ip_and_agents", func=get_recent
         )
 
         results.update(
diff --git a/tests/module_api/test_api.py b/tests/module_api/test_api.py
index 7dd519cd44a4..9d38974fba93 100644
--- a/tests/module_api/test_api.py
+++ b/tests/module_api/test_api.py
@@ -43,6 +43,7 @@ def prepare(self, reactor, clock, homeserver):
         self.module_api = homeserver.get_module_api()
         self.event_creation_handler = homeserver.get_event_creation_handler()
         self.sync_handler = homeserver.get_sync_handler()
+        self.auth_handler = homeserver.get_auth_handler()
 
     def make_homeserver(self, reactor, clock):
         return self.setup_test_homeserver(
@@ -89,6 +90,77 @@ def test_get_userinfo_by_id__no_user_found(self):
         found_user = self.get_success(self.module_api.get_userinfo_by_id("@alice:test"))
         self.assertIsNone(found_user)
 
+    def test_get_user_ip_and_agents(self):
+        user_id = self.register_user("test_get_user_ip_and_agents_user", "1234")
+
+        # Initially, we should have no ip/agent for our user.
+        info = self.get_success(self.module_api.get_user_ip_and_agents(user_id))
+        self.assertEqual(info, [])
+
+        # Insert a first ip, agent. We should be able to retrieve it.
+        self.get_success(
+            self.store.insert_client_ip(
+                user_id, "access_token", "ip_1", "user_agent_1", "device_1", None
+            )
+        )
+        info = self.get_success(self.module_api.get_user_ip_and_agents(user_id))
+
+        self.assertEqual(len(info), 1)
+        self.assertEqual(info[0].ip, "ip_1")
+        last_seen_1 = info[0].last_seen
+
+        # Insert a second ip, agent at a later date. We should be able to retrieve it.
+        last_seen_2 = last_seen_1 + 10000
+        self.get_success(
+            self.store.insert_client_ip(
+                user_id, "access_token", "ip_2", "user_agent_2", "device_2", last_seen_2
+            )
+        )
+        info = self.get_success(self.module_api.get_user_ip_and_agents(user_id))
+
+        self.assertEqual(len(info), 2)
+        ip_1_seen = False
+        ip_2_seen = False
+
+        for i in info:
+            if i.ip == "ip_1":
+                ip_1_seen = True
+                self.assertEqual(i.user_agent, "user_agent_1")
+                self.assertEqual(i.last_seen, last_seen_1)
+            elif i.ip == "ip_2":
+                ip_2_seen = True
+                self.assertEqual(i.user_agent, "user_agent_2")
+                self.assertEqual(i.last_seen, last_seen_2)
+        self.assertTrue(ip_1_seen)
+        self.assertTrue(ip_2_seen)
+
+        # If we fetch from a midpoint between last_seen_1 and last_seen_2,
+        # we should only find the second ip, agent.
+        info = self.get_success(
+            self.module_api.get_user_ip_and_agents(
+                user_id, (last_seen_1 + last_seen_2) // 2
+            )
+        )
+        self.assertEqual(len(info), 1)
+        self.assertEqual(info[0].ip, "ip_2")
+        self.assertEqual(info[0].user_agent, "user_agent_2")
+        self.assertEqual(info[0].last_seen, last_seen_2)
+
+        # If we fetch from a point later than last_seen_2, we shouldn't
+        # find anything.
+        info = self.get_success(
+            self.module_api.get_user_ip_and_agents(user_id, last_seen_2 + 10000)
+        )
+        self.assertEqual(info, [])
+
+    def test_get_user_ip_and_agents__no_user_found(self):
+        info = self.get_success(
+            self.module_api.get_user_ip_and_agents(
+                "@test_get_user_ip_and_agents_user_nonexistent:example.com"
+            )
+        )
+        self.assertEqual(info, [])
+
     def test_sending_events_into_room(self):
         """Tests that a module can send events into a room"""
         # Mock out create_and_send_nonmember_event to check whether events are being sent