From feb0c7b6ebf5a7fa6d6ad8b47a4128fb43d5d375 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 23 Mar 2020 16:58:29 +0000 Subject: [PATCH] Allow for a validation regex for the next_link query parameter (#285) --- sydent/http/servlets/emailservlet.py | 15 +++++++++- sydent/sydent.py | 22 ++++++++++---- sydent/validators/common.py | 44 +++++++++++++++++++++++++++- 3 files changed, 74 insertions(+), 7 deletions(-) diff --git a/sydent/http/servlets/emailservlet.py b/sydent/http/servlets/emailservlet.py index 393377cb..79ac54b4 100644 --- a/sydent/http/servlets/emailservlet.py +++ b/sydent/http/servlets/emailservlet.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging from twisted.web.resource import Resource from sydent.util.emailutils import EmailAddressException, EmailSendException @@ -22,10 +23,13 @@ IncorrectClientSecretException, NextLinkValidationException, ) +from sydent.validators.common import validate_next_link from sydent.util.stringutils import is_valid_client_secret from sydent.http.servlets import get_args, jsonwrap, send_cors +logger = logging.getLogger(__name__) + class EmailRequestCodeServlet(Resource): isLeaf = True @@ -57,9 +61,18 @@ def render_POST(self, request): ipaddress = self.sydent.ip_from_request(request) nextLink = None - if 'next_link' in args and not args['next_link'].startswith("file:///"): + if 'next_link' in args: nextLink = args['next_link'] + if not validate_next_link(self.sydent, nextLink): + logger.warning( + "Validation attempt rejected as provided 'next_link' value is not " + "http(s) or domain does not match " + "general.next_link.domain_whitelist config value: %s", + nextLink, + ) + return {'errcode': 'M_INVALID_PARAM', 'error': 'Invalid next_link'} + resp = None try: diff --git a/sydent/sydent.py b/sydent/sydent.py index 05f5f9f8..e63fca6a 100644 --- a/sydent/sydent.py +++ b/sydent/sydent.py @@ -20,6 +20,7 @@ import logging import logging.handlers import os +import re import twisted.internet.reactor from twisted.python import log @@ -65,10 +66,11 @@ logger = logging.getLogger(__name__) -def list_from_comma_sep_string(rawstr): + +def set_from_comma_sep_string(rawstr): if rawstr == '': - return [] - return [x.strip() for x in rawstr.split(',')] + return set() + return {x.strip() for x in rawstr.split(',')} CONFIG_DEFAULTS = { @@ -86,6 +88,10 @@ def list_from_comma_sep_string(rawstr): # Path to file detailing the configuration of the /info and /internal-info servlets. # More information can be found in docs/info.md. 'info_path': 'info.yaml', + # A comma-separated domain whitelist used to validate the next_link query parameter + # provided by the client to the /requestToken and /submitToken endpoints + # If empty, no whitelist is applied + 'next_link.domain_whitelist': '' }, 'db': { 'db.file': 'sydent.db', @@ -180,9 +186,15 @@ def sighup(signum, stack): self.shadow_hs_master = self.cfg.get('general', 'shadow.hs.master') self.shadow_hs_slave = self.cfg.get('general', 'shadow.hs.slave') - self.user_dir_allowed_hses = set(list_from_comma_sep_string( + self.user_dir_allowed_hses = set_from_comma_sep_string( self.cfg.get('userdir', 'userdir.allowed_homeservers', '') - )) + ) + + next_link_whitelist = self.cfg.get('general', 'next_link.domain_whitelist') + if next_link_whitelist == '': + self.next_link_domain_whitelist = None + else: + self.next_link_domain_whitelist = set_from_comma_sep_string(next_link_whitelist) self.invites_validity_period = parse_duration( self.cfg.get('general', 'invites.validity_period'), diff --git a/sydent/validators/common.py b/sydent/validators/common.py index 512db7fc..52f33ef8 100644 --- a/sydent/validators/common.py +++ b/sydent/validators/common.py @@ -9,6 +9,7 @@ ) from sydent.util import time_msec from sqlite3 import IntegrityError +from urlparse import urlparse logger = logging.getLogger(__name__) @@ -61,13 +62,23 @@ def validateSessionWithToken(sydent, sid, clientSecret, token, next_link=None): # If so, and the next_link this time around is different, then the # user may be getting phished. Reject the validation attempt. if next_link and valSessionStore.next_link_differs(sid, token, next_link): - logger.info( + logger.warning( "Validation attempt rejected as provided 'next_link' is different " "from that in a previous, successful validation attempt with this " "session id" ) raise NextLinkValidationException() + # Validate the value of next_link + if next_link and not validate_next_link(sydent, next_link): + logger.warning( + "Validation attempt rejected as provided 'next_link' value is not " + "http(s) or domain does not match " + "general.next_link.domain_whitelist config value: %s", + next_link, + ) + raise NextLinkValidationException() + # TODO once we can validate the token oob #if tokenObj.validated and clientSecret == tokenObj.clientSecret: # return True @@ -116,3 +127,34 @@ def validateSessionWithToken(sydent, sid, clientSecret, token, next_link=None): else: logger.info("Incorrect token submitted") return False + + +def validate_next_link(sydent, next_link): + """Return whether a given next_link value is valid. next_link is valid if the scheme is + http(s) and the next_link.domain_whitelist config option is either empty or contains a + domain that matches the one in the given next_link + + :param sydent: A global sydent object + :type sydent: Sydent + + :param next_link: The value of the next_link query parameter + :type next_link: str + + :returns: Whether a given next_link value is valid + :rtype: bool + """ + # Parse the contents of the URL + next_link_parsed = urlparse(next_link) + + # Scheme must be http(s) + if next_link_parsed.scheme not in ["http", "https"]: + return False + + # If the domain whitelist is set, the domain must be in it + if ( + sydent.next_link_domain_whitelist is not None + and next_link_parsed.hostname not in sydent.next_link_domain_whitelist + ): + return False + + return True