Skip to content

Commit

Permalink
Don't perform www. prefix removal on 2nd level domains. Fixes interne…
Browse files Browse the repository at this point in the history
  • Loading branch information
tfmorris committed Aug 28, 2023
1 parent 6934c32 commit 25d87fb
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 3 deletions.
4 changes: 2 additions & 2 deletions surt/IAURLCanonicalizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,12 @@ def alphaReorderQuery(orig):

# massageHost()
#_______________________________________________________________________________
# Matches a leading "www" label (optionally followed by digits) only when it
# is followed by at least one more character and a further dot, i.e. when
# something with a dot in it remains after the prefix. Group 1 captures just
# the "www<digits>." prefix so that only that part is stripped. A host such
# as b'www1234.com' does not match and is therefore left intact.
_RE_WWWDIGITS = re.compile(rb'(www\d*\.).+\.')


def massageHost(host):
    """Strip a leading ``www<digits>.`` label from *host*.

    The prefix is removed only when the rest of the host still contains a
    dot, so second-level domains that merely start with "www" keep their
    name. Only one leading label is removed per call.

    :param host: host name as ``bytes`` (the pattern is a bytes pattern, so
        a ``str`` argument raises ``TypeError`` from the ``re`` module).
    :return: *host* without the leading ``www<digits>.`` label, or *host*
        unchanged when the pattern does not match.

    Examples::

        massageHost(b'www.archive.org')      -> b'archive.org'
        massageHost(b'www1.www.archive.org') -> b'www.archive.org'
        massageHost(b'www1234.com')          -> b'www1234.com'
    """
    m = _RE_WWWDIGITS.match(host)
    if m:
        # Slice off group 1 only; group 0 also spans the following labels.
        return host[len(m.group(1)):]
    else:
        return host

Expand Down
6 changes: 5 additions & 1 deletion tests/test_surt.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,11 @@ def test_surt():
assert surt.surt("dns:alexa.com") == 'dns:alexa.com'
assert surt.surt("dns:archive.org") == 'dns:archive.org'

assert surt.surt("http://www1234.com/") == 'com,www1234)/'
assert surt.surt("http://www.archive.org/") == 'org,archive)/'
assert surt.surt("https://www.archive.org/") == 'org,archive)/'
assert surt.surt("http://www1.archive.org/") == 'org,archive)/'
assert surt.surt("http://www1.www.archive.org/") == 'org,archive,www)/'
assert surt.surt("http://archive.org/") == 'org,archive)/'
assert surt.surt("http://archive.org/goo/") == 'org,archive)/goo'
assert surt.surt("http://archive.org/goo/?") == 'org,archive)/goo'
Expand Down Expand Up @@ -362,7 +366,7 @@ def test_surt_ipaddress(url, opts, out):
])
def test_surt_return_type(burl):
"""surt.surt() returns the same type of string object (i.e. returns unicode
string for unicode string input, and byets for bytes)
string for unicode string input, and bytes for bytes)
Note this behavior may change in the future versions. This test is for
testing compatibility until that happens.
Expand Down

0 comments on commit 25d87fb

Please sign in to comment.