Skip to content

Commit c352332

Browse files
authored
Unresolver: allow a full URL when un-resolving a domain (#11632)
This also checks that the URL has a valid protocol. This is needed for readthedocs/readthedocs-corporate#1887.
1 parent 647b10b commit c352332

File tree

2 files changed

+39
-1
lines changed

2 files changed

+39
-1
lines changed

readthedocs/core/unresolver.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ def unresolve_url(self, url, append_indexhtml=True):
208208
parsed_url = urlparse(url)
209209
if parsed_url.scheme not in ["http", "https"]:
210210
raise InvalidSchemeError(parsed_url.scheme)
211-
domain = self.get_domain_from_host(parsed_url.netloc)
211+
domain = parsed_url.hostname
212212
unresolved_domain = self.unresolve_domain(domain)
213213
return self._unresolve(
214214
unresolved_domain=unresolved_domain,
@@ -551,8 +551,18 @@ def unresolve_domain(self, domain):
551551
Unresolve domain by extracting relevant information from it.
552552
553553
:param str domain: Domain to extract the information from.
554+
It can be a full URL; in that case, only the domain is used.
554555
:returns: An UnresolvedDomain object.
555556
"""
557+
parsed_domain = urlparse(domain)
558+
if parsed_domain.scheme:
559+
if parsed_domain.scheme not in ["http", "https"]:
560+
raise InvalidSchemeError(parsed_domain.scheme)
561+
domain = parsed_domain.hostname
562+
563+
if not domain:
564+
raise InvalidSubdomainError(domain)
565+
556566
public_domain = self.get_domain_from_host(settings.PUBLIC_DOMAIN)
557567
external_domain = self.get_domain_from_host(
558568
settings.RTD_EXTERNAL_VERSION_DOMAIN

readthedocs/rtd_tests/tests/test_unresolver.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,13 @@
1111
InvalidExternalVersionError,
1212
InvalidPathForVersionedProjectError,
1313
InvalidSchemeError,
14+
InvalidSubdomainError,
1415
SuspiciousHostnameError,
1516
TranslationNotFoundError,
1617
TranslationWithoutVersionError,
1718
VersionNotFoundError,
1819
unresolve,
20+
unresolver,
1921
)
2022
from readthedocs.projects.constants import SINGLE_VERSION_WITHOUT_TRANSLATIONS
2123
from readthedocs.projects.models import Domain
@@ -372,8 +374,34 @@ def test_unresolve_invalid_scheme(self):
372374
"fttp://pip.readthedocs.io/en/latest/",
373375
"fttps://pip.readthedocs.io/en/latest/",
374376
"ssh://pip.readthedocs.io/en/latest/",
377+
"javascript://pip.readthedocs.io/en/latest/",
375378
"://pip.readthedocs.io/en/latest/",
376379
]
377380
for url in invalid_urls:
378381
with pytest.raises(InvalidSchemeError):
379382
unresolve(url)
383+
384+
# A triple slash is interpreted as a URL without a domain;
385+
# we don't support that.
386+
with pytest.raises(InvalidSubdomainError):
387+
unresolve("https:///pip.readthedocs.io/en/latest/")
388+
389+
def test_unresolve_domain_with_full_url(self):
390+
result = unresolver.unresolve_domain("https://pip.readthedocs.io/en/latest/")
391+
self.assertIsNone(result.domain)
392+
self.assertEqual(result.project, self.pip)
393+
self.assertTrue(result.is_from_public_domain)
394+
self.assertEqual(result.source_domain, "pip.readthedocs.io")
395+
396+
def test_unresolve_domain_with_full_url_invalid_protocol(self):
397+
invalid_protocols = [
398+
"fttp",
399+
"fttps",
400+
"ssh",
401+
"javascript",
402+
]
403+
for protocol in invalid_protocols:
404+
with pytest.raises(InvalidSchemeError):
405+
unresolver.unresolve_domain(
406+
f"{protocol}://pip.readthedocs.io/en/latest/"
407+
)

0 commit comments

Comments
 (0)