Skip to content

Commit e368e05

Browse files
committed
Proxito: refactor canocanical redirects
Closes #10065
1 parent d499203 commit e368e05

File tree

8 files changed

+143
-67
lines changed

8 files changed

+143
-67
lines changed

readthedocs/core/resolver.py

+48-1
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
"""URL resolver for documentation."""
2-
32
from urllib.parse import urlunparse
43

54
import structlog
65
from django.conf import settings
76

87
from readthedocs.builds.constants import EXTERNAL
98
from readthedocs.core.utils.extend import SettingsOverrideObject
9+
from readthedocs.core.utils.url import join_url_path
1010

1111
log = structlog.get_logger(__name__)
1212

@@ -214,6 +214,53 @@ def resolve(
214214
)
215215
return urlunparse((protocol, domain, path, '', query_params, ''))
216216

217+
def _get_root_path(self, project):
218+
"""
219+
Returns the root path for a project.
220+
221+
If the project is a subproject, it will return ``/projects/<project-slug>/``.
222+
If the project is a main project, it will return ``/``.
223+
This will respect the custom urlconf of the project if it's defined.
224+
"""
225+
custom_prefix = None
226+
if project.urlconf:
227+
custom_prefix = project.urlconf.split("$", 1)[0]
228+
229+
if project.is_subproject:
230+
prefix = custom_prefix or "projects"
231+
return join_url_path(prefix, project.slug, "/")
232+
233+
prefix = custom_prefix or "/"
234+
return join_url_path(prefix, "/")
235+
236+
def resolve_root(self, project, external_version_slug=None):
237+
"""
238+
Get the root URL from where the documentation of ``project`` is served from.
239+
240+
This doesn't include the version or language. For example:
241+
242+
- https://docs.example.com/projects/<project-slug>/
243+
- https://docs.readthedocs.io/
244+
"""
245+
canonical_project = self._get_canonical_project(project)
246+
use_custom_domain = self._use_cname(canonical_project)
247+
custom_domain = canonical_project.get_canonical_custom_domain()
248+
if external_version_slug:
249+
domain = self._get_external_subdomain(
250+
canonical_project, external_version_slug
251+
)
252+
use_https = settings.PUBLIC_DOMAIN_USES_HTTPS
253+
elif use_custom_domain and custom_domain:
254+
domain = custom_domain.domain
255+
use_https = custom_domain.https
256+
else:
257+
domain = self._get_project_subdomain(canonical_project)
258+
use_https = settings.PUBLIC_DOMAIN_USES_HTTPS
259+
260+
protocol = "https" if use_https else "http"
261+
path = self._get_root_path(project)
262+
return urlunparse((protocol, domain, path, "", "", ""))
263+
217264
def _get_canonical_project_data(self, project):
218265
"""
219266
Returns a tuple with (project, subproject_slug) from the canonical project of `project`.

readthedocs/core/utils/url.py

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
"""URL hadling utilities."""
2+
3+
4+
def join_url_path(base, *args):
5+
"""
6+
Joins a base URL path with one or more path components.
7+
8+
This does a simple join of the base path with the path components,
9+
inserting a slash between each component.
10+
It does not offer protection against directory traversal attacks.
11+
"""
12+
base = "/" + base.lstrip("/")
13+
for path in args:
14+
base = base.rstrip("/") + "/" + path.lstrip("/")
15+
return base

readthedocs/projects/models.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -753,7 +753,7 @@ class ProxitoURLConf:
753753

754754
return ProxitoURLConf
755755

756-
@property
756+
@cached_property
757757
def is_subproject(self):
758758
"""Return whether or not this project is a subproject."""
759759
return self.superprojects.exists()

readthedocs/proxito/tests/test_full.py

+21-13
Original file line numberDiff line numberDiff line change
@@ -1395,11 +1395,13 @@ def test_cache_on_private_versions_custom_domain(self):
13951395
self.domain.save()
13961396
self._test_cache_control_header_project(expected_value='private', host=self.domain.domain)
13971397

1398-
# HTTPS redirect respects the privacy level of the version.
1399-
resp = self.client.get('/en/latest/', secure=False, HTTP_HOST=self.domain.domain)
1400-
self.assertEqual(resp['Location'], f'https://{self.domain.domain}/en/latest/')
1401-
self.assertEqual(resp.headers['CDN-Cache-Control'], 'private')
1402-
self.assertEqual(resp.headers['Cache-Tag'], 'project,project:latest')
1398+
# HTTPS redirects can always be cached.
1399+
resp = self.client.get(
1400+
"/en/latest/", secure=False, HTTP_HOST=self.domain.domain
1401+
)
1402+
self.assertEqual(resp["Location"], f"https://{self.domain.domain}/en/latest/")
1403+
self.assertEqual(resp.headers["CDN-Cache-Control"], "public")
1404+
self.assertEqual(resp.headers["Cache-Tag"], "project")
14031405

14041406
def test_cache_public_versions(self):
14051407
self.project.versions.update(privacy_level=PUBLIC)
@@ -1427,15 +1429,18 @@ def test_cache_on_private_versions_custom_domain_subproject(self):
14271429
self.domain.save()
14281430
self._test_cache_control_header_subproject(expected_value='private', host=self.domain.domain)
14291431

1430-
# HTTPS redirect respects the privacy level of the version.
1432+
# HTTPS redirects can always be cached.
14311433
resp = self.client.get(
14321434
'/projects/subproject/en/latest/',
14331435
secure=False,
14341436
HTTP_HOST=self.domain.domain,
14351437
)
1436-
self.assertEqual(resp['Location'], f'https://{self.domain.domain}/projects/subproject/en/latest/')
1437-
self.assertEqual(resp.headers['CDN-Cache-Control'], 'private')
1438-
self.assertEqual(resp.headers['Cache-Tag'], 'subproject,subproject:latest')
1438+
self.assertEqual(
1439+
resp["Location"],
1440+
f"https://{self.domain.domain}/projects/subproject/en/latest/",
1441+
)
1442+
self.assertEqual(resp.headers["CDN-Cache-Control"], "public")
1443+
self.assertEqual(resp.headers["Cache-Tag"], "project")
14391444

14401445
def test_cache_public_versions_subproject(self):
14411446
self.subproject.versions.update(privacy_level=PUBLIC)
@@ -1447,15 +1452,18 @@ def test_cache_public_versions_custom_domain(self):
14471452
self.domain.save()
14481453
self._test_cache_control_header_subproject(expected_value='public', host=self.domain.domain)
14491454

1450-
# HTTPS redirect respects the privacy level of the version.
1455+
# HTTPS redirects can always be cached.
14511456
resp = self.client.get(
14521457
'/projects/subproject/en/latest/',
14531458
secure=False,
14541459
HTTP_HOST=self.domain.domain,
14551460
)
1456-
self.assertEqual(resp['Location'], f'https://{self.domain.domain}/projects/subproject/en/latest/')
1457-
self.assertEqual(resp.headers['CDN-Cache-Control'], 'public')
1458-
self.assertEqual(resp.headers['Cache-Tag'], 'subproject,subproject:latest')
1461+
self.assertEqual(
1462+
resp["Location"],
1463+
f"https://{self.domain.domain}/projects/subproject/en/latest/",
1464+
)
1465+
self.assertEqual(resp.headers["CDN-Cache-Control"], "public")
1466+
self.assertEqual(resp.headers["Cache-Tag"], "project")
14591467

14601468
def test_cache_disable_on_rtd_header_resolved_project(self):
14611469
get(

readthedocs/proxito/tests/test_middleware.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,12 @@ def test_subproject_redirect(self):
116116
)
117117
resp = self.client.get(self.url, HTTP_HOST="subproject.dev.readthedocs.io")
118118
self.assertEqual(resp.status_code, 302)
119-
self.assertEqual(resp["location"], f"http://pip.dev.readthedocs.io/")
120-
self.assertEqual(resp["X-RTD-Redirect"], RedirectType.to_canonical_domain.name)
119+
self.assertEqual(
120+
resp["location"], f"http://pip.dev.readthedocs.io/projects/subproject/"
121+
)
122+
self.assertEqual(
123+
resp["X-RTD-Redirect"], RedirectType.subproject_to_main_domain.name
124+
)
121125

122126
# We are not canonicalizing custom domains -> public domain for now
123127
@pytest.mark.xfail(strict=True)

readthedocs/proxito/tests/test_redirects.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,8 @@ def test_subproject_redirect(self):
8383
r = self.client.get('/', HTTP_HOST='subproject.dev.readthedocs.io')
8484
self.assertEqual(r.status_code, 302)
8585
self.assertEqual(
86-
r['Location'], 'https://project.dev.readthedocs.io/projects/subproject/en/latest/',
86+
r["Location"],
87+
"https://project.dev.readthedocs.io/projects/subproject/",
8788
)
8889

8990
r = self.client.get('/en/latest/', HTTP_HOST='subproject.dev.readthedocs.io')

readthedocs/proxito/views/mixins.py

+22-21
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919
from readthedocs.analytics.utils import get_client_ip
2020
from readthedocs.audit.models import AuditLog
2121
from readthedocs.builds.constants import EXTERNAL, INTERNAL
22-
from readthedocs.core.resolver import resolve
22+
from readthedocs.core.resolver import resolve, resolver
23+
from readthedocs.core.utils.url import join_url_path
2324
from readthedocs.projects.constants import MEDIA_TYPE_HTML
2425
from readthedocs.proxito.constants import RedirectType
2526
from readthedocs.redirects.exceptions import InfiniteRedirectException
@@ -331,10 +332,8 @@ def canonical_redirect(
331332
self,
332333
request,
333334
final_project,
334-
version_slug,
335-
filename,
335+
external_version_slug,
336336
redirect_type,
337-
is_external_version=False,
338337
):
339338
"""
340339
Return a redirect to the canonical domain including scheme.
@@ -362,24 +361,22 @@ def canonical_redirect(
362361

363362
if redirect_type == RedirectType.http_to_https:
364363
to = parsed_from._replace(scheme="https").geturl()
365-
elif redirect_type in [
366-
RedirectType.to_canonical_domain,
367-
RedirectType.subproject_to_main_domain,
368-
]:
369-
to = resolve(
364+
elif redirect_type == RedirectType.to_canonical_domain:
365+
canonical_domain = final_project.get_canonical_custom_domain()
366+
protocol = "https" if canonical_domain.https else "http"
367+
to = parsed_from._replace(
368+
scheme=protocol, netloc=canonical_domain.domain
369+
).geturl()
370+
elif redirect_type == RedirectType.subproject_to_main_domain:
371+
project_doc_root = resolver.resolve_root(
370372
project=final_project,
371-
version_slug=version_slug,
372-
filename=filename,
373-
query_params=parsed_from.query,
374-
external=is_external_version,
373+
external_version_slug=external_version_slug,
375374
)
376-
# When a canonical redirect is done, only change the domain.
377-
if redirect_type == RedirectType.to_canonical_domain:
378-
parsed_to = urlparse(to)
379-
to = parsed_from._replace(
380-
scheme=parsed_to.scheme,
381-
netloc=parsed_to.netloc,
382-
).geturl()
375+
parsed_doc_root = urlparse(project_doc_root)
376+
to = parsed_doc_root._replace(
377+
path=join_url_path(parsed_doc_root.path, parsed_from.path),
378+
query=parsed_from.query,
379+
).geturl()
383380
else:
384381
raise NotImplementedError
385382

@@ -391,7 +388,11 @@ def canonical_redirect(
391388
)
392389
raise InfiniteRedirectException()
393390

394-
log.info('Canonical Redirect.', host=request.get_host(), from_url=filename, to_url=to)
391+
log.info(
392+
"Canonical Redirect.", host=request.get_host(), from_url=from_url, to_url=to
393+
)
394+
# Canocanical redirects can be cached, since the final URL will check for authz.
395+
self.cache_request = True
395396
resp = HttpResponseRedirect(to)
396397
resp["X-RTD-Redirect"] = redirect_type.name
397398
return resp

readthedocs/proxito/views/serve.py

+28-28
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,23 @@ def get(self,
8989
``subproject_slash`` is used to determine if the subproject URL has a slash,
9090
so that we can decide if we need to serve docs or add a /.
9191
"""
92+
unresolved_domain = request.unresolved_domain
93+
# Handle requests that need canonicalizing (eg. HTTP -> HTTPS, redirect to canonical domain)
94+
redirect_type = self._get_canonical_redirect_type(request)
95+
if redirect_type:
96+
# Attach the current project to the request.
97+
request.path_project_slug = unresolved_domain.project.slug
98+
try:
99+
return self.canonical_redirect(
100+
request=request,
101+
final_project=unresolved_domain.project,
102+
external_version_slug=unresolved_domain.external_version_slug,
103+
redirect_type=redirect_type,
104+
)
105+
except InfiniteRedirectException:
106+
# Don't redirect in this case, since it would break things
107+
pass
108+
92109
version_slug = self.get_version_from_host(request, version_slug)
93110
final_project, lang_slug, version_slug, filename = _get_project_data_from_request( # noqa
94111
request,
@@ -100,7 +117,7 @@ def get(self,
100117
)
101118
version = final_project.versions.filter(slug=version_slug).first()
102119

103-
is_external = request.unresolved_domain.is_from_external_domain
120+
is_external = unresolved_domain.is_from_external_domain
104121

105122
log.bind(
106123
project_slug=final_project.slug,
@@ -137,26 +154,6 @@ def get(self,
137154
if spam_response:
138155
return spam_response
139156

140-
# Handle requests that need canonicalizing (eg. HTTP -> HTTPS, redirect to canonical domain)
141-
redirect_type = self._get_canonical_redirect_type(request)
142-
if redirect_type:
143-
# A canonical redirect can be cached, if we don't have information
144-
# about the version, since the final URL will check for authz.
145-
if not version and self._is_cache_enabled(final_project):
146-
self.cache_request = True
147-
try:
148-
return self.canonical_redirect(
149-
request=request,
150-
final_project=final_project,
151-
version_slug=version_slug,
152-
filename=filename,
153-
redirect_type=redirect_type,
154-
is_external_version=is_external,
155-
)
156-
except InfiniteRedirectException:
157-
# Don't redirect in this case, since it would break things
158-
pass
159-
160157
# Handle a / redirect when we aren't a single version
161158
if all([
162159
lang_slug is None,
@@ -252,6 +249,16 @@ def _get_canonical_redirect_type(self, request):
252249
log.debug("Proxito CNAME HTTPS Redirect.", domain=domain.domain)
253250
return RedirectType.http_to_https
254251

252+
# Check for subprojects before checking for canonical domains,
253+
# so we can redirect to the main domain first.
254+
# Custom domains on subprojects are not supported.
255+
if project.is_subproject:
256+
log.debug(
257+
"Proxito Public Domain -> Subproject Main Domain Redirect.",
258+
project_slug=project.slug,
259+
)
260+
return RedirectType.subproject_to_main_domain
261+
255262
if unresolved_domain.is_from_public_domain:
256263
canonical_domain = (
257264
Domain.objects.filter(project=project)
@@ -265,13 +272,6 @@ def _get_canonical_redirect_type(self, request):
265272
)
266273
return RedirectType.to_canonical_domain
267274

268-
if project.is_subproject:
269-
log.debug(
270-
"Proxito Public Domain -> Subproject Main Domain Redirect.",
271-
project_slug=project.slug,
272-
)
273-
return RedirectType.subproject_to_main_domain
274-
275275
return None
276276

277277

0 commit comments

Comments
 (0)