Skip to content

Commit 00ec8fb

Browse files
committed
Proxito: refactor canocanical redirects
Closes #10065
1 parent d641add commit 00ec8fb

File tree

8 files changed

+149
-70
lines changed

8 files changed

+149
-70
lines changed

readthedocs/core/resolver.py

+53-1
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
"""URL resolver for documentation."""
2-
32
from urllib.parse import urlunparse
43

54
import structlog
65
from django.conf import settings
76

87
from readthedocs.builds.constants import EXTERNAL
98
from readthedocs.core.utils.extend import SettingsOverrideObject
9+
from readthedocs.core.utils.url import join_url_path
1010

1111
log = structlog.get_logger(__name__)
1212

@@ -214,6 +214,58 @@ def resolve(
214214
)
215215
return urlunparse((protocol, domain, path, '', query_params, ''))
216216

217+
def _get_root_path(self, project):
218+
"""
219+
Returns the root path for a project.
220+
221+
If the project is a subproject, it will return ``/projects/<project-slug>/``.
222+
If the project is a main project, it will return ``/``.
223+
This will respect the custom urlconf of the project if it's defined.
224+
"""
225+
custom_prefix = None
226+
if project.urlconf:
227+
custom_prefix = project.urlconf.split("$", 1)[0]
228+
229+
if project.is_subproject:
230+
prefix = custom_prefix or "projects"
231+
return join_url_path(prefix, project.slug, "/")
232+
233+
prefix = custom_prefix or "/"
234+
return join_url_path(prefix, "/")
235+
236+
def resolve_root(self, project, external_version_slug=None):
237+
"""
238+
Get the root URL from where the documentation of ``project`` is served from.
239+
240+
This doesn't include the version or language. For example:
241+
242+
- https://docs.example.com/projects/<project-slug>/
243+
- https://docs.readthedocs.io/
244+
245+
This will respect the custom urlconf of the project if it's defined.
246+
247+
:param project: Project object to get the root URL from
248+
:param external_version_slug: If given, resolve using the external version domain.
249+
"""
250+
canonical_project = self._get_canonical_project(project)
251+
use_custom_domain = self._use_cname(canonical_project)
252+
custom_domain = canonical_project.get_canonical_custom_domain()
253+
if external_version_slug:
254+
domain = self._get_external_subdomain(
255+
canonical_project, external_version_slug
256+
)
257+
use_https = settings.PUBLIC_DOMAIN_USES_HTTPS
258+
elif use_custom_domain and custom_domain:
259+
domain = custom_domain.domain
260+
use_https = custom_domain.https
261+
else:
262+
domain = self._get_project_subdomain(canonical_project)
263+
use_https = settings.PUBLIC_DOMAIN_USES_HTTPS
264+
265+
protocol = "https" if use_https else "http"
266+
path = self._get_root_path(project)
267+
return urlunparse((protocol, domain, path, "", "", ""))
268+
217269
def _get_canonical_project_data(self, project):
218270
"""
219271
Returns a tuple with (project, subproject_slug) from the canonical project of `project`.

readthedocs/core/utils/url.py

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
"""URL hadling utilities."""
2+
3+
4+
def join_url_path(base, *args):
5+
"""
6+
Joins a base URL path with one or more path components.
7+
8+
This does a simple join of the base path with the path components,
9+
inserting a slash between each component.
10+
It does not offer protection against directory traversal attacks.
11+
"""
12+
base = "/" + base.lstrip("/")
13+
for path in args:
14+
base = base.rstrip("/") + "/" + path.lstrip("/")
15+
return base

readthedocs/projects/models.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -753,7 +753,7 @@ class ProxitoURLConf:
753753

754754
return ProxitoURLConf
755755

756-
@property
756+
@cached_property
757757
def is_subproject(self):
758758
"""Return whether or not this project is a subproject."""
759759
return self.superprojects.exists()

readthedocs/proxito/tests/test_full.py

+21-13
Original file line numberDiff line numberDiff line change
@@ -1395,11 +1395,13 @@ def test_cache_on_private_versions_custom_domain(self):
13951395
self.domain.save()
13961396
self._test_cache_control_header_project(expected_value='private', host=self.domain.domain)
13971397

1398-
# HTTPS redirect respects the privacy level of the version.
1399-
resp = self.client.get('/en/latest/', secure=False, HTTP_HOST=self.domain.domain)
1400-
self.assertEqual(resp['Location'], f'https://{self.domain.domain}/en/latest/')
1401-
self.assertEqual(resp.headers['CDN-Cache-Control'], 'private')
1402-
self.assertEqual(resp.headers['Cache-Tag'], 'project,project:latest')
1398+
# HTTPS redirects can always be cached.
1399+
resp = self.client.get(
1400+
"/en/latest/", secure=False, HTTP_HOST=self.domain.domain
1401+
)
1402+
self.assertEqual(resp["Location"], f"https://{self.domain.domain}/en/latest/")
1403+
self.assertEqual(resp.headers["CDN-Cache-Control"], "public")
1404+
self.assertEqual(resp.headers["Cache-Tag"], "project")
14031405

14041406
def test_cache_public_versions(self):
14051407
self.project.versions.update(privacy_level=PUBLIC)
@@ -1427,15 +1429,18 @@ def test_cache_on_private_versions_custom_domain_subproject(self):
14271429
self.domain.save()
14281430
self._test_cache_control_header_subproject(expected_value='private', host=self.domain.domain)
14291431

1430-
# HTTPS redirect respects the privacy level of the version.
1432+
# HTTPS redirects can always be cached.
14311433
resp = self.client.get(
14321434
'/projects/subproject/en/latest/',
14331435
secure=False,
14341436
HTTP_HOST=self.domain.domain,
14351437
)
1436-
self.assertEqual(resp['Location'], f'https://{self.domain.domain}/projects/subproject/en/latest/')
1437-
self.assertEqual(resp.headers['CDN-Cache-Control'], 'private')
1438-
self.assertEqual(resp.headers['Cache-Tag'], 'subproject,subproject:latest')
1438+
self.assertEqual(
1439+
resp["Location"],
1440+
f"https://{self.domain.domain}/projects/subproject/en/latest/",
1441+
)
1442+
self.assertEqual(resp.headers["CDN-Cache-Control"], "public")
1443+
self.assertEqual(resp.headers["Cache-Tag"], "project")
14391444

14401445
def test_cache_public_versions_subproject(self):
14411446
self.subproject.versions.update(privacy_level=PUBLIC)
@@ -1447,15 +1452,18 @@ def test_cache_public_versions_custom_domain(self):
14471452
self.domain.save()
14481453
self._test_cache_control_header_subproject(expected_value='public', host=self.domain.domain)
14491454

1450-
# HTTPS redirect respects the privacy level of the version.
1455+
# HTTPS redirects can always be cached.
14511456
resp = self.client.get(
14521457
'/projects/subproject/en/latest/',
14531458
secure=False,
14541459
HTTP_HOST=self.domain.domain,
14551460
)
1456-
self.assertEqual(resp['Location'], f'https://{self.domain.domain}/projects/subproject/en/latest/')
1457-
self.assertEqual(resp.headers['CDN-Cache-Control'], 'public')
1458-
self.assertEqual(resp.headers['Cache-Tag'], 'subproject,subproject:latest')
1461+
self.assertEqual(
1462+
resp["Location"],
1463+
f"https://{self.domain.domain}/projects/subproject/en/latest/",
1464+
)
1465+
self.assertEqual(resp.headers["CDN-Cache-Control"], "public")
1466+
self.assertEqual(resp.headers["Cache-Tag"], "project")
14591467

14601468
def test_cache_disable_on_rtd_header_resolved_project(self):
14611469
get(

readthedocs/proxito/tests/test_middleware.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,12 @@ def test_subproject_redirect(self):
116116
)
117117
resp = self.client.get(self.url, HTTP_HOST="subproject.dev.readthedocs.io")
118118
self.assertEqual(resp.status_code, 302)
119-
self.assertEqual(resp["location"], f"http://pip.dev.readthedocs.io/")
120-
self.assertEqual(resp["X-RTD-Redirect"], RedirectType.to_canonical_domain.name)
119+
self.assertEqual(
120+
resp["location"], f"http://pip.dev.readthedocs.io/projects/subproject/"
121+
)
122+
self.assertEqual(
123+
resp["X-RTD-Redirect"], RedirectType.subproject_to_main_domain.name
124+
)
121125

122126
# We are not canonicalizing custom domains -> public domain for now
123127
@pytest.mark.xfail(strict=True)

readthedocs/proxito/tests/test_redirects.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,8 @@ def test_subproject_redirect(self):
8383
r = self.client.get('/', HTTP_HOST='subproject.dev.readthedocs.io')
8484
self.assertEqual(r.status_code, 302)
8585
self.assertEqual(
86-
r['Location'], 'https://project.dev.readthedocs.io/projects/subproject/en/latest/',
86+
r["Location"],
87+
"https://project.dev.readthedocs.io/projects/subproject/",
8788
)
8889

8990
r = self.client.get('/en/latest/', HTTP_HOST='subproject.dev.readthedocs.io')

readthedocs/proxito/views/mixins.py

+23-24
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919
from readthedocs.analytics.utils import get_client_ip
2020
from readthedocs.audit.models import AuditLog
2121
from readthedocs.builds.constants import EXTERNAL, INTERNAL
22-
from readthedocs.core.resolver import resolve
22+
from readthedocs.core.resolver import resolve, resolver
23+
from readthedocs.core.utils.url import join_url_path
2324
from readthedocs.projects.constants import MEDIA_TYPE_HTML
2425
from readthedocs.proxito.constants import RedirectType
2526
from readthedocs.redirects.exceptions import InfiniteRedirectException
@@ -331,10 +332,8 @@ def canonical_redirect(
331332
self,
332333
request,
333334
final_project,
334-
version_slug,
335-
filename,
336335
redirect_type,
337-
is_external_version=False,
336+
external_version_slug=None,
338337
):
339338
"""
340339
Return a redirect to the canonical domain including scheme.
@@ -352,34 +351,30 @@ def canonical_redirect(
352351
353352
:param request: Request object.
354353
:param final_project: The current project being served.
355-
:param version_slug: The current version slug being served.
356-
:param filename: The filename being served.
357354
:param redirect_type: The type of canonical redirect (https, canonical-cname, subproject-main-domain)
358-
:param external: If the version is from a pull request preview.
355+
:param external_version_slug: The version slug if the request is from a pull request preview.
359356
"""
360357
from_url = request.build_absolute_uri()
361358
parsed_from = urlparse(from_url)
362359

363360
if redirect_type == RedirectType.http_to_https:
364361
to = parsed_from._replace(scheme="https").geturl()
365-
elif redirect_type in [
366-
RedirectType.to_canonical_domain,
367-
RedirectType.subproject_to_main_domain,
368-
]:
369-
to = resolve(
362+
elif redirect_type == RedirectType.to_canonical_domain:
363+
canonical_domain = final_project.get_canonical_custom_domain()
364+
protocol = "https" if canonical_domain.https else "http"
365+
to = parsed_from._replace(
366+
scheme=protocol, netloc=canonical_domain.domain
367+
).geturl()
368+
elif redirect_type == RedirectType.subproject_to_main_domain:
369+
project_doc_root = resolver.resolve_root(
370370
project=final_project,
371-
version_slug=version_slug,
372-
filename=filename,
373-
query_params=parsed_from.query,
374-
external=is_external_version,
371+
external_version_slug=external_version_slug,
375372
)
376-
# When a canonical redirect is done, only change the domain.
377-
if redirect_type == RedirectType.to_canonical_domain:
378-
parsed_to = urlparse(to)
379-
to = parsed_from._replace(
380-
scheme=parsed_to.scheme,
381-
netloc=parsed_to.netloc,
382-
).geturl()
373+
parsed_doc_root = urlparse(project_doc_root)
374+
to = parsed_doc_root._replace(
375+
path=join_url_path(parsed_doc_root.path, parsed_from.path),
376+
query=parsed_from.query,
377+
).geturl()
383378
else:
384379
raise NotImplementedError
385380

@@ -391,7 +386,11 @@ def canonical_redirect(
391386
)
392387
raise InfiniteRedirectException()
393388

394-
log.info('Canonical Redirect.', host=request.get_host(), from_url=filename, to_url=to)
389+
log.info(
390+
"Canonical Redirect.", host=request.get_host(), from_url=from_url, to_url=to
391+
)
392+
# Canocanical redirects can be cached, since the final URL will check for authz.
393+
self.cache_request = True
395394
resp = HttpResponseRedirect(to)
396395
resp["X-RTD-Redirect"] = redirect_type.name
397396
return resp

readthedocs/proxito/views/serve.py

+28-28
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,23 @@ def get(self,
8989
``subproject_slash`` is used to determine if the subproject URL has a slash,
9090
so that we can decide if we need to serve docs or add a /.
9191
"""
92+
unresolved_domain = request.unresolved_domain
93+
# Handle requests that need canonicalizing (eg. HTTP -> HTTPS, redirect to canonical domain).
94+
redirect_type = self._get_canonical_redirect_type(request)
95+
if redirect_type:
96+
# Attach the current project to the request.
97+
request.path_project_slug = unresolved_domain.project.slug
98+
try:
99+
return self.canonical_redirect(
100+
request=request,
101+
final_project=unresolved_domain.project,
102+
external_version_slug=unresolved_domain.external_version_slug,
103+
redirect_type=redirect_type,
104+
)
105+
except InfiniteRedirectException:
106+
# Don't redirect in this case, since it would break things.
107+
pass
108+
92109
version_slug = self.get_version_from_host(request, version_slug)
93110
final_project, lang_slug, version_slug, filename = _get_project_data_from_request( # noqa
94111
request,
@@ -100,7 +117,7 @@ def get(self,
100117
)
101118
version = final_project.versions.filter(slug=version_slug).first()
102119

103-
is_external = request.unresolved_domain.is_from_external_domain
120+
is_external = unresolved_domain.is_from_external_domain
104121

105122
log.bind(
106123
project_slug=final_project.slug,
@@ -137,26 +154,6 @@ def get(self,
137154
if spam_response:
138155
return spam_response
139156

140-
# Handle requests that need canonicalizing (eg. HTTP -> HTTPS, redirect to canonical domain)
141-
redirect_type = self._get_canonical_redirect_type(request)
142-
if redirect_type:
143-
# A canonical redirect can be cached, if we don't have information
144-
# about the version, since the final URL will check for authz.
145-
if not version and self._is_cache_enabled(final_project):
146-
self.cache_request = True
147-
try:
148-
return self.canonical_redirect(
149-
request=request,
150-
final_project=final_project,
151-
version_slug=version_slug,
152-
filename=filename,
153-
redirect_type=redirect_type,
154-
is_external_version=is_external,
155-
)
156-
except InfiniteRedirectException:
157-
# Don't redirect in this case, since it would break things
158-
pass
159-
160157
# Handle a / redirect when we aren't a single version
161158
if all([
162159
lang_slug is None,
@@ -252,6 +249,16 @@ def _get_canonical_redirect_type(self, request):
252249
log.debug("Proxito CNAME HTTPS Redirect.", domain=domain.domain)
253250
return RedirectType.http_to_https
254251

252+
# Check for subprojects before checking for canonical domains,
253+
# so we can redirect to the main domain first.
254+
# Custom domains on subprojects are not supported.
255+
if project.is_subproject:
256+
log.debug(
257+
"Proxito Public Domain -> Subproject Main Domain Redirect.",
258+
project_slug=project.slug,
259+
)
260+
return RedirectType.subproject_to_main_domain
261+
255262
if unresolved_domain.is_from_public_domain:
256263
canonical_domain = (
257264
Domain.objects.filter(project=project)
@@ -265,13 +272,6 @@ def _get_canonical_redirect_type(self, request):
265272
)
266273
return RedirectType.to_canonical_domain
267274

268-
if project.is_subproject:
269-
log.debug(
270-
"Proxito Public Domain -> Subproject Main Domain Redirect.",
271-
project_slug=project.slug,
272-
)
273-
return RedirectType.subproject_to_main_domain
274-
275275
return None
276276

277277

0 commit comments

Comments
 (0)