Skip to content

Commit 9d07164

Browse files
authored
Proxito: simplify caching logic (#10067)
Caching is enabled for all projects now. We always check for the privacy level to explicitly cache or not a response, if the response isn't explicitly cached or not, we use the default, this default depends if we are on .org or .com (we cache everything on .org, and on .com we cache responses attached to public versions). Closes #10031
1 parent 337f26f commit 9d07164

File tree

12 files changed

+123
-82
lines changed

12 files changed

+123
-82
lines changed

readthedocs/analytics/proxied_api.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,13 @@
88

99
from readthedocs.analytics.models import PageView
1010
from readthedocs.api.v2.permissions import IsAuthorizedToViewVersion
11+
from readthedocs.core.mixins import CDNCacheControlMixin
1112
from readthedocs.core.unresolver import UnresolverError, unresolve
1213
from readthedocs.core.utils.extend import SettingsOverrideObject
1314
from readthedocs.projects.models import Project
1415

1516

16-
class BaseAnalyticsView(APIView):
17+
class BaseAnalyticsView(CDNCacheControlMixin, APIView):
1718

1819
"""
1920
Track page views.
@@ -25,6 +26,9 @@ class BaseAnalyticsView(APIView):
2526
- absolute_uri: Full path with domain.
2627
"""
2728

29+
# We always want to hit our analytics endpoint,
30+
# so we capture all views/interactions.
31+
cache_response = False
2832
http_method_names = ['get']
2933
permission_classes = [IsAuthorizedToViewVersion]
3034

readthedocs/analytics/tests.py

+5
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,11 @@ def test_invalid_uri(self):
128128
self.client.get(url, HTTP_HOST=self.host)
129129
assert PageView.objects.all().count() == 0
130130

131+
def test_cache_headers(self):
132+
resp = self.client.get(self.url, HTTP_HOST=self.host)
133+
self.assertEqual(resp.status_code, 200)
134+
self.assertEqual(resp["CDN-Cache-Control"], "private")
135+
131136
def test_increase_page_view_count(self):
132137
assert (
133138
PageView.objects.all().count() == 0

readthedocs/builds/models.py

+10
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,16 @@ def is_private(self):
217217
return self.project.external_builds_privacy_level == PRIVATE
218218
return self.privacy_level == PRIVATE
219219

220+
@property
221+
def is_public(self):
222+
"""
223+
Check if the version is public (taking external versions into consideration).
224+
225+
This is basically ``is_private`` negated,
226+
``is_private`` understands both normal and external versions
227+
"""
228+
return not self.is_private
229+
220230
@property
221231
def is_external(self):
222232
return self.type == EXTERNAL

readthedocs/core/mixins.py

+15-25
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,7 @@
11
"""Common mixin classes for views."""
2-
from functools import lru_cache
3-
4-
from django.conf import settings
52
from django.contrib.auth.mixins import LoginRequiredMixin
63
from vanilla import ListView
74

8-
from readthedocs.projects.models import Feature
9-
from readthedocs.subscriptions.models import PlanFeature
10-
115

126
class ListViewWithForm(ListView):
137

@@ -34,41 +28,37 @@ class ProxiedAPIMixin:
3428
class CDNCacheControlMixin:
3529

3630
"""
37-
Allow to cache views at the CDN level when privacy levels are enabled.
31+
Explicitly cache or not a view at the CDN level.
3832
39-
The cache control header is only used when privacy levels
40-
are enabled (otherwise everything is public by default).
33+
The cache control header is used to mark the response as public/private,
34+
so it can be cached or not.
4135
4236
Views that can be cached should always return the same response for all
4337
users (anonymous and authenticated users), like when the version attached
4438
to the request is public.
4539
46-
To cache a view you can either set the `cache_request` attribute to `True`,
47-
or override the `can_be_cached` method.
40+
To explicitly cache a view you can either set the `cache_response`
41+
attribute to `True`/`False`, or override the `can_be_cached` method
42+
(which defaults to return the `cache_response` attribute).
43+
If set to `None` (default), the cache header won't be set, so the default
44+
value can be set by our middleware (public for .org and private for .com).
4845
4946
We use ``CDN-Cache-Control``, to control caching at the CDN level only.
5047
This doesn't affect caching at the browser level (``Cache-Control``).
5148
5249
See https://developers.cloudflare.com/cache/about/cdn-cache-control.
5350
"""
5451

55-
cache_request = False
52+
cache_response = None
5653

5754
def dispatch(self, request, *args, **kwargs):
5855
response = super().dispatch(request, *args, **kwargs)
59-
if settings.ALLOW_PRIVATE_REPOS and self.can_be_cached(request):
60-
response.headers['CDN-Cache-Control'] = 'public'
56+
cache_response = self.can_be_cached(request)
57+
if cache_response is not None:
58+
response.headers["CDN-Cache-Control"] = (
59+
"public" if cache_response else "private"
60+
)
6161
return response
6262

6363
def can_be_cached(self, request):
64-
return self.cache_request
65-
66-
@lru_cache(maxsize=1)
67-
def _is_cache_enabled(self, project):
68-
"""Helper function to check if CDN is enabled for a project."""
69-
plan_has_cdn = PlanFeature.objects.get_feature(
70-
obj=project, type=PlanFeature.TYPE_CDN
71-
)
72-
return settings.ALLOW_PRIVATE_REPOS and (
73-
plan_has_cdn or project.has_feature(Feature.CDN_ENABLED)
74-
)
64+
return self.cache_response

readthedocs/core/views/__init__.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from django.shortcuts import render
1212
from django.views.generic import TemplateView, View
1313

14-
from readthedocs.core.mixins import PrivateViewMixin
14+
from readthedocs.core.mixins import CDNCacheControlMixin, PrivateViewMixin
1515
from readthedocs.projects.models import Project
1616

1717
log = structlog.get_logger(__name__)
@@ -21,7 +21,12 @@ class NoProjectException(Exception):
2121
pass
2222

2323

24-
class HealthCheckView(View):
24+
class HealthCheckView(CDNCacheControlMixin, View):
25+
# Never cache this view, we always want to get the live response from the server.
26+
# In production we should configure the health check to hit the LB directly,
27+
# but it's useful to be careful here in case of a misconfiguration.
28+
cache_response = False
29+
2530
def get(self, request, *args, **kwargs):
2631
return JsonResponse({'status': 200}, status=200)
2732

readthedocs/projects/views/public.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
)
2828
from readthedocs.builds.models import Version
2929
from readthedocs.builds.views import BuildTriggerMixin
30+
from readthedocs.core.mixins import CDNCacheControlMixin
3031
from readthedocs.core.permissions import AdminPermission
3132
from readthedocs.core.utils.extend import SettingsOverrideObject
3233
from readthedocs.projects.filters import ProjectVersionListFilterSet
@@ -307,7 +308,7 @@ def project_downloads(request, project_slug):
307308
)
308309

309310

310-
class ProjectDownloadMediaBase(ServeDocsMixin, View):
311+
class ProjectDownloadMediaBase(CDNCacheControlMixin, ServeDocsMixin, View):
311312

312313
# Use new-style URLs (same domain as docs) or old-style URLs (dashboard URL)
313314
same_domain_url = False
@@ -361,6 +362,9 @@ def get(
361362
slug=version_slug,
362363
)
363364

365+
# All public versions can be cached.
366+
self.cache_response = version.is_public
367+
364368
else:
365369
# All the arguments come from the URL.
366370
version = get_object_or_404(

readthedocs/proxito/middleware.py

+15-10
Original file line numberDiff line numberDiff line change
@@ -156,27 +156,32 @@ def add_cache_headers(self, request, response):
156156
"""
157157
Add Cache-Control headers.
158158
159-
If privacy levels are enabled and the header isn't already present,
160-
set the cache level to private.
161-
Or if the request was from the `X-RTD-Slug` header,
162-
we don't cache the response, since we could be caching a response in another domain.
159+
If the `CDN-Cache-Control` header isn't already present, set the cache
160+
level to public or private, depending if we allow private repos or not.
161+
Or if the request was from the `X-RTD-Slug` header, we don't cache the
162+
response, since we could be caching a response in another domain.
163163
164164
We use ``CDN-Cache-Control``, to control caching at the CDN level only.
165165
This doesn't affect caching at the browser level (``Cache-Control``).
166166
167167
See https://developers.cloudflare.com/cache/about/cdn-cache-control.
168168
"""
169-
header = "CDN-Cache-Control"
169+
cdn_cache_header = "CDN-Cache-Control"
170170
unresolved_domain = request.unresolved_domain
171171
# Never trust projects resolving from the X-RTD-Slug header,
172172
# we don't want to cache their content on domains from other
173173
# projects, see GHSA-mp38-vprc-7hf5.
174174
if unresolved_domain and unresolved_domain.is_from_http_header:
175-
response.headers[header] = "private"
176-
177-
if settings.ALLOW_PRIVATE_REPOS:
178-
# Set the key to private only if it hasn't already been set by the view.
179-
response.headers.setdefault(header, "private")
175+
response.headers[cdn_cache_header] = "private"
176+
# SECURITY: Return early, we never want to cache this response.
177+
return
178+
179+
# Set the key only if it hasn't already been set by the view.
180+
if cdn_cache_header not in response.headers:
181+
default_cache_level = (
182+
"private" if settings.ALLOW_PRIVATE_REPOS else "public"
183+
)
184+
response.headers[cdn_cache_header] = default_cache_level
180185

181186
def _set_request_attributes(self, request, unresolved_domain):
182187
"""

readthedocs/proxito/tests/base.py

+2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from django.core.files.storage import get_storage_class
88
from django.test import TestCase
99

10+
from readthedocs.builds.constants import LATEST
1011
from readthedocs.projects.constants import PUBLIC
1112
from readthedocs.projects.models import Domain, Project
1213
from readthedocs.proxito.views import serve
@@ -33,6 +34,7 @@ def setUp(self):
3334
main_language_project=None,
3435
)
3536
self.project.versions.update(privacy_level=PUBLIC)
37+
self.version = self.project.versions.get(slug=LATEST)
3638

3739
self.subproject = fixture.get(
3840
Project,

readthedocs/proxito/tests/test_full.py

+35-11
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@ def test_health_check(self):
5858
host = '127.0.0.1'
5959
resp = self.client.get(url, HTTP_HOST=host)
6060
self.assertEqual(resp.status_code, 200)
61-
self.assertEqual(resp.json(), {'status': 200})
61+
self.assertEqual(resp.json(), {"status": 200})
62+
self.assertEqual(resp["CDN-Cache-Control"], "private")
6263

6364
def test_subproject_serving(self):
6465
url = '/projects/subproject/en/latest/awesome.html'
@@ -397,7 +398,24 @@ def test_project_nginx_serving_unicode_filename(self):
397398
)
398399

399400
@override_settings(PYTHON_MEDIA=False)
400-
def test_download_files(self):
401+
def test_download_files_public_version(self):
402+
for type_ in DOWNLOADABLE_MEDIA_TYPES:
403+
resp = self.client.get(
404+
f"/_/downloads/en/latest/{type_}/",
405+
HTTP_HOST="project.dev.readthedocs.io",
406+
)
407+
self.assertEqual(resp.status_code, 200)
408+
extension = "zip" if type_ == MEDIA_TYPE_HTMLZIP else type_
409+
self.assertEqual(
410+
resp["X-Accel-Redirect"],
411+
f"/proxito/media/{type_}/project/latest/project.{extension}",
412+
)
413+
self.assertEqual(resp["CDN-Cache-Control"], "public")
414+
415+
@override_settings(PYTHON_MEDIA=False, ALLOW_PRIVATE_REPOS=True)
416+
def test_download_files_private_version(self):
417+
self.version.privacy_level = PRIVATE
418+
self.version.save()
401419
for type_ in DOWNLOADABLE_MEDIA_TYPES:
402420
resp = self.client.get(
403421
f"/_/downloads/en/latest/{type_}/",
@@ -409,6 +427,7 @@ def test_download_files(self):
409427
resp["X-Accel-Redirect"],
410428
f"/proxito/media/{type_}/project/latest/project.{extension}",
411429
)
430+
self.assertEqual(resp["CDN-Cache-Control"], "private")
412431

413432
@override_settings(PYTHON_MEDIA=False)
414433
def test_invalid_download_files(self):
@@ -1460,11 +1479,13 @@ def test_cache_on_private_versions_custom_domain(self):
14601479
self.domain.save()
14611480
self._test_cache_control_header_project(expected_value='private', host=self.domain.domain)
14621481

1463-
# HTTPS redirect respects the privacy level of the version.
1464-
resp = self.client.get('/en/latest/', secure=False, HTTP_HOST=self.domain.domain)
1465-
self.assertEqual(resp['Location'], f'https://{self.domain.domain}/en/latest/')
1466-
self.assertEqual(resp.headers['CDN-Cache-Control'], 'private')
1467-
self.assertEqual(resp.headers['Cache-Tag'], 'project,project:latest')
1482+
# HTTPS redirect can always be cached.
1483+
resp = self.client.get(
1484+
"/en/latest/", secure=False, HTTP_HOST=self.domain.domain
1485+
)
1486+
self.assertEqual(resp["Location"], f"https://{self.domain.domain}/en/latest/")
1487+
self.assertEqual(resp.headers["CDN-Cache-Control"], "public")
1488+
self.assertEqual(resp.headers["Cache-Tag"], "project,project:latest")
14681489

14691490
def test_cache_public_versions(self):
14701491
self.project.versions.update(privacy_level=PUBLIC)
@@ -1492,15 +1513,18 @@ def test_cache_on_private_versions_custom_domain_subproject(self):
14921513
self.domain.save()
14931514
self._test_cache_control_header_subproject(expected_value='private', host=self.domain.domain)
14941515

1495-
# HTTPS redirect respects the privacy level of the version.
1516+
# HTTPS redirect can always be cached.
14961517
resp = self.client.get(
14971518
'/projects/subproject/en/latest/',
14981519
secure=False,
14991520
HTTP_HOST=self.domain.domain,
15001521
)
1501-
self.assertEqual(resp['Location'], f'https://{self.domain.domain}/projects/subproject/en/latest/')
1502-
self.assertEqual(resp.headers['CDN-Cache-Control'], 'private')
1503-
self.assertEqual(resp.headers['Cache-Tag'], 'subproject,subproject:latest')
1522+
self.assertEqual(
1523+
resp["Location"],
1524+
f"https://{self.domain.domain}/projects/subproject/en/latest/",
1525+
)
1526+
self.assertEqual(resp.headers["CDN-Cache-Control"], "public")
1527+
self.assertEqual(resp.headers["Cache-Tag"], "subproject,subproject:latest")
15041528

15051529
def test_cache_public_versions_subproject(self):
15061530
self.subproject.versions.update(privacy_level=PUBLIC)

readthedocs/proxito/tests/test_headers.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -133,13 +133,13 @@ def test_user_domain_headers(self):
133133
self.assertEqual(r[http_header_secure], http_header_value)
134134

135135
@override_settings(ALLOW_PRIVATE_REPOS=False)
136-
def test_cache_headers_public_projects(self):
136+
def test_cache_headers_public_version_with_private_projects_not_allowed(self):
137137
r = self.client.get('/en/latest/', HTTP_HOST='project.dev.readthedocs.io')
138138
self.assertEqual(r.status_code, 200)
139-
self.assertNotIn('CDN-Cache-Control', r)
139+
self.assertEqual(r["CDN-Cache-Control"], "public")
140140

141141
@override_settings(ALLOW_PRIVATE_REPOS=True)
142-
def test_cache_headers_private_projects(self):
142+
def test_cache_headers_public_version_with_private_projects_allowed(self):
143143
r = self.client.get('/en/latest/', HTTP_HOST='project.dev.readthedocs.io')
144144
self.assertEqual(r.status_code, 200)
145-
self.assertEqual(r['CDN-Cache-Control'], 'private')
145+
self.assertEqual(r["CDN-Cache-Control"], "public")

readthedocs/proxito/views/mixins.py

+4
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,8 @@ def system_redirect(
329329
log.debug(
330330
"System Redirect.", host=request.get_host(), from_url=filename, to_url=to
331331
)
332+
# All system redirects can be cached, since the final URL will check for authz.
333+
self.cache_response = True
332334
resp = HttpResponseRedirect(to)
333335
resp["X-RTD-Redirect"] = RedirectType.system.name
334336
return resp
@@ -398,6 +400,8 @@ def canonical_redirect(
398400
raise InfiniteRedirectException()
399401

400402
log.info('Canonical Redirect.', host=request.get_host(), from_url=filename, to_url=to)
403+
# All canonical redirects can be cached, since the final URL will check for authz.
404+
self.cache_response = True
401405
resp = HttpResponseRedirect(to)
402406
resp["X-RTD-Redirect"] = redirect_type.name
403407
return resp

0 commit comments

Comments
 (0)