Skip to content

Proxito: simplify caching logic #10067

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 1, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion readthedocs/analytics/proxied_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@

from readthedocs.analytics.models import PageView
from readthedocs.api.v2.permissions import IsAuthorizedToViewVersion
from readthedocs.core.mixins import CDNCacheControlMixin
from readthedocs.core.unresolver import UnresolverError, unresolve
from readthedocs.core.utils.extend import SettingsOverrideObject
from readthedocs.projects.models import Project


class BaseAnalyticsView(APIView):
class BaseAnalyticsView(CDNCacheControlMixin, APIView):

"""
Track page views.
Expand All @@ -25,6 +26,8 @@ class BaseAnalyticsView(APIView):
- absolute_uri: Full path with domain.
"""

# Never cache this view, we always want to hit this endpoint.
cache_response = False
http_method_names = ['get']
permission_classes = [IsAuthorizedToViewVersion]

Expand Down
5 changes: 5 additions & 0 deletions readthedocs/analytics/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,11 @@ def test_invalid_uri(self):
self.client.get(url, HTTP_HOST=self.host)
assert PageView.objects.all().count() == 0

def test_cache_headers(self):
resp = self.client.get(self.url, HTTP_HOST=self.host)
self.assertEqual(resp.status_code, 200)
self.assertEqual(resp["CDN-Cache-Control"], "private")

def test_increase_page_view_count(self):
assert (
PageView.objects.all().count() == 0
Expand Down
5 changes: 5 additions & 0 deletions readthedocs/builds/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,11 @@ def is_private(self):
return self.project.external_builds_privacy_level == PRIVATE
return self.privacy_level == PRIVATE

@property
def is_public(self):
"""Check if the version is public (taking external versions into consideration)."""
return not self.is_private

@property
def is_external(self):
return self.type == EXTERNAL
Expand Down
37 changes: 13 additions & 24 deletions readthedocs/core/mixins.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,7 @@
"""Common mixin classes for views."""
from functools import lru_cache

from django.conf import settings
from django.contrib.auth.mixins import LoginRequiredMixin
from vanilla import ListView

from readthedocs.projects.models import Feature
from readthedocs.subscriptions.models import PlanFeature


class ListViewWithForm(ListView):

Expand All @@ -34,41 +28,36 @@ class ProxiedAPIMixin:
class CDNCacheControlMixin:

"""
Allow to cache views at the CDN level when privacy levels are enabled.
Explicitly cache or not a view at the CDN level.

The cache control header is only used when privacy levels
are enabled (otherwise everything is public by default).
The cache control header is used to mark the response as public/private,
so it can be cached or not.

Views that can be cached should always return the same response for all
users (anonymous and authenticated users), like when the version attached
to the request is public.

To cache a view you can either set the `cache_request` attribute to `True`,
To explicitly cache a view you can either set the `cache_response` attribute to `True`/`False`,
or override the `can_be_cached` method.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which takes precedence? We should likely document the order we check things, so it's clear how the logic is interpreted.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we have two different ways of doing the same? Can we have just one? 😄

If set to `None` (default), the cache header won't be set,
so the default value can be set by our middleware.

We use ``CDN-Cache-Control``, to control caching at the CDN level only.
This doesn't affect caching at the browser level (``Cache-Control``).

See https://developers.cloudflare.com/cache/about/cdn-cache-control.
"""

cache_request = False
cache_response = None

def dispatch(self, request, *args, **kwargs):
response = super().dispatch(request, *args, **kwargs)
if settings.ALLOW_PRIVATE_REPOS and self.can_be_cached(request):
response.headers['CDN-Cache-Control'] = 'public'
cache_response = self.can_be_cached(request)
if cache_response is not None:
response.headers["CDN-Cache-Control"] = (
"public" if cache_response else "private"
)
return response

def can_be_cached(self, request):
return self.cache_request

@lru_cache(maxsize=1)
def _is_cache_enabled(self, project):
"""Helper function to check if CDN is enabled for a project."""
plan_has_cdn = PlanFeature.objects.get_feature(
obj=project, type=PlanFeature.TYPE_CDN
)
return settings.ALLOW_PRIVATE_REPOS and (
plan_has_cdn or project.has_feature(Feature.CDN_ENABLED)
)
return self.cache_response
8 changes: 6 additions & 2 deletions readthedocs/core/views/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from django.shortcuts import render
from django.views.generic import TemplateView, View

from readthedocs.core.mixins import PrivateViewMixin
from readthedocs.core.mixins import CDNCacheControlMixin, PrivateViewMixin
from readthedocs.projects.models import Project

log = structlog.get_logger(__name__)
Expand All @@ -21,7 +21,11 @@ class NoProjectException(Exception):
pass


class HealthCheckView(View):
class HealthCheckView(CDNCacheControlMixin, View):

# Never cache this view, we always want to get the live response from the server.
cache_response = False

def get(self, request, *args, **kwargs):
return JsonResponse({'status': 200}, status=200)

Expand Down
6 changes: 5 additions & 1 deletion readthedocs/projects/views/public.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from readthedocs.builds.constants import BUILD_STATE_FINISHED, LATEST
from readthedocs.builds.models import Version
from readthedocs.builds.views import BuildTriggerMixin
from readthedocs.core.mixins import CDNCacheControlMixin
from readthedocs.core.permissions import AdminPermission
from readthedocs.core.utils.extend import SettingsOverrideObject
from readthedocs.projects.filters import ProjectVersionListFilterSet
Expand Down Expand Up @@ -302,7 +303,7 @@ def project_downloads(request, project_slug):
)


class ProjectDownloadMediaBase(ServeDocsMixin, View):
class ProjectDownloadMediaBase(CDNCacheControlMixin, ServeDocsMixin, View):

# Use new-style URLs (same domain as docs) or old-style URLs (dashboard URL)
same_domain_url = False
Expand Down Expand Up @@ -353,6 +354,9 @@ def get(
slug=version_slug,
)

# All public versions can be cached.
self.cache_response = version.is_public

else:
# All the arguments come from the URL.
version = get_object_or_404(
Expand Down
19 changes: 10 additions & 9 deletions readthedocs/proxito/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,27 +156,28 @@ def add_cache_headers(self, request, response):
"""
Add Cache-Control headers.

If privacy levels are enabled and the header isn't already present,
set the cache level to private.
Or if the request was from the `X-RTD-Slug` header,
we don't cache the response, since we could be caching a response in another domain.
If the `CDN-Cache-Control` header isn't already present, set the cache
level to public or private, depending if we allow private repos or not.
Or if the request was from the `X-RTD-Slug` header, we don't cache the
response, since we could be caching a response in another domain.

We use ``CDN-Cache-Control``, to control caching at the CDN level only.
This doesn't affect caching at the browser level (``Cache-Control``).
Comment on lines 164 to 165
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we make this configurable as well? Probably in another PR.


See https://developers.cloudflare.com/cache/about/cdn-cache-control.
"""
header = "CDN-Cache-Control"
cache_header = "CDN-Cache-Control"
unresolved_domain = request.unresolved_domain
# Never trust projects resolving from the X-RTD-Slug header,
# we don't want to cache their content on domains from other
# projects, see GHSA-mp38-vprc-7hf5.
if unresolved_domain and unresolved_domain.is_from_http_header:
response.headers[header] = "private"
response.headers[cache_header] = "private"
return

if settings.ALLOW_PRIVATE_REPOS:
# Set the key to private only if it hasn't already been set by the view.
response.headers.setdefault(header, "private")
# Set the key only if it hasn't already been set by the view.
default_cache_level = "private" if settings.ALLOW_PRIVATE_REPOS else "public"
response.headers.setdefault(cache_header, default_cache_level)

def _set_request_attributes(self, request, unresolved_domain):
"""
Expand Down
2 changes: 2 additions & 0 deletions readthedocs/proxito/tests/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from django.core.files.storage import get_storage_class
from django.test import TestCase

from readthedocs.builds.constants import LATEST
from readthedocs.projects.constants import PUBLIC
from readthedocs.projects.models import Domain, Project
from readthedocs.proxito.views import serve
Expand All @@ -33,6 +34,7 @@ def setUp(self):
main_language_project=None,
)
self.project.versions.update(privacy_level=PUBLIC)
self.version = self.project.versions.get(slug=LATEST)

self.subproject = fixture.get(
Project,
Expand Down
46 changes: 35 additions & 11 deletions readthedocs/proxito/tests/test_full.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ def test_health_check(self):
host = '127.0.0.1'
resp = self.client.get(url, HTTP_HOST=host)
self.assertEqual(resp.status_code, 200)
self.assertEqual(resp.json(), {'status': 200})
self.assertEqual(resp.json(), {"status": 200})
self.assertEqual(resp["CDN-Cache-Control"], "private")

def test_subproject_serving(self):
url = '/projects/subproject/en/latest/awesome.html'
Expand Down Expand Up @@ -366,7 +367,24 @@ def test_project_nginx_serving_unicode_filename(self):
)

@override_settings(PYTHON_MEDIA=False)
def test_download_files(self):
def test_download_files_public_version(self):
for type_ in DOWNLOADABLE_MEDIA_TYPES:
resp = self.client.get(
f"/_/downloads/en/latest/{type_}/",
HTTP_HOST="project.dev.readthedocs.io",
)
self.assertEqual(resp.status_code, 200)
extension = "zip" if type_ == MEDIA_TYPE_HTMLZIP else type_
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like a code smell... Probably should have named htmlzip as zip... 🙃

self.assertEqual(
resp["X-Accel-Redirect"],
f"/proxito/media/{type_}/project/latest/project.{extension}",
)
self.assertEqual(resp["CDN-Cache-Control"], "public")

@override_settings(PYTHON_MEDIA=False, ALLOW_PRIVATE_REPOS=True)
def test_download_files_private_version(self):
self.version.privacy_level = PRIVATE
self.version.save()
for type_ in DOWNLOADABLE_MEDIA_TYPES:
resp = self.client.get(
f"/_/downloads/en/latest/{type_}/",
Expand All @@ -378,6 +396,7 @@ def test_download_files(self):
resp["X-Accel-Redirect"],
f"/proxito/media/{type_}/project/latest/project.{extension}",
)
self.assertEqual(resp["CDN-Cache-Control"], "private")

@override_settings(PYTHON_MEDIA=False)
def test_invalid_download_files(self):
Expand Down Expand Up @@ -1395,11 +1414,13 @@ def test_cache_on_private_versions_custom_domain(self):
self.domain.save()
self._test_cache_control_header_project(expected_value='private', host=self.domain.domain)

# HTTPS redirect respects the privacy level of the version.
resp = self.client.get('/en/latest/', secure=False, HTTP_HOST=self.domain.domain)
self.assertEqual(resp['Location'], f'https://{self.domain.domain}/en/latest/')
self.assertEqual(resp.headers['CDN-Cache-Control'], 'private')
self.assertEqual(resp.headers['Cache-Tag'], 'project,project:latest')
# HTTPS redirect can always be cached.
resp = self.client.get(
"/en/latest/", secure=False, HTTP_HOST=self.domain.domain
)
self.assertEqual(resp["Location"], f"https://{self.domain.domain}/en/latest/")
self.assertEqual(resp.headers["CDN-Cache-Control"], "public")
self.assertEqual(resp.headers["Cache-Tag"], "project,project:latest")

def test_cache_public_versions(self):
self.project.versions.update(privacy_level=PUBLIC)
Expand Down Expand Up @@ -1427,15 +1448,18 @@ def test_cache_on_private_versions_custom_domain_subproject(self):
self.domain.save()
self._test_cache_control_header_subproject(expected_value='private', host=self.domain.domain)

# HTTPS redirect respects the privacy level of the version.
# HTTPS redirect can always be cached.
resp = self.client.get(
'/projects/subproject/en/latest/',
secure=False,
HTTP_HOST=self.domain.domain,
)
self.assertEqual(resp['Location'], f'https://{self.domain.domain}/projects/subproject/en/latest/')
self.assertEqual(resp.headers['CDN-Cache-Control'], 'private')
self.assertEqual(resp.headers['Cache-Tag'], 'subproject,subproject:latest')
self.assertEqual(
resp["Location"],
f"https://{self.domain.domain}/projects/subproject/en/latest/",
)
self.assertEqual(resp.headers["CDN-Cache-Control"], "public")
self.assertEqual(resp.headers["Cache-Tag"], "subproject,subproject:latest")

def test_cache_public_versions_subproject(self):
self.subproject.versions.update(privacy_level=PUBLIC)
Expand Down
8 changes: 4 additions & 4 deletions readthedocs/proxito/tests/test_headers.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,13 +133,13 @@ def test_user_domain_headers(self):
self.assertEqual(r[http_header_secure], http_header_value)

@override_settings(ALLOW_PRIVATE_REPOS=False)
def test_cache_headers_public_projects(self):
def test_cache_headers_public_version_with_private_projects_not_allowed(self):
r = self.client.get('/en/latest/', HTTP_HOST='project.dev.readthedocs.io')
self.assertEqual(r.status_code, 200)
self.assertNotIn('CDN-Cache-Control', r)
self.assertEqual(r["CDN-Cache-Control"], "public")

@override_settings(ALLOW_PRIVATE_REPOS=True)
def test_cache_headers_private_projects(self):
def test_cache_headers_public_version_with_private_projects_allowed(self):
r = self.client.get('/en/latest/', HTTP_HOST='project.dev.readthedocs.io')
self.assertEqual(r.status_code, 200)
self.assertEqual(r['CDN-Cache-Control'], 'private')
self.assertEqual(r["CDN-Cache-Control"], "public")
4 changes: 4 additions & 0 deletions readthedocs/proxito/views/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,8 @@ def system_redirect(
log.debug(
"System Redirect.", host=request.get_host(), from_url=filename, to_url=to
)
# All system redirects can be cached, since the final URL will check for authz.
self.cache_response = True
resp = HttpResponseRedirect(to)
resp["X-RTD-Redirect"] = RedirectType.system.name
return resp
Expand Down Expand Up @@ -392,6 +394,8 @@ def canonical_redirect(
raise InfiniteRedirectException()

log.info('Canonical Redirect.', host=request.get_host(), from_url=filename, to_url=to)
# All canonical redirects can be cached, since the final URL will check for authz.
self.cache_response = True
resp = HttpResponseRedirect(to)
resp["X-RTD-Redirect"] = redirect_type.name
return resp
Expand Down
Loading