Skip to content

Commit 9a1bd6c

Browse files
committed
Proxito: Don't hit storage for 404s
We have all the information we need in the DB already. Closes #10512
1 parent 8cae85b commit 9a1bd6c

File tree

1 file changed

+60
-51
lines changed

1 file changed

+60
-51
lines changed

readthedocs/proxito/views/serve.py

+60-51
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
)
2626
from readthedocs.core.utils.extend import SettingsOverrideObject
2727
from readthedocs.projects import constants
28-
from readthedocs.projects.models import Domain, Feature
28+
from readthedocs.projects.models import Domain, Feature, HTMLFile
2929
from readthedocs.projects.templatetags.projects_tags import sort_version_aware
3030
from readthedocs.proxito.constants import RedirectType
3131
from readthedocs.proxito.exceptions import (
@@ -673,32 +673,45 @@ def _get_custom_404_page(self, request, project, version=None):
673673
if default_version:
674674
versions_404.append(default_version)
675675

676+
if not versions_404:
677+
return None
678+
679+
tryfiles = ["404.html", "404/index.html"]
680+
available_404_files = list(
681+
HTMLFile.objects.filter(
682+
version__in=versions_404, path__in=tryfiles
683+
).values_list("version__slug", "path")
684+
)
685+
if not available_404_files:
686+
return None
687+
676688
for version_404 in versions_404:
677689
if not self.allowed_user(request, version_404):
678690
continue
679691

680-
storage_root_path = project.get_storage_path(
681-
type_="html",
682-
version_slug=version_404.slug,
683-
include_file=False,
684-
version_type=self.version_type,
685-
)
686-
tryfiles = ["404.html", "404/index.html"]
687692
for tryfile in tryfiles:
693+
if (version_404.slug, tryfile) not in available_404_files:
694+
continue
695+
696+
storage_root_path = project.get_storage_path(
697+
type_="html",
698+
version_slug=version_404.slug,
699+
include_file=False,
700+
version_type=self.version_type,
701+
)
688702
storage_filename_path = build_media_storage.join(
689703
storage_root_path, tryfile
690704
)
691-
if build_media_storage.exists(storage_filename_path):
692-
log.debug(
693-
"Serving custom 404.html page.",
694-
version_slug_404=version_404.slug,
695-
storage_filename_path=storage_filename_path,
696-
)
697-
resp = HttpResponse(
698-
build_media_storage.open(storage_filename_path).read()
699-
)
700-
resp.status_code = 404
701-
return resp
705+
log.debug(
706+
"Serving custom 404.html page.",
707+
version_slug_404=version_404.slug,
708+
storage_filename_path=storage_filename_path,
709+
)
710+
resp = HttpResponse(
711+
build_media_storage.open(storage_filename_path).read()
712+
)
713+
resp.status_code = 404
714+
return resp
702715
return None
703716

704717
def _get_index_file_redirect(self, request, project, version, filename, full_path):
@@ -711,46 +724,42 @@ def _get_index_file_redirect(self, request, project, version, filename, full_pat
711724
- /en/latest/foo -> /en/latest/foo/README.html
712725
- /en/latest/foo/ -> /en/latest/foo/README.html
713726
"""
714-
storage_root_path = project.get_storage_path(
715-
type_="html",
716-
version_slug=version.slug,
717-
include_file=False,
718-
version_type=self.version_type,
719-
)
720-
721727
tryfiles = ["index.html", "README.html"]
722728
# If the path ends with `/`, we already tried to serve
723729
# the `/index.html` file, so we only need to test for
724730
# the `/README.html` file.
725731
if full_path.endswith("/"):
726732
tryfiles = ["README.html"]
727733

728-
# First, check for dirhtml with slash
729-
for tryfile in tryfiles:
730-
storage_filename_path = build_media_storage.join(
731-
storage_root_path,
732-
f"{filename}/{tryfile}".lstrip("/"),
734+
tryfiles = [f"{filename}/{tryfile}".lstrip("/") for tryfile in tryfiles]
735+
available_index_files = list(
736+
HTMLFile.objects.filter(version=version, path__in=tryfiles).values_list(
737+
"path", flat=True
733738
)
734-
log.debug("Trying index filename.")
735-
if build_media_storage.exists(storage_filename_path):
736-
log.info("Redirecting to index file.", tryfile=tryfile)
737-
# Use urlparse so that we maintain GET args in our redirect
738-
parts = urlparse(full_path)
739-
if tryfile == "README.html":
740-
new_path = parts.path.rstrip("/") + f"/{tryfile}"
741-
else:
742-
new_path = parts.path.rstrip("/") + "/"
743-
744-
# `full_path` doesn't include query params.`
745-
query = urlparse(request.get_full_path()).query
746-
redirect_url = parts._replace(
747-
path=new_path,
748-
query=query,
749-
).geturl()
750-
751-
# TODO: decide if we need to check for infinite redirect here
752-
# (from URL == to URL)
753-
return HttpResponseRedirect(redirect_url)
739+
)
740+
741+
for tryfile in tryfiles:
742+
if tryfile not in available_index_files:
743+
continue
744+
745+
log.info("Redirecting to index file.", tryfile=tryfile)
746+
# Use urlparse so that we maintain GET args in our redirect
747+
parts = urlparse(full_path)
748+
if tryfile.endswith("README.html"):
749+
new_path = tryfile
750+
else:
751+
new_path = parts.path.rstrip("/") + "/"
752+
753+
# `full_path` doesn't include query params.`
754+
query = urlparse(request.get_full_path()).query
755+
redirect_url = parts._replace(
756+
path=new_path,
757+
query=query,
758+
).geturl()
759+
760+
# TODO: decide if we need to check for infinite redirect here
761+
# (from URL == to URL)
762+
return HttpResponseRedirect(redirect_url)
754763

755764
return None
756765

0 commit comments

Comments
 (0)