From c4bb3131d73d3d3d231dc1439f665a0e1da3896b Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Tue, 25 Jun 2024 09:11:49 -0700 Subject: [PATCH 1/3] Run iri_to_uri on header values This fixes https://github.com/readthedocs/readthedocs.org/issues/11403 --- readthedocs/proxito/middleware.py | 3 ++- readthedocs/proxito/tests/test_headers.py | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/readthedocs/proxito/middleware.py b/readthedocs/proxito/middleware.py index b6807967a74..daa147f8bb2 100644 --- a/readthedocs/proxito/middleware.py +++ b/readthedocs/proxito/middleware.py @@ -19,6 +19,7 @@ from django.shortcuts import redirect from django.urls import reverse from django.utils.deprecation import MiddlewareMixin +from django.utils.encoding import iri_to_uri from django.utils.html import escape from readthedocs.builds.models import Version @@ -382,7 +383,7 @@ def add_resolver_headers(self, request, response): mime_encode=True, ) - response["X-RTD-Resolver-Filename"] = header_value + response["X-RTD-Resolver-Filename"] = iri_to_uri(header_value) except BadHeaderError: # Skip adding the header because it fails validation log.info( diff --git a/readthedocs/proxito/tests/test_headers.py b/readthedocs/proxito/tests/test_headers.py index f99a6a3dc45..17eb7cf69b6 100644 --- a/readthedocs/proxito/tests/test_headers.py +++ b/readthedocs/proxito/tests/test_headers.py @@ -75,6 +75,29 @@ def test_serve_headers_with_path(self): "/proxito/media/html/project/latest/guides/jupyter/gallery.html", ) + # refs https://read-the-docs.sentry.io/issues/5019718893/ + def test_serve_headers_with_unicode(self): + r = self.client.get( + "/en/latest/tutorial_1installation.htmlReview%20of%20Failures%20of%0BReview%20of%20Failures%20of%0BPhotovoltaic%20Moduleshotovoltaic%20Modules", + secure=True, + headers={"host": "project.dev.readthedocs.io"}, + ) + self.assertEqual(r.status_code, 200) + self.assertEqual(r["Cache-Tag"], "project,project:latest") + self.assertEqual(r["X-RTD-Domain"], "project.dev.readthedocs.io") + self.assertEqual(r["X-RTD-Project"], "project") + self.assertEqual(r["X-RTD-Project-Method"], "public_domain") + self.assertEqual(r["X-RTD-Version"], "latest") + self.assertEqual(r["X-RTD-version-Method"], "path") + self.assertEqual( + r["X-RTD-Resolver-Filename"], + "/tutorial_1installation.htmlReview%20of%20Failures%20of%0BReview%20of%20Failures%20of%0BPhotovoltaic%20Moduleshotovoltaic%20Modules", + ) + self.assertEqual( + r["X-RTD-Path"], + "/proxito/media/html/project/latest/tutorial_1installation.htmlReview%20of%20Failures%20of%0BReview%20of%20Failures%20of%0BPhotovoltaic%20Moduleshotovoltaic%20Modules", + ) + def test_subproject_serve_headers(self): r = self.client.get( "/projects/subproject/en/latest/", From c850490194750477a9afc410d29737ebf66dae5a Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Tue, 25 Jun 2024 10:41:50 -0700 Subject: [PATCH 2/3] Escape in the right place --- readthedocs/proxito/middleware.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/readthedocs/proxito/middleware.py b/readthedocs/proxito/middleware.py index daa147f8bb2..4b1253d1c8d 100644 --- a/readthedocs/proxito/middleware.py +++ b/readthedocs/proxito/middleware.py @@ -374,7 +374,8 @@ def _get_https_redirect(self, request): def add_resolver_headers(self, request, response): if request.unresolved_url is not None: # TODO: add more ``X-RTD-Resolver-*`` headers - header_value = escape(request.unresolved_url.filename) + uri_filename = iri_to_uri(request.unresolved_url.filename) + header_value = escape(uri_filename) try: # Use Django internals to validate the header's value before injecting it. ResponseHeaders({})._convert_to_charset( @@ -383,7 +384,7 @@ def add_resolver_headers(self, request, response): mime_encode=True, ) - response["X-RTD-Resolver-Filename"] = iri_to_uri(header_value) + response["X-RTD-Resolver-Filename"] = header_value except BadHeaderError: # Skip adding the header because it fails validation log.info( From 1f71e20acf76ae2dc6eae971af4d13430c2a9cf2 Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Tue, 25 Jun 2024 11:48:48 -0700 Subject: [PATCH 3/3] Remove _convert_to_charset logic and fix test --- readthedocs/proxito/middleware.py | 18 +----------------- .../proxito/tests/test_old_redirects.py | 1 - 2 files changed, 1 insertion(+), 18 deletions(-) diff --git a/readthedocs/proxito/middleware.py b/readthedocs/proxito/middleware.py index 4b1253d1c8d..edcf80acbae 100644 --- a/readthedocs/proxito/middleware.py +++ b/readthedocs/proxito/middleware.py @@ -15,7 +15,6 @@ ) from django.conf import settings from django.core.exceptions import SuspiciousOperation -from django.http.response import BadHeaderError, ResponseHeaders from django.shortcuts import redirect from django.urls import reverse from django.utils.deprecation import MiddlewareMixin @@ -376,22 +375,7 @@ def add_resolver_headers(self, request, response): # TODO: add more ``X-RTD-Resolver-*`` headers uri_filename = iri_to_uri(request.unresolved_url.filename) header_value = escape(uri_filename) - try: - # Use Django internals to validate the header's value before injecting it. - ResponseHeaders({})._convert_to_charset( - header_value, - "latin-1", - mime_encode=True, - ) - - response["X-RTD-Resolver-Filename"] = header_value - except BadHeaderError: - # Skip adding the header because it fails validation - log.info( - "Skip adding X-RTD-Resolver-Filename header due to invalid value.", - filename=request.unresolved_url.filename, - value=header_value, - ) + response["X-RTD-Resolver-Filename"] = header_value def process_response(self, request, response): # noqa self.add_proxito_headers(request, response) diff --git a/readthedocs/proxito/tests/test_old_redirects.py b/readthedocs/proxito/tests/test_old_redirects.py index d52c5bdd8b0..9fc758ab0f8 100644 --- a/readthedocs/proxito/tests/test_old_redirects.py +++ b/readthedocs/proxito/tests/test_old_redirects.py @@ -1397,7 +1397,6 @@ def test_redirect_exact_with_wildcard_crossdomain(self): r = self.client.get(url, headers={"host": "project.dev.readthedocs.io"}) self.assertEqual(r.status_code, 302, url) self.assertEqual(r["Location"], expected_location, url) - self.assertNotIn("X-RTD-Resolver-Filename", r.headers) def test_redirect_html_to_clean_url_crossdomain(self): """