Skip to content

Commit 0b3d41b

Browse files
authored
Canonicalize all proxito slashes (#8028)
* Canonicalize all proxito slashes. This is currently broken, e.g. this URL works: https://docs.readthedocs.io/en/latest///index.html
* Only parse the path, not the params
* Use geturl() instead of urlunparse
1 parent b3185cf commit 0b3d41b

File tree

2 files changed

+55
-1
lines changed

2 files changed

+55
-1
lines changed

readthedocs/proxito/middleware.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@
77
"""
88
import logging
99
import sys
10+
import re
11+
from urllib.parse import urlparse
1012

1113
from django.conf import settings
12-
from django.shortcuts import render
14+
from django.shortcuts import render, redirect
1315
from django.utils.deprecation import MiddlewareMixin
1416
from django.urls import reverse
1517

@@ -173,6 +175,13 @@ def process_request(self, request): # noqa
173175
if hasattr(ret, 'status_code'):
174176
return ret
175177

178+
if '//' in request.path:
179+
# Remove multiple slashes from URLs
180+
url_parsed = urlparse(request.get_full_path())
181+
clean_path = re.sub('//+', '/', url_parsed.path)
182+
new_parsed = url_parsed._replace(path=clean_path)
183+
return redirect(new_parsed.geturl())
184+
176185
log.debug('Proxito Project: slug=%s', ret)
177186

178187
# Otherwise set the slug on the request

readthedocs/proxito/tests/test_redirects.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,3 +130,48 @@ def test_proper_page_on_subdomain(self):
130130
'https://project.dev.readthedocs.io/en/latest/test.html',
131131
)
132132

133+
def test_slash_redirect(self):
134+
host = 'project.dev.readthedocs.io'
135+
136+
url = '/en/latest////awesome.html'
137+
resp = self.client.get(url, HTTP_HOST=host)
138+
self.assertEqual(resp.status_code, 302)
139+
self.assertEqual(
140+
resp['Location'], '/en/latest/awesome.html',
141+
)
142+
143+
url = '///en/latest////awesome.html'
144+
resp = self.client.get(url, HTTP_HOST=host)
145+
self.assertEqual(resp.status_code, 302)
146+
self.assertEqual(
147+
resp['Location'], '/en/latest/awesome.html',
148+
)
149+
150+
url = '///en/latest////awesome///index.html'
151+
resp = self.client.get(url, HTTP_HOST=host)
152+
self.assertEqual(resp.status_code, 302)
153+
self.assertEqual(
154+
resp['Location'], '/en/latest/awesome/index.html',
155+
)
156+
157+
url = '///en/latest////awesome///index.html?foo=bar'
158+
resp = self.client.get(url, HTTP_HOST=host)
159+
self.assertEqual(resp.status_code, 302)
160+
self.assertEqual(
161+
resp['Location'], '/en/latest/awesome/index.html?foo=bar',
162+
)
163+
164+
url = '///en/latest////awesome///'
165+
resp = self.client.get(url, HTTP_HOST=host)
166+
self.assertEqual(resp.status_code, 302)
167+
self.assertEqual(
168+
resp['Location'], '/en/latest/awesome/',
169+
)
170+
171+
# Don't change the values of params
172+
url = '///en/latest////awesome///index.html?foo=bar//bas'
173+
resp = self.client.get(url, HTTP_HOST=host)
174+
self.assertEqual(resp.status_code, 302)
175+
self.assertEqual(
176+
resp['Location'], '/en/latest/awesome/index.html?foo=bar//bas',
177+
)

0 commit comments

Comments
 (0)