1
1
import re
2
2
from dataclasses import dataclass
3
- from urllib .parse import urlparse
3
+ from urllib .parse import ParseResult , urlparse
4
4
5
5
import structlog
6
6
from django .conf import settings
@@ -27,8 +27,7 @@ class UnresolvedURL:
27
27
28
28
version : Version = None
29
29
filename : str = None
30
- query : str = None
31
- fragment : str = None
30
+ parsed_url : ParseResult = None
32
31
domain : Domain = None
33
32
external : bool = False
34
33
@@ -41,59 +40,92 @@ class Unresolver:
41
40
# - /en/latest/
42
41
# - /en/latest/file/name/
43
42
multiversion_pattern = re .compile (
44
- r"^/(?P<language>{lang_slug})(/((?P<version>{version_slug})(/(?P<file>{filename_slug}))?)?)?$" .format ( # noqa
43
+ r"""
44
+ ^/(?P<language>{lang_slug}) # Must have the language slug.
45
+ (/((?P<version>{version_slug})(/(?P<file>{filename_slug}))?)?)?$ # Optionally a version followed by a file. # noqa
46
+ """ .format (
45
47
** pattern_opts
46
- )
48
+ ),
49
+ re .VERBOSE ,
47
50
)
48
51
49
52
# This pattern matches:
50
53
# - /projects/subproject
51
54
# - /projects/subproject/
52
55
# - /projects/subproject/file/name/
53
56
subproject_pattern = re .compile (
54
- r"^/projects/(?P<project>{project_slug}+)(/(?P<file>{filename_slug}))?$" .format (
57
+ r"""
58
+ ^/projects/ # Must have the `projects` prefix.
59
+ (?P<project>{project_slug}+) # Followed by the subproject alias.
60
+ (/(?P<file>{filename_slug}))?$ # Optionally a filename, which will be recursively resolved.
61
+ """ .format (
55
62
** pattern_opts
56
- )
63
+ ),
64
+ re .VERBOSE ,
57
65
)
58
66
59
- def unresolve (self , url , add_index = True ):
67
+ def unresolve (self , url , append_indexhtml = True ):
60
68
"""
61
69
Turn a URL into the component parts that our views would use to process them.
62
70
63
71
This is useful for lots of places,
64
72
like where we want to figure out exactly what file a URL maps to.
65
73
66
74
:param url: Full URL to unresolve (including the protocol and domain part).
67
- :param add_index : If `True` the filename will be normalized
75
+ :param append_indexhtml : If `True` directories will be normalized
68
76
to end with ``/index.html``.
69
77
"""
70
78
parsed = urlparse (url )
71
79
domain = self .get_domain_from_host (parsed .netloc )
72
- project_slug , domain_object , external = self .unresolve_domain (domain )
73
- if not project_slug :
80
+ (
81
+ parent_project_slug ,
82
+ domain_object ,
83
+ external_version_slug ,
84
+ ) = self .unresolve_domain (domain )
85
+ if not parent_project_slug :
74
86
return None
75
87
76
- parent_project = Project .objects .filter (slug = project_slug ).first ()
88
+ parent_project = Project .objects .filter (slug = parent_project_slug ).first ()
77
89
if not parent_project :
78
90
return None
79
91
80
- project , version , filename = self ._unresolve_path (
92
+ current_project , version , filename = self ._unresolve_path (
81
93
parent_project = parent_project ,
82
94
path = parsed .path ,
83
95
)
84
96
85
- if add_index and filename and filename .endswith ("/" ):
97
+ # Make sure we are serving the external version from the subdomain.
98
+ if external_version_slug and version :
99
+ if external_version_slug != version .slug :
100
+ log .warning (
101
+ "Invalid version for external domain." ,
102
+ domain = domain ,
103
+ version_slug = version .slug ,
104
+ )
105
+ version = None
106
+ filename = None
107
+ elif not version .is_external :
108
+ log .warning (
109
+ "Attempt of serving a non-external version from RTD_EXTERNAL_VERSION_DOMAIN." ,
110
+ domain = domain ,
111
+ version_slug = version .slug ,
112
+ version_type = version .type ,
113
+ url = url ,
114
+ )
115
+ version = None
116
+ filename = None
117
+
118
+ if append_indexhtml and filename and filename .endswith ("/" ):
86
119
filename += "index.html"
87
120
88
121
return UnresolvedURL (
89
122
parent_project = parent_project ,
90
- project = project or parent_project ,
123
+ project = current_project or parent_project ,
91
124
version = version ,
92
125
filename = filename ,
93
- query = parsed .query ,
94
- fragment = parsed .fragment ,
126
+ parsed_url = parsed ,
95
127
domain = domain_object ,
96
- external = external ,
128
+ external = bool ( external_version_slug ) ,
97
129
)
98
130
99
131
@staticmethod
@@ -109,7 +141,11 @@ def _match_multiversion_project(self, parent_project, path):
109
141
Try to match a multiversion project.
110
142
111
143
If the translation exists, we return a result even if the version doesn't,
112
- so the translation is taken as the canonical project (useful for 404 pages).
144
+ so the translation is taken as the current project (useful for 404 pages).
145
+
146
+ :returns: None or a tuple with the current project, version and file.
147
+ A tuple with only the project means we weren't able to find a version,
148
+ but the translation was correct.
113
149
"""
114
150
match = self .multiversion_pattern .match (path )
115
151
if not match :
@@ -138,24 +174,40 @@ def _match_subproject(self, parent_project, path):
138
174
139
175
If the subproject exists, we try to resolve the rest of the path
140
176
with the subproject as the parent project.
177
+
178
+ If the subproject exists, we return a result even if version doesn't,
179
+ so the subproject is taken as the current project (useful for 404 pages).
180
+
181
+ :returns: None or a tuple with the current project, version and file.
182
+ A tuple with only the project means we were able to find the subproject,
183
+ but we weren't able to resolve the rest of the path.
141
184
"""
142
185
match = self .subproject_pattern .match (path )
143
186
if not match :
144
187
return None
145
188
146
- project_slug = match .group ("project" )
189
+ subproject_alias = match .group ("project" )
147
190
file = self ._normalize_filename (match .group ("file" ))
148
191
project_relationship = (
149
- parent_project .subprojects .filter (alias = project_slug )
192
+ parent_project .subprojects .filter (alias = subproject_alias )
150
193
.prefetch_related ("child" )
151
194
.first ()
152
195
)
153
196
if project_relationship :
154
- return self ._unresolve_path (
155
- parent_project = project_relationship .child ,
197
+ # We use the subproject as the new parent project
198
+ # to resolve the rest of the path relative to it.
199
+ subproject = project_relationship .child
200
+ response = self ._unresolve_path (
201
+ parent_project = subproject ,
156
202
path = file ,
157
203
check_subprojects = False ,
158
204
)
205
+ # If we got a valid response, return that,
206
+ # otherwise return the current subproject
207
+ # as the current project without a valid version or path.
208
+ if response :
209
+ return response
210
+ return subproject , None , None
159
211
return None
160
212
161
213
def _match_single_version_project (self , parent_project , path ):
@@ -182,10 +234,19 @@ def _unresolve_path(self, parent_project, path, check_subprojects=True):
182
234
If the returned version is `None`, then we weren't able to
183
235
unresolve the path into a valid version of the project.
184
236
237
+ The checks are done in the following order:
238
+
239
+ - Check for multiple versions if the parent project
240
+ isn't a single version project.
241
+ - Check for subprojects.
242
+ - Check for single versions if the parent project isn't
243
+ a multi version project.
244
+
185
245
:param parent_project: The project that owns the path.
186
246
:param path: The path to unresolve.
187
247
:param check_subprojects: If we should check for subprojects,
188
- this is used to call this function recursively.
248
+ this is used to call this function recursively when
249
+ resolving the path from a subproject (we don't support subprojects of subprojects).
189
250
190
251
:returns: A tuple with: project, version, and file name.
191
252
"""
@@ -216,7 +277,7 @@ def _unresolve_path(self, parent_project, path, check_subprojects=True):
216
277
if response :
217
278
return response
218
279
219
- return None , None , None
280
+ return parent_project , None , None
220
281
221
282
@staticmethod
222
283
def get_domain_from_host (host ):
@@ -234,8 +295,8 @@ def unresolve_domain(self, domain):
234
295
Unresolve domain by extracting relevant information from it.
235
296
236
297
:param str domain: Domain to extract the information from.
237
- :returns: A tuple with: the project slug, domain object, and if the domain
238
- is from an external version.
298
+ :returns: A tuple with: the project slug, domain object, and the
299
+ external version slug if the domain is from an external version.
239
300
"""
240
301
public_domain = self .get_domain_from_host (settings .PUBLIC_DOMAIN )
241
302
external_domain = self .get_domain_from_host (
@@ -250,22 +311,22 @@ def unresolve_domain(self, domain):
250
311
if public_domain == root_domain :
251
312
project_slug = subdomain
252
313
log .debug ("Public domain." , domain = domain )
253
- return project_slug , None , False
314
+ return project_slug , None , None
254
315
255
316
# TODO: This can catch some possibly valid domains (docs.readthedocs.io.com)
256
317
# for example, but these might be phishing, so let's ignore them for now.
257
318
log .warning ("Weird variation of our domain." , domain = domain )
258
- return None , None , False
319
+ return None , None , None
259
320
260
321
# Serve PR builds on external_domain host.
261
322
if external_domain == root_domain :
262
323
try :
324
+ project_slug , version_slug = subdomain .rsplit ("--" , maxsplit = 1 )
263
325
log .debug ("External versions domain." , domain = domain )
264
- project_slug , _ = subdomain .rsplit ("--" , maxsplit = 1 )
265
- return project_slug , None , True
326
+ return project_slug , None , version_slug
266
327
except ValueError :
267
328
log .info ("Invalid format of external versions domain." , domain = domain )
268
- return None , None , False
329
+ return None , None , None
269
330
270
331
# Custom domain.
271
332
domain_object = (
0 commit comments