Skip to content

Commit 7cd0723

Browse files
committed
Add sitemap index and serve sitemap for subprojects
The sitemap index lists all the sitemap locations for a project including the subprojects. The sitemap of subprojects are also served from the parent domain. Closes readthedocs#6841
1 parent f4efd14 commit 7cd0723

File tree

5 files changed

+131
-3
lines changed

5 files changed

+131
-3
lines changed

docs/user/reference/sitemaps.rst

+7-1
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,18 @@ It contains information such as:
1616
* How important this URL is in relation to other URLs in the site.
1717
* What translations are available for a page.
1818

19-
Read the Docs automatically generates a ``sitemap.xml`` for your project,
19+
Read the Docs automatically generates a ``sitemap.xml`` and a
20+
``sitemap_index.xml`` for your project,
2021

2122
By default the sitemap includes:
2223

2324
* Each version of your documentation and when it was last updated, sorted by version number.
2425

26+
By default the sitemap index includes:
27+
28+
* The location of ``sitemap.xml``
29+
* The locations of the ``sitemap.xml`` of subprojects if they are set.
30+
2531
This allows search engines to prioritize results based on the version number,
2632
sorted by `semantic versioning`_.
2733

readthedocs/proxito/tests/test_full.py

+45
Original file line numberDiff line numberDiff line change
@@ -1504,6 +1504,51 @@ def test_sitemap_all_private_versions(self):
15041504
)
15051505
self.assertEqual(response.status_code, 404)
15061506

1507+
def test_sitemap_subproject(self):
1508+
self.project.versions.update(active=True)
1509+
self.subproject.versions.update(active=True)
1510+
1511+
subresponse = self.client.get(
1512+
reverse("sitemap_xml", args=["subproject"]),
1513+
headers={"host": "project.readthedocs.io"},
1514+
)
1515+
response = self.client.get(
1516+
reverse("sitemap_xml"), headers={"host": "subproject.readthedocs.io"}
1517+
)
1518+
1519+
self.assertEqual(subresponse.status_code, 200)
1520+
self.assertEqual(response.status_code, 200)
1521+
self.assertEqual(subresponse.content, response.content)
1522+
1523+
def test_sitemap_index(self):
1524+
self.project.versions.update(active=True)
1525+
response = self.client.get(
1526+
reverse("sitemap_index_xml"), headers={"host": "project.readthedocs.io"}
1527+
)
1528+
self.assertEqual(response.status_code, 200)
1529+
self.assertEqual(response["Content-Type"], "application/xml")
1530+
expected = dedent(
1531+
"""
1532+
<?xml version="1.0" encoding="UTF-8"?>
1533+
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
1534+
1535+
<sitemap>
1536+
<loc>https://project.readthedocs.io/sitemap.xml</loc>
1537+
</sitemap>
1538+
1539+
<sitemap>
1540+
<loc>https://project.readthedocs.io/projects/subproject/sitemap.xml</loc>
1541+
</sitemap>
1542+
1543+
<sitemap>
1544+
<loc>https://project.readthedocs.io/projects/subproject-alias/sitemap.xml</loc>
1545+
</sitemap>
1546+
1547+
</sitemapindex>
1548+
"""
1549+
).lstrip()
1550+
self.assertEqual(response.content.decode(), expected)
1551+
15071552
@mock.patch(
15081553
"readthedocs.proxito.views.mixins.staticfiles_storage",
15091554
new=StaticFileSystemStorageTest(),

readthedocs/proxito/urls.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
ServePageRedirect,
4848
ServeRobotsTXT,
4949
ServeSitemapXML,
50+
ServeSitemapIndexXML,
5051
ServeStaticFiles,
5152
)
5253
from readthedocs.proxito.views.utils import fast_404, proxito_404_page_handler
@@ -136,7 +137,15 @@
136137
name="proxito_404_handler",
137138
),
138139
re_path(r"robots\.txt$", ServeRobotsTXT.as_view(), name="robots_txt"),
139-
re_path(r"sitemap\.xml$", ServeSitemapXML.as_view(), name="sitemap_xml"),
140+
re_path(
141+
r"^(?:projects/(?P<subproject_slug>{project_slug})/)?"
142+
r"sitemap\.xml$".format(**pattern_opts),
143+
ServeSitemapXML.as_view(),
144+
name="sitemap_xml",
145+
),
146+
re_path(
147+
r"sitemap_index\.xml$", ServeSitemapIndexXML.as_view(), name="sitemap_index_xml"
148+
),
140149
]
141150

142151
docs_urls = [

readthedocs/proxito/views/serve.py

+61-1
Original file line numberDiff line numberDiff line change
@@ -1019,7 +1019,7 @@ class ServeSitemapXMLBase(CDNCacheControlMixin, CDNCacheTagsMixin, View):
10191019
# Extra cache tag to invalidate only this view if needed.
10201020
project_cache_tag = "sitemap.xml"
10211021

1022-
def get(self, request):
1022+
def get(self, request, subproject_slug=None):
10231023
"""
10241024
Generate and serve a ``sitemap.xml`` for a particular ``project``.
10251025
@@ -1078,6 +1078,12 @@ def changefreqs_generator():
10781078
yield from itertools.chain(changefreqs, itertools.repeat('monthly'))
10791079

10801080
project = request.unresolved_domain.project
1081+
1082+
if subproject_slug:
1083+
project = get_object_or_404(
1084+
project.subprojects, alias=subproject_slug
1085+
).child
1086+
10811087
public_versions = Version.internal.public(
10821088
project=project,
10831089
only_active=True,
@@ -1164,6 +1170,60 @@ class ServeSitemapXML(SettingsOverrideObject):
11641170
_default_class = ServeSitemapXMLBase
11651171

11661172

1173+
class ServeSitemapIndexXMLBase(CDNCacheControlMixin, CDNCacheTagsMixin, View):
1174+
1175+
"""Serve sitemap_index.xml from the domain's root."""
1176+
1177+
cache_response = True
1178+
project_cache_tag = "sitemap.xml"
1179+
1180+
def get(self, request):
1181+
"""
1182+
Generate and serve a ``sitemap_index.xml`` for a particular
1183+
``project``.
1184+
1185+
The sitemap index is generated from the project and all sub-projects.
1186+
"""
1187+
1188+
project = request.unresolved_domain.project
1189+
1190+
locations = [
1191+
"{scheme}://{domain}/sitemap.xml".format(
1192+
scheme="https",
1193+
domain=project.subdomain(),
1194+
)
1195+
]
1196+
for subproject in project.related_projects.all():
1197+
locations.append(
1198+
"{scheme}://{domain}/projects/{subproject}/sitemap.xml".format(
1199+
scheme="https",
1200+
domain=project.subdomain(),
1201+
subproject=subproject.slug,
1202+
)
1203+
)
1204+
context = {
1205+
"locations": locations,
1206+
}
1207+
return render(
1208+
request,
1209+
"sitemap_index.xml",
1210+
context,
1211+
content_type="application/xml",
1212+
)
1213+
1214+
def _get_project(self):
1215+
# Method used by the CDNCacheTagsMixin class.
1216+
return self.request.unresolved_domain.project
1217+
1218+
def _get_version(self):
1219+
# This view isn't attached to a version.
1220+
return None
1221+
1222+
1223+
class ServeSitemapIndexXML(SettingsOverrideObject):
1224+
_default_class = ServeSitemapIndexXMLBase
1225+
1226+
11671227
class ServeStaticFiles(CDNCacheControlMixin, CDNCacheTagsMixin, ServeDocsMixin, View):
11681228

11691229
"""
+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
3+
{% for loc in locations %}
4+
<sitemap>
5+
<loc>{{ loc }}</loc>
6+
</sitemap>
7+
{% endfor %}
8+
</sitemapindex>

0 commit comments

Comments
 (0)