-
-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Generate general sitemap.xml for projects #5122
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 19 commits
7edeeeb
65f5593
00da71f
83da6a1
38ed1b6
a27d96f
f460539
fdb2509
9f2233e
51be556
b69f4ea
fd5ddbf
8cb488e
8a42529
a40b493
6b3cf9f
21b0015
2f41620
2a7f6bd
acfbfc4
a0efd4a
6482bac
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
Sitemaps | ||
======== | ||
|
||
Sitemaps_ allows us to inform search engines about URLs that are available for crawling | ||
and communicate them additional information about each URL of the project: | ||
|
||
* when it was last updated, | ||
* how often it changes, | ||
* how important it is in relation to other URLs in the site, and | ||
* what translations are available for a page. | ||
|
||
Read the Docs automatically generates a sitemap for each project that hosts | ||
to improve results when performing a search on these search engines. | ||
This allow us to prioritize results based on the version number, for example | ||
to show ``latest`` as the top result followed by ``stable`` and then all the project's | ||
versions sorted following semver_. | ||
|
||
.. _semver: https://semver.org/ | ||
.. _Sitemaps: https://www.sitemaps.org/ |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,3 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
""" | ||
Doc serving from Python. | ||
|
||
|
@@ -26,6 +24,7 @@ | |
SERVE_DOCS (['private']) - The list of ['private', 'public'] docs to serve. | ||
""" | ||
|
||
import itertools | ||
import logging | ||
import mimetypes | ||
import os | ||
|
@@ -36,6 +35,7 @@ | |
from django.http import Http404, HttpResponse, HttpResponseRedirect | ||
from django.shortcuts import get_object_or_404, render | ||
from django.utils.encoding import iri_to_uri | ||
from django.views.decorators.cache import cache_page | ||
from django.views.static import serve | ||
|
||
from readthedocs.builds.models import Version | ||
|
@@ -44,6 +44,7 @@ | |
from readthedocs.core.symlink import PrivateSymlink, PublicSymlink | ||
from readthedocs.projects import constants | ||
from readthedocs.projects.models import Project, ProjectRelationship | ||
from readthedocs.projects.templatetags.projects_tags import sort_version_aware | ||
|
||
|
||
log = logging.getLogger(__name__) | ||
|
@@ -262,7 +263,7 @@ def _serve_symlink_docs(request, project, privacy_level, filename=''): | |
files_tried.append(os.path.join(basepath, filename)) | ||
|
||
raise Http404( | ||
'File not found. Tried these files: %s' % ','.join(files_tried), | ||
'File not found. Tried these files: {}'.format(','.join(files_tried)), | ||
) | ||
|
||
|
||
|
@@ -309,4 +310,121 @@ def robots_txt(request, project): | |
if os.path.exists(fullpath): | ||
return HttpResponse(open(fullpath).read(), content_type='text/plain') | ||
|
||
return HttpResponse('User-agent: *\nAllow: /\n', content_type='text/plain') | ||
sitemap_url = '{scheme}://{domain}/sitemap.xml'.format( | ||
scheme='https', | ||
domain=project.subdomain(), | ||
) | ||
return HttpResponse( | ||
'User-agent: *\nAllow: /\nSitemap: {}\n'.format(sitemap_url), | ||
content_type='text/plain', | ||
) | ||
|
||
|
||
@map_project_slug | ||
@cache_page(60 * 60 * 24 * 3) # 3 days | ||
def sitemap_xml(request, project): | ||
""" | ||
Generate and serve a ``sitemap.xml`` for a particular ``project``. | ||
|
||
The sitemap is generated from all the ``active`` and public versions of | ||
``project``. These versions are sorted by using semantic versioning | ||
prepending ``latest`` and ``stable`` (if they are enabled) at the beginning. | ||
|
||
Following this order, the versions are assigned priorities and change | ||
frequency. Starting from 1 and decreasing by 0.1 for priorities and starting | ||
from daily, weekly to monthly for change frequency. | ||
|
||
If the project is private, the view raises ``Http404``. On the other hand, | ||
if the project is public but a version is private, this one is not included | ||
in the sitemap. | ||
|
||
:param request: Django request object | ||
:param project: Project instance to generate the sitemap | ||
|
||
:returns: response with the ``sitemap.xml`` template rendered | ||
|
||
:rtype: django.http.HttpResponse | ||
""" | ||
|
||
def priorities_generator(): | ||
""" | ||
Generator returning ``priority`` needed by sitemap.xml. | ||
|
||
It generates values from 1 to 0.1 by decreasing in 0.1 on each | ||
iteration. After 0.1 is reached, it will keep returning 0.1. | ||
""" | ||
priorities = [1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2] | ||
yield from itertools.chain(priorities, itertools.repeat(0.1)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Typo on line 288: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed! |
||
|
||
def changefreqs_generator(): | ||
""" | ||
Generator returning ``changefreq`` needed by sitemap.xml. | ||
|
||
It returns ``daily`` on first iteration, then ``weekly`` and then it | ||
will return always ``monthly``. | ||
|
||
We are using ``monthly`` as last value because ``never`` is too | ||
aggressive. If the tag is removed and a branch is created with the same | ||
name, we will want bots to revisit this. | ||
""" | ||
changefreqs = ['daily', 'weekly'] | ||
yield from itertools.chain(changefreqs, itertools.repeat('monthly')) | ||
|
||
if project.privacy_level == constants.PRIVATE: | ||
raise Http404 | ||
|
||
sorted_versions = sort_version_aware( | ||
project.versions.filter( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. any reason not to use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe it also defaults to only active projects, but can pass There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No specific reason. I just changed to |
||
active=True, | ||
privacy_level=constants.PUBLIC, | ||
), | ||
) | ||
|
||
versions = [] | ||
for version, priority, changefreq in zip( | ||
sorted_versions, | ||
priorities_generator(), | ||
changefreqs_generator(), | ||
): | ||
element = { | ||
'loc': version.get_subdomain_url(), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The URL should be properly escaped: https://www.sitemaps.org/protocol.html#escaping There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure if there is something we need to do here, actually. |
||
'priority': priority, | ||
'changefreq': changefreq, | ||
'languages': [], | ||
} | ||
|
||
# Version can be enabled, but not ``built`` yet. We want to show the | ||
# link without a ``lastmod`` attribute | ||
last_build = version.builds.order_by('-date').first() | ||
if last_build: | ||
element['lastmod'] = last_build.date.isoformat() | ||
|
||
if project.translations.exists(): | ||
for translation in project.translations.all(): | ||
href = project.get_docs_url( | ||
version_slug=version.slug, | ||
lang_slug=translation.language, | ||
private=version.privacy_level == constants.PRIVATE, | ||
) | ||
element['languages'].append({ | ||
'hreflang': translation.language, | ||
'href': href, | ||
}) | ||
|
||
# Add itself also as protocol requires | ||
element['languages'].append({ | ||
'hreflang': project.language, | ||
'href': element['loc'], | ||
}) | ||
|
||
versions.append(element) | ||
|
||
context = { | ||
'versions': versions, | ||
} | ||
return render( | ||
request, | ||
'sitemap.xml', | ||
context, | ||
content_type='application/xml', | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" | ||
xmlns:xhtml="http://www.w3.org/1999/xhtml"> | ||
{% for version in versions %} | ||
<url> | ||
<loc>{{ version.loc }}</loc> | ||
{% for language in version.languages %} | ||
<xhtml:link | ||
rel="alternate" | ||
hreflang="{{ language.hreflang }}" | ||
href="{{ language.href }}"/> | ||
{% endfor %} | ||
{% if version.lastmod %} | ||
<lastmod>{{ version.lastmod }}</lastmod> | ||
{% endif %} | ||
<changefreq>{{ version.changefreq }}</changefreq> | ||
<priority>{{ version.priority }}</priority> | ||
</url> | ||
{% endfor %} | ||
</urlset> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this linked from anywhere? Should be in an toctree somewhere.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. I wanted to link it under
Feature Documentation
but I forgot to do it.I will move this file under
docs/features/sitemaps.rst
and will be linked automatically on that section.