
Commit f06271b

Merge pull request #5086 from rtfd/humitos/custom-robots-txt
Support custom robots.txt
2 parents 1f8443c + 3e4b1a4

File tree

docs/faq.rst
readthedocs/core/urls/subdomain.py
readthedocs/core/views/serve.py
readthedocs/rtd_tests/tests/test_doc_serving.py

4 files changed: +161 −1 lines changed

docs/faq.rst

+46
@@ -230,3 +230,49 @@ What commit of Read the Docs is in production?
----------------------------------------------

We deploy readthedocs.org from the `rel` branch in our GitHub repository. You can see the latest commits that have been deployed by looking on GitHub: https://github.com/rtfd/readthedocs.org/commits/rel


How can I avoid search results having a deprecated version of my docs?
-----------------------------------------------------------------------

If readers search for something related to your docs on Google, the results will probably point to the most relevant version of your documentation.
That version may already be deprecated, and you may want to stop Google from indexing it
and start suggesting the latest (or a newer) one instead.

To accomplish this, you can add a ``robots.txt`` file to your documentation's root so that it is served at the root URL of your project
(for example, https://yourproject.readthedocs.io/robots.txt).

Minimal example of ``robots.txt``
+++++++++++++++++++++++++++++++++

::

   User-agent: *
   Disallow: /en/deprecated-version/
   Disallow: /en/2.0/

.. note::

   See `Google's docs`_ for its full syntax.

This file has to be served as-is under ``/robots.txt``.
Depending on whether you are using Sphinx or MkDocs, you will need a different configuration for this.

Sphinx
~~~~~~

Sphinx uses the `html_extra_path`_ option to add static files to the output.
You need to create a ``robots.txt`` file and put it under the path defined in ``html_extra_path``.
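
For example, a minimal ``conf.py`` sketch, assuming ``robots.txt`` is kept in an ``extra/`` directory next to ``conf.py`` (the directory name is just an example, not something Sphinx requires)::

   # conf.py
   # Sphinx copies the contents of the listed paths to the root of the HTML
   # output, so ``extra/robots.txt`` ends up served as ``/robots.txt``.
   html_extra_path = ['extra']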


MkDocs
~~~~~~

MkDocs needs the ``robots.txt`` to be placed inside the directory defined by the `docs_dir`_ config.
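
A possible layout, assuming the default ``docs_dir`` of ``docs``::

   mkdocs.yml
   docs/
       index.md
       robots.txt

Files inside ``docs_dir`` that are not Markdown pages are copied to the built site unchanged, so ``robots.txt`` ends up at the root of the generated site.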


.. _Google's docs: https://support.google.com/webmasters/answer/6062608
.. _html_extra_path: https://www.sphinx-doc.org/en/master/usage/configuration.html#confval-html_extra_path
.. _docs_dir: https://www.mkdocs.org/user-guide/configuration/#docs_dir

readthedocs/core/urls/subdomain.py

+5 −1
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 """URL configurations for subdomains."""
 from __future__ import absolute_import
 
@@ -10,7 +12,7 @@
 
 from readthedocs.core.views.serve import (
     redirect_page_with_filename,
-    redirect_project_slug, serve_docs
+    redirect_project_slug, serve_docs, robots_txt,
 )
 from readthedocs.core.views import (
     server_error_500,
@@ -22,6 +24,8 @@
 handler404 = server_error_404
 
 subdomain_urls = [
+    url(r'robots.txt$', robots_txt, name='robots_txt'),
+
     url(r'^(?:|projects/(?P<subproject_slug>{project_slug})/)'
         r'page/(?P<filename>.*)$'.format(**pattern_opts),
         redirect_page_with_filename,
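
Since the route is named, callers (including the tests below) can resolve it with Django's reverse() instead of hard-coding the path. A small sketch, assuming the subdomain URLconf is the active ROOT_URLCONF:

    from django.urls import reverse

    # Resolves to '/robots.txt' when the subdomain URLconf is in use.
    reverse('robots_txt')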

readthedocs/core/views/serve.py

+46
@@ -223,3 +223,49 @@ def _serve_symlink_docs(request, project, privacy_level, filename=''):
 
     raise Http404(
         'File not found. Tried these files: %s' % ','.join(files_tried))
+
+
+@map_project_slug
+def robots_txt(request, project):
+    """
+    Serve custom user's defined ``/robots.txt``.
+
+    If the user added a ``robots.txt`` in the "default version" of the project,
+    we serve it directly.
+    """
+    # Use the ``robots.txt`` file from the default version configured
+    version_slug = project.get_default_version()
+    version = project.versions.get(slug=version_slug)
+
+    no_serve_robots_txt = any([
+        # If project is private or,
+        project.privacy_level == constants.PRIVATE,
+        # default version is private or,
+        version.privacy_level == constants.PRIVATE,
+        # default version is not active or,
+        not version.active,
+        # default version is not built
+        not version.built,
+    ])
+    if no_serve_robots_txt:
+        # ... we do return a 404
+        raise Http404()
+
+    filename = resolve_path(
+        project,
+        version_slug=version_slug,
+        filename='robots.txt',
+        subdomain=True,  # subdomain will make it a "full" path without a URL prefix
+    )
+
+    # This breaks path joining, by ignoring the root when given an "absolute" path
+    if filename[0] == '/':
+        filename = filename[1:]
+
+    basepath = PublicSymlink(project).project_root
+    fullpath = os.path.join(basepath, filename)
+
+    if os.path.exists(fullpath):
+        return HttpResponse(open(fullpath).read(), content_type='text/plain')
+
+    return HttpResponse('User-agent: *\nAllow: /\n', content_type='text/plain')
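
One quick way to observe both branches of the new view from outside, once deployed; the project slug and the use of the requests library here are illustrative assumptions, not part of this change:

    import requests

    # Projects without their own robots.txt get the permissive fallback body.
    response = requests.get('https://yourproject.readthedocs.io/robots.txt')
    print(response.status_code, response.headers.get('Content-Type'))  # 200 text/plain
    print(response.text)  # 'User-agent: *\nAllow: /\n' unless the project ships its own file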

readthedocs/rtd_tests/tests/test_doc_serving.py

+64
@@ -2,13 +2,17 @@
 
 from __future__ import absolute_import, unicode_literals, division, print_function
 import mock
+from mock import patch, mock_open
 import django_dynamic_fixture as fixture
+import pytest
+import six
 
 from django.contrib.auth.models import User
 from django.test import TestCase
 from django.test.utils import override_settings
 from django.http import Http404
 from django.conf import settings
+from django.urls import reverse
 
 from readthedocs.rtd_tests.base import RequestFactoryTestMixin
 from readthedocs.projects import constants
@@ -77,6 +81,28 @@ def test_private_files_not_found(self):
         self.assertTrue('private_web_root' in str(exc.exception))
         self.assertTrue('public_web_root' not in str(exc.exception))
 
+    @override_settings(
+        PYTHON_MEDIA=False,
+        USE_SUBDOMAIN=True,
+        PUBLIC_DOMAIN='readthedocs.io',
+        ROOT_URLCONF=settings.SUBDOMAIN_URLCONF,
+    )
+    def test_robots_txt(self):
+        self.public.versions.update(active=True, built=True)
+        response = self.client.get(
+            reverse('robots_txt'),
+            HTTP_HOST='private.readthedocs.io',
+        )
+        self.assertEqual(response.status_code, 404)
+
+        self.client.force_login(self.eric)
+        response = self.client.get(
+            reverse('robots_txt'),
+            HTTP_HOST='private.readthedocs.io',
+        )
+        # Private projects/versions always return 404 for robots.txt
+        self.assertEqual(response.status_code, 404)
+
 
 @override_settings(SERVE_DOCS=[constants.PRIVATE, constants.PUBLIC])
 class TestPublicDocs(BaseDocServing):
@@ -110,3 +136,41 @@ def test_both_files_not_found(self):
             _serve_symlink_docs(request, project=self.private, filename='/en/latest/usage.html', privacy_level='public')
         self.assertTrue('private_web_root' not in str(exc.exception))
         self.assertTrue('public_web_root' in str(exc.exception))
+
+    @override_settings(
+        PYTHON_MEDIA=False,
+        USE_SUBDOMAIN=True,
+        PUBLIC_DOMAIN='readthedocs.io',
+        ROOT_URLCONF=settings.SUBDOMAIN_URLCONF,
+    )
+    def test_default_robots_txt(self):
+        self.public.versions.update(active=True, built=True)
+        response = self.client.get(
+            reverse('robots_txt'),
+            HTTP_HOST='public.readthedocs.io',
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.content, b'User-agent: *\nAllow: /\n')
+
+    @override_settings(
+        PYTHON_MEDIA=False,
+        USE_SUBDOMAIN=True,
+        PUBLIC_DOMAIN='readthedocs.io',
+        ROOT_URLCONF=settings.SUBDOMAIN_URLCONF,
+    )
+    @patch(
+        'builtins.open',
+        new_callable=mock_open,
+        read_data='My own robots.txt',
+    )
+    @patch('readthedocs.core.views.serve.os')
+    @pytest.mark.skipif(six.PY2, reason='In Python2 the mock is __builtins__.open')
+    def test_custom_robots_txt(self, os_mock, open_mock):
+        os_mock.path.exists.return_value = True
+        self.public.versions.update(active=True, built=True)
+        response = self.client.get(
+            reverse('robots_txt'),
+            HTTP_HOST='public.readthedocs.io',
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.content, b'My own robots.txt')
