Skip to content

Commit 4cc9269

Browse files
authored
Merge pull request #8632 from readthedocs/humitos/spamfighting
2 parents 2552bb6 + 06dc7ed commit 4cc9269

File tree

8 files changed

+146
-9
lines changed

8 files changed

+146
-9
lines changed

media/images/header-logo.png

9.75 KB
Loading

readthedocs/projects/admin.py

Lines changed: 79 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
"""Django administration interface for `projects.models`."""
22

3+
from django.db.models import Sum
4+
from django.conf import settings
35
from django.contrib import admin, messages
46
from django.contrib.admin.actions import delete_selected
57
from django.forms import BaseInlineFormSet
@@ -143,42 +145,90 @@ def queryset(self, request, queryset):
143145
return queryset
144146

145147

148+
class ProjectSpamThreshold(admin.SimpleListFilter):

    """
    Filter for projects that are potentially SPAM.

    Each option keeps the projects whose summed ``spam_rules__value`` is
    greater than or equal to the matching ``RTD_SPAM_THRESHOLD_*`` setting.
    """

    title = 'Spam Threshold'
    parameter_name = 'spam_threshold'

    DONT_SHOW_ADS = 'dont_show_ads'
    DENY_ON_ROBOTS = 'deny_on_robots'
    DONT_SERVE_DOCS = 'dont_serve_docs'
    DONT_SHOW_DASHBOARD = 'dont_show_dashboard'
    DELETE_PROJECT = 'delete_project'

    # Maps every filter option to the *name* of the setting holding its
    # threshold. The name (not the value) is stored so the setting is still
    # read when the filter runs, exactly as a direct ``settings.X`` access.
    THRESHOLD_SETTINGS = {
        DONT_SHOW_ADS: 'RTD_SPAM_THRESHOLD_DONT_SHOW_ADS',
        DENY_ON_ROBOTS: 'RTD_SPAM_THRESHOLD_DENY_ON_ROBOTS',
        DONT_SERVE_DOCS: 'RTD_SPAM_THRESHOLD_DONT_SERVE_DOCS',
        DONT_SHOW_DASHBOARD: 'RTD_SPAM_THRESHOLD_DONT_SHOW_DASHBOARD',
        DELETE_PROJECT: 'RTD_SPAM_THRESHOLD_DELETE_PROJECT',
    }

    def lookups(self, request, model_admin):
        """Return the ``(value, label)`` pairs offered in the admin sidebar."""
        return (
            (self.DONT_SHOW_ADS, _("Don't show Ads")),
            (self.DENY_ON_ROBOTS, _('Deny on robots')),
            (self.DONT_SERVE_DOCS, _("Don't serve docs")),
            (self.DONT_SHOW_DASHBOARD, _("Don't show dashboard")),
            (self.DELETE_PROJECT, _('Delete project')),
        )

    def queryset(self, request, queryset):
        """
        Keep only the projects whose spam score reaches the chosen threshold.

        The queryset is returned untouched when no option (or an unknown one)
        is selected, so the ``spam_rules`` aggregate is only computed when
        it is actually needed for filtering.
        """
        setting_name = self.THRESHOLD_SETTINGS.get(self.value())
        if setting_name is None:
            return queryset

        threshold = getattr(settings, setting_name)
        return queryset.annotate(
            spam_score=Sum('spam_rules__value'),
        ).filter(spam_score__gte=threshold)
183+
184+
146185
class ProjectAdmin(ExtraSimpleHistoryAdmin):
147186

148187
"""Project model admin view."""
149188

150189
prepopulated_fields = {'slug': ('name',)}
151-
list_display = ('name', 'slug', 'repo', 'repo_type', 'featured')
152-
list_filter = (
190+
list_display = ('name', 'slug', 'repo')
191+
192+
list_filter = tuple()
193+
if 'readthedocsext.spamfighting' in settings.INSTALLED_APPS:
194+
list_filter = list_filter + (ProjectSpamThreshold,)
195+
196+
list_filter = list_filter + (
197+
ProjectOwnerBannedFilter,
198+
'feature__feature_id',
153199
'repo_type',
154-
'featured',
155200
'privacy_level',
156-
'documentation_type',
157201
'programming_language',
158-
'feature__feature_id',
159-
ProjectOwnerBannedFilter,
202+
'documentation_type',
160203
)
161-
list_editable = ('featured',)
204+
162205
search_fields = ('slug', 'repo')
163206
inlines = [
164207
ProjectRelationshipInline,
165208
RedirectInline,
166209
VersionInline,
167210
DomainInline,
168211
]
169-
readonly_fields = ('pub_date', 'feature_flags',)
212+
readonly_fields = ('pub_date', 'feature_flags', 'matching_spam_rules')
170213
raw_id_fields = ('users', 'main_language_project', 'remote_repository')
171214
actions = [
172215
'send_owner_email',
173216
'ban_owner',
217+
'run_spam_rule_checks',
174218
'build_default_version',
175219
'reindex_active_versions',
176220
'wipe_all_versions',
177221
'import_tags_from_vcs',
178222
]
179223

224+
def matching_spam_rules(self, obj):
    """
    Describe the enabled spam rules attached to ``obj``.

    Returns one ``<spam_rule_type> (<value>)`` entry per line, or a
    placeholder text when ``obj`` has no enabled spam rules.
    """
    lines = [
        f'{rule.spam_rule_type} ({rule.value})'
        for rule in obj.spam_rules.filter(enabled=True)
    ]
    if not lines:
        return 'No matching spam rules'
    return '\n'.join(lines)
229+
180230
def feature_flags(self, obj):
    """Return the display name of every feature of ``obj``, one per line."""
    labels = (str(feature.get_feature_display()) for feature in obj.features)
    return '\n'.join(labels)
182232

183233
def send_owner_email(self, request, queryset):
184234
view = ProjectSendNotificationView.as_view(
@@ -188,6 +238,26 @@ def send_owner_email(self, request, queryset):
188238

189239
send_owner_email.short_description = 'Notify project owners'
190240

241+
def run_spam_rule_checks(self, request, queryset):
    """
    Run all the spam checks on the selected projects.

    Triggers the ``spam_rules_check`` task with the slugs of the projects in
    ``queryset`` and adds an info message with how many slugs were sent.
    When the spam fighting application is not installed, an error message
    is added instead and no task is triggered.
    """
    if 'readthedocsext.spamfighting' not in settings.INSTALLED_APPS:
        messages.add_message(
            request,
            messages.ERROR,
            'Spam fighting Django application not installed',
        )
        return

    from readthedocsext.spamfighting.tasks import spam_rules_check  # noqa

    # NOTE: convert queryset to a simple list so Celery can serialize it
    project_slugs = list(queryset.values_list('slug', flat=True))
    spam_rules_check.delay(project_slugs=project_slugs)

    # Report the number of slugs actually sent to the task rather than
    # issuing a second ``COUNT`` query that may disagree with that list.
    messages.add_message(
        request,
        messages.INFO,
        f'Spam check task triggered for {len(project_slugs)} projects',
    )
260+
191261
def ban_owner(self, request, queryset):
192262
"""
193263
Ban project owner.

readthedocs/projects/models.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1252,6 +1252,10 @@ def show_advertising(self):
12521252
if self.ad_free or self.gold_owners.exists():
12531253
return False
12541254

1255+
if 'readthedocsext.spamfighting' in settings.INSTALLED_APPS:
1256+
from readthedocsext.spamfighting.utils import is_show_ads_denied # noqa
1257+
return not is_show_ads_denied(self)
1258+
12551259
return True
12561260

12571261
def environment_variables(self, *, public_only=True):

readthedocs/proxito/views/mixins.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,12 @@ def get_version_from_host(self, request, version_slug):
176176
version_slug = request.host_version_slug
177177
return version_slug
178178

179+
def _spam_response(self, request, project):
    """
    Build the response used instead of the docs of a spam project.

    Returns the rendered ``spam.html`` page with a 401 status when the spam
    fighting application is installed and denies serving docs for
    ``project``; returns ``None`` otherwise.
    """
    if 'readthedocsext.spamfighting' not in settings.INSTALLED_APPS:
        return None

    # Imported lazily: the extension is optional and may not be installed.
    from readthedocsext.spamfighting.utils import is_serve_docs_denied  # noqa

    if not is_serve_docs_denied(project):
        return None
    return render(request, template_name='spam.html', status=401)
184+
179185

180186
class ServeRedirectMixin:
181187

readthedocs/proxito/views/serve.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from urllib.parse import urlparse
66

77
from readthedocs.core.resolver import resolve_path
8+
from django.conf import settings
89
from django.http import Http404, HttpResponse, HttpResponseRedirect
910
from django.shortcuts import render
1011
from django.urls import resolve as url_resolve
@@ -82,6 +83,11 @@ def get(self,
8283
final_project.slug, subproject_slug, lang_slug, version_slug, filename
8384
)
8485

86+
# Verify if the project is marked as spam and return a 401 in that case
87+
spam_response = self._spam_response(request, final_project)
88+
if spam_response:
89+
return spam_response
90+
8591
# Handle requests that need canonicalizing (eg. HTTP -> HTTPS, redirect to canonical domain)
8692
if hasattr(request, 'canonicalize'):
8793
try:
@@ -348,6 +354,16 @@ def get(self, request, project):
348354
project, we serve it directly.
349355
"""
350356

357+
# Verify if the project is marked as spam and return a custom robots.txt
358+
if 'readthedocsext.spamfighting' in settings.INSTALLED_APPS:
359+
from readthedocsext.spamfighting.utils import is_robotstxt_denied # noqa
360+
if is_robotstxt_denied(project):
361+
return render(
362+
request,
363+
'robots.spam.txt',
364+
content_type='text/plain',
365+
)
366+
351367
# Use the ``robots.txt`` file from the default version configured
352368
version_slug = project.get_default_version()
353369
version = project.versions.get(slug=version_slug)

readthedocs/settings/base.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -848,3 +848,9 @@ def DOCKER_LIMITS(self):
848848
RTD_EMBED_API_DEFAULT_REQUEST_TIMEOUT = 1
849849
RTD_EMBED_API_DOMAIN_RATE_LIMIT = 50
850850
RTD_EMBED_API_DOMAIN_RATE_LIMIT_TIMEOUT = 60
851+
852+
RTD_SPAM_THRESHOLD_DONT_SHOW_ADS = 100
853+
RTD_SPAM_THRESHOLD_DENY_ON_ROBOTS = 200
854+
RTD_SPAM_THRESHOLD_DONT_SHOW_DASHBOARD = 300
855+
RTD_SPAM_THRESHOLD_DONT_SERVE_DOCS = 500
856+
RTD_SPAM_THRESHOLD_DELETE_PROJECT = 1000

readthedocs/templates/robots.spam.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Spam project detected, removing from search indexing
2+
User-agent: *
3+
Disallow: /

readthedocs/templates/spam.html

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
{% load static %}
2+
<!DOCTYPE html>
3+
<html lang=en>
4+
<head>
5+
<meta charset=utf-8>
6+
<meta name=viewport content="width=device-width, initial-scale=1, shrink-to-fit=no">
7+
<link rel="icon" type="image/png" href="{% static 'images/favicon.png' %}">
8+
<link rel="stylesheet" href="{% static 'css/core.css' %}">
9+
<title>Spam Project</title>
10+
<style>
11+
main {
12+
max-width: 600px;
13+
margin: 4rem auto;
14+
text-align: center;
15+
}
16+
img {
17+
width: 100%;
18+
max-width: 500px;
19+
}
20+
</style>
21+
</head>
22+
<body>
23+
<main>
24+
<img alt="Read the Docs Logo" src="{% static 'images/header-logo.png' %}">
25+
<h1>Project marked as spam</h1>
26+
<p>Read the Docs has marked this content as spam and is not serving it anymore.</p>
27+
{# We can't use url templatetag here because the URL is not defined in El Proxito #}
28+
<p>Please <a href="https://{{ PUBLIC_DOMAIN }}/support/">contact us</a> if you think this is a mistake.</p>
29+
<p style="color: #666;">401 - Unauthorized</p>
30+
</main>
31+
</body>
32+
</html>

0 commit comments

Comments
 (0)