Skip to content

Commit 730b0fd

Browse files
committed
Spam: deny dashboard on spammy projects
Quick/Initial implementation to deny showing a project's dashboard if spam score is above the threshold. - the old and non-used `ProjectSpamError` that checked for an invalid description was removed together with all its logic - the mixin `ProjectSpamMixin` was re-purposed to check the spam score and deny serving the dashboard - move "disable banned user to import projects" inside the `ImportWizardView` Note that currently we are only denying the dashboard for the project's detail ("Overview" in our UI) view. If we want to deny other pages like "Downloads", "Search", "Builds", "Versions" and "Admin" we need to adapt other views as well (e.g. migrate them from function-based view to class-based view to be able to re-use the mixin). This can be implemented in a future version. Together with this, we could only deny the dashboard to non-maintainers of the project itself, allowing them to make some changes in case of a mistake.
1 parent 55705d8 commit 730b0fd

File tree

11 files changed

+50
-140
lines changed

11 files changed

+50
-140
lines changed

pytest.ini

-2
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@ markers =
99
python_files = tests.py test_*.py *_tests.py
1010
filterwarnings =
1111
# Ignore external dependencies warning deprecations
12-
# textclassifier
13-
ignore:The 'warn' method is deprecated, use 'warning' instead:DeprecationWarning
1412
# django-rest-framework
1513
ignore:Pagination may yield inconsistent results with an unordered object_list.*:django.core.paginator.UnorderedObjectListWarning
1614
# docutils

readthedocs/projects/exceptions.py

-10
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,3 @@ def get_default_message(self):
5151
if settings.ALLOW_PRIVATE_REPOS:
5252
return self.PRIVATE_ALLOWED
5353
return self.PRIVATE_NOT_ALLOWED
54-
55-
56-
class ProjectSpamError(Exception):
57-
58-
"""
59-
Error raised when a project field has detected spam.
60-
61-
This error is not raised to users, we use this for banning users in the
62-
background.
63-
"""

readthedocs/projects/forms.py

-3
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,13 @@
1111
from django.contrib.auth.models import User
1212
from django.template.loader import render_to_string
1313
from django.utils.translation import ugettext_lazy as _
14-
from textclassifier.validators import ClassifierValidator
1514

1615
from readthedocs.builds.constants import INTERNAL
1716
from readthedocs.core.history import SimpleHistoryModelForm
1817
from readthedocs.core.utils import slugify, trigger_build
1918
from readthedocs.core.utils.extend import SettingsOverrideObject
2019
from readthedocs.integrations.models import Integration
2120
from readthedocs.oauth.models import RemoteRepository
22-
from readthedocs.projects.exceptions import ProjectSpamError
2321
from readthedocs.projects.models import (
2422
Domain,
2523
EmailHook,
@@ -173,7 +171,6 @@ class Meta:
173171
)
174172

175173
description = forms.CharField(
176-
validators=[ClassifierValidator(raises=ProjectSpamError)],
177174
required=False,
178175
max_length=150,
179176
widget=forms.Textarea,

readthedocs/projects/views/base.py

+15-27
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
11
"""Mix-in classes for project views."""
22
import structlog
3-
from datetime import timedelta
43
from functools import lru_cache
54

65
from django.conf import settings
76
from django.http import HttpResponseRedirect
8-
from django.shortcuts import get_object_or_404
7+
from django.shortcuts import render, get_object_or_404
98
from django.urls import reverse
10-
from django.utils import timezone
119

12-
from readthedocs.projects.exceptions import ProjectSpamError
1310
from readthedocs.projects.models import Project
1411

1512
log = structlog.get_logger(__name__)
@@ -86,26 +83,17 @@ def get_form(self, data=None, files=None, **kwargs):
8683

8784
class ProjectSpamMixin:
8885

89-
"""Protects POST views from spammers."""
90-
91-
def post(self, request, *args, **kwargs):
92-
log.bind(user_username=request.user.username)
93-
if request.user.profile.banned:
94-
log.info('Rejecting project POST from shadowbanned user.')
95-
return HttpResponseRedirect(self.get_failure_url())
96-
try:
97-
return super().post(request, *args, **kwargs)
98-
except ProjectSpamError:
99-
date_maturity = timezone.now() - timedelta(
100-
days=settings.USER_MATURITY_DAYS
101-
)
102-
if request.user.date_joined > date_maturity:
103-
request.user.profile.banned = True
104-
request.user.profile.save()
105-
log.info('Spam detected from new user, shadowbanned user.')
106-
else:
107-
log.info('Spam detected from user.')
108-
return HttpResponseRedirect(self.get_failure_url())
109-
110-
def get_failure_url(self):
111-
return reverse('homepage')
86+
"""
87+
Protects views for spammy projects.
88+
89+
It shows a ``Project marked as spam`` page and return 410 GONE if the
90+
project's dashboard is denied.
91+
"""
92+
93+
def get(self, request, *args, **kwargs):
94+
if 'readthedocsext.spamfighting' in settings.INSTALLED_APPS:
95+
from readthedocsext.spamfighting.utils import is_show_dashboard_denied # noqa
96+
if is_show_dashboard_denied(self.get_project()):
97+
return render(request, template_name='spam.html', status=410)
98+
99+
return super().get(request, *args, **kwargs)

readthedocs/projects/views/mixins.py

+11
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,17 @@
44
from celery import chain
55
from django.shortcuts import get_object_or_404
66

7+
import structlog
8+
79
from readthedocs.core.resolver import resolve, resolve_path
810
from readthedocs.core.utils import prepare_build
911
from readthedocs.projects.models import Project
1012
from readthedocs.projects.signals import project_import
1113

1214

15+
log = structlog.get_logger(__name__)
16+
17+
1318
class ProjectRelationMixin:
1419

1520
"""
@@ -117,6 +122,12 @@ def finish_import_project(self, request, project, tags=None):
117122
for tag in tags:
118123
project.tags.add(tag)
119124

125+
log.info(
126+
'Project imported.',
127+
project_slug=project.slug,
128+
user_username=request.user.username,
129+
)
130+
120131
# TODO: this signal could be removed, or used for sync task
121132
project_import.send(sender=project, request=request)
122133

readthedocs/projects/views/private.py

+13-9
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@
7373
)
7474
from readthedocs.projects.notifications import EmailConfirmNotification
7575
from readthedocs.projects.utils import get_csv_file
76-
from readthedocs.projects.views.base import ProjectAdminMixin, ProjectSpamMixin
76+
from readthedocs.projects.views.base import ProjectAdminMixin
7777
from readthedocs.projects.views.mixins import (
7878
ProjectImportMixin,
7979
ProjectRelationListMixin,
@@ -143,7 +143,7 @@ def get_queryset(self):
143143
return self.model.objects.for_admin_user(self.request.user)
144144

145145

146-
class ProjectUpdate(ProjectSpamMixin, ProjectMixin, UpdateView):
146+
class ProjectUpdate(ProjectMixin, UpdateView):
147147

148148
form_class = UpdateProjectForm
149149
success_message = _('Project settings updated')
@@ -153,7 +153,7 @@ def get_success_url(self):
153153
return reverse('projects_detail', args=[self.object.slug])
154154

155155

156-
class ProjectAdvancedUpdate(ProjectSpamMixin, ProjectMixin, UpdateView):
156+
class ProjectAdvancedUpdate(ProjectMixin, UpdateView):
157157

158158
form_class = ProjectAdvancedForm
159159
success_message = _('Project settings updated')
@@ -253,10 +253,7 @@ def post(self, request, *args, **kwargs):
253253
return HttpResponseRedirect(self.get_success_url())
254254

255255

256-
class ImportWizardView(
257-
ProjectImportMixin, ProjectSpamMixin, PrivateViewMixin,
258-
SessionWizardView,
259-
):
256+
class ImportWizardView(ProjectImportMixin, PrivateViewMixin, SessionWizardView):
260257

261258
"""
262259
Project import wizard.
@@ -287,10 +284,17 @@ def _set_initial_dict(self):
287284
else:
288285
self.initial_dict = self.storage.data.get(self.initial_dict_key, {})
289286

290-
def post(self, *args, **kwargs):
287+
def post(self, request, *args, **kwargs):
291288
self._set_initial_dict()
289+
290+
log.bind(user_username=request.user.username)
291+
292+
if request.user.profile.banned:
293+
log.info('Rejecting project POST from shadowbanned user.')
294+
return HttpResponseRedirect(reverse('homepage'))
295+
292296
# The storage is reset after everything is done.
293-
return super().post(*args, **kwargs)
297+
return super().post(request, *args, **kwargs)
294298

295299
def get_form_kwargs(self, step=None):
296300
"""Get args to pass into form instantiation."""

readthedocs/projects/views/public.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
from readthedocs.storage import build_media_storage
4141

4242
from ..constants import PRIVATE
43-
from .base import ProjectOnboardMixin
43+
from .base import ProjectOnboardMixin, ProjectSpamMixin
4444

4545
log = structlog.get_logger(__name__)
4646
search_log = structlog.get_logger(__name__ + '.search')
@@ -87,8 +87,13 @@ def project_redirect(request, invalid_project_slug):
8787
))
8888

8989

90-
class ProjectDetailViewBase(ProjectRelationListMixin, BuildTriggerMixin,
91-
ProjectOnboardMixin, DetailView):
90+
class ProjectDetailViewBase(
91+
ProjectSpamMixin,
92+
ProjectRelationListMixin,
93+
BuildTriggerMixin,
94+
ProjectOnboardMixin,
95+
DetailView,
96+
):
9297

9398
"""Display project onboard steps."""
9499

readthedocs/rtd_tests/tests/test_project_forms.py

-16
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from django.test.utils import override_settings
66
from django.utils.translation import ugettext_lazy as _
77
from django_dynamic_fixture import get
8-
from textclassifier.validators import ClassifierValidator
98

109
from readthedocs.builds.constants import EXTERNAL, LATEST, STABLE
1110
from readthedocs.builds.models import Version
@@ -16,7 +15,6 @@
1615
REPO_TYPE_HG,
1716
SPHINX,
1817
)
19-
from readthedocs.projects.exceptions import ProjectSpamError
2018
from readthedocs.projects.forms import (
2119
EmailHookForm,
2220
EnvironmentVariableForm,
@@ -36,20 +34,6 @@
3634

3735
class TestProjectForms(TestCase):
3836

39-
@mock.patch.object(ClassifierValidator, '__call__')
40-
def test_form_spam(self, mocked_validator):
41-
"""Form description field fails spam validation."""
42-
mocked_validator.side_effect = ProjectSpamError
43-
44-
data = {
45-
'description': 'foo',
46-
'documentation_type': 'sphinx',
47-
'language': 'en',
48-
}
49-
form = ProjectExtraForm(data)
50-
with self.assertRaises(ProjectSpamError):
51-
form.is_valid()
52-
5337
def test_import_repo_url(self):
5438
"""Validate different type of repository URLs on importing a Project."""
5539

readthedocs/rtd_tests/tests/test_project_views.py

+3-63
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
from readthedocs.oauth.models import RemoteRepository, RemoteRepositoryRelation
1717
from readthedocs.organizations.models import Organization
1818
from readthedocs.projects.constants import PUBLIC
19-
from readthedocs.projects.exceptions import ProjectSpamError
2019
from readthedocs.projects.models import Domain, Project, WebHook, WebHookEvent
2120
from readthedocs.projects.views.mixins import ProjectRelationMixin
2221
from readthedocs.projects.views.private import ImportWizardView
@@ -25,7 +24,7 @@
2524

2625

2726
@mock.patch('readthedocs.projects.tasks.update_docs_task', mock.MagicMock())
28-
class TestProfileMiddleware(RequestFactoryTestMixin, TestCase):
27+
class TestImportProjectBannedUser(RequestFactoryTestMixin, TestCase):
2928

3029
wizard_class_slug = 'import_wizard_view'
3130
url = '/dashboard/import/manual/'
@@ -50,26 +49,15 @@ def setUp(self):
5049
for (k, v) in list(data[key].items())})
5150
self.data['{}-current_step'.format(self.wizard_class_slug)] = 'extra'
5251

53-
def test_profile_middleware_no_profile(self):
52+
def test_not_banned_user(self):
5453
"""User without profile and isn't banned."""
5554
req = self.request(method='post', path='/projects/import', data=self.data)
5655
req.user = get(User, profile=None)
5756
resp = ImportWizardView.as_view()(req)
5857
self.assertEqual(resp.status_code, 302)
5958
self.assertEqual(resp['location'], '/projects/foobar/')
6059

61-
@mock.patch('readthedocs.projects.views.private.ProjectBasicsForm.clean')
62-
def test_profile_middleware_spam(self, form):
63-
"""User will be banned."""
64-
form.side_effect = ProjectSpamError
65-
req = self.request(method='post', path='/projects/import', data=self.data)
66-
req.user = get(User)
67-
resp = ImportWizardView.as_view()(req)
68-
self.assertEqual(resp.status_code, 302)
69-
self.assertEqual(resp['location'], '/')
70-
self.assertTrue(req.user.profile.banned)
71-
72-
def test_profile_middleware_banned(self):
60+
def test_banned_user(self):
7361
"""User is banned."""
7462
req = self.request(method='post', path='/projects/import', data=self.data)
7563
req.user = get(User)
@@ -296,54 +284,6 @@ def test_remote_repository_is_added(self):
296284
self.assertIsNotNone(proj)
297285
self.assertEqual(proj.remote_repository, remote_repo)
298286

299-
@mock.patch(
300-
'readthedocs.projects.views.private.ProjectExtraForm.clean_description',
301-
create=True,
302-
)
303-
def test_form_spam(self, mocked_validator):
304-
"""Don't add project on a spammy description."""
305-
self.user.date_joined = timezone.now() - timedelta(days=365)
306-
self.user.save()
307-
mocked_validator.side_effect = ProjectSpamError
308-
309-
with self.assertRaises(Project.DoesNotExist):
310-
proj = Project.objects.get(name='foobar')
311-
312-
resp = self.post_step('basics')
313-
self.assertWizardResponse(resp, 'extra')
314-
resp = self.post_step('extra', session=list(resp._request.session.items()))
315-
self.assertIsInstance(resp, HttpResponseRedirect)
316-
self.assertEqual(resp.status_code, 302)
317-
self.assertEqual(resp['location'], '/')
318-
319-
with self.assertRaises(Project.DoesNotExist):
320-
proj = Project.objects.get(name='foobar')
321-
self.assertFalse(self.user.profile.banned)
322-
323-
@mock.patch(
324-
'readthedocs.projects.views.private.ProjectExtraForm.clean_description',
325-
create=True,
326-
)
327-
def test_form_spam_ban_user(self, mocked_validator):
328-
"""Don't add spam and ban new user."""
329-
self.user.date_joined = timezone.now()
330-
self.user.save()
331-
mocked_validator.side_effect = ProjectSpamError
332-
333-
with self.assertRaises(Project.DoesNotExist):
334-
proj = Project.objects.get(name='foobar')
335-
336-
resp = self.post_step('basics')
337-
self.assertWizardResponse(resp, 'extra')
338-
resp = self.post_step('extra', session=list(resp._request.session.items()))
339-
self.assertIsInstance(resp, HttpResponseRedirect)
340-
self.assertEqual(resp.status_code, 302)
341-
self.assertEqual(resp['location'], '/')
342-
343-
with self.assertRaises(Project.DoesNotExist):
344-
proj = Project.objects.get(name='foobar')
345-
self.assertTrue(self.user.profile.banned)
346-
347287

348288
@mock.patch('readthedocs.core.utils.trigger_build', mock.MagicMock())
349289
class TestPublicViews(TestCase):

readthedocs/settings/base.py

-1
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,6 @@ def INSTALLED_APPS(self): # noqa
170170
'rest_framework',
171171
'rest_framework.authtoken',
172172
'corsheaders',
173-
'textclassifier',
174173
'annoying',
175174
'django_extensions',
176175
'crispy_forms',

requirements/pip.txt

-6
Original file line numberDiff line numberDiff line change
@@ -78,12 +78,6 @@ django-crispy-forms==1.13.0
7878

7979
docker==5.0.3
8080

81-
django-textclassifier==1.0
82-
# django-textclassifier doesn't have pinned versions
83-
# if there is an update they could break our code
84-
nltk==3.6.6
85-
textblob==0.17.1
86-
8781
django-annoying==0.10.6
8882
django-messages-extends==0.6.2
8983
djangorestframework-jsonp==1.0.2

0 commit comments

Comments
 (0)