Skip to content

Commit 5661e74

Browse files
committed
Cleanup project tags
- Adds a management command to clean up tags - Cleanup involves lowercasing and slugifying (canonicalizing) - Future tags will come in canonicalized
1 parent c6bacb0 commit 5661e74

File tree

3 files changed

+99
-0
lines changed

3 files changed

+99
-0
lines changed
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
"""
2+
Cleanup project tags
3+
4+
This specifically aims to clean up:
5+
6+
- Differences only in lowercase/uppercase
7+
- Slugify all tags
8+
- Remove tags with no projects (old & spam mostly)
9+
"""
10+
11+
from django.core.management.base import BaseCommand
12+
from django.db.models import Count
13+
from taggit.models import Tag
14+
from taggit.utils import parse_tags, edit_string_for_tags
15+
16+
from readthedocs.projects.models import Project
17+
18+
19+
class Command(BaseCommand):

    # NOTE: deliberately no class docstring here. A class docstring would bind
    # ``__doc__`` in the class namespace and ``help`` below would then pick it
    # up instead of the module docstring.
    help = __doc__

    # Overwritten in ``handle`` from the ``--dry-run`` command line flag.
    dry_run = False

    def reprocess_tags(self):
        """
        Re-run every project's tags through taggit's ``parse_tags``.

        The current tags are rendered to a tag string with
        ``edit_string_for_tags`` and parsed back with ``parse_tags``. When the
        parsed result differs from the current tag names, the project's tags
        are replaced with the parsed ones (or, in dry-run mode, the change is
        only reported).
        """
        self.stdout.write('Reprocessing tags (lowercasing, slugifying, etc.)...')
        project_total = Project.objects.count()

        # Use an iterator so the queryset isn't stored in memory
        # This may take a long time but should be memory efficient
        # Counting starts at 1 so the progress output ends at [total/total].
        for position, project in enumerate(Project.objects.iterator(), start=1):
            old_tags_objs = list(project.tags.all())
            if not old_tags_objs:
                continue

            old_tags = sorted(tag.name for tag in old_tags_objs)
            old_tag_string = edit_string_for_tags(old_tags_objs)
            new_tags = parse_tags(old_tag_string)

            # Update the tags on the project only if they are different.
            # Both sides are sorted so a mere difference in ordering coming
            # out of the tag parser is not mistaken for a change.
            if sorted(new_tags) == old_tags:
                continue

            if self.dry_run:
                self.stdout.write(
                    '[{}/{}] Not setting tags on "{}" (dry run)'.format(
                        position,
                        project_total,
                        project.slug,
                    ),
                )
                continue

            self.stdout.write(
                '[{}/{}] Setting tags on "{}"'.format(
                    position,
                    project_total,
                    project.slug,
                ),
            )
            project.tags.set(*new_tags)

    def remove_tags_with_no_projects(self):
        """
        Delete tags that have no tagged items.

        Tags are annotated with the count of their ``taggit_taggeditem_items``
        relation; those with a count of zero are deleted (or, in dry-run mode,
        only reported).
        """
        self.stdout.write('Removing tags with no projects...')
        unused_tags = Tag.objects.all().annotate(
            num=Count('taggit_taggeditem_items'),
        ).filter(num=0)

        for tag in unused_tags:
            if self.dry_run:
                self.stdout.write('Not removing tag "{}" (dry run)'.format(tag.name))
                continue

            self.stdout.write('Removing tag {}'.format(tag.name))
            tag.delete()

    def add_arguments(self, parser):
        """Register the ``--dry-run`` flag on the command's argument parser."""
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Don't actually perform the actions, just print output",
        )

    def handle(self, *args, **options):
        """Store the dry-run option, then reprocess tags and prune unused ones."""
        self.dry_run = options["dry_run"]

        self.reprocess_tags()
        self.remove_tags_with_no_projects()

readthedocs/core/tag_utils.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
"""Customizations to Django Taggit"""
2+
from django.utils.text import slugify
3+
from taggit.utils import _parse_tags
4+
5+
6+
def rtd_parse_tags(tag_string):
    """
    Parses a string into its canonical tags

    - Lowercases all tags
    - Slugifies tags
    - Drops tags whose slug is empty (e.g. tags made only of punctuation)
    - Collapses tags that slugify to the same value

    :see: https://django-taggit.readthedocs.io/page/custom_tagging.html
    :param tag_string: a delimited string of tags
    :return: a sorted list of unique, non-empty tag strings
    """
    if tag_string:
        tag_string = tag_string.lower()

    # Slugifying happens *after* ``_parse_tags`` has split the string, so any
    # ordering or de-duplication it performs applies to the raw names rather
    # than to the slugs. Distinct raw names can map to the same slug, and a
    # name without slug-safe characters maps to an empty string, so the final
    # list is de-duplicated, filtered and sorted here.
    slugs = {slugify(tag) for tag in _parse_tags(tag_string)}
    return sorted(slug for slug in slugs if slug)

readthedocs/settings/base.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,10 @@ def USE_PROMOS(self): # noqa
463463

464464
INTERNAL_IPS = ('127.0.0.1',)
465465

466+
# Taggit
467+
# https://django-taggit.readthedocs.io
468+
TAGGIT_TAGS_FROM_STRING = 'readthedocs.core.tag_utils.rtd_parse_tags'
469+
466470
# Stripe
467471
STRIPE_SECRET = None
468472
STRIPE_PUBLISHABLE = None

0 commit comments

Comments
 (0)