Skip to content

Commit 3dc3e60

Browse files
committed
Add a management command for importing tags from github
1 parent 6b936a7 commit 3dc3e60

File tree

2 files changed

+81
-3
lines changed

2 files changed

+81
-3
lines changed

readthedocs/core/management/commands/clean_tags.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,16 +40,20 @@ def reprocess_tags(self):
4040
if new_tags != old_tags:
4141
if not self.dry_run:
4242
self.stdout.write(
43-
'[{}/{}] Setting tags on "{}"'.format(i, project_total, project.slug)
43+
'[{}/{}] Setting tags on "{}"'.format(
44+
i + 1,
45+
project_total,
46+
project.slug,
47+
)
4448
)
4549
project.tags.set(*new_tags)
4650
else:
4751
self.stdout.write(
4852
'[{}/{}] Not setting tags on "{}" (dry run)'.format(
49-
i,
53+
i + 1,
5054
project_total,
5155
project.slug,
52-
),
56+
)
5357
)
5458

5559
def remove_tags_with_no_projects(self):
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
"""
2+
Import a project's tags from GitHub.
3+
4+
Requires a ``GITHUB_AUTH_TOKEN`` to be set in the environment.
5+
This should be a "Personal access token" although no permissions are required.
6+
With the token, the rate limit is increased to 5,000 requests/hour
7+
8+
https://github.com/settings/tokens
9+
https://developer.github.com/v3/#rate-limiting
10+
"""
11+
12+
import os
13+
import time
14+
15+
import requests
16+
from django.core.management.base import BaseCommand, CommandError
17+
18+
from readthedocs.projects.constants import GITHUB_REGEXS
19+
from readthedocs.projects.models import Project
20+
21+
22+
class Command(BaseCommand):

    # Management command that copies GitHub repository topics onto projects
    # that currently have no tags and whose repo URL contains "github.com".
    #
    # NOTE: this class deliberately has no docstring. Inside a class body,
    # ``__doc__`` resolves to the class docstring when one exists, which would
    # change the ``help`` text below from the module docstring.
    help = __doc__

    # Seconds to wait for GitHub before giving up on a single request, so a
    # stalled connection cannot hang the whole run.
    REQUEST_TIMEOUT_SECONDS = 10

    # Authenticated requests are limited to 5,000 per hour. Waiting
    # 3600 / 5000 = 0.72s after each request keeps us under that limit even
    # if every request returned instantly.
    REQUEST_DELAY_SECONDS = 3600 / 5000

    def handle(self, *args, **options):
        """
        Fetch topics from GitHub for every untagged GitHub-hosted project.

        Progress is written to stdout; per-project failures are written to
        stderr and do not stop the run.

        :raises CommandError: if ``GITHUB_AUTH_TOKEN`` is unset or empty.
        """
        token = os.environ.get('GITHUB_AUTH_TOKEN')
        if not token:
            raise CommandError('Invalid GitHub token, exiting...')

        queryset = Project.objects.filter(tags=None).filter(repo__contains='github.com')
        project_total = queryset.count()

        # The headers do not depend on the project, so build them once.
        headers = {
            'Authorization': 'token {token}'.format(token=token),

            # Getting topics is a preview API and may change
            # It requires this custom Accept header
            'Accept': 'application/vnd.github.mercy-preview+json',
        }

        for position, project in enumerate(queryset.iterator(), start=1):
            user, repo = self._github_user_and_repo(project.repo)
            if not user:
                self.stderr.write(f'No GitHub repo for {project.repo}')
                continue

            self.stdout.write(
                '[{}/{}] Fetching tags for {}'.format(position, project_total, project.slug)
            )
            self._import_topics(project, user, repo, headers)

            # Throttle after every request actually sent to GitHub.
            time.sleep(self.REQUEST_DELAY_SECONDS)

    @staticmethod
    def _github_user_and_repo(repo_url):
        """Return ``(user, repo)`` from the first matching GitHub regex, else ``('', '')``."""
        for regex in GITHUB_REGEXS:
            match = regex.search(repo_url)
            if match:
                user, repo = match.groups()
                return user, repo
        return '', ''

    def _import_topics(self, project, user, repo, headers):
        """Request the topics of ``user/repo`` and set them as tags on ``project``."""
        # https://developer.github.com/v3/repos/#list-all-topics-for-a-repository
        url = 'https://api.github.com/repos/{user}/{repo}/topics'.format(
            user=user,
            repo=repo,
        )

        try:
            resp = requests.get(
                url,
                headers=headers,
                timeout=self.REQUEST_TIMEOUT_SECONDS,
            )
        except requests.RequestException as exc:
            # Connection errors and timeouts affect only this project.
            self.stderr.write('GitHub API error ({}): {}'.format(project.slug, exc))
            return

        if not resp.ok:
            self.stderr.write('GitHub API error ({}): {}'.format(project.slug, resp.content))
            return

        try:
            tags = resp.json()['names']
        except (ValueError, KeyError, TypeError):
            # Body was not JSON, or not the expected {"names": [...]} object.
            self.stderr.write('GitHub API error ({}): {}'.format(project.slug, resp.content))
            return

        if tags:
            self.stdout.write('Setting tags for {}: {}'.format(project.slug, tags))
            project.tags.set(*tags)

0 commit comments

Comments
 (0)