Skip to content

Sync RemoteRepository and RemoteOrganization in all VCS providers #7310

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Aug 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions readthedocs/oauth/services/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from allauth.socialaccount.models import SocialAccount
from allauth.socialaccount.providers import registry
from django.conf import settings
from django.db.models import Q
from django.utils import timezone
from oauthlib.oauth2.rfc6749.errors import InvalidClientIdError
from requests.exceptions import RequestException
Expand Down Expand Up @@ -181,11 +182,30 @@ def paginate(self, url, **kwargs):
url,
debug_data,
)
return []

return []

def sync(self):
"""Sync repositories and organizations."""
raise NotImplementedError
"""
Sync repositories (RemoteRepository) and organizations (RemoteOrganization).

- creates a new RemoteRepository/Organization per new repository
- updates fields for existing RemoteRepository/Organization
- deletes old RemoteRepository/Organization that are not present for this user
"""
repos = self.sync_repositories()
organizations, organization_repos = self.sync_organizations()

# Delete RemoteRepository where the user doesn't have access anymore
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we should run these delete tasks across all users as a one-time CLI command, to clean up our DB. There's likely a ton of these sitting around.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one-time command would require syncing every connected VCS account and deleting all the old objects. That may be too much. We may want to limit the number of accounts re-synced and run it several times, in chunks. It will remove a lot of stale data.

However, as this cleanup is in the core of the sync action, all active accounts will be cleaned up automatically the next time they log in or when they click the arrow on the Import Project page.

# (skip RemoteRepository tied to a Project on this user)
repository_full_names = self.get_repository_full_names(repos + organization_repos)
self.user.oauth_repositories.exclude(
Q(full_name__in=repository_full_names) | Q(project__isnull=False)
).delete()

# Delete RemoteOrganization where the user doesn't have access anymore
organization_names = self.get_organization_names(organizations)
self.user.oauth_organizations.exclude(name__in=organization_names).delete()

def create_repository(self, fields, privacy=None, organization=None):
"""
Expand Down
34 changes: 25 additions & 9 deletions readthedocs/oauth/services/bitbucket.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,18 @@ class BitbucketService(Service):
# Pattern identifying Bitbucket URLs.
url_pattern = re.compile(r'bitbucket.org')
# HTTPS URL that carries user info before the host, i.e.
# ``https://<user>@bitbucket.org/...``. The ``[^@]+@`` part had been replaced
# by an e-mail-obfuscation placeholder, which made the pattern unmatchable.
https_url_pattern = re.compile(r'^https:\/\/[^@]+@bitbucket.org/')

def sync(self):
"""Sync repositories and teams from Bitbucket API."""
self.sync_repositories()
self.sync_teams()

def sync_repositories(self):
"""Sync repositories from Bitbucket API."""
repos = []

# Get user repos
try:
repos = self.paginate(
'https://bitbucket.org/api/2.0/repositories/?role=member',
)
for repo in repos:
self.create_repository(repo)

except (TypeError, ValueError):
log.warning('Error syncing Bitbucket repositories')
raise SyncServiceError(
Expand All @@ -58,37 +56,49 @@ def sync_repositories(self):
resp = self.paginate(
'https://bitbucket.org/api/2.0/repositories/?role=admin',
)
repos = (
admin_repos = (
RemoteRepository.objects.filter(
users=self.user,
full_name__in=[r['full_name'] for r in resp],
account=self.account,
)
)
for repo in repos:
for repo in admin_repos:
repo.admin = True
repo.save()
except (TypeError, ValueError):
pass

def sync_teams(self):
"""Sync Bitbucket teams and team repositories."""
return repos

def sync_organizations(self):
    """
    Sync Bitbucket teams (our RemoteOrganization) and team repositories.

    Every team returned by the teams endpoint is passed to
    ``create_organization``; every repository listed under that team is
    passed to ``create_repository`` together with the resulting organization.

    :returns: tuple ``(teams, repositories)`` holding the raw API payloads of
        the teams and of all repositories collected across those teams
    :raises SyncServiceError: if processing the API payloads raises
        ``TypeError`` or ``ValueError``
    """
    teams = []
    repositories = []

    try:
        teams = self.paginate(
            'https://api.bitbucket.org/2.0/teams/?role=member',
        )
        for team in teams:
            org = self.create_organization(team)
            repos = self.paginate(team['links']['repositories']['href'])

            # Add organization's repositories to the result
            repositories.extend(repos)

            for repo in repos:
                self.create_repository(repo, organization=org)

    except (TypeError, ValueError):
        # ``TypeError`` is handled too, matching ``sync_repositories``, so a
        # malformed payload surfaces as ``SyncServiceError`` rather than as a
        # raw ``TypeError``.
        log.warning('Error syncing Bitbucket organizations')
        raise SyncServiceError(
            'Could not sync your Bitbucket team repositories, '
            'try reconnecting your account',
        )

    return teams, repositories

def create_repository(self, fields, privacy=None, organization=None):
"""
Update or create a repository from Bitbucket API response.
Expand Down Expand Up @@ -180,6 +190,12 @@ def create_organization(self, fields):
organization.save()
return organization

def get_repository_full_names(self, repositories):
    """
    Collect the ``full_name`` value of each repository payload.

    :param repositories: iterable of mapping-like repository payloads
    :returns: set of ``full_name`` values; a payload without that key
        contributes ``None``
    """
    full_names = set()
    for repo in repositories:
        full_names.add(repo.get('full_name'))
    return full_names

def get_organization_names(self, organizations):
    """
    Collect the ``display_name`` value of each team payload.

    :param organizations: iterable of mapping-like team payloads
    :returns: set of ``display_name`` values; a payload without that key
        contributes ``None``
    """
    names = set()
    for team in organizations:
        names.add(team.get('display_name'))
    return names

def get_next_url_to_paginate(self, response):
    """
    Return the value of the ``next`` key of the response's JSON body.

    :returns: the ``next`` value, or ``None`` when the key is absent
    """
    payload = response.json()
    return payload.get('next')

Expand Down
36 changes: 21 additions & 15 deletions readthedocs/oauth/services/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from allauth.socialaccount.models import SocialToken
from allauth.socialaccount.providers.github.views import GitHubOAuth2Adapter

from django.conf import settings
from django.db.models import Q
from django.urls import reverse
Expand Down Expand Up @@ -33,34 +34,27 @@ class GitHubService(Service):
# TODO replace this with a less naive check
url_pattern = re.compile(r'github\.com')

def sync(self):
"""Sync repositories and organizations."""
repos = self.sync_repositories()
organization_repos = self.sync_organizations()

# Delete RemoteRepository where the user doesn't have access anymore
# (skip RemoteRepository tied to a Project on this user)
full_names = {repo.get('full_name') for repo in repos + organization_repos}
self.user.oauth_repositories.exclude(
Q(full_name__in=full_names) | Q(project__isnull=False)
).delete()

def sync_repositories(self):
"""Sync repositories from GitHub API."""
repos = self.paginate('https://api.github.com/user/repos?per_page=100')
repos = []

try:
repos = self.paginate('https://api.github.com/user/repos?per_page=100')
for repo in repos:
self.create_repository(repo)
return repos
except (TypeError, ValueError):
log.warning('Error syncing GitHub repositories')
raise SyncServiceError(
'Could not sync your GitHub repositories, '
'try reconnecting your account'
)
return repos

def sync_organizations(self):
"""Sync organizations from GitHub API."""
orgs = []
repositories = []

try:
orgs = self.paginate('https://api.github.com/user/orgs')
for org in orgs:
Expand All @@ -71,16 +65,22 @@ def sync_organizations(self):
org_repos = self.paginate(
'{org_url}/repos'.format(org_url=org['url']),
)

# Add all the repositories for this organization to the result
repositories.extend(org_repos)

for repo in org_repos:
self.create_repository(repo, organization=org_obj)
return org_repos

except (TypeError, ValueError):
log.warning('Error syncing GitHub organizations')
raise SyncServiceError(
'Could not sync your GitHub organizations, '
'try reconnecting your account'
)

return orgs, repositories

def create_repository(self, fields, privacy=None, organization=None):
"""
Update or create a repository from GitHub API response.
Expand Down Expand Up @@ -170,6 +170,12 @@ def create_organization(self, fields):
organization.save()
return organization

def get_repository_full_names(self, repositories):
    """
    Build the set of ``full_name`` values found in the repository payloads.

    A payload lacking ``full_name`` contributes ``None`` to the set.
    """
    return set(map(lambda repo: repo.get('full_name'), repositories))

def get_organization_names(self, organizations):
    """
    Build the set of ``name`` values found in the organization payloads.

    A payload lacking ``name`` contributes ``None`` to the set.
    """
    return set(map(lambda org: org.get('name'), organizations))

def get_next_url_to_paginate(self, response):
    """
    Return the ``url`` of the ``next`` entry in ``response.links``.

    :returns: the URL, or ``None`` when there is no ``next`` entry or it has
        no ``url``
    """
    next_link = response.links.get('next', {})
    return next_link.get('url')

Expand Down
58 changes: 33 additions & 25 deletions readthedocs/oauth/services/gitlab.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,26 +66,18 @@ def get_next_url_to_paginate(self, response):
def get_paginated_results(self, response):
    """Return the decoded JSON body of ``response`` as this page's results."""
    results = response.json()
    return results

def sync(self):
"""
Sync repositories and organizations from GitLab API.

See: https://docs.gitlab.com/ce/api/projects.html
"""
self.sync_repositories()
self.sync_organizations()

def sync_repositories(self):
repos = self.paginate(
'{url}/api/v4/projects'.format(url=self.adapter.provider_base_url),
per_page=100,
archived=False,
order_by='path',
sort='asc',
membership=True,
)

repos = []
try:
repos = self.paginate(
'{url}/api/v4/projects'.format(url=self.adapter.provider_base_url),
per_page=100,
archived=False,
order_by='path',
sort='asc',
membership=True,
)

for repo in repos:
self.create_repository(repo)
except (TypeError, ValueError):
Expand All @@ -95,16 +87,20 @@ def sync_repositories(self):
'try reconnecting your account'
)

return repos

def sync_organizations(self):
orgs = self.paginate(
'{url}/api/v4/groups'.format(url=self.adapter.provider_base_url),
per_page=100,
all_available=False,
order_by='path',
sort='asc',
)
orgs = []
repositories = []

try:
orgs = self.paginate(
'{url}/api/v4/groups'.format(url=self.adapter.provider_base_url),
per_page=100,
all_available=False,
order_by='path',
sort='asc',
)
for org in orgs:
org_obj = self.create_organization(org)
org_repos = self.paginate(
Expand All @@ -117,6 +113,10 @@ def sync_organizations(self):
order_by='path',
sort='asc',
)

# Add organization's repositories to the result
repositories.extend(org_repos)

for repo in org_repos:
self.create_repository(repo, organization=org_obj)
except (TypeError, ValueError):
Expand All @@ -126,6 +126,8 @@ def sync_organizations(self):
'try reconnecting your account'
)

return orgs, repositories

def is_owned_by(self, owner_id):
    """
    Tell whether ``owner_id`` equals the ``id`` stored in the account's extra data.

    :raises KeyError: if the account's ``extra_data`` has no ``id`` entry
    """
    account_id = self.account.extra_data['id']
    return account_id == owner_id

Expand Down Expand Up @@ -230,6 +232,12 @@ def create_organization(self, fields):
organization.save()
return organization

def get_repository_full_names(self, repositories):
    """
    Gather the ``name_with_namespace`` value of each project payload.

    :param repositories: iterable of mapping-like project payloads
    :returns: set of ``name_with_namespace`` values; a payload without that
        key contributes ``None``
    """
    namespaced = [project.get('name_with_namespace') for project in repositories]
    return set(namespaced)

def get_organization_names(self, organizations):
    """
    Gather the ``name`` value of each group payload.

    :param organizations: iterable of mapping-like group payloads
    :returns: set of ``name`` values; a payload without that key contributes
        ``None``
    """
    group_names = [group.get('name') for group in organizations]
    return set(group_names)

def get_webhook_data(self, repo_id, project, integration):
"""
Get webhook JSON data to post to the API.
Expand Down