From 0c73d2658d4e69d6dec6d03bc0bdda8185158671 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 22 Jul 2020 14:44:39 +0200 Subject: [PATCH 1/3] Sync RemoteRepository and RemoteOrganization in all VCS providers - Refactor `Service.sync` to be unique for all the VCS providers - Make `sync_repositories` and `sync_organizations` to return `repositories` and `(organizations, repositories)` to allow deleting old instances where the user does not have more access - rename `BitBucketService.sync_teams` to `BitBucketService.sync_organizations` to keep our internal naming With this commit, all VCS providers (GitHub, GitLab and BitBucket) will start deleting old RemoteRepository instances (not only GitHub as it's currently working) but also deleting old RemoteOrganization instances as well. --- readthedocs/oauth/services/base.py | 21 ++++++++++++++++-- readthedocs/oauth/services/bitbucket.py | 29 +++++++++++++++++-------- readthedocs/oauth/services/github.py | 26 +++++++++++----------- readthedocs/oauth/services/gitlab.py | 24 ++++++++++++-------- 4 files changed, 67 insertions(+), 33 deletions(-) diff --git a/readthedocs/oauth/services/base.py b/readthedocs/oauth/services/base.py index e093ae24408..e7a97788de4 100644 --- a/readthedocs/oauth/services/base.py +++ b/readthedocs/oauth/services/base.py @@ -184,8 +184,25 @@ def paginate(self, url, **kwargs): return [] def sync(self): - """Sync repositories and organizations.""" - raise NotImplementedError + """ + Sync repositories (RemoteRepository) and organizations (RemoteOrganization). + + - creates a new RemoteRepository/Organization per new repository + - updates fields for existing RemoteRepository/Organization + - deletes old RemoteRepository/Organization that are not present for this user + """ + repos = self.sync_repositories() + # Delete RemoteRepository where the user doesn't have access anymore + # (skip RemoteRepository tied to a Project on this user) + repository_full_names = self.get_repository_full_names(repos + organization_repos) + self.user.oauth_repositories.exclude( + Q(full_name__in=repository_full_names) | Q(project__isnull=False) + ).delete() + + organizations, organization_repos = self.sync_organizations() + # Delete RemoteOrganization where the user doesn't have access anymore + organization_names = self.get_organization_names(organizations) + self.user.oauth_organizations.exclude(name__in=organization_names).delete() def create_repository(self, fields, privacy=None, organization=None): """ diff --git a/readthedocs/oauth/services/bitbucket.py b/readthedocs/oauth/services/bitbucket.py index 144b5e4508c..409fa3583e4 100644 --- a/readthedocs/oauth/services/bitbucket.py +++ b/readthedocs/oauth/services/bitbucket.py @@ -30,11 +30,6 @@ class BitbucketService(Service): url_pattern = re.compile(r'bitbucket.org') https_url_pattern = re.compile(r'^https:\/\/[^@]+@bitbucket.org/') - def sync(self): - """Sync repositories and teams from Bitbucket API.""" - self.sync_repositories() - self.sync_teams() - def sync_repositories(self): """Sync repositories from Bitbucket API.""" # Get user repos @@ -44,6 +39,7 @@ def sync_repositories(self): ) for repo in repos: self.create_repository(repo) + except (TypeError, ValueError): log.warning('Error syncing Bitbucket repositories') raise SyncServiceError( @@ -58,21 +54,24 @@ def sync_repositories(self): resp = self.paginate( 'https://bitbucket.org/api/2.0/repositories/?role=admin', ) - repos = ( + admin_repos = ( RemoteRepository.objects.filter( users=self.user, full_name__in=[r['full_name'] for r in resp], account=self.account, ) ) - for repo in repos: + for repo in admin_repos: repo.admin = True repo.save() except (TypeError, ValueError): pass - def sync_teams(self): - """Sync Bitbucket teams and team repositories.""" + return repos + + def sync_organizations(self): + """Sync Bitbucket teams (our RemoteOrganization) and team repositories.""" + repositories = [] try: teams = self.paginate( 'https://api.bitbucket.org/2.0/teams/?role=member', @@ -80,8 +79,14 @@ def sync_teams(self): for team in teams: org = self.create_organization(team) repos = self.paginate(team['links']['repositories']['href']) + + # Add organization's repositories to the result + repositories.extend(repos) + for repo in repos: self.create_repository(repo, organization=org) + + return teams, repositories except ValueError: log.warning('Error syncing Bitbucket organizations') raise SyncServiceError( @@ -180,6 +185,12 @@ def create_organization(self, fields): organization.save() return organization + def get_repository_full_names(self, repositories): + return {repository.get('full_name') for repository in repositories} + + def get_organization_names(self, organizations): + return {organization.get('display_name') for organization in organizations} + def get_next_url_to_paginate(self, response): return response.json().get('next') diff --git a/readthedocs/oauth/services/github.py b/readthedocs/oauth/services/github.py index cc18aa6df7f..14db3f66cf9 100644 --- a/readthedocs/oauth/services/github.py +++ b/readthedocs/oauth/services/github.py @@ -35,18 +35,6 @@ class GitHubService(Service): # TODO replace this with a less naive check url_pattern = re.compile(r'github\.com') - def sync(self): - """Sync repositories and organizations.""" - repos = self.sync_repositories() - organization_repos = self.sync_organizations() - - # Delete RemoteRepository where the user doesn't have access anymore - # (skip RemoteRepository tied to a Project on this user) - full_names = {repo.get('full_name') for repo in repos + organization_repos} - self.user.oauth_repositories.exclude( - Q(full_name__in=full_names) | Q(project__isnull=False) - ).delete() - def sync_repositories(self): """Sync repositories from GitHub API.""" repos = self.paginate('https://api.github.com/user/repos?per_page=100') @@ -63,6 +51,7 @@ def sync_repositories(self): def sync_organizations(self): """Sync organizations from GitHub API.""" + repositories = [] try: orgs = self.paginate('https://api.github.com/user/orgs') for org in orgs: @@ -73,9 +62,14 @@ def sync_organizations(self): org_repos = self.paginate( '{org_url}/repos'.format(org_url=org['url']), ) + + # Add all the repositories for this organization to the result + repositories.extend(org_repos) + for repo in org_repos: self.create_repository(repo, organization=org_obj) - return org_repos + + return orgs, repositories except (TypeError, ValueError): log.warning('Error syncing GitHub organizations') raise SyncServiceError( @@ -172,6 +166,12 @@ def create_organization(self, fields): organization.save() return organization + def get_repository_full_names(self, repositories): + return {repository.get('full_name') for repository in repositories} + + def get_organization_names(self, organizations): + return {organization.get('name') for organization in organizations} + def get_next_url_to_paginate(self, response): return response.links.get('next', {}).get('url') diff --git a/readthedocs/oauth/services/gitlab.py b/readthedocs/oauth/services/gitlab.py index df7ee2a7ed5..4f4cdd6933b 100644 --- a/readthedocs/oauth/services/gitlab.py +++ b/readthedocs/oauth/services/gitlab.py @@ -71,15 +71,6 @@ def get_next_url_to_paginate(self, response): def get_paginated_results(self, response): return response.json() - def sync(self): - """ - Sync repositories and organizations from GitLab API. - - See: https://docs.gitlab.com/ce/api/projects.html - """ - self.sync_repositories() - self.sync_organizations() - def sync_repositories(self): repos = self.paginate( '{url}/api/v4/projects'.format(url=self.adapter.provider_base_url), @@ -93,6 +84,8 @@ def sync_repositories(self): try: for repo in repos: self.create_repository(repo) + + return repos except (TypeError, ValueError): log.warning('Error syncing GitLab repositories') raise SyncServiceError( @@ -101,6 +94,7 @@ def sync_repositories(self): ) def sync_organizations(self): + repositories = [] orgs = self.paginate( '{url}/api/v4/groups'.format(url=self.adapter.provider_base_url), per_page=100, @@ -122,8 +116,14 @@ def sync_organizations(self): order_by='path', sort='asc', ) + + # Add organization's repositories to the result + repositories.extend(org_repos) + for repo in org_repos: self.create_repository(repo, organization=org_obj) + + return orgs, repositories except (TypeError, ValueError): log.warning('Error syncing GitLab organizations') raise SyncServiceError( @@ -235,6 +235,12 @@ def create_organization(self, fields): organization.save() return organization + def get_repository_full_names(self, repositories): + return {repository.get('name_with_namespace') for repository in repositories} + + def get_organization_names(self, organizations): + return {organization.get('name') for organization in organizations} + def get_webhook_data(self, repo_id, project, integration): """ Get webhook JSON data to post to the API. From ca5ffa9126c56b00d0a61aa119aa6e09e03765ef Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 22 Jul 2020 15:21:36 +0200 Subject: [PATCH 2/3] lint --- readthedocs/oauth/services/base.py | 4 +++- readthedocs/oauth/services/github.py | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/readthedocs/oauth/services/base.py b/readthedocs/oauth/services/base.py index e7a97788de4..4975d5c6886 100644 --- a/readthedocs/oauth/services/base.py +++ b/readthedocs/oauth/services/base.py @@ -6,6 +6,7 @@ from allauth.socialaccount.models import SocialAccount from allauth.socialaccount.providers import registry from django.conf import settings +from django.db.models import Q from django.utils import timezone from oauthlib.oauth2.rfc6749.errors import InvalidClientIdError from requests.exceptions import RequestException @@ -192,6 +193,8 @@ def sync(self): - deletes old RemoteRepository/Organization that are not present for this user """ repos = self.sync_repositories() + organizations, organization_repos = self.sync_organizations() + # Delete RemoteRepository where the user doesn't have access anymore # (skip RemoteRepository tied to a Project on this user) repository_full_names = self.get_repository_full_names(repos + organization_repos) @@ -199,7 +202,6 @@ def sync(self): Q(full_name__in=repository_full_names) | Q(project__isnull=False) ).delete() - organizations, organization_repos = self.sync_organizations() # Delete RemoteOrganization where the user doesn't have access anymore organization_names = self.get_organization_names(organizations) self.user.oauth_organizations.exclude(name__in=organization_names).delete() diff --git a/readthedocs/oauth/services/github.py b/readthedocs/oauth/services/github.py index 14db3f66cf9..fca5ee4abd2 100644 --- a/readthedocs/oauth/services/github.py +++ b/readthedocs/oauth/services/github.py @@ -7,7 +7,6 @@ from allauth.socialaccount.models import SocialToken from allauth.socialaccount.providers.github.views import GitHubOAuth2Adapter -from django.db.models import Q from django.conf import settings from django.urls import reverse from requests.exceptions import RequestException From d14075294409a395593d4b2246f36b59bec5d9d8 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Tue, 18 Aug 2020 11:28:10 +0200 Subject: [PATCH 3/3] Always return a valid list (could be empty) When fetching repositories and organizations always return a valid list. If the user does not have repositories or organizations, a empty list is returned. This allows the code to continue working properly in the further steps. --- readthedocs/oauth/services/base.py | 3 +- readthedocs/oauth/services/bitbucket.py | 7 ++++- readthedocs/oauth/services/github.py | 11 +++++-- readthedocs/oauth/services/gitlab.py | 42 +++++++++++++------------ 4 files changed, 38 insertions(+), 25 deletions(-) diff --git a/readthedocs/oauth/services/base.py b/readthedocs/oauth/services/base.py index 4975d5c6886..043b3a30a93 100644 --- a/readthedocs/oauth/services/base.py +++ b/readthedocs/oauth/services/base.py @@ -182,7 +182,8 @@ def paginate(self, url, **kwargs): url, debug_data, ) - return [] + + return [] def sync(self): """ diff --git a/readthedocs/oauth/services/bitbucket.py b/readthedocs/oauth/services/bitbucket.py index 409fa3583e4..65f8a81f9b6 100644 --- a/readthedocs/oauth/services/bitbucket.py +++ b/readthedocs/oauth/services/bitbucket.py @@ -32,6 +32,8 @@ class BitbucketService(Service): def sync_repositories(self): """Sync repositories from Bitbucket API.""" + repos = [] + # Get user repos try: repos = self.paginate( @@ -71,7 +73,9 @@ def sync_repositories(self): def sync_organizations(self): """Sync Bitbucket teams (our RemoteOrganization) and team repositories.""" + teams = [] repositories = [] + try: teams = self.paginate( 'https://api.bitbucket.org/2.0/teams/?role=member', @@ -86,7 +90,6 @@ def sync_organizations(self): for repo in repos: self.create_repository(repo, organization=org) - return teams, repositories except ValueError: log.warning('Error syncing Bitbucket organizations') raise SyncServiceError( @@ -94,6 +97,8 @@ def sync_organizations(self): 'try reconnecting your account', ) + return teams, repositories + def create_repository(self, fields, privacy=None, organization=None): """ Update or create a repository from Bitbucket API response. diff --git a/readthedocs/oauth/services/github.py b/readthedocs/oauth/services/github.py index fca5ee4abd2..82eb8ed96b0 100644 --- a/readthedocs/oauth/services/github.py +++ b/readthedocs/oauth/services/github.py @@ -36,21 +36,25 @@ class GitHubService(Service): def sync_repositories(self): """Sync repositories from GitHub API.""" - repos = self.paginate('https://api.github.com/user/repos?per_page=100') + repos = [] + try: + repos = self.paginate('https://api.github.com/user/repos?per_page=100') for repo in repos: self.create_repository(repo) - return repos except (TypeError, ValueError): log.warning('Error syncing GitHub repositories') raise SyncServiceError( 'Could not sync your GitHub repositories, ' 'try reconnecting your account' ) + return repos def sync_organizations(self): """Sync organizations from GitHub API.""" + orgs = [] repositories = [] + try: orgs = self.paginate('https://api.github.com/user/orgs') for org in orgs: @@ -68,7 +72,6 @@ def sync_organizations(self): for repo in org_repos: self.create_repository(repo, organization=org_obj) - return orgs, repositories except (TypeError, ValueError): log.warning('Error syncing GitHub organizations') raise SyncServiceError( @@ -76,6 +79,8 @@ def sync_organizations(self): 'try reconnecting your account' ) + return orgs, repositories + def create_repository(self, fields, privacy=None, organization=None): """ Update or create a repository from GitHub API response. diff --git a/readthedocs/oauth/services/gitlab.py b/readthedocs/oauth/services/gitlab.py index 4f4cdd6933b..73a66a37fff 100644 --- a/readthedocs/oauth/services/gitlab.py +++ b/readthedocs/oauth/services/gitlab.py @@ -72,20 +72,19 @@ def get_paginated_results(self, response): return response.json() def sync_repositories(self): - repos = self.paginate( - '{url}/api/v4/projects'.format(url=self.adapter.provider_base_url), - per_page=100, - archived=False, - order_by='path', - sort='asc', - membership=True, - ) - + repos = [] try: + repos = self.paginate( + '{url}/api/v4/projects'.format(url=self.adapter.provider_base_url), + per_page=100, + archived=False, + order_by='path', + sort='asc', + membership=True, + ) + for repo in repos: self.create_repository(repo) - - return repos except (TypeError, ValueError): log.warning('Error syncing GitLab repositories') raise SyncServiceError( @@ -93,17 +92,20 @@ def sync_repositories(self): 'try reconnecting your account' ) + return repos + def sync_organizations(self): + orgs = [] repositories = [] - orgs = self.paginate( - '{url}/api/v4/groups'.format(url=self.adapter.provider_base_url), - per_page=100, - all_available=False, - order_by='path', - sort='asc', - ) try: + orgs = self.paginate( + '{url}/api/v4/groups'.format(url=self.adapter.provider_base_url), + per_page=100, + all_available=False, + order_by='path', + sort='asc', + ) for org in orgs: org_obj = self.create_organization(org) org_repos = self.paginate( @@ -122,8 +124,6 @@ def sync_organizations(self): for repo in org_repos: self.create_repository(repo, organization=org_obj) - - return orgs, repositories except (TypeError, ValueError): log.warning('Error syncing GitLab organizations') raise SyncServiceError( @@ -131,6 +131,8 @@ def sync_organizations(self): 'try reconnecting your account' ) + return orgs, repositories + def is_owned_by(self, owner_id): return self.account.extra_data['id'] == owner_id