Skip to content

Validate webhook's payload #4940

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 53 commits into from
Feb 27, 2019
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
fe3fa3d
Add secret field to Integration model
stsewd Nov 30, 2018
36e184b
Validate GitHub webhook's payload
stsewd Nov 30, 2018
9393cd8
Docstrings
stsewd Dec 3, 2018
ffd52f2
Add helptext
stsewd Dec 3, 2018
4b6ceab
Merge branch 'master' into validate-payload-webhooks
stsewd Dec 3, 2018
8859740
Backwards compatibility with old webhooks
stsewd Dec 3, 2018
8996499
Use custom parsers
stsewd Dec 3, 2018
0fa97c4
Docstrings
stsewd Dec 3, 2018
29a40a0
Put back normalize_request_payload
stsewd Dec 3, 2018
0855f77
Linter
stsewd Dec 3, 2018
c17868f
Tests
stsewd Dec 3, 2018
b1fea27
Python2 syntax
stsewd Dec 4, 2018
6b4f050
Python 2 again
stsewd Dec 4, 2018
d373f66
Validate GitLab's token
stsewd Dec 4, 2018
e41b656
More python2
stsewd Dec 4, 2018
0e444b9
More tests
stsewd Dec 4, 2018
0c73fed
Remove import
stsewd Dec 4, 2018
ff377cc
Make os.urandom python2 compatible
stsewd Dec 4, 2018
ddaa146
Merge branch 'master' into validate-payload-webhooks
stsewd Dec 4, 2018
e26452a
Keep compatibility with webhooks
stsewd Dec 4, 2018
0663179
Ignore six.PY2 from coverage
stsewd Dec 4, 2018
ed16a4e
Set secret only when rtd creates the webhook
stsewd Dec 4, 2018
3f27c14
Update secret on resync
stsewd Dec 4, 2018
da5e0b0
Merge branch 'master' into validate-payload-webhooks
stsewd Dec 5, 2018
12daf33
Refactor
stsewd Dec 5, 2018
43bad08
Fix
stsewd Dec 5, 2018
ac5750f
Rename
stsewd Dec 5, 2018
216a57f
Refactor
stsewd Dec 10, 2018
ab37b65
Refactor
stsewd Dec 10, 2018
c2c9a1a
Divide migrations
stsewd Dec 10, 2018
e139051
Fix migrations
stsewd Dec 10, 2018
8b57145
i18n
stsewd Dec 10, 2018
8999197
Set secret on webhook creation and update
stsewd Dec 11, 2018
d858acf
Hack django rest framework
stsewd Dec 11, 2018
6a4098e
Move msg to class
stsewd Dec 11, 2018
f124823
Merge branch 'master' into validate-payload-webhooks
stsewd Jan 2, 2019
60a2fc3
Fix merge
stsewd Jan 2, 2019
7e99478
Remove unused imports
stsewd Jan 14, 2019
62a3b51
Log warning
stsewd Jan 14, 2019
af4e4c8
Better comment
stsewd Jan 14, 2019
b2c728f
Change defaults for payload validation
stsewd Jan 14, 2019
92aead5
Merge branch 'master' into validate-payload-webhooks
stsewd Jan 14, 2019
9c01b12
Clean up tests
stsewd Jan 14, 2019
3962741
Linter
stsewd Jan 14, 2019
3ac887e
Use reverse
stsewd Jan 14, 2019
83ca111
Update docs
stsewd Jan 14, 2019
414e44d
Change wording
stsewd Jan 17, 2019
4426f71
Merge branch 'master' into validate-payload-webhooks
stsewd Jan 17, 2019
7ad06e0
Merge branch 'master' into validate-payload-webhooks
stsewd Jan 22, 2019
fbeb7fd
Remove python2 support
stsewd Jan 23, 2019
7eb6005
Merge branch 'master' into validate-payload-webhooks
stsewd Jan 24, 2019
42741a4
Merge branch 'master' into validate-payload-webhooks
stsewd Feb 18, 2019
96c7020
Merge branch 'master' into validate-payload-webhooks
stsewd Feb 26, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions readthedocs/integrations/migrations/0004_add_integration_secret.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.16 on 2018-12-03 17:56
from __future__ import unicode_literals

from django.db import migrations, models
import readthedocs.integrations.utils


class Migration(migrations.Migration):

dependencies = [
('integrations', '0003_add_missing_model_change_migrations'),
]

operations = [
migrations.AddField(
model_name='integration',
name='secret',
field=models.CharField(blank=True, default=readthedocs.integrations.utils.get_secret, help_text='Secret used to validate the payload of the webhook', max_length=255, null=True),
),
]
8 changes: 8 additions & 0 deletions readthedocs/integrations/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from rest_framework import status

from readthedocs.core.fields import default_token
from readthedocs.integrations.utils import get_secret
from readthedocs.projects.models import Project

from .utils import normalize_request_payload
Expand Down Expand Up @@ -268,6 +269,13 @@ class Integration(models.Model):
'HttpExchange',
related_query_name='integrations',
)
secret = models.CharField(
help_text='Secret used to validate the payload of the webhook',
max_length=255,
blank=True,
null=True,
default=get_secret
)

objects = IntegrationQuerySet.as_manager()

Expand Down
18 changes: 18 additions & 0 deletions readthedocs/integrations/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
"""Integration utility functions."""

from __future__ import division, print_function, unicode_literals

import os
import six


def normalize_request_payload(request):
"""
Expand All @@ -21,3 +26,16 @@ def normalize_request_payload(request):
except AttributeError:
pass
return request_payload


def get_secret(size=64):
"""
Get a random string of `size` bytes.

:param size: Number of bytes
"""
secret = os.urandom(size)
if six.PY2:
# On python two os.urandom returns str instead of bytes
return secret.encode('hex')
return secret.hex()
1 change: 1 addition & 0 deletions readthedocs/oauth/services/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ def get_webhook_data(self, project, integration):
'integration_pk': integration.pk}
)
),
'secret': integration.secret,
'content_type': 'json',
},
'events': ['push', 'pull_request', 'create', 'delete'],
Expand Down
1 change: 1 addition & 0 deletions readthedocs/oauth/services/gitlab.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ def get_webhook_data(self, repo_id, project, integration):
},
),
),
'token': integration.secret,

# Optional
'issues_events': False,
Expand Down
2 changes: 1 addition & 1 deletion readthedocs/projects/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -710,7 +710,7 @@ class IntegrationForm(forms.ModelForm):

class Meta(object):
model = Integration
exclude = ['provider_data', 'exchanges'] # pylint: disable=modelform-uses-exclude
exclude = ['provider_data', 'exchanges', 'secret'] # pylint: disable=modelform-uses-exclude

def __init__(self, *args, **kwargs):
self.project = kwargs.pop('project', None)
Expand Down
55 changes: 55 additions & 0 deletions readthedocs/restapi/parsers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from __future__ import division, print_function, unicode_literals

import json

from django.http import QueryDict
from rest_framework.exceptions import ParseError
from rest_framework.parsers import BaseParser


class RawBodyParser(BaseParser):

"""
Parse a request and expose the original payload.

DRF doesn't expose the request's body after using ``request.data``.
We are implementing a custom parser to exposed it as `raw_body`.
"""

media_type = None

def parse(self, stream, media_type, parser_context):
request = parser_context['request']
raw_body = stream.read().decode()
setattr(request, 'raw_body', raw_body)
return raw_body


class RawBodyJSONParser(RawBodyParser):

"""Parser adapted from `rest_framework.parsers.JSONParser`."""

media_type = 'application/json'

def parse(self, stream, media_type, parser_context):
raw_body = super(RawBodyJSONParser, self).parse(
stream, media_type, parser_context
)
try:
return json.loads(raw_body)
except ValueError as exc:
raise ParseError('JSON parse error - %s' % str(exc))


class RawBodyFormParser(RawBodyParser):

"""Parser adapted from `rest_framework.parsers.FormParser`."""

media_type = 'application/x-www-form-urlencoded'

def parse(self, stream, media_type, parser_context):
raw_body = super(RawBodyFormParser, self).parse(
stream, media_type, parser_context
)
data = QueryDict(raw_body)
return dict(data.items())
74 changes: 66 additions & 8 deletions readthedocs/restapi/views/integrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,16 @@
unicode_literals,
)

import hashlib
import hmac
import json
import logging
import re

import six
from django.shortcuts import get_object_or_404
from rest_framework import permissions
from rest_framework.exceptions import NotFound, ParseError
from rest_framework.exceptions import AuthenticationFailed, NotFound, ParseError
from rest_framework.renderers import JSONRenderer
from rest_framework.response import Response
from rest_framework.views import APIView
Expand All @@ -26,15 +28,17 @@
)
from readthedocs.core.views.hooks import build_branches, sync_versions
from readthedocs.integrations.models import HttpExchange, Integration
from readthedocs.integrations.utils import normalize_request_payload
from readthedocs.projects.models import Project
from readthedocs.restapi.parsers import RawBodyFormParser, RawBodyJSONParser

log = logging.getLogger(__name__)

GITHUB_EVENT_HEADER = 'HTTP_X_GITHUB_EVENT'
GITHUB_SIGNATURE_HEADER = 'HTTP_X_HUB_SIGNATURE'
GITHUB_PUSH = 'push'
GITHUB_CREATE = 'create'
GITHUB_DELETE = 'delete'
GITLAB_TOKEN_HEADER = 'HTTP_X_GITLAB_TOKEN'
GITLAB_PUSH = 'push'
GITLAB_NULL_HASH = '0' * 40
GITLAB_TAG_PUSH = 'tag_push'
Expand All @@ -47,6 +51,7 @@ class WebhookMixin(object):
"""Base class for Webhook mixins."""

permission_classes = (permissions.AllowAny,)
parser_classes = (RawBodyJSONParser, RawBodyFormParser)
renderer_classes = (JSONRenderer,)
integration = None
integration_type = None
Expand All @@ -60,6 +65,8 @@ def post(self, request, project_slug):
except Project.DoesNotExist:
raise NotFound('Project not found')
self.data = self.get_data()
if not self.is_payload_valid():
raise AuthenticationFailed('Payload not valid')
resp = self.handle_webhook()
if resp is None:
log.info('Unhandled webhook event')
Expand All @@ -82,13 +89,16 @@ def finalize_response(self, req, *args, **kwargs):
return resp

def get_data(self):
"""Normalize posted data."""
return normalize_request_payload(self.request)
return self.request.data

def handle_webhook(self):
"""Handle webhook payload."""
raise NotImplementedError

def is_payload_valid(self):
"""Validates the webhook's payload using the integration's secret."""
return True

def get_integration(self):
"""
Get or create an inbound webhook to track webhook requests.
Expand All @@ -104,10 +114,19 @@ def get_integration(self):
# in `WebhookView`
if self.integration is not None:
return self.integration
integration, _ = Integration.objects.get_or_create(
project=self.project,
integration_type=self.integration_type,
)
try:
integration = Integration.objects.get(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that we are deprecating old webhooks, is this get_integration method that guesses still needed?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure, this is about webhooks that were created without a model. I guess that the old webhooks (using deprecated services) they could have a model?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should test this more. The old webhooks don't touch integrations at all, but I'm not sure why we need logic that tries to look up an integration without a pk. All integrations have a pk attached, and there should be no reason that we should need to look up an integration without using a pk. At least, I think...

project=self.project,
integration_type=self.integration_type,
)
except Integration.DoesNotExist:
integration = Integration.objects.create(
project=self.project,
integration_type=self.integration_type,
# If we didn't create the integration,
# we didn't set a secret.
secret='',
)
return integration

def get_response_push(self, project, branches):
Expand Down Expand Up @@ -178,6 +197,30 @@ def get_data(self):
pass
return super(GitHubWebhookView, self).get_data()

def is_payload_valid(self):
"""See https://developer.github.com/webhooks/securing/"""
signature = self.request.META.get(GITHUB_SIGNATURE_HEADER)
secret = self.get_integration().secret
if not secret:
log.info(
'Skipping payload validation for project: %s',
self.project.slug
)
return True
if not signature:
return False
msg = self.request.raw_body
digest = hmac.new(
secret.encode(),
msg=msg.encode(),
digestmod=hashlib.sha1
).hexdigest()
result = hmac.compare_digest(
b'sha1=' + digest.encode(),
signature.encode()
)
return result

def handle_webhook(self):
# Get event and trigger other webhook events
event = self.request.META.get(GITHUB_EVENT_HEADER, GITHUB_PUSH)
Expand Down Expand Up @@ -228,6 +271,21 @@ class GitLabWebhookView(WebhookMixin, APIView):

integration_type = Integration.GITLAB_WEBHOOK

def is_payload_valid(self):
"""GitLab only sends back the token."""
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Improve the docstring here also: what does it do and how? Add the link to the gitlab docs also.

token = self.request.META.get(GITLAB_TOKEN_HEADER)
secret = self.get_integration().secret
if not secret:
log.info(
'Skipping payload validation for project: %s',
self.project.slug
)
return True
if not token:
return False
result = token == secret
return result

def handle_webhook(self):
"""
Handle GitLab events for push and tag_push.
Expand Down
Loading