-
-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Server side analytics #4131
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Server side analytics #4131
Changes from 3 commits
27cc74b
edc96d3
9f796d4
dd320af
a1f8201
0e60520
30ee811
46ca5d7
2c09fda
e283d32
b425ce9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
"""App init""" | ||
|
||
default_app_config = 'readthedocs.analytics.apps.AnalyticsAppConfig' # noqa |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
"""Django app config for the analytics app.""" | ||
|
||
from __future__ import absolute_import | ||
from django.apps import AppConfig | ||
|
||
|
||
class AnalyticsAppConfig(AppConfig):

    """Django application configuration for the analytics app."""

    verbose_name = 'Analytics'
    name = 'readthedocs.analytics'

    def ready(self):
        """Import the analytics signals module when Django startup runs this hook."""
        # Imported purely for its side effects; the name itself is unused.
        from readthedocs.analytics import signals  # noqa
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
"""Intentionally blank""" | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
"""Django signal receivers for the analytics app.""" | ||
|
||
from __future__ import absolute_import, unicode_literals | ||
import logging | ||
|
||
from django.dispatch import receiver | ||
|
||
from readthedocs.restapi.signals import footer_response | ||
|
||
from .tasks import analytics_event | ||
from .utils import get_client_ip | ||
|
||
|
||
log = logging.getLogger(__name__) # noqa | ||
|
||
|
||
@receiver(footer_response)
def fire_analytics_event(sender, **kwargs):
    """
    Queue a server side Google Analytics event for a footer API call.

    Expects ``request`` and ``context`` keyword arguments; the project is
    read from ``context['project']``.
    """
    del sender  # unused
    request = kwargs['request']
    project = kwargs['context']['project']

    # TODO: raised in review - this is not yet gated by the feature flag,
    # so an event is queued for every footer API call.
    analytics_event.delay({
        'ec': 'footer-api',
        'ea': 'load',
        'el': project.slug,

        # User data
        'ua': request.META.get('HTTP_USER_AGENT'),
        'uip': get_client_ip(request),
    })

    fired_message = 'Fired analytics event for project "{}"'.format(project)
    log.info(fired_message)
    path_message = ' - Path: {}'.format(request.build_absolute_uri())
    log.info(path_message)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
"""Tasks for Read the Docs' analytics""" | ||
|
||
from __future__ import absolute_import | ||
|
||
from django.conf import settings | ||
|
||
from readthedocs import get_version | ||
from readthedocs.worker import app | ||
|
||
from .utils import send_to_analytics | ||
|
||
|
||
# Parameters merged into every hit sent to Google Analytics; the task
# functions layer their own values on top of these.
DEFAULT_PARAMETERS = dict(
    v='1',  # analytics version (always 1)
    aip='1',  # anonymize IP
    tid=settings.GLOBAL_ANALYTICS_CODE,

    # User data
    uip=None,  # User IP address
    ua=None,  # User agent

    # Application info
    an='Read the Docs',
    av=get_version(),  # App version
)
|
||
|
||
@app.task(queue='web')
def analytics_pageview(pageview_data):
    """
    Report a pageview hit to Google Analytics.

    The payload is layered: the pageview skeleton first, then
    ``DEFAULT_PARAMETERS``, then ``pageview_data``, so later layers
    override earlier ones.

    :see: https://developers.google.com/analytics/devguides/collection/protocol/v1/parameters
    :param pageview_data: pageview parameters to send to GA
    """
    payload = dict(
        t='pageview',
        dl=None,  # URL of the pageview (required)
        dt=None,  # Title of the page
    )
    for layer in (DEFAULT_PARAMETERS, pageview_data):
        payload.update(layer)
    send_to_analytics(payload)
|
||
|
||
@app.task(queue='web')
def analytics_event(event_data):
    """
    Report an event hit to Google Analytics.

    The payload is layered: the event skeleton first, then
    ``DEFAULT_PARAMETERS``, then ``event_data``, so later layers override
    earlier ones.

    :see: https://developers.google.com/analytics/devguides/collection/protocol/v1/devguide#event
    :param event_data: event parameters to send to GA
    """
    # NOTE(review): a reviewer asked whether these should default to None
    # and suggested being more defensive about the incoming data.
    payload = dict(
        t='event',  # GA event - don't change
        ec=None,  # Event category (required)
        ea=None,  # Event action (required)
        el=None,  # Event label
        ev=None,  # Event value (numeric)
    )
    for layer in (DEFAULT_PARAMETERS, event_data):
        payload.update(layer)
    send_to_analytics(payload)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
"""Utilities related to analytics""" | ||
|
||
from __future__ import absolute_import, unicode_literals | ||
import logging | ||
|
||
from django.utils.encoding import force_text | ||
import requests | ||
|
||
try: | ||
# Python 3.3+ only | ||
import ipaddress | ||
except ImportError: | ||
from .vendor import ipaddress | ||
|
||
log = logging.getLogger(__name__) # noqa | ||
|
||
# Bit mask that clears the last 2 bytes of an IP address when AND-ed with
# its integer form (used for anonymization)
MASK = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFF0000
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should this just live in the |
||
|
||
|
||
def get_client_ip(request):
    """
    Get the client IP address for a request object.

    The first entry of the ``X-Forwarded-For`` header is preferred
    (eg. "X-Forwarded-For: client, proxy1, proxy2"); when the header is
    missing or its first entry is blank, ``REMOTE_ADDR`` is used instead.

    :param request: object exposing a ``META`` mapping of request headers
    :returns: the IP address string, or ``None`` if neither value is set
    """
    # ``or ''`` also covers a header that is present but set to None
    forwarded_for = request.META.get('HTTP_X_FORWARDED_FOR') or ''

    # Strip surrounding whitespace so the result parses as an IP address
    original_ip = forwarded_for.split(',')[0].strip()
    if original_ip:
        return original_ip

    return request.META.get('REMOTE_ADDR')
|
||
|
||
def anonymize_ipaddress(ip_address):
    """
    Anonymize an IP address by zeroing the last 2 bytes.

    :param ip_address: textual IPv4 or IPv6 address
    :returns: the anonymized address in compressed form, or ``None`` when
        ``ip_address`` cannot be parsed as an IP address
    """
    try:
        ip_obj = ipaddress.ip_address(force_text(ip_address))
    except ValueError:
        return None

    # Rebuild with the parsed address's own class. ``ipaddress.ip_address``
    # treats any integer below 2**32 as IPv4, so a low-valued IPv6 address
    # such as ``::1`` would otherwise come back as ``0.0.0.0``.
    anonymized_ip = type(ip_obj)(int(ip_obj) & MASK)
    return anonymized_ip.compressed
|
||
|
||
def send_to_analytics(data, timeout=3):
    """
    Send data to Google Analytics, anonymizing the user IP first.

    :param data: parameters to POST; a truthy ``uip`` entry is replaced by
        its anonymized form before sending. The dict passed in is not
        modified.
    :param timeout: seconds to wait on Google Analytics before giving up
    """
    # Work on a copy so the caller's dict is left untouched
    payload = dict(data)
    if payload.get('uip'):
        # Anonymize IP address if applicable
        payload['uip'] = anonymize_ipaddress(payload['uip'])

    try:
        # Without an explicit timeout the request can block indefinitely
        # and ``requests.Timeout`` would never be raised.
        resp = requests.post(
            'https://www.google-analytics.com/collect',
            data=payload,
            timeout=timeout,
        )
    except requests.Timeout:
        log.warning('Timeout sending to Google Analytics')
        return

    # Check ``ok`` directly: a response object is itself falsy for error
    # statuses, so ``resp and not resp.ok`` could never be true.
    if not resp.ok:
        log.warning('Unknown error sending to Google Analytics')
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think this is necessary with the AppConfig.