Skip to content

Server side analytics #4131

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jun 7, 2018
3 changes: 3 additions & 0 deletions readthedocs/analytics/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""App init"""

# Dotted path to the AppConfig subclass Django should use for this app.
default_app_config = 'readthedocs.analytics.apps.AnalyticsAppConfig' # noqa
16 changes: 16 additions & 0 deletions readthedocs/analytics/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""Django app config for the analytics app."""

from __future__ import absolute_import
from django.apps import AppConfig


class AnalyticsAppConfig(AppConfig):

    """Django application configuration for the analytics app."""

    name = 'readthedocs.analytics'
    verbose_name = 'Analytics'

    def ready(self):
        """Import the analytics signals module during Django startup."""
        # Imported purely for its import-time side effects; the name itself
        # is not used here, hence the noqa.
        from readthedocs.analytics import signals  # noqa
1 change: 1 addition & 0 deletions readthedocs/analytics/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Intentionally blank"""
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is necessary with the AppConfig.

38 changes: 38 additions & 0 deletions readthedocs/analytics/signals.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""Django signal receivers for the analytics app."""

from __future__ import absolute_import, unicode_literals
import logging

from django.dispatch import receiver

from readthedocs.restapi.signals import footer_response

from .tasks import analytics_event
from .utils import get_client_ip


log = logging.getLogger(__name__) # noqa


@receiver(footer_response)
def fire_analytics_event(sender, **kwargs):
    """
    Queue a server side Google Analytics event when the footer API is called.

    Receiver for the ``footer_response`` signal. Reads ``request`` and
    ``context`` from the signal keyword arguments, builds the event payload
    and hands it to the ``analytics_event`` task.

    :param sender: signal sender (unused)
    :param kwargs: must contain ``request`` and ``context``; ``context`` must
        contain ``project``
    """
    del sender  # unused

    # NOTE(review): this is not gated by a feature flag yet, so an event is
    # queued for every footer API response.
    request, context = kwargs['request'], kwargs['context']
    project = context['project']

    event = {'ec': 'footer-api', 'ea': 'load', 'el': project.slug}

    # User data
    event['ua'] = request.META.get('HTTP_USER_AGENT')
    event['uip'] = get_client_ip(request)

    analytics_event.delay(event)

    fired_message = 'Fired analytics event for project "{}"'.format(project)
    log.info(fired_message)
    path_message = ' - Path: {}'.format(request.build_absolute_uri())
    log.info(path_message)
63 changes: 63 additions & 0 deletions readthedocs/analytics/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""Tasks for Read the Docs' analytics"""

from __future__ import absolute_import

from django.conf import settings

from readthedocs import get_version
from readthedocs.worker import app

from .utils import send_to_analytics


# Parameters merged into every hit built by the tasks in this module.
# Values supplied by the caller are applied afterwards and therefore override
# these defaults. Note that the dict is evaluated once, at import time.
DEFAULT_PARAMETERS = {
'v': '1', # analytics version (always 1)
'aip': '1', # anonymize IP
'tid': settings.GLOBAL_ANALYTICS_CODE,

# User data (placeholders; callers are expected to fill these in)
'uip': None, # User IP address
'ua': None, # User agent

# Application info
'an': 'Read the Docs',
'av': get_version(), # App version
}


@app.task(queue='web')
def analytics_pageview(pageview_data):
    """
    Report a pageview hit to Google Analytics.

    The hit is assembled in layers, later layers winning on key clashes:
    the pageview defaults, then ``DEFAULT_PARAMETERS``, then
    ``pageview_data``.

    :see: https://developers.google.com/analytics/devguides/collection/protocol/v1/parameters
    :param pageview_data: pageview parameters to send to GA
    """
    hit = {
        't': 'pageview',
        'dl': None,  # URL of the pageview (required)
        'dt': None,  # Title of the page
    }
    for layer in (DEFAULT_PARAMETERS, pageview_data):
        hit.update(layer)
    send_to_analytics(hit)


@app.task(queue='web')
def analytics_event(event_data):
    """
    Report an event hit to Google Analytics.

    The hit is assembled in layers, later layers winning on key clashes:
    the event defaults, then ``DEFAULT_PARAMETERS``, then ``event_data``.

    :see: https://developers.google.com/analytics/devguides/collection/protocol/v1/devguide#event
    :param event_data: event parameters to send to GA
    """
    # NOTE(review): the required fields default to None instead of being
    # validated; a reviewer suggested checking that event_data supplies them.
    hit = {
        't': 'event',  # GA event - don't change
        'ec': None,  # Event category (required)
        'ea': None,  # Event action (required)
        'el': None,  # Event label
        'ev': None,  # Event value (numeric)
    }
    for layer in (DEFAULT_PARAMETERS, event_data):
        hit.update(layer)
    send_to_analytics(hit)
60 changes: 60 additions & 0 deletions readthedocs/analytics/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""Utilities related to analytics"""

from __future__ import absolute_import, unicode_literals
import logging

from django.utils.encoding import force_text
import requests

try:
# Python 3.3+ only
import ipaddress
except ImportError:
from .vendor import ipaddress

log = logging.getLogger(__name__) # noqa

# Bit mask that clears the lowest 16 bits (the last 2 bytes) of an address
# integer; AND-ing an IP with it is how the address gets anonymized.
MASK = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFF0000
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this just live in the anonymize_ip_address function? Not sure if we need it elsewhere.



def get_client_ip(request):
    """
    Return the client IP address for a request.

    The first entry of the ``X-Forwarded-For`` header is preferred, since
    that is the original client when the request went through proxies
    (eg. "X-Forwarded-For: client, proxy1, proxy2"). When the header is
    missing, empty or only whitespace, ``REMOTE_ADDR`` is used instead.

    :param request: object exposing a ``META`` mapping of request metadata
    :returns: the IP address as found in ``META``, or ``None`` if neither
        value is present
    """
    # ``or ''`` also covers a header that is present but set to None.
    forwarded_for = request.META.get('HTTP_X_FORWARDED_FOR') or ''

    # Strip surrounding whitespace so " 1.2.3.4 , proxy" yields a clean
    # address and a blank header does not shadow REMOTE_ADDR.
    client = forwarded_for.split(',')[0].strip()
    if client:
        return client

    return request.META.get('REMOTE_ADDR')


def anonymize_ipaddress(ip_address):
    """
    Anonymize an IP address by zeroing its last 2 bytes.

    :param ip_address: the address to anonymize; it is converted to text
        before parsing
    :returns: the compressed string form of the anonymized address, or
        ``None`` when the input cannot be parsed as an IP address
    """
    try:
        ip_obj = ipaddress.ip_address(force_text(ip_address))
    except ValueError:
        return None

    # Rebuild with the parsed address's own class rather than the generic
    # ``ipaddress.ip_address()`` factory: the factory treats any integer
    # below 2**32 as IPv4, which would turn a small IPv6 address such as
    # "::1" into "0.0.0.0".
    anonymized_ip = type(ip_obj)(int(ip_obj) & MASK)
    return anonymized_ip.compressed


def send_to_analytics(data, timeout=3):
    """
    Send a hit to Google Analytics.

    Delivery is best effort: a timeout or a non-successful HTTP response is
    logged as a warning and not raised.

    :param data: hit parameters to POST; when ``uip`` is set it is replaced
        in place by its anonymized form before sending
    :param timeout: seconds to wait for Google Analytics before giving up.
        requests applies no timeout by default, so without this a stalled
        connection would block the worker indefinitely.
    """
    # ``.get`` so a payload without a ``uip`` key does not raise KeyError.
    if data.get('uip'):
        # Anonymize IP address if applicable
        data['uip'] = anonymize_ipaddress(data['uip'])

    resp = None
    try:
        resp = requests.post(
            'https://www.google-analytics.com/collect',
            data=data,
            timeout=timeout,
        )
    except requests.Timeout:
        log.warning('Timeout sending to Google Analytics')

    # Compare against None explicitly: a Response is falsy whenever ``ok`` is
    # False, so ``resp and not resp.ok`` could never be true and error
    # responses were silently ignored.
    if resp is not None and not resp.ok:
        log.warning('Unknown error sending to Google Analytics')
Empty file.
Loading