Skip to content

Store Pageviews in DB #6121

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 30 commits into from
May 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
0998e8a
initial work
dojutsu-user Aug 28, 2019
3e8a99a
fix arguments
dojutsu-user Aug 29, 2019
f5c82cf
update migration file
dojutsu-user Aug 29, 2019
e1e954c
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Aug 30, 2019
9b03012
show top 10 viewed page to the users.
dojutsu-user Sep 3, 2019
152aff0
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Sep 6, 2019
f389476
initial work for showing graphs to the user
dojutsu-user Sep 8, 2019
1717cfc
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Sep 17, 2019
a19823f
show pageviews for a specific page
dojutsu-user Sep 18, 2019
af056c8
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Oct 2, 2019
a1d0a9b
change view to class based view
dojutsu-user Oct 2, 2019
82b3182
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Oct 3, 2019
415a3b3
fix lint
dojutsu-user Oct 3, 2019
2073a82
fix more lint
dojutsu-user Oct 5, 2019
b263c1b
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Oct 14, 2019
3ec59dc
store page_slug instead of page_path
dojutsu-user Oct 15, 2019
108fd5f
little refactor
dojutsu-user Oct 15, 2019
ddc96d9
update test
dojutsu-user Oct 15, 2019
8da0f93
fix tests
dojutsu-user Oct 15, 2019
d0e1317
add test for search tasks
dojutsu-user Oct 16, 2019
3946398
use F expression
dojutsu-user Oct 17, 2019
b06d599
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Oct 22, 2019
11eb6ac
fix tests
dojutsu-user Oct 22, 2019
95e7ee7
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Oct 28, 2019
67d4885
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Oct 31, 2019
9cb2310
Merge branch 'master' into search-pageviews-sorting
davidfischer May 7, 2020
3ce38c8
Index and refactor page view counting
davidfischer May 8, 2020
46747b0
Feedback updates and renames for clarity
davidfischer May 8, 2020
7e5a1f0
Merge branch 'master' into search-pageviews-sorting
davidfischer May 19, 2020
b530bbc
Move pageview models to the analytics app
davidfischer May 19, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions readthedocs/analytics/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""Analytics Admin classes."""

from django.contrib import admin

from .models import PageView


@admin.register(PageView)
class PageViewAdmin(admin.ModelAdmin):

    """Admin options for ``PageView`` rows, registered on the default admin site."""

    # Change list: displayed columns, searchable lookups and joined relations.
    list_display = ('project', 'version', 'path', 'view_count', 'date')
    search_fields = ('project__slug', 'version__slug', 'path')
    list_select_related = ('project', 'version', 'version__project')

    # Change form: how the foreign keys and the date are presented.
    raw_id_fields = ('project', 'version')
    readonly_fields = ('date',)
32 changes: 32 additions & 0 deletions readthedocs/analytics/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Generated by Django 2.2.12 on 2020-05-19 00:45

import datetime
from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
# Initial schema migration for the analytics app: it creates the ``PageView`` model.

# Flags this migration as the first one of the app.
initial = True

# Migrations of the ``builds`` and ``projects`` apps this one depends on;
# the foreign keys below target ``builds.Version`` and ``projects.Project``.
dependencies = [
('builds', '0022_migrate_protected_versions'),
('projects', '0048_remove_version_privacy_field'),
]

# A single operation: create ``PageView`` with its fields and uniqueness constraint.
operations = [
migrations.CreateModel(
name='PageView',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
# Path of the counted page, at most 4096 characters.
('path', models.CharField(max_length=4096)),
# Non-negative counter that starts at 0.
('view_count', models.PositiveIntegerField(default=0)),
# Day the counter belongs to; indexed, defaults to the current date.
('date', models.DateField(db_index=True, default=datetime.date.today)),
# Both relations cascade on delete and expose ``page_views`` as reverse name.
('project', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='page_views', to='projects.Project')),
('version', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='page_views', to='builds.Version', verbose_name='Version')),
],
options={
# One row per (project, version, path, date) combination.
'unique_together': {('project', 'version', 'path', 'date')},
},
),
]
Empty file.
130 changes: 130 additions & 0 deletions readthedocs/analytics/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
"""Analytics modeling to help understand the projects on Read the Docs."""

import datetime

from django.db import models
from django.db.models import Sum
from django.utils import timezone
from django.utils.translation import ugettext_lazy as _

from readthedocs.builds.models import Version
from readthedocs.projects.models import Project


def _last_30_days_iter():
    """
    Return a generator over the dates from thirty days ago through today.

    The dates come in ascending order and the current day is included,
    so the generator produces 31 dates in total.
    """
    oldest_day = timezone.now().date() - timezone.timedelta(days=30)
    day_offsets = range(31)

    return (
        oldest_day + timezone.timedelta(days=offset)
        for offset in day_offsets
    )


class PageView(models.Model):

    """
    PageView counts per day for a project, version, and path.

    Each row holds the number of views (``view_count``) that ``path`` of
    ``version`` of ``project`` received on ``date``. The combination of
    those four fields is unique.
    """

    project = models.ForeignKey(
        Project,
        related_name='page_views',
        on_delete=models.CASCADE,
    )
    version = models.ForeignKey(
        Version,
        verbose_name=_('Version'),
        related_name='page_views',
        on_delete=models.CASCADE,
    )
    path = models.CharField(max_length=4096)
    view_count = models.PositiveIntegerField(default=0)
    date = models.DateField(default=datetime.date.today, db_index=True)

    class Meta:
        unique_together = ("project", "version", "path", "date")

    def __str__(self):
        return f'PageView: [{self.project.slug}:{self.version.slug}] - {self.path} for {self.date}'

    @classmethod
    def top_viewed_pages(cls, project, since=None):
        """
        Returns top 10 pages according to view counts.

        :param project: project whose page views are aggregated
        :param since: earliest date (inclusive) to take into account;
            defaults to thirty days before the current date
        :returns: dict with two parallel lists, ``pages`` and ``view_counts``,
            ordered from most to least viewed

        Structure of returned data is compatible to make graphs.
        Sample returned data::
            {
                'pages': ['index', 'config-file/v1', 'intro/import-guide'],
                'view_counts': [150, 120, 100]
            }
        This data shows that `index` is the most viewed page having 150 total views,
        followed by `config-file/v1` and `intro/import-guide` having 120 and
        100 total page views respectively.
        """
        if since is None:
            since = timezone.now().date() - datetime.timedelta(days=30)

        # Group by path, sum the daily counters and keep the ten largest sums.
        queryset = (
            cls.objects
            .filter(project=project, date__gte=since)
            .values_list('path')
            .annotate(total_views=Sum('view_count'))
            .values_list('path', 'total_views')
            .order_by('-total_views')[:10]
        )

        pages = []
        view_counts = []
        for path, total_views in queryset.iterator():
            pages.append(path)
            view_counts.append(total_views)

        return {
            'pages': pages,
            'view_counts': view_counts,
        }

    @classmethod
    def page_views_by_date(cls, project_slug, since=None):
        """
        Returns the total page views count for last 30 days for a particular project.

        :param project_slug: slug of the project whose views are summed
        :param since: earliest date (inclusive) whose views are counted;
            defaults to thirty days before the current date. It only narrows
            the query: the returned lists always span the dates produced by
            ``_last_30_days_iter()``, and dates without data get ``0``.
        :returns: dict with two parallel lists, ``labels`` (dates formatted
            like ``16 Jul``) and ``int_data`` (total views for that date)

        Structure of returned data is compatible to make graphs.
        Sample returned data::
            {
                'labels': ['01 Jul', '02 Jul', '03 Jul'],
                'int_data': [150, 200, 143]
            }
        This data shows that there were 150 page views on 01 July,
        200 page views on 02 July and 143 page views on 03 July.
        """
        if since is None:
            since = timezone.now().date() - datetime.timedelta(days=30)

        # ``since`` itself is part of the reported window (it is the first
        # date yielded by ``_last_30_days_iter()``), so the lookup has to be
        # inclusive; with ``date__gt`` the oldest day was always reported as 0.
        # This also matches the ``date__gte`` filter of ``top_viewed_pages``.
        queryset = (
            cls.objects
            .filter(project__slug=project_slug, date__gte=since)
            .values('date')
            .annotate(total_views=Sum('view_count'))
            .order_by('date')
            .values_list('date', 'total_views')
        )
        count_dict = dict(queryset)

        # Materialize the window once so labels and counts are built from the
        # very same dates, even if the day changes while this method runs.
        days = list(_last_30_days_iter())

        return {
            # format the date value to a more readable form, eg. `16 Jul`
            'labels': [day.strftime('%d %b') for day in days],
            # fill in 0 for any date where there is no data
            'int_data': [count_dict.get(day) or 0 for day in days],
        }
32 changes: 32 additions & 0 deletions readthedocs/analytics/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,15 @@
"""Tasks for Read the Docs' analytics."""

from django.conf import settings
from django.db.models import F
from django.utils import timezone

import readthedocs
from readthedocs.worker import app
from readthedocs.builds.models import Version
from readthedocs.projects.models import Project

from .models import PageView
from .utils import send_to_analytics


Expand Down Expand Up @@ -70,3 +75,30 @@ def analytics_event(
data.update(DEFAULT_PARAMETERS)
data.update(kwargs)
send_to_analytics(data)


@app.task(queue='web')
def increase_page_view_count(project_slug, version_slug, path):
    """
    Add one view to today's counter of a page.

    Looks up the project by ``project_slug`` and its version by
    ``version_slug``, fetches or creates the ``PageView`` row for ``path``
    and the current date, and then increments its ``view_count``.
    """
    project = Project.objects.get(slug=project_slug)
    version = Version.objects.get(project=project, slug=version_slug)
    today = timezone.now().date()

    page_view, _created = PageView.objects.get_or_create(
        project=project,
        version=version,
        path=path,
        date=today,
    )

    # The increment is expressed with F() instead of reading the value
    # into Python and writing it back.
    matching_row = PageView.objects.filter(pk=page_view.pk)
    matching_row.update(view_count=F('view_count') + 1)


@app.task(queue='web')
def delete_old_page_counts():
    """
    Remove ``PageView`` rows dated more than 30 days back.

    Rows whose date is exactly thirty days ago are kept. The result of the
    queryset ``delete()`` call is returned.

    This is intended to run from a periodic task daily.
    """
    cutoff = timezone.now().date() - timezone.timedelta(days=30)
    expired_page_views = PageView.objects.filter(date__lt=cutoff)
    return expired_page_views.delete()
89 changes: 88 additions & 1 deletion readthedocs/analytics/tests.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
# -*- coding: utf-8 -*-
from unittest import mock

from django_dynamic_fixture import get
from django.test import TestCase, RequestFactory
from django.utils import timezone

from readthedocs.builds.models import Version
from readthedocs.projects.models import Project

from .models import PageView
from .tasks import increase_page_view_count
from .utils import (
anonymize_ip_address,
anonymize_user_agent,
Expand Down Expand Up @@ -87,3 +95,82 @@ def test_get_client_ip_with_remote_addr(self):
request.META['REMOTE_ADDR'] = '203.0.113.195'
client_ip = get_client_ip(request)
self.assertEqual(client_ip, '203.0.113.195')


class AnalyticsTasksTests(TestCase):

    """Tests for the analytics tasks."""

    def _view_index_at(self, moment, project, version):
        """Run the counting task for path ``index`` with the task's ``timezone.now`` returning ``moment``."""
        with mock.patch('readthedocs.analytics.tasks.timezone.now') as mocked_timezone:
            mocked_timezone.return_value = moment
            increase_page_view_count(
                project_slug=project.slug,
                version_slug=version.slug,
                path='index',
            )

    def test_increase_page_view_count(self):
        """Views on the same day share one row; each new day gets its own row."""
        project = get(
            Project,
            slug='project-1',
        )
        version = get(Version, slug='1.8', project=project)

        today = timezone.now()
        tomorrow = timezone.now() + timezone.timedelta(days=1)
        yesterday = timezone.now() - timezone.timedelta(days=1)

        page_views = PageView.objects.all()

        assert (
            page_views.count() == 0
        ), 'There\'s no PageView object created yet.'

        # testing for yesterday: first view creates the row
        self._view_index_at(yesterday, project, version)

        assert (
            page_views.count() == 1
        ), 'PageView object for path \'index\' is created'
        assert (
            page_views.first().view_count == 1
        ), '\'index\' has 1 view'

        # second view on the same day only bumps the counter
        self._view_index_at(yesterday, project, version)

        assert (
            page_views.count() == 1
        ), 'PageView object for path \'index\' is already created'
        assert (
            page_views.first().view_count == 2
        ), '\'index\' has 2 views now'

        # testing for today
        self._view_index_at(today, project, version)

        assert (
            page_views.count() == 2
        ), 'PageView object for path \'index\' is created for two days (yesterday and today)'
        assert (
            page_views.order_by('-date').first().view_count == 1
        ), '\'index\' has 1 view today'

        # testing for tomorrow
        self._view_index_at(tomorrow, project, version)

        assert (
            page_views.count() == 3
        ), 'PageView object for path \'index\' is created for three days (yesterday, today & tomorrow)'
        assert (
            page_views.order_by('-date').first().view_count == 1
        ), '\'index\' has 1 view tomorrow'
10 changes: 10 additions & 0 deletions readthedocs/api/v2/views/footer_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
highest_version,
parse_version_failsafe,
)
from readthedocs.analytics.tasks import increase_page_view_count


def get_version_compare_data(project, base_version=None):
Expand Down Expand Up @@ -221,6 +222,15 @@ def get(self, request, format=None):
'version_supported': version.supported,
}

# increase the page view count for the given page
page_slug = request.GET.get('page', '')
if page_slug:
increase_page_view_count.delay(
project_slug=context['project'].slug,
version_slug=context['version'].slug,
path=page_slug
)

# Allow folks to hook onto the footer response for various information
# collection, or to modify the resp_data.
footer_response.send(
Expand Down
5 changes: 5 additions & 0 deletions readthedocs/projects/urls/private.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
IntegrationExchangeDetail,
IntegrationList,
IntegrationWebhookSync,
TrafficAnalyticsView,
ProjectAdvancedUpdate,
ProjectAdvertisingUpdate,
ProjectDashboard,
Expand Down Expand Up @@ -139,6 +140,10 @@
SearchAnalytics.as_view(),
name='projects_search_analytics',
),
url(
r'^(?P<project_slug>[-\w]+)/traffic-analytics/$',
TrafficAnalyticsView.as_view(), name='projects_traffic_analytics',
),
]

domain_urls = [
Expand Down
Loading