Skip to content

Store Pageviews in DB #6121

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 30 commits into from
May 19, 2020
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
0998e8a
initial work
dojutsu-user Aug 28, 2019
3e8a99a
fix arguments
dojutsu-user Aug 29, 2019
f5c82cf
update migration file
dojutsu-user Aug 29, 2019
e1e954c
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Aug 30, 2019
9b03012
show top 10 viewed page to the users.
dojutsu-user Sep 3, 2019
152aff0
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Sep 6, 2019
f389476
initial work for showing graphs to the user
dojutsu-user Sep 8, 2019
1717cfc
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Sep 17, 2019
a19823f
show pageviews for a specific page
dojutsu-user Sep 18, 2019
af056c8
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Oct 2, 2019
a1d0a9b
change view to class based view
dojutsu-user Oct 2, 2019
82b3182
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Oct 3, 2019
415a3b3
fix lint
dojutsu-user Oct 3, 2019
2073a82
fix more lint
dojutsu-user Oct 5, 2019
b263c1b
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Oct 14, 2019
3ec59dc
store page_slug instead of page_path
dojutsu-user Oct 15, 2019
108fd5f
little refactor
dojutsu-user Oct 15, 2019
ddc96d9
update test
dojutsu-user Oct 15, 2019
8da0f93
fix tests
dojutsu-user Oct 15, 2019
d0e1317
add test for search tasks
dojutsu-user Oct 16, 2019
3946398
use F expression
dojutsu-user Oct 17, 2019
b06d599
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Oct 22, 2019
11eb6ac
fix tests
dojutsu-user Oct 22, 2019
95e7ee7
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Oct 28, 2019
67d4885
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Oct 31, 2019
9cb2310
Merge branch 'master' into search-pageviews-sorting
davidfischer May 7, 2020
3ce38c8
Index and refactor page view counting
davidfischer May 8, 2020
46747b0
Feedback updates and renames for clarity
davidfischer May 8, 2020
7e5a1f0
Merge branch 'master' into search-pageviews-sorting
davidfischer May 19, 2020
b530bbc
Move pageview models to the analytics app
davidfischer May 19, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions readthedocs/api/v2/views/footer_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
highest_version,
parse_version_failsafe,
)
from readthedocs.search.tasks import increase_page_view_count


def get_version_compare_data(project, base_version=None):
Expand Down Expand Up @@ -221,6 +222,15 @@ def get(self, request, format=None):
'version_supported': version.supported,
}

# increase the page view count for the given page
page_slug = request.GET.get('page', '')
if page_slug:
increase_page_view_count.delay(
project_slug=context['project'].slug,
version_slug=context['version'].slug,
path=page_slug
)

# Allow folks to hook onto the footer response for various information
# collection, or to modify the resp_data.
footer_response.send(
Expand Down
5 changes: 5 additions & 0 deletions readthedocs/projects/urls/private.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
IntegrationExchangeDetail,
IntegrationList,
IntegrationWebhookSync,
TrafficAnalyticsView,
ProjectAdvancedUpdate,
ProjectAdvertisingUpdate,
ProjectDashboard,
Expand Down Expand Up @@ -139,6 +140,10 @@
SearchAnalytics.as_view(),
name='projects_search_analytics',
),
url(
r'^(?P<project_slug>[-\w]+)/traffic-analytics/$',
TrafficAnalyticsView.as_view(), name='projects_traffic_analytics',
),
]

domain_urls = [
Expand Down
42 changes: 30 additions & 12 deletions readthedocs/projects/views/private.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from allauth.socialaccount.models import SocialAccount
from django.conf import settings
from django.contrib import messages
from django.contrib.auth.decorators import login_required
from django.db.models import Count
from django.http import (
Http404,
Expand Down Expand Up @@ -41,7 +40,6 @@
)
from readthedocs.core.mixins import (
ListViewWithForm,
LoginRequiredMixin,
PrivateViewMixin,
)
from readthedocs.core.utils import broadcast, trigger_build
Expand Down Expand Up @@ -83,7 +81,7 @@
ProjectImportMixin,
ProjectRelationListMixin,
)
from readthedocs.search.models import SearchQuery
from readthedocs.search.models import SearchQuery, PageView

from ..tasks import retry_domain_verification

Expand Down Expand Up @@ -977,15 +975,6 @@ class RegexAutomationRuleUpdate(RegexAutomationRuleMixin, UpdateView):
pass


@login_required
def search_analytics_view(request, project_slug):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have no idea what this view is. It doesn't appear to be used or valid.

"""View for search analytics."""
project = get_object_or_404(
Project.objects.for_admin_user(request.user),
slug=project_slug,
)


class SearchAnalytics(ProjectAdminMixin, PrivateViewMixin, TemplateView):

template_name = 'projects/projects_search_analytics.html'
Expand Down Expand Up @@ -1063,3 +1052,32 @@ def _search_analytics_csv_data(self):
)
response['Content-Disposition'] = f'attachment; filename="{file_name}"'
return response


class TrafficAnalyticsView(ProjectAdminMixin, PrivateViewMixin, TemplateView):

template_name = 'projects/project_traffic_analytics.html'
http_method_names = ['get']

def get_context_data(self, **kwargs):
context = super().get_context_data(**kwargs)
project = self.get_project()

# Count of views for top pages over the month
top_pages = PageView.top_viewed_pages(project)
top_viewed_pages = zip(
top_pages['pages'],
top_pages['view_counts']
)

# Aggregate pageviews grouped by day
page_data = PageView.page_views_by_date(
project_slug=project.slug,
)

context.update({
'top_viewed_pages': top_viewed_pages,
'page_data': page_data,
})

return context
52 changes: 29 additions & 23 deletions readthedocs/rtd_tests/tests/test_footer.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,30 +378,36 @@ def render(self):

def test_version_queries(self):
# The number of Versions shouldn't impact the number of queries
with self.assertNumQueries(self.EXPECTED_QUERIES):
response = self.render()
self.assertContains(response, '0.8.1')

for patch in range(3):
identifier = '0.99.{}'.format(patch)
self.pip.versions.create(
verbose_name=identifier,
identifier=identifier,
type=TAG,
active=True,
)

with self.assertNumQueries(self.EXPECTED_QUERIES):
response = self.render()
self.assertContains(response, '0.99.0')
with mock.patch('readthedocs.api.v2.views.footer_views.increase_page_view_count') as mocked:
mocked.side_effect = None

with self.assertNumQueries(self.EXPECTED_QUERIES):
response = self.render()
self.assertContains(response, '0.8.1')

for patch in range(3):
identifier = '0.99.{}'.format(patch)
self.pip.versions.create(
verbose_name=identifier,
identifier=identifier,
type=TAG,
active=True,
)

with self.assertNumQueries(self.EXPECTED_QUERIES):
response = self.render()
self.assertContains(response, '0.99.0')

def test_domain_queries(self):
# Setting up a custom domain shouldn't impact the number of queries
self.pip.domains.create(
domain='http://docs.foobar.com',
canonical=True,
)
with mock.patch('readthedocs.api.v2.views.footer_views.increase_page_view_count') as mocked:
mocked.side_effect = None

self.pip.domains.create(
domain='http://docs.foobar.com',
canonical=True,
)

with self.assertNumQueries(self.EXPECTED_QUERIES):
response = self.render()
self.assertContains(response, 'docs.foobar.com')
with self.assertNumQueries(self.EXPECTED_QUERIES):
response = self.render()
self.assertContains(response, 'docs.foobar.com')
11 changes: 10 additions & 1 deletion readthedocs/search/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from django.contrib import admin

from .models import SearchQuery
from .models import SearchQuery, PageView


class SearchQueryAdmin(admin.ModelAdmin):
Expand All @@ -14,4 +14,13 @@ class SearchQueryAdmin(admin.ModelAdmin):
list_select_related = ('project', 'version', 'version__project')


class PageViewAdmin(admin.ModelAdmin):
raw_id_fields = ('project', 'version')
list_display = ('__str__', 'view_count')
search_fields = ('project__slug', 'version__slug', 'path')
readonly_fields = ('date',)
list_select_related = ('project', 'version', 'version__project')


admin.site.register(SearchQuery, SearchQueryAdmin)
admin.site.register(PageView, PageViewAdmin)
31 changes: 31 additions & 0 deletions readthedocs/search/migrations/0002_create_page_views_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Generated by Django 2.2.12 on 2020-05-07 23:32

import datetime
from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):

dependencies = [
('projects', '0048_remove_version_privacy_field'),
('builds', '0019_migrate_protected_versions_to_hidden'),
('search', '0001_initial'),
]

operations = [
migrations.CreateModel(
name='PageView',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('path', models.CharField(max_length=4096)),
('view_count', models.PositiveIntegerField(default=0)),
('date', models.DateField(db_index=True, default=datetime.date.today)),
('project', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='page_views', to='projects.Project')),
('version', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='page_views', to='builds.Version', verbose_name='Version')),
],
options={
'unique_together': {('project', 'version', 'path', 'date')},
},
),
]
124 changes: 116 additions & 8 deletions readthedocs/search/models.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""Search Queries."""

import datetime

from django.db import models
from django.db.models import Count
from django.db.models import Count, Sum
from django.db.models.functions import TruncDate
from django.utils import timezone
from django.utils.translation import ugettext_lazy as _
Expand All @@ -11,6 +13,7 @@
from readthedocs.builds.models import Version
from readthedocs.projects.models import Project
from readthedocs.projects.querysets import RelatedProjectQuerySet
from readthedocs.search.utils import _last_30_days_iter


class SearchQuery(TimeStampedModel):
Expand Down Expand Up @@ -58,9 +61,6 @@ def generate_queries_count_of_one_month(cls, project_slug):
today = timezone.now().date()
last_30th_day = timezone.now().date() - timezone.timedelta(days=30)

# this includes the current day also
last_31_days_iter = [last_30th_day + timezone.timedelta(days=n) for n in range(31)]

qs = cls.objects.filter(
project__slug=project_slug,
created__date__lte=today,
Expand All @@ -77,17 +77,125 @@ def generate_queries_count_of_one_month(cls, project_slug):
.values_list('created_date', 'count')
)

count_data = [count_dict.get(date) or 0 for date in last_31_days_iter]
count_data = [count_dict.get(date) or 0 for date in _last_30_days_iter()]

# format the date value to a more readable form
# Eg. `16 Jul`
last_30_days_str = [
timezone.datetime.strftime(date, '%d %b')
for date in _last_30_days_iter()
]

final_data = {
'labels': last_30_days_str,
'int_data': count_data,
}

return final_data


class PageView(models.Model):
project = models.ForeignKey(
Project,
related_name='page_views',
on_delete=models.CASCADE,
)
version = models.ForeignKey(
Version,
verbose_name=_('Version'),
related_name='page_views',
on_delete=models.CASCADE,
)
path = models.CharField(max_length=4096)
view_count = models.PositiveIntegerField(default=0)
date = models.DateField(default=datetime.date.today, db_index=True)

class Meta:
unique_together = ("project", "version", "path", "date")

def __str__(self):
return f'PageView: [{self.project.slug}:{self.version.slug}] - {self.path} for {self.date}'

@classmethod
def top_viewed_pages(cls, project, since=None):
"""
Returns top 10 pages according to view counts.

Structure of returned data is compatible to make graphs.
Sample returned data::
{
'pages': ['index', 'config-file/v1', 'intro/import-guide'],
'view_counts': [150, 120, 100]
}
This data shows that `index` is the most viewed page having 150 total views,
followed by `config-file/v1` and `intro/import-guide` having 120 and
100 total page views respectively.
"""
if since is None:
since = timezone.now().date() - timezone.timedelta(days=30)

qs = (
cls.objects
.filter(project=project, date__gte=since)
.values_list('path')
.annotate(total_views=Sum('view_count'))
.values_list('path', 'total_views')
.order_by('-total_views')[:10]
)

pages = []
view_counts = []

for data in qs.iterator():
pages.append(data[0])
view_counts.append(data[1])

final_data = {
'pages': pages,
'view_counts': view_counts,
}

return final_data

@classmethod
def page_views_by_date(cls, project_slug, since=None):
"""
Returns the total page views count for last 30 days for a particular project.

Structure of returned data is compatible to make graphs.
Sample returned data::
{
'labels': ['01 Jul', '02 Jul', '03 Jul'],
'int_data': [150, 200, 143]
}
This data shows that there were 150 page views on 01 July,
200 page views on 02 July and 143 page views on 03 July.
"""
if since is None:
since = timezone.now().date() - timezone.timedelta(days=30)

qs = cls.objects.filter(
project__slug=project_slug,
date__gt=since,
).values('date').annotate(total_views=Sum('view_count')).order_by('date')

count_dict = dict(
qs.order_by('date').values_list('date', 'total_views')
)

# This fills in any dates where there is no data
# to make sure we have a full 30 days of dates
count_data = [count_dict.get(date) or 0 for date in _last_30_days_iter()]

# format the date value to a more readable form
# Eg. `16 Jul`
last_31_days_str = [
last_30_days_str = [
timezone.datetime.strftime(date, '%d %b')
for date in last_31_days_iter
for date in _last_30_days_iter()
]

final_data = {
'labels': last_31_days_str,
'labels': last_30_days_str,
'int_data': count_data,
}

Expand Down
Loading