Skip to content

Store Pageviews in DB #6121

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 30 commits into from
May 19, 2020
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
0998e8a
initial work
dojutsu-user Aug 28, 2019
3e8a99a
fix arguments
dojutsu-user Aug 29, 2019
f5c82cf
update migration file
dojutsu-user Aug 29, 2019
e1e954c
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Aug 30, 2019
9b03012
show top 10 viewed page to the users.
dojutsu-user Sep 3, 2019
152aff0
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Sep 6, 2019
f389476
initial work for showing graphs to the user
dojutsu-user Sep 8, 2019
1717cfc
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Sep 17, 2019
a19823f
show pageviews for a specific page
dojutsu-user Sep 18, 2019
af056c8
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Oct 2, 2019
a1d0a9b
change view to class based view
dojutsu-user Oct 2, 2019
82b3182
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Oct 3, 2019
415a3b3
fix lint
dojutsu-user Oct 3, 2019
2073a82
fix more lint
dojutsu-user Oct 5, 2019
b263c1b
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Oct 14, 2019
3ec59dc
store page_slug instead of page_path
dojutsu-user Oct 15, 2019
108fd5f
little refactor
dojutsu-user Oct 15, 2019
ddc96d9
update test
dojutsu-user Oct 15, 2019
8da0f93
fix tests
dojutsu-user Oct 15, 2019
d0e1317
add test for search tasks
dojutsu-user Oct 16, 2019
3946398
use F expression
dojutsu-user Oct 17, 2019
b06d599
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Oct 22, 2019
11eb6ac
fix tests
dojutsu-user Oct 22, 2019
95e7ee7
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Oct 28, 2019
67d4885
Merge branch 'master' into search-pageviews-sorting
dojutsu-user Oct 31, 2019
9cb2310
Merge branch 'master' into search-pageviews-sorting
davidfischer May 7, 2020
3ce38c8
Index and refactor page view counting
davidfischer May 8, 2020
46747b0
Feedback updates and renames for clarity
davidfischer May 8, 2020
7e5a1f0
Merge branch 'master' into search-pageviews-sorting
davidfischer May 19, 2020
b530bbc
Move pageview models to the analytics app
davidfischer May 19, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions readthedocs/api/v2/views/footer_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
highest_version,
parse_version_failsafe,
)
from readthedocs.search.tasks import increase_page_view_count


def get_version_compare_data(project, base_version=None):
Expand Down Expand Up @@ -210,6 +211,14 @@ def get(self, request, format=None):
'version_supported': version.supported,
}

# increase the page view count
page_slug = request.GET.get('page', 'index')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should likely not default to index, no? If we don't have a page, we probably shouldn't be storing anything, or some other string like rtd-unknown-page, not index.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't seen any real examples where the page parameter is blank. It is index for the root and even with the dirhtml builder it uses the subdirectory name. I'll add a simple if page_slug check.

increase_page_view_count.delay(
project_slug=context['project'].slug,
version_slug=context['version'].slug,
path=page_slug
)

# Allow folks to hook onto the footer response for various information
# collection, or to modify the resp_data.
footer_response.send(
Expand Down
5 changes: 5 additions & 0 deletions readthedocs/projects/urls/private.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
IntegrationExchangeDetail,
IntegrationList,
IntegrationWebhookSync,
PageViewAdmin,
ProjectAdvancedUpdate,
ProjectAdvertisingUpdate,
ProjectDashboard,
Expand Down Expand Up @@ -134,6 +135,10 @@
SearchAnalytics.as_view(),
name='projects_search_analytics',
),
url(
r'^(?P<project_slug>[-\w]+)/page-views/$',
PageViewAdmin.as_view(), name='projects_page_views',
),
]

domain_urls = [
Expand Down
40 changes: 39 additions & 1 deletion readthedocs/projects/views/private.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
from readthedocs.projects.utils import Echo
from readthedocs.projects.views.base import ProjectAdminMixin, ProjectSpamMixin
from readthedocs.projects.views.mixins import ProjectImportMixin
from readthedocs.search.models import SearchQuery
from readthedocs.search.models import SearchQuery, PageView

from ..tasks import retry_domain_verification

Expand Down Expand Up @@ -1002,3 +1002,41 @@ def _search_analytics_csv_data(self):
)
response['Content-Disposition'] = f'attachment; filename="{file_name}"'
return response


class PageViewAdmin(ProjectAdminMixin, PrivateViewMixin, TemplateView):

template_name = 'projects/project_page_views.html'
http_method_names = ['get']

def get_context_data(self, **kwargs):
context = super().get_context_data(**kwargs)
project = self.get_project()

top_viewed_pages = PageView.get_top_viewed_pages(project)
top_viewed_pages_iter = zip(
top_viewed_pages['pages'],
top_viewed_pages['view_counts']
)

all_pages = PageView.objects.filter(project=project).values_list('path', flat=True)
if all_pages.exists():
all_pages = sorted(list(set(all_pages)))
page_path = self.request.GET.get('page', all_pages[0])
else:
all_pages = []
page_path = ''

page_data = PageView.get_page_view_count_of_one_month(
project_slug=project.slug,
page_path=page_path
)

context.update({
'top_viewed_pages_iter': top_viewed_pages_iter,
'page_data': page_data,
'page_path': page_path,
'all_pages': all_pages,
})

return context
56 changes: 31 additions & 25 deletions readthedocs/rtd_tests/tests/test_footer.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,8 +233,8 @@ class TestFooterPerformance(APITestCase):
url = '/api/v2/footer_html/?project=pip&version=latest&page=index&docroot=/'
factory = APIRequestFactory()

# The expected number of queries for generating the footer
# This shouldn't increase unless we modify the footer API
# The expected number of queries for generating the footer.
# This shouldn't increase unless we modify the footer API.
EXPECTED_QUERIES = 9

def setUp(self):
Expand All @@ -249,30 +249,36 @@ def render(self):

def test_version_queries(self):
# The number of Versions shouldn't impact the number of queries
with self.assertNumQueries(self.EXPECTED_QUERIES):
response = self.render()
self.assertContains(response, '0.8.1')

for patch in range(3):
identifier = '0.99.{}'.format(patch)
self.pip.versions.create(
verbose_name=identifier,
identifier=identifier,
type=TAG,
active=True,
)

with self.assertNumQueries(self.EXPECTED_QUERIES):
response = self.render()
self.assertContains(response, '0.99.0')
with mock.patch('readthedocs.api.v2.views.footer_views.increase_page_view_count') as mocked:
mocked.side_effect = None

with self.assertNumQueries(self.EXPECTED_QUERIES):
response = self.render()
self.assertContains(response, '0.8.1')

for patch in range(3):
identifier = '0.99.{}'.format(patch)
self.pip.versions.create(
verbose_name=identifier,
identifier=identifier,
type=TAG,
active=True,
)

with self.assertNumQueries(self.EXPECTED_QUERIES):
response = self.render()
self.assertContains(response, '0.99.0')

def test_domain_queries(self):
# Setting up a custom domain shouldn't impact the number of queries
self.pip.domains.create(
domain='http://docs.foobar.com',
canonical=True,
)
with mock.patch('readthedocs.api.v2.views.footer_views.increase_page_view_count') as mocked:
mocked.side_effect = None

with self.assertNumQueries(self.EXPECTED_QUERIES):
response = self.render()
self.assertContains(response, 'docs.foobar.com')
self.pip.domains.create(
domain='http://docs.foobar.com',
canonical=True,
)

with self.assertNumQueries(self.EXPECTED_QUERIES):
response = self.render()
self.assertContains(response, 'docs.foobar.com')
11 changes: 10 additions & 1 deletion readthedocs/search/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from django.contrib import admin

from .models import SearchQuery
from .models import SearchQuery, PageView


class SearchQueryAdmin(admin.ModelAdmin):
Expand All @@ -14,4 +14,13 @@ class SearchQueryAdmin(admin.ModelAdmin):
list_select_related = ('project', 'version', 'version__project')


class PageViewAdmin(admin.ModelAdmin):
raw_id_fields = ('project', 'version')
list_display = ('__str__', 'view_count')
search_fields = ('project__slug', 'version__slug', 'path')
readonly_fields = ('date',)
list_select_related = ('project', 'version', 'version__project')


admin.site.register(SearchQuery, SearchQueryAdmin)
admin.site.register(PageView, PageViewAdmin)
29 changes: 29 additions & 0 deletions readthedocs/search/migrations/0002_create_page_views_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.24 on 2019-09-18 06:11
from __future__ import unicode_literals

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):

dependencies = [
('builds', '0010_add-description-field-to-automation-rule'),
('projects', '0044_auto_20190703_1300'),
('search', '0001_initial'),
]

operations = [
migrations.CreateModel(
name='PageView',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('path', models.CharField(max_length=4096)),
('view_count', models.PositiveIntegerField(default=1)),
('date', models.DateField()),
('project', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='page_views', to='projects.Project')),
('version', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='page_views', to='builds.Version', verbose_name='Version')),
],
),
]
112 changes: 106 additions & 6 deletions readthedocs/search/models.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Search Queries."""

from django.db import models
from django.db.models import Count
from django.db.models import Count, Sum
from django.db.models.functions import TruncDate
from django.utils import timezone
from django.utils.translation import ugettext_lazy as _
Expand All @@ -11,6 +11,7 @@
from readthedocs.builds.models import Version
from readthedocs.projects.models import Project
from readthedocs.projects.querysets import RelatedProjectQuerySet
from readthedocs.search.utils import _get_last_31_days_iter, _get_last_31_days_str


class SearchQuery(TimeStampedModel):
Expand Down Expand Up @@ -59,7 +60,7 @@ def generate_queries_count_of_one_month(cls, project_slug):
last_30th_day = timezone.now().date() - timezone.timedelta(days=30)

# this includes the current day also
last_31_days_iter = [last_30th_day + timezone.timedelta(days=n) for n in range(31)]
last_31_days_iter = _get_last_31_days_iter()

qs = cls.objects.filter(
project__slug=project_slug,
Expand All @@ -81,10 +82,109 @@ def generate_queries_count_of_one_month(cls, project_slug):

# format the date value to a more readable form
# Eg. `16 Jul`
last_31_days_str = [
timezone.datetime.strftime(date, '%d %b')
for date in last_31_days_iter
]
last_31_days_str = _get_last_31_days_str(date_format='%d %b')

final_data = {
'labels': last_31_days_str,
'int_data': count_data,
}

return final_data


class PageView(models.Model):
project = models.ForeignKey(
Project,
related_name='page_views',
on_delete=models.CASCADE,
)
version = models.ForeignKey(
Version,
verbose_name=_('Version'),
related_name='page_views',
on_delete=models.CASCADE,
)
path = models.CharField(max_length=4096)
view_count = models.PositiveIntegerField(default=1)
date = models.DateField()

def __str__(self):
return f'[{self.project.slug}:{self.version.slug}]: {self.path}'

@classmethod
def get_top_viewed_pages(cls, project):
"""
Returns top 10 pages according to view counts.

Structure of returned data is compatible to make graphs.
Sample returned data::
{
'pages': ['index', 'config-file/v1', 'intro/import-guide'],
'view_counts': [150, 120, 100]
}
This data shows that `index` is the most viewed page having 150 total views,
followed by `config-file/v1` and `intro/import-guide` having 120 and
100 total page views respectively.
"""
qs = (
cls.objects
.filter(project=project)
.values_list('path')
.annotate(total_views=Sum('view_count'))
.values_list('path', 'total_views')
.order_by('-total_views')[:10]
)

pages = []
view_counts = []

for data in qs.iterator():
pages.append(data[0])
view_counts.append(data[1])

final_data = {
'pages': pages,
'view_counts': view_counts,
}

return final_data

@classmethod
def get_page_view_count_of_one_month(cls, project_slug, page_path):
"""
Returns the total page views count for last 30 days for a particular `page_path`.

Structure of returned data is compatible to make graphs.
Sample returned data::
{
'labels': ['01 Jul', '02 Jul', '03 Jul'],
'int_data': [150, 200, 143]
}
This data shows that there were 150 page views on 01 July,
200 page views on 02 July and 143 page views on 03 July for a particular `page_path`.
"""
today = timezone.now().date()
last_30th_day = timezone.now().date() - timezone.timedelta(days=30)

# this includes the current day also
last_31_days_iter = _get_last_31_days_iter()

qs = cls.objects.filter(
project__slug=project_slug,
path=page_path,
).order_by('-date')

count_dict = dict(
qs.values('date')
.order_by('date')
.values_list('date', 'view_count')
)

count_data = [count_dict.get(date) or 0 for date in last_31_days_iter]

# format the date value to a more readable form
# Eg. `16 Jul`
last_31_days_str = _get_last_31_days_str(date_format='%d %b')

final_data = {
'labels': last_31_days_str,
Expand Down
26 changes: 25 additions & 1 deletion readthedocs/search/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from readthedocs.builds.models import Version
from readthedocs.projects.models import Project
from readthedocs.search.models import SearchQuery
from readthedocs.search.models import SearchQuery, PageView
from readthedocs.worker import app
from .utils import _get_index, _get_document

Expand Down Expand Up @@ -212,3 +212,27 @@ def record_search_query(project_slug, version_slug, query, total_results, time_s
)
obj.created = time
obj.save()


@app.task(queue='web')
def increase_page_view_count(project_slug, version_slug, path):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if running this as a task is actually a good deal slower then running it in the footer itself? We already have the Project & Version object in the footer, and not we're adding additional queries for them here. They should be fast queries, but definitely additional load.

I guess the proper solution is to cache these lookups, which we can do later if it's an issue.

today_date = timezone.now().date()
page_view_obj = PageView.objects.filter(
project__slug=project_slug,
version__slug=version_slug,
path=path,
date=today_date,
).first()

if page_view_obj:
page_view_obj.view_count += 1
page_view_obj.save()
else:
project = Project.objects.get(slug=project_slug)
version = Version.objects.get(project=project, slug=version_slug)
PageView.objects.create(
project=project,
version=version,
path=path,
date=today_date,
)
Loading