Skip to content

Page views: use origin URL instead of page name #7293

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Aug 27, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 20 additions & 13 deletions readthedocs/analytics/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from django.utils import timezone

from readthedocs.api.v2.signals import footer_response
from readthedocs.core.unresolver import unresolve
from readthedocs.projects.models import Feature

from .models import PageView
Expand All @@ -17,16 +18,22 @@ def increase_page_view_count(sender, **kwargs):

project = context['project']
version = context['version']
# footer_response sends an empty path for the index
path = context['path'] or '/'

if project.has_feature(Feature.STORE_PAGEVIEWS):
page_view, _ = PageView.objects.get_or_create(
project=project,
version=version,
path=path,
date=timezone.now().date(),
)
PageView.objects.filter(pk=page_view.pk).update(
view_count=F('view_count') + 1
)
origin = kwargs['origin']

if not origin or not project.has_feature(Feature.STORE_PAGEVIEWS):
return

unresolved = unresolve(origin)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if this is the best approach right now. I think this is going to add a good number of queries to the footer, which is already our most expensive view. If we can just pull the path off the request, I think that is probably best, but maybe calling unresolve is correct, and we should focus on optimizations there?

path = unresolved.filename
if path.endswith('/'):
path += 'index.html'

page_view, _ = PageView.objects.get_or_create(
project=project,
version=version,
path=path,
date=timezone.now().date(),
)
PageView.objects.filter(pk=page_view.pk).update(
view_count=F('view_count') + 1
)
48 changes: 25 additions & 23 deletions readthedocs/analytics/tests.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
from unittest import mock

from django_dynamic_fixture import get
from django.test import TestCase, RequestFactory
import pytest
from django.test import RequestFactory, TestCase, override_settings
from django.utils import timezone
from django_dynamic_fixture import get

from readthedocs.builds.models import Version
from readthedocs.projects.models import Project, Feature
from readthedocs.projects.models import Feature, Project

from .models import PageView
from .signals import increase_page_view_count
from .utils import (
anonymize_ip_address,
anonymize_user_agent,
get_client_ip,
)
from .utils import anonymize_ip_address, anonymize_user_agent, get_client_ip


class UtilsTests(TestCase):
Expand Down Expand Up @@ -97,14 +94,17 @@ def test_get_client_ip_with_remote_addr(self):
self.assertEqual(client_ip, '203.0.113.195')


class AnalyticsTasksTests(TestCase):
@pytest.mark.proxito
@override_settings(PUBLIC_DOMAIN='readthedocs.io')
class AnalyticsPageViewsTests(TestCase):

def test_increase_page_view_count(self):
project = get(
Project,
slug='project-1',
)
version = get(Version, slug='1.8', project=project)
path = "index"
origin = f"https://{project.slug}.readthedocs.io/en/latest/index.html"

today = timezone.now()
tomorrow = timezone.now() + timezone.timedelta(days=1)
Expand All @@ -117,11 +117,10 @@ def test_increase_page_view_count(self):
context = {
"project": project,
"version": version,
"path": path,
}

# Without the feature flag, no PageView is created
increase_page_view_count(None, context=context)
increase_page_view_count(None, context=context, origin=origin)
assert (
PageView.objects.all().count() == 0
)
Expand All @@ -135,44 +134,47 @@ def test_increase_page_view_count(self):
with mock.patch('readthedocs.analytics.tasks.timezone.now') as mocked_timezone:
mocked_timezone.return_value = yesterday

increase_page_view_count(None, context=context)
increase_page_view_count(None, context=context, origin=origin)

assert (
PageView.objects.all().count() == 1
), f'PageView object for path \'{path}\' is created'
), f'PageView object for path \'{origin}\' is created'
assert (
PageView.objects.all().first().view_count == 1
), '\'index\' has 1 view'

increase_page_view_count(None, context=context)
increase_page_view_count(None, context=context, origin=origin)

assert (
PageView.objects.all().count() == 1
), f'PageView object for path \'{path}\' is already created'
), f'PageView object for path \'{origin}\' is already created'
assert PageView.objects.filter(path='index.html').count() == 1
assert (
PageView.objects.all().first().view_count == 2
), f'\'{path}\' has 2 views now'
), f'\'{origin}\' has 2 views now'

# testing for today
with mock.patch('readthedocs.analytics.tasks.timezone.now') as mocked_timezone:
mocked_timezone.return_value = today
increase_page_view_count(None, context=context)
increase_page_view_count(None, context=context, origin=origin)

assert (
PageView.objects.all().count() == 2
), f'PageView object for path \'{path}\' is created for two days (yesterday and today)'
), f'PageView object for path \'{origin}\' is created for two days (yesterday and today)'
assert PageView.objects.filter(path='index.html').count() == 2
assert (
PageView.objects.all().order_by('-date').first().view_count == 1
), f'\'{path}\' has 1 view today'
), f'\'{origin}\' has 1 view today'

# testing for tomorrow
with mock.patch('readthedocs.analytics.tasks.timezone.now') as mocked_timezone:
mocked_timezone.return_value = tomorrow
increase_page_view_count(None, context=context)
increase_page_view_count(None, context=context, origin=origin)

assert (
PageView.objects.all().count() == 3
), f'PageView object for path \'{path}\' is created for three days (yesterday, today & tomorrow)'
), f'PageView object for path \'{origin}\' is created for three days (yesterday, today & tomorrow)'
assert PageView.objects.filter(path='index.html').count() == 3
assert (
PageView.objects.all().order_by('-date').first().view_count == 1
), f'\'{path}\' has 1 view tomorrow'
), f'\'{origin}\' has 1 view tomorrow'
16 changes: 15 additions & 1 deletion readthedocs/api/v2/views/footer_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from readthedocs.builds.models import Version
from readthedocs.core.utils.extend import SettingsOverrideObject
from readthedocs.projects.constants import MKDOCS, SPHINX_HTMLDIR
from readthedocs.projects.models import Project, Feature
from readthedocs.projects.models import Feature, Project
from readthedocs.projects.version_handling import (
highest_version,
parse_version_failsafe,
Expand Down Expand Up @@ -81,6 +81,19 @@ class BaseFooterHTML(APIView):
"""
Render and return footer markup.

Query parameters:

- project
- version
- page: Sphinx's page name, used for path operations,
like change between languages (deprecated in favor of ``origin``).
- origin: Full path with domain, used for path operations.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why don't we call it full_path?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or absolute_uri as Django calls it when it has the domain on it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm avoiding naming this path since it includes the domain as well, we use full path to refer to a path in other parts of the code. Not sure about uri

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea, origin isn't the right name, since that's used in HTTP for just the origin hostname: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Origin

I think absolute_uri or similar is best: https://docs.djangoproject.com/en/3.1/ref/request-response/#django.http.HttpRequest.build_absolute_uri

- theme: Used to decide how to integrate the flyout menu.
- docroot: Path where all the source documents are.
Used to build the ``edit_on`` URL.
- source_suffix: Suffix from the source document.
Used to build the ``edit_on`` URL.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are passing subproject too, but we are not using it. I can remove it in another PR if that's ok


.. note::

The methods `_get_project` and `_get_version`
Expand Down Expand Up @@ -229,6 +242,7 @@ def get(self, request, format=None):
request=request,
context=context,
resp_data=resp_data,
origin=self.request.GET.get('origin'),
)

return Response(resp_data)
Expand Down
1 change: 1 addition & 0 deletions readthedocs/core/static-src/core/js/doc-embed/footer.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ function init() {
project: rtd['project'],
version: rtd['version'],
page: rtd['page'],
origin: window.location.href,
theme: rtd.get_theme_name(),
format: "jsonp",
};
Expand Down
2 changes: 1 addition & 1 deletion readthedocs/core/static/core/js/readthedocs-doc-embed.js

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion readthedocs/rtd_tests/tests/test_footer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from unittest import mock

from django.contrib.sessions.backends.base import SessionBase
from django.test import TestCase
from django.test.utils import override_settings
Expand Down Expand Up @@ -418,7 +419,7 @@ class TestFooterPerformance(APITestCase):

# The expected number of queries for generating the footer
# This shouldn't increase unless we modify the footer API
EXPECTED_QUERIES = 14
EXPECTED_QUERIES = 13
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have one query less bc we are not sending the origin parameter, so it defaults to false before doing a query for the feature flag.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should test this with an origin. I expect the unresolve method to actually add a few queries, which we should be careful about :/


def setUp(self):
self.pip = Project.objects.get(slug='pip')
Expand Down