Skip to content

Commit 55b5624

Browse files
authored
Search: move api.py into a module (#9616)
1 parent c31899b commit 55b5624

File tree

11 files changed

+182
-174
lines changed

11 files changed

+182
-174
lines changed

readthedocs/api/v2/proxied_urls.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from readthedocs.analytics.proxied_api import AnalyticsView
1111
from readthedocs.api.v2.views.proxied import ProxiedEmbedAPI, ProxiedFooterHTML
12-
from readthedocs.search.proxied_api import ProxiedPageSearchAPIView
12+
from readthedocs.search.api.v2.views import ProxiedPageSearchAPIView
1313

1414
api_footer_urls = [
1515
re_path(r'footer_html/', ProxiedFooterHTML.as_view(), name='footer_html'),

readthedocs/search/api/__init__.py

Whitespace-only changes.

readthedocs/search/api/pagination.py

+102
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
from collections import namedtuple
2+
from math import ceil
3+
4+
from django.utils.translation import gettext as _
5+
from rest_framework.exceptions import NotFound
6+
from rest_framework.pagination import PageNumberPagination
7+
8+
9+
class PaginatorPage:

    """
    Mimics the result from a paginator.

    By using this class, we avoid having to override a lot of methods
    of `PageNumberPagination` to make it work with the ES DSL object.

    Attributes set on each instance:

    - ``number``: the current page number, as given to the constructor.
    - ``paginator``: a named tuple exposing ``num_pages`` and ``count``.
    """

    # Built once for the class rather than on every instantiation:
    # creating a namedtuple type is comparatively expensive, and sharing it
    # means every page's ``paginator`` has the same type.
    _Paginator = namedtuple("Paginator", ["num_pages", "count"])

    def __init__(self, page_number, total_pages, count):
        """
        Store the current page and the totals of the whole result set.

        :param page_number: Number of the page this object represents.
        :param total_pages: Total number of pages available.
        :param count: Total number of results across all pages.
        """
        self.number = page_number
        self.paginator = self._Paginator(total_pages, count)

    def has_next(self):
        """Return True if the current page number is below the total number of pages."""
        return self.number < self.paginator.num_pages

    def has_previous(self):
        """Return True if the current page number is greater than 1."""
        return self.number > 1

    def next_page_number(self):
        """Return the current page number plus one (no bounds check is done)."""
        return self.number + 1

    def previous_page_number(self):
        """Return the current page number minus one (no bounds check is done)."""
        return self.number - 1
34+
35+
36+
class SearchPagination(PageNumberPagination):

    """
    Page-number paginator for the results of PageSearch.

    Slices and executes the search object directly instead of going
    through the paginator that `PageNumberPagination` normally builds.
    """

    page_size = 50
    page_size_query_param = "page_size"
    max_page_size = 100

    def _get_page_number(self, number):
        """
        Convert ``number`` into an integer page number.

        Returns ``-1`` when the value is a float with a fractional part,
        or when ``int()`` rejects it with ``TypeError`` or ``ValueError``.
        """
        if isinstance(number, float) and not number.is_integer():
            return -1
        try:
            return int(number)
        except (TypeError, ValueError):
            return -1

    def paginate_queryset(self, queryset, request, view=None):
        """
        Return the requested page of results from the ES queryset.

        The queryset is sliced to the requested window and executed,
        relying on the slicing support of the ES DSL object rather than
        on the paginator used with django's ORM. When ``queryset`` is falsy
        nothing is executed and an empty list is returned.

        As a side effect this sets ``self.request`` and ``self.page``
        (a ``PaginatorPage``), and enables ``self.display_page_controls``
        when there is more than one page and a template is configured.

        Raises ``NotFound`` if the requested page is not a positive integer.

        Mostly inspired by https://github.com/encode/django-rest-framework/blob/acbd9d8222e763c7f9c7dc2de23c430c702e06d4/rest_framework/pagination.py#L191  # noqa
        """
        # Other methods of this class read the request from the instance.
        self.request = request

        size = self.get_page_size(request)
        requested = request.query_params.get(self.page_query_param, 1)
        number = self._get_page_number(requested)

        if number < 1:
            # Report the value exactly as the client sent it.
            raise NotFound(
                self.invalid_page_message.format(
                    page_number=requested,
                    message=_("Invalid page"),
                )
            )

        lower = (number - 1) * size
        upper = number * size

        # Defaults used when there is nothing to search.
        response = []
        count = 0
        pages = 1

        if queryset:
            response = queryset[lower:upper].execute()
            count = response.hits.total["value"]
            # Count at least one hit so an empty result still has one page.
            pages = ceil(max(1, count) / size)

        if pages > 1 and self.template is not None:
            # The browsable API should display pagination controls.
            self.display_page_controls = True

        # Other methods of this class read the page from the instance.
        self.page = PaginatorPage(
            page_number=number,
            total_pages=pages,
            count=count,
        )

        return response

readthedocs/search/api/v2/__init__.py

Whitespace-only changes.

readthedocs/search/serializers.py renamed to readthedocs/search/api/v2/serializers.py

+36-37
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
from readthedocs.projects.models import Project
1919

2020
# Structures used for storing cached data of a version mostly.
21-
ProjectData = namedtuple('ProjectData', ['version', 'alias'])
22-
VersionData = namedtuple('VersionData', ['slug', 'docs_url'])
21+
ProjectData = namedtuple("ProjectData", ["version", "alias"])
22+
VersionData = namedtuple("VersionData", ["slug", "docs_url"])
2323

2424

2525
class ProjectHighlightSerializer(serializers.Serializer):
@@ -29,31 +29,31 @@ class ProjectHighlightSerializer(serializers.Serializer):
2929
description = serializers.SerializerMethodField()
3030

3131
def get_name(self, obj):
32-
return list(getattr(obj, 'name', []))
32+
return list(getattr(obj, "name", []))
3333

3434
def get_slug(self, obj):
35-
return list(getattr(obj, 'slug', []))
35+
return list(getattr(obj, "slug", []))
3636

3737
def get_description(self, obj):
38-
return list(getattr(obj, 'description', []))
38+
return list(getattr(obj, "description", []))
3939

4040

4141
class ProjectSearchSerializer(serializers.Serializer):
4242

43-
type = serializers.CharField(default='project', source=None, read_only=True)
43+
type = serializers.CharField(default="project", source=None, read_only=True)
4444
name = serializers.CharField()
4545
slug = serializers.CharField()
46-
link = serializers.CharField(source='url')
46+
link = serializers.CharField(source="url")
4747
description = serializers.CharField()
48-
highlights = ProjectHighlightSerializer(source='meta.highlight', default=dict)
48+
highlights = ProjectHighlightSerializer(source="meta.highlight", default=dict)
4949

5050

5151
class PageHighlightSerializer(serializers.Serializer):
5252

5353
title = serializers.SerializerMethodField()
5454

5555
def get_title(self, obj):
56-
return list(getattr(obj, 'title', []))
56+
return list(getattr(obj, "title", []))
5757

5858

5959
class PageSearchSerializer(serializers.Serializer):
@@ -66,14 +66,14 @@ class PageSearchSerializer(serializers.Serializer):
6666
It's a dictionary mapping the project slug to a ProjectData object.
6767
"""
6868

69-
type = serializers.CharField(default='page', source=None, read_only=True)
69+
type = serializers.CharField(default="page", source=None, read_only=True)
7070
project = serializers.CharField()
7171
project_alias = serializers.SerializerMethodField()
7272
version = serializers.CharField()
7373
title = serializers.CharField()
7474
path = serializers.SerializerMethodField()
7575
domain = serializers.SerializerMethodField()
76-
highlights = PageHighlightSerializer(source='meta.highlight', default=dict)
76+
highlights = PageHighlightSerializer(source="meta.highlight", default=dict)
7777
blocks = serializers.SerializerMethodField()
7878

7979
def _get_project_data(self, obj):
@@ -85,16 +85,18 @@ def _get_project_data(self, obj):
8585
If the result is fetched from the database,
8686
it's cached into ``projects_data``.
8787
"""
88-
project_data = self.context.get('projects_data', {}).get(obj.project)
88+
project_data = self.context.get("projects_data", {}).get(obj.project)
8989
if project_data:
9090
return project_data
9191

9292
project = Project.objects.filter(slug=obj.project).first()
9393
if project:
9494
docs_url = project.get_docs_url(version_slug=obj.version)
95-
project_alias = project.superprojects.values_list('alias', flat=True).first()
95+
project_alias = project.superprojects.values_list(
96+
"alias", flat=True
97+
).first()
9698

97-
projects_data = self.context.setdefault('projects_data', {})
99+
projects_data = self.context.setdefault("projects_data", {})
98100
version_data = VersionData(
99101
slug=obj.version,
100102
docs_url=docs_url,
@@ -116,7 +118,7 @@ def get_domain(self, obj):
116118
full_path = self._get_full_path(obj)
117119
if full_path:
118120
parsed = urlparse(full_path)
119-
return f'{parsed.scheme}://{parsed.netloc}'
121+
return f"{parsed.scheme}://{parsed.netloc}"
120122
return None
121123

122124
def get_path(self, obj):
@@ -136,16 +138,16 @@ def _get_full_path(self, obj):
136138
# and always end it with / so it goes directly to proxito.
137139
# For a generic doctype we just strip the index.html part if it exists.
138140
if obj.doctype in {SPHINX_HTMLDIR, MKDOCS, GENERIC}:
139-
path = re.sub('(^|/)index.html$', '/', path)
141+
path = re.sub("(^|/)index.html$", "/", path)
140142

141-
return docs_url.rstrip('/') + '/' + path.lstrip('/')
143+
return docs_url.rstrip("/") + "/" + path.lstrip("/")
142144
return None
143145

144146
def get_blocks(self, obj):
145147
"""Combine and sort inner results (domains and sections)."""
146148
serializers = {
147-
'domain': DomainSearchSerializer,
148-
'section': SectionSearchSerializer,
149+
"domain": DomainSearchSerializer,
150+
"section": SectionSearchSerializer,
149151
}
150152

151153
inner_hits = obj.meta.inner_hits
@@ -154,19 +156,16 @@ def get_blocks(self, obj):
154156

155157
# Make them identifiable before merging them
156158
for s in sections:
157-
s.type = 'section'
159+
s.type = "section"
158160
for d in domains:
159-
d.type = 'domain'
161+
d.type = "domain"
160162

161163
sorted_results = sorted(
162164
itertools.chain(sections, domains),
163-
key=attrgetter('meta.score'),
165+
key=attrgetter("meta.score"),
164166
reverse=True,
165167
)
166-
sorted_results = [
167-
serializers[hit.type](hit).data
168-
for hit in sorted_results
169-
]
168+
sorted_results = [serializers[hit.type](hit).data for hit in sorted_results]
170169
return sorted_results
171170

172171

@@ -176,20 +175,20 @@ class DomainHighlightSerializer(serializers.Serializer):
176175
content = serializers.SerializerMethodField()
177176

178177
def get_name(self, obj):
179-
return list(getattr(obj, 'domains.name', []))
178+
return list(getattr(obj, "domains.name", []))
180179

181180
def get_content(self, obj):
182-
return list(getattr(obj, 'domains.docstrings', []))
181+
return list(getattr(obj, "domains.docstrings", []))
183182

184183

185184
class DomainSearchSerializer(serializers.Serializer):
186185

187-
type = serializers.CharField(default='domain', source=None, read_only=True)
188-
role = serializers.CharField(source='role_name')
186+
type = serializers.CharField(default="domain", source=None, read_only=True)
187+
role = serializers.CharField(source="role_name")
189188
name = serializers.CharField()
190-
id = serializers.CharField(source='anchor')
191-
content = serializers.CharField(source='docstrings')
192-
highlights = DomainHighlightSerializer(source='meta.highlight', default=dict)
189+
id = serializers.CharField(source="anchor")
190+
content = serializers.CharField(source="docstrings")
191+
highlights = DomainHighlightSerializer(source="meta.highlight", default=dict)
193192

194193

195194
class SectionHighlightSerializer(serializers.Serializer):
@@ -198,16 +197,16 @@ class SectionHighlightSerializer(serializers.Serializer):
198197
content = serializers.SerializerMethodField()
199198

200199
def get_title(self, obj):
201-
return list(getattr(obj, 'sections.title', []))
200+
return list(getattr(obj, "sections.title", []))
202201

203202
def get_content(self, obj):
204-
return list(getattr(obj, 'sections.content', []))
203+
return list(getattr(obj, "sections.content", []))
205204

206205

207206
class SectionSearchSerializer(serializers.Serializer):
208207

209-
type = serializers.CharField(default='section', source=None, read_only=True)
208+
type = serializers.CharField(default="section", source=None, read_only=True)
210209
id = serializers.CharField()
211210
title = serializers.CharField()
212211
content = serializers.CharField()
213-
highlights = SectionHighlightSerializer(source='meta.highlight', default=dict)
212+
highlights = SectionHighlightSerializer(source="meta.highlight", default=dict)

readthedocs/search/api/v2/urls.py

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from django.urls import path
2+
3+
from readthedocs.search.api.v2.views import PageSearchAPIView
4+
5+
# Single route: the root of this URLconf serves the page search API view.
urlpatterns = [path("", PageSearchAPIView.as_view(), name="search_api")]

0 commit comments

Comments
 (0)