From 70d780460cfc1b9932527977852db86aad35e5f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Honza=20Kr=C3=A1l?= Date: Wed, 3 Jan 2018 01:40:41 +0100 Subject: [PATCH 1/8] Search.get_page to start off pagination --- elasticsearch_dsl/search.py | 10 +++++++++ test_elasticsearch_dsl/test_pagination.py | 27 +++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 test_elasticsearch_dsl/test_pagination.py diff --git a/elasticsearch_dsl/search.py b/elasticsearch_dsl/search.py index 94778dfa7..ee32be2fa 100644 --- a/elasticsearch_dsl/search.py +++ b/elasticsearch_dsl/search.py @@ -337,6 +337,16 @@ def __getitem__(self, n): s._extra['size'] = 1 return s + def get_page(self, page_no): + if page_no < 0: + raise ValueError("Search pagination does not support negative indexing.") + elif page_no == 0: + raise ValueError("Search pagination is 1-based.") + size = self._extra.get("size", 10) + s = self._clone() + s._extra["from"] = size * (page_no - 1) + return s.execute() + @classmethod def from_dict(cls, d): """ diff --git a/test_elasticsearch_dsl/test_pagination.py b/test_elasticsearch_dsl/test_pagination.py new file mode 100644 index 000000000..91c41030e --- /dev/null +++ b/test_elasticsearch_dsl/test_pagination.py @@ -0,0 +1,27 @@ +from elasticsearch_dsl.search import Search + +from pytest import raises + +class DummySearch(Search): + def __init__(self, *args, **kwargs): + super(DummySearch, self).__init__(*args, **kwargs) + self._executions = [] + + def execute(self, *args, **kwargs): + return self.to_dict() + +def test_pages_are_1_based(): + body = DummySearch().get_page(1) + assert body.get("size", 10) == 10 + assert body.get("from", 0) == 0 + +def test_pages_respect_page_size(): + body = DummySearch()[:6].get_page(2) + assert body["size"] == 6 + assert body["from"] == 6 + +def test_get_page_doesnt_allow_0_or_negative_pages(): + with raises(ValueError): + DummySearch().get_page(0) + with raises(ValueError): + DummySearch().get_page(-1) From 0b2e916506b9f95891509f44caaebf8075ec300a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Honza=20Kr=C3=A1l?= Date: Wed, 3 Jan 2018 01:55:06 +0100 Subject: [PATCH 2/8] Search.get_next_page --- elasticsearch_dsl/search.py | 7 +++++++ test_elasticsearch_dsl/test_pagination.py | 12 ++++++++++++ 2 files changed, 19 insertions(+) diff --git a/elasticsearch_dsl/search.py b/elasticsearch_dsl/search.py index ee32be2fa..41dda147b 100644 --- a/elasticsearch_dsl/search.py +++ b/elasticsearch_dsl/search.py @@ -347,6 +347,13 @@ def get_page(self, page_no): s._extra["from"] = size * (page_no - 1) return s.execute() + def get_next_page(self, last_hit, step=1): + size = self._extra.get("size", 10) + s = self._clone() + s._extra["from"] = size * (step - 1) + s._extra["search_after"] = last_hit + return s.execute() + @classmethod def from_dict(cls, d): """ diff --git a/test_elasticsearch_dsl/test_pagination.py b/test_elasticsearch_dsl/test_pagination.py index 91c41030e..cdad8c8bc 100644 --- a/test_elasticsearch_dsl/test_pagination.py +++ b/test_elasticsearch_dsl/test_pagination.py @@ -25,3 +25,15 @@ def test_get_page_doesnt_allow_0_or_negative_pages(): DummySearch().get_page(0) with raises(ValueError): DummySearch().get_page(-1) + +def test_next_page_respects_size(): + body = DummySearch()[123:124].get_next_page([1, 2]) + assert body["size"] == 1 + assert body["from"] == 0 + assert body["search_after"] == [1, 2] + +def test_next_page_can_skip_pages(): + body = DummySearch()[123:124].get_next_page([1, 2], 3) + assert body["size"] == 1 + assert body["from"] == 2 + assert body["search_after"] == [1, 2] From af28ae5e6f8b6cf7aac56ba4dc899be584ec1b00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Honza=20Kr=C3=A1l?= Date: Wed, 3 Jan 2018 02:41:40 +0100 Subject: [PATCH 3/8] Added Search.get_previous_page --- elasticsearch_dsl/search.py | 31 ++++++++++++++++ test_elasticsearch_dsl/test_pagination.py | 44 +++++++++++++++++------ 2 files changed, 64 insertions(+), 11 deletions(-) diff --git a/elasticsearch_dsl/search.py b/elasticsearch_dsl/search.py index 41dda147b..28d64be73 100644 --- a/elasticsearch_dsl/search.py +++ b/elasticsearch_dsl/search.py @@ -264,6 +264,25 @@ def _clone(self): return s + + +def _reverse_sort_entry(sort_entry): + # "field" + if isinstance(sort_entry, string_types): + if sort_entry == '_score': + return {'_score': 'asc'} + return {sort_entry: 'desc'} + + f, sort_entry = sort_entry.copy().popitem() + # {"field": "asc/desc"} + if isinstance(sort_entry, string_types): + return {f: 'asc' if sort_entry == 'desc' else 'desc'} + + # {"field": {"order": "asc/desc"}} + sort_entry = sort_entry.copy() + sort_entry['order'] = 'asc' if sort_entry['order'] == 'desc' else 'desc' + return {f: sort_entry} + class Search(Request): query = ProxyDescriptor('query') post_filter = ProxyDescriptor('post_filter') @@ -354,6 +373,18 @@ def get_next_page(self, last_hit, step=1): s._extra["search_after"] = last_hit return s.execute() + def get_previous_page(self, first_hit, step=1): + size = self._extra.get("size", 10) + s = self._clone() + s._extra["from"] = size * (step - 1) + s._extra["search_after"] = first_hit + # reverse the sort order + s._sort = [_reverse_sort_entry(s) for s in self._sort] + resp = s.execute() + # reverse the hits in the page + resp['hits']['hits'] = resp['hits']['hits'][::-1] + return resp + @classmethod def from_dict(cls, d): """ diff --git a/test_elasticsearch_dsl/test_pagination.py b/test_elasticsearch_dsl/test_pagination.py index cdad8c8bc..343df1699 100644 --- a/test_elasticsearch_dsl/test_pagination.py +++ b/test_elasticsearch_dsl/test_pagination.py @@ -8,17 +8,22 @@ def __init__(self, *args, **kwargs): self._executions = [] def execute(self, *args, **kwargs): - return self.to_dict() + return { + 'req': self.to_dict(), + 'hits': { + 'hits': list(range(self._extra.get('size', 10))) + } + } def test_pages_are_1_based(): body = DummySearch().get_page(1) - assert body.get("size", 10) == 10 - assert body.get("from", 0) == 0 + assert body['req'].get("size", 10) == 10 + assert body['req'].get("from", 0) == 0 def test_pages_respect_page_size(): body = DummySearch()[:6].get_page(2) - assert body["size"] == 6 - assert body["from"] == 6 + assert body['req']["size"] == 6 + assert body['req']["from"] == 6 def test_get_page_doesnt_allow_0_or_negative_pages(): with raises(ValueError): @@ -28,12 +33,29 @@ def test_get_page_doesnt_allow_0_or_negative_pages(): def test_next_page_respects_size(): body = DummySearch()[123:124].get_next_page([1, 2]) - assert body["size"] == 1 - assert body["from"] == 0 - assert body["search_after"] == [1, 2] + assert body['req']["size"] == 1 + assert body['req']["from"] == 0 + assert body['req']["search_after"] == [1, 2] def test_next_page_can_skip_pages(): body = DummySearch()[123:124].get_next_page([1, 2], 3) - assert body["size"] == 1 - assert body["from"] == 2 - assert body["search_after"] == [1, 2] + assert body['req']["size"] == 1 + assert body['req']["from"] == 2 + assert body['req']["search_after"] == [1, 2] + +def test_previous_page_reverses_sort_and_hits(): + body = DummySearch()[:5].sort( + '_score', + '-publish_date', + {'author.keyword': 'asc'} + ).get_previous_page([1, 2]) + + assert body['req']["size"] == 5 + assert body['req']["from"] == 0 + assert body['req']["search_after"] == [1, 2] + assert body['req']['sort'] == [ + {'_score': 'asc'}, + {"publish_date": {"order": "asc"}}, + {'author.keyword': 'desc'} + ] + assert body['hits']['hits'] == [4, 3, 2, 1, 0] From 138c3df0e8174ca2176565aa3b03628769c5c92b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Honza=20Kr=C3=A1l?= Date: Wed, 3 Jan 2018 02:48:17 +0100 Subject: [PATCH 4/8] avoid name clash in python 2 --- elasticsearch_dsl/search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elasticsearch_dsl/search.py b/elasticsearch_dsl/search.py index 28d64be73..489d0a32e 100644 --- a/elasticsearch_dsl/search.py +++ b/elasticsearch_dsl/search.py @@ -379,7 +379,7 @@ def get_previous_page(self, first_hit, step=1): s._extra["from"] = size * (step - 1) s._extra["search_after"] = first_hit # reverse the sort order - s._sort = [_reverse_sort_entry(s) for s in self._sort] + s._sort = [_reverse_sort_entry(se) for se in self._sort] resp = s.execute() # reverse the hits in the page resp['hits']['hits'] = resp['hits']['hits'][::-1] From f850b57a02e2924b3d332595f28e3ea5594dd1b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Honza=20Kr=C3=A1l?= Date: Thu, 4 Jan 2018 03:49:37 +0100 Subject: [PATCH 5/8] integration tests for pagination --- elasticsearch_dsl/search.py | 14 ++--- .../test_integration/test_pagination.py | 62 +++++++++++++++++++ test_elasticsearch_dsl/test_pagination.py | 15 ++--- 3 files changed, 74 insertions(+), 17 deletions(-) create mode 100644 test_elasticsearch_dsl/test_integration/test_pagination.py diff --git a/elasticsearch_dsl/search.py b/elasticsearch_dsl/search.py index 489d0a32e..a70fa579c 100644 --- a/elasticsearch_dsl/search.py +++ b/elasticsearch_dsl/search.py @@ -366,23 +366,23 @@ def get_page(self, page_no): s._extra["from"] = size * (page_no - 1) return s.execute() - def get_next_page(self, last_hit, step=1): + def get_next_page(self, last_hit): size = self._extra.get("size", 10) s = self._clone() - s._extra["from"] = size * (step - 1) - s._extra["search_after"] = last_hit + s._extra["from"] = 0 + s._extra["search_after"] = list(last_hit) return s.execute() - def get_previous_page(self, first_hit, step=1): + def get_previous_page(self, first_hit): size = self._extra.get("size", 10) s = self._clone() - s._extra["from"] = size * (step - 1) - s._extra["search_after"] = first_hit + s._extra["from"] = 0 + s._extra["search_after"] = list(first_hit) # reverse the sort order s._sort = [_reverse_sort_entry(se) for se in self._sort] resp = s.execute() # reverse the hits in the page - resp['hits']['hits'] = resp['hits']['hits'][::-1] + resp['hits']['hits'] = resp.to_dict()['hits']['hits'][::-1] return resp @classmethod diff --git a/test_elasticsearch_dsl/test_integration/test_pagination.py b/test_elasticsearch_dsl/test_integration/test_pagination.py new file mode 100644 index 000000000..24c032bfa --- /dev/null +++ b/test_elasticsearch_dsl/test_integration/test_pagination.py @@ -0,0 +1,62 @@ +from random import shuffle + +from elasticsearch_dsl import Search + +from pytest import fixture + +@fixture(scope="session") +def sorted_search(data_client): + return Search(index='flat-git').sort( + 'stats.lines', + '-stats.files', + {'_id': {'order': 'desc'}}) + +@fixture(scope="session") +def commits(sorted_search): + """ + List of all commits as sorted by ``sorted_search`` + """ + return list(sorted_search.params(preserve_order=True).scan()) + +def get_commit_page(commits, page, size=10): + """ + Get appropriate page using python slicing for control. + """ + start = (page - 1) * size + return commits[start:start + size] + +def test_get_page(sorted_search, commits): + # set page size to 2 + s = sorted_search[:2] + + # process pages in random order to avoid possible side effects + pages = list(range(1, 26)) + shuffle(pages) + + for page_no in pages: + page = get_commit_page(commits, page_no, 2) + assert page == s.get_page(page_no).hits + + # non existing page returns empty + assert len(s.get_page(27).hits) == 0 + assert len(s.get_page(42).hits) == 0 + +def test_get_next_page(sorted_search, commits): + # manually retrieve page 4 of size 5 + page4 = sorted_search[15:20].execute() + assert page4.hits == get_commit_page(commits, 4, 5) + + # set page size to 5 + s = sorted_search[:5] + page5 = s.get_next_page(page4.hits[-1].meta.sort) + assert page5.hits == get_commit_page(commits, 5, 5) + +def test_get_previous_page(sorted_search, commits): + # manually retrieve page 4 of size 5 + page4 = sorted_search[15:20].execute() + assert page4.hits == get_commit_page(commits, 4, 5) + + # set page size to 5 + s = sorted_search[:5] + page3 = s.get_previous_page(page4.hits[0].meta.sort) + assert page3.hits == get_commit_page(commits, 3, 5) diff --git a/test_elasticsearch_dsl/test_pagination.py b/test_elasticsearch_dsl/test_pagination.py index 343df1699..663a9cd10 100644 --- a/test_elasticsearch_dsl/test_pagination.py +++ b/test_elasticsearch_dsl/test_pagination.py @@ -1,3 +1,4 @@ +from elasticsearch_dsl.utils import AttrDict from elasticsearch_dsl.search import Search from pytest import raises @@ -8,17 +9,17 @@ def __init__(self, *args, **kwargs): self._executions = [] def execute(self, *args, **kwargs): - return { + return AttrDict({ 'req': self.to_dict(), 'hits': { 'hits': list(range(self._extra.get('size', 10))) } - } + }) def test_pages_are_1_based(): body = DummySearch().get_page(1) - assert body['req'].get("size", 10) == 10 - assert body['req'].get("from", 0) == 0 + assert "size" not in body['req'] + assert body['req']["from"] == 0 def test_pages_respect_page_size(): body = DummySearch()[:6].get_page(2) @@ -37,12 +38,6 @@ def test_next_page_respects_size(): assert body['req']["from"] == 0 assert body['req']["search_after"] == [1, 2] -def test_next_page_can_skip_pages(): - body = DummySearch()[123:124].get_next_page([1, 2], 3) - assert body['req']["size"] == 1 - assert body['req']["from"] == 2 - assert body['req']["search_after"] == [1, 2] - def test_previous_page_reverses_sort_and_hits(): body = DummySearch()[:5].sort( '_score', From c28ca488a70088a6d959e3f8edf1b806557e67cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Honza=20Kr=C3=A1l?= Date: Thu, 4 Jan 2018 17:51:00 +0100 Subject: [PATCH 6/8] Allow negative pages (from back) for get_page --- elasticsearch_dsl/search.py | 21 ++++++++++++------- .../test_integration/test_pagination.py | 18 +++++++++++++++- test_elasticsearch_dsl/test_pagination.py | 4 +--- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/elasticsearch_dsl/search.py b/elasticsearch_dsl/search.py index a70fa579c..4f1e4ca52 100644 --- a/elasticsearch_dsl/search.py +++ b/elasticsearch_dsl/search.py @@ -357,24 +357,31 @@ def __getitem__(self, n): return s def get_page(self, page_no): - if page_no < 0: - raise ValueError("Search pagination does not support negative indexing.") - elif page_no == 0: + if page_no == 0: raise ValueError("Search pagination is 1-based.") size = self._extra.get("size", 10) s = self._clone() - s._extra["from"] = size * (page_no - 1) - return s.execute() + s._extra["from"] = size * (abs(page_no) - 1) + + # reverse the sort order when pagination from back + if page_no < 0: + s._sort = [_reverse_sort_entry(se) for se in self._sort] + + resp = s.execute() + + # reverse the hits in the page when pagination from back + if page_no < 0: + resp['hits']['hits'] = resp.to_dict()['hits']['hits'][::-1] + + return resp def get_next_page(self, last_hit): - size = self._extra.get("size", 10) s = self._clone() s._extra["from"] = 0 s._extra["search_after"] = list(last_hit) return s.execute() def get_previous_page(self, first_hit): - size = self._extra.get("size", 10) s = self._clone() s._extra["from"] = 0 s._extra["search_after"] = list(first_hit) diff --git a/test_elasticsearch_dsl/test_integration/test_pagination.py b/test_elasticsearch_dsl/test_integration/test_pagination.py index 24c032bfa..d96e0834a 100644 --- a/test_elasticsearch_dsl/test_integration/test_pagination.py +++ b/test_elasticsearch_dsl/test_integration/test_pagination.py @@ -30,7 +30,7 @@ def test_get_page(sorted_search, commits): s = sorted_search[:2] # process pages in random order to avoid possible side effects - pages = list(range(1, 26)) + pages = list(range(1, 27)) shuffle(pages) for page_no in pages: @@ -41,6 +41,22 @@ def test_get_page(sorted_search, commits): assert len(s.get_page(27).hits) == 0 assert len(s.get_page(42).hits) == 0 +def test_get_negative_page(sorted_search, commits): + # set page size to 2 + s = sorted_search[:2] + + # process pages in random order to avoid possible side effects + pages = list(range(-1, -27, -1)) + shuffle(pages) + + for page_no in pages: + page = get_commit_page(commits, 27 + page_no, 2) + assert page == s.get_page(page_no).hits + + # non existing page returns empty + assert len(s.get_page(-27).hits) == 0 + assert len(s.get_page(-42).hits) == 0 + def test_get_next_page(sorted_search, commits): # manually retrieve page 4 of size 5 page4 = sorted_search[15:20].execute() diff --git a/test_elasticsearch_dsl/test_pagination.py b/test_elasticsearch_dsl/test_pagination.py index 663a9cd10..7b5d5309a 100644 --- a/test_elasticsearch_dsl/test_pagination.py +++ b/test_elasticsearch_dsl/test_pagination.py @@ -26,11 +26,9 @@ def test_pages_respect_page_size(): assert body['req']["size"] == 6 assert body['req']["from"] == 6 -def test_get_page_doesnt_allow_0_or_negative_pages(): +def test_get_page_doesnt_allow_0(): with raises(ValueError): DummySearch().get_page(0) - with raises(ValueError): - DummySearch().get_page(-1) def test_next_page_respects_size(): body = DummySearch()[123:124].get_next_page([1, 2]) From 7788da0113551b6f74a32325e2eab0a64c6dfe09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Honza=20Kr=C3=A1l?= Date: Thu, 4 Jan 2018 17:55:12 +0100 Subject: [PATCH 7/8] Search.get_page_count --- elasticsearch_dsl/search.py | 9 +++++++++ .../test_integration/test_pagination.py | 7 +++++++ 2 files changed, 16 insertions(+) diff --git a/elasticsearch_dsl/search.py b/elasticsearch_dsl/search.py index 4f1e4ca52..a61aba0d8 100644 --- a/elasticsearch_dsl/search.py +++ b/elasticsearch_dsl/search.py @@ -356,6 +356,15 @@ def __getitem__(self, n): s._extra['size'] = 1 return s + def get_page_count(self): + size = self._extra.get("size", 10) + if size == 0: + return 0 + pages, docs_left = divmod(self.count(), size) + if docs_left: + pages += 1 + return pages + def get_page(self, page_no): if page_no == 0: raise ValueError("Search pagination is 1-based.") diff --git a/test_elasticsearch_dsl/test_integration/test_pagination.py b/test_elasticsearch_dsl/test_integration/test_pagination.py index d96e0834a..ddaabe7aa 100644 --- a/test_elasticsearch_dsl/test_integration/test_pagination.py +++ b/test_elasticsearch_dsl/test_integration/test_pagination.py @@ -25,6 +25,13 @@ def get_commit_page(commits, page, size=10): start = (page - 1) * size return commits[start:start + size] +def test_get_page_count(sorted_search): + assert sorted_search.get_page_count() == 6 + assert sorted_search[:2].get_page_count() == 26 + assert sorted_search[:1].get_page_count() == 52 + assert sorted_search[:100].get_page_count() == 1 + assert sorted_search[:0].get_page_count() == 0 + def test_get_page(sorted_search, commits): # set page size to 2 s = sorted_search[:2] From eefdc2d3e12e8b907caa3bf1cac67fc2790961bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Honza=20Kr=C3=A1l?= Date: Thu, 4 Jan 2018 18:59:03 +0100 Subject: [PATCH 8/8] allow size to be overridden --- elasticsearch_dsl/search.py | 17 +++++++++++------ test_elasticsearch_dsl/test_pagination.py | 7 ++++++- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/elasticsearch_dsl/search.py b/elasticsearch_dsl/search.py index a61aba0d8..c4943aa00 100644 --- a/elasticsearch_dsl/search.py +++ b/elasticsearch_dsl/search.py @@ -356,8 +356,8 @@ def __getitem__(self, n): s._extra['size'] = 1 return s - def get_page_count(self): - size = self._extra.get("size", 10) + def get_page_count(self, size=None): + size = size if size is not None else self._extra.get("size", 10) if size == 0: return 0 pages, docs_left = divmod(self.count(), size) @@ -365,12 +365,13 @@ def get_page_count(self): pages += 1 return pages - def get_page(self, page_no): + def get_page(self, page_no, size=None): if page_no == 0: raise ValueError("Search pagination is 1-based.") - size = self._extra.get("size", 10) + size = size if size is not None else self._extra.get("size", 10) s = self._clone() s._extra["from"] = size * (abs(page_no) - 1) + s._extra["size"] = size # reverse the sort order when pagination from back if page_no < 0: @@ -384,15 +385,19 @@ def get_page(self, page_no): return resp - def get_next_page(self, last_hit): + def get_next_page(self, last_hit, size=None): + size = size if size is not None else self._extra.get("size", 10) s = self._clone() s._extra["from"] = 0 + s._extra["size"] = size s._extra["search_after"] = list(last_hit) return s.execute() - def get_previous_page(self, first_hit): + def get_previous_page(self, first_hit, size=None): + size = size if size is not None else self._extra.get("size", 10) s = self._clone() s._extra["from"] = 0 + s._extra["size"] = size s._extra["search_after"] = list(first_hit) # reverse the sort order s._sort = [_reverse_sort_entry(se) for se in self._sort] diff --git a/test_elasticsearch_dsl/test_pagination.py b/test_elasticsearch_dsl/test_pagination.py index 7b5d5309a..10bb56979 100644 --- a/test_elasticsearch_dsl/test_pagination.py +++ b/test_elasticsearch_dsl/test_pagination.py @@ -18,7 +18,7 @@ def execute(self, *args, **kwargs): def test_pages_are_1_based(): body = DummySearch().get_page(1) - assert "size" not in body['req'] + assert body['req']["size"] == 10 assert body['req']["from"] == 0 def test_pages_respect_page_size(): @@ -26,6 +26,11 @@ def test_pages_respect_page_size(): assert body['req']["size"] == 6 assert body['req']["from"] == 6 +def test_page_size_can_be_overwritten(): + body = DummySearch()[:6].get_page(2, size=10) + assert body['req']["size"] == 10 + assert body['req']["from"] == 10 + def test_get_page_doesnt_allow_0(): with raises(ValueError): DummySearch().get_page(0)