Skip to content

Commit 8a110b0

Browse files
Add index creation to aggregations example (elastic#1862)
* Add index creation to aggregations example * remove shard/replica restrictions from test datasets
1 parent 663d6f4 commit 8a110b0

File tree

4 files changed

+97
-88
lines changed

4 files changed

+97
-88
lines changed

examples/async/composite_agg.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@
1919
import os
2020
from typing import Any, AsyncIterator, Dict, List, Optional, Union
2121

22+
from elasticsearch.helpers import async_bulk
23+
2224
from elasticsearch_dsl import A, Agg, AsyncSearch, Response, async_connections
25+
from tests.test_integration.test_data import DATA, GIT_INDEX
2326

2427

2528
async def scan_aggs(
@@ -56,8 +59,17 @@ async def run_search(**kwargs: Any) -> Response:
5659

5760
async def main() -> None:
5861
# initiate the default connection to elasticsearch
59-
async_connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]])
62+
client = async_connections.create_connection(
63+
hosts=[os.environ["ELASTICSEARCH_URL"]]
64+
)
65+
66+
# create the index and populate it with some data
67+
# note that the dataset is imported from the library's test suite
68+
await client.indices.delete(index="git", ignore_unavailable=True)
69+
await client.indices.create(index="git", **GIT_INDEX)
70+
await async_bulk(client, DATA, raise_on_error=True, refresh=True)
6071

72+
# run some aggregations on the data
6173
async for b in scan_aggs(
6274
AsyncSearch(index="git"),
6375
{"files": A("terms", field="files")},

examples/composite_agg.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,10 @@
1818
import os
1919
from typing import Any, Dict, Iterator, List, Optional, Union
2020

21+
from elasticsearch.helpers import bulk
22+
2123
from elasticsearch_dsl import A, Agg, Response, Search, connections
24+
from tests.test_integration.test_data import DATA, GIT_INDEX
2225

2326

2427
def scan_aggs(
@@ -55,8 +58,15 @@ def run_search(**kwargs: Any) -> Response:
5558

5659
def main() -> None:
5760
# initiate the default connection to elasticsearch
58-
connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]])
61+
client = connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]])
62+
63+
# create the index and populate it with some data
64+
# note that the dataset is imported from the library's test suite
65+
client.indices.delete(index="git", ignore_unavailable=True)
66+
client.indices.create(index="git", **GIT_INDEX)
67+
bulk(client, DATA, raise_on_error=True, refresh=True)
5968

69+
# run some aggregations on the data
6070
for b in scan_aggs(
6171
Search(index="git"),
6272
{"files": A("terms", field="files")},

tests/test_integration/test_data.py

Lines changed: 72 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -19,99 +19,85 @@
1919

2020
from elasticsearch import Elasticsearch
2121

22+
user_mapping = {
23+
"properties": {"name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}}
24+
}
2225

23-
def create_flat_git_index(client: Elasticsearch, index: str) -> None:
24-
# we will use user on several places
25-
user_mapping = {
26-
"properties": {"name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}}
27-
}
28-
29-
client.indices.create(
30-
index=index,
31-
body={
32-
"settings": {
33-
# just one shard, no replicas for testing
34-
"number_of_shards": 1,
35-
"number_of_replicas": 0,
36-
# custom analyzer for analyzing file paths
37-
"analysis": {
38-
"analyzer": {
39-
"file_path": {
40-
"type": "custom",
41-
"tokenizer": "path_hierarchy",
42-
"filter": ["lowercase"],
43-
}
44-
}
45-
},
46-
},
47-
"mappings": {
48-
"properties": {
49-
"description": {"type": "text", "analyzer": "snowball"},
50-
"author": user_mapping,
51-
"authored_date": {"type": "date"},
52-
"committer": user_mapping,
53-
"committed_date": {"type": "date"},
54-
"parent_shas": {"type": "keyword"},
55-
"files": {
56-
"type": "text",
57-
"analyzer": "file_path",
58-
"fielddata": True,
59-
},
26+
FLAT_GIT_INDEX: Dict[str, Any] = {
27+
"settings": {
28+
# custom analyzer for analyzing file paths
29+
"analysis": {
30+
"analyzer": {
31+
"file_path": {
32+
"type": "custom",
33+
"tokenizer": "path_hierarchy",
34+
"filter": ["lowercase"],
6035
}
36+
}
37+
},
38+
},
39+
"mappings": {
40+
"properties": {
41+
"description": {"type": "text", "analyzer": "snowball"},
42+
"author": user_mapping,
43+
"authored_date": {"type": "date"},
44+
"committer": user_mapping,
45+
"committed_date": {"type": "date"},
46+
"parent_shas": {"type": "keyword"},
47+
"files": {
48+
"type": "text",
49+
"analyzer": "file_path",
50+
"fielddata": True,
6151
},
52+
}
53+
},
54+
}
55+
56+
GIT_INDEX: Dict[str, Any] = {
57+
"settings": {
58+
# custom analyzer for analyzing file paths
59+
"analysis": {
60+
"analyzer": {
61+
"file_path": {
62+
"type": "custom",
63+
"tokenizer": "path_hierarchy",
64+
"filter": ["lowercase"],
65+
}
66+
}
6267
},
63-
)
68+
},
69+
"mappings": {
70+
"properties": {
71+
# common fields
72+
"description": {"type": "text", "analyzer": "snowball"},
73+
"commit_repo": {"type": "join", "relations": {"repo": "commit"}},
74+
# COMMIT mappings
75+
"author": user_mapping,
76+
"authored_date": {"type": "date"},
77+
"committer": user_mapping,
78+
"committed_date": {"type": "date"},
79+
"parent_shas": {"type": "keyword"},
80+
"files": {
81+
"type": "text",
82+
"analyzer": "file_path",
83+
"fielddata": True,
84+
},
85+
# REPO mappings
86+
"is_public": {"type": "boolean"},
87+
"owner": user_mapping,
88+
"created_at": {"type": "date"},
89+
"tags": {"type": "keyword"},
90+
}
91+
},
92+
}
6493

6594

66-
def create_git_index(client: Elasticsearch, index: str) -> None:
67-
# we will use user on several places
68-
user_mapping = {
69-
"properties": {"name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}}
70-
}
95+
def create_flat_git_index(client: Elasticsearch, index: str) -> None:
96+
client.indices.create(index=index, body=FLAT_GIT_INDEX)
7197

72-
client.indices.create(
73-
index=index,
74-
body={
75-
"settings": {
76-
# just one shard, no replicas for testing
77-
"number_of_shards": 1,
78-
"number_of_replicas": 0,
79-
# custom analyzer for analyzing file paths
80-
"analysis": {
81-
"analyzer": {
82-
"file_path": {
83-
"type": "custom",
84-
"tokenizer": "path_hierarchy",
85-
"filter": ["lowercase"],
86-
}
87-
}
88-
},
89-
},
90-
"mappings": {
91-
"properties": {
92-
# common fields
93-
"description": {"type": "text", "analyzer": "snowball"},
94-
"commit_repo": {"type": "join", "relations": {"repo": "commit"}},
95-
# COMMIT mappings
96-
"author": user_mapping,
97-
"authored_date": {"type": "date"},
98-
"committer": user_mapping,
99-
"committed_date": {"type": "date"},
100-
"parent_shas": {"type": "keyword"},
101-
"files": {
102-
"type": "text",
103-
"analyzer": "file_path",
104-
"fielddata": True,
105-
},
106-
# REPO mappings
107-
"is_public": {"type": "boolean"},
108-
"owner": user_mapping,
109-
"created_at": {"type": "date"},
110-
"tags": {"type": "keyword"},
111-
}
112-
},
113-
},
114-
)
98+
99+
def create_git_index(client: Elasticsearch, index: str) -> None:
100+
client.indices.create(index=index, body=GIT_INDEX)
115101

116102

117103
DATA = [

utils/run-unasync.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ def main(check=False):
6464
"async_connections": "connections",
6565
"async_scan": "scan",
6666
"async_simulate": "simulate",
67+
"async_bulk": "bulk",
6768
"async_mock_client": "mock_client",
6869
"async_client": "client",
6970
"async_data_client": "data_client",

0 commit comments

Comments
 (0)