Skip to content

Commit 69f5ca2

Browse files
committed
Fix doc_type names and document inheritance with Index
1 parent b578c95 commit 69f5ca2

File tree

6 files changed

+123
-39
lines changed

6 files changed

+123
-39
lines changed

Changelog.rst

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ Changelog
66
6.3.0 (dev)
77
-----------
88

9+
* Fixed logic around defining a different ``doc_type`` name.
910
* Added ``retry_on_conflict`` parameter to ``Document.update``.
1011

1112
6.2.1 (2018-07-03)

docs/persistence.rst

+33
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,39 @@ the index, its name, settings and other attributes:
334334
``aliases``
335335
dictionary with any aliases definitions
336336

337+
Document Inheritance
338+
~~~~~~~~~~~~~~~~~~~~
339+
340+
You can use standard Python inheritance to extend models; this can be useful in
341+
a few scenarios. For example, if you want to have a ``BaseDocument`` defining some common fields that several different ``Document`` classes should share:
342+
343+
.. code:: python
344+
345+
class User(InnerDoc):
346+
username = Text(fields={'keyword': Keyword()})
347+
email = Text()
348+
349+
class BaseDocument(Document):
350+
created_by = Object(User)
351+
created_date = Date()
352+
last_updated = Date()
353+
354+
def save(self, **kwargs):
355+
if not self.created_date:
356+
self.created_date = datetime.now()
357+
self.last_updated = datetime.now()
358+
return super(BaseDocument, self).save(**kwargs)
359+
360+
class BlogPost(BaseDocument):
361+
class Index:
362+
name = 'blog'
363+
364+
Another use case would be using the `join type
365+
<https://www.elastic.co/guide/en/elasticsearch/reference/current/parent-join.html>`_
366+
to have multiple different entities in a single index. You can see an `example
367+
<https://github.com/elastic/elasticsearch-dsl-py/blob/master/examples/parent_child.py>`_
368+
of this approach.
369+
337370
.. _index:
338371

339372
Index

elasticsearch_dsl/document.py

+13-7
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from .search import Search
1111
from .connections import connections
1212
from .exceptions import ValidationException, IllegalOperation
13-
from .index import Index, DEFAULT_INDEX
13+
from .index import Index
1414

1515

1616
class MetaField(object):
@@ -25,24 +25,30 @@ def __new__(cls, name, bases, attrs):
2525
return super(DocumentMeta, cls).__new__(cls, name, bases, attrs)
2626

2727
class IndexMeta(DocumentMeta):
28+
# global flag to guard us from associating an Index with the base Document
29+
# class; only user-defined subclasses should have an _index attr
30+
_document_initialized = False
31+
2832
def __new__(cls, name, bases, attrs):
29-
index_opts = attrs.pop('Index', None)
3033
new_cls = super(IndexMeta, cls).__new__(cls, name, bases, attrs)
31-
new_cls._index = cls.construct_index(index_opts, bases)
32-
new_cls._index.document(new_cls)
34+
if cls._document_initialized:
35+
index_opts = attrs.pop('Index', None)
36+
new_cls._index = cls.construct_index(index_opts, bases)
37+
new_cls._index.document(new_cls)
38+
cls._document_initialized = True
3339
return new_cls
3440

3541
@classmethod
3642
def construct_index(cls, opts, bases):
3743
if opts is None:
3844
for b in bases:
39-
if getattr(b, '_index', DEFAULT_INDEX) is not DEFAULT_INDEX:
45+
if hasattr(b, '_index'):
4046
return b._index
41-
return DEFAULT_INDEX
47+
# create an all-matching index pattern
48+
return Index('*')
4249

4350
i = Index(
4451
getattr(opts, 'name', '*'),
45-
doc_type=getattr(opts, 'doc_type', 'doc'),
4652
using=getattr(opts, 'using', 'default')
4753
)
4854
i.settings(**getattr(opts, 'settings', {}))

elasticsearch_dsl/index.py

+26-18
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
from .exceptions import IllegalOperation
44
from .mapping import Mapping
55

6+
DEFAULT_DOC_TYPE = 'doc'
7+
68
class IndexTemplate(object):
79
def __init__(self, name, template, index=None, **kwargs):
810
if index is None:
@@ -28,7 +30,7 @@ def save(self, using=None):
2830
es.indices.put_template(name=self._template_name, body=self.to_dict())
2931

3032
class Index(object):
31-
def __init__(self, name, doc_type='doc', using='default'):
33+
def __init__(self, name, doc_type=DEFAULT_DOC_TYPE, using='default'):
3234
"""
3335
:arg name: name of the index
3436
:arg using: connection alias to use, defaults to ``'default'``
@@ -39,7 +41,14 @@ def __init__(self, name, doc_type='doc', using='default'):
3941
self._settings = {}
4042
self._aliases = {}
4143
self._analysis = {}
42-
self._mapping = Mapping(doc_type)
44+
self._mapping = None
45+
if doc_type is not DEFAULT_DOC_TYPE:
46+
self._mapping = Mapping(doc_type)
47+
48+
def get_or_create_mapping(self, doc_type=DEFAULT_DOC_TYPE):
49+
if self._mapping is None:
50+
self._mapping = Mapping(doc_type)
51+
return self._mapping
4352

4453
def as_template(self, template_name, pattern=None):
4554
# TODO: should we allow pattern to be a top-level arg?
@@ -48,10 +57,10 @@ def as_template(self, template_name, pattern=None):
4857
return IndexTemplate(template_name, pattern or self._name, index=self)
4958

5059
def resolve_field(self, field_path):
51-
return self._mapping.resolve_field(field_path)
60+
return self.get_or_create_mapping().resolve_field(field_path)
5261

5362
def load_mappings(self, using=None):
54-
self._mapping.update_from_es(self._name, using=using or self._using)
63+
self.get_or_create_mapping().update_from_es(self._name, using=using or self._using)
5564

5665
def clone(self, name=None, doc_type=None, using=None):
5766
"""
@@ -68,14 +77,18 @@ def clone(self, name=None, doc_type=None, using=None):
6877
:arg name: name of the index
6978
:arg using: connection alias to use, defaults to ``'default'``
7079
"""
80+
doc_type = doc_type or (
81+
DEFAULT_DOC_TYPE if self._mapping is None else self._mapping.doc_type
82+
)
7183
i = Index(name or self._name,
72-
doc_type=doc_type or self._mapping.doc_type,
84+
doc_type=doc_type,
7385
using=using or self._using)
7486
i._settings = self._settings.copy()
7587
i._aliases = self._aliases.copy()
7688
i._analysis = self._analysis.copy()
7789
i._doc_types = self._doc_types[:]
78-
i._mapping = self._mapping._clone()
90+
if self._mapping is not None:
91+
i._mapping = self._mapping._clone()
7992
return i
8093

8194
def _get_connection(self, using=None):
@@ -89,11 +102,11 @@ def mapping(self, mapping):
89102
This means that, when this index is created, it will contain the
90103
mappings for the document type defined by those mappings.
91104
"""
92-
if mapping.doc_type != self._mapping.doc_type:
105+
if self._mapping is not None and mapping.doc_type != self._mapping.doc_type:
93106
raise IllegalOperation(
94107
'Index object cannot have multiple types, %s already set, '
95108
'trying to assign %s.' % (self._mapping.doc_type, mapping.doc_type))
96-
self._mapping.update(mapping)
109+
self.get_or_create_mapping(mapping.doc_type).update(mapping)
97110

98111
def document(self, document):
99112
"""
@@ -117,17 +130,14 @@ class Post(Document):
117130
s = i.search()
118131
"""
119132
name = document._doc_type.name
120-
if name != self._mapping.doc_type:
133+
if self._mapping is not None and name != self._mapping.doc_type:
121134
raise IllegalOperation(
122135
'Index object cannot have multiple types, %s already set, '
123136
'trying to assign %s.' % (self._mapping.doc_type, name))
124137
self._doc_types.append(document)
125138
# TODO: do this at save time to allow Document to be modified after
126139
# creation?
127-
self._mapping.update(document._doc_type.mapping)
128-
129-
if document._index is DEFAULT_INDEX:
130-
document._index = self
140+
self.get_or_create_mapping(document._doc_type.name).update(document._doc_type.mapping)
131141
return document
132142
doc_type = document
133143

@@ -188,9 +198,9 @@ def to_dict(self):
188198
out['settings'] = self._settings
189199
if self._aliases:
190200
out['aliases'] = self._aliases
191-
mappings = self._mapping.to_dict()
192-
analysis = self._mapping._collect_analysis()
193-
if mappings[self._mapping.doc_type]:
201+
mappings = self._mapping.to_dict() if self._mapping else None
202+
analysis = self._mapping._collect_analysis() if self._mapping else {}
203+
if mappings and mappings[self._mapping.doc_type]:
194204
out['mappings'] = mappings
195205
if analysis or self._analysis:
196206
for key in self._analysis:
@@ -548,5 +558,3 @@ def shrink(self, using=None, **kwargs):
548558
``Elasticsearch.indices.shrink`` unchanged.
549559
"""
550560
return self._get_connection(using).indices.shrink(index=self._name, **kwargs)
551-
552-
DEFAULT_INDEX = Index('*')

elasticsearch_dsl/utils.py

+38-9
Original file line numberDiff line numberDiff line change
@@ -396,16 +396,24 @@ def __getattr__(self, name):
396396
return value
397397
raise
398398

399+
def __get_field(self, name):
400+
try:
401+
return self._doc_type.mapping[name]
402+
except KeyError:
403+
# fallback to fields on the Index
404+
if hasattr(self, '_index') and self._index._mapping:
405+
try:
406+
return self._index._mapping[name]
407+
except KeyError:
408+
pass
409+
399410
def to_dict(self, skip_empty=True):
400411
out = {}
401412
for k, v in iteritems(self._d_):
402-
try:
403-
f = self._doc_type.mapping[k]
404-
except KeyError:
405-
pass
406-
else:
407-
if f._coerce:
408-
v = f.serialize(v)
413+
# if this is a mapped field that coerces its values, serialize through it
414+
f = self.__get_field(k)
415+
if f and f._coerce:
416+
v = f.serialize(v)
409417

410418
# if someone assigned AttrList, unwrap it
411419
if isinstance(v, AttrList):
@@ -420,11 +428,32 @@ def to_dict(self, skip_empty=True):
420428
out[k] = v
421429
return out
422430

423-
def clean_fields(self):
424-
errors = {}
431+
def __list_fields(self):
432+
"""
433+
Get all the fields defined for our class. If we have an Index, also
434+
look at the index mappings, marking the fields that come from the Index
435+
as optional.
436+
"""
425437
for name in self._doc_type.mapping:
426438
field = self._doc_type.mapping[name]
439+
yield name, field, False
440+
441+
if hasattr(self, '_index'):
442+
if not self._index._mapping:
443+
return
444+
for name in self._index._mapping:
445+
# don't return fields that are in _doc_type
446+
if name in self._doc_type.mapping:
447+
continue
448+
field = self._index._mapping[name]
449+
yield name, field, True
450+
451+
def clean_fields(self):
452+
errors = {}
453+
for name, field, optional in self.__list_fields():
427454
data = self._d_.get(name, None)
455+
if data is None and optional:
456+
continue
428457
try:
429458
# save the cleaned value
430459
data = field.clean(data)

test_elasticsearch_dsl/test_document.py

+12-5
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,18 @@ def test_document_can_redefine_doc_type():
7272
class D(document.Document):
7373
class Meta:
7474
doc_type = 'not-doc'
75-
class Index:
76-
doc_type = 'not-doc'
75+
assert D._index._mapping.doc_type == 'not-doc'
76+
77+
def test_document_cannot_specify_different_doc_type_if_index_defined():
78+
# this will initiate ._index with doc_type = 'doc'
79+
class C(document.Document):
80+
pass
81+
82+
with raises(IllegalOperation):
83+
class D(C):
84+
class Meta:
85+
doc_type = 'not-doc'
86+
7787

7888
def test_ip_address_serializes_properly():
7989
host = Host(ip=ipaddress.IPv4Address(u'10.0.0.1'))
@@ -459,9 +469,6 @@ def test_index_inheritance():
459469
assert issubclass(MyMultiSubDoc, document.Document)
460470
assert hasattr(MyMultiSubDoc, '_doc_type')
461471
assert hasattr(MyMultiSubDoc, '_index')
462-
# index and using should be
463-
assert MyMultiSubDoc._index._name == MySubDoc._index._name
464-
assert MyMultiSubDoc._index._using == MySubDoc._index._using
465472
assert {
466473
'doc': {
467474
'properties': {

0 commit comments

Comments
 (0)