Skip to content

Commit 7c4f625

Browse files
committed
Merge remote-tracking branch 'upstream/master' into ea-take
2 parents 9a6c7d4 + 6cacdde commit 7c4f625

File tree

6 files changed

+83
-32
lines changed

6 files changed

+83
-32
lines changed

pandas/core/arrays/base.py

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,9 @@ class ExtensionArray(object):
3838
* copy
3939
* _concat_same_type
4040
41-
Some additional methods are available to satisfy pandas' internal, private
42-
block API:
41+
An additional method is available to satisfy pandas' internal,
42+
private block API.
4343
44-
* _can_hold_na
4544
* _formatting_values
4645
4746
Some methods require casting the ExtensionArray to an ndarray of Python
@@ -399,7 +398,8 @@ def _values_for_factorize(self):
399398
Returns
400399
-------
401400
values : ndarray
402-
An array suitable for factoraization. This should maintain order
401+
402+
An array suitable for factorization. This should maintain order
403403
and be a supported dtype (Float64, Int64, UInt64, String, Object).
404404
By default, the extension array is cast to object dtype.
405405
na_value : object
@@ -422,7 +422,7 @@ def factorize(self, na_sentinel=-1):
422422
Returns
423423
-------
424424
labels : ndarray
425-
An interger NumPy array that's an indexer into the original
425+
An integer NumPy array that's an indexer into the original
426426
ExtensionArray.
427427
uniques : ExtensionArray
428428
An ExtensionArray containing the unique values of `self`.
@@ -588,16 +588,12 @@ def _concat_same_type(cls, to_concat):
588588
"""
589589
raise AbstractMethodError(cls)
590590

591-
@property
592-
def _can_hold_na(self):
593-
# type: () -> bool
594-
"""Whether your array can hold missing values. True by default.
595-
596-
Notes
597-
-----
598-
Setting this to false will optimize some operations like fillna.
599-
"""
600-
return True
591+
# The _can_hold_na attribute is set to True so that pandas internals
592+
# will use the ExtensionDtype.na_value as the NA value in operations
593+
# such as take(), reindex(), shift(), etc. In addition, those results
594+
# will then be of the ExtensionArray subclass rather than an array
595+
# of objects
596+
_can_hold_na = True
601597

602598
@property
603599
def _ndarray_values(self):

pandas/tests/extension/base/interface.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ def test_ndim(self, data):
2121
assert data.ndim == 1
2222

2323
def test_can_hold_na_valid(self, data):
24-
assert data._can_hold_na in {True, False}
24+
# GH-20761
25+
assert data._can_hold_na is True
2526

2627
def test_memory_usage(self, data):
2728
s = pd.Series(data)

pandas/tests/extension/base/missing.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,7 @@
99

1010
class BaseMissingTests(BaseExtensionTests):
1111
def test_isna(self, data_missing):
12-
if data_missing._can_hold_na:
13-
expected = np.array([True, False])
14-
else:
15-
expected = np.array([False, False])
12+
expected = np.array([True, False])
1613

1714
result = pd.isna(data_missing)
1815
tm.assert_numpy_array_equal(result, expected)

pandas/tests/extension/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def na_cmp():
5757
Should return a function of two arguments that returns
5858
True if both arguments are (scalar) NA for your type.
5959
60-
By default, uses ``operator.or``
60+
By default, uses ``operator.is_``
6161
"""
6262
return operator.is_
6363

pandas/tests/extension/json/array.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,12 @@ def take(self, indexer, allow_fill=False, fill_value=None):
140140
def copy(self, deep=False):
141141
return type(self)(self.data[:])
142142

143+
def astype(self, dtype, copy=True):
    """Return the elements as a NumPy array of ``dtype``.

    Parameters
    ----------
    dtype : str or numpy.dtype
        Target dtype handed to ``np.array``.
    copy : bool, default True
        Accepted for interface compatibility. The result is always built
        from a freshly created list, so it never shares memory with this
        array regardless of the flag.

    Returns
    -------
    numpy.ndarray
    """
    # NumPy has issues when all the dicts are the same length.
    # np.array([UserDict(...), UserDict(...)]) fails,
    # but np.array([{...}, {...}]) works, so cast.
    plain_dicts = [dict(x) for x in self]
    # Do not forward ``copy``: under NumPy 2, ``copy=False`` means "never
    # copy" and raises ValueError for list input, although a new array is
    # unavoidable (and harmless) here.
    return np.array(plain_dicts, dtype=dtype)
148+
143149
def unique(self):
144150
# Parent method doesn't work since np.array will try to infer
145151
# a 2-dim object.

pandas/tests/extension/json/test_json.py

Lines changed: 62 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
import operator
2+
import collections
23

34
import pytest
45

5-
6+
import pandas as pd
7+
import pandas.util.testing as tm
68
from pandas.compat import PY2, PY36
79
from pandas.tests.extension import base
810

@@ -59,27 +61,76 @@ def data_for_grouping():
5961
])
6062

6163

62-
class TestDtype(base.BaseDtypeTests):
64+
class BaseJSON(object):
    """Mixin supplying JSON-aware equality assertions to the test classes.

    NumPy doesn't handle an array of equal-length UserDicts.
    The default assert_series_equal eventually does a
    Series.values, which raises. We work around it by
    converting the UserDicts to dicts.
    """

    def assert_series_equal(self, left, right, **kwargs):
        """Assert two Series are equal, normalising 'json' data first.

        When ``left`` has dtype name 'json', both sides must share the same
        dtype and are rebuilt from their object-cast values before being
        handed to ``tm.assert_series_equal``. Any other dtype is passed
        through unchanged. ``kwargs`` are forwarded as-is.
        """
        if left.dtype.name == 'json':
            assert left.dtype == right.dtype
            left = pd.Series(JSONArray(left.values.astype(object)),
                             index=left.index, name=left.name)
            right = pd.Series(JSONArray(right.values.astype(object)),
                              index=right.index, name=right.name)
        tm.assert_series_equal(left, right, **kwargs)

    def assert_frame_equal(self, left, right, *args, **kwargs):
        """Assert two DataFrames are equal, treating 'json' columns specially.

        The column indexes are compared first. Each 'json'-dtype column is
        then compared with ``self.assert_series_equal``, and the remaining
        columns are compared with ``tm.assert_frame_equal``.
        """
        tm.assert_index_equal(
            left.columns, right.columns,
            exact=kwargs.get('check_column_type', 'equiv'),
            check_names=kwargs.get('check_names', True),
            check_exact=kwargs.get('check_exact', False),
            check_categorical=kwargs.get('check_categorical', True),
            obj='{obj}.columns'.format(obj=kwargs.get('obj', 'DataFrame')))

        # Select only the labels whose dtype is 'json'. Taking ``.index`` of
        # the boolean mask itself would return every column, sending
        # ordinary columns (and frame-only keyword arguments) through the
        # Series comparison.
        dtypes = left.dtypes
        jsons = dtypes[dtypes == 'json'].index

        for col in jsons:
            self.assert_series_equal(left[col], right[col],
                                     *args, **kwargs)

        left = left.drop(columns=jsons)
        right = right.drop(columns=jsons)
        tm.assert_frame_equal(left, right, *args, **kwargs)
96+
97+
98+
class TestDtype(BaseJSON, base.BaseDtypeTests):
6399
pass
64100

65101

66-
class TestInterface(base.BaseInterfaceTests):
67-
pass
102+
class TestInterface(BaseJSON, base.BaseInterfaceTests):
    def test_custom_asserts(self):
        """Check the JSON-aware assert helpers accept equal data and
        reject unequal data."""
        # This would always trigger the KeyError from trying to put
        # an array of equal-length UserDicts inside an ndarray.
        records = ({'a': 1}, {'b': 2}, {'c': 3})
        data = JSONArray([collections.UserDict(record)
                          for record in records])

        # Identical data must compare equal, as a Series and as a frame.
        same = pd.Series(data)
        self.assert_series_equal(same, same)
        self.assert_frame_equal(same.to_frame(), same.to_frame())

        # Repeating the first element makes the values differ, so both
        # helpers must raise.
        shuffled = pd.Series(data.take([0, 0, 1]))
        with pytest.raises(AssertionError):
            self.assert_series_equal(same, shuffled)

        with pytest.raises(AssertionError):
            self.assert_frame_equal(same.to_frame(), shuffled.to_frame())
68119

69120

70-
class TestConstructors(base.BaseConstructorsTests):
121+
class TestConstructors(BaseJSON, base.BaseConstructorsTests):
71122
pass
72123

73124

74-
class TestReshaping(base.BaseReshapingTests):
125+
class TestReshaping(BaseJSON, base.BaseReshapingTests):
75126
pass
76127

77128

78-
class TestGetitem(base.BaseGetitemTests):
129+
class TestGetitem(BaseJSON, base.BaseGetitemTests):
79130
pass
80131

81132

82-
class TestMissing(base.BaseMissingTests):
133+
class TestMissing(BaseJSON, base.BaseMissingTests):
83134
@pytest.mark.xfail(reason="Setting a dict as a scalar")
84135
def test_fillna_series(self):
85136
"""We treat dictionaries as a mapping in fillna, not a scalar."""
@@ -94,7 +145,7 @@ def test_fillna_frame(self):
94145
reason="Dictionary order unstable")
95146

96147

97-
class TestMethods(base.BaseMethodsTests):
148+
class TestMethods(BaseJSON, base.BaseMethodsTests):
98149
@unhashable
99150
def test_value_counts(self, all_data, dropna):
100151
pass
@@ -126,7 +177,7 @@ def test_sort_values_missing(self, data_missing_for_sorting, ascending):
126177
data_missing_for_sorting, ascending)
127178

128179

129-
class TestCasting(base.BaseCastingTests):
180+
class TestCasting(BaseJSON, base.BaseCastingTests):
130181
@pytest.mark.xfail
131182
def test_astype_str(self):
132183
"""This currently fails in NumPy on np.array(self, dtype=str) with
@@ -139,7 +190,7 @@ def test_astype_str(self):
139190
# internals has trouble setting sequences of values into scalar positions.
140191

141192

142-
class TestGroupby(base.BaseGroupbyTests):
193+
class TestGroupby(BaseJSON, base.BaseGroupbyTests):
143194

144195
@unhashable
145196
def test_groupby_extension_transform(self):

0 commit comments

Comments
 (0)