Skip to content

Commit 125ca0b

Browse files
committed
Simplify
Upcasting is still broken
1 parent c721915 commit 125ca0b

File tree

11 files changed

+88
-77
lines changed

11 files changed

+88
-77
lines changed

pandas/core/algorithms.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1510,7 +1510,7 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None,
15101510
# TODO(EA): Remove these if / elifs as datetimeTZ, interval, become EAs
15111511
# dispatch to internal type takes
15121512
if is_extension_array_dtype(arr):
1513-
return arr.take(indexer, fill_value=fill_value)
1513+
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
15141514
elif is_datetimetz(arr):
15151515
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
15161516
elif is_interval_dtype(arr):

pandas/core/arrays/base.py

+56-63
Original file line numberDiff line numberDiff line change
@@ -5,71 +5,14 @@
55
This is an experimental API and subject to breaking changes
66
without warning.
77
"""
8-
import textwrap
9-
108
import numpy as np
119

1210
from pandas.errors import AbstractMethodError
1311
from pandas.compat.numpy import function as nv
14-
from pandas.util._decorators import Appender, Substitution
1512

1613
_not_implemented_message = "{} does not implement {}."
1714

1815

19-
_take_docstring = textwrap.dedent("""\
20-
Take elements from an array.
21-
22-
Parameters
23-
----------
24-
%(arr)s\
25-
indexer : sequence of integers
26-
Indices to be taken. See Notes for how negative indices
27-
are handled.
28-
fill_value : any, optional
29-
Fill value to use for NA-indices. This has a few behaviors.
30-
31-
* fill_value is not specified : triggers NumPy's semantics
32-
where negative values in `indexer` mean slices from the end.
33-
* fill_value is NA : Fill positions where `indexer` is ``-1``
34-
with ``self.dtype.na_value``. Anything considered NA by
35-
:func:`pandas.isna` will result in ``self.dtype.na_value``
36-
being used to fill.
37-
* fill_value is not NA : Fill positions where `indexer` is ``-1``
38-
with `fill_value`.
39-
40-
Returns
41-
-------
42-
ExtensionArray
43-
44-
Raises
45-
------
46-
IndexError
47-
When the indexer is out of bounds for the array.
48-
ValueError
49-
When the indexer contains negative values other than ``-1``
50-
and `fill_value` is specified.
51-
52-
Notes
53-
-----
54-
The meaning of negative values in `indexer` depends on the
55-
`fill_value` argument. By default, we follow the behavior
56-
of :meth:`numpy.take`, where negative indices indicate slices
57-
from the end.
58-
59-
When `fill_value` is specified, we follow pandas semantics of ``-1``
60-
indicating a missing value. In this case, positions where `indexer`
61-
is ``-1`` will be filled with `fill_value` or the default NA value
62-
for this type.
63-
64-
ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
65-
``iloc``, when the indexer is a sequence of values. Additionally,
66-
it's called by :meth:`Series.reindex` with a `fill_value`.
67-
68-
See Also
69-
--------
70-
numpy.take""")
71-
72-
7316
class ExtensionArray(object):
7417
"""Abstract base class for custom 1-D array types.
7518
@@ -532,15 +475,66 @@ def _values_for_take(self):
532475
"""
533476
return self.astype(object)
534477

535-
@Substitution(arr='')
536-
@Appender(_take_docstring)
537478
def take(self, indexer, fill_value=None, allow_fill=None):
538-
# type: (Sequence[int], Optional[Any]) -> ExtensionArray
539-
# assert fill_value is not np.nan
479+
# type: (Sequence[int], Optional[Any], Optional[bool]) -> ExtensionArray
480+
"""Take elements from an array.
481+
482+
Parameters
483+
----------
484+
indexer : sequence of integers
485+
Indices to be taken. See Notes for how negative indices
486+
are handled.
487+
fill_value : any, optional
488+
Fill value to use for NA-indices. This has a few behaviors.
489+
490+
* fill_value is not specified : triggers NumPy's semantics
491+
where negative values in `indexer` mean slices from the end.
492+
* fill_value is NA : Fill positions where `indexer` is ``-1``
493+
with ``self.dtype.na_value``. Anything considered NA by
494+
:func:`pandas.isna` will result in ``self.dtype.na_value``
495+
being used to fill.
496+
* fill_value is not NA : Fill positions where `indexer` is ``-1``
497+
with `fill_value`.
498+
499+
Returns
500+
-------
501+
ExtensionArray
502+
503+
Raises
504+
------
505+
IndexError
506+
When the indexer is out of bounds for the array.
507+
ValueError
508+
When the indexer contains negative values other than ``-1``
509+
and `fill_value` is specified.
510+
511+
Notes
512+
-----
513+
The meaning of negative values in `indexer` depends on the
514+
`fill_value` argument. By default, we follow the behavior
515+
of :meth:`numpy.take`, where negative indices indicate slices
516+
from the end.
517+
518+
When `fill_value` is specified, we follow pandas semantics of ``-1``
519+
indicating a missing value. In this case, positions where `indexer`
520+
is ``-1`` will be filled with `fill_value` or the default NA value
521+
for this type.
522+
523+
ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
524+
``iloc``, when the indexer is a sequence of values. Additionally,
525+
it's called by :meth:`Series.reindex` with a `fill_value`.
526+
527+
See Also
528+
--------
529+
numpy.take
530+
"""
540531
from pandas.core.algorithms import take
541532

542533
data = self._values_for_take()
543-
result = take(data, indexer, fill_value=fill_value)
534+
if allow_fill and fill_value is None:
535+
fill_value = self.dtype.na_value
536+
537+
result = take(data, indexer, fill_value=fill_value, allow_fill=allow_fill)
544538
return self._from_sequence(result)
545539

546540
def copy(self, deep=False):
@@ -605,4 +599,3 @@ def _ndarray_values(self):
605599
used for interacting with our indexers.
606600
"""
607601
return np.array(self)
608-

pandas/core/dtypes/cast.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,11 @@ def changeit():
256256

257257
def maybe_promote(dtype, fill_value=np.nan):
258258
# if we passed an array here, determine the fill value by dtype
259-
if isinstance(fill_value, np.ndarray):
259+
if is_extension_array_dtype(dtype):
260+
# XXX: verify this change
261+
fill_value = dtype.na_value
262+
263+
elif isinstance(fill_value, np.ndarray):
260264
if issubclass(fill_value.dtype.type, (np.datetime64, np.timedelta64)):
261265
fill_value = iNaT
262266
else:

pandas/core/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3476,7 +3476,7 @@ def _reindex_index(self, new_index, method, copy, level, fill_value=np.nan,
34763476
allow_dups=False)
34773477

34783478
def _reindex_columns(self, new_columns, method, copy, level,
3479-
fill_value=np.nan, limit=None, tolerance=None):
3479+
fill_value=None, limit=None, tolerance=None):
34803480
new_columns, indexer = self.columns.reindex(new_columns, method=method,
34813481
level=level, limit=limit,
34823482
tolerance=tolerance)

pandas/core/generic.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -3660,7 +3660,7 @@ def reindex(self, *args, **kwargs):
36603660
copy = kwargs.pop('copy', True)
36613661
limit = kwargs.pop('limit', None)
36623662
tolerance = kwargs.pop('tolerance', None)
3663-
fill_value = kwargs.pop('fill_value', np.nan)
3663+
fill_value = kwargs.pop('fill_value', None)
36643664

36653665
# Series.reindex doesn't use / need the axis kwarg
36663666
# We pop and ignore it here, to make writing Series/Frame generic code
@@ -3776,7 +3776,7 @@ def _reindex_multi(self, axes, copy, fill_value):
37763776

37773777
@Appender(_shared_docs['reindex_axis'] % _shared_doc_kwargs)
37783778
def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True,
3779-
limit=None, fill_value=np.nan):
3779+
limit=None, fill_value=None):
37803780
msg = ("'.reindex_axis' is deprecated and will be removed in a future "
37813781
"version. Use '.reindex' instead.")
37823782
self._consolidate_inplace()
@@ -3790,7 +3790,7 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True,
37903790
return self._reindex_with_indexers({axis: [new_index, indexer]},
37913791
fill_value=fill_value, copy=copy)
37923792

3793-
def _reindex_with_indexers(self, reindexers, fill_value=np.nan, copy=False,
3793+
def _reindex_with_indexers(self, reindexers, fill_value=None, copy=False,
37943794
allow_dups=False):
37953795
"""allow_dups indicates an internal call here """
37963796

@@ -7252,7 +7252,7 @@ def align(self, other, join='outer', axis=None, level=None, copy=True,
72527252
raise TypeError('unsupported type: %s' % type(other))
72537253

72547254
def _align_frame(self, other, join='outer', axis=None, level=None,
7255-
copy=True, fill_value=np.nan, method=None, limit=None,
7255+
copy=True, fill_value=None, method=None, limit=None,
72567256
fill_axis=0):
72577257
# defaults
72587258
join_index, join_columns = None, None
@@ -7420,6 +7420,7 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
74207420
if other.ndim <= self.ndim:
74217421

74227422
_, other = self.align(other, join='left', axis=axis,
7423+
# XXX
74237424
level=level, fill_value=np.nan)
74247425

74257426
# if we are NOT aligned, raise as we cannot where index

pandas/core/internals.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -1888,6 +1888,11 @@ def _holder(self):
18881888
# For extension blocks, the holder is values-dependent.
18891889
return type(self.values)
18901890

1891+
@property
1892+
def fill_value(self):
1893+
# Used in reindex_indexer
1894+
return self.values.dtype.na_value
1895+
18911896
@property
18921897
def _can_hold_na(self):
18931898
# The default ExtensionArray._can_hold_na is True
@@ -1951,7 +1956,8 @@ def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None):
19511956
# axis doesn't matter; we are really a single-dim object
19521957
# but are passed the axis depending on the calling routine
19531958
# if its REALLY axis 0, then this will be a reindex and not a take
1954-
new_values = self.values.take(indexer, fill_value=fill_value)
1959+
new_values = self.values.take(indexer, fill_value=fill_value,
1960+
allow_fill=True)
19551961

19561962
# if we are a 1-dim object, then always place at 0
19571963
if self.ndim == 1:

pandas/core/series.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -2185,7 +2185,7 @@ def _binop(self, other, func, level=None, fill_value=None):
21852185
result.name = None
21862186
return result
21872187

2188-
def combine(self, other, func, fill_value=np.nan):
2188+
def combine(self, other, func, fill_value=None):
21892189
"""
21902190
Perform elementwise binary operation on two Series using given function
21912191
with optional fill value when an index is missing from one Series or
@@ -3216,7 +3216,10 @@ def _reindex_indexer(self, new_index, indexer, copy):
32163216
return self.copy()
32173217
return self
32183218

3219-
new_values = algorithms.take_1d(self._values, indexer)
3219+
# TODO: determine if we want EA to handle fill_value=None
3220+
# if not, then we have to determine this here.
3221+
new_values = algorithms.take_1d(self._values, indexer,
3222+
fill_value=None, allow_fill=True)
32203223
return self._constructor(new_values, index=new_index)
32213224

32223225
def _needs_reindex_multi(self, axes, method, level):

pandas/tests/extension/base/getitem.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ def test_take_non_na_fill_value(self, data_missing):
155155

156156
def test_take_pandas_style_negative_raises(self, data, na_value):
157157
with pytest.raises(ValueError):
158-
data.take([0, -2], fill_value=na_value)
158+
data.take([0, -2], fill_value=na_value, allow_fill=True)
159159

160160
@pytest.mark.xfail(reason="Series.take with extension array buggy for -1")
161161
def test_take_series(self, data):

pandas/tests/extension/decimal/array.py

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ class DecimalArray(ExtensionArray):
2828
dtype = DecimalDtype()
2929

3030
def __init__(self, values):
31+
assert all(isinstance(v, decimal.Decimal) for v in values)
3132
values = np.asarray(values, dtype=object)
3233

3334
self._data = values

pandas/tests/extension/decimal/test_decimal.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,8 @@ def test_take_basic(self):
119119
decimal.Decimal('3')])
120120
self.assert_extension_array_equal(result, expected)
121121

122-
result = ea.take([1, 2, -1], fill_value=ea.dtype.na_value)
122+
result = ea.take([1, 2, -1], fill_value=ea.dtype.na_value,
123+
allow_fill=True)
123124
expected = DecimalArray([decimal.Decimal('2'),
124125
decimal.Decimal('3'),
125126
decimal.Decimal('NaN')])

pandas/tests/extension/json/array.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -94,20 +94,22 @@ def nbytes(self):
9494
def isna(self):
9595
return np.array([x == self.dtype.na_value for x in self.data])
9696

97-
def take(self, indexer, fill_value=None):
97+
def take(self, indexer, fill_value=None, allow_fill=None):
9898
# re-implement here, since NumPy has trouble setting
9999
# sized objects like UserDicts into scalar slots of
100100
# an ndarray.
101101
indexer = np.asarray(indexer)
102102
msg = ("Index is out of bounds or cannot do a "
103103
"non-empty take from an empty array.")
104104

105-
if fill_value is None:
105+
if allow_fill is None:
106106
try:
107107
output = [self.data[loc] for loc in indexer]
108108
except IndexError:
109109
raise IndexError(msg)
110110
else:
111+
if fill_value is None:
112+
fill_value = self.dtype.na_value
111113
# bounds check
112114
if (indexer < -1).any():
113115
raise ValueError

0 commit comments

Comments
 (0)