Skip to content

Commit ecca28d

Browse files
committed
Merge pull request #4610 from jreback/series_reindex
CLN: refactor Series.reindex to core/generic
2 parents 3644370 + dc73315 commit ecca28d

23 files changed

+380
-181
lines changed

doc/source/release.rst

+8-3
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,8 @@ pandas 0.13
115115
- ``MultiIndex.astype()`` now only allows ``np.object_``-like dtypes and
116116
now returns a ``MultiIndex`` rather than an ``Index``. (:issue:`4039`)
117117

118+
- Infer and downcast dtype if ``downcast='infer'`` is passed to ``fillna/ffill/bfill`` (:issue:`4604`)
119+
118120
**Internal Refactoring**
119121

120122
In 0.13.0 there is a major refactor primarily to subclass ``Series`` from ``NDFrame``,
@@ -144,8 +146,6 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
144146
- support attribute access for setting
145147
- filter supports same api as original ``DataFrame`` filter
146148

147-
- Reindex called with no arguments will now return a copy of the input object
148-
149149
- Series now inherits from ``NDFrame`` rather than directly from ``ndarray``.
150150
There are several minor changes that affect the API.
151151

@@ -185,6 +185,9 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
185185

186186
- Indexing with dtype conversions fixed (:issue:`4463`, :issue:`4204`)
187187

188+
- Refactor Series.reindex to core/generic.py (:issue:`4604`, :issue:`4618`), allow ``method=`` in reindexing
189+
on a Series to work
190+
188191
**Experimental Features**
189192

190193
**Bug Fixes**
@@ -210,7 +213,7 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
210213
- In ``to_json``, raise if a passed ``orient`` would cause loss of data because
211214
of a duplicate index (:issue:`4359`)
212215
- In ``to_json``, fix date handling so milliseconds are the default timestamp
213-
as the docstring says (:issue:`4362`).
216+
as the docstring says (:issue:`4362`).
214217
- JSON NaT handling fixed, NaTs are now serialised to `null` (:issue:`4498`)
215218
- Fixed JSON handling of escapable characters in JSON object keys (:issue:`4593`)
216219
- Fixed passing ``keep_default_na=False`` when ``na_values=None`` (:issue:`4318`)
@@ -257,6 +260,8 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
257260
- Fix bug in ``pd.read_clipboard`` on windows with PY3 (:issue:`4561`); not decoding properly
258261
- ``tslib.get_period_field()`` and ``tslib.get_period_field_arr()`` now raise
259262
if code argument out of range (:issue:`4519`, :issue:`4520`)
263+
- Fix reindexing with multiple axes; an axis that already matched was not replacing the current axis, leading
264+
to a possible lazy frequency inference issue (:issue:`3317`)
260265

261266
pandas 0.12
262267
===========

doc/source/v0.13.0.txt

+5
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ API changes
9696
# and all methods take an inplace kwarg
9797
index.set_names(["bob", "cranberry"], inplace=True)
9898

99+
- Infer and downcast dtype if ``downcast='infer'`` is passed to ``fillna/ffill/bfill`` (:issue:`4604`)
100+
99101
Enhancements
100102
~~~~~~~~~~~~
101103

@@ -237,6 +239,9 @@ and behaviors. Series formerly subclassed directly from ``ndarray``. (:issue:`40
237239

238240
- Indexing with dtype conversions fixed (:issue:`4463`, :issue:`4204`)
239241

242+
- Refactor Series.reindex to core/generic.py (:issue:`4604`, :issue:`4618`), allow ``method=`` in reindexing
243+
on a Series to work
244+
240245
Bug Fixes
241246
~~~~~~~~~
242247

pandas/core/common.py

+58-11
Original file line numberDiff line numberDiff line change
@@ -961,14 +961,47 @@ def _possibly_downcast_to_dtype(result, dtype):
961961
""" try to cast to the specified dtype (e.g. convert back to bool/int
962962
or could be an astype of float64->float32 """
963963

964-
if np.isscalar(result):
964+
if np.isscalar(result) or not len(result):
965965
return result
966966

967+
if isinstance(dtype, compat.string_types):
968+
if dtype == 'infer':
969+
inferred_type = lib.infer_dtype(_ensure_object(result.ravel()))
970+
if inferred_type == 'boolean':
971+
dtype = 'bool'
972+
elif inferred_type == 'integer':
973+
dtype = 'int64'
974+
elif inferred_type == 'datetime64':
975+
dtype = 'datetime64[ns]'
976+
elif inferred_type == 'timedelta64':
977+
dtype = 'timedelta64[ns]'
978+
979+
# try to upcast here
980+
elif inferred_type == 'floating':
981+
dtype = 'int64'
982+
983+
else:
984+
dtype = 'object'
985+
986+
if isinstance(dtype, compat.string_types):
987+
dtype = np.dtype(dtype)
988+
967989
try:
968990
if issubclass(dtype.type, np.floating):
969991
return result.astype(dtype)
970992
elif dtype == np.bool_ or issubclass(dtype.type, np.integer):
971-
if issubclass(result.dtype.type, np.number) and notnull(result).all():
993+
994+
# do a test on the first element, if it fails then we are done
995+
r = result.ravel()
996+
arr = np.array([ r[0] ])
997+
if (arr != arr.astype(dtype)).item():
998+
return result
999+
1000+
# a comparable, e.g. a Decimal may slip in here
1001+
elif not isinstance(r[0], (np.integer,np.floating,np.bool,int,float,bool)):
1002+
return result
1003+
1004+
if issubclass(result.dtype.type, (np.object_,np.number)) and notnull(result).all():
9721005
new_result = result.astype(dtype)
9731006
if (new_result == result).all():
9741007
return new_result
@@ -1052,6 +1085,9 @@ def pad_1d(values, limit=None, mask=None):
10521085
_method = getattr(algos, 'pad_inplace_%s' % dtype, None)
10531086
elif is_datetime64_dtype(values):
10541087
_method = _pad_1d_datetime
1088+
elif is_integer_dtype(values):
1089+
values = _ensure_float64(values)
1090+
_method = algos.pad_inplace_float64
10551091
elif values.dtype == np.object_:
10561092
_method = algos.pad_inplace_object
10571093

@@ -1062,7 +1098,7 @@ def pad_1d(values, limit=None, mask=None):
10621098
mask = isnull(values)
10631099
mask = mask.view(np.uint8)
10641100
_method(values, mask, limit=limit)
1065-
1101+
return values
10661102

10671103
def backfill_1d(values, limit=None, mask=None):
10681104

@@ -1072,6 +1108,9 @@ def backfill_1d(values, limit=None, mask=None):
10721108
_method = getattr(algos, 'backfill_inplace_%s' % dtype, None)
10731109
elif is_datetime64_dtype(values):
10741110
_method = _backfill_1d_datetime
1111+
elif is_integer_dtype(values):
1112+
values = _ensure_float64(values)
1113+
_method = algos.backfill_inplace_float64
10751114
elif values.dtype == np.object_:
10761115
_method = algos.backfill_inplace_object
10771116

@@ -1083,7 +1122,7 @@ def backfill_1d(values, limit=None, mask=None):
10831122
mask = mask.view(np.uint8)
10841123

10851124
_method(values, mask, limit=limit)
1086-
1125+
return values
10871126

10881127
def pad_2d(values, limit=None, mask=None):
10891128

@@ -1093,6 +1132,9 @@ def pad_2d(values, limit=None, mask=None):
10931132
_method = getattr(algos, 'pad_2d_inplace_%s' % dtype, None)
10941133
elif is_datetime64_dtype(values):
10951134
_method = _pad_2d_datetime
1135+
elif is_integer_dtype(values):
1136+
values = _ensure_float64(values)
1137+
_method = algos.pad_2d_inplace_float64
10961138
elif values.dtype == np.object_:
10971139
_method = algos.pad_2d_inplace_object
10981140

@@ -1108,7 +1150,7 @@ def pad_2d(values, limit=None, mask=None):
11081150
else:
11091151
# for test coverage
11101152
pass
1111-
1153+
return values
11121154

11131155
def backfill_2d(values, limit=None, mask=None):
11141156

@@ -1118,6 +1160,9 @@ def backfill_2d(values, limit=None, mask=None):
11181160
_method = getattr(algos, 'backfill_2d_inplace_%s' % dtype, None)
11191161
elif is_datetime64_dtype(values):
11201162
_method = _backfill_2d_datetime
1163+
elif is_integer_dtype(values):
1164+
values = _ensure_float64(values)
1165+
_method = algos.backfill_2d_inplace_float64
11211166
elif values.dtype == np.object_:
11221167
_method = algos.backfill_2d_inplace_object
11231168

@@ -1133,9 +1178,9 @@ def backfill_2d(values, limit=None, mask=None):
11331178
else:
11341179
# for test coverage
11351180
pass
1181+
return values
11361182

1137-
1138-
def interpolate_2d(values, method='pad', axis=0, limit=None, missing=None):
1183+
def interpolate_2d(values, method='pad', axis=0, limit=None, fill_value=None):
11391184
""" perform an actual interpolation of values, values will be make 2-d if needed
11401185
fills inplace, returns the result """
11411186

@@ -1148,15 +1193,16 @@ def interpolate_2d(values, method='pad', axis=0, limit=None, missing=None):
11481193
raise Exception("cannot interpolate on a ndim == 1 with axis != 0")
11491194
values = values.reshape(tuple((1,) + values.shape))
11501195

1151-
if missing is None:
1196+
if fill_value is None:
11521197
mask = None
11531198
else: # todo create faster fill func without masking
1154-
mask = mask_missing(transf(values), missing)
1199+
mask = mask_missing(transf(values), fill_value)
11551200

1201+
method = _clean_fill_method(method)
11561202
if method == 'pad':
1157-
pad_2d(transf(values), limit=limit, mask=mask)
1203+
values = transf(pad_2d(transf(values), limit=limit, mask=mask))
11581204
else:
1159-
backfill_2d(transf(values), limit=limit, mask=mask)
1205+
values = transf(backfill_2d(transf(values), limit=limit, mask=mask))
11601206

11611207
# reshape back
11621208
if ndim == 1:
@@ -1830,6 +1876,7 @@ def _astype_nansafe(arr, dtype, copy=True):
18301876

18311877

18321878
def _clean_fill_method(method):
1879+
if method is None: return None
18331880
method = method.lower()
18341881
if method == 'ffill':
18351882
method = 'pad'

pandas/core/frame.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -2280,12 +2280,9 @@ def _reindex_multi(self, axes, copy, fill_value):
22802280
fill_value=fill_value)
22812281
return self._constructor(new_values, index=new_index,
22822282
columns=new_columns)
2283-
elif row_indexer is not None:
2284-
return self._reindex_with_indexers({0: [new_index, row_indexer]}, copy=copy, fill_value=fill_value)
2285-
elif col_indexer is not None:
2286-
return self._reindex_with_indexers({1: [new_columns, col_indexer]}, copy=copy, fill_value=fill_value)
22872283
else:
2288-
return self.copy() if copy else self
2284+
return self._reindex_with_indexers({0: [new_index, row_indexer],
2285+
1: [new_columns, col_indexer]}, copy=copy, fill_value=fill_value)
22892286

22902287
def reindex_like(self, other, method=None, copy=True, limit=None,
22912288
fill_value=NA):

pandas/core/generic.py

+37-17
Original file line numberDiff line numberDiff line change
@@ -987,7 +987,7 @@ def reindex(self, *args, **kwargs):
987987

988988
# construct the args
989989
axes, kwargs = self._construct_axes_from_arguments(args, kwargs)
990-
method = kwargs.get('method')
990+
method = com._clean_fill_method(kwargs.get('method'))
991991
level = kwargs.get('level')
992992
copy = kwargs.get('copy', True)
993993
limit = kwargs.get('limit')
@@ -1003,11 +1003,15 @@ def reindex(self, *args, **kwargs):
10031003
except:
10041004
pass
10051005

1006-
# perform the reindex on the axes
1007-
if copy and not com._count_not_none(*axes.values()):
1008-
return self.copy()
1006+
# if all axes that are requested to reindex are equal, then only copy if indicated
1007+
# must have index names equal here as well as values
1008+
if all([ self._get_axis(axis).identical(ax) for axis, ax in axes.items() if ax is not None ]):
1009+
if copy:
1010+
return self.copy()
1011+
return self
10091012

1010-
return self._reindex_axes(axes, level, limit, method, fill_value, copy, takeable=takeable)
1013+
# perform the reindex on the axes
1014+
return self._reindex_axes(axes, level, limit, method, fill_value, copy, takeable=takeable)._propogate_attributes(self)
10111015

10121016
def _reindex_axes(self, axes, level, limit, method, fill_value, copy, takeable=False):
10131017
""" perform the reinxed for all the axes """
@@ -1025,7 +1029,8 @@ def _reindex_axes(self, axes, level, limit, method, fill_value, copy, takeable=F
10251029
new_index, indexer = self._get_axis(a).reindex(
10261030
labels, level=level, limit=limit, takeable=takeable)
10271031
obj = obj._reindex_with_indexers(
1028-
{axis: [labels, indexer]}, method, fill_value, copy)
1032+
{axis: [new_index, indexer]}, method=method, fill_value=fill_value,
1033+
limit=limit, copy=copy)
10291034

10301035
return obj
10311036

@@ -1077,23 +1082,29 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True,
10771082

10781083
axis_name = self._get_axis_name(axis)
10791084
axis_values = self._get_axis(axis_name)
1085+
method = com._clean_fill_method(method)
10801086
new_index, indexer = axis_values.reindex(labels, method, level,
10811087
limit=limit, copy_if_needed=True)
1082-
return self._reindex_with_indexers({axis: [new_index, indexer]}, method, fill_value, copy)
1088+
return self._reindex_with_indexers({axis: [new_index, indexer]}, method=method, fill_value=fill_value,
1089+
limit=limit, copy=copy)._propogate_attributes(self)
10831090

1084-
def _reindex_with_indexers(self, reindexers, method=None, fill_value=np.nan, copy=False):
1091+
def _reindex_with_indexers(self, reindexers, method=None, fill_value=np.nan, limit=None, copy=False):
10851092

10861093
# reindex doing multiple operations on different axes if indicated
10871094
new_data = self._data
10881095
for axis in sorted(reindexers.keys()):
10891096
index, indexer = reindexers[axis]
10901097
baxis = self._get_block_manager_axis(axis)
10911098

1099+
if index is None:
1100+
continue
1101+
index = _ensure_index(index)
1102+
10921103
# reindex the axis
10931104
if method is not None:
10941105
new_data = new_data.reindex_axis(
1095-
index, method=method, axis=baxis,
1096-
fill_value=fill_value, copy=copy)
1106+
index, indexer=indexer, method=method, axis=baxis,
1107+
fill_value=fill_value, limit=limit, copy=copy)
10971108

10981109
elif indexer is not None:
10991110
# TODO: speed up on homogeneous DataFrame objects
@@ -1409,7 +1420,8 @@ def fillna(self, value=None, method=None, axis=0, inplace=False,
14091420
limit : int, default None
14101421
Maximum size gap to forward or backward fill
14111422
downcast : dict, default is None, a dict of item->dtype of what to
1412-
downcast if possible
1423+
downcast if possible, or the string 'infer' which will try to
1424+
downcast to an appropriate equal type (e.g. float64 to int64 if possible)
14131425
14141426
See also
14151427
--------
@@ -1428,21 +1440,28 @@ def fillna(self, value=None, method=None, axis=0, inplace=False,
14281440
if axis + 1 > self._AXIS_LEN:
14291441
raise ValueError(
14301442
"invalid axis passed for object type {0}".format(type(self)))
1443+
method = com._clean_fill_method(method)
14311444

14321445
if value is None:
14331446
if method is None:
14341447
raise ValueError('must specify a fill method or value')
14351448
if self._is_mixed_type and axis == 1:
14361449
if inplace:
14371450
raise NotImplementedError()
1438-
return self.T.fillna(method=method, limit=limit).T
1451+
result = self.T.fillna(method=method, limit=limit).T
1452+
1453+
# need to downcast here because of all of the transposes
1454+
result._data = result._data.downcast()
1455+
1456+
return result
14391457

14401458
method = com._clean_fill_method(method)
14411459
new_data = self._data.interpolate(method=method,
14421460
axis=axis,
14431461
limit=limit,
14441462
inplace=inplace,
1445-
coerce=True)
1463+
coerce=True,
1464+
downcast=downcast)
14461465
else:
14471466
if method is not None:
14481467
raise ValueError('cannot specify both a fill method and value')
@@ -1472,13 +1491,13 @@ def fillna(self, value=None, method=None, axis=0, inplace=False,
14721491
else:
14731492
return self._constructor(new_data)
14741493

1475-
def ffill(self, axis=0, inplace=False, limit=None):
1494+
def ffill(self, axis=0, inplace=False, limit=None, downcast=None):
14761495
return self.fillna(method='ffill', axis=axis, inplace=inplace,
1477-
limit=limit)
1496+
limit=limit, downcast=downcast)
14781497

1479-
def bfill(self, axis=0, inplace=False, limit=None):
1498+
def bfill(self, axis=0, inplace=False, limit=None, downcast=None):
14801499
return self.fillna(method='bfill', axis=axis, inplace=inplace,
1481-
limit=limit)
1500+
limit=limit, downcast=downcast)
14821501

14831502
def replace(self, to_replace=None, value=None, inplace=False, limit=None,
14841503
regex=False, method=None, axis=None):
@@ -2030,6 +2049,7 @@ def align(self, other, join='outer', axis=None, level=None, copy=True,
20302049
Aligned objects
20312050
"""
20322051
from pandas import DataFrame, Series
2052+
method = com._clean_fill_method(method)
20332053

20342054
if isinstance(other, DataFrame):
20352055
return self._align_frame(other, join=join, axis=axis, level=level,

0 commit comments

Comments
 (0)