
CLN: refactor Series.reindex to core/generic #4610

Merged · 2 commits · Aug 21, 2013
11 changes: 8 additions & 3 deletions doc/source/release.rst
@@ -115,6 +115,8 @@ pandas 0.13
- ``MultiIndex.astype()`` now only allows ``np.object_``-like dtypes and
now returns a ``MultiIndex`` rather than an ``Index``. (:issue:`4039`)

- Infer and downcast dtype if ``downcast='infer'`` is passed to ``fillna/ffill/bfill`` (:issue:`4604`)

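As an editor's illustration (not part of the diff), the new ``downcast='infer'`` option lets a fill recover an integer dtype when every filled value is still integral:

```python
import numpy as np
import pandas as pd

s = pd.Series([1.0, np.nan, 3.0])    # NaN forces a float64 dtype
filled = s.ffill(downcast='infer')   # forward fill, then try to downcast
print(filled.dtype)                  # int64; stays float64 without downcast='infer'
```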
**Internal Refactoring**

In 0.13.0 there is a major refactor primarily to subclass ``Series`` from ``NDFrame``,
@@ -144,8 +146,6 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
- support attribute access for setting
- filter supports same api as original ``DataFrame`` filter

- Reindex called with no arguments will now return a copy of the input object

- Series now inherits from ``NDFrame`` rather than directly from ``ndarray``.
There are several minor changes that affect the API.

@@ -185,6 +185,9 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`

- Indexing with dtype conversions fixed (:issue:`4463`, :issue:`4204`)

- Refactor Series.reindex to core/generic.py (:issue:`4604`, :issue:`4618`); ``method=`` now
works when reindexing a Series

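A small usage sketch of what this enables (editor's illustration, not part of the diff):

```python
import pandas as pd

s = pd.Series([1, 2, 3], index=[0, 3, 6])
# method= is now honoured when reindexing a Series
s.reindex(range(7), method='ffill')
# 0    1
# 1    1
# 2    1
# 3    2
# 4    2
# 5    2
# 6    3
```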
**Experimental Features**

**Bug Fixes**
@@ -210,7 +213,7 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
- In ``to_json``, raise if a passed ``orient`` would cause loss of data because
of a duplicate index (:issue:`4359`)
- In ``to_json``, fix date handling so milliseconds are the default timestamp
as the docstring says (:issue:`4362`).
- JSON NaT handling fixed, NaTs are now serialised to `null` (:issue:`4498`)
- Fixed passing ``keep_default_na=False`` when ``na_values=None`` (:issue:`4318`)
- Fixed bug with ``values`` raising an error on a DataFrame with duplicate columns and mixed
@@ -256,6 +259,8 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
- Fix bug in ``pd.read_clipboard`` on windows with PY3 (:issue:`4561`); not decoding properly
- ``tslib.get_period_field()`` and ``tslib.get_period_field_arr()`` now raise
if code argument out of range (:issue:`4519`, :issue:`4520`)
- Fix reindexing with multiple axes; a matching axis was not replacing the current axis, leading
to a possible lazy frequency inference issue (:issue:`3317`)

pandas 0.12
===========
5 changes: 5 additions & 0 deletions doc/source/v0.13.0.txt
@@ -96,6 +96,8 @@ API changes
# and all methods take an inplace kwarg
index.set_names(["bob", "cranberry"], inplace=True)

- Infer and downcast dtype if ``downcast='infer'`` is passed to ``fillna/ffill/bfill`` (:issue:`4604`)

Enhancements
~~~~~~~~~~~~

@@ -237,6 +239,9 @@ and behaviors. Series formerly subclassed directly from ``ndarray``. (:issue:`40

- Indexing with dtype conversions fixed (:issue:`4463`, :issue:`4204`)

- Refactor Series.reindex to core/generic.py (:issue:`4604`, :issue:`4618`); ``method=`` now
works when reindexing a Series

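Because ``limit=`` is now threaded through ``_reindex_with_indexers`` as well (see the ``pandas/core/generic.py`` hunks later in this diff), a filled reindex can be capped; a hedged sketch of that behaviour:

```python
import pandas as pd

s = pd.Series([1.0, 2.0], index=[0, 4])
# pad at most one consecutive new label per existing value
s.reindex(range(5), method='pad', limit=1)
# 0    1.0
# 1    1.0
# 2    NaN
# 3    NaN
# 4    2.0
```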
Bug Fixes
~~~~~~~~~

69 changes: 58 additions & 11 deletions pandas/core/common.py
@@ -961,14 +961,47 @@ def _possibly_downcast_to_dtype(result, dtype):
""" try to cast to the specified dtype (e.g. convert back to bool/int
or could be an astype of float64->float32) """

if np.isscalar(result):
if np.isscalar(result) or not len(result):
return result

if isinstance(dtype, compat.string_types):
if dtype == 'infer':
inferred_type = lib.infer_dtype(_ensure_object(result.ravel()))
if inferred_type == 'boolean':
dtype = 'bool'
elif inferred_type == 'integer':
dtype = 'int64'
elif inferred_type == 'datetime64':
dtype = 'datetime64[ns]'
elif inferred_type == 'timedelta64':
dtype = 'timedelta64[ns]'

# try to upcast here
elif inferred_type == 'floating':
dtype = 'int64'

else:
dtype = 'object'

if isinstance(dtype, compat.string_types):
dtype = np.dtype(dtype)

try:
if issubclass(dtype.type, np.floating):
return result.astype(dtype)
elif dtype == np.bool_ or issubclass(dtype.type, np.integer):
if issubclass(result.dtype.type, np.number) and notnull(result).all():

# do a test on the first element, if it fails then we are done
r = result.ravel()
arr = np.array([ r[0] ])
if (arr != arr.astype(dtype)).item():
return result

# a comparable, e.g. a Decimal may slip in here
elif not isinstance(r[0], (np.integer,np.floating,np.bool,int,float,bool)):
return result

if issubclass(result.dtype.type, (np.object_,np.number)) and notnull(result).all():
new_result = result.astype(dtype)
if (new_result == result).all():
return new_result
@@ -1052,6 +1085,9 @@ def pad_1d(values, limit=None, mask=None):
_method = getattr(algos, 'pad_inplace_%s' % dtype, None)
elif is_datetime64_dtype(values):
_method = _pad_1d_datetime
elif is_integer_dtype(values):
values = _ensure_float64(values)
_method = algos.pad_inplace_float64
elif values.dtype == np.object_:
_method = algos.pad_inplace_object

@@ -1062,7 +1098,7 @@
mask = isnull(values)
mask = mask.view(np.uint8)
_method(values, mask, limit=limit)

return values

def backfill_1d(values, limit=None, mask=None):

@@ -1072,6 +1108,9 @@
_method = getattr(algos, 'backfill_inplace_%s' % dtype, None)
elif is_datetime64_dtype(values):
_method = _backfill_1d_datetime
elif is_integer_dtype(values):
values = _ensure_float64(values)
_method = algos.backfill_inplace_float64
elif values.dtype == np.object_:
_method = algos.backfill_inplace_object

@@ -1083,7 +1122,7 @@
mask = mask.view(np.uint8)

_method(values, mask, limit=limit)

return values

def pad_2d(values, limit=None, mask=None):

@@ -1093,6 +1132,9 @@
_method = getattr(algos, 'pad_2d_inplace_%s' % dtype, None)
elif is_datetime64_dtype(values):
_method = _pad_2d_datetime
elif is_integer_dtype(values):
values = _ensure_float64(values)
_method = algos.pad_2d_inplace_float64
elif values.dtype == np.object_:
_method = algos.pad_2d_inplace_object

@@ -1108,7 +1150,7 @@
else:
# for test coverage
pass

return values

def backfill_2d(values, limit=None, mask=None):

@@ -1118,6 +1160,9 @@
_method = getattr(algos, 'backfill_2d_inplace_%s' % dtype, None)
elif is_datetime64_dtype(values):
_method = _backfill_2d_datetime
elif is_integer_dtype(values):
values = _ensure_float64(values)
_method = algos.backfill_2d_inplace_float64
elif values.dtype == np.object_:
_method = algos.backfill_2d_inplace_object

@@ -1133,9 +1178,9 @@
else:
# for test coverage
pass
return values


def interpolate_2d(values, method='pad', axis=0, limit=None, missing=None):
def interpolate_2d(values, method='pad', axis=0, limit=None, fill_value=None):
""" perform an actual interpolation of values, values will be make 2-d if needed
fills inplace, returns the result """

@@ -1148,15 +1193,16 @@ def interpolate_2d(values, method='pad', axis=0, limit=None, missing=None):
raise Exception("cannot interpolate on a ndim == 1 with axis != 0")
values = values.reshape(tuple((1,) + values.shape))

if missing is None:
if fill_value is None:
mask = None
else: # todo create faster fill func without masking
mask = mask_missing(transf(values), missing)
mask = mask_missing(transf(values), fill_value)

method = _clean_fill_method(method)
if method == 'pad':
pad_2d(transf(values), limit=limit, mask=mask)
values = transf(pad_2d(transf(values), limit=limit, mask=mask))
else:
backfill_2d(transf(values), limit=limit, mask=mask)
values = transf(backfill_2d(transf(values), limit=limit, mask=mask))

# reshape back
if ndim == 1:
@@ -1830,6 +1876,7 @@ def _astype_nansafe(arr, dtype, copy=True):


def _clean_fill_method(method):
if method is None: return None
method = method.lower()
if method == 'ffill':
method = 'pad'
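Tying the ``common.py`` changes together, a plain-NumPy sketch (an editor's approximation, not the pandas implementation): integer blocks cannot hold NaN, so they are upcast to float64 before padding, filled in place, and only downcast back to int64 when the round trip is lossless.

```python
import numpy as np

def pad_then_maybe_downcast(values):
    # values: 1-d float64 array with NaN marking the holes
    # (integer input would first be routed through _ensure_float64)
    out = values.astype('float64')
    last = np.nan
    for i, missing in enumerate(np.isnan(out)):
        if missing:
            out[i] = last            # forward fill, as algos.pad_inplace_float64 does
        else:
            last = out[i]
    # mimic downcast='infer': back to int64 only if nothing is lost
    if not np.isnan(out).any() and (out == out.astype('int64')).all():
        return out.astype('int64')
    return out

pad_then_maybe_downcast(np.array([1.0, np.nan, 3.0]))   # array([1, 1, 3])
```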
7 changes: 2 additions & 5 deletions pandas/core/frame.py
@@ -2280,12 +2280,9 @@ def _reindex_multi(self, axes, copy, fill_value):
fill_value=fill_value)
return self._constructor(new_values, index=new_index,
columns=new_columns)
elif row_indexer is not None:
return self._reindex_with_indexers({0: [new_index, row_indexer]}, copy=copy, fill_value=fill_value)
elif col_indexer is not None:
return self._reindex_with_indexers({1: [new_columns, col_indexer]}, copy=copy, fill_value=fill_value)
else:
return self.copy() if copy else self
return self._reindex_with_indexers({0: [new_index, row_indexer],
1: [new_columns, col_indexer]}, copy=copy, fill_value=fill_value)

def reindex_like(self, other, method=None, copy=True, limit=None,
fill_value=NA):
54 changes: 37 additions & 17 deletions pandas/core/generic.py
@@ -987,7 +987,7 @@ def reindex(self, *args, **kwargs):

# construct the args
axes, kwargs = self._construct_axes_from_arguments(args, kwargs)
method = kwargs.get('method')
method = com._clean_fill_method(kwargs.get('method'))
level = kwargs.get('level')
copy = kwargs.get('copy', True)
limit = kwargs.get('limit')
@@ -1003,11 +1003,15 @@
except:
pass

# perform the reindex on the axes
if copy and not com._count_not_none(*axes.values()):
return self.copy()
# if all axes that are requested to reindex are equal, then only copy if indicated
# must have index names equal here as well as values
if all([ self._get_axis(axis).identical(ax) for axis, ax in axes.items() if ax is not None ]):
if copy:
return self.copy()
return self

return self._reindex_axes(axes, level, limit, method, fill_value, copy, takeable=takeable)
# perform the reindex on the axes
return self._reindex_axes(axes, level, limit, method, fill_value, copy, takeable=takeable)._propogate_attributes(self)

def _reindex_axes(self, axes, level, limit, method, fill_value, copy, takeable=False):
""" perform the reinxed for all the axes """
@@ -1025,7 +1029,8 @@ def _reindex_axes(self, axes, level, limit, method, fill_value, copy, takeable=F
new_index, indexer = self._get_axis(a).reindex(
labels, level=level, limit=limit, takeable=takeable)
obj = obj._reindex_with_indexers(
{axis: [labels, indexer]}, method, fill_value, copy)
{axis: [new_index, indexer]}, method=method, fill_value=fill_value,
limit=limit, copy=copy)

return obj

@@ -1077,23 +1082,29 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True,

axis_name = self._get_axis_name(axis)
axis_values = self._get_axis(axis_name)
method = com._clean_fill_method(method)
new_index, indexer = axis_values.reindex(labels, method, level,
limit=limit, copy_if_needed=True)
return self._reindex_with_indexers({axis: [new_index, indexer]}, method, fill_value, copy)
return self._reindex_with_indexers({axis: [new_index, indexer]}, method=method, fill_value=fill_value,
limit=limit, copy=copy)._propogate_attributes(self)

def _reindex_with_indexers(self, reindexers, method=None, fill_value=np.nan, copy=False):
def _reindex_with_indexers(self, reindexers, method=None, fill_value=np.nan, limit=None, copy=False):

# reindex, doing multiple operations on different axes if indicated
new_data = self._data
for axis in sorted(reindexers.keys()):
index, indexer = reindexers[axis]
baxis = self._get_block_manager_axis(axis)

if index is None:
continue
index = _ensure_index(index)

# reindex the axis
if method is not None:
new_data = new_data.reindex_axis(
index, method=method, axis=baxis,
fill_value=fill_value, copy=copy)
index, indexer=indexer, method=method, axis=baxis,
fill_value=fill_value, limit=limit, copy=copy)

elif indexer is not None:
# TODO: speed up on homogeneous DataFrame objects
@@ -1409,7 +1420,8 @@ def fillna(self, value=None, method=None, axis=0, inplace=False,
limit : int, default None
Maximum size gap to forward or backward fill
downcast : dict, default is None, a dict of item->dtype of what to
downcast if possible
downcast if possible, or the string 'infer' which will try to
downcast to an appropriate equal type (e.g. float64 to int64 if possible)

See also
--------
@@ -1428,21 +1440,28 @@
if axis + 1 > self._AXIS_LEN:
raise ValueError(
"invalid axis passed for object type {0}".format(type(self)))
method = com._clean_fill_method(method)

if value is None:
if method is None:
raise ValueError('must specify a fill method or value')
if self._is_mixed_type and axis == 1:
if inplace:
raise NotImplementedError()
return self.T.fillna(method=method, limit=limit).T
result = self.T.fillna(method=method, limit=limit).T

# need to downcast here because of all of the transposes
result._data = result._data.downcast()

return result

method = com._clean_fill_method(method)
new_data = self._data.interpolate(method=method,
axis=axis,
limit=limit,
inplace=inplace,
coerce=True)
coerce=True,
downcast=downcast)
else:
if method is not None:
raise ValueError('cannot specify both a fill method and value')
@@ -1472,13 +1491,13 @@ def fillna(self, value=None, method=None, axis=0, inplace=False,
else:
return self._constructor(new_data)

def ffill(self, axis=0, inplace=False, limit=None):
def ffill(self, axis=0, inplace=False, limit=None, downcast=None):
return self.fillna(method='ffill', axis=axis, inplace=inplace,
limit=limit)
limit=limit, downcast=downcast)

def bfill(self, axis=0, inplace=False, limit=None):
def bfill(self, axis=0, inplace=False, limit=None, downcast=None):
return self.fillna(method='bfill', axis=axis, inplace=inplace,
limit=limit)
limit=limit, downcast=downcast)

def replace(self, to_replace=None, value=None, inplace=False, limit=None,
regex=False, method=None, axis=None):
@@ -2030,6 +2049,7 @@ def align(self, other, join='outer', axis=None, level=None, copy=True,
Aligned objects
"""
from pandas import DataFrame, Series
method = com._clean_fill_method(method)

if isinstance(other, DataFrame):
return self._align_frame(other, join=join, axis=axis, level=level,
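Finally, the short-circuit added in the first ``generic.py`` hunk means a reindex onto identical axes no longer runs through the indexer machinery; a hedged illustration of the resulting copy semantics (0.13-era behaviour):

```python
import pandas as pd

df = pd.DataFrame({'a': [1, 2]}, index=['x', 'y'])

# identical axes: a copy by default...
print(df.reindex(index=['x', 'y']) is df)               # False
# ...and, per this refactor, the very same object when copy=False
print(df.reindex(index=['x', 'y'], copy=False) is df)   # True in 0.13
```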