BUG/CLN: remove infer_types

cpcloud · cpcloud · commit 9f218eb08453 · 2013-06-15T05:37:03.000-04:00
diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -3,6 +3,7 @@
 """
 
 import itertools
+import re
 from datetime import datetime
 
 from numpy.lib.format import read_array, write_array
@@ -1585,8 +1586,25 @@ def is_complex_dtype(arr_or_dtype):
     return issubclass(tipo, np.complexfloating)
 
 
+def is_re(obj):
+    """Return True if `obj` is a compiled regular expression pattern."""
+    # Take the pattern type from a compiled regex instead of the private
+    # ``re._pattern_type`` alias, which is not part of the documented re API.
+    return isinstance(obj, type(re.compile('')))
+
+
+def is_re_compilable(obj):
+    """Return True if `obj` is a compiled pattern or a string.
+
+    Only the type is checked; a string is not test-compiled, so it may still
+    be an invalid regular expression.
+    """
+    return is_re(obj) or isinstance(obj, basestring)
+
+
 def is_list_like(arg):
-    return hasattr(arg, '__iter__') and not isinstance(arg, basestring) or hasattr(arg,'len')
+    """Return True if `arg` has ``__iter__`` and is not a string."""
+    return hasattr(arg, '__iter__') and not isinstance(arg, basestring)
 
 def _is_sequence(x):
     try:
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -33,8 +33,7 @@
                                   _maybe_convert_indices)
 from pandas.core.internals import (BlockManager,
                                    create_block_manager_from_arrays,
-                                   create_block_manager_from_blocks,
-                                   _re_compilable)
+                                   create_block_manager_from_blocks)
 from pandas.core.series import Series, _radd_compat
 import pandas.core.expressions as expressions
 from pandas.compat.scipy import scoreatpercentile as _quantile
@@ -3483,7 +3482,7 @@ def bfill(self, axis=0, inplace=False, limit=None):
                            limit=limit)
 
     def replace(self, to_replace=None, value=None, inplace=False, limit=None,
-                regex=False, infer_types=False, method=None, axis=None):
+                regex=False, method=None, axis=None):
         """
         Replace values given in 'to_replace' with 'value'.
 
@@ -3545,8 +3544,6 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
             string. Otherwise, `to_replace` must be ``None`` because this
             parameter will be interpreted as a regular expression or a list,
             dict, or array of regular expressions.
-        infer_types : bool, default True
-            If ``True`` attempt to convert object blocks to a better dtype.
 
         See also
         --------
@@ -3582,7 +3579,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
           and play with this method to gain intuition about how it works.
 
         """
-        if not isinstance(regex, bool) and to_replace is not None:
+        if not com.is_bool(regex) and to_replace is not None:
             raise AssertionError("'to_replace' must be 'None' if 'regex' is "
                                  "not a bool")
         if method is not None:
@@ -3628,8 +3625,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
                 to_replace, value = keys, values
 
             return self.replace(to_replace, value, inplace=inplace,
-                                limit=limit, regex=regex,
-                                infer_types=infer_types)
+                                limit=limit, regex=regex)
         else:
             if not len(self.columns):
                 return self
@@ -3673,14 +3669,14 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
                     new_data = self._data.replace(to_replace, value,
                                                   inplace=inplace, regex=regex)
             elif to_replace is None:
-                if not (_re_compilable(regex) or
+                if not (com.is_re_compilable(regex) or
                         isinstance(regex, (list, dict, np.ndarray, Series))):
                     raise TypeError("'regex' must be a string or a compiled "
                                     "regular expression or a list or dict of "
                                     "strings or regular expressions, you "
                                     "passed a {0}".format(type(regex)))
                 return self.replace(regex, value, inplace=inplace, limit=limit,
-                                    regex=True, infer_types=infer_types)
+                                    regex=True)
             else:
 
                 # dest iterable dict-like
@@ -3701,8 +3697,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
                     raise TypeError('Invalid "to_replace" type: '
                                     '{0}'.format(type(to_replace)))  # pragma: no cover
 
-        if infer_types:
-            new_data = new_data.convert()
+        new_data = new_data.convert(copy=not inplace, convert_numeric=False)
 
         if inplace:
             self._data = new_data
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -1,13 +1,14 @@
 import itertools
 import re
 from datetime import datetime
-import collections
 
 from numpy import nan
 import numpy as np
 
-from pandas.core.common import _possibly_downcast_to_dtype, isnull, _NS_DTYPE, _TD_DTYPE
-from pandas.core.index import Index, MultiIndex, _ensure_index, _handle_legacy_indexes
+from pandas.core.common import (_possibly_downcast_to_dtype, isnull, _NS_DTYPE,
+                                _TD_DTYPE)
+from pandas.core.index import (Index, MultiIndex, _ensure_index,
+                               _handle_legacy_indexes)
 from pandas.core.indexing import _check_slice_bounds, _maybe_convert_indices
 import pandas.core.common as com
 import pandas.lib as lib
@@ -18,10 +19,6 @@
 from pandas.util import py3compat
 
 
-def _re_compilable(ex):
-    return isinstance(ex, (basestring, re._pattern_type))
-
-
 class Block(object):
     """
     Canonical n-dimensional unit of homogeneous dtype contained in a pandas
@@ -744,14 +741,16 @@ def should_store(self, value):
     def replace(self, to_replace, value, inplace=False, filter=None,
                 regex=False):
         blk = [self]
-        to_rep_is_list = (isinstance(to_replace, collections.Iterable) and not
-                          isinstance(to_replace, basestring))
-        value_is_list = (isinstance(value, collections.Iterable) and not
-                         isinstance(to_replace, basestring))
+        to_rep_is_list = com.is_list_like(to_replace)
+        value_is_list = com.is_list_like(value)
         both_lists = to_rep_is_list and value_is_list
         either_list = to_rep_is_list or value_is_list
 
-        if not either_list and not regex:
+        if not either_list and com.is_re_compilable(to_replace):
+            blk[0], = blk[0]._replace_single(to_replace, value,
+                                             inplace=inplace, filter=filter,
+                                             regex=True)
+        elif not (either_list or regex):
             blk = super(ObjectBlock, self).replace(to_replace, value,
                                                    inplace=inplace,
                                                    filter=filter, regex=regex)
@@ -773,15 +772,18 @@ def replace(self, to_replace, value, inplace=False, filter=None,
     def _replace_single(self, to_replace, value, inplace=False, filter=None,
                         regex=False):
         # to_replace is regex compilable
-        to_rep_re = _re_compilable(to_replace)
+        to_rep_re = com.is_re_compilable(to_replace)
 
         # regex is regex compilable
-        regex_re = _re_compilable(regex)
+        regex_re = com.is_re_compilable(regex)
 
+        # only one will survive
         if to_rep_re and regex_re:
             raise AssertionError('only one of to_replace and regex can be '
                                  'regex compilable')
 
+        # if regex was passed as something that can be a regex (rather than a
+        # boolean)
         if regex_re:
             to_replace = regex
 
@@ -1668,7 +1670,6 @@ def get(self, item):
                 mgr._consolidate_inplace()
                 return mgr
 
-
     def iget(self, i):
         item = self.items[i]
         if self.items.is_unique:
@@ -1970,7 +1971,6 @@ def reindex_indexer(self, new_axis, indexer, axis=1, fill_value=np.nan):
     def _reindex_indexer_items(self, new_items, indexer, fill_value):
         # TODO: less efficient than I'd like
 
-        is_unique = self.items.is_unique
         item_order = com.take_1d(self.items.values, indexer)
 
         # keep track of what items aren't found anywhere
@@ -2141,7 +2141,6 @@ def rename_axis(self, mapper, axis=1):
 
     def rename_items(self, mapper, copydata=True):
         new_items = Index([mapper(x) for x in self.items])
-        is_unique = new_items.is_unique
 
         new_blocks = []
         for block in self.blocks:
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -6696,7 +6696,7 @@ def test_regex_replace_list_to_scalar(self):
         res3 = df.copy()
         res2.replace([r'\s*\.\s*', 'a|b'], nan, regex=True, inplace=True)
         res3.replace(regex=[r'\s*\.\s*', 'a|b'], value=nan, inplace=True)
-        expec = DataFrame({'a': mix['a'], 'b': np.array([nan] * 4, object),
+        expec = DataFrame({'a': mix['a'], 'b': np.array([nan] * 4),
                            'c': [nan, nan, nan, 'd']})
         assert_frame_equal(res, expec)
         assert_frame_equal(res2, expec)
@@ -6772,6 +6772,31 @@ def test_replace(self):
         df = DataFrame(index=['a', 'b'])
         assert_frame_equal(df, df.replace(5, 7))
 
+    def test_replace_list(self):
+        obj = {'a': list('ab..'), 'b': list('efgh'), 'c': list('helo')}
+        dfobj = DataFrame(obj)
+
+        ## lists of regexes and values
+        # list of [v1, v2, ..., vN] -> [v1, v2, ..., vN]; entries pair up by
+        # position, and the expected frame shows them matched literally
+        to_replace_res = [r'.', r'e']
+        values = [nan, 'crap']
+        res = dfobj.replace(to_replace_res, values)
+        expec = DataFrame({'a': ['a', 'b', nan, nan],
+                           'b': ['crap', 'f', 'g', 'h'], 'c': ['h', 'crap',
+                                                               'l', 'o']})
+        assert_frame_equal(res, expec)
+
+        # list of [v1, v2, ..., vN] -> [v1, v2, .., vN]
+        to_replace_res = [r'.', r'f']
+        values = [r'..', r'crap']
+        res = dfobj.replace(to_replace_res, values)
+        expec = DataFrame({'a': ['a', 'b', '..', '..'], 'b': ['e', 'crap', 'g',
+                                                              'h'],
+                           'c': ['h', 'e', 'l', 'o']})
+
+        assert_frame_equal(res, expec)
+
     def test_replace_series_dict(self):
         # from GH 3064
         df = DataFrame({'zero': {'a': 0.0, 'b': 1}, 'one': {'a': 2.0, 'b': 0}})
@@ -6792,10 +6817,24 @@ def test_replace_series_dict(self):
         result = df.replace(s, df.mean())
         assert_frame_equal(result, expected)
 
+    def test_replace_convert(self):
+        # GH 3907: a str -> int dict replace should yield integer columns
+        df = DataFrame([['foo', 'bar', 'bah'], ['bar', 'foo', 'bah']])
+        m = {'foo': 1, 'bar': 2, 'bah': 3}
+        rep = df.replace(m)
+        expec = Series([np.int_, np.int_, np.int_])
+        res = rep.dtypes
+        assert_series_equal(expec, res)
+
     def test_replace_mixed(self):
         self.mixed_frame['foo'][5:20] = nan
         self.mixed_frame['A'][-10:] = nan
 
+        result = self.mixed_frame.replace(np.nan, -18)
+        expected = self.mixed_frame.fillna(value=-18)
+        assert_frame_equal(result, expected)
+        assert_frame_equal(result.replace(-18, nan), self.mixed_frame)
+
         result = self.mixed_frame.replace(np.nan, -1e8)
         expected = self.mixed_frame.fillna(value=-1e8)
         assert_frame_equal(result, expected)