Skip to content

Commit 9f218eb

Browse files
committed
BUG/CLN: remove infer_types
1 parent 4e02cae commit 9f218eb

File tree

4 files changed

+73
-31
lines changed

4 files changed

+73
-31
lines changed

pandas/core/common.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import itertools
6+
import re
67
from datetime import datetime
78

89
from numpy.lib.format import read_array, write_array
@@ -1585,8 +1586,16 @@ def is_complex_dtype(arr_or_dtype):
15851586
return issubclass(tipo, np.complexfloating)
15861587

15871588

1589+
def is_re(obj):
1590+
return isinstance(obj, re._pattern_type)
1591+
1592+
1593+
def is_re_compilable(obj):
1594+
return is_re(obj) or isinstance(obj, basestring)
1595+
1596+
15881597
def is_list_like(arg):
1589-
return hasattr(arg, '__iter__') and not isinstance(arg, basestring) or hasattr(arg,'len')
1598+
return hasattr(arg, '__iter__') and not isinstance(arg, basestring)
15901599

15911600
def _is_sequence(x):
15921601
try:

pandas/core/frame.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,7 @@
3333
_maybe_convert_indices)
3434
from pandas.core.internals import (BlockManager,
3535
create_block_manager_from_arrays,
36-
create_block_manager_from_blocks,
37-
_re_compilable)
36+
create_block_manager_from_blocks)
3837
from pandas.core.series import Series, _radd_compat
3938
import pandas.core.expressions as expressions
4039
from pandas.compat.scipy import scoreatpercentile as _quantile
@@ -3483,7 +3482,7 @@ def bfill(self, axis=0, inplace=False, limit=None):
34833482
limit=limit)
34843483

34853484
def replace(self, to_replace=None, value=None, inplace=False, limit=None,
3486-
regex=False, infer_types=False, method=None, axis=None):
3485+
regex=False, method=None, axis=None):
34873486
"""
34883487
Replace values given in 'to_replace' with 'value'.
34893488
@@ -3545,8 +3544,6 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
35453544
string. Otherwise, `to_replace` must be ``None`` because this
35463545
parameter will be interpreted as a regular expression or a list,
35473546
dict, or array of regular expressions.
3548-
infer_types : bool, default True
3549-
If ``True`` attempt to convert object blocks to a better dtype.
35503547
35513548
See also
35523549
--------
@@ -3582,7 +3579,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
35823579
and play with this method to gain intuition about how it works.
35833580
35843581
"""
3585-
if not isinstance(regex, bool) and to_replace is not None:
3582+
if not com.is_bool(regex) and to_replace is not None:
35863583
raise AssertionError("'to_replace' must be 'None' if 'regex' is "
35873584
"not a bool")
35883585
if method is not None:
@@ -3628,8 +3625,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
36283625
to_replace, value = keys, values
36293626

36303627
return self.replace(to_replace, value, inplace=inplace,
3631-
limit=limit, regex=regex,
3632-
infer_types=infer_types)
3628+
limit=limit, regex=regex)
36333629
else:
36343630
if not len(self.columns):
36353631
return self
@@ -3673,14 +3669,14 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
36733669
new_data = self._data.replace(to_replace, value,
36743670
inplace=inplace, regex=regex)
36753671
elif to_replace is None:
3676-
if not (_re_compilable(regex) or
3672+
if not (com.is_re_compilable(regex) or
36773673
isinstance(regex, (list, dict, np.ndarray, Series))):
36783674
raise TypeError("'regex' must be a string or a compiled "
36793675
"regular expression or a list or dict of "
36803676
"strings or regular expressions, you "
36813677
"passed a {0}".format(type(regex)))
36823678
return self.replace(regex, value, inplace=inplace, limit=limit,
3683-
regex=True, infer_types=infer_types)
3679+
regex=True)
36843680
else:
36853681

36863682
# dest iterable dict-like
@@ -3701,8 +3697,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
37013697
raise TypeError('Invalid "to_replace" type: '
37023698
'{0}'.format(type(to_replace))) # pragma: no cover
37033699

3704-
if infer_types:
3705-
new_data = new_data.convert()
3700+
new_data = new_data.convert(copy=not inplace, convert_numeric=False)
37063701

37073702
if inplace:
37083703
self._data = new_data

pandas/core/internals.py

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
import itertools
22
import re
33
from datetime import datetime
4-
import collections
54

65
from numpy import nan
76
import numpy as np
87

9-
from pandas.core.common import _possibly_downcast_to_dtype, isnull, _NS_DTYPE, _TD_DTYPE
10-
from pandas.core.index import Index, MultiIndex, _ensure_index, _handle_legacy_indexes
8+
from pandas.core.common import (_possibly_downcast_to_dtype, isnull, _NS_DTYPE,
9+
_TD_DTYPE)
10+
from pandas.core.index import (Index, MultiIndex, _ensure_index,
11+
_handle_legacy_indexes)
1112
from pandas.core.indexing import _check_slice_bounds, _maybe_convert_indices
1213
import pandas.core.common as com
1314
import pandas.lib as lib
@@ -18,10 +19,6 @@
1819
from pandas.util import py3compat
1920

2021

21-
def _re_compilable(ex):
22-
return isinstance(ex, (basestring, re._pattern_type))
23-
24-
2522
class Block(object):
2623
"""
2724
Canonical n-dimensional unit of homogeneous dtype contained in a pandas
@@ -744,14 +741,16 @@ def should_store(self, value):
744741
def replace(self, to_replace, value, inplace=False, filter=None,
745742
regex=False):
746743
blk = [self]
747-
to_rep_is_list = (isinstance(to_replace, collections.Iterable) and not
748-
isinstance(to_replace, basestring))
749-
value_is_list = (isinstance(value, collections.Iterable) and not
750-
isinstance(to_replace, basestring))
744+
to_rep_is_list = com.is_list_like(to_replace)
745+
value_is_list = com.is_list_like(value)
751746
both_lists = to_rep_is_list and value_is_list
752747
either_list = to_rep_is_list or value_is_list
753748

754-
if not either_list and not regex:
749+
if not either_list and com.is_re_compilable(to_replace):
750+
blk[0], = blk[0]._replace_single(to_replace, value,
751+
inplace=inplace, filter=filter,
752+
regex=True)
753+
elif not (either_list or regex):
755754
blk = super(ObjectBlock, self).replace(to_replace, value,
756755
inplace=inplace,
757756
filter=filter, regex=regex)
@@ -773,15 +772,18 @@ def replace(self, to_replace, value, inplace=False, filter=None,
773772
def _replace_single(self, to_replace, value, inplace=False, filter=None,
774773
regex=False):
775774
# to_replace is regex compilable
776-
to_rep_re = _re_compilable(to_replace)
775+
to_rep_re = com.is_re_compilable(to_replace)
777776

778777
# regex is regex compilable
779-
regex_re = _re_compilable(regex)
778+
regex_re = com.is_re_compilable(regex)
780779

780+
# to_replace and regex may not both be regex compilable
781781
if to_rep_re and regex_re:
782782
raise AssertionError('only one of to_replace and regex can be '
783783
'regex compilable')
784784

785+
# if regex was passed as something that can be a regex (rather than a
786+
# boolean)
785787
if regex_re:
786788
to_replace = regex
787789

@@ -1668,7 +1670,6 @@ def get(self, item):
16681670
mgr._consolidate_inplace()
16691671
return mgr
16701672

1671-
16721673
def iget(self, i):
16731674
item = self.items[i]
16741675
if self.items.is_unique:
@@ -1970,7 +1971,6 @@ def reindex_indexer(self, new_axis, indexer, axis=1, fill_value=np.nan):
19701971
def _reindex_indexer_items(self, new_items, indexer, fill_value):
19711972
# TODO: less efficient than I'd like
19721973

1973-
is_unique = self.items.is_unique
19741974
item_order = com.take_1d(self.items.values, indexer)
19751975

19761976
# keep track of what items aren't found anywhere
@@ -2141,7 +2141,6 @@ def rename_axis(self, mapper, axis=1):
21412141

21422142
def rename_items(self, mapper, copydata=True):
21432143
new_items = Index([mapper(x) for x in self.items])
2144-
is_unique = new_items.is_unique
21452144

21462145
new_blocks = []
21472146
for block in self.blocks:

pandas/tests/test_frame.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6696,7 +6696,7 @@ def test_regex_replace_list_to_scalar(self):
66966696
res3 = df.copy()
66976697
res2.replace([r'\s*\.\s*', 'a|b'], nan, regex=True, inplace=True)
66986698
res3.replace(regex=[r'\s*\.\s*', 'a|b'], value=nan, inplace=True)
6699-
expec = DataFrame({'a': mix['a'], 'b': np.array([nan] * 4, object),
6699+
expec = DataFrame({'a': mix['a'], 'b': np.array([nan] * 4),
67006700
'c': [nan, nan, nan, 'd']})
67016701
assert_frame_equal(res, expec)
67026702
assert_frame_equal(res2, expec)
@@ -6772,6 +6772,31 @@ def test_replace(self):
67726772
df = DataFrame(index=['a', 'b'])
67736773
assert_frame_equal(df, df.replace(5, 7))
67746774

6775+
def test_replace_list(self):
6776+
obj = {'a': list('ab..'), 'b': list('efgh'), 'c': list('helo')}
6777+
dfobj = DataFrame(obj)
6778+
6779+
## lists of regexes and values
6780+
# list of [v1, v2, ..., vN] -> [v1, v2, ..., vN]
6781+
import ipdb; ipdb.set_trace()
6782+
to_replace_res = [r'.', r'e']
6783+
values = [nan, 'crap']
6784+
res = dfobj.replace(to_replace_res, values)
6785+
expec = DataFrame({'a': ['a', 'b', nan, nan],
6786+
'b': ['crap', 'f', 'g', 'h'], 'c': ['h', 'crap',
6787+
'l', 'o']})
6788+
assert_frame_equal(res, expec)
6789+
6790+
# list of [v1, v2, ..., vN] -> [v1, v2, ..., vN]
6791+
to_replace_res = [r'.', r'f']
6792+
values = [r'..', r'crap']
6793+
res = dfobj.replace(to_replace_res, values)
6794+
expec = DataFrame({'a': ['a', 'b', '..', '..'], 'b': ['e', 'crap', 'g',
6795+
'h'],
6796+
'c': ['h', 'e', 'l', 'o']})
6797+
6798+
assert_frame_equal(res, expec)
6799+
67756800
def test_replace_series_dict(self):
67766801
# from GH 3064
67776802
df = DataFrame({'zero': {'a': 0.0, 'b': 1}, 'one': {'a': 2.0, 'b': 0}})
@@ -6792,10 +6817,24 @@ def test_replace_series_dict(self):
67926817
result = df.replace(s, df.mean())
67936818
assert_frame_equal(result, expected)
67946819

6820+
def test_replace_convert(self):
6821+
# GH 3907
6822+
df = DataFrame([['foo', 'bar', 'bah'], ['bar', 'foo', 'bah']])
6823+
m = {'foo': 1, 'bar': 2, 'bah': 3}
6824+
rep = df.replace(m)
6825+
expec = Series([np.int_, np.int_, np.int_])
6826+
res = rep.dtypes
6827+
assert_series_equal(expec, res)
6828+
67956829
def test_replace_mixed(self):
67966830
self.mixed_frame['foo'][5:20] = nan
67976831
self.mixed_frame['A'][-10:] = nan
67986832

6833+
result = self.mixed_frame.replace(np.nan, -18)
6834+
expected = self.mixed_frame.fillna(value=-18)
6835+
assert_frame_equal(result, expected)
6836+
assert_frame_equal(result.replace(-18, nan), self.mixed_frame)
6837+
67996838
result = self.mixed_frame.replace(np.nan, -1e8)
68006839
expected = self.mixed_frame.fillna(value=-1e8)
68016840
assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)