Skip to content

Commit e4b8ed4

Browse files
committedJun 15, 2013
Merge branch 'replace-convert-dtypes-3907' of https://github.com/cpcloud/pandas into cpcloud-replace-convert-dtypes-3907
Conflicts: RELEASE.rst
2 parents aed9996 + 7b59332 commit e4b8ed4

File tree

7 files changed

+132
-31
lines changed

7 files changed

+132
-31
lines changed
 

‎RELEASE.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,10 @@ pandas 0.11.1
127127
- ``DataFrame.interpolate()`` is now deprecated. Please use
128128
``DataFrame.fillna()`` and ``DataFrame.replace()`` instead (GH3582_,
129129
GH3675_, GH3676_).
130+
- the ``method`` and ``axis`` arguments of ``DataFrame.replace()`` are
131+
deprecated
132+
- The ``infer_types`` parameter of ``DataFrame.replace`` is removed; dtype
133+
conversion is now performed by default (GH3907_).
130134
- Deprecated display.height; display.width is now only a formatting option and
131135
does not control triggering of summary, similar to versions < 0.11.0.
132136
- Add the keyword ``allow_duplicates`` to ``DataFrame.insert`` to allow a duplicate column
@@ -141,6 +145,8 @@ pandas 0.11.1
141145
``to_pickle`` instance method, ``save`` and ``load`` will give deprecation warning.
142146
- the ``method`` and ``axis`` arguments of ``DataFrame.replace()`` are
143147
deprecated
148+
- the ``method`` and ``axis`` arguments of ``DataFrame.replace()`` are
149+
deprecated
144150
- Implement ``__nonzero__`` for ``NDFrame`` objects (GH3691_, GH3696_)
145151
- ``as_matrix`` with mixed signed and unsigned dtypes will result in 2 x the lcd of the unsigned
146152
as an int, maxing with ``int64``, to avoid precision issues (GH3733_)
@@ -239,6 +245,8 @@ pandas 0.11.1
239245
- Fix index name not propagating when using ``loc/ix`` (GH3880_)
240246
- Fix groupby where applying a custom function that returned a DataFrame was
241247
not converting dtypes (GH3911_)
248+
- Fixed a bug where ``DataFrame.replace`` with a compiled regular expression
249+
in the ``to_replace`` argument wasn't working (GH3907_)
242250

243251
.. _GH3164: https://github.com/pydata/pandas/issues/3164
244252
.. _GH2786: https://github.com/pydata/pandas/issues/2786
@@ -334,7 +342,11 @@ pandas 0.11.1
334342
.. _GH3873: https://github.com/pydata/pandas/issues/3873
335343
.. _GH3877: https://github.com/pydata/pandas/issues/3877
336344
.. _GH3880: https://github.com/pydata/pandas/issues/3880
345+
<<<<<<< HEAD
337346
.. _GH3911: https://github.com/pydata/pandas/issues/3911
347+
=======
348+
.. _GH3907: https://github.com/pydata/pandas/issues/3907
349+
>>>>>>> 7b5933247b80174de4ba571e95a1add809dd9d09
338350

339351

340352
pandas 0.11.0

‎doc/source/v0.11.1.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,9 @@ API changes
9898
- the ``method`` and ``axis`` arguments of ``DataFrame.replace()`` are
9999
deprecated
100100

101+
- The ``infer_types`` parameter of ``DataFrame.replace`` is removed; dtype
102+
conversion is now performed by default (GH3907_).
103+
101104
- Add the keyword ``allow_duplicates`` to ``DataFrame.insert`` to allow a duplicate column
102105
to be inserted if ``True``, default is ``False`` (same as prior to 0.11.1) (GH3679_)
103106
- Implement ``__nonzero__`` for ``NDFrame`` objects (GH3691_, GH3696_)
@@ -356,6 +359,8 @@ Bug Fixes
356359

357360
- ``DataFrame.from_records`` did not accept empty recarrays (GH3682_)
358361
- ``read_html`` now correctly skips tests (GH3741_)
362+
- Fixed a bug where ``DataFrame.replace`` with a compiled regular expression
363+
in the ``to_replace`` argument wasn't working (GH3907_)
359364

360365
See the `full release notes
361366
<https://github.com/pydata/pandas/blob/master/RELEASE.rst>`__ or issue tracker
@@ -410,3 +415,4 @@ on GitHub for a complete list.
410415
.. _GH3877: https://github.com/pydata/pandas/issues/3877
411416
.. _GH3659: https://github.com/pydata/pandas/issues/3659
412417
.. _GH3679: https://github.com/pydata/pandas/issues/3679
418+
.. _GH3907: https://github.com/pydata/pandas/issues/3907

‎pandas/core/common.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import itertools
6+
import re
67
from datetime import datetime
78

89
from numpy.lib.format import read_array, write_array
@@ -1585,8 +1586,21 @@ def is_complex_dtype(arr_or_dtype):
15851586
return issubclass(tipo, np.complexfloating)
15861587

15871588

1589+
def is_re(obj):
1590+
return isinstance(obj, re._pattern_type)
1591+
1592+
1593+
def is_re_compilable(obj):
1594+
try:
1595+
re.compile(obj)
1596+
except TypeError:
1597+
return False
1598+
else:
1599+
return True
1600+
1601+
15881602
def is_list_like(arg):
1589-
return hasattr(arg, '__iter__') and not isinstance(arg, basestring) or hasattr(arg,'len')
1603+
return hasattr(arg, '__iter__') and not isinstance(arg, basestring)
15901604

15911605
def _is_sequence(x):
15921606
try:

‎pandas/core/frame.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,7 @@
3333
_maybe_convert_indices)
3434
from pandas.core.internals import (BlockManager,
3535
create_block_manager_from_arrays,
36-
create_block_manager_from_blocks,
37-
_re_compilable)
36+
create_block_manager_from_blocks)
3837
from pandas.core.series import Series, _radd_compat
3938
import pandas.core.expressions as expressions
4039
from pandas.compat.scipy import scoreatpercentile as _quantile
@@ -3483,7 +3482,7 @@ def bfill(self, axis=0, inplace=False, limit=None):
34833482
limit=limit)
34843483

34853484
def replace(self, to_replace=None, value=None, inplace=False, limit=None,
3486-
regex=False, infer_types=False, method=None, axis=None):
3485+
regex=False, method=None, axis=None):
34873486
"""
34883487
Replace values given in 'to_replace' with 'value'.
34893488
@@ -3545,8 +3544,6 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
35453544
string. Otherwise, `to_replace` must be ``None`` because this
35463545
parameter will be interpreted as a regular expression or a list,
35473546
dict, or array of regular expressions.
3548-
infer_types : bool, default True
3549-
If ``True`` attempt to convert object blocks to a better dtype.
35503547
35513548
See also
35523549
--------
@@ -3582,7 +3579,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
35823579
and play with this method to gain intuition about how it works.
35833580
35843581
"""
3585-
if not isinstance(regex, bool) and to_replace is not None:
3582+
if not com.is_bool(regex) and to_replace is not None:
35863583
raise AssertionError("'to_replace' must be 'None' if 'regex' is "
35873584
"not a bool")
35883585
if method is not None:
@@ -3628,8 +3625,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
36283625
to_replace, value = keys, values
36293626

36303627
return self.replace(to_replace, value, inplace=inplace,
3631-
limit=limit, regex=regex,
3632-
infer_types=infer_types)
3628+
limit=limit, regex=regex)
36333629
else:
36343630
if not len(self.columns):
36353631
return self
@@ -3673,14 +3669,14 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
36733669
new_data = self._data.replace(to_replace, value,
36743670
inplace=inplace, regex=regex)
36753671
elif to_replace is None:
3676-
if not (_re_compilable(regex) or
3672+
if not (com.is_re_compilable(regex) or
36773673
isinstance(regex, (list, dict, np.ndarray, Series))):
36783674
raise TypeError("'regex' must be a string or a compiled "
36793675
"regular expression or a list or dict of "
36803676
"strings or regular expressions, you "
36813677
"passed a {0}".format(type(regex)))
36823678
return self.replace(regex, value, inplace=inplace, limit=limit,
3683-
regex=True, infer_types=infer_types)
3679+
regex=True)
36843680
else:
36853681

36863682
# dest iterable dict-like
@@ -3701,8 +3697,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
37013697
raise TypeError('Invalid "to_replace" type: '
37023698
'{0}'.format(type(to_replace))) # pragma: no cover
37033699

3704-
if infer_types:
3705-
new_data = new_data.convert()
3700+
new_data = new_data.convert(copy=not inplace, convert_numeric=False)
37063701

37073702
if inplace:
37083703
self._data = new_data

‎pandas/core/internals.py

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
import itertools
22
import re
33
from datetime import datetime
4-
import collections
54

65
from numpy import nan
76
import numpy as np
87

9-
from pandas.core.common import _possibly_downcast_to_dtype, isnull, _NS_DTYPE, _TD_DTYPE
10-
from pandas.core.index import Index, MultiIndex, _ensure_index, _handle_legacy_indexes
8+
from pandas.core.common import (_possibly_downcast_to_dtype, isnull, _NS_DTYPE,
9+
_TD_DTYPE)
10+
from pandas.core.index import (Index, MultiIndex, _ensure_index,
11+
_handle_legacy_indexes)
1112
from pandas.core.indexing import _check_slice_bounds, _maybe_convert_indices
1213
import pandas.core.common as com
1314
import pandas.lib as lib
@@ -18,10 +19,6 @@
1819
from pandas.util import py3compat
1920

2021

21-
def _re_compilable(ex):
22-
return isinstance(ex, (basestring, re._pattern_type))
23-
24-
2522
class Block(object):
2623
"""
2724
Canonical n-dimensional unit of homogeneous dtype contained in a pandas
@@ -744,14 +741,16 @@ def should_store(self, value):
744741
def replace(self, to_replace, value, inplace=False, filter=None,
745742
regex=False):
746743
blk = [self]
747-
to_rep_is_list = (isinstance(to_replace, collections.Iterable) and not
748-
isinstance(to_replace, basestring))
749-
value_is_list = (isinstance(value, collections.Iterable) and not
750-
isinstance(to_replace, basestring))
744+
to_rep_is_list = com.is_list_like(to_replace)
745+
value_is_list = com.is_list_like(value)
751746
both_lists = to_rep_is_list and value_is_list
752747
either_list = to_rep_is_list or value_is_list
753748

754-
if not either_list and not regex:
749+
if not either_list and com.is_re(to_replace):
750+
blk[0], = blk[0]._replace_single(to_replace, value,
751+
inplace=inplace, filter=filter,
752+
regex=True)
753+
elif not (either_list or regex):
755754
blk = super(ObjectBlock, self).replace(to_replace, value,
756755
inplace=inplace,
757756
filter=filter, regex=regex)
@@ -773,15 +772,18 @@ def replace(self, to_replace, value, inplace=False, filter=None,
773772
def _replace_single(self, to_replace, value, inplace=False, filter=None,
774773
regex=False):
775774
# to_replace is regex compilable
776-
to_rep_re = _re_compilable(to_replace)
775+
to_rep_re = com.is_re_compilable(to_replace)
777776

778777
# regex is regex compilable
779-
regex_re = _re_compilable(regex)
778+
regex_re = com.is_re_compilable(regex)
780779

780+
# only one will survive
781781
if to_rep_re and regex_re:
782782
raise AssertionError('only one of to_replace and regex can be '
783783
'regex compilable')
784784

785+
# if regex was passed as something that can be a regex (rather than a
786+
# boolean)
785787
if regex_re:
786788
to_replace = regex
787789

@@ -1668,7 +1670,6 @@ def get(self, item):
16681670
mgr._consolidate_inplace()
16691671
return mgr
16701672

1671-
16721673
def iget(self, i):
16731674
item = self.items[i]
16741675
if self.items.is_unique:
@@ -1970,7 +1971,6 @@ def reindex_indexer(self, new_axis, indexer, axis=1, fill_value=np.nan):
19701971
def _reindex_indexer_items(self, new_items, indexer, fill_value):
19711972
# TODO: less efficient than I'd like
19721973

1973-
is_unique = self.items.is_unique
19741974
item_order = com.take_1d(self.items.values, indexer)
19751975

19761976
# keep track of what items aren't found anywhere
@@ -2141,7 +2141,6 @@ def rename_axis(self, mapper, axis=1):
21412141

21422142
def rename_items(self, mapper, copydata=True):
21432143
new_items = Index([mapper(x) for x in self.items])
2144-
is_unique = new_items.is_unique
21452144

21462145
new_blocks = []
21472146
for block in self.blocks:

‎pandas/tests/test_common.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from datetime import datetime
22
import sys
3+
import re
34

45
import nose
56
import unittest
@@ -244,6 +245,18 @@ def test_groupby():
244245
assert v == expected[k]
245246

246247

248+
def test_is_list_like():
249+
passes = ([], [1], (1,), (1, 2), {'a': 1}, set([1, 'a']), Series([1]),
250+
Series([]), Series(['a']).str)
251+
fails = (1, '2', object())
252+
253+
for p in passes:
254+
assert com.is_list_like(p)
255+
256+
for f in fails:
257+
assert not com.is_list_like(f)
258+
259+
247260
def test_ensure_int32():
248261
values = np.arange(10, dtype=np.int32)
249262
result = com._ensure_int32(values)
@@ -288,6 +301,30 @@ def test_ensure_platform_int():
288301
# expected = u"\u05d0".encode('utf-8')
289302
# assert (result == expected)
290303

304+
305+
def test_is_re():
306+
passes = re.compile('ad'),
307+
fails = 'x', 2, 3, object()
308+
309+
for p in passes:
310+
assert com.is_re(p)
311+
312+
for f in fails:
313+
assert not com.is_re(f)
314+
315+
316+
def test_is_recompilable():
317+
passes = (r'a', u'x', r'asdf', re.compile('adsf'), ur'\u2233\s*',
318+
re.compile(r''))
319+
fails = 1, [], object()
320+
321+
for p in passes:
322+
assert com.is_re_compilable(p)
323+
324+
for f in fails:
325+
assert not com.is_re_compilable(f)
326+
327+
291328
class TestTake(unittest.TestCase):
292329

293330
_multiprocess_can_split_ = True

‎pandas/tests/test_frame.py

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6696,7 +6696,7 @@ def test_regex_replace_list_to_scalar(self):
66966696
res3 = df.copy()
66976697
res2.replace([r'\s*\.\s*', 'a|b'], nan, regex=True, inplace=True)
66986698
res3.replace(regex=[r'\s*\.\s*', 'a|b'], value=nan, inplace=True)
6699-
expec = DataFrame({'a': mix['a'], 'b': np.array([nan] * 4, object),
6699+
expec = DataFrame({'a': mix['a'], 'b': np.array([nan] * 4),
67006700
'c': [nan, nan, nan, 'd']})
67016701
assert_frame_equal(res, expec)
67026702
assert_frame_equal(res2, expec)
@@ -6772,6 +6772,30 @@ def test_replace(self):
67726772
df = DataFrame(index=['a', 'b'])
67736773
assert_frame_equal(df, df.replace(5, 7))
67746774

6775+
def test_replace_list(self):
6776+
obj = {'a': list('ab..'), 'b': list('efgh'), 'c': list('helo')}
6777+
dfobj = DataFrame(obj)
6778+
6779+
## lists of regexes and values
6780+
# list of [v1, v2, ..., vN] -> [v1, v2, ..., vN]
6781+
to_replace_res = [r'.', r'e']
6782+
values = [nan, 'crap']
6783+
res = dfobj.replace(to_replace_res, values)
6784+
expec = DataFrame({'a': ['a', 'b', nan, nan],
6785+
'b': ['crap', 'f', 'g', 'h'], 'c': ['h', 'crap',
6786+
'l', 'o']})
6787+
assert_frame_equal(res, expec)
6788+
6789+
# list of [v1, v2, ..., vN] -> [v1, v2, .., vN]
6790+
to_replace_res = [r'.', r'f']
6791+
values = [r'..', r'crap']
6792+
res = dfobj.replace(to_replace_res, values)
6793+
expec = DataFrame({'a': ['a', 'b', '..', '..'], 'b': ['e', 'crap', 'g',
6794+
'h'],
6795+
'c': ['h', 'e', 'l', 'o']})
6796+
6797+
assert_frame_equal(res, expec)
6798+
67756799
def test_replace_series_dict(self):
67766800
# from GH 3064
67776801
df = DataFrame({'zero': {'a': 0.0, 'b': 1}, 'one': {'a': 2.0, 'b': 0}})
@@ -6792,10 +6816,24 @@ def test_replace_series_dict(self):
67926816
result = df.replace(s, df.mean())
67936817
assert_frame_equal(result, expected)
67946818

6819+
def test_replace_convert(self):
6820+
# gh 3907
6821+
df = DataFrame([['foo', 'bar', 'bah'], ['bar', 'foo', 'bah']])
6822+
m = {'foo': 1, 'bar': 2, 'bah': 3}
6823+
rep = df.replace(m)
6824+
expec = Series([np.int_, np.int_, np.int_])
6825+
res = rep.dtypes
6826+
assert_series_equal(expec, res)
6827+
67956828
def test_replace_mixed(self):
67966829
self.mixed_frame['foo'][5:20] = nan
67976830
self.mixed_frame['A'][-10:] = nan
67986831

6832+
result = self.mixed_frame.replace(np.nan, -18)
6833+
expected = self.mixed_frame.fillna(value=-18)
6834+
assert_frame_equal(result, expected)
6835+
assert_frame_equal(result.replace(-18, nan), self.mixed_frame)
6836+
67996837
result = self.mixed_frame.replace(np.nan, -1e8)
68006838
expected = self.mixed_frame.fillna(value=-1e8)
68016839
assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)
Please sign in to comment.