Merge pull request #3639 from cpcloud/replace-with-regex-2285

jreback · jreback · commit c468d2f3d69f · 2013-05-19T09:53:11.000-07:00
single example for release notes
diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst
@@ -406,7 +406,7 @@ or you can pass the nested dictionary like so
 
 .. ipython:: python
 
-   df.replace(regex={'b': {'b': r'\s*\.\s*'}})
+   df.replace(regex={'b': {r'\s*\.\s*': nan}})
 
 You can also use the group of a regular expression match when replacing (dict
 of regex -> dict of regex), this works for lists as well
@@ -420,7 +420,7 @@ will be replaced with a scalar (list of regex -> regex)
 
 .. ipython:: python
 
-   df.replace([r'\s*\.\*', r'a|b'], nan, regex=True)
+   df.replace([r'\s*\.\s*', r'a|b'], nan, regex=True)
 
 All of the regular expression examples can also be passed with the
 ``to_replace`` argument as the ``regex`` argument. In this case the ``value``
@@ -429,7 +429,7 @@ dictionary. The previous example, in this case, would then be
 
 .. ipython:: python
 
-   df.replace(regex=[r'\s*\.\*', r'a|b'], value=nan)
+   df.replace(regex=[r'\s*\.\s*', r'a|b'], value=nan)
 
 This can be convenient if you do not want to pass ``regex=True`` every time you
 want to use a regular expression.
diff --git a/doc/source/v0.11.1.txt b/doc/source/v0.11.1.txt
@@ -155,6 +155,24 @@ Bug Fixes
     - Duplicate indexes with and empty DataFrame.from_records will return a correct frame (GH3562_)
     - Concat to produce a non-unique columns when duplicates are across dtypes is fixed (GH3602_)
 
+    For example you can do
+
+    .. ipython :: python
+
+        df = DataFrame({'a': list('ab..'), 'b': [1, 2, 3, 4]})
+        df.replace(regex=r'\s*\.\s*', value=nan)
+
+    to replace all occurrences of the string ``'.'``, together with any
+    surrounding whitespace, with ``NaN``.
+
+    Regular string replacement still works as expected. For example, you can do
+
+    .. ipython :: python
+
+        df.replace('.', nan)
+
+    to replace all occurrences of the string ``'.'`` with ``NaN``.
+
 See the `full release notes
 <https://github.com/pydata/pandas/blob/master/RELEASE.rst>`__ or issue tracker
 on GitHub for a complete list.
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -6653,12 +6653,16 @@ def test_regex_replace_dict_nested(self):
         dfmix = DataFrame(mix)
         res = dfmix.replace({'b': {r'\s*\.\s*': nan}}, regex=True)
         res2 = dfmix.copy()
+        res4 = dfmix.copy()
         res2.replace({'b': {r'\s*\.\s*': nan}}, inplace=True, regex=True)
-        print res2
+        res3 = dfmix.replace(regex={'b': {r'\s*\.\s*': nan}})
+        res4.replace(regex={'b': {r'\s*\.\s*': nan}}, inplace=True)
         expec = DataFrame({'a': mix['a'], 'b': ['a', 'b', nan, nan], 'c':
                            mix['c']})
         assert_frame_equal(res, expec)
         assert_frame_equal(res2, expec)
+        assert_frame_equal(res3, expec)
+        assert_frame_equal(res4, expec)
 
     def test_regex_replace_list_to_scalar(self):
         mix = {'a': range(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']}