Merge remote-tracking branch 'upstream/master' into rdata_c

ParfaitG · ParfaitG · commit fa3dbc117f58 · 2021-05-17T09:13:30.000-05:00
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -326,6 +326,7 @@ Other enhancements
 - :meth:`pandas.read_csv` and :meth:`pandas.read_json` expose the argument ``encoding_errors`` to control how encoding errors are handled (:issue:`39450`)
 - :meth:`.GroupBy.any` and :meth:`.GroupBy.all` use Kleene logic with nullable data types (:issue:`37506`)
 - :meth:`.GroupBy.any` and :meth:`.GroupBy.all` return a ``BooleanDtype`` for columns with nullable data types (:issue:`33449`)
+- :meth:`.GroupBy.rank` now supports object-dtype data (:issue:`38278`)
 - Constructing a :class:`DataFrame` or :class:`Series` with the ``data`` argument being a Python iterable that is *not* a NumPy ``ndarray`` consisting of NumPy scalars will now result in a dtype with a precision the maximum of the NumPy scalars; this was already the case when ``data`` is a NumPy ``ndarray`` (:issue:`40908`)
 - Add keyword ``sort`` to :func:`pivot_table` to allow non-sorting of the result (:issue:`39143`)
 - Add keyword ``dropna`` to :meth:`DataFrame.value_counts` to allow counting rows that include ``NA`` values (:issue:`41325`)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -8923,6 +8923,7 @@ def append(
         Returns
         -------
         DataFrame
+            A new DataFrame consisting of the rows of the caller followed by the rows of `other`.
 
         See Also
         --------
@@ -8941,18 +8942,18 @@ def append(
 
         Examples
         --------
-        >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=list('AB'))
+        >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=list('AB'), index=['x', 'y'])
         >>> df
            A  B
-        0  1  2
-        1  3  4
-        >>> df2 = pd.DataFrame([[5, 6], [7, 8]], columns=list('AB'))
+        x  1  2
+        y  3  4
+        >>> df2 = pd.DataFrame([[5, 6], [7, 8]], columns=list('AB'), index=['x', 'y'])
         >>> df.append(df2)
            A  B
-        0  1  2
-        1  3  4
-        0  5  6
-        1  7  8
+        x  1  2
+        y  3  4
+        x  5  6
+        y  7  8
 
         With `ignore_index` set to True:
 
diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
@@ -57,129 +57,61 @@ def test_replace_inplace(self, datetime_frame, float_string_frame):
         assert return_value is None
         tm.assert_frame_equal(tsframe, datetime_frame.fillna(0))
 
-    def test_regex_replace_list_obj(self):
-        obj = {"a": list("ab.."), "b": list("efgh"), "c": list("helo")}
-        dfobj = DataFrame(obj)
-
-        # lists of regexes and values
-        # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN]
-        to_replace_res = [r"\s*\.\s*", r"e|f|g"]
-        values = [np.nan, "crap"]
-        res = dfobj.replace(to_replace_res, values, regex=True)
-        expec = DataFrame(
-            {
-                "a": ["a", "b", np.nan, np.nan],
-                "b": ["crap"] * 3 + ["h"],
-                "c": ["h", "crap", "l", "o"],
-            }
-        )
-        tm.assert_frame_equal(res, expec)
-
-        # list of [re1, re2, ..., reN] -> [re1, re2, .., reN]
-        to_replace_res = [r"\s*(\.)\s*", r"(e|f|g)"]
-        values = [r"\1\1", r"\1_crap"]
-        res = dfobj.replace(to_replace_res, values, regex=True)
-        expec = DataFrame(
-            {
-                "a": ["a", "b", "..", ".."],
-                "b": ["e_crap", "f_crap", "g_crap", "h"],
-                "c": ["h", "e_crap", "l", "o"],
-            }
-        )
-        tm.assert_frame_equal(res, expec)
-
-        # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN
-        # or vN)]
-        to_replace_res = [r"\s*(\.)\s*", r"e"]
-        values = [r"\1\1", r"crap"]
-        res = dfobj.replace(to_replace_res, values, regex=True)
-        expec = DataFrame(
-            {
-                "a": ["a", "b", "..", ".."],
-                "b": ["crap", "f", "g", "h"],
-                "c": ["h", "crap", "l", "o"],
-            }
-        )
-        tm.assert_frame_equal(res, expec)
-
-        to_replace_res = [r"\s*(\.)\s*", r"e"]
-        values = [r"\1\1", r"crap"]
-        res = dfobj.replace(value=values, regex=to_replace_res)
-        expec = DataFrame(
-            {
-                "a": ["a", "b", "..", ".."],
-                "b": ["crap", "f", "g", "h"],
-                "c": ["h", "crap", "l", "o"],
-            }
-        )
-        tm.assert_frame_equal(res, expec)
-
-    def test_regex_replace_list_obj_inplace(self):
-        # same as above with inplace=True
-        # lists of regexes and values
-        obj = {"a": list("ab.."), "b": list("efgh"), "c": list("helo")}
-        dfobj = DataFrame(obj)
-
-        # lists of regexes and values
-        # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN]
-        to_replace_res = [r"\s*\.\s*", r"e|f|g"]
-        values = [np.nan, "crap"]
-        res = dfobj.copy()
-        return_value = res.replace(to_replace_res, values, inplace=True, regex=True)
-        assert return_value is None
-        expec = DataFrame(
-            {
-                "a": ["a", "b", np.nan, np.nan],
-                "b": ["crap"] * 3 + ["h"],
-                "c": ["h", "crap", "l", "o"],
-            }
-        )
-        tm.assert_frame_equal(res, expec)
+    @pytest.mark.parametrize(
+        "to_replace,values,expected",
+        [
+            # lists of regexes and values
+            # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN]
+            (
+                [r"\s*\.\s*", r"e|f|g"],
+                [np.nan, "crap"],
+                {
+                    "a": ["a", "b", np.nan, np.nan],
+                    "b": ["crap"] * 3 + ["h"],
+                    "c": ["h", "crap", "l", "o"],
+                },
+            ),
+            # list of [re1, re2, ..., reN] -> [re1, re2, .., reN]
+            (
+                [r"\s*(\.)\s*", r"(e|f|g)"],
+                [r"\1\1", r"\1_crap"],
+                {
+                    "a": ["a", "b", "..", ".."],
+                    "b": ["e_crap", "f_crap", "g_crap", "h"],
+                    "c": ["h", "e_crap", "l", "o"],
+                },
+            ),
+            # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN
+            # or vN)]
+            (
+                [r"\s*(\.)\s*", r"e"],
+                [r"\1\1", r"crap"],
+                {
+                    "a": ["a", "b", "..", ".."],
+                    "b": ["crap", "f", "g", "h"],
+                    "c": ["h", "crap", "l", "o"],
+                },
+            ),
+        ],
+    )
+    @pytest.mark.parametrize("inplace", [True, False])
+    @pytest.mark.parametrize("use_value_regex_args", [True, False])
+    def test_regex_replace_list_obj(
+        self, to_replace, values, expected, inplace, use_value_regex_args
+    ):
+        df = DataFrame({"a": list("ab.."), "b": list("efgh"), "c": list("helo")})
 
-        # list of [re1, re2, ..., reN] -> [re1, re2, .., reN]
-        to_replace_res = [r"\s*(\.)\s*", r"(e|f|g)"]
-        values = [r"\1\1", r"\1_crap"]
-        res = dfobj.copy()
-        return_value = res.replace(to_replace_res, values, inplace=True, regex=True)
-        assert return_value is None
-        expec = DataFrame(
-            {
-                "a": ["a", "b", "..", ".."],
-                "b": ["e_crap", "f_crap", "g_crap", "h"],
-                "c": ["h", "e_crap", "l", "o"],
-            }
-        )
-        tm.assert_frame_equal(res, expec)
+        if use_value_regex_args:
+            result = df.replace(value=values, regex=to_replace, inplace=inplace)
+        else:
+            result = df.replace(to_replace, values, regex=True, inplace=inplace)
 
-        # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN
-        # or vN)]
-        to_replace_res = [r"\s*(\.)\s*", r"e"]
-        values = [r"\1\1", r"crap"]
-        res = dfobj.copy()
-        return_value = res.replace(to_replace_res, values, inplace=True, regex=True)
-        assert return_value is None
-        expec = DataFrame(
-            {
-                "a": ["a", "b", "..", ".."],
-                "b": ["crap", "f", "g", "h"],
-                "c": ["h", "crap", "l", "o"],
-            }
-        )
-        tm.assert_frame_equal(res, expec)
+        if inplace:
+            assert result is None
+            result = df
 
-        to_replace_res = [r"\s*(\.)\s*", r"e"]
-        values = [r"\1\1", r"crap"]
-        res = dfobj.copy()
-        return_value = res.replace(value=values, regex=to_replace_res, inplace=True)
-        assert return_value is None
-        expec = DataFrame(
-            {
-                "a": ["a", "b", "..", ".."],
-                "b": ["crap", "f", "g", "h"],
-                "c": ["h", "crap", "l", "o"],
-            }
-        )
-        tm.assert_frame_equal(res, expec)
+        expected = DataFrame(expected)
+        tm.assert_frame_equal(result, expected)
 
     def test_regex_replace_list_mixed(self, mix_ab):
         # mixed frame to make sure this doesn't break things