Deprecate Groupby.dtypes (#13453)

galipremsagar · web-flow · commit c1e78b9665fc · 2023-05-26T15:26:10.000-05:00
This PR deprecates `Groupby.dtypes` to match pandas, where the corresponding groupby `dtypes` attribute is deprecated as of `pandas-2.1`.

This PR fixes 5 failing pytests. Test results with this PR:
```
= 474 failed, 95510 passed, 2044 skipped, 763 xfailed, 300 xpassed in 459.93s (0:07:39) =
```

On `pandas_2.0_feature_branch`:
```
= 479 failed, 95505 passed, 2044 skipped, 763 xfailed, 300 xpassed in 471.66s (0:07:51) =
```
diff --git a/python/cudf/cudf/core/_compat.py b/python/cudf/cudf/core/_compat.py
@@ -11,3 +11,4 @@
 PANDAS_LT_153 = PANDAS_VERSION < version.parse("1.5.3")
 PANDAS_EQ_200 = PANDAS_VERSION == version.parse("2.0.0")
 PANDAS_GE_200 = PANDAS_VERSION >= version.parse("2.0.0")
+PANDAS_GE_210 = PANDAS_VERSION >= version.parse("2.1.0")
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
@@ -291,6 +291,9 @@ def dtypes(self):
         """
         Return the dtypes in this group.
 
+        .. deprecated:: 23.08
+           Use `.dtypes` on base object instead.
+
         Returns
         -------
         pandas.DataFrame
@@ -302,17 +305,23 @@ def dtypes(self):
         >>> df = cudf.DataFrame({'a': [1, 2, 3, 3], 'b': ['x', 'y', 'z', 'a'],
         ...                      'c':[10, 11, 12, 12]})
         >>> df.groupby("a").dtypes
-                b      c
+               a       b      c
         a
-        1  object  int64
-        2  object  int64
-        3  object  int64
+        1  int64  object  int64
+        2  int64  object  int64
+        3  int64  object  int64
         """
+        warnings.warn(
+            f"{type(self).__name__}.dtypes is deprecated and will be "
+            "removed in a future version. Check the dtypes on the "
+            "base object instead",
+            FutureWarning,
+        )
         index = self.grouping.keys.unique().sort_values().to_pandas()
         return pd.DataFrame(
             {
                 name: [self.obj._dtypes[name]] * len(index)
-                for name in self.grouping.values._column_names
+                for name in self.obj._data.names
             },
             index=index,
         )
diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py
@@ -19,7 +19,12 @@
 
 import cudf
 from cudf import DataFrame, Series
-from cudf.core._compat import PANDAS_GE_150, PANDAS_LT_140, PANDAS_GE_200
+from cudf.core._compat import (
+    PANDAS_GE_150,
+    PANDAS_LT_140,
+    PANDAS_GE_200,
+    PANDAS_GE_210,
+)
 from cudf.core.udf.groupby_typing import SUPPORTED_GROUPBY_NUMPY_TYPES
 from cudf.core.udf.utils import precompiled
 from cudf.testing._utils import (
@@ -3100,8 +3105,12 @@ def test_groupby_dtypes(groups):
         {"a": [1, 2, 3, 3], "b": ["x", "y", "z", "a"], "c": [10, 11, 12, 12]}
     )
     pdf = df.to_pandas()
+    with expect_warning_if(PANDAS_GE_210):
+        expected = pdf.groupby(groups).dtypes
+    with pytest.warns(FutureWarning):
+        actual = df.groupby(groups).dtypes
 
-    assert_eq(pdf.groupby(groups).dtypes, df.groupby(groups).dtypes)
+    assert_eq(expected, actual)
 
 
 @pytest.mark.parametrize("index_names", ["a", "b", "c", ["b", "c"]])