ENH: set __module__ for objects in pandas pd.DataFrame API (pandas-dev#55171)

aimlnerd · jorisvandenbossche · pmhatre1 · commit 22cc611691d1 · 2024-05-06T23:13:27.000-07:00
Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
diff --git a/doc/source/development/contributing_docstring.rst b/doc/source/development/contributing_docstring.rst
@@ -940,7 +940,7 @@ Finally, docstrings can also be appended to with the ``doc`` decorator.
 
 In this example, we'll create a parent docstring normally (this is like
 ``pandas.core.generic.NDFrame``). Then we'll have two children (like
-``pandas.core.series.Series`` and ``pandas.core.frame.DataFrame``). We'll
+``pandas.core.series.Series`` and ``pandas.DataFrame``). We'll
 substitute the class names in this docstring.
 
 .. code-block:: python
diff --git a/doc/source/user_guide/enhancingperf.rst b/doc/source/user_guide/enhancingperf.rst
@@ -453,7 +453,7 @@ by evaluate arithmetic and boolean expression all at once for large :class:`~pan
    :func:`~pandas.eval` is many orders of magnitude slower for
    smaller expressions or objects than plain Python. A good rule of thumb is
    to only use :func:`~pandas.eval` when you have a
-   :class:`.DataFrame` with more than 10,000 rows.
+   :class:`~pandas.core.frame.DataFrame` with more than 10,000 rows.
 
 Supported syntax
 ~~~~~~~~~~~~~~~~
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
@@ -6400,7 +6400,7 @@ ignored.
    In [2]: df = pd.DataFrame({'A': np.random.randn(sz), 'B': [1] * sz})
 
    In [3]: df.info()
-   <class 'pandas.core.frame.DataFrame'>
+   <class 'pandas.DataFrame'>
    RangeIndex: 1000000 entries, 0 to 999999
    Data columns (total 2 columns):
    A    1000000 non-null float64
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -840,7 +840,7 @@ then all the columns are dummy-encoded, and a :class:`SparseDataFrame` was retur
    In [2]: df = pd.DataFrame({"A": [1, 2], "B": ['a', 'b'], "C": ['a', 'a']})
 
    In [3]: type(pd.get_dummies(df, sparse=True))
-   Out[3]: pandas.core.frame.DataFrame
+   Out[3]: pandas.DataFrame
 
    In [4]: type(pd.get_dummies(df[['B', 'C']], sparse=True))
    Out[4]: pandas.core.sparse.frame.SparseDataFrame
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -414,7 +414,7 @@ Extended verbose info output for :class:`~pandas.DataFrame`
    ...                    "text_col": ["a", "b", "c"],
    ...                    "float_col": [0.0, 0.1, 0.2]})
    In [2]: df.info(verbose=True)
-   <class 'pandas.core.frame.DataFrame'>
+   <class 'pandas.DataFrame'>
    RangeIndex: 3 entries, 0 to 2
    Data columns (total 3 columns):
    int_col      3 non-null int64
diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -125,7 +125,7 @@ def ignore_doctest_warning(item: pytest.Item, path: str, message: str) -> None:
     item : pytest.Item
         pytest test item.
     path : str
-        Module path to Python object, e.g. "pandas.core.frame.DataFrame.append". A
+        Module path to Python object, e.g. "pandas.DataFrame.append". A
         warning will be filtered when item.name ends with in given path. So it is
         sufficient to specify e.g. "DataFrame.append".
     message : str
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -65,6 +65,7 @@
     Appender,
     Substitution,
     doc,
+    set_module,
 )
 from pandas.util._exceptions import (
     find_stack_level,
@@ -498,6 +499,7 @@
 # DataFrame class
 
 
+@set_module("pandas")
 class DataFrame(NDFrame, OpsMixin):
     """
     Two-dimensional, size-mutable, potentially heterogeneous tabular data.
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -227,7 +227,7 @@ def iloc(self) -> _iLocIndexer:
            a  b  c  d
         0  1  2  3  4
         >>> type(df.iloc[[0]])
-        <class 'pandas.core.frame.DataFrame'>
+        <class 'pandas.DataFrame'>
 
         >>> df.iloc[[0, 1]]
              a    b    c    d
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -855,7 +855,7 @@ class DataFrameRenderer:
         - to_csv
         - to_latex
 
-    Called in pandas.core.frame.DataFrame:
+    Called in pandas.DataFrame:
         - to_html
         - to_string
 
diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py
@@ -72,7 +72,7 @@
     Prints information of all columns:
 
     >>> df.info(verbose=True)
-    <class 'pandas.core.frame.DataFrame'>
+    <class 'pandas.DataFrame'>
     RangeIndex: 5 entries, 0 to 4
     Data columns (total 3 columns):
      #   Column     Non-Null Count  Dtype
@@ -87,7 +87,7 @@
     information:
 
     >>> df.info(verbose=False)
-    <class 'pandas.core.frame.DataFrame'>
+    <class 'pandas.DataFrame'>
     RangeIndex: 5 entries, 0 to 4
     Columns: 3 entries, int_col to float_col
     dtypes: float64(1), int64(1), object(1)
@@ -115,7 +115,7 @@
     ...     'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6)
     ... })
     >>> df.info()
-    <class 'pandas.core.frame.DataFrame'>
+    <class 'pandas.DataFrame'>
     RangeIndex: 1000000 entries, 0 to 999999
     Data columns (total 3 columns):
      #   Column    Non-Null Count    Dtype
@@ -127,7 +127,7 @@
     memory usage: 22.9+ MB
 
     >>> df.info(memory_usage='deep')
-    <class 'pandas.core.frame.DataFrame'>
+    <class 'pandas.DataFrame'>
     RangeIndex: 1000000 entries, 0 to 999999
     Data columns (total 3 columns):
      #   Column    Non-Null Count    Dtype
diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py
@@ -401,3 +401,7 @@ def test_pandas_array_alias():
         res = pd.arrays.PandasArray
 
     assert res is pd.arrays.NumpyExtensionArray
+
+
+def test_set_module():
+    assert pd.DataFrame.__module__ == "pandas"
diff --git a/pandas/tests/frame/methods/test_info.py b/pandas/tests/frame/methods/test_info.py
@@ -40,7 +40,7 @@ def test_info_empty():
     result = buf.getvalue()
     expected = textwrap.dedent(
         """\
-        <class 'pandas.core.frame.DataFrame'>
+        <class 'pandas.DataFrame'>
         RangeIndex: 0 entries
         Empty DataFrame\n"""
     )
@@ -208,7 +208,7 @@ def test_info_memory():
     bytes = float(df.memory_usage().sum())
     expected = textwrap.dedent(
         f"""\
-    <class 'pandas.core.frame.DataFrame'>
+    <class 'pandas.DataFrame'>
     RangeIndex: 2 entries, 0 to 1
     Data columns (total 1 columns):
      #   Column  Non-Null Count  Dtype
@@ -501,7 +501,7 @@ def test_info_int_columns():
     result = buf.getvalue()
     expected = textwrap.dedent(
         """\
-        <class 'pandas.core.frame.DataFrame'>
+        <class 'pandas.DataFrame'>
         Index: 2 entries, A to B
         Data columns (total 2 columns):
          #   Column  Non-Null Count  Dtype
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
@@ -509,7 +509,7 @@ def test_groupby_with_datetime_key(self):
         assert len(gb.groups.keys()) == 4
 
     def test_grouping_error_on_multidim_input(self, df):
-        msg = "Grouper for '<class 'pandas.core.frame.DataFrame'>' not 1-dimensional"
+        msg = "Grouper for '<class 'pandas.DataFrame'>' not 1-dimensional"
         with pytest.raises(ValueError, match=msg):
             Grouping(df.index, df[["A", "A"]])
 
diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py
@@ -503,3 +503,24 @@ def indent(text: str | None, indents: int = 1) -> str:
     "future_version_msg",
     "Substitution",
 ]
+
+
+def set_module(module):
+    """Private decorator for overriding __module__ on a function or class.
+
+    Example usage::
+
+        @set_module("pandas")
+        def example():
+            pass
+
+
+        assert example.__module__ == "pandas"
+    """
+
+    def decorator(func):
+        if module is not None:
+            func.__module__ = module
+        return func
+
+    return decorator