Skip to content

Commit f9e4c8c

Browse files
authored
REF: implement _shared_docs to de-circularize dependencies (#34837)
1 parent 7dc86cc commit f9e4c8c

File tree

5 files changed

+125
-118
lines changed

5 files changed

+125
-118
lines changed

pandas/core/frame.py

+1-99
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@
135135
sanitize_index,
136136
to_arrays,
137137
)
138+
from pandas.core.reshape.melt import melt
138139
from pandas.core.series import Series
139140
from pandas.core.sorting import ensure_key_mapped
140141

@@ -7070,104 +7071,6 @@ def unstack(self, level=-1, fill_value=None):
70707071

70717072
return unstack(self, level, fill_value)
70727073

7073-
_shared_docs[
7074-
"melt"
7075-
] = """
7076-
Unpivot a DataFrame from wide to long format, optionally leaving identifiers set.
7077-
7078-
This function is useful to massage a DataFrame into a format where one
7079-
or more columns are identifier variables (`id_vars`), while all other
7080-
columns, considered measured variables (`value_vars`), are "unpivoted" to
7081-
the row axis, leaving just two non-identifier columns, 'variable' and
7082-
'value'.
7083-
%(versionadded)s
7084-
Parameters
7085-
----------
7086-
id_vars : tuple, list, or ndarray, optional
7087-
Column(s) to use as identifier variables.
7088-
value_vars : tuple, list, or ndarray, optional
7089-
Column(s) to unpivot. If not specified, uses all columns that
7090-
are not set as `id_vars`.
7091-
var_name : scalar
7092-
Name to use for the 'variable' column. If None it uses
7093-
``frame.columns.name`` or 'variable'.
7094-
value_name : scalar, default 'value'
7095-
Name to use for the 'value' column.
7096-
col_level : int or str, optional
7097-
If columns are a MultiIndex then use this level to melt.
7098-
7099-
Returns
7100-
-------
7101-
DataFrame
7102-
Unpivoted DataFrame.
7103-
7104-
See Also
7105-
--------
7106-
%(other)s : Identical method.
7107-
pivot_table : Create a spreadsheet-style pivot table as a DataFrame.
7108-
DataFrame.pivot : Return reshaped DataFrame organized
7109-
by given index / column values.
7110-
DataFrame.explode : Explode a DataFrame from list-like
7111-
columns to long format.
7112-
7113-
Examples
7114-
--------
7115-
>>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'},
7116-
... 'B': {0: 1, 1: 3, 2: 5},
7117-
... 'C': {0: 2, 1: 4, 2: 6}})
7118-
>>> df
7119-
A B C
7120-
0 a 1 2
7121-
1 b 3 4
7122-
2 c 5 6
7123-
7124-
>>> %(caller)sid_vars=['A'], value_vars=['B'])
7125-
A variable value
7126-
0 a B 1
7127-
1 b B 3
7128-
2 c B 5
7129-
7130-
>>> %(caller)sid_vars=['A'], value_vars=['B', 'C'])
7131-
A variable value
7132-
0 a B 1
7133-
1 b B 3
7134-
2 c B 5
7135-
3 a C 2
7136-
4 b C 4
7137-
5 c C 6
7138-
7139-
The names of 'variable' and 'value' columns can be customized:
7140-
7141-
>>> %(caller)sid_vars=['A'], value_vars=['B'],
7142-
... var_name='myVarname', value_name='myValname')
7143-
A myVarname myValname
7144-
0 a B 1
7145-
1 b B 3
7146-
2 c B 5
7147-
7148-
If you have multi-index columns:
7149-
7150-
>>> df.columns = [list('ABC'), list('DEF')]
7151-
>>> df
7152-
A B C
7153-
D E F
7154-
0 a 1 2
7155-
1 b 3 4
7156-
2 c 5 6
7157-
7158-
>>> %(caller)scol_level=0, id_vars=['A'], value_vars=['B'])
7159-
A variable value
7160-
0 a B 1
7161-
1 b B 3
7162-
2 c B 5
7163-
7164-
>>> %(caller)sid_vars=[('A', 'D')], value_vars=[('B', 'E')])
7165-
(A, D) variable_0 variable_1 value
7166-
0 a B E 1
7167-
1 b B E 3
7168-
2 c B E 5
7169-
"""
7170-
71717074
@Appender(
71727075
_shared_docs["melt"]
71737076
% dict(
@@ -7184,7 +7087,6 @@ def melt(
71847087
value_name="value",
71857088
col_level=None,
71867089
) -> "DataFrame":
7187-
from pandas.core.reshape.melt import melt
71887090

71897091
return melt(
71907092
self,

pandas/core/generic.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@
9797
from pandas.core.internals import BlockManager
9898
from pandas.core.missing import find_valid_index
9999
from pandas.core.ops import _align_method_FRAME
100+
from pandas.core.shared_docs import _shared_docs
100101

101102
from pandas.io.formats import format as fmt
102103
from pandas.io.formats.format import DataFrameFormatter, format_percentiles
@@ -108,7 +109,6 @@
108109

109110
# goal is to be able to define the docs close to function, while still being
110111
# able to share
111-
_shared_docs: Dict[str, str] = dict()
112112
_shared_doc_kwargs = dict(
113113
axes="keywords for axes",
114114
klass="Series/DataFrame",

pandas/core/reshape/concat.py

+14-11
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"""
44

55
from collections import abc
6-
from typing import Iterable, List, Mapping, Union, overload
6+
from typing import TYPE_CHECKING, Iterable, List, Mapping, Union, overload
77

88
import numpy as np
99

@@ -12,14 +12,14 @@
1212
from pandas.core.dtypes.concat import concat_compat
1313
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
1414

15-
from pandas import DataFrame, Index, MultiIndex, Series
1615
from pandas.core.arrays.categorical import (
1716
factorize_from_iterable,
1817
factorize_from_iterables,
1918
)
2019
import pandas.core.common as com
21-
from pandas.core.generic import NDFrame
2220
from pandas.core.indexes.api import (
21+
Index,
22+
MultiIndex,
2323
all_indexes_same,
2424
ensure_index,
2525
get_consensus_names,
@@ -28,6 +28,9 @@
2828
import pandas.core.indexes.base as ibase
2929
from pandas.core.internals import concatenate_block_managers
3030

31+
if TYPE_CHECKING:
32+
from pandas import DataFrame
33+
3134
# ---------------------------------------------------------------------
3235
# Concatenate DataFrame objects
3336

@@ -291,7 +294,7 @@ class _Concatenator:
291294

292295
def __init__(
293296
self,
294-
objs,
297+
objs: Union[Iterable[FrameOrSeries], Mapping[Label, FrameOrSeries]],
295298
axis=0,
296299
join: str = "outer",
297300
keys=None,
@@ -302,7 +305,7 @@ def __init__(
302305
copy: bool = True,
303306
sort=False,
304307
):
305-
if isinstance(objs, (NDFrame, str)):
308+
if isinstance(objs, (ABCSeries, ABCDataFrame, str)):
306309
raise TypeError(
307310
"first argument must be an iterable of pandas "
308311
f'objects, you passed an object of type "{type(objs).__name__}"'
@@ -348,7 +351,7 @@ def __init__(
348351
# consolidate data & figure out what our result ndim is going to be
349352
ndims = set()
350353
for obj in objs:
351-
if not isinstance(obj, (Series, DataFrame)):
354+
if not isinstance(obj, (ABCSeries, ABCDataFrame)):
352355
msg = (
353356
f"cannot concatenate object of type '{type(obj)}'; "
354357
"only Series and DataFrame objs are valid"
@@ -374,7 +377,7 @@ def __init__(
374377
# filter out the empties if we have no multi-index possibilities
375378
# note: keep empty Series, as they affect the result columns / name
376379
non_empties = [
377-
obj for obj in objs if sum(obj.shape) > 0 or isinstance(obj, Series)
380+
obj for obj in objs if sum(obj.shape) > 0 or isinstance(obj, ABCSeries)
378381
]
379382

380383
if len(non_empties) and (
@@ -388,15 +391,15 @@ def __init__(
388391
self.objs = objs
389392

390393
# Standardize axis parameter to int
391-
if isinstance(sample, Series):
392-
axis = DataFrame._get_axis_number(axis)
394+
if isinstance(sample, ABCSeries):
395+
axis = sample._constructor_expanddim._get_axis_number(axis)
393396
else:
394397
axis = sample._get_axis_number(axis)
395398

396399
# Need to flip BlockManager axis in the DataFrame special case
397400
self._is_frame = isinstance(sample, ABCDataFrame)
398401
if self._is_frame:
399-
axis = DataFrame._get_block_manager_axis(axis)
402+
axis = sample._get_block_manager_axis(axis)
400403

401404
self._is_series = isinstance(sample, ABCSeries)
402405
if not 0 <= axis <= sample.ndim:
@@ -543,7 +546,7 @@ def _get_concat_axis(self) -> Index:
543546
num = 0
544547
has_names = False
545548
for i, x in enumerate(self.objs):
546-
if not isinstance(x, Series):
549+
if not isinstance(x, ABCSeries):
547550
raise TypeError(
548551
f"Cannot concatenate type 'Series' with "
549552
f"object of type '{type(x).__name__}'"

pandas/core/reshape/melt.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -11,27 +11,27 @@
1111

1212
from pandas.core.arrays import Categorical
1313
import pandas.core.common as com
14-
from pandas.core.frame import DataFrame, _shared_docs
1514
from pandas.core.indexes.api import Index, MultiIndex
1615
from pandas.core.reshape.concat import concat
16+
from pandas.core.shared_docs import _shared_docs
1717
from pandas.core.tools.numeric import to_numeric
1818

1919
if TYPE_CHECKING:
20-
from pandas import Series # noqa: F401
20+
from pandas import DataFrame, Series # noqa: F401
2121

2222

2323
@Appender(
2424
_shared_docs["melt"]
2525
% dict(caller="pd.melt(df, ", versionadded="", other="DataFrame.melt")
2626
)
2727
def melt(
28-
frame: DataFrame,
28+
frame: "DataFrame",
2929
id_vars=None,
3030
value_vars=None,
3131
var_name=None,
3232
value_name="value",
3333
col_level=None,
34-
) -> DataFrame:
34+
) -> "DataFrame":
3535
# TODO: what about the existing index?
3636
# If multiindex, gather names of columns on all level for checking presence
3737
# of `id_vars` and `value_vars`
@@ -125,7 +125,7 @@ def melt(
125125

126126

127127
@deprecate_kwarg(old_arg_name="label", new_arg_name=None)
128-
def lreshape(data: DataFrame, groups, dropna: bool = True, label=None) -> DataFrame:
128+
def lreshape(data: "DataFrame", groups, dropna: bool = True, label=None) -> "DataFrame":
129129
"""
130130
Reshape wide-format data to long. Generalized inverse of DataFrame.pivot.
131131
@@ -195,8 +195,8 @@ def lreshape(data: DataFrame, groups, dropna: bool = True, label=None) -> DataFr
195195

196196

197197
def wide_to_long(
198-
df: DataFrame, stubnames, i, j, sep: str = "", suffix: str = r"\d+"
199-
) -> DataFrame:
198+
df: "DataFrame", stubnames, i, j, sep: str = "", suffix: str = r"\d+"
199+
) -> "DataFrame":
200200
r"""
201201
Wide panel to long format. Less flexible but more user-friendly than melt.
202202

pandas/core/shared_docs.py

+102
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
from typing import Dict
2+
3+
_shared_docs: Dict[str, str] = dict()
4+
5+
6+
_shared_docs[
7+
"melt"
8+
] = """
9+
Unpivot a DataFrame from wide to long format, optionally leaving identifiers set.
10+
11+
This function is useful to massage a DataFrame into a format where one
12+
or more columns are identifier variables (`id_vars`), while all other
13+
columns, considered measured variables (`value_vars`), are "unpivoted" to
14+
the row axis, leaving just two non-identifier columns, 'variable' and
15+
'value'.
16+
%(versionadded)s
17+
Parameters
18+
----------
19+
id_vars : tuple, list, or ndarray, optional
20+
Column(s) to use as identifier variables.
21+
value_vars : tuple, list, or ndarray, optional
22+
Column(s) to unpivot. If not specified, uses all columns that
23+
are not set as `id_vars`.
24+
var_name : scalar
25+
Name to use for the 'variable' column. If None it uses
26+
``frame.columns.name`` or 'variable'.
27+
value_name : scalar, default 'value'
28+
Name to use for the 'value' column.
29+
col_level : int or str, optional
30+
If columns are a MultiIndex then use this level to melt.
31+
32+
Returns
33+
-------
34+
DataFrame
35+
Unpivoted DataFrame.
36+
37+
See Also
38+
--------
39+
%(other)s : Identical method.
40+
pivot_table : Create a spreadsheet-style pivot table as a DataFrame.
41+
DataFrame.pivot : Return reshaped DataFrame organized
42+
by given index / column values.
43+
DataFrame.explode : Explode a DataFrame from list-like
44+
columns to long format.
45+
46+
Examples
47+
--------
48+
>>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'},
49+
... 'B': {0: 1, 1: 3, 2: 5},
50+
... 'C': {0: 2, 1: 4, 2: 6}})
51+
>>> df
52+
A B C
53+
0 a 1 2
54+
1 b 3 4
55+
2 c 5 6
56+
57+
>>> %(caller)sid_vars=['A'], value_vars=['B'])
58+
A variable value
59+
0 a B 1
60+
1 b B 3
61+
2 c B 5
62+
63+
>>> %(caller)sid_vars=['A'], value_vars=['B', 'C'])
64+
A variable value
65+
0 a B 1
66+
1 b B 3
67+
2 c B 5
68+
3 a C 2
69+
4 b C 4
70+
5 c C 6
71+
72+
The names of 'variable' and 'value' columns can be customized:
73+
74+
>>> %(caller)sid_vars=['A'], value_vars=['B'],
75+
... var_name='myVarname', value_name='myValname')
76+
A myVarname myValname
77+
0 a B 1
78+
1 b B 3
79+
2 c B 5
80+
81+
If you have multi-index columns:
82+
83+
>>> df.columns = [list('ABC'), list('DEF')]
84+
>>> df
85+
A B C
86+
D E F
87+
0 a 1 2
88+
1 b 3 4
89+
2 c 5 6
90+
91+
>>> %(caller)scol_level=0, id_vars=['A'], value_vars=['B'])
92+
A variable value
93+
0 a B 1
94+
1 b B 3
95+
2 c B 5
96+
97+
>>> %(caller)sid_vars=[('A', 'D')], value_vars=[('B', 'E')])
98+
(A, D) variable_0 variable_1 value
99+
0 a B E 1
100+
1 b B E 3
101+
2 c B E 5
102+
"""

0 commit comments

Comments
 (0)