Skip to content

Commit 77e488b

Browse files
authored
REF: extract classes in pandas/core/describe.py (#39186)
1 parent 91433f8 commit 77e488b

File tree

1 file changed

+113
-106
lines changed

1 file changed

+113
-106
lines changed

pandas/core/describe.py

+113 -106
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,14 @@
55
"""
66
from __future__ import annotations
77

8+
from abc import ABC, abstractmethod
89
from typing import TYPE_CHECKING, List, Optional, Sequence, Union, cast
910
import warnings
1011

1112
import numpy as np
1213

1314
from pandas._libs.tslibs import Timestamp
14-
from pandas._typing import FrameOrSeries, Hashable
15+
from pandas._typing import FrameOrSeries, FrameOrSeriesUnion, Hashable
1516
from pandas.util._validators import validate_percentile
1617

1718
from pandas.core.dtypes.common import (
@@ -62,106 +63,138 @@ def describe_ndframe(
6263
"""
6364
percentiles = refine_percentiles(percentiles)
6465

66+
describer: NDFrameDescriberAbstract
6567
if obj.ndim == 1:
66-
result_series = describe_series(
67-
cast("Series", obj),
68-
percentiles,
69-
datetime_is_numeric,
68+
describer = SeriesDescriber(
69+
obj=cast("Series", obj),
70+
datetime_is_numeric=datetime_is_numeric,
71+
)
72+
else:
73+
describer = DataFrameDescriber(
74+
obj=cast("DataFrame", obj),
75+
include=include,
76+
exclude=exclude,
77+
datetime_is_numeric=datetime_is_numeric,
7078
)
71-
return cast(FrameOrSeries, result_series)
72-
73-
frame = cast("DataFrame", obj)
74-
75-
if frame.ndim == 2 and frame.columns.size == 0:
76-
raise ValueError("Cannot describe a DataFrame without columns")
77-
78-
result_frame = describe_frame(
79-
frame=frame,
80-
include=include,
81-
exclude=exclude,
82-
percentiles=percentiles,
83-
datetime_is_numeric=datetime_is_numeric,
84-
)
85-
return cast(FrameOrSeries, result_frame)
8679

80+
result = describer.describe(percentiles=percentiles)
81+
return cast(FrameOrSeries, result)
8782

88-
def describe_series(
89-
series: "Series",
90-
percentiles: Sequence[float],
91-
datetime_is_numeric: bool,
92-
) -> Series:
93-
"""Describe series.
9483

95-
The reason for the delegation to ``describe_1d`` only:
96-
to allow for a proper stacklevel of the FutureWarning.
84+
class NDFrameDescriberAbstract(ABC):
85+
"""Abstract class for describing dataframe or series.
9786
9887
Parameters
9988
----------
100-
series : Series
101-
Series to be described.
102-
percentiles : list-like of numbers
103-
The percentiles to include in the output.
104-
datetime_is_numeric : bool, default False
89+
obj : Series or DataFrame
90+
Object to be described.
91+
datetime_is_numeric : bool
10592
Whether to treat datetime dtypes as numeric.
106-
107-
Returns
108-
-------
109-
Series
11093
"""
111-
return describe_1d(
112-
series,
113-
percentiles,
114-
datetime_is_numeric,
115-
is_series=True,
116-
)
11794

95+
def __init__(self, obj: "FrameOrSeriesUnion", datetime_is_numeric: bool):
96+
self.obj = obj
97+
self.datetime_is_numeric = datetime_is_numeric
11898

119-
def describe_frame(
120-
frame: "DataFrame",
121-
include: Optional[Union[str, Sequence[str]]],
122-
exclude: Optional[Union[str, Sequence[str]]],
123-
percentiles: Sequence[float],
124-
datetime_is_numeric: bool,
125-
) -> DataFrame:
126-
"""Describe DataFrame.
99+
@abstractmethod
100+
def describe(self, percentiles: Sequence[float]) -> FrameOrSeriesUnion:
101+
"""Describe either a series or a dataframe.
102+
103+
Parameters
104+
----------
105+
percentiles : list-like of numbers
106+
The percentiles to include in the output.
107+
"""
108+
109+
110+
class SeriesDescriber(NDFrameDescriberAbstract):
111+
"""Class responsible for creating series description."""
112+
113+
obj: "Series"
114+
115+
def describe(self, percentiles: Sequence[float]) -> Series:
116+
return describe_1d(
117+
self.obj,
118+
percentiles=percentiles,
119+
datetime_is_numeric=self.datetime_is_numeric,
120+
is_series=True,
121+
)
122+
123+
124+
class DataFrameDescriber(NDFrameDescriberAbstract):
125+
"""Class responsible for creating dataframe description.
127126
128127
Parameters
129128
----------
130-
frame : DataFrame
129+
obj : DataFrame
131130
DataFrame to be described.
132-
include : 'all', list-like of dtypes or None (default), optional
131+
include : 'all', list-like of dtypes or None
133132
A white list of data types to include in the result.
134-
exclude : list-like of dtypes or None (default), optional,
133+
exclude : list-like of dtypes or None
135134
A black list of data types to omit from the result.
136-
percentiles : list-like of numbers
137-
The percentiles to include in the output.
138-
datetime_is_numeric : bool, default False
135+
datetime_is_numeric : bool
139136
Whether to treat datetime dtypes as numeric.
140-
141-
Returns
142-
-------
143-
DataFrame
144137
"""
145-
data = select_columns(
146-
frame=frame,
147-
include=include,
148-
exclude=exclude,
149-
datetime_is_numeric=datetime_is_numeric,
150-
)
151138

152-
ldesc = [
153-
describe_1d(s, percentiles, datetime_is_numeric, is_series=False)
154-
for _, s in data.items()
155-
]
156-
157-
col_names = reorder_columns(ldesc)
158-
d = concat(
159-
[x.reindex(col_names, copy=False) for x in ldesc],
160-
axis=1,
161-
sort=False,
162-
)
163-
d.columns = data.columns.copy()
164-
return d
139+
def __init__(
140+
self,
141+
obj: "DataFrame",
142+
*,
143+
include: Optional[Union[str, Sequence[str]]],
144+
exclude: Optional[Union[str, Sequence[str]]],
145+
datetime_is_numeric: bool,
146+
):
147+
self.include = include
148+
self.exclude = exclude
149+
150+
if obj.ndim == 2 and obj.columns.size == 0:
151+
raise ValueError("Cannot describe a DataFrame without columns")
152+
153+
super().__init__(obj, datetime_is_numeric=datetime_is_numeric)
154+
155+
def describe(self, percentiles: Sequence[float]) -> DataFrame:
156+
data = self._select_data()
157+
158+
ldesc = [
159+
describe_1d(
160+
series,
161+
percentiles=percentiles,
162+
datetime_is_numeric=self.datetime_is_numeric,
163+
is_series=False,
164+
)
165+
for _, series in data.items()
166+
]
167+
168+
col_names = reorder_columns(ldesc)
169+
d = concat(
170+
[x.reindex(col_names, copy=False) for x in ldesc],
171+
axis=1,
172+
sort=False,
173+
)
174+
d.columns = data.columns.copy()
175+
return d
176+
177+
def _select_data(self):
178+
"""Select columns to be described."""
179+
if (self.include is None) and (self.exclude is None):
180+
# when some numerics are found, keep only numerics
181+
default_include = [np.number]
182+
if self.datetime_is_numeric:
183+
default_include.append("datetime")
184+
data = self.obj.select_dtypes(include=default_include)
185+
if len(data.columns) == 0:
186+
data = self.obj
187+
elif self.include == "all":
188+
if self.exclude is not None:
189+
msg = "exclude must be None when include is 'all'"
190+
raise ValueError(msg)
191+
data = self.obj
192+
else:
193+
data = self.obj.select_dtypes(
194+
include=self.include,
195+
exclude=self.exclude,
196+
)
197+
return data
165198

166199

167200
def reorder_columns(ldesc: Sequence["Series"]) -> List[Hashable]:
@@ -175,32 +208,6 @@ def reorder_columns(ldesc: Sequence["Series"]) -> List[Hashable]:
175208
return names
176209

177210

178-
def select_columns(
179-
frame: "DataFrame",
180-
include: Optional[Union[str, Sequence[str]]],
181-
exclude: Optional[Union[str, Sequence[str]]],
182-
datetime_is_numeric: bool,
183-
) -> DataFrame:
184-
"""Select columns to be described."""
185-
if (include is None) and (exclude is None):
186-
# when some numerics are found, keep only numerics
187-
default_include = [np.number]
188-
if datetime_is_numeric:
189-
default_include.append("datetime")
190-
data = frame.select_dtypes(include=default_include)
191-
if len(data.columns) == 0:
192-
data = frame
193-
elif include == "all":
194-
if exclude is not None:
195-
msg = "exclude must be None when include is 'all'"
196-
raise ValueError(msg)
197-
data = frame
198-
else:
199-
data = frame.select_dtypes(include=include, exclude=exclude)
200-
201-
return data
202-
203-
204211
def describe_numeric_1d(series: "Series", percentiles: Sequence[float]) -> Series:
205212
"""Describe series containing numerical data.
206213

0 commit comments

Comments
 (0)