5
5
"""
6
6
from __future__ import annotations
7
7
8
+ from abc import ABC , abstractmethod
8
9
from typing import TYPE_CHECKING , List , Optional , Sequence , Union , cast
9
10
import warnings
10
11
11
12
import numpy as np
12
13
13
14
from pandas ._libs .tslibs import Timestamp
14
- from pandas ._typing import FrameOrSeries , Hashable
15
+ from pandas ._typing import FrameOrSeries , FrameOrSeriesUnion , Hashable
15
16
from pandas .util ._validators import validate_percentile
16
17
17
18
from pandas .core .dtypes .common import (
@@ -62,106 +63,138 @@ def describe_ndframe(
62
63
"""
63
64
percentiles = refine_percentiles (percentiles )
64
65
66
+ describer : NDFrameDescriberAbstract
65
67
if obj .ndim == 1 :
66
- result_series = describe_series (
67
- cast ("Series" , obj ),
68
- percentiles ,
69
- datetime_is_numeric ,
68
+ describer = SeriesDescriber (
69
+ obj = cast ("Series" , obj ),
70
+ datetime_is_numeric = datetime_is_numeric ,
71
+ )
72
+ else :
73
+ describer = DataFrameDescriber (
74
+ obj = cast ("DataFrame" , obj ),
75
+ include = include ,
76
+ exclude = exclude ,
77
+ datetime_is_numeric = datetime_is_numeric ,
70
78
)
71
- return cast (FrameOrSeries , result_series )
72
-
73
- frame = cast ("DataFrame" , obj )
74
-
75
- if frame .ndim == 2 and frame .columns .size == 0 :
76
- raise ValueError ("Cannot describe a DataFrame without columns" )
77
-
78
- result_frame = describe_frame (
79
- frame = frame ,
80
- include = include ,
81
- exclude = exclude ,
82
- percentiles = percentiles ,
83
- datetime_is_numeric = datetime_is_numeric ,
84
- )
85
- return cast (FrameOrSeries , result_frame )
86
79
80
+ result = describer .describe (percentiles = percentiles )
81
+ return cast (FrameOrSeries , result )
87
82
88
- def describe_series (
89
- series : "Series" ,
90
- percentiles : Sequence [float ],
91
- datetime_is_numeric : bool ,
92
- ) -> Series :
93
- """Describe series.
94
83
95
- The reason for the delegation to ``describe_1d`` only :
96
- to allow for a proper stacklevel of the FutureWarning .
84
class NDFrameDescriberAbstract(ABC):
    """Common base for objects that produce a description of pandas data.

    The constructor only stores its arguments; concrete subclasses supply
    the actual summary logic by implementing :meth:`describe`.

    Parameters
    ----------
    obj : Series or DataFrame
        Object to be described.
    datetime_is_numeric : bool
        Whether to treat datetime dtypes as numeric.
    """

    def __init__(self, obj: "FrameOrSeriesUnion", datetime_is_numeric: bool):
        # Plain attribute storage; no validation is performed at this level.
        self.datetime_is_numeric = datetime_is_numeric
        self.obj = obj

    @abstractmethod
    def describe(self, percentiles: Sequence[float]) -> "FrameOrSeriesUnion":
        """Build the description of the stored object.

        Must be overridden; the base class cannot be instantiated while
        this method is abstract.

        Parameters
        ----------
        percentiles : list-like of numbers
            The percentiles to include in the output.
        """
108
+
109
+
110
class SeriesDescriber(NDFrameDescriberAbstract):
    """Describer for a single Series."""

    obj: "Series"

    def describe(self, percentiles: Sequence[float]) -> "Series":
        """Return the description of the stored Series.

        Parameters
        ----------
        percentiles : list-like of numbers
            The percentiles to include in the output.

        Returns
        -------
        Series
            Whatever ``describe_1d`` produces for the stored Series with
            ``is_series=True``.
        """
        # Call describe_1d directly from this method (no extra helper frame).
        # NOTE(review): describe_1d presumably emits a FutureWarning with a
        # fixed stacklevel - confirm before adding any indirection here.
        summary = describe_1d(
            self.obj,
            percentiles=percentiles,
            datetime_is_numeric=self.datetime_is_numeric,
            is_series=True,
        )
        return summary
122
+
123
+
124
class DataFrameDescriber(NDFrameDescriberAbstract):
    """Describer for a DataFrame, summarising it column by column.

    Parameters
    ----------
    obj : DataFrame
        DataFrame to be described.
    include : 'all', list-like of dtypes or None
        A white list of data types to include in the result.
    exclude : list-like of dtypes or None
        A black list of data types to omit from the result.
    datetime_is_numeric : bool
        Whether to treat datetime dtypes as numeric.

    Raises
    ------
    ValueError
        On construction, if ``obj`` is two-dimensional and has no columns.
    """

    def __init__(
        self,
        obj: "DataFrame",
        *,
        include: Optional[Union[str, Sequence[str]]],
        exclude: Optional[Union[str, Sequence[str]]],
        datetime_is_numeric: bool,
    ):
        # Reject an unusable frame before any state is stored.
        has_no_columns = obj.ndim == 2 and obj.columns.size == 0
        if has_no_columns:
            raise ValueError("Cannot describe a DataFrame without columns")

        self.include = include
        self.exclude = exclude
        super().__init__(obj, datetime_is_numeric=datetime_is_numeric)

    def describe(self, percentiles: Sequence[float]) -> "DataFrame":
        """Describe every selected column and combine the results.

        Parameters
        ----------
        percentiles : list-like of numbers
            The percentiles to include in the output.

        Returns
        -------
        DataFrame
            One column per selected input column; the column labels are a
            copy of the selected frame's column labels.
        """
        selected = self._select_data()

        # Kept as a list comprehension so describe_1d is reached from the
        # same call depth as before.
        # NOTE(review): describe_1d presumably relies on a fixed warning
        # stacklevel - confirm before turning this into a loop or helper.
        per_column = [
            describe_1d(
                column,
                percentiles=percentiles,
                datetime_is_numeric=self.datetime_is_numeric,
                is_series=False,
            )
            for _, column in selected.items()
        ]

        # Align every per-column description on one shared set of row labels
        # before gluing them together side by side.
        row_labels = reorder_columns(per_column)
        aligned = [desc.reindex(row_labels, copy=False) for desc in per_column]
        described = concat(aligned, axis=1, sort=False)
        described.columns = selected.columns.copy()
        return described

    def _select_data(self) -> "DataFrame":
        """Select columns to be described.

        Returns
        -------
        DataFrame
            * ``include`` and ``exclude`` both None: the numeric columns
              (plus datetime columns when ``datetime_is_numeric`` is set),
              or the whole frame if that selection has no columns.
            * ``include == "all"``: the whole frame.
            * otherwise: ``select_dtypes(include=..., exclude=...)``.

        Raises
        ------
        ValueError
            If ``include`` is 'all' while ``exclude`` is not None.
        """
        frame = self.obj
        include = self.include
        exclude = self.exclude

        if include is None and exclude is None:
            # when some numerics are found, keep only numerics
            wanted_dtypes = [np.number]
            if self.datetime_is_numeric:
                wanted_dtypes.append("datetime")
            numeric_part = frame.select_dtypes(include=wanted_dtypes)
            # Nothing numeric at all: fall back to describing every column.
            return frame if len(numeric_part.columns) == 0 else numeric_part

        if include == "all":
            if exclude is not None:
                msg = "exclude must be None when include is 'all'"
                raise ValueError(msg)
            return frame

        return frame.select_dtypes(include=include, exclude=exclude)
165
198
166
199
167
200
def reorder_columns (ldesc : Sequence ["Series" ]) -> List [Hashable ]:
@@ -175,32 +208,6 @@ def reorder_columns(ldesc: Sequence["Series"]) -> List[Hashable]:
175
208
return names
176
209
177
210
178
- def select_columns (
179
- frame : "DataFrame" ,
180
- include : Optional [Union [str , Sequence [str ]]],
181
- exclude : Optional [Union [str , Sequence [str ]]],
182
- datetime_is_numeric : bool ,
183
- ) -> DataFrame :
184
- """Select columns to be described."""
185
- if (include is None ) and (exclude is None ):
186
- # when some numerics are found, keep only numerics
187
- default_include = [np .number ]
188
- if datetime_is_numeric :
189
- default_include .append ("datetime" )
190
- data = frame .select_dtypes (include = default_include )
191
- if len (data .columns ) == 0 :
192
- data = frame
193
- elif include == "all" :
194
- if exclude is not None :
195
- msg = "exclude must be None when include is 'all'"
196
- raise ValueError (msg )
197
- data = frame
198
- else :
199
- data = frame .select_dtypes (include = include , exclude = exclude )
200
-
201
- return data
202
-
203
-
204
211
def describe_numeric_1d (series : "Series" , percentiles : Sequence [float ]) -> Series :
205
212
"""Describe series containing numerical data.
206
213
0 commit comments