-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path__init__.py
268 lines (217 loc) · 7.94 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
import inspect
import operator
from functools import wraps
import pandas as pd
from pandas.core.accessor import CachedAccessor
from pandas.core.indexes.accessors import (
CombinedDatetimelikeProperties,
DatetimeProperties,
PeriodProperties,
)
try:
from pandas.core.strings import StringMethods
except ImportError: # moved in pandas 2
from pandas.core.strings.accessor import StringMethods
from pandas.util._decorators import doc
_str_boolean_methods = set(
[
'contains',
'endswith',
'isalnum',
'isalpha',
'isdecimal',
'isdigit',
'islower',
'isnumeric',
'isspace',
'istitle',
'isupper',
'match',
'startswith',
]
)
_date_boolean_methods = set(
[
'is_leap_year',
'is_month_end',
'is_month_start',
'is_quarter_end',
'is_quarter_start',
'is_year_end',
'is_year_start',
]
)
class StringSelectMethods(StringMethods):
    """String accessor whose results are used to select rows of the parent.

    It is built from an object exposing ``_parent`` (the frame or series to
    filter) and ``_series`` (the values the string methods run on).
    """

    def __init__(self, *args, **kwargs):
        selectable = args[0]
        extra_args = args[1:]
        # The superclass assigns its own ``_parent``, so the object to filter
        # is kept under a separate name.
        self._parent_frame = selectable._parent
        series = selectable._series
        self._series = series
        super().__init__(series, *extra_args, **kwargs)

    def __getattribute__(self, attr):
        # Reject public StringMethods routines that are not listed in
        # ``_str_boolean_methods``; everything else resolves normally.
        rejected = (
            not attr.startswith("_")
            and inspect.isroutine(getattr(StringMethods, attr, None))
            and attr not in _str_boolean_methods
        )
        if rejected:
            raise NotImplementedError(
                "Boolean selection with this method does not make sense."
            )
        return super().__getattribute__(attr)

    def _wrap_result(self, *args, **kwargs):
        # Use the superclass's wrapped result as the ``.loc`` key on the
        # parent instead of returning it directly.
        mask = super()._wrap_result(*args, **kwargs)
        return self._parent_frame.loc[mask]
class SelectPeriodProperties(PeriodProperties):
    """Period accessor whose ``is_leap_year`` selects rows of ``parent``."""

    def __init__(self, parent, *args, **kwargs):
        # Remaining arguments are forwarded untouched to PeriodProperties.
        # NOTE(review): presumably this must be assigned before the base
        # initializer runs — confirm against the pandas accessor base class.
        self._parent_frame = parent
        super().__init__(*args, **kwargs)

    @property
    def is_leap_year(self):
        """Return the rows of the parent selected by the leap-year mask."""
        leap_mask = super().is_leap_year
        return self._parent_frame.loc[leap_mask]
class DateSelectMethods(CombinedDatetimelikeProperties):
    """Factory for the ``select.dt`` accessor.

    Instantiating this class never yields a ``DateSelectMethods`` object:
    ``__new__`` asks pandas for the properties object matching the
    underlying series and re-wraps it in the selecting subclass.
    """

    def __new__(cls, series):
        """Return a selecting datetime/period properties object.

        Parameters
        ----------
        series
            Object exposing ``_series`` (the values to inspect) and
            ``_parent`` (the frame or series to select rows from).

        Raises
        ------
        AttributeError
            If the properties object pandas builds for ``series._series`` is
            neither ``DatetimeProperties`` nor ``PeriodProperties``.
        """
        properties = super().__new__(cls, series._series)
        if isinstance(properties, DatetimeProperties):
            return SelectDatetimeProperties(
                series._parent, properties._parent, properties.orig
            )
        if isinstance(properties, PeriodProperties):
            # Select from ``_parent`` like the datetime branch above; the
            # wrapper passed in carries no ``_frame`` attribute.
            return SelectPeriodProperties(
                series._parent, properties._parent, properties.orig
            )
        raise AttributeError(
            "Can only use select.dt accessor on "
            "datetimelike and periodlike values."
        )
def selector_wrapper(klass, method_name):
    """Build a method that selects rows of ``self._parent``.

    The returned function calls ``klass.<method_name>`` on ``self._series``
    with the caller's arguments and uses the result as a ``.loc`` key on
    ``self._parent``. It carries the metadata of the wrapped method.
    """
    wrapped = getattr(klass, method_name)

    @wraps(wrapped)
    def selector(self, *args, **kwargs):
        # When used as a series accessor, the series and the parent are the
        # same object; when reached through a frame, the parent is the frame.
        compute_mask = getattr(klass, method_name)
        mask = compute_mask(self._series, *args, **kwargs)
        return self._parent.loc[mask]

    return selector
class SelectableIndex:
    """Proxy for ``parent.index`` whose tests return rows of ``parent``.

    Comparisons and the null/membership checks compute a mask on the
    parent's index and return ``parent.loc[mask]``. Any other attribute is
    forwarded to the index captured at construction time.
    """

    def __init__(self, parent):
        self._parent = parent
        self._index = parent.index

    def __getattr__(self, attr):
        # Only reached when normal lookup fails: defer to the wrapped index.
        return getattr(self._index, attr)

    def __repr__(self):
        return pd.Index.__repr__(self)

    def _rows_where(self, mask):
        """Return the rows of the parent selected by ``mask``."""
        return self._parent.loc[mask]

    def _compare(self, op, cmp):
        """Apply binary ``op`` to the parent's index and ``cmp``, then select."""
        return self._rows_where(op(self._parent.index, cmp))

    def __lt__(self, cmp):
        return self._compare(operator.lt, cmp)

    def __le__(self, cmp):
        return self._compare(operator.le, cmp)

    def __eq__(self, cmp):
        return self._compare(operator.eq, cmp)

    def __ne__(self, cmp):
        return self._compare(operator.ne, cmp)

    def __gt__(self, cmp):
        return self._compare(operator.gt, cmp)

    def __ge__(self, cmp):
        return self._compare(operator.ge, cmp)

    # The methods below take their documentation from the matching pd.Index
    # method through the ``doc`` decorator.

    @doc(pd.Index.isna)
    def isna(self):
        return self._rows_where(self._parent.index.isna())

    @doc(pd.Index.isnull)
    def isnull(self):
        return self._rows_where(self._parent.index.isnull())

    @doc(pd.Index.notnull)
    def notnull(self):
        return self._rows_where(self._parent.index.notnull())

    @doc(pd.Index.notna)
    def notna(self):
        return self._rows_where(self._parent.index.notna())

    @doc(pd.Index.isin)
    def isin(self, values, levels=None):
        membership = self._parent.index.isin(values, levels)
        return self._rows_where(membership)
@pd.api.extensions.register_series_accessor("select")
class SelectableColumn:
    """Series wrapper whose boolean tests return rows of a parent object.

    Registered as the ``select`` accessor on ``pd.Series``. Attributes not
    defined here are forwarded to the wrapped series.
    """

    # Nested accessors, built on first access.
    str = CachedAccessor("str", StringSelectMethods)
    dt = CachedAccessor("dt", DateSelectMethods)

    # Rich comparisons: evaluate on the series, select from the parent.
    __lt__ = selector_wrapper(pd.Series, "__lt__")
    __le__ = selector_wrapper(pd.Series, "__le__")
    __eq__ = selector_wrapper(pd.Series, "__eq__")
    __ne__ = selector_wrapper(pd.Series, "__ne__")
    __gt__ = selector_wrapper(pd.Series, "__gt__")
    __ge__ = selector_wrapper(pd.Series, "__ge__")

    # Null checks.
    isna = selector_wrapper(pd.Series, "isna")
    isnull = selector_wrapper(pd.Series, "isnull")
    notna = selector_wrapper(pd.Series, "notna")
    notnull = selector_wrapper(pd.Series, "notnull")

    # Membership and range checks.
    isin = selector_wrapper(pd.Series, "isin")
    between = selector_wrapper(pd.Series, "between")

    def __init__(self, parent, series=None):
        # As a series accessor only ``parent`` is given and it is the series
        # itself; when built for a frame column, ``parent`` is the frame and
        # ``series`` is the column.
        self._parent = parent
        self._series = parent if series is None else series

    def __getattr__(self, attr):
        # Fallback for anything not defined on this class.
        return getattr(self._series, attr)

    def __repr__(self):
        return pd.Series.__repr__(self)

    @property
    def index(self):
        """Selectable view over the parent's index."""
        return SelectableIndex(self._parent)
@pd.api.extensions.register_dataframe_accessor('select')
class DataFrameSelectAccessor:
    """``select`` accessor for ``pd.DataFrame``.

    Column names resolve to a ``SelectableColumn`` bound to the frame, both
    as attributes and through item access. Anything else is forwarded to
    the frame itself.
    """

    def __init__(self, frame):
        self._frame = frame

    def __repr__(self):
        return pd.DataFrame.__repr__(self)

    def __dir__(self):
        # Advertise the column names plus the selectable index.
        return self._frame.columns.tolist() + ['index']

    def __getattr__(self, attr):
        # ``__getattr__`` only runs when normal lookup fails. If ``_frame``
        # itself is missing (instance not initialised yet), reading
        # ``self._frame`` below would re-enter this method without end.
        if attr == "_frame":
            raise AttributeError(attr)
        if attr in self._frame.columns:
            return SelectableColumn(self._frame, self._frame[attr])
        return getattr(self._frame, attr)

    def __getitem__(self, key):
        """Return what attribute access would return for ``key``.

        Raises
        ------
        KeyError
            If ``key`` cannot be resolved as an attribute.
        """
        try:
            # The looked-up value must be returned, otherwise item access
            # always evaluates to None.
            return getattr(self, key)
        except AttributeError as err:
            raise KeyError(f"{key}") from err

    @property
    def index(self):
        """Selectable view over the frame's index."""
        return SelectableIndex(self._frame)
class SelectDatetimeProperties(DatetimeProperties):
    """Datetime accessor whose attributes select from a parent object.

    Attribute access falls into three cases:

    * names in ``_date_boolean_methods`` return ``parent.loc[mask]``;
    * other public routines of ``DatetimeProperties`` raise
      ``NotImplementedError``;
    * public properties that evaluate to a Series (``dt.day``,
      ``dt.month``, ...) are wrapped in a ``SelectableColumn`` so they can
      be compared further.
    """

    def __init__(self, parent, *args, **kwargs):
        # DatetimeProperties keeps its own ``_parent`` attribute, so the
        # frame (or series) to select from is stored under another name.
        # Remaining arguments go to DatetimeProperties unchanged.
        self._parent_frame = parent
        super().__init__(*args, **kwargs)

    def __getattribute__(self, attr):
        is_public = not attr.startswith("_")
        if (
            is_public
            and inspect.isroutine(getattr(DatetimeProperties, attr, None))
            and attr not in _date_boolean_methods
        ):
            raise NotImplementedError(
                "Boolean selection with this method does not make sense."
            )

        value = super().__getattribute__(attr)
        if attr in _date_boolean_methods:
            return self._parent_frame.loc[value]

        # This lets things like dt.day and dt.month be selectable for the
        # parent. Only class-level properties qualify. The ``None`` default
        # matters: an attribute stored only on the instance (such as
        # ``orig``) has no class-level entry, and looking it up without a
        # default would raise AttributeError instead of returning the value.
        if (
            is_public
            and isinstance(value, pd.Series)
            and isinstance(getattr(type(self), attr, None), property)
        ):
            return SelectableColumn(self._parent_frame, value)
        return value