Skip to content

Commit 370f8c8

Browse files
jtratner jreback
authored and committed
CLN: Add abstract base classes for certain objects
Instead of the `is_series`, `is_generic`, etc. functions, callers can use the ABC* classes with `isinstance` to check for certain pandas types. This is useful because it helps decrease issues with circular imports (since the classes can be easily imported from core/common). The checks rely on the `_typ` and `_subtyp` attributes (e.g. `DataFrame` now has `_typ` of `"dataframe"`, etc.). See the code for specifics.
PERF: register _cacher as an internal name
BUG: fixed abstract base class type checking bug in py2.6
DOC: updates for abc type checking
PERF: small perf gains in _get_item_cache
1 parent 7b09a3c commit 370f8c8

File tree

10 files changed

+65
-61
lines changed

10 files changed

+65
-61
lines changed

doc/source/release.rst

+5-1
Original file line numberDiff line numberDiff line change
@@ -127,12 +127,13 @@ and behaviors. Series formerly subclassed directly from ``ndarray``. (:issue:`40
127127
- added _setup_axes to created generic NDFrame structures
128128
- moved methods
129129

130-
- from_axes,_wrap_array,axes,ix,shape,empty,swapaxes,transpose,pop
130+
- from_axes,_wrap_array,axes,ix,loc,iloc,shape,empty,swapaxes,transpose,pop
131131
- __iter__,keys,__contains__,__len__,__neg__,__invert__
132132
- convert_objects,as_blocks,as_matrix,values
133133
- __getstate__,__setstate__ (though compat remains in frame/panel)
134134
- __getattr__,__setattr__
135135
- _indexed_same,reindex_like,reindex,align,where,mask
136+
- fillna,replace
136137
- filter (also added axis argument to selectively filter on a different axis)
137138
- reindex,reindex_axis (which was the biggest change to make generic)
138139
- truncate (moved to become part of ``NDFrame``)
@@ -177,6 +178,9 @@ and behaviors. Series formerly subclassed directly from ``ndarray``. (:issue:`40
177178
values to propagate to a new object from an existing (e.g. name in ``Series`` will follow
178179
more automatically now)
179180

181+
- Internal type checking is now done via a suite of generated classes, allowing ``isinstance(value, klass)``
182+
without having to directly import the klass, courtesy of @jtratner
183+
180184
- Bug in Series update where the parent frame is not updating its cache based on
181185
changes (:issue:`4080`) or types (:issue:`3217`), fillna (:issue:`3386`)
182186

doc/source/v0.13.0.txt

+4
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ and behaviors. Series formerly subclassed directly from ``ndarray``. (:issue:`40
151151
- __getstate__,__setstate__ (though compat remains in frame/panel)
152152
- __getattr__,__setattr__
153153
- _indexed_same,reindex_like,reindex,align,where,mask
154+
- fillna,replace
154155
- filter (also added axis argument to selectively filter on a different axis)
155156
- reindex,reindex_axis (which was the biggest change to make generic)
156157
- truncate (moved to become part of ``NDFrame``)
@@ -195,6 +196,9 @@ and behaviors. Series formerly subclassed directly from ``ndarray``. (:issue:`40
195196
values to propagate to a new object from an existing (e.g. name in ``Series`` will follow
196197
more automatically now)
197198

199+
- Internal type checking is now done via a suite of generated classes, allowing ``isinstance(value, klass)``
200+
without having to directly import the klass, courtesy of @jtratner
201+
198202
- Bug in Series update where the parent frame is not updating its cache based on
199203
changes (:issue:`4080`) or types (:issue:`3217`), fillna (:issue:`3386`)
200204

pandas/core/common.py

+28-32
Original file line numberDiff line numberDiff line change
@@ -48,30 +48,26 @@ class AmbiguousIndexError(PandasError, KeyError):
4848
_INT64_DTYPE = np.dtype(np.int64)
4949
_DATELIKE_DTYPES = set([np.dtype(t) for t in ['M8[ns]', 'm8[ns]']])
5050

51-
52-
def is_series(obj):
53-
return getattr(obj, '_typ', None) == 'series'
54-
55-
56-
def is_sparse_series(obj):
57-
return getattr(obj, '_subtyp', None) in ('sparse_series', 'sparse_time_series')
58-
59-
60-
def is_sparse_array_like(obj):
61-
return getattr(obj, '_subtyp', None) in ['sparse_array', 'sparse_series', 'sparse_array']
62-
63-
64-
def is_dataframe(obj):
65-
return getattr(obj, '_typ', None) == 'dataframe'
66-
67-
68-
def is_panel(obj):
69-
return getattr(obj, '_typ', None) == 'panel'
70-
71-
72-
def is_generic(obj):
73-
return getattr(obj, '_data', None) is not None
74-
51+
# define abstract base classes to enable isinstance type checking on our objects
52+
def create_pandas_abc_type(name, attr, comp):
53+
@classmethod
54+
def _check(cls, inst):
55+
return getattr(inst, attr, None) in comp
56+
dct = dict(__instancecheck__=_check,
57+
__subclasscheck__=_check)
58+
meta = type("ABCBase", (type,), dct)
59+
return meta(name, tuple(), dct)
60+
61+
ABCSeries = create_pandas_abc_type("ABCSeries", "_typ", ("series",))
62+
ABCDataFrame = create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe",))
63+
ABCPanel = create_pandas_abc_type("ABCPanel", "_typ", ("panel",))
64+
ABCSparseSeries = create_pandas_abc_type("ABCSparseSeries", "_subtyp", ('sparse_series', 'sparse_time_series'))
65+
ABCSparseArray = create_pandas_abc_type("ABCSparseArray", "_subtyp", ('sparse_array', 'sparse_series'))
66+
67+
class _ABCGeneric(type):
68+
def __instancecheck__(cls, inst):
69+
return hasattr(inst, "_data")
70+
ABCGeneric = _ABCGeneric("ABCGeneric", tuple(), {})
7571

7672
def isnull(obj):
7773
"""Detect missing values (NaN in numeric arrays, None/NaN in object arrays)
@@ -94,9 +90,9 @@ def _isnull_new(obj):
9490
if lib.isscalar(obj):
9591
return lib.checknull(obj)
9692

97-
if is_series(obj) or isinstance(obj, np.ndarray):
93+
if isinstance(obj, (ABCSeries, np.ndarray)):
9894
return _isnull_ndarraylike(obj)
99-
elif is_generic(obj):
95+
elif isinstance(obj, ABCGeneric):
10096
return obj.apply(isnull)
10197
elif isinstance(obj, list) or hasattr(obj, '__array__'):
10298
return _isnull_ndarraylike(np.asarray(obj))
@@ -119,9 +115,9 @@ def _isnull_old(obj):
119115
if lib.isscalar(obj):
120116
return lib.checknull_old(obj)
121117

122-
if is_series(obj) or isinstance(obj, np.ndarray):
118+
if isinstance(obj, (ABCSeries, np.ndarray)):
123119
return _isnull_ndarraylike_old(obj)
124-
elif is_generic(obj):
120+
elif isinstance(obj, ABCGeneric):
125121
return obj.apply(_isnull_old)
126122
elif isinstance(obj, list) or hasattr(obj, '__array__'):
127123
return _isnull_ndarraylike_old(np.asarray(obj))
@@ -182,7 +178,7 @@ def _isnull_ndarraylike(obj):
182178
else:
183179
result = np.isnan(obj)
184180

185-
if is_series(obj):
181+
if isinstance(obj, ABCSeries):
186182
from pandas import Series
187183
result = Series(result, index=obj.index, copy=False)
188184

@@ -213,7 +209,7 @@ def _isnull_ndarraylike_old(obj):
213209
else:
214210
result = -np.isfinite(obj)
215211

216-
if is_series(obj):
212+
if isinstance(obj, ABCSeries):
217213
from pandas import Series
218214
result = Series(result, index=obj.index, copy=False)
219215

@@ -1300,7 +1296,7 @@ def convert(td, type):
13001296
return np.array([ convert(v,dtype) for v in value ], dtype='m8[ns]')
13011297

13021298
# deal with numpy not being able to handle certain timedelta operations
1303-
if (isinstance(value, np.ndarray) or is_series(value)) and value.dtype.kind == 'm':
1299+
if isinstance(value, (ABCSeries, np.ndarray)) and value.dtype.kind == 'm':
13041300
if value.dtype != 'timedelta64[ns]':
13051301
value = value.astype('timedelta64[ns]')
13061302
return value
@@ -1384,7 +1380,7 @@ def _possibly_cast_to_datetime(value, dtype, coerce=False):
13841380

13851381

13861382
def _is_bool_indexer(key):
1387-
if isinstance(key, np.ndarray) or is_series(key):
1383+
if isinstance(key, (ABCSeries, np.ndarray)):
13881384
if key.dtype == np.object_:
13891385
key = np.asarray(_values_from_object(key))
13901386

pandas/core/generic.py

+6-7
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class NDFrame(PandasObject):
3636
copy : boolean, default False
3737
"""
3838
_internal_names = [
39-
'_data', 'name', '_subtyp', '_index', '_default_kind', '_default_fill_value']
39+
'_data', 'name', '_cacher', '_subtyp', '_index', '_default_kind', '_default_fill_value']
4040
_internal_names_set = set(_internal_names)
4141
_prop_attributes = []
4242

@@ -697,14 +697,13 @@ def __getitem__(self, item):
697697

698698
def _get_item_cache(self, item):
699699
cache = self._item_cache
700-
try:
701-
return cache[item]
702-
except Exception:
700+
res = cache.get(item)
701+
if res is None:
703702
values = self._data.get(item)
704703
res = self._box_item_values(item, values)
705704
cache[item] = res
706705
res._cacher = (item,weakref.ref(self))
707-
return res
706+
return res
708707

709708
def _box_item_values(self, key, values):
710709
raise NotImplementedError
@@ -1440,7 +1439,7 @@ def fillna(self, value=None, method=None, axis=0, inplace=False,
14401439

14411440
if len(self._get_axis(axis)) == 0:
14421441
return self
1443-
if isinstance(value, dict) or com.is_series(value):
1442+
if isinstance(value, (dict, com.ABCSeries)):
14441443
if axis == 1:
14451444
raise NotImplementedError('Currently only can fill '
14461445
'with dict/Series column '
@@ -1585,7 +1584,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
15851584
self._consolidate_inplace()
15861585

15871586
def is_dictlike(x):
1588-
return isinstance(x, dict) or com.is_series(x)
1587+
return isinstance(x, (dict, com.ABCSeries))
15891588

15901589
if value is None:
15911590
if not is_dictlike(to_replace):

pandas/core/indexing.py

+9-8
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from pandas.compat import range, zip
77
import pandas.compat as compat
88
import pandas.core.common as com
9-
from pandas.core.common import _is_bool_indexer, is_series, is_dataframe
9+
from pandas.core.common import _is_bool_indexer, ABCSeries, ABCDataFrame
1010
import pandas.lib as lib
1111

1212
import numpy as np
@@ -111,7 +111,7 @@ def _setitem_with_indexer(self, indexer, value):
111111
if not isinstance(indexer, tuple):
112112
indexer = self._tuplify(indexer)
113113

114-
if is_series(value):
114+
if isinstance(value, ABCSeries):
115115
value = self._align_series(indexer, value)
116116

117117
info_axis = self.obj._info_axis_number
@@ -135,7 +135,7 @@ def setter(item, v):
135135
if _is_list_like(value):
136136

137137
# we have an equal len Frame
138-
if is_dataframe(value) and value.ndim > 1:
138+
if isinstance(value, ABCDataFrame) and value.ndim > 1:
139139

140140
for item in labels:
141141

@@ -176,10 +176,10 @@ def setter(item, v):
176176
if isinstance(indexer, tuple):
177177
indexer = _maybe_convert_ix(*indexer)
178178

179-
if is_series(value):
179+
if isinstance(value, ABCSeries):
180180
value = self._align_series(indexer, value)
181181

182-
elif is_dataframe(value):
182+
elif isinstance(value, ABCDataFrame):
183183
value = self._align_frame(indexer, value)
184184

185185
if isinstance(value, Panel):
@@ -396,7 +396,7 @@ def _getitem_lowerdim(self, tup):
396396

397397
# unfortunately need an odious kludge here because of
398398
# DataFrame transposing convention
399-
if (is_dataframe(section) and i > 0
399+
if (isinstance(section, ABCDataFrame) and i > 0
400400
and len(new_key) == 2):
401401
a, b = new_key
402402
new_key = b, a
@@ -1027,7 +1027,7 @@ def _check_bool_indexer(ax, key):
10271027
# this function assumes that com._is_bool_indexer(key) == True
10281028

10291029
result = key
1030-
if is_series(key) and not key.index.equals(ax):
1030+
if isinstance(key, ABCSeries) and not key.index.equals(ax):
10311031
result = result.reindex(ax)
10321032
mask = com.isnull(result.values)
10331033
if mask.any():
@@ -1042,6 +1042,7 @@ def _check_bool_indexer(ax, key):
10421042

10431043
return result
10441044

1045+
10451046
def _maybe_convert_indices(indices, n):
10461047
""" if we have negative indicies, translate to postive here
10471048
if have indicies that are out-of-bounds, raise an IndexError """
@@ -1063,7 +1064,7 @@ def _maybe_convert_ix(*args):
10631064

10641065
ixify = True
10651066
for arg in args:
1066-
if not (isinstance(arg, (np.ndarray, list)) or is_series(arg)):
1067+
if not isinstance(arg, (np.ndarray, list, ABCSeries)):
10671068
ixify = False
10681069

10691070
if ixify:

pandas/core/internals.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from pandas.core.base import PandasObject
99

1010
from pandas.core.common import (_possibly_downcast_to_dtype, isnull, _NS_DTYPE,
11-
_TD_DTYPE, is_series, is_sparse_series)
11+
_TD_DTYPE, ABCSeries, ABCSparseSeries)
1212
from pandas.core.index import (Index, MultiIndex, _ensure_index,
1313
_handle_legacy_indexes)
1414
from pandas.core.indexing import _check_slice_bounds, _maybe_convert_indices
@@ -2945,7 +2945,7 @@ def form_blocks(arrays, names, axes):
29452945
datetime_items = []
29462946

29472947
for i, (k, v) in enumerate(zip(names, arrays)):
2948-
if isinstance(v, SparseArray) or is_sparse_series(v):
2948+
if isinstance(v, (SparseArray, ABCSparseSeries)):
29492949
sparse_items.append((i, k, v))
29502950
elif issubclass(v.dtype.type, np.floating):
29512951
float_items.append((i, k, v))
@@ -3075,13 +3075,13 @@ def _stack_arrays(tuples, ref_items, dtype):
30753075

30763076
# fml
30773077
def _asarray_compat(x):
3078-
if is_series(x):
3078+
if isinstance(x, ABCSeries):
30793079
return x.values
30803080
else:
30813081
return np.asarray(x)
30823082

30833083
def _shape_compat(x):
3084-
if is_series(x):
3084+
if isinstance(x, ABCSeries):
30853085
return len(x),
30863086
else:
30873087
return x.shape

pandas/core/series.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
_asarray_tuplesafe, is_integer_dtype,
2020
_NS_DTYPE, _TD_DTYPE,
2121
_infer_dtype_from_scalar, is_list_like, _values_from_object,
22-
is_sparse_array_like)
22+
ABCSparseArray)
2323
from pandas.core.index import (Index, MultiIndex, InvalidIndexError,
2424
_ensure_index, _handle_legacy_indexes)
2525
from pandas.core.indexing import (
@@ -584,7 +584,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
584584
else:
585585

586586
# handle sparse passed here (and force conversion)
587-
if is_sparse_array_like(data):
587+
if isinstance(data, ABCSparseArray):
588588
data = data.to_dense()
589589

590590
if index is None:
@@ -613,7 +613,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
613613
def from_array(cls, arr, index=None, name=None, copy=False, fastpath=False):
614614

615615
# return a sparse series here
616-
if is_sparse_array_like(arr):
616+
if isinstance(arr, ABCSparseArray):
617617
from pandas.sparse.series import SparseSeries
618618
cls = SparseSeries
619619

pandas/sparse/array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,7 @@ def _maybe_to_dense(obj):
489489

490490

491491
def _maybe_to_sparse(array):
492-
if com.is_sparse_series(array):
492+
if isinstance(array, com.ABCSparseSeries):
493493
array = SparseArray(
494494
array.values, sparse_index=array.sp_index, fill_value=array.fill_value, copy=True)
495495
if not isinstance(array, SparseArray):

pandas/tools/merge.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from pandas.core.internals import (IntBlock, BoolBlock, BlockManager,
1818
make_block, _consolidate)
1919
from pandas.util.decorators import cache_readonly, Appender, Substitution
20-
from pandas.core.common import PandasError
20+
from pandas.core.common import PandasError, ABCSeries
2121
import pandas.core.common as com
2222

2323
import pandas.lib as lib
@@ -304,8 +304,8 @@ def _get_merge_keys(self):
304304
left_drop = []
305305
left, right = self.left, self.right
306306

307-
is_lkey = lambda x: isinstance(x, (np.ndarray, Series)) and len(x) == len(left)
308-
is_rkey = lambda x: isinstance(x, (np.ndarray, Series)) and len(x) == len(right)
307+
is_lkey = lambda x: isinstance(x, (np.ndarray, ABCSeries)) and len(x) == len(left)
308+
is_rkey = lambda x: isinstance(x, (np.ndarray, ABCSeries)) and len(x) == len(right)
309309

310310
# ugh, spaghetti re #733
311311
if _any(self.left_on) and _any(self.right_on):
@@ -941,7 +941,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
941941
if isinstance(sample, DataFrame):
942942
axis = 1 if axis == 0 else 0
943943

944-
self._is_series = isinstance(sample, Series)
944+
self._is_series = isinstance(sample, ABCSeries)
945945
if not ((0 <= axis <= sample.ndim)):
946946
raise AssertionError()
947947

vb_suite/frame_methods.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@
8383
# iteritems (monitor no-copying behaviour)
8484

8585
setup = common_setup + """
86-
df = DataFrame(randn(10000, 100))
86+
df = DataFrame(randn(10000, 1000))
8787
8888
def f():
8989
if hasattr(df, '_item_cache'):

0 commit comments

Comments
 (0)