Skip to content

Commit b3829f4

Browse files
committed
ENH: Cythonize AxisProperty to speed up DataFrame.index/columns properties etc., GH #528
1 parent 50c4a08 commit b3829f4

File tree

5 files changed

+56
-53
lines changed

5 files changed

+56
-53
lines changed

pandas/core/frame.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,9 @@
2323
import numpy.ma as ma
2424

2525
from pandas.core.common import (isnull, notnull, PandasError, _try_sort,
26-
_default_index, _stringify, _maybe_upcast,
27-
_max_rows, _max_columns)
26+
_default_index, _stringify, _maybe_upcast)
2827
from pandas.core.daterange import DateRange
29-
from pandas.core.generic import NDFrame, AxisProperty
28+
from pandas.core.generic import NDFrame
3029
from pandas.core.index import Index, MultiIndex, NULL_INDEX, _ensure_index
3130
from pandas.core.indexing import _NDFrameIndexer, _maybe_droplevels
3231
from pandas.core.internals import BlockManager, make_block, form_blocks
@@ -894,8 +893,8 @@ def get_dtype_counts(self):
894893
#----------------------------------------------------------------------
895894
# properties for index and columns
896895

897-
columns = AxisProperty(0)
898-
index = AxisProperty(1)
896+
columns = lib.AxisProperty(0)
897+
index = lib.AxisProperty(1)
899898

900899
def as_matrix(self, columns=None):
901900
"""
@@ -3144,7 +3143,7 @@ def clip_lower(self, threshold):
31443143

31453144
def rank(self, axis=0):
31463145
"""
3147-
Compute numericaldata ranks (1 through n) along axis. Equal values are
3146+
Compute numerical data ranks (1 through n) along axis. Equal values are
31483147
assigned a rank that is the average of the ranks of those values
31493148
31503149
Parameters

pandas/core/generic.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,18 +21,6 @@ def load(cls, path):
2121
class PandasError(Exception):
2222
pass
2323

24-
class AxisProperty(object):
25-
26-
def __init__(self, axis=0):
27-
self.axis = axis
28-
29-
def __get__(self, obj, type=None):
30-
data = getattr(obj, '_data')
31-
return data.axes[self.axis]
32-
33-
def __set__(self, obj, value):
34-
obj._set_axis(self.axis, value)
35-
3624
class PandasObject(Picklable):
3725

3826
_AXIS_NUMBERS = {

pandas/core/panel.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@
1212
from pandas.core.indexing import _NDFrameIndexer
1313
from pandas.core.internals import BlockManager, make_block, form_blocks
1414
from pandas.core.frame import DataFrame, _union_indexes
15-
from pandas.core.generic import AxisProperty, NDFrame
15+
from pandas.core.generic import NDFrame
1616
from pandas.core.series import Series
1717
from pandas.util import py3compat
1818
import pandas.core.common as com
19-
import pandas._tseries as _tseries
19+
import pandas._tseries as lib
2020

2121

2222
def _ensure_like_indices(time, panels):
@@ -165,9 +165,9 @@ class Panel(NDFrame):
165165
_default_stat_axis = 1
166166
_het_axis = 0
167167

168-
items = AxisProperty(0)
169-
major_axis = AxisProperty(1)
170-
minor_axis = AxisProperty(2)
168+
items = lib.AxisProperty(0)
169+
major_axis = lib.AxisProperty(1)
170+
minor_axis = lib.AxisProperty(2)
171171

172172
__add__ = _arith_method(operator.add, '__add__')
173173
__sub__ = _arith_method(operator.sub, '__sub__')
@@ -1031,11 +1031,11 @@ def median(self, axis='major', skipna=True):
10311031
def f(arr):
10321032
mask = com.notnull(arr)
10331033
if skipna:
1034-
return _tseries.median(arr[mask])
1034+
return lib.median(arr[mask])
10351035
else:
10361036
if not mask.all():
10371037
return np.nan
1038-
return _tseries.median(arr)
1038+
return lib.median(arr)
10391039
return self.apply(f, axis=axis)
10401040

10411041
_add_docs(median, 'median', 'median')

pandas/src/properties.pyx

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
from cpython cimport PyDict_Contains, PyDict_GetItem, PyDict_SetItem
2+
3+
cdef class cache_readonly(object):
4+
5+
cdef readonly:
6+
object fget, name
7+
8+
def __init__(self, func):
9+
self.fget = func
10+
self.name = func.__name__
11+
12+
def __get__(self, obj, type):
13+
if obj is None:
14+
return self.fget
15+
16+
# Get the cache or set a default one if needed
17+
18+
cache = getattr(obj, '_cache', None)
19+
if cache is None:
20+
cache = obj._cache = {}
21+
22+
if PyDict_Contains(cache, self.name):
23+
# not necessary to Py_INCREF
24+
val = <object> PyDict_GetItem(cache, self.name)
25+
return val
26+
else:
27+
val = self.fget(obj)
28+
PyDict_SetItem(cache, self.name, val)
29+
return val
30+
31+
cdef class AxisProperty(object):
32+
cdef:
33+
Py_ssize_t axis
34+
35+
def __init__(self, axis=0):
36+
self.axis = axis
37+
38+
def __get__(self, obj, type):
39+
cdef list axes = obj._data.axes
40+
return axes[self.axis]
41+
42+
def __set__(self, obj, value):
43+
obj._set_axis(self.axis, value)

pandas/src/tseries.pyx

Lines changed: 1 addition & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -468,34 +468,6 @@ def fast_zip(list ndarrays):
468468

469469
return result
470470

471-
cdef class cache_readonly(object):
472-
473-
cdef readonly:
474-
object fget, name
475-
476-
def __init__(self, func):
477-
self.fget = func
478-
self.name = func.__name__
479-
480-
def __get__(self, obj, type):
481-
if obj is None:
482-
return self.fget
483-
484-
# Get the cache or set a default one if needed
485-
486-
cache = getattr(obj, '_cache', None)
487-
if cache is None:
488-
cache = obj._cache = {}
489-
490-
if PyDict_Contains(cache, self.name):
491-
# not necessary to Py_INCREF
492-
val = <object> PyDict_GetItem(cache, self.name)
493-
return val
494-
else:
495-
val = self.fget(obj)
496-
PyDict_SetItem(cache, self.name, val)
497-
return val
498-
499471
cpdef is_array(object o):
500472
return np.PyArray_Check(o)
501473

@@ -525,3 +497,4 @@ include "generated.pyx"
525497
include "parsing.pyx"
526498
include "reduce.pyx"
527499
include "stats.pyx"
500+
include "properties.pyx"

0 commit comments

Comments
 (0)