Skip to content

PERF: perf improvements in dtypes/ftypes methods (GH5968) #5970

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 16, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ Improvements to existing features
- The ``ArrayFormatter``s for ``datetime`` and ``timedelta64`` now intelligently
limit precision based on the values in the array (:issue:`3401`)
- perf improvements to Series.str.extract (:issue:`5944`)
- perf improvements in ``dtypes/ftypes`` methods (:issue:`5968`)

.. _release.bug_fixes-0.13.1:

Expand Down
8 changes: 0 additions & 8 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1441,14 +1441,6 @@ def info(self, verbose=True, buf=None, max_cols=None):
lines.append('dtypes: %s' % ', '.join(dtypes))
_put_lines(buf, lines)

@property
def dtypes(self):
    """Return ``self.apply`` of a ``.dtype`` lookup, with ``reduce=False``."""
    def _dtype_of(values):
        return values.dtype

    return self.apply(_dtype_of, reduce=False)

@property
def ftypes(self):
    """Return ``self.apply`` of a ``.ftype`` lookup, with ``reduce=False``."""
    def _ftype_of(values):
        return values.ftype

    return self.apply(_ftype_of, reduce=False)

def transpose(self):
    """Swap index and columns.

    Delegates to the parent class ``transpose`` with the axis order
    ``(1, 0)``.
    """
    parent = super(DataFrame, self)
    return parent.transpose(1, 0)
Expand Down
16 changes: 14 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1748,15 +1748,27 @@ def get_values(self):
return self.as_matrix()

def get_dtype_counts(self):
    """Return the counts of dtypes in this object.

    Returns
    -------
    Series
        Built from the mapping returned by
        ``self._data.get_dtype_counts()``.
    """
    # Imported locally, as in the original.
    from pandas import Series
    return Series(self._data.get_dtype_counts())

def get_ftype_counts(self):
    """Return the counts of ftypes in this object.

    Returns
    -------
    Series
        Built from the mapping returned by
        ``self._data.get_ftype_counts()``.
    """
    # Imported locally, as in the original.
    from pandas import Series
    return Series(self._data.get_ftype_counts())

@property
def dtypes(self):
    """Return the dtypes in this object (one entry per info-axis label).

    Returns
    -------
    Series
        Values come from ``self._data.get_dtypes()``; the index is
        ``self._info_axis``.
    """
    # Imported locally, as in the original.
    from pandas import Series
    return Series(self._data.get_dtypes(), index=self._info_axis)

@property
def ftypes(self):
    """Return the ftypes in this object (one entry per info-axis label).

    Returns
    -------
    Series
        Values come from ``self._data.get_ftypes()``; the index is
        ``self._info_axis``.
    """
    # Imported locally, as in the original.
    from pandas import Series
    return Series(self._data.get_ftypes(), index=self._info_axis)

def as_blocks(self, columns=None):
"""
Convert the frame to a dict of dtype -> Constructor Types that each has
Expand Down
40 changes: 31 additions & 9 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2157,21 +2157,43 @@ def _get_items(self):
return self.axes[0]
items = property(fget=_get_items)

def _get_counts(self, f):
    """Return a dict mapping ``f(block)`` to a summed count.

    Consolidates in place first, then for every block in ``self.blocks``
    adds ``block.shape[0]`` to the tally stored under the key ``f(block)``.

    Parameters
    ----------
    f : callable
        Called with a single block; its result is used as the dict key,
        so it must be hashable.

    Returns
    -------
    dict
    """
    self._consolidate_inplace()
    counts = dict()
    for b in self.blocks:
        # Tally each block exactly once, under the key chosen by ``f``.
        v = f(b)
        counts[v] = counts.get(v, 0) + b.shape[0]
    return counts

def _get_types(self, f):
    """Return a list holding ``f(block)`` for every item, in item order.

    Parameters
    ----------
    f : callable
        Called with a single block; its result is stored for each item
        that block covers.

    Returns
    -------
    list
        One entry per element of ``self.items``.
    """
    self._consolidate_inplace()
    n_items = len(self.items)

    # unique items: each block's ``ref_locs`` are used as positions in the
    # per-item result list, so compute ``f`` once per block and scatter it.
    if self.items.is_unique:
        result = [None] * n_items
        for b in self.blocks:
            v = f(b)
            for rl in b.ref_locs:
                result[rl] = v
        return result

    # non-unique items: element 0 of the i-th entry from ``_set_ref_locs()``
    # is what gets passed to ``f`` (presumably the block that holds item i;
    # confirm against ``_set_ref_locs``).
    ref_locs = self._set_ref_locs()
    return [f(ref_locs[i][0]) for i in range(n_items)]

def get_dtype_counts(self):
    """Return ``self._get_counts`` keyed by each block's ``dtype.name``."""
    def _dtype_name(blk):
        return blk.dtype.name

    return self._get_counts(_dtype_name)

def get_ftype_counts(self):
    """Return ``self._get_counts`` keyed by each block's ``ftype``."""
    def _ftype(blk):
        return blk.ftype

    return self._get_counts(_ftype)

def get_dtypes(self):
    """Return ``self._get_types`` evaluated with each block's ``dtype``."""
    def _dtype(blk):
        return blk.dtype

    return self._get_types(_dtype)

def get_ftypes(self):
    """Return ``self._get_types`` evaluated with each block's ``ftype``."""
    def _ftype(blk):
        return blk.ftype

    return self._get_types(_ftype)

def __getstate__(self):
block_values = [b.values for b in self.blocks]
Expand Down
9 changes: 9 additions & 0 deletions vb_suite/frame_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,3 +326,12 @@ def f(K=100):
# Benchmark: ``df.apply`` with a user lambda calling ``np.corrcoef(x, s)``.
frame_apply_user_func = Benchmark(
    'df.apply(lambda x: np.corrcoef(x,s)[0,1])',
    setup,
    start_date=datetime(2012, 1, 1))

#----------------------------------------------------------------------
# dtypes

# Setup code for the benchmark: a frame built from a 1000 x 1000 ``randn`` array.
setup = common_setup + """
df = DataFrame(np.random.randn(1000,1000))
"""
# Benchmark: evaluate ``df.dtypes`` on that frame.
frame_dtypes = Benchmark(
    'df.dtypes',
    setup,
    start_date=datetime(2012, 1, 1))