diff --git a/doc/source/release.rst b/doc/source/release.rst index 0666eb7f88675..6cef0a040485b 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -80,6 +80,7 @@ Improvements to existing features - The ``ArrayFormatter``s for ``datetime`` and ``timedelta64`` now intelligently limit precision based on the values in the array (:issue:`3401`) - perf improvements to Series.str.extract (:issue:`5944`) + - perf improvements in ``dtypes/ftypes`` methods (:issue:`5968`) .. _release.bug_fixes-0.13.1: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 61d59e8f93c83..624921f573fbd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1441,14 +1441,6 @@ def info(self, verbose=True, buf=None, max_cols=None): lines.append('dtypes: %s' % ', '.join(dtypes)) _put_lines(buf, lines) - @property - def dtypes(self): - return self.apply(lambda x: x.dtype, reduce=False) - - @property - def ftypes(self): - return self.apply(lambda x: x.ftype, reduce=False) - def transpose(self): """Transpose index and columns""" return super(DataFrame, self).transpose(1, 0) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 549a199e9e3dd..bdd2e3a2683cc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1748,15 +1748,27 @@ def get_values(self): return self.as_matrix() def get_dtype_counts(self): - """ return the counts of dtypes in this frame """ + """ return the counts of dtypes in this object """ from pandas import Series return Series(self._data.get_dtype_counts()) def get_ftype_counts(self): - """ return the counts of ftypes in this frame """ + """ return the counts of ftypes in this object """ from pandas import Series return Series(self._data.get_ftype_counts()) + @property + def dtypes(self): + """ return the dtypes in this object """ + from pandas import Series + return Series(self._data.get_dtypes(),index=self._info_axis) + + @property + def ftypes(self): + """ return the ftypes in this object """ + from 
pandas import Series + return Series(self._data.get_ftypes(),index=self._info_axis) + def as_blocks(self, columns=None): """ Convert the frame to a dict of dtype -> Constructor Types that each has diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 5c77930a206b7..354eadc7c7ba1 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -2157,21 +2157,43 @@ def _get_items(self): return self.axes[0] items = property(fget=_get_items) - def get_dtype_counts(self): - """ return a dict of the counts of dtypes in BlockManager """ + def _get_counts(self, f): + """ return a dict of the counts of the function in BlockManager """ self._consolidate_inplace() counts = dict() for b in self.blocks: - counts[b.dtype.name] = counts.get(b.dtype.name, 0) + b.shape[0] + v = f(b) + counts[v] = counts.get(v, 0) + b.shape[0] return counts - def get_ftype_counts(self): - """ return a dict of the counts of dtypes in BlockManager """ + def _get_types(self, f): + """ return a list of the f per item """ self._consolidate_inplace() - counts = dict() - for b in self.blocks: - counts[b.ftype] = counts.get(b.ftype, 0) + b.shape[0] - return counts + + # unique + if self.items.is_unique: + l = [ None ] * len(self.items) + for b in self.blocks: + v = f(b) + for rl in b.ref_locs: + l[rl] = v + return l + + # non-unique + ref_locs = self._set_ref_locs() + return [ f(ref_locs[i][0]) for i, item in enumerate(self.items) ] + + def get_dtype_counts(self): + return self._get_counts(lambda b: b.dtype.name) + + def get_ftype_counts(self): + return self._get_counts(lambda b: b.ftype) + + def get_dtypes(self): + return self._get_types(lambda b: b.dtype) + + def get_ftypes(self): + return self._get_types(lambda b: b.ftype) def __getstate__(self): block_values = [b.values for b in self.blocks] diff --git a/vb_suite/frame_methods.py b/vb_suite/frame_methods.py index fd03d512125e7..88d773319817d 100644 --- a/vb_suite/frame_methods.py +++ b/vb_suite/frame_methods.py @@ -326,3 +326,12 
@@ def f(K=100): frame_apply_user_func = Benchmark('df.apply(lambda x: np.corrcoef(x,s)[0,1])', setup, start_date=datetime(2012,1,1)) +#---------------------------------------------------------------------- +# dtypes + +setup = common_setup + """ +df = DataFrame(np.random.randn(1000,1000)) +""" +frame_dtypes = Benchmark('df.dtypes', setup, + start_date=datetime(2012,1,1)) +