Skip to content

Commit c323daf

Browse files
committed
Merge pull request #5970 from jreback/dtypes_perf
PERF: perf improvements in dtypes/ftypes methods (GH5968)
2 parents a1fff60 + c1a2efa commit c323daf

File tree

5 files changed

+55
-19
lines changed

5 files changed

+55
-19
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ Improvements to existing features
8080
- The ``ArrayFormatter``s for ``datetime`` and ``timedelta64`` now intelligently
8181
limit precision based on the values in the array (:issue:`3401`)
8282
- perf improvements to Series.str.extract (:issue:`5944`)
83+
- perf improvements in ``dtypes/ftypes`` methods (:issue:`5968`)
8384

8485
.. _release.bug_fixes-0.13.1:
8586

pandas/core/frame.py

-8
Original file line numberDiff line numberDiff line change
@@ -1441,14 +1441,6 @@ def info(self, verbose=True, buf=None, max_cols=None):
14411441
lines.append('dtypes: %s' % ', '.join(dtypes))
14421442
_put_lines(buf, lines)
14431443

1444-
@property
1445-
def dtypes(self):
1446-
return self.apply(lambda x: x.dtype, reduce=False)
1447-
1448-
@property
1449-
def ftypes(self):
1450-
return self.apply(lambda x: x.ftype, reduce=False)
1451-
14521444
def transpose(self):
14531445
"""Transpose index and columns"""
14541446
return super(DataFrame, self).transpose(1, 0)

pandas/core/generic.py

+14-2
Original file line numberDiff line numberDiff line change
@@ -1748,15 +1748,27 @@ def get_values(self):
17481748
return self.as_matrix()
17491749

17501750
def get_dtype_counts(self):
1751-
""" return the counts of dtypes in this frame """
1751+
""" return the counts of dtypes in this object """
17521752
from pandas import Series
17531753
return Series(self._data.get_dtype_counts())
17541754

17551755
def get_ftype_counts(self):
1756-
""" return the counts of ftypes in this frame """
1756+
""" return the counts of ftypes in this object """
17571757
from pandas import Series
17581758
return Series(self._data.get_ftype_counts())
17591759

1760+
@property
1761+
def dtypes(self):
1762+
""" return the dtypes in this object """
1763+
from pandas import Series
1764+
return Series(self._data.get_dtypes(),index=self._info_axis)
1765+
1766+
@property
1767+
def ftypes(self):
1768+
""" return the counts of ftypes in this object """
1769+
from pandas import Series
1770+
return Series(self._data.get_ftypes(),index=self._info_axis)
1771+
17601772
def as_blocks(self, columns=None):
17611773
"""
17621774
Convert the frame to a dict of dtype -> Constructor Types that each has

pandas/core/internals.py

+31-9
Original file line numberDiff line numberDiff line change
@@ -2157,21 +2157,43 @@ def _get_items(self):
21572157
return self.axes[0]
21582158
items = property(fget=_get_items)
21592159

2160-
def get_dtype_counts(self):
2161-
""" return a dict of the counts of dtypes in BlockManager """
2160+
def _get_counts(self, f):
2161+
""" return a dict of the counts of each value of f(block) in BlockManager """
21622162
self._consolidate_inplace()
21632163
counts = dict()
21642164
for b in self.blocks:
2165-
counts[b.dtype.name] = counts.get(b.dtype.name, 0) + b.shape[0]
2165+
v = f(b)
2166+
counts[v] = counts.get(v, 0) + b.shape[0]
21662167
return counts
21672168

2168-
def get_ftype_counts(self):
2169-
""" return a dict of the counts of dtypes in BlockManager """
2169+
def _get_types(self, f):
2170+
""" return a list of f(block), one entry per item """
21702171
self._consolidate_inplace()
2171-
counts = dict()
2172-
for b in self.blocks:
2173-
counts[b.ftype] = counts.get(b.ftype, 0) + b.shape[0]
2174-
return counts
2172+
2173+
# unique
2174+
if self.items.is_unique:
2175+
l = [ None ] * len(self.items)
2176+
for b in self.blocks:
2177+
v = f(b)
2178+
for rl in b.ref_locs:
2179+
l[rl] = v
2180+
return l
2181+
2182+
# non-unique
2183+
ref_locs = self._set_ref_locs()
2184+
return [ f(ref_locs[i][0]) for i, item in enumerate(self.items) ]
2185+
2186+
def get_dtype_counts(self):
2187+
return self._get_counts(lambda b: b.dtype.name)
2188+
2189+
def get_ftype_counts(self):
2190+
return self._get_counts(lambda b: b.ftype)
2191+
2192+
def get_dtypes(self):
2193+
return self._get_types(lambda b: b.dtype)
2194+
2195+
def get_ftypes(self):
2196+
return self._get_types(lambda b: b.ftype)
21752197

21762198
def __getstate__(self):
21772199
block_values = [b.values for b in self.blocks]

vb_suite/frame_methods.py

+9
Original file line numberDiff line numberDiff line change
@@ -326,3 +326,12 @@ def f(K=100):
326326
frame_apply_user_func = Benchmark('df.apply(lambda x: np.corrcoef(x,s)[0,1])', setup,
327327
start_date=datetime(2012,1,1))
328328

329+
#----------------------------------------------------------------------
330+
# dtypes
331+
332+
setup = common_setup + """
333+
df = DataFrame(np.random.randn(1000,1000))
334+
"""
335+
frame_dtypes = Benchmark('df.dtypes', setup,
336+
start_date=datetime(2012,1,1))
337+

0 commit comments

Comments
 (0)