Skip to content

PERF: speed-up DataFrame.itertuples() with namedtuples #11625

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions asv_bench/benchmarks/frame_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,16 @@ def j(self):
self.df3[0]


# Benchmark case for iterating over the rows of a DataFrame with
# ``DataFrame.itertuples()``.
class frame_itertuples(object):

# Build the fixture: a 50000 x 10 frame filled with values drawn from
# ``np.random.randn``; stored on ``self.df`` for the benchmark method below.
def setup(self):
self.df = DataFrame(np.random.randn(50000, 10))

# Exhaust the ``itertuples()`` iterator with its default arguments; each row
# is discarded, so only the cost of producing the rows is exercised.
def time_frame_itertuples(self):
for row in self.df.itertuples():
pass


class frame_mask_bools(object):
goal_time = 0.2

Expand Down
2 changes: 2 additions & 0 deletions doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,8 @@
# extlinks alias
# Maps a short role name to a (URL template, caption prefix) pair; the ``%s``
# in each template is replaced by the role's argument, e.g. :issue:`11269`
# or :pr:`11625`.
# NOTE(review): presumably consumed by the ``sphinx.ext.extlinks`` extension
# enabled elsewhere in this conf.py -- confirm.
extlinks = {'issue': ('https://github.com/pydata/pandas/issues/%s',
'GH'),
'pr': ('https://github.com/pydata/pandas/pull/%s',
'GH-PR'),
'wiki': ('https://github.com/pydata/pandas/wiki/%s',
'wiki ')}

Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.17.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ API changes
- Indexing with a null key will raise a ``TypeError``, instead of a ``ValueError`` (:issue:`11356`)
- ``Series.sort_index()`` now correctly handles the ``inplace`` option (:issue:`11402`)

- ``DataFrame.itertuples()`` now returns ``namedtuple`` objects, when possible. (:issue:`11269`)
- ``DataFrame.itertuples()`` now returns ``namedtuple`` objects, when possible. (:issue:`11269`, :pr:`11625`)
- ``Series.ptp`` will now ignore missing values by default (:issue:`11163`)

.. _whatsnew_0171.deprecations:
Expand Down
10 changes: 5 additions & 5 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
import pandas.computation.expressions as expressions
from pandas.computation.eval import eval as _eval
from numpy import percentile as _quantile
from pandas.compat import(range, zip, lrange, lmap, lzip, StringIO, u,
from pandas.compat import(range, map, zip, lrange, lmap, lzip, StringIO, u,
OrderedDict, raise_with_traceback)
from pandas import compat
from pandas.sparse.array import SparseArray
Expand Down Expand Up @@ -664,7 +664,7 @@ def itertuples(self, index=True, name="Pandas"):
index : boolean, default True
If True, return the index as the first element of the tuple.
name : string, default "Pandas"
The name of the returned namedtuple.
The name of the returned namedtuples or None to return regular tuples.

Notes
-----
Expand Down Expand Up @@ -703,13 +703,13 @@ def itertuples(self, index=True, name="Pandas"):

# Python 3 supports at most 255 arguments to constructor, and
# things get slow with this many fields in Python 2
if len(self.columns) + index < 256:
if name is not None and len(self.columns) + index < 256:
# `rename` is unsupported in Python 2.6
try:
itertuple = collections.namedtuple(
name, fields+list(self.columns), rename=True)
return (itertuple(*row) for row in zip(*arrays))
except:
return map(itertuple._make, zip(*arrays))
except Exception:
pass

# fallback to regular tuples
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5545,6 +5545,8 @@ def test_itertuples(self):
dfaa = df[['a', 'a']]
self.assertEqual(list(dfaa.itertuples()), [(0, 1, 1), (1, 2, 2), (2, 3, 3)])

self.assertEqual(repr(list(df.itertuples(name=None))), '[(0, 1, 4), (1, 2, 5), (2, 3, 6)]')

tup = next(df.itertuples(name='TestName'))

# no support for field renaming in Python 2.6, regular tuples are returned
Expand Down