From d3fc8533bb813239ef0ff1c24d1bdcfadea715da Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 22 Sep 2020 09:26:35 -0700 Subject: [PATCH] PERF: TimedeltaArray.__iter__ --- asv_bench/benchmarks/timeseries.py | 16 +++++++++++--- pandas/core/arrays/datetimelike.py | 5 ++++- pandas/core/arrays/datetimes.py | 28 +++++++++++++----------- pandas/core/arrays/timedeltas.py | 21 +++++++++++++++++- pandas/tests/arrays/test_datetimelike.py | 9 ++++++++ 5 files changed, 61 insertions(+), 18 deletions(-) diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index 27c904dda5b45..4ed542b3a28e3 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -3,7 +3,14 @@ import dateutil import numpy as np -from pandas import DataFrame, Series, date_range, period_range, to_datetime +from pandas import ( + DataFrame, + Series, + date_range, + period_range, + timedelta_range, + to_datetime, +) from pandas.tseries.frequencies import infer_freq @@ -121,12 +128,15 @@ def time_convert(self): class Iteration: - params = [date_range, period_range] + params = [date_range, period_range, timedelta_range] param_names = ["time_index"] def setup(self, time_index): N = 10 ** 6 - self.idx = time_index(start="20140101", freq="T", periods=N) + if time_index is timedelta_range: + self.idx = time_index(start=0, freq="T", periods=N) + else: + self.idx = time_index(start="20140101", freq="T", periods=N) self.exit = 10000 def time_iter(self, time_index): diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 7051507f9a90e..e573d9e8f0504 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -469,7 +469,10 @@ def _box_values(self, values): return lib.map_infer(values, self._box_func) def __iter__(self): - return (self._box_func(v) for v in self.asi8) + if self.ndim > 1: + return (self[n] for n in range(len(self))) + else: + return (self._box_func(v) for v in self.asi8) 
@property def asi8(self) -> np.ndarray: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index b1f98199f9fba..da641265d1d20 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -558,19 +558,21 @@ def __iter__(self): ------ tstamp : Timestamp """ - - # convert in chunks of 10k for efficiency - data = self.asi8 - length = len(self) - chunksize = 10000 - chunks = int(length / chunksize) + 1 - for i in range(chunks): - start_i = i * chunksize - end_i = min((i + 1) * chunksize, length) - converted = ints_to_pydatetime( - data[start_i:end_i], tz=self.tz, freq=self.freq, box="timestamp" - ) - yield from converted + if self.ndim > 1: + yield from (self[n] for n in range(len(self))) + else: + # convert in chunks of 10k for efficiency + data = self.asi8 + length = len(self) + chunksize = 10000 + chunks = int(length / chunksize) + 1 + for i in range(chunks): + start_i = i * chunksize + end_i = min((i + 1) * chunksize, length) + converted = ints_to_pydatetime( + data[start_i:end_i], tz=self.tz, freq=self.freq, box="timestamp" + ) + yield from converted def astype(self, dtype, copy=True): # We handle diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 3eaf428bc64b2..fa90f86d328c4 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -16,7 +16,11 @@ ) from pandas._libs.tslibs.conversion import precision_from_unit from pandas._libs.tslibs.fields import get_timedelta_field -from pandas._libs.tslibs.timedeltas import array_to_timedelta64, parse_timedelta_unit +from pandas._libs.tslibs.timedeltas import ( + array_to_timedelta64, + ints_to_pytimedelta, + parse_timedelta_unit, +) from pandas.compat.numpy import function as nv from pandas.core.dtypes.common import ( @@ -328,6 +332,21 @@ def astype(self, dtype, copy=True): return self return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy) + def __iter__(self): + if self.ndim > 1: + yield from (self[n] for 
n in range(len(self))) + else: + # convert in chunks of 10k for efficiency + data = self.asi8 + length = len(self) + chunksize = 10000 + chunks = int(length / chunksize) + 1 + for i in range(chunks): + start_i = i * chunksize + end_i = min((i + 1) * chunksize, length) + converted = ints_to_pytimedelta(data[start_i:end_i], box=True) + yield from converted + # ---------------------------------------------------------------- # Reductions diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index f512b168d2795..dbe3f9f2fd5c5 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -310,6 +310,15 @@ def test_getitem_2d(self, arr1d): expected = arr1d[-1] assert result == expected + def test_iter_2d(self, arr1d): + data2d = arr1d._data[:3, np.newaxis] + arr2d = type(arr1d)._simple_new(data2d, dtype=arr1d.dtype) + result = list(arr2d) + for x in result: + assert isinstance(x, type(arr1d)) + assert x.ndim == 1 + assert x.dtype == arr1d.dtype + def test_setitem(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 arr = self.array_cls(data, freq="D")