Skip to content

Commit 488037b

Browse files
committed
Merge pull request #6153 from jreback/index_perf
PERF: perf regression index construction from series (GH6150)
2 parents f59b6a1 + 20e3fcc commit 488037b

File tree

4 files changed

+22
-11
lines changed

4 files changed

+22
-11
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ Improvements to existing features
104104
- add ability to recognize '%p' format code (am/pm) to date parsers when the specific format
105105
is supplied (:issue:`5361`)
106106
- Fix performance regression in JSON IO (:issue:`5765`)
107+
- Fix performance regression in Index construction from Series (:issue:`6150`)
107108

108109
.. _release.bug_fixes-0.13.1:
109110

pandas/core/index.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from pandas.util.decorators import cache_readonly, deprecate
1616
from pandas.core.common import isnull
1717
import pandas.core.common as com
18-
from pandas.core.common import _values_from_object, is_float, is_integer
18+
from pandas.core.common import _values_from_object, is_float, is_integer, ABCSeries
1919
from pandas.core.config import get_option
2020

2121
# simplify
@@ -105,7 +105,7 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False,
105105
return subarr
106106

107107
from pandas.tseries.period import PeriodIndex
108-
if isinstance(data, np.ndarray):
108+
if isinstance(data, (np.ndarray, ABCSeries)):
109109
if issubclass(data.dtype.type, np.datetime64):
110110
from pandas.tseries.index import DatetimeIndex
111111
result = DatetimeIndex(data, copy=copy, name=name, **kwargs)
@@ -212,7 +212,7 @@ def _coerce_to_ndarray(cls, data):
212212
cls._scalar_data_error(data)
213213

214214
# other iterable of some kind
215-
if not isinstance(data, (list, tuple)):
215+
if not isinstance(data, (ABCSeries, list, tuple)):
216216
data = list(data)
217217
data = np.asarray(data)
218218
return data
@@ -767,7 +767,7 @@ def asof(self, label):
767767
For a sorted index, return the most recent label up to and including
768768
the passed label. Return NaN if not found
769769
"""
770-
if isinstance(label, (Index, np.ndarray)):
770+
if isinstance(label, (Index, ABCSeries, np.ndarray)):
771771
raise TypeError('%s' % type(label))
772772

773773
if label not in self:
@@ -1535,7 +1535,7 @@ def slice_locs(self, start=None, end=None):
15351535

15361536
# get_loc will return a boolean array for non_uniques
15371537
# if we are not monotonic
1538-
if isinstance(start_slice, np.ndarray):
1538+
if isinstance(start_slice, (ABCSeries, np.ndarray)):
15391539
raise KeyError("cannot peform a slice operation "
15401540
"on a non-unique non-monotonic index")
15411541

pandas/tseries/index.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
from pandas.core.common import (isnull, _NS_DTYPE, _INT64_DTYPE,
1010
is_list_like,_values_from_object, _maybe_box,
11-
notnull)
11+
notnull, ABCSeries)
1212
from pandas.core.index import Index, Int64Index, _Identity
1313
import pandas.compat as compat
1414
from pandas.compat import u
@@ -52,9 +52,9 @@ def f(self):
5252
def _join_i8_wrapper(joinf, with_indexers=True):
5353
@staticmethod
5454
def wrapper(left, right):
55-
if isinstance(left, np.ndarray):
55+
if isinstance(left, (np.ndarray, ABCSeries)):
5656
left = left.view('i8', type=np.ndarray)
57-
if isinstance(right, np.ndarray):
57+
if isinstance(right, (np.ndarray, ABCSeries)):
5858
right = right.view('i8', type=np.ndarray)
5959
results = joinf(left, right)
6060
if with_indexers:
@@ -77,7 +77,7 @@ def wrapper(self, other):
7777
other = DatetimeIndex(other)
7878
elif isinstance(other, compat.string_types):
7979
other = _to_m8(other, tz=self.tz)
80-
elif not isinstance(other, np.ndarray):
80+
elif not isinstance(other, (np.ndarray, ABCSeries)):
8181
other = _ensure_datetime64(other)
8282
result = func(other)
8383

@@ -195,7 +195,7 @@ def __new__(cls, data=None,
195195
tz=tz, normalize=normalize, closed=closed,
196196
infer_dst=infer_dst)
197197

198-
if not isinstance(data, np.ndarray):
198+
if not isinstance(data, (np.ndarray, ABCSeries)):
199199
if np.isscalar(data):
200200
raise ValueError('DatetimeIndex() must be called with a '
201201
'collection of some kind, %s was passed'
@@ -228,6 +228,8 @@ def __new__(cls, data=None,
228228
yearfirst=yearfirst)
229229

230230
if issubclass(data.dtype.type, np.datetime64):
231+
if isinstance(data, ABCSeries):
232+
data = data.values
231233
if isinstance(data, DatetimeIndex):
232234
if tz is None:
233235
tz = data.tz
@@ -1400,7 +1402,7 @@ def freqstr(self):
14001402
nanosecond = _field_accessor('nanosecond', 'ns')
14011403
weekofyear = _field_accessor('weekofyear', 'woy')
14021404
week = weekofyear
1403-
dayofweek = _field_accessor('dayofweek', 'dow',
1405+
dayofweek = _field_accessor('dayofweek', 'dow',
14041406
"The day of the week with Monday=0, Sunday=6")
14051407
weekday = dayofweek
14061408
dayofyear = _field_accessor('dayofyear', 'doy')

vb_suite/ctors.py

+8
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,11 @@
2929
"""
3030

3131
ctor_index_array_string = Benchmark('Index(data)', setup=setup)
32+
33+
# index constructors
34+
setup = common_setup + """
35+
s = Series([Timestamp('20110101'),Timestamp('20120101'),Timestamp('20130101')]*1000)
36+
"""
37+
index_from_series_ctor = Benchmark('Index(s)', setup=setup)
38+
39+
dtindex_from_series_ctor = Benchmark('DatetimeIndex(s)', setup=setup)

0 commit comments

Comments
 (0)