Skip to content

Commit 4bf00d8

Browse files
Merge pull request pandas-dev#10 from jbrockmendel/disown8
Another rebase
2 parents 09c2c91 + e91bc09 commit 4bf00d8

File tree

21 files changed

+281
-64
lines changed

21 files changed

+281
-64
lines changed

asv_bench/benchmarks/period.py

+31-7
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from pandas import (
22
DataFrame, Period, PeriodIndex, Series, date_range, period_range)
3+
from pandas.tseries.frequencies import to_offset
34

45

56
class PeriodProperties(object):
@@ -35,25 +36,48 @@ def time_asfreq(self, freq):
3536
self.per.asfreq('A')
3637

3738

39+
class PeriodConstructor(object):
40+
params = [['D'], [True, False]]
41+
param_names = ['freq', 'is_offset']
42+
43+
def setup(self, freq, is_offset):
44+
if is_offset:
45+
self.freq = to_offset(freq)
46+
else:
47+
self.freq = freq
48+
49+
def time_period_constructor(self, freq, is_offset):
50+
Period('2012-06-01', freq=freq)
51+
52+
3853
class PeriodIndexConstructor(object):
3954

40-
params = ['D']
41-
param_names = ['freq']
55+
params = [['D'], [True, False]]
56+
param_names = ['freq', 'is_offset']
4257

43-
def setup(self, freq):
58+
def setup(self, freq, is_offset):
4459
self.rng = date_range('1985', periods=1000)
4560
self.rng2 = date_range('1985', periods=1000).to_pydatetime()
4661
self.ints = list(range(2000, 3000))
47-
48-
def time_from_date_range(self, freq):
62+
self.daily_ints = date_range('1/1/2000', periods=1000,
63+
freq=freq).strftime('%Y%m%d').map(int)
64+
if is_offset:
65+
self.freq = to_offset(freq)
66+
else:
67+
self.freq = freq
68+
69+
def time_from_date_range(self, freq, is_offset):
4970
PeriodIndex(self.rng, freq=freq)
5071

51-
def time_from_pydatetime(self, freq):
72+
def time_from_pydatetime(self, freq, is_offset):
5273
PeriodIndex(self.rng2, freq=freq)
5374

54-
def time_from_ints(self, freq):
75+
def time_from_ints(self, freq, is_offset):
5576
PeriodIndex(self.ints, freq=freq)
5677

78+
def time_from_ints_daily(self, freq, is_offset):
79+
PeriodIndex(self.daily_ints, freq=freq)
80+
5781

5882
class DataFramePeriodColumn(object):
5983

doc/source/whatsnew/v0.24.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1320,6 +1320,7 @@ Performance Improvements
13201320
- Improved performance of :meth:`~DataFrame.where` for Categorical data (:issue:`24077`)
13211321
- Improved performance of iterating over a :class:`Series`. Using :meth:`DataFrame.itertuples` now creates iterators
13221322
without internally allocating lists of all elements (:issue:`20783`)
1323+
- Improved performance of the :class:`Period` constructor, which also benefits ``PeriodArray`` and ``PeriodIndex`` creation (:issue:`24084`, :issue:`24118`)
13231324

13241325
.. _whatsnew_0240.docs:
13251326

pandas/_libs/tslibs/parsing.pyx

+14-1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,20 @@ cdef set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'}
4747

4848
# ----------------------------------------------------------------------
4949

50+
_get_option = None
51+
52+
53+
def get_option(param):
54+
""" Defer import of get_option to break an import cycle that caused
55+
significant performance degradation in Period construction. See
56+
GH#24118 for details
57+
"""
58+
global _get_option
59+
if _get_option is None:
60+
from pandas.core.config import get_option
61+
_get_option = get_option
62+
return _get_option(param)
63+
5064

5165
def parse_datetime_string(date_string, freq=None, dayfirst=False,
5266
yearfirst=False, **kwargs):
@@ -117,7 +131,6 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None):
117131
freq = freq.rule_code
118132

119133
if dayfirst is None or yearfirst is None:
120-
from pandas.core.config import get_option
121134
if dayfirst is None:
122135
dayfirst = get_option("display.date_dayfirst")
123136
if yearfirst is None:

pandas/conftest.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,12 @@ def join_type(request):
275275

276276

277277
@pytest.fixture
278-
def datapath(request):
278+
def strict_data_files(pytestconfig):
279+
return pytestconfig.getoption("--strict-data-files")
280+
281+
282+
@pytest.fixture
283+
def datapath(strict_data_files):
279284
"""Get the path to a data file.
280285
281286
Parameters
@@ -297,7 +302,7 @@ def datapath(request):
297302
def deco(*args):
298303
path = os.path.join(BASE_PATH, *args)
299304
if not os.path.exists(path):
300-
if request.config.getoption("--strict-data-files"):
305+
if strict_data_files:
301306
msg = "Could not find file {} and --strict-data-files is set."
302307
raise ValueError(msg.format(path))
303308
else:

pandas/core/arrays/datetimelike.py

+4
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,10 @@ def __setitem__(
499499

500500
if is_list_like(value):
501501
is_slice = isinstance(key, slice)
502+
503+
if lib.is_scalar(key):
504+
raise ValueError("setting an array element with a sequence.")
505+
502506
if (not is_slice
503507
and len(key) != len(value)
504508
and not com.is_bool_indexer(key)):

pandas/core/base.py

+37-9
Original file line numberDiff line numberDiff line change
@@ -973,10 +973,16 @@ def _ndarray_values(self):
973973
def empty(self):
974974
return not self.size
975975

976-
def max(self):
976+
def max(self, axis=None, skipna=True):
977977
"""
978978
Return the maximum value of the Index.
979979
980+
Parameters
981+
----------
982+
axis : int, optional
983+
For compatibility with NumPy. Only 0 or None are allowed.
984+
skipna : bool, default True
985+
980986
Returns
981987
-------
982988
scalar
@@ -1004,22 +1010,36 @@ def max(self):
10041010
>>> idx.max()
10051011
('b', 2)
10061012
"""
1007-
return nanops.nanmax(self.values)
1013+
nv.validate_minmax_axis(axis)
1014+
return nanops.nanmax(self._values, skipna=skipna)
10081015

1009-
def argmax(self, axis=None):
1016+
def argmax(self, axis=None, skipna=True):
10101017
"""
10111018
Return an ndarray of the maximum argument indexer.
10121019
1020+
Parameters
1021+
----------
1022+
axis : {None}
1023+
Dummy argument for consistency with Series
1024+
skipna : bool, default True
1025+
10131026
See Also
10141027
--------
10151028
numpy.ndarray.argmax
10161029
"""
1017-
return nanops.nanargmax(self.values)
1030+
nv.validate_minmax_axis(axis)
1031+
return nanops.nanargmax(self._values, skipna=skipna)
10181032

1019-
def min(self):
1033+
def min(self, axis=None, skipna=True):
10201034
"""
10211035
Return the minimum value of the Index.
10221036
1037+
Parameters
1038+
----------
1039+
axis : {None}
1040+
Dummy argument for consistency with Series
1041+
skipna : bool, default True
1042+
10231043
Returns
10241044
-------
10251045
scalar
@@ -1047,17 +1067,25 @@ def min(self):
10471067
>>> idx.min()
10481068
('a', 1)
10491069
"""
1050-
return nanops.nanmin(self.values)
1070+
nv.validate_minmax_axis(axis)
1071+
return nanops.nanmin(self._values, skipna=skipna)
10511072

1052-
def argmin(self, axis=None):
1073+
def argmin(self, axis=None, skipna=True):
10531074
"""
10541075
Return an ndarray of the minimum argument indexer.
10551076
1077+
Parameters
1078+
----------
1079+
axis : {None}
1080+
Dummy argument for consistency with Series
1081+
skipna : bool, default True
1082+
10561083
See Also
10571084
--------
10581085
numpy.ndarray.argmin
10591086
"""
1060-
return nanops.nanargmin(self.values)
1087+
nv.validate_minmax_axis(axis)
1088+
return nanops.nanargmin(self._values, skipna=skipna)
10611089

10621090
def tolist(self):
10631091
"""
@@ -1110,7 +1138,7 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
11101138
if func is None:
11111139
raise TypeError("{klass} cannot perform the operation {op}".format(
11121140
klass=self.__class__.__name__, op=name))
1113-
return func(**kwds)
1141+
return func(skipna=skipna, **kwds)
11141142

11151143
def _map_values(self, mapper, na_action=None):
11161144
"""

pandas/core/dtypes/missing.py

+2
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,8 @@ def _isna_ndarraylike(obj):
198198
else:
199199
values = obj
200200
result = values.isna()
201+
elif isinstance(obj, ABCDatetimeArray):
202+
return obj.isna()
201203
elif is_string_dtype(dtype):
202204
# Working around NumPy ticket 1542
203205
shape = values.shape

pandas/core/indexes/datetimelike.py

+25-12
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ class DatetimeIndexOpsMixin(ExtensionOpsMixin):
4141
# override DatetimeLikeArrayMixin method
4242
copy = Index.copy
4343
view = Index.view
44+
__setitem__ = Index.__setitem__
4445

4546
# DatetimeLikeArrayMixin assumes subclasses are mutable, so these are
4647
# properties there. They can be made into cache_readonly for Index
@@ -324,35 +325,41 @@ def tolist(self):
324325
"""
325326
return list(self.astype(object))
326327

327-
def min(self, axis=None, *args, **kwargs):
328+
def min(self, axis=None, skipna=True, *args, **kwargs):
328329
"""
329330
Return the minimum value of the Index or minimum along
330331
an axis.
331332
332333
See Also
333334
--------
334335
numpy.ndarray.min
336+
Series.min : Return the minimum value in a Series.
335337
"""
336338
nv.validate_min(args, kwargs)
337339
nv.validate_minmax_axis(axis)
338340

339-
try:
340-
i8 = self.asi8
341+
if not len(self):
342+
return self._na_value
341343

344+
i8 = self.asi8
345+
try:
342346
# quick check
343347
if len(i8) and self.is_monotonic:
344348
if i8[0] != iNaT:
345349
return self._box_func(i8[0])
346350

347351
if self.hasnans:
348-
min_stamp = self[~self._isnan].asi8.min()
352+
if skipna:
353+
min_stamp = self[~self._isnan].asi8.min()
354+
else:
355+
return self._na_value
349356
else:
350357
min_stamp = i8.min()
351358
return self._box_func(min_stamp)
352359
except ValueError:
353360
return self._na_value
354361

355-
def argmin(self, axis=None, *args, **kwargs):
362+
def argmin(self, axis=None, skipna=True, *args, **kwargs):
356363
"""
357364
Returns the indices of the minimum values along an axis.
358365
@@ -369,41 +376,47 @@ def argmin(self, axis=None, *args, **kwargs):
369376
i8 = self.asi8
370377
if self.hasnans:
371378
mask = self._isnan
372-
if mask.all():
379+
if mask.all() or not skipna:
373380
return -1
374381
i8 = i8.copy()
375382
i8[mask] = np.iinfo('int64').max
376383
return i8.argmin()
377384

378-
def max(self, axis=None, *args, **kwargs):
385+
def max(self, axis=None, skipna=True, *args, **kwargs):
379386
"""
380387
Return the maximum value of the Index or maximum along
381388
an axis.
382389
383390
See Also
384391
--------
385392
numpy.ndarray.max
393+
Series.max : Return the maximum value in a Series.
386394
"""
387395
nv.validate_max(args, kwargs)
388396
nv.validate_minmax_axis(axis)
389397

390-
try:
391-
i8 = self.asi8
398+
if not len(self):
399+
return self._na_value
392400

401+
i8 = self.asi8
402+
try:
393403
# quick check
394404
if len(i8) and self.is_monotonic:
395405
if i8[-1] != iNaT:
396406
return self._box_func(i8[-1])
397407

398408
if self.hasnans:
399-
max_stamp = self[~self._isnan].asi8.max()
409+
if skipna:
410+
max_stamp = self[~self._isnan].asi8.max()
411+
else:
412+
return self._na_value
400413
else:
401414
max_stamp = i8.max()
402415
return self._box_func(max_stamp)
403416
except ValueError:
404417
return self._na_value
405418

406-
def argmax(self, axis=None, *args, **kwargs):
419+
def argmax(self, axis=None, skipna=True, *args, **kwargs):
407420
"""
408421
Returns the indices of the maximum values along an axis.
409422
@@ -420,7 +433,7 @@ def argmax(self, axis=None, *args, **kwargs):
420433
i8 = self.asi8
421434
if self.hasnans:
422435
mask = self._isnan
423-
if mask.all():
436+
if mask.all() or not skipna:
424437
return -1
425438
i8 = i8.copy()
426439
i8[mask] = 0

pandas/core/indexes/range.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -297,12 +297,14 @@ def _minmax(self, meth):
297297

298298
return self._start + self._step * no_steps
299299

300-
def min(self):
300+
def min(self, axis=None, skipna=True):
301301
"""The minimum value of the RangeIndex"""
302+
nv.validate_minmax_axis(axis)
302303
return self._minmax('min')
303304

304-
def max(self):
305+
def max(self, axis=None, skipna=True):
305306
"""The maximum value of the RangeIndex"""
307+
nv.validate_minmax_axis(axis)
306308
return self._minmax('max')
307309

308310
def argsort(self, *args, **kwargs):

0 commit comments

Comments
 (0)