Skip to content

Commit 0b570b1

Browse files
Merge pull request pandas-dev#7 from jbrockmendel/disown3
Rebased pandas-dev#24024
2 parents 4522dfe + eb594e7 commit 0b570b1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

77 files changed

+3600
-2831
lines changed

asv_bench/benchmarks/join_merge.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def setup(self, axis):
5050
self.empty_right = [df, DataFrame()]
5151

5252
def time_concat_series(self, axis):
53-
concat(self.series, axis=axis)
53+
concat(self.series, axis=axis, sort=False)
5454

5555
def time_concat_small_frames(self, axis):
5656
concat(self.small_frames, axis=axis)

asv_bench/benchmarks/panel_ctor.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import warnings
22
from datetime import datetime, timedelta
33

4-
from pandas import DataFrame, Panel, DatetimeIndex, date_range
4+
from pandas import DataFrame, Panel, date_range
55

66

77
class DifferentIndexes(object):
@@ -23,9 +23,9 @@ def time_from_dict(self):
2323
class SameIndexes(object):
2424

2525
def setup(self):
26-
idx = DatetimeIndex(start=datetime(1990, 1, 1),
27-
end=datetime(2012, 1, 1),
28-
freq='D')
26+
idx = date_range(start=datetime(1990, 1, 1),
27+
end=datetime(2012, 1, 1),
28+
freq='D')
2929
df = DataFrame({'a': 0, 'b': 1, 'c': 2}, index=idx)
3030
self.data_frames = dict(enumerate([df] * 100))
3131

@@ -40,10 +40,10 @@ def setup(self):
4040
start = datetime(1990, 1, 1)
4141
end = datetime(2012, 1, 1)
4242
df1 = DataFrame({'a': 0, 'b': 1, 'c': 2},
43-
index=DatetimeIndex(start=start, end=end, freq='D'))
43+
index=date_range(start=start, end=end, freq='D'))
4444
end += timedelta(days=1)
4545
df2 = DataFrame({'a': 0, 'b': 1, 'c': 2},
46-
index=DatetimeIndex(start=start, end=end, freq='D'))
46+
index=date_range(start=start, end=end, freq='D'))
4747
dfs = [df1] * 50 + [df2] * 50
4848
self.data_frames = dict(enumerate(dfs))
4949

asv_bench/benchmarks/reindex.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
import numpy as np
22
import pandas.util.testing as tm
3-
from pandas import (DataFrame, Series, DatetimeIndex, MultiIndex, Index,
3+
from pandas import (DataFrame, Series, MultiIndex, Index,
44
date_range)
55
from .pandas_vb_common import lib
66

77

88
class Reindex(object):
99

1010
def setup(self):
11-
rng = DatetimeIndex(start='1/1/1970', periods=10000, freq='1min')
11+
rng = date_range(start='1/1/1970', periods=10000, freq='1min')
1212
self.df = DataFrame(np.random.rand(10000, 10), index=rng,
1313
columns=range(10))
1414
self.df['foo'] = 'bar'

asv_bench/benchmarks/timedelta.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import datetime
22

33
import numpy as np
4-
from pandas import Series, timedelta_range, to_timedelta, Timestamp, \
5-
Timedelta, TimedeltaIndex, DataFrame
4+
5+
from pandas import (
6+
DataFrame, Series, Timedelta, Timestamp, timedelta_range, to_timedelta)
67

78

89
class TimedeltaConstructor(object):
@@ -122,8 +123,8 @@ def time_timedelta_nanoseconds(self, series):
122123
class TimedeltaIndexing(object):
123124

124125
def setup(self):
125-
self.index = TimedeltaIndex(start='1985', periods=1000, freq='D')
126-
self.index2 = TimedeltaIndex(start='1986', periods=1000, freq='D')
126+
self.index = timedelta_range(start='1985', periods=1000, freq='D')
127+
self.index2 = timedelta_range(start='1986', periods=1000, freq='D')
127128
self.series = Series(range(1000), index=self.index)
128129
self.timedelta = self.index[500]
129130

asv_bench/benchmarks/timestamp.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import datetime
22

3-
from pandas import Timestamp
4-
import pytz
53
import dateutil
4+
import pytz
5+
6+
from pandas import Timestamp
67

78

89
class TimestampConstruction(object):
@@ -46,7 +47,7 @@ def time_dayofweek(self, tz, freq):
4647
self.ts.dayofweek
4748

4849
def time_weekday_name(self, tz, freq):
49-
self.ts.weekday_name
50+
self.ts.day_name
5051

5152
def time_dayofyear(self, tz, freq):
5253
self.ts.dayofyear

ci/code_checks.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
145145
RET=$(($RET + $?)) ; echo $MSG "DONE"
146146

147147
MSG='Check that the deprecated `assert_raises_regex` is not used (`pytest.raises(match=pattern)` should be used instead)' ; echo $MSG
148-
invgrep -R --exclude=*.pyc --exclude=testing.py --exclude=test_testing.py assert_raises_regex pandas
148+
invgrep -R --exclude=*.pyc --exclude=testing.py --exclude=test_util.py assert_raises_regex pandas
149149
RET=$(($RET + $?)) ; echo $MSG "DONE"
150150

151151
# Check that we use pytest.raises only as a context manager

doc/source/whatsnew/v0.24.0.rst

+12-19
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,7 @@ Other Enhancements
371371
- :meth:`MultiIndex.to_flat_index` has been added to flatten multiple levels into a single-level :class:`Index` object.
372372
- :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` can write mixed string columns to Stata strl format (:issue:`23633`)
373373
- :meth:`DataFrame.between_time` and :meth:`DataFrame.at_time` have gained an ``axis`` parameter (:issue:`8839`)
374+
- The ``scatter_matrix``, ``andrews_curves``, ``parallel_coordinates``, ``lag_plot``, ``autocorrelation_plot``, ``bootstrap_plot``, and ``radviz`` plots from the ``pandas.plotting`` module are now accessible from calling :meth:`DataFrame.plot` (:issue:`11978`)
374375
- :class:`IntervalIndex` has gained the :attr:`~IntervalIndex.is_overlapping` attribute to indicate if the ``IntervalIndex`` contains any overlapping intervals (:issue:`23309`)
375376

376377
.. _whatsnew_0240.api_breaking:
@@ -673,7 +674,7 @@ changes were made:
673674
* The ``out`` and ``mode`` parameters are no longer accepted (previously, this raised if they were specified).
674675
* Passing a scalar for ``indices`` is no longer allowed.
675676

676-
- The result of concatenating a mix of sparse and dense Series is a Series with sparse values, rather than a ``SparseSeries``.
677+
- The result of :func:`concat` with a mix of sparse and dense Series is a Series with sparse values, rather than a ``SparseSeries``.
677678
- ``SparseDataFrame.combine`` and ``DataFrame.combine_first`` no longer support combining a sparse column with a dense column while preserving the sparse subtype. The result will be an object-dtype SparseArray.
678679
- Setting :attr:`SparseArray.fill_value` to a fill value with a different dtype is now allowed.
679680
- ``DataFrame[column]`` is now a :class:`Series` with sparse values, rather than a :class:`SparseSeries`, when slicing a single column with sparse values (:issue:`23559`).
@@ -1124,12 +1125,14 @@ Other API Changes
11241125
has an improved ``KeyError`` message, and will not fail on duplicate column names with ``drop=True``. (:issue:`22484`)
11251126
- Slicing a single row of a DataFrame with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`)
11261127
- :class:`DateOffset` attribute ``_cacheable`` and method ``_should_cache`` have been removed (:issue:`23118`)
1128+
- :meth:`Series.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23801`).
11271129
- :meth:`Categorical.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23466`).
11281130
- :meth:`Categorical.searchsorted` now raises a ``KeyError`` rather than a ``ValueError`` if a searched-for key is not found in its categories (:issue:`23466`).
11291131
- :meth:`Index.hasnans` and :meth:`Series.hasnans` now always return a python boolean. Previously, a python or a numpy boolean could be returned, depending on circumstances (:issue:`23294`).
11301132
- The order of the arguments of :func:`DataFrame.to_html` and :func:`DataFrame.to_string` is rearranged to be consistent with each other. (:issue:`23614`)
11311133
- :meth:`CategoricalIndex.reindex` now raises a ``ValueError`` if the target index is non-unique and not equal to the current index. It previously only raised if the target index was not of a categorical dtype (:issue:`23963`).
11321134
- :func:`Series.to_list` and :func:`Index.to_list` are now aliases of ``Series.tolist`` and ``Index.tolist``, respectively (:issue:`8826`)
1135+
- The result of ``SparseSeries.unstack`` is now a :class:`DataFrame` with sparse values, rather than a :class:`SparseDataFrame` (:issue:`24372`).
11331136

11341137
.. _whatsnew_0240.deprecations:
11351138

@@ -1177,25 +1180,19 @@ Deprecations
11771180

11781181
.. _whatsnew_0240.deprecations.datetimelike_int_ops:
11791182

1180-
Integer Addition/Subtraction with Datetime-like Classes Is Deprecated
1181-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1182-
In the past, users could add or subtract integers or integer-dtypes arrays
1183-
from :class:`Period`, :class:`PeriodIndex`, and in some cases
1184-
:class:`Timestamp`, :class:`DatetimeIndex` and :class:`TimedeltaIndex`.
1183+
Integer Addition/Subtraction with Datetimes and Timedeltas is Deprecated
1184+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1185+
1186+
In the past, users could—in some cases—add or subtract integers or integer-dtype
1187+
arrays from :class:`Timestamp`, :class:`DatetimeIndex` and :class:`TimedeltaIndex`.
11851188

11861189
This usage is now deprecated. Instead add or subtract integer multiples of
1187-
the object's ``freq`` attribute. The result of subtraction of :class:`Period`
1188-
objects will be agnostic of the multiplier of the objects' ``freq`` attribute
1189-
(:issue:`21939`, :issue:`23878`).
1190+
the object's ``freq`` attribute (:issue:`21939`, :issue:`23878`).
11901191

11911192
*Previous Behavior*:
11921193

11931194
.. code-block:: ipython
11941195
1195-
In [3]: per = pd.Period('2016Q1')
1196-
In [4]: per + 3
1197-
Out[4]: Period('2016Q4', 'Q-DEC')
1198-
11991196
In [5]: ts = pd.Timestamp('1994-05-06 12:15:16', freq=pd.offsets.Hour())
12001197
In [6]: ts + 2
12011198
Out[6]: Timestamp('1994-05-06 14:15:16', freq='H')
@@ -1213,12 +1210,6 @@ objects will be agnostic of the multiplier of the objects' ``freq`` attribute
12131210
.. ipython:: python
12141211
:okwarning:
12151212
1216-
per = pd.Period('2016Q1')
1217-
per + 3
1218-
1219-
per = pd.Period('2016Q1')
1220-
per + 3 * per.freq
1221-
12221213
ts = pd.Timestamp('1994-05-06 12:15:16', freq=pd.offsets.Hour())
12231214
ts + 2 * ts.freq
12241215
@@ -1428,6 +1419,7 @@ Numeric
14281419
- Added ``log10`` to the list of supported functions in :meth:`DataFrame.eval` (:issue:`24139`)
14291420
- Logical operations ``&, |, ^`` between :class:`Series` and :class:`Index` will no longer raise ``ValueError`` (:issue:`22092`)
14301421
- Checking PEP 3141 numbers in :func:`~pandas.api.types.is_scalar` function returns ``True`` (:issue:`22903`)
1422+
- Reduction methods like :meth:`Series.sum` now accept the default value of ``keepdims=False`` when called from a NumPy ufunc, rather than raising a ``TypeError``. Full support for ``keepdims`` has not been implemented (:issue:`24356`).
14311423

14321424
Conversion
14331425
^^^^^^^^^^
@@ -1643,6 +1635,7 @@ Sparse
16431635
- Bug in :meth:`SparseArray.unique` not returning the unique values (:issue:`19595`)
16441636
- Bug in :meth:`SparseArray.nonzero` and :meth:`SparseDataFrame.dropna` returning shifted/incorrect results (:issue:`21172`)
16451637
- Bug in :meth:`DataFrame.apply` where dtypes would lose sparseness (:issue:`23744`)
1638+
- Bug in :func:`concat` when concatenating a list of :class:`Series` with all-sparse values changing the ``fill_value`` and converting to a dense Series (:issue:`24371`)
16461639

16471640
Style
16481641
^^^^^

pandas/_libs/lib.pyx

+4-4
Original file line numberDiff line numberDiff line change
@@ -153,10 +153,10 @@ def is_scalar(val: object) -> bool:
153153
"""
154154

155155
return (cnp.PyArray_IsAnyScalar(val)
156-
# As of numpy-1.9, PyArray_IsAnyScalar misses bytearrays on Py3.
157-
or isinstance(val, (bytes, Fraction, Number))
158-
# We differ from numpy (as of 1.10), which claims that None is
159-
# not scalar in np.isscalar().
156+
# PyArray_IsAnyScalar is always False for bytearrays on Py3
157+
or isinstance(val, (Fraction, Number))
158+
# We differ from numpy, which claims that None is not scalar;
159+
# see np.isscalar
160160
or val is None
161161
or PyDate_Check(val)
162162
or PyDelta_Check(val)

pandas/_libs/tslib.pyx

-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# -*- coding: utf-8 -*-
22
import cython
3-
from cython import Py_ssize_t
43

54
from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
65
PyDateTime_CheckExact,

pandas/_libs/tslibs/ccalendar.pyx

-4
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ Cython implementations of functions resembling the stdlib calendar module
55
"""
66

77
import cython
8-
from cython import Py_ssize_t
98

109
from numpy cimport int64_t, int32_t
1110

@@ -151,12 +150,9 @@ cpdef int32_t get_week_of_year(int year, int month, int day) nogil:
151150
Assumes the inputs describe a valid date.
152151
"""
153152
cdef:
154-
bint isleap
155153
int32_t doy, dow
156154
int woy
157155

158-
isleap = is_leapyear(year)
159-
160156
doy = get_day_of_year(year, month, day)
161157
dow = dayofweek(year, month, day)
162158

pandas/_libs/tslibs/conversion.pyx

+2-12
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# -*- coding: utf-8 -*-
22
import cython
3-
from cython import Py_ssize_t
43

54
import numpy as np
65
cimport numpy as cnp
@@ -1133,7 +1132,7 @@ def normalize_date(dt: object) -> datetime:
11331132

11341133
@cython.wraparound(False)
11351134
@cython.boundscheck(False)
1136-
def normalize_i8_timestamps(int64_t[:] stamps, object tz=None):
1135+
def normalize_i8_timestamps(int64_t[:] stamps, object tz):
11371136
"""
11381137
Normalize each of the (nanosecond) timezone aware timestamps in the given
11391138
array by rounding down to the beginning of the day (i.e. midnight).
@@ -1152,7 +1151,6 @@ def normalize_i8_timestamps(int64_t[:] stamps, object tz=None):
11521151
Py_ssize_t n = len(stamps)
11531152
int64_t[:] result = np.empty(n, dtype=np.int64)
11541153

1155-
tz = maybe_get_tz(tz)
11561154
result = _normalize_local(stamps, tz)
11571155

11581156
return result.base # .base to access underlying np.ndarray
@@ -1185,15 +1183,7 @@ cdef int64_t[:] _normalize_local(int64_t[:] stamps, tzinfo tz):
11851183
npy_datetimestruct dts
11861184
int64_t delta, local_val
11871185

1188-
if is_utc(tz):
1189-
with nogil:
1190-
for i in range(n):
1191-
if stamps[i] == NPY_NAT:
1192-
result[i] = NPY_NAT
1193-
continue
1194-
dt64_to_dtstruct(stamps[i], &dts)
1195-
result[i] = _normalized_stamp(&dts)
1196-
elif is_tzlocal(tz):
1186+
if is_tzlocal(tz):
11971187
for i in range(n):
11981188
if stamps[i] == NPY_NAT:
11991189
result[i] = NPY_NAT

pandas/_libs/tslibs/fields.pyx

+6-12
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def get_time_micros(ndarray[int64_t] dtindex):
3737
ndarray[int64_t] micros
3838

3939
micros = np.mod(dtindex, DAY_SECONDS * 1000000000, dtype=np.int64)
40-
micros //= 1000LL
40+
micros //= 1000
4141
return micros
4242

4343

@@ -48,12 +48,10 @@ def build_field_sarray(int64_t[:] dtindex):
4848
Datetime as int64 representation to a structured array of fields
4949
"""
5050
cdef:
51-
Py_ssize_t i, count = 0
51+
Py_ssize_t i, count = len(dtindex)
5252
npy_datetimestruct dts
5353
ndarray[int32_t] years, months, days, hours, minutes, seconds, mus
5454

55-
count = len(dtindex)
56-
5755
sa_dtype = [('Y', 'i4'), # year
5856
('M', 'i4'), # month
5957
('D', 'i4'), # day
@@ -93,12 +91,11 @@ def get_date_name_field(int64_t[:] dtindex, object field, object locale=None):
9391
name based on requested field (e.g. weekday_name)
9492
"""
9593
cdef:
96-
Py_ssize_t i, count = 0
94+
Py_ssize_t i, count = len(dtindex)
9795
ndarray[object] out, names
9896
npy_datetimestruct dts
9997
int dow
10098

101-
count = len(dtindex)
10299
out = np.empty(count, dtype=object)
103100

104101
if field == 'day_name' or field == 'weekday_name':
@@ -147,7 +144,7 @@ def get_start_end_field(int64_t[:] dtindex, object field,
147144
"""
148145
cdef:
149146
Py_ssize_t i
150-
int count = 0
147+
int count = len(dtindex)
151148
bint is_business = 0
152149
int end_month = 12
153150
int start_month = 1
@@ -162,7 +159,6 @@ def get_start_end_field(int64_t[:] dtindex, object field,
162159
[0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366]],
163160
dtype=np.int32)
164161

165-
count = len(dtindex)
166162
out = np.zeros(count, dtype='int8')
167163

168164
if freqstr:
@@ -388,11 +384,10 @@ def get_date_field(ndarray[int64_t] dtindex, object field):
388384
field and return an array of these values.
389385
"""
390386
cdef:
391-
Py_ssize_t i, count = 0
387+
Py_ssize_t i, count = len(dtindex)
392388
ndarray[int32_t] out
393389
npy_datetimestruct dts
394390

395-
count = len(dtindex)
396391
out = np.empty(count, dtype='i4')
397392

398393
if field == 'Y':
@@ -551,11 +546,10 @@ def get_timedelta_field(int64_t[:] tdindex, object field):
551546
field and return an array of these values.
552547
"""
553548
cdef:
554-
Py_ssize_t i, count = 0
549+
Py_ssize_t i, count = len(tdindex)
555550
ndarray[int32_t] out
556551
pandas_timedeltastruct tds
557552

558-
count = len(tdindex)
559553
out = np.empty(count, dtype='i4')
560554

561555
if field == 'days':

pandas/_libs/tslibs/offsets.pyx

-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# -*- coding: utf-8 -*-
22

33
import cython
4-
from cython import Py_ssize_t
54

65
import time
76
from cpython.datetime cimport (PyDateTime_IMPORT,

pandas/_libs/tslibs/parsing.pyx

-2
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@ import sys
66
import re
77
import time
88

9-
from cython import Py_ssize_t
10-
119
from cpython.datetime cimport datetime
1210

1311

0 commit comments

Comments
 (0)