Skip to content

Commit 041368a

Browse files
author
Marco Gorelli
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into tz-localize-categories
2 parents 491c459 + c4de906 commit 041368a

38 files changed

+1087
-1000
lines changed

asv_bench/benchmarks/frame_methods.py

+11
Original file line numberDiff line numberDiff line change
@@ -609,4 +609,15 @@ def time_dataframe_describe(self):
609609
self.df.describe()
610610

611611

612+
class SelectDtypes:
613+
params = [100, 1000]
614+
param_names = ["n"]
615+
616+
def setup(self, n):
617+
self.df = DataFrame(np.random.randn(10, n))
618+
619+
def time_select_dtypes(self, n):
620+
self.df.select_dtypes(include="int")
621+
622+
612623
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/rolling.py

+19
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,25 @@ def peakmem_rolling(self, constructor, window, dtype, method):
2525
getattr(self.roll, method)()
2626

2727

28+
class Apply:
29+
params = (
30+
["DataFrame", "Series"],
31+
[10, 1000],
32+
["int", "float"],
33+
[sum, np.sum, lambda x: np.sum(x) + 5],
34+
[True, False],
35+
)
36+
param_names = ["constructor", "window", "dtype", "function", "raw"]
37+
38+
def setup(self, constructor, window, dtype, function, raw):
39+
N = 10 ** 5
40+
arr = (100 * np.random.random(N)).astype(dtype)
41+
self.roll = getattr(pd, constructor)(arr).rolling(window)
42+
43+
def time_rolling(self, constructor, window, dtype, function, raw):
44+
self.roll.apply(function, raw=raw)
45+
46+
2847
class ExpandingMethods:
2948

3049
params = (

doc/source/development/index.rst

+1
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,5 @@ Development
1616
internals
1717
extending
1818
developer
19+
policies
1920
roadmap

doc/source/development/policies.rst

+57
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
.. _develop.policies:
2+
3+
********
4+
Policies
5+
********
6+
7+
.. _policies.version:
8+
9+
Version Policy
10+
~~~~~~~~~~~~~~
11+
12+
.. versionchanged:: 1.0.0
13+
14+
Pandas uses a loose variant of semantic versioning (`SemVer`_) to govern
15+
deprecations, API compatibility, and version numbering.
16+
17+
A pandas release number is made up of ``MAJOR.MINOR.PATCH``.
18+
19+
API breaking changes should only occur in **major** releases. These changes
20+
will be documented, with clear guidance on what is changing, why it's changing,
21+
and how to migrate existing code to the new behavior.
22+
23+
Whenever possible, a deprecation path will be provided rather than an outright
24+
breaking change.
25+
26+
Pandas will introduce deprecations in **minor** releases. These deprecations
27+
will preserve the existing behavior while emitting a warning that provides
28+
guidance on:
29+
30+
* How to achieve similar behavior if an alternative is available
31+
* The pandas version in which the deprecation will be enforced.
32+
33+
We will not introduce new deprecations in patch releases.
34+
35+
Deprecations will only be enforced in **major** releases. For example, if a
36+
behavior is deprecated in pandas 1.2.0, it will continue to work, with a
37+
warning, for all releases in the 1.x series. The behavior will change and the
38+
deprecation removed in the next major release (2.0.0).
39+
40+
.. note::
41+
42+
Pandas will sometimes make *behavior changing* bug fixes, as part of
43+
minor or patch releases. Whether or not a change is a bug fix or an
44+
API-breaking change is a judgement call. We'll do our best, and we
45+
invite you to participate in development discussion on the issue
46+
tracker or mailing list.
47+
48+
These policies do not apply to features marked as **experimental** in the documentation.
49+
Pandas may change the behavior of experimental features at any time.
50+
51+
Python Support
52+
~~~~~~~~~~~~~~
53+
54+
Pandas will only drop support for specific Python versions (e.g. 3.5.x, 3.6.x) in
55+
pandas **major** releases.
56+
57+
.. _SemVer: https://semver.org

doc/source/whatsnew/v0.25.2.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,8 @@ Other
100100
^^^^^
101101

102102
- Compatibility with Python 3.8 in :meth:`DataFrame.query` (:issue:`27261`)
103-
-
103+
- Fix to ensure that tab-completion in an IPython console does not raise
104+
warnings for deprecated attributes (:issue:`27900`).
104105

105106
.. _whatsnew_0.252.contributors:
106107

doc/source/whatsnew/v1.0.0.rst

+37-5
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,39 @@
33
What's new in 1.0.0 (??)
44
------------------------
55

6-
.. warning::
6+
New Deprecation Policy
7+
~~~~~~~~~~~~~~~~~~~~~~
8+
9+
Starting with Pandas 1.0.0, pandas will adopt a version of `SemVer`_.
10+
11+
Historically, pandas has used a "rolling" deprecation policy, with occasional
12+
outright breaking API changes. Where possible, we would deprecate the behavior
13+
we'd like to change, giving an option to adopt the new behavior (via a keyword
14+
or an alternative method), and issuing a warning for users of the old behavior.
15+
Sometimes, a deprecation was not possible, and we would make an outright API
16+
breaking change.
17+
18+
We'll continue to *introduce* deprecations in major and minor releases (e.g.
19+
1.0.0, 1.1.0, ...). Those deprecations will be *enforced* in the next major
20+
release.
21+
22+
Note that *behavior changes* and *API breaking changes* are not identical. API
23+
breaking changes will only be released in major versions. If we consider a
24+
behavior to be a bug, and fixing that bug induces a behavior change, we'll
25+
release that change in a minor release. This is a sometimes difficult judgment
26+
call that we'll do our best on.
727

8-
Starting with the 0.25.x series of releases, pandas only supports Python 3.5.3 and higher.
9-
See `Dropping Python 2.7 <https://pandas.pydata.org/pandas-docs/version/0.24/install.html#install-dropping-27>`_ for more details.
28+
This doesn't mean that pandas' pace of development will slow down. In the `2019
29+
Pandas User Survey`_, about 95% of the respondents said they considered pandas
30+
"stable enough". This indicates there's an appetite for new features, even if it
31+
comes at the cost of breaking the API. The difference is that now API breaking changes
32+
will be accompanied by a bump in the major version number (e.g. pandas 1.5.1
33+
-> 2.0.0).
34+
35+
See :ref:`policies.version` for more.
36+
37+
.. _2019 Pandas User Survey: http://dev.pandas.io/pandas-blog/2019-pandas-user-survey.html
38+
.. _SemVer: https://semver.org
1039

1140
.. warning::
1241

@@ -37,7 +66,7 @@ Other enhancements
3766
pandas (so it will become an integer or float dtype depending on the presence of missing data).
3867
(:issue:`28368`)
3968
- :meth:`DataFrame.to_json` now accepts an ``indent`` integer argument to enable pretty printing of JSON output (:issue:`12004`)
40-
69+
- :func:`read_stata` can read Stata 119 dta files. (:issue:`28250`)
4170

4271
Build Changes
4372
^^^^^^^^^^^^^
@@ -122,6 +151,7 @@ Performance improvements
122151
- Performance improvement in :func:`cut` when ``bins`` is an :class:`IntervalIndex` (:issue:`27668`)
123152
- Performance improvement in :meth:`DataFrame.corr` when ``method`` is ``"spearman"`` (:issue:`28139`)
124153
- Performance improvement in :meth:`DataFrame.replace` when provided a list of values to replace (:issue:`28099`)
154+
- Performance improvement in :meth:`DataFrame.select_dtypes` by using vectorization instead of iterating over a loop (:issue:`28317`)
125155

126156
.. _whatsnew_1000.bug_fixes:
127157

@@ -152,7 +182,8 @@ Datetimelike
152182
- Addition and subtraction of integer or integer-dtype arrays with :class:`Timestamp` will now raise ``NullFrequencyError`` instead of ``ValueError`` (:issue:`28268`)
153183
- Bug in :class:`Series` and :class:`DataFrame` with integer dtype failing to raise ``TypeError`` when adding or subtracting a ``np.datetime64`` object (:issue:`28080`)
154184
- Bug in :class:`Week` with ``weekday`` incorrectly raising ``AttributeError`` instead of ``TypeError`` when adding or subtracting an invalid type (:issue:`28530`)
155-
185+
- Bug in :class:`DataFrame` arithmetic operations when operating with a :class:`Series` with dtype ``'timedelta64[ns]'`` (:issue:`28049`)
186+
-
156187

157188
Timedelta
158189
^^^^^^^^^
@@ -234,6 +265,7 @@ Plotting
234265
- Bug in :meth:`DataFrame.plot` when ``kind='box'`` and data contains datetime or timedelta data. These types are now automatically dropped (:issue:`22799`)
235266
- Bug in :meth:`DataFrame.plot.line` and :meth:`DataFrame.plot.area` produce wrong xlim in x-axis (:issue:`27686`, :issue:`25160`, :issue:`24784`)
236267
- Bug where :meth:`DataFrame.boxplot` would not accept a `color` parameter like `DataFrame.plot.box` (:issue:`26214`)
268+
- Bug in the ``xticks`` argument being ignored for :meth:`DataFrame.plot.bar` (:issue:`14119`)
237269
- :func:`set_option` now validates that the plot backend provided to ``'plotting.backend'`` implements the backend when the option is set, rather than when a plot is created (:issue:`28163`)
238270

239271
Groupby/resample/rolling

pandas/_libs/lib.pyx

+13-5
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,7 @@ cimport pandas._libs.util as util
5555
from pandas._libs.util cimport is_nan, UINT64_MAX, INT64_MAX, INT64_MIN
5656

5757
from pandas._libs.tslib import array_to_datetime
58-
from pandas._libs.tslibs.nattype cimport NPY_NAT
59-
from pandas._libs.tslibs.nattype import NaT
58+
from pandas._libs.tslibs.nattype cimport NPY_NAT, c_NaT as NaT
6059
from pandas._libs.tslibs.conversion cimport convert_to_tsobject
6160
from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64
6261
from pandas._libs.tslibs.timezones cimport get_timezone, tz_compare
@@ -525,9 +524,18 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool:
525524

526525
# we are either not equal or both nan
527526
# I think None == None will be true here
528-
if not (PyObject_RichCompareBool(x, y, Py_EQ) or
529-
(x is None or is_nan(x)) and (y is None or is_nan(y))):
530-
return False
527+
try:
528+
if not (PyObject_RichCompareBool(x, y, Py_EQ) or
529+
(x is None or is_nan(x)) and (y is None or is_nan(y))):
530+
return False
531+
except TypeError as err:
532+
# Avoid raising TypeError on tzawareness mismatch
533+
# TODO: This try/except can be removed if/when Timestamp
534+
# comparisons are changed to match datetime, see GH#28507
535+
if "tz-naive and tz-aware" in str(err):
536+
return False
537+
raise
538+
531539
return True
532540

533541

pandas/core/algorithms.py

+3-6
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@
3939
is_period_dtype,
4040
is_scalar,
4141
is_signed_integer_dtype,
42-
is_sparse,
4342
is_timedelta64_dtype,
4443
is_unsigned_integer_dtype,
4544
needs_i8_conversion,
@@ -743,7 +742,7 @@ def value_counts(
743742

744743
else:
745744

746-
if is_extension_array_dtype(values) or is_sparse(values):
745+
if is_extension_array_dtype(values):
747746

748747
# handle Categorical and sparse,
749748
result = Series(values)._values.value_counts(dropna=dropna)
@@ -1623,7 +1622,7 @@ def take_nd(
16231622
out : ndarray or None, default None
16241623
Optional output array, must be appropriate type to hold input and
16251624
fill_value together, if indexer has any -1 value entries; call
1626-
_maybe_promote to determine this type for any fill_value
1625+
maybe_promote to determine this type for any fill_value
16271626
fill_value : any, default np.nan
16281627
Fill value to replace -1 values with
16291628
mask_info : tuple of (ndarray, boolean)
@@ -1644,9 +1643,7 @@ def take_nd(
16441643
if is_extension_array_dtype(arr):
16451644
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
16461645

1647-
if is_sparse(arr):
1648-
arr = arr.to_dense()
1649-
elif isinstance(arr, (ABCIndexClass, ABCSeries)):
1646+
if isinstance(arr, (ABCIndexClass, ABCSeries)):
16501647
arr = arr._values
16511648

16521649
arr = np.asarray(arr)

pandas/core/construction.py

+16-6
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import numpy as np
1010
import numpy.ma as ma
1111

12-
from pandas._libs import lib, tslibs
12+
from pandas._libs import lib
1313
from pandas._libs.tslibs import IncompatibleFrequency, OutOfBoundsDatetime
1414

1515
from pandas.core.dtypes.cast import (
@@ -36,7 +36,7 @@
3636
is_timedelta64_ns_dtype,
3737
pandas_dtype,
3838
)
39-
from pandas.core.dtypes.dtypes import ExtensionDtype, registry
39+
from pandas.core.dtypes.dtypes import CategoricalDtype, ExtensionDtype, registry
4040
from pandas.core.dtypes.generic import (
4141
ABCExtensionArray,
4242
ABCIndexClass,
@@ -275,7 +275,7 @@ def array(
275275
if inferred_dtype == "period":
276276
try:
277277
return period_array(data, copy=copy)
278-
except tslibs.IncompatibleFrequency:
278+
except IncompatibleFrequency:
279279
# We may have a mixture of frequencies.
280280
# We choose to return an ndarray, rather than raising.
281281
pass
@@ -365,7 +365,9 @@ def extract_array(obj, extract_numpy=False):
365365
return obj
366366

367367

368-
def sanitize_array(data, index, dtype=None, copy=False, raise_cast_failure=False):
368+
def sanitize_array(
369+
data, index, dtype=None, copy: bool = False, raise_cast_failure: bool = False
370+
):
369371
"""
370372
Sanitize input data to an ndarray, copy if specified, coerce to the
371373
dtype if specified.
@@ -486,13 +488,19 @@ def sanitize_array(data, index, dtype=None, copy=False, raise_cast_failure=False
486488
return subarr
487489

488490

489-
def _try_cast(arr, dtype, copy, raise_cast_failure):
491+
def _try_cast(
492+
arr,
493+
dtype: Optional[Union[np.dtype, "ExtensionDtype"]],
494+
copy: bool,
495+
raise_cast_failure: bool,
496+
):
490497
"""
491498
Convert input to numpy ndarray and optionally cast to a given dtype.
492499
493500
Parameters
494501
----------
495-
arr : array-like
502+
arr : ndarray, list, tuple, iterator (catchall)
503+
Excludes: ExtensionArray, Series, Index.
496504
dtype : np.dtype, ExtensionDtype or None
497505
copy : bool
498506
If False, don't copy the data if not needed.
@@ -528,11 +536,13 @@ def _try_cast(arr, dtype, copy, raise_cast_failure):
528536
if is_categorical_dtype(dtype):
529537
# We *do* allow casting to categorical, since we know
530538
# that Categorical is the only array type for 'category'.
539+
dtype = cast(CategoricalDtype, dtype)
531540
subarr = dtype.construct_array_type()(
532541
arr, dtype.categories, ordered=dtype._ordered
533542
)
534543
elif is_extension_array_dtype(dtype):
535544
# create an extension array from its dtype
545+
dtype = cast(ExtensionDtype, dtype)
536546
array_type = dtype.construct_array_type()._from_sequence
537547
subarr = array_type(arr, dtype=dtype, copy=copy)
538548
elif dtype is not None and raise_cast_failure:

pandas/core/dtypes/cast.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -796,7 +796,7 @@ def maybe_convert_objects(values: np.ndarray, convert_numeric: bool = True):
796796
new_values = lib.maybe_convert_numeric(
797797
values, set(), coerce_numeric=True
798798
)
799-
except Exception:
799+
except (ValueError, TypeError):
800800
pass
801801
else:
802802
# if we are all nans then leave me alone
@@ -875,7 +875,7 @@ def soft_convert_objects(
875875
if numeric and is_object_dtype(values.dtype):
876876
try:
877877
converted = lib.maybe_convert_numeric(values, set(), coerce_numeric=True)
878-
except Exception:
878+
except (ValueError, TypeError):
879879
pass
880880
else:
881881
# If all NaNs, then do not-alter
@@ -953,9 +953,10 @@ def try_datetime(v):
953953
# we might have a sequence of the same-datetimes with tz's
954954
# if so coerce to a DatetimeIndex; if they are not the same,
955955
# then these stay as object dtype, xref GH19671
956+
from pandas._libs.tslibs import conversion
957+
from pandas import DatetimeIndex
958+
956959
try:
957-
from pandas._libs.tslibs import conversion
958-
from pandas import DatetimeIndex
959960

960961
values, tz = conversion.datetime_to_datetime64(v)
961962
return DatetimeIndex(values).tz_localize("UTC").tz_convert(tz=tz)
@@ -1310,9 +1311,8 @@ def construct_1d_ndarray_preserving_na(values, dtype=None, copy=False):
13101311
>>> np.array([1.0, 2.0, None], dtype='str')
13111312
array(['1.0', '2.0', 'None'], dtype='<U4')
13121313
1313-
>>> construct_1d_ndarray_preserving_na([1.0, 2.0, None], dtype='str')
1314-
1315-
1314+
>>> construct_1d_ndarray_preserving_na([1.0, 2.0, None], dtype=np.dtype('str'))
1315+
array(['1.0', '2.0', None], dtype=object)
13161316
"""
13171317
subarr = np.array(values, dtype=dtype, copy=copy)
13181318

pandas/core/dtypes/common.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -888,7 +888,8 @@ def is_dtype_equal(source, target):
888888

889889

890890
def is_any_int_dtype(arr_or_dtype) -> bool:
891-
"""Check whether the provided array or dtype is of an integer dtype.
891+
"""
892+
Check whether the provided array or dtype is of an integer dtype.
892893
893894
In this function, timedelta64 instances are also considered "any-integer"
894895
type objects and will return True.

0 commit comments

Comments
 (0)