Skip to content

Commit 012f859

Browse files
committed
Merge remote-tracking branch 'upstream/master' into move-metadata-to-cfg
2 parents 37e8a82 + 7e4d331 commit 012f859

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

69 files changed

+4865
-4234
lines changed

.pre-commit-config.yaml

+16-16
Original file line numberDiff line numberDiff line change
@@ -60,11 +60,11 @@ repos:
6060
entry: |
6161
(?x)
6262
# Check for imports from pandas.core.common instead of `import pandas.core.common as com`
63-
from\ pandas\.core\.common\ import|
64-
from\ pandas\.core\ import\ common|
63+
from\ pandas\.core\.common\ import
64+
|from\ pandas\.core\ import\ common
6565
6666
# Check for imports from collections.abc instead of `from collections import abc`
67-
from\ collections\.abc\ import
67+
|from\ collections\.abc\ import
6868
6969
- id: non-standard-numpy.random-related-imports
7070
name: Check for non-standard numpy.random-related imports excluding pandas/_testing.py
@@ -73,24 +73,24 @@ repos:
7373
entry: |
7474
(?x)
7575
# Check for imports from np.random.<method> instead of `from numpy import random` or `from numpy.random import <method>`
76-
from\ numpy\ import\ random|
77-
from\ numpy.random\ import
76+
from\ numpy\ import\ random
77+
|from\ numpy.random\ import
7878
types: [python]
7979
- id: non-standard-imports-in-tests
8080
name: Check for non-standard imports in test suite
8181
language: pygrep
8282
entry: |
8383
(?x)
8484
# Check for imports from pandas._testing instead of `import pandas._testing as tm`
85-
from\ pandas\._testing\ import|
86-
from\ pandas\ import\ _testing\ as\ tm|
85+
from\ pandas\._testing\ import
86+
|from\ pandas\ import\ _testing\ as\ tm
8787
8888
# No direct imports from conftest
89-
conftest\ import|
90-
import\ conftest
89+
|conftest\ import
90+
|import\ conftest
9191
9292
# Check for use of pandas.testing instead of tm
93-
pd\.testing\.
93+
|pd\.testing\.
9494
types: [python]
9595
files: ^pandas/tests/
9696
- id: incorrect-code-directives
@@ -148,9 +148,9 @@ repos:
148148
name: Check for outdated annotation syntax and missing error codes
149149
entry: |
150150
(?x)
151-
\#\ type:\ (?!ignore)|
152-
\#\ type:\s?ignore(?!\[)|
153-
\)\ ->\ \"
151+
\#\ type:\ (?!ignore)
152+
|\#\ type:\s?ignore(?!\[)
153+
|\)\ ->\ \"
154154
language: pygrep
155155
types: [python]
156156
- id: np-bool
@@ -166,9 +166,9 @@ repos:
166166
files: ^pandas/tests/
167167
exclude: |
168168
(?x)^
169-
pandas/tests/io/excel/test_writers\.py|
170-
pandas/tests/io/pytables/common\.py|
171-
pandas/tests/io/pytables/test_store\.py$
169+
pandas/tests/io/excel/test_writers\.py
170+
|pandas/tests/io/pytables/common\.py
171+
|pandas/tests/io/pytables/test_store\.py$
172172
- repo: https://github.com/asottile/yesqa
173173
rev: v1.2.2
174174
hooks:

asv_bench/benchmarks/series_methods.py

+33-12
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,32 @@ def time_constructor(self, data):
2525

2626
class IsIn:
2727

28-
params = ["int64", "uint64", "object"]
28+
params = ["int64", "uint64", "object", "Int64"]
2929
param_names = ["dtype"]
3030

3131
def setup(self, dtype):
32-
self.s = Series(np.random.randint(1, 10, 100000)).astype(dtype)
32+
N = 10000
33+
self.s = Series(np.random.randint(1, 10, N)).astype(dtype)
3334
self.values = [1, 2]
3435

3536
def time_isin(self, dtypes):
3637
self.s.isin(self.values)
3738

3839

40+
class IsInBoolean:
41+
42+
params = ["boolean", "bool"]
43+
param_names = ["dtype"]
44+
45+
def setup(self, dtype):
46+
N = 10000
47+
self.s = Series(np.random.randint(0, 2, N)).astype(dtype)
48+
self.values = [True, False]
49+
50+
def time_isin(self, dtypes):
51+
self.s.isin(self.values)
52+
53+
3954
class IsInDatetime64:
4055
def setup(self):
4156
dti = date_range(
@@ -59,21 +74,27 @@ def time_isin_empty(self):
5974

6075

6176
class IsInFloat64:
62-
def setup(self):
63-
self.small = Series([1, 2], dtype=np.float64)
64-
self.many_different_values = np.arange(10 ** 6, dtype=np.float64)
65-
self.few_different_values = np.zeros(10 ** 7, dtype=np.float64)
66-
self.only_nans_values = np.full(10 ** 7, np.nan, dtype=np.float64)
6777

68-
def time_isin_many_different(self):
78+
params = [np.float64, "Float64"]
79+
param_names = ["dtype"]
80+
81+
def setup(self, dtype):
82+
N_many = 10 ** 5
83+
N_few = 10 ** 6
84+
self.small = Series([1, 2], dtype=dtype)
85+
self.many_different_values = np.arange(N_many, dtype=np.float64)
86+
self.few_different_values = np.zeros(N_few, dtype=np.float64)
87+
self.only_nans_values = np.full(N_few, np.nan, dtype=np.float64)
88+
89+
def time_isin_many_different(self, dtypes):
6990
# runtime is dominated by creation of the lookup-table
7091
self.small.isin(self.many_different_values)
7192

72-
def time_isin_few_different(self):
93+
def time_isin_few_different(self, dtypes):
7394
# runtime is dominated by creation of the lookup-table
7495
self.small.isin(self.few_different_values)
7596

76-
def time_isin_nan_values(self):
97+
def time_isin_nan_values(self, dtypes):
7798
# runtime is dominated by creation of the lookup-table
7899
self.small.isin(self.few_different_values)
79100

@@ -114,7 +135,7 @@ def time_isin_long_series_long_values_floats(self):
114135

115136
class IsInLongSeriesLookUpDominates:
116137
params = [
117-
["int64", "int32", "float64", "float32", "object"],
138+
["int64", "int32", "float64", "float32", "object", "Int64", "Float64"],
118139
[5, 1000],
119140
["random_hits", "random_misses", "monotone_hits", "monotone_misses"],
120141
]
@@ -141,7 +162,7 @@ def time_isin(self, dtypes, MaxNumber, series_type):
141162

142163
class IsInLongSeriesValuesDominate:
143164
params = [
144-
["int64", "int32", "float64", "float32", "object"],
165+
["int64", "int32", "float64", "float32", "object", "Int64", "Float64"],
145166
["random", "monotone"],
146167
]
147168
param_names = ["dtype", "series_type"]

doc/source/reference/extensions.rst

+1
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ objects.
4848
api.extensions.ExtensionArray.equals
4949
api.extensions.ExtensionArray.factorize
5050
api.extensions.ExtensionArray.fillna
51+
api.extensions.ExtensionArray.isin
5152
api.extensions.ExtensionArray.isna
5253
api.extensions.ExtensionArray.ravel
5354
api.extensions.ExtensionArray.repeat

doc/source/whatsnew/index.rst

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Version 1.2
2424
.. toctree::
2525
:maxdepth: 2
2626

27+
v1.2.2
2728
v1.2.1
2829
v1.2.0
2930

doc/source/whatsnew/v1.2.0.rst

+10
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,8 @@ Other enhancements
286286
- Added methods :meth:`IntegerArray.prod`, :meth:`IntegerArray.min`, and :meth:`IntegerArray.max` (:issue:`33790`)
287287
- Calling a NumPy ufunc on a ``DataFrame`` with extension types now preserves the extension types when possible (:issue:`23743`)
288288
- Calling a binary-input NumPy ufunc on multiple ``DataFrame`` objects now aligns, matching the behavior of binary operations and ufuncs on ``Series`` (:issue:`23743`).
289+
This change has been reverted in pandas 1.2.1, and the behaviour to not align DataFrames
290+
is deprecated instead, see :ref:`the 1.2.1 release notes <whatsnew_121.ufunc_deprecation>`.
289291
- Where possible :meth:`RangeIndex.difference` and :meth:`RangeIndex.symmetric_difference` will return :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`36564`)
290292
- :meth:`DataFrame.to_parquet` now supports :class:`MultiIndex` for columns in parquet format (:issue:`34777`)
291293
- :func:`read_parquet` gained a ``use_nullable_dtypes=True`` option to use nullable dtypes that use ``pd.NA`` as missing value indicator where possible for the resulting DataFrame (default is ``False``, and only applicable for ``engine="pyarrow"``) (:issue:`31242`)
@@ -536,6 +538,14 @@ Deprecations
536538
- The ``inplace`` parameter of :meth:`Categorical.remove_unused_categories` is deprecated and will be removed in a future version (:issue:`37643`)
537539
- The ``null_counts`` parameter of :meth:`DataFrame.info` is deprecated and replaced by ``show_counts``. It will be removed in a future version (:issue:`37999`)
538540

541+
**Calling NumPy ufuncs on non-aligned DataFrames**
542+
543+
Calling NumPy ufuncs on non-aligned DataFrames changed behaviour in pandas
544+
1.2.0 (to align the inputs before calling the ufunc), but this change is
545+
reverted in pandas 1.2.1. The behaviour to not align is now deprecated instead,
546+
see :ref:`the 1.2.1 release notes <whatsnew_121.ufunc_deprecation>` for
547+
more details.
548+
539549
.. ---------------------------------------------------------------------------
540550
541551

doc/source/whatsnew/v1.2.1.rst

+75-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
.. _whatsnew_121:
22

3-
What's new in 1.2.1 (January 18, 2021)
3+
What's new in 1.2.1 (January 20, 2021)
44
--------------------------------------
55

66
These are the changes in pandas 1.2.1. See :ref:`release` for a full changelog
@@ -42,6 +42,79 @@ As a result, bugs reported as fixed in pandas 1.2.0 related to inconsistent tick
4242

4343
.. ---------------------------------------------------------------------------
4444
45+
.. _whatsnew_121.ufunc_deprecation:
46+
47+
Calling NumPy ufuncs on non-aligned DataFrames
48+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
49+
50+
Before pandas 1.2.0, calling a NumPy ufunc on non-aligned DataFrames (or
51+
DataFrame / Series combination) would ignore the indices, only match
52+
the inputs by shape, and use the index/columns of the first DataFrame for
53+
the result:
54+
55+
.. code-block:: python
56+
57+
>>> df1 = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=[0, 1])
58+
>>> df2 = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=[1, 2])
59+
>>> df1
60+
a b
61+
0 1 3
62+
1 2 4
63+
>>> df2
64+
a b
65+
1 1 3
66+
2 2 4
67+
68+
>>> np.add(df1, df2)
69+
a b
70+
0 2 6
71+
1 4 8
72+
73+
This contrasts with how other pandas operations work, which first align
74+
the inputs:
75+
76+
.. code-block:: python
77+
78+
>>> df1 + df2
79+
a b
80+
0 NaN NaN
81+
1 3.0 7.0
82+
2 NaN NaN
83+
84+
In pandas 1.2.0, we refactored how NumPy ufuncs are called on DataFrames, and
85+
this started to align the inputs first (:issue:`39184`), as happens in other
86+
pandas operations and as it happens for ufuncs called on Series objects.
87+
88+
For pandas 1.2.1, we restored the previous behaviour to avoid a breaking
89+
change, but the above example of ``np.add(df1, df2)`` with non-aligned inputs
90+
will now raise a warning, and a future pandas 2.0 release will start
91+
aligning the inputs first (:issue:`39184`). Calling a NumPy ufunc on Series
92+
objects (e.g. ``np.add(s1, s2)``) already aligns and continues to do so.
93+
94+
To avoid the warning and keep the current behaviour of ignoring the indices,
95+
convert one of the arguments to a NumPy array:
96+
97+
.. code-block:: python
98+
99+
>>> np.add(df1, np.asarray(df2))
100+
a b
101+
0 2 6
102+
1 4 8
103+
104+
To obtain the future behaviour and silence the warning, you can align manually
105+
before passing the arguments to the ufunc:
106+
107+
.. code-block:: python
108+
109+
>>> df1, df2 = df1.align(df2)
110+
>>> np.add(df1, df2)
111+
a b
112+
0 NaN NaN
113+
1 3.0 7.0
114+
2 NaN NaN
115+
116+
.. ---------------------------------------------------------------------------
117+
45118
.. _whatsnew_121.bug_fixes:
46119

47120
Bug fixes
@@ -71,4 +144,4 @@ Other
71144
Contributors
72145
~~~~~~~~~~~~
73146

74-
.. contributors:: v1.2.0..v1.2.1|HEAD
147+
.. contributors:: v1.2.0..v1.2.1

doc/source/whatsnew/v1.2.2.rst

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
.. _whatsnew_122:
2+
3+
What's new in 1.2.2 (February ??, 2021)
4+
---------------------------------------
5+
6+
These are the changes in pandas 1.2.2. See :ref:`release` for a full changelog
7+
including other versions of pandas.
8+
9+
{{ header }}
10+
11+
.. ---------------------------------------------------------------------------
12+
13+
.. _whatsnew_122.regressions:
14+
15+
Fixed regressions
16+
~~~~~~~~~~~~~~~~~
17+
-
18+
-
19+
20+
.. ---------------------------------------------------------------------------
21+
22+
.. _whatsnew_122.bug_fixes:
23+
24+
Bug fixes
25+
~~~~~~~~~
26+
27+
-
28+
-
29+
30+
.. ---------------------------------------------------------------------------
31+
32+
.. _whatsnew_122.other:
33+
34+
Other
35+
~~~~~
36+
37+
-
38+
-
39+
40+
.. ---------------------------------------------------------------------------
41+
42+
.. _whatsnew_122.contributors:
43+
44+
Contributors
45+
~~~~~~~~~~~~
46+
47+
.. contributors:: v1.2.1..v1.2.2|HEAD

doc/source/whatsnew/v1.3.0.rst

+7-4
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,8 @@ Performance improvements
206206
~~~~~~~~~~~~~~~~~~~~~~~~
207207
- Performance improvement in :meth:`IntervalIndex.isin` (:issue:`38353`)
208208
- Performance improvement in :meth:`Series.mean` for nullable data types (:issue:`34814`)
209-
-
209+
- Performance improvement in :meth:`Series.isin` for nullable data types (:issue:`38340`)
210+
- Performance improvement in :meth:`DataFrame.corr` for ``method="kendall"`` (:issue:`28329`)
210211

211212
.. ---------------------------------------------------------------------------
212213
@@ -243,7 +244,7 @@ Timedelta
243244

244245
Timezones
245246
^^^^^^^^^
246-
247+
- Bug in different ``tzinfo`` objects representing UTC not being treated as equivalent (:issue:`39216`)
247248
-
248249
-
249250

@@ -271,7 +272,7 @@ Interval
271272
^^^^^^^^
272273
- Bug in :meth:`IntervalIndex.intersection` and :meth:`IntervalIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38653`, :issue:`38741`)
273274
- Bug in :meth:`IntervalIndex.intersection` returning duplicates when at least one of both Indexes has duplicates which are present in the other (:issue:`38743`)
274-
-
275+
- :meth:`IntervalIndex.union`, :meth:`IntervalIndex.intersection`, :meth:`IntervalIndex.difference`, and :meth:`IntervalIndex.symmetric_difference` now cast to the appropriate dtype instead of raising ``TypeError`` when operating with another :class:`IntervalIndex` with incompatible dtype (:issue:`39267`)
275276

276277
Indexing
277278
^^^^^^^^
@@ -323,7 +324,7 @@ I/O
323324

324325
Period
325326
^^^^^^
326-
327+
- Comparisons of :class:`Period` objects or :class:`Index`, :class:`Series`, or :class:`DataFrame` with mismatched ``PeriodDtype`` now behave like other mismatched-type comparisons, returning ``False`` for equals, ``True`` for not-equal, and raising ``TypeError`` for inequality checks (:issue:`??`)
327328
-
328329
-
329330

@@ -342,6 +343,7 @@ Groupby/resample/rolling
342343
- Fixed bug in :meth:`DataFrameGroupBy.sum` and :meth:`SeriesGroupBy.sum` causing loss of precision through using Kahan summation (:issue:`38778`)
343344
- Fixed bug in :meth:`DataFrameGroupBy.cumsum`, :meth:`SeriesGroupBy.cumsum`, :meth:`DataFrameGroupBy.mean` and :meth:`SeriesGroupBy.mean` causing loss of precision through using Kahan summation (:issue:`38934`)
344345
- Bug in :meth:`.Resampler.aggregate` and :meth:`DataFrame.transform` raising ``TypeError`` instead of ``SpecificationError`` when missing keys had mixed dtypes (:issue:`39025`)
346+
- Bug in :meth:`.DataFrameGroupBy.idxmin` and :meth:`.DataFrameGroupBy.idxmax` with ``ExtensionDtype`` columns (:issue:`38733`)
345347

346348
Reshaping
347349
^^^^^^^^^
@@ -350,6 +352,7 @@ Reshaping
350352
- Bug in :func:`join` over :class:`MultiIndex` returned wrong result, when one of both indexes had only one level (:issue:`36909`)
351353
- :meth:`merge_asof` raises ``ValueError`` instead of cryptic ``TypeError`` in case of non-numerical merge columns (:issue:`29130`)
352354
- Bug in :meth:`DataFrame.join` not assigning values correctly when having :class:`MultiIndex` where at least one dimension is from dtype ``Categorical`` with non-alphabetically sorted categories (:issue:`38502`)
355+
- Bug in :meth:`DataFrame.apply` giving incorrect results when used with a string argument and ``axis=1`` when the ``axis`` argument was not supported; a ``ValueError`` is now raised instead (:issue:`39211`)
353356
-
354357

355358
Sparse

0 commit comments

Comments
 (0)