Skip to content

Commit fe82c37

Browse files
committed
Merge branch '1.2.x' of https://github.com/pandas-dev/pandas into 1.2.x
2 parents 852e2d1 + 90f3797 commit fe82c37

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+648
-140
lines changed

asv_bench/benchmarks/series_methods.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ def setup(self):
8787
self.vals_short = np.arange(2).astype(object)
8888
self.vals_long = np.arange(10 ** 5).astype(object)
8989
# because of nans floats are special:
90-
self.s_long_floats = Series(np.arange(10 ** 5, dtype=np.float)).astype(object)
91-
self.vals_long_floats = np.arange(10 ** 5, dtype=np.float).astype(object)
90+
self.s_long_floats = Series(np.arange(10 ** 5, dtype=np.float_)).astype(object)
91+
self.vals_long_floats = np.arange(10 ** 5, dtype=np.float_).astype(object)
9292

9393
def time_isin_nans(self):
9494
# if nan-objects are different objects,

doc/source/conf.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,7 @@
409409
if pattern is None:
410410
intersphinx_mapping = {
411411
"dateutil": ("https://dateutil.readthedocs.io/en/latest/", None),
412-
"matplotlib": ("https://matplotlib.org/", None),
412+
"matplotlib": ("https://matplotlib.org/stable/", None),
413413
"numpy": ("https://numpy.org/doc/stable/", None),
414414
"pandas-gbq": ("https://pandas-gbq.readthedocs.io/en/latest/", None),
415415
"py": ("https://pylib.readthedocs.io/en/latest/", None),

doc/source/user_guide/cookbook.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -1410,7 +1410,7 @@ Often it's useful to obtain the lower (or upper) triangular form of a correlatio
14101410
14111411
corr_mat.where(mask)
14121412
1413-
The ``method`` argument within ``DataFrame.corr`` can accept a callable in addition to the named correlation types. Here we compute the ``distance correlation <https://en.wikipedia.org/wiki/Distance_correlation>``__ matrix for a ``DataFrame`` object.
1413+
The ``method`` argument within ``DataFrame.corr`` can accept a callable in addition to the named correlation types. Here we compute the `distance correlation <https://en.wikipedia.org/wiki/Distance_correlation>`__ matrix for a ``DataFrame`` object.
14141414

14151415
.. ipython:: python
14161416

doc/source/whatsnew/index.rst

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Version 1.2
1616
.. toctree::
1717
:maxdepth: 2
1818

19+
v1.2.3
1920
v1.2.2
2021
v1.2.1
2122
v1.2.0

doc/source/whatsnew/v1.2.2.rst

+11-14
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
.. _whatsnew_122:
22

3-
What's new in 1.2.2 (February ??, 2021)
3+
What's new in 1.2.2 (February 09, 2021)
44
---------------------------------------
55

66
These are the changes in pandas 1.2.2. See :ref:`release` for a full changelog
@@ -17,10 +17,16 @@ Fixed regressions
1717

1818
- Fixed regression in :func:`read_excel` that caused it to raise ``AttributeError`` when checking version of older xlrd versions (:issue:`38955`)
1919
- Fixed regression in :class:`DataFrame` constructor reordering elements when constructing from a datetime ndarray with a dtype other than ``"datetime64[ns]"`` (:issue:`39422`)
20+
- Fixed regression in :meth:`DataFrame.astype` and :meth:`Series.astype` not casting to bytes dtype (:issue:`39474`)
2021
- Fixed regression in :meth:`~DataFrame.to_pickle` failing to create bz2/xz compressed pickle files with ``protocol=5`` (:issue:`39002`)
2122
- Fixed regression in :func:`pandas.testing.assert_series_equal` and :func:`pandas.testing.assert_frame_equal` always raising ``AssertionError`` when comparing extension dtypes (:issue:`39410`)
2223
- Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamWriter`` in binary mode instead of in text mode and ignoring user-provided ``mode`` (:issue:`39247`)
23-
-
24+
- Fixed regression in :meth:`Categorical.astype` casting to incorrect dtype when ``np.int32`` is passed to dtype argument (:issue:`39402`)
25+
- Fixed regression in :meth:`~DataFrame.to_excel` creating corrupt files when appending (``mode="a"``) to an existing file (:issue:`39576`)
26+
- Fixed regression in :meth:`DataFrame.transform` failing in case of an empty DataFrame or Series (:issue:`39636`)
27+
- Fixed regression in :meth:`~DataFrame.groupby` or :meth:`~DataFrame.resample` when aggregating an all-NaN or numeric object dtype column (:issue:`39329`)
28+
- Fixed regression in :meth:`.Rolling.count` where the ``min_periods`` argument would be set to ``0`` after the operation (:issue:`39554`)
29+
- Fixed regression in :func:`read_excel` that incorrectly raised when the argument ``io`` was a non-path and non-buffer and the ``engine`` argument was specified (:issue:`39528`)
2430

2531
.. ---------------------------------------------------------------------------
2632
@@ -30,17 +36,8 @@ Bug fixes
3036
~~~~~~~~~
3137

3238
- :func:`pandas.read_excel` error message when a specified ``sheetname`` does not exist is now uniform across engines (:issue:`39250`)
33-
-
34-
35-
.. ---------------------------------------------------------------------------
36-
37-
.. _whatsnew_122.other:
38-
39-
Other
40-
~~~~~
41-
42-
-
43-
-
39+
- Fixed bug in :func:`pandas.read_excel` producing incorrect results when the engine ``openpyxl`` is used and the Excel file is missing or has incorrect dimension information; the fix requires ``openpyxl`` >= 3.0.0, prior versions may still fail (:issue:`38956`, :issue:`39001`)
40+
- Fixed bug in :func:`pandas.read_excel` sometimes producing a ``DataFrame`` with trailing rows of ``np.nan`` when the engine ``openpyxl`` is used (:issue:`39181`)
4441

4542
.. ---------------------------------------------------------------------------
4643
@@ -49,4 +46,4 @@ Other
4946
Contributors
5047
~~~~~~~~~~~~
5148

52-
.. contributors:: v1.2.1..v1.2.2|HEAD
49+
.. contributors:: v1.2.1..v1.2.2

doc/source/whatsnew/v1.2.3.rst

+55
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
.. _whatsnew_123:
2+
3+
What's new in 1.2.3 (March ??, 2021)
4+
------------------------------------
5+
6+
These are the changes in pandas 1.2.3. See :ref:`release` for a full changelog
7+
including other versions of pandas.
8+
9+
{{ header }}
10+
11+
.. ---------------------------------------------------------------------------
12+
13+
.. _whatsnew_123.regressions:
14+
15+
Fixed regressions
16+
~~~~~~~~~~~~~~~~~
17+
18+
- Fixed regression in :meth:`~DataFrame.to_excel` raising ``KeyError`` when giving duplicate columns with ``columns`` attribute (:issue:`39695`)
19+
- Fixed regression in nullable integer unary ops propagating mask on assignment (:issue:`39943`)
20+
- Fixed regression in :meth:`DataFrame.__setitem__` not aligning :class:`DataFrame` on right-hand side for boolean indexer (:issue:`39931`)
21+
- Fixed regression in :meth:`~DataFrame.to_json` failing to use ``compression`` with URL-like paths that are internally opened in binary mode or with user-provided file objects that are opened in binary mode (:issue:`39985`)
22+
- Fixed regression in :meth:`~Series.sort_index` and :meth:`~DataFrame.sort_index`,
23+
which failed with an unclear error when ``ascending=None`` was passed (:issue:`39434`).
24+
Passing ``ascending=None`` is still considered invalid,
25+
and the new error message suggests a proper usage
26+
(``ascending`` must be a boolean or a list-like of booleans).
27+
28+
.. ---------------------------------------------------------------------------
29+
30+
.. _whatsnew_123.bug_fixes:
31+
32+
Bug fixes
33+
~~~~~~~~~
34+
35+
-
36+
-
37+
38+
.. ---------------------------------------------------------------------------
39+
40+
.. _whatsnew_123.other:
41+
42+
Other
43+
~~~~~
44+
45+
-
46+
-
47+
48+
.. ---------------------------------------------------------------------------
49+
50+
.. _whatsnew_123.contributors:
51+
52+
Contributors
53+
~~~~~~~~~~~~
54+
55+
.. contributors:: v1.2.2..v1.2.3|HEAD

environment.yml

+3-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ channels:
33
- conda-forge
44
dependencies:
55
# required
6-
- numpy>=1.16.5, <1.20 # gh-39513
6+
- numpy>=1.16.5
77
- python=3
88
- python-dateutil>=2.7.3
99
- pytz
@@ -113,5 +113,5 @@ dependencies:
113113
- tabulate>=0.8.3 # DataFrame.to_markdown
114114
- natsort # DataFrame.sort_values
115115
- pip:
116-
- git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master
117-
- git+https://github.com/numpy/numpydoc
116+
- git+https://github.com/pandas-dev/pydata-sphinx-theme.git@2488b7defbd3d753dd5fcfc890fc4a7e79d25103
117+
- numpydoc < 1.2 # 2021-02-09 1.2dev breaking CI

pandas/compat/_optional.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
"matplotlib": "2.2.3",
1616
"numexpr": "2.6.8",
1717
"odfpy": "1.3.0",
18-
"openpyxl": "2.5.7",
18+
"openpyxl": "2.6.0",
1919
"pandas_gbq": "0.12.0",
2020
"pyarrow": "0.15.0",
2121
"pytest": "5.0.1",

pandas/conftest.py

+26
Original file line numberDiff line numberDiff line change
@@ -1188,6 +1188,32 @@ def any_nullable_int_dtype(request):
11881188
return request.param
11891189

11901190

1191+
@pytest.fixture(params=tm.ALL_INT_DTYPES + tm.ALL_EA_INT_DTYPES)
1192+
def any_int_or_nullable_int_dtype(request):
1193+
"""
1194+
Parameterized fixture for any integer or nullable integer dtype.
1195+
1196+
* int
1197+
* 'int8'
1198+
* 'uint8'
1199+
* 'int16'
1200+
* 'uint16'
1201+
* 'int32'
1202+
* 'uint32'
1203+
* 'int64'
1204+
* 'uint64'
1205+
* 'UInt8'
1206+
* 'Int8'
1207+
* 'UInt16'
1208+
* 'Int16'
1209+
* 'UInt32'
1210+
* 'Int32'
1211+
* 'UInt64'
1212+
* 'Int64'
1213+
"""
1214+
return request.param
1215+
1216+
11911217
@pytest.fixture(params=tm.ALL_EA_INT_DTYPES + tm.FLOAT_EA_DTYPES)
11921218
def any_numeric_dtype(request):
11931219
"""

pandas/core/aggregation.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -456,7 +456,7 @@ def transform(
456456

457457
# Functions that transform may return empty Series/DataFrame
458458
# when the dtype is not appropriate
459-
if isinstance(result, (ABCSeries, ABCDataFrame)) and result.empty:
459+
if isinstance(result, (ABCSeries, ABCDataFrame)) and result.empty and not obj.empty:
460460
raise ValueError("Transform function failed")
461461
if not isinstance(result, (ABCSeries, ABCDataFrame)) or not result.index.equals(
462462
obj.index

pandas/core/arrays/categorical.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -427,16 +427,16 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
427427
else:
428428
# GH8628 (PERF): astype category codes instead of astyping array
429429
try:
430-
astyped_cats = self.categories.astype(dtype=dtype, copy=copy)
430+
new_cats = np.asarray(self.categories)
431+
new_cats = new_cats.astype(dtype=dtype, copy=copy)
431432
except (
432433
TypeError, # downstream error msg for CategoricalIndex is misleading
433434
ValueError,
434435
):
435436
msg = f"Cannot cast {self.categories.dtype} dtype to {dtype}"
436437
raise ValueError(msg)
437438

438-
astyped_cats = extract_array(astyped_cats, extract_numpy=True)
439-
result = take_1d(astyped_cats, libalgos.ensure_platform_int(self._codes))
439+
result = take_1d(new_cats, libalgos.ensure_platform_int(self._codes))
440440

441441
return result
442442

pandas/core/arrays/integer.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -348,13 +348,13 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
348348
super().__init__(values, mask, copy=copy)
349349

350350
def __neg__(self):
351-
return type(self)(-self._data, self._mask)
351+
return type(self)(-self._data, self._mask.copy())
352352

353353
def __pos__(self):
354354
return self
355355

356356
def __abs__(self):
357-
return type(self)(np.abs(self._data), self._mask)
357+
return type(self)(np.abs(self._data), self._mask.copy())
358358

359359
@classmethod
360360
def _from_sequence(

pandas/core/arrays/masked.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ def __len__(self) -> int:
142142
return len(self._data)
143143

144144
def __invert__(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
145-
return type(self)(~self._data, self._mask)
145+
return type(self)(~self._data, self._mask.copy())
146146

147147
def to_numpy(
148148
self, dtype=None, copy: bool = False, na_value: Scalar = lib.no_default

pandas/core/dtypes/cast.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1137,7 +1137,7 @@ def soft_convert_objects(
11371137
# bound of nanosecond-resolution 64-bit integers.
11381138
try:
11391139
values = lib.maybe_convert_objects(values, convert_datetime=True)
1140-
except OutOfBoundsDatetime:
1140+
except (OutOfBoundsDatetime, ValueError):
11411141
pass
11421142

11431143
if timedelta and is_object_dtype(values.dtype):

pandas/core/frame.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -3179,6 +3179,9 @@ def _setitem_array(self, key, value):
31793179
key = check_bool_indexer(self.index, key)
31803180
indexer = key.nonzero()[0]
31813181
self._check_setitem_copy()
3182+
if isinstance(value, DataFrame):
3183+
# GH#39931 reindex since iloc does not align
3184+
value = value.reindex(self.index.take(indexer))
31823185
self.iloc[indexer] = value
31833186
else:
31843187
if isinstance(value, DataFrame):
@@ -3951,8 +3954,8 @@ def lookup(self, row_labels, col_labels) -> np.ndarray:
39513954
.. deprecated:: 1.2.0
39523955
DataFrame.lookup is deprecated,
39533956
use DataFrame.melt and DataFrame.loc instead.
3954-
For an example see :meth:`~pandas.DataFrame.lookup`
3955-
in the user guide.
3957+
For further details see
3958+
:ref:`Looking up values by index/column labels <indexing.lookup>`.
39563959
39573960
Parameters
39583961
----------
@@ -5479,7 +5482,7 @@ def sort_index(
54795482
self,
54805483
axis=0,
54815484
level=None,
5482-
ascending: bool = True,
5485+
ascending: Union[Union[bool, int], Sequence[Union[bool, int]]] = True,
54835486
inplace: bool = False,
54845487
kind: str = "quicksort",
54855488
na_position: str = "last",
@@ -5500,7 +5503,7 @@ def sort_index(
55005503
and 1 identifies the columns.
55015504
level : int or level name or list of ints or list of level names
55025505
If not None, sort on values in specified index level(s).
5503-
ascending : bool or list of bools, default True
5506+
ascending : bool or list-like of bools, default True
55045507
Sort ascending vs. descending. When the index is a MultiIndex the
55055508
sort direction can be controlled for each level individually.
55065509
inplace : bool, default False

pandas/core/generic.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
from pandas.errors import AbstractMethodError, InvalidIndexError
5757
from pandas.util._decorators import doc, rewrite_axis_style_signature
5858
from pandas.util._validators import (
59+
validate_ascending,
5960
validate_bool_kwarg,
6061
validate_fillna_kwargs,
6162
validate_percentile,
@@ -4518,7 +4519,7 @@ def sort_index(
45184519
self,
45194520
axis=0,
45204521
level=None,
4521-
ascending: bool_t = True,
4522+
ascending: Union[Union[bool_t, int], Sequence[Union[bool_t, int]]] = True,
45224523
inplace: bool_t = False,
45234524
kind: str = "quicksort",
45244525
na_position: str = "last",
@@ -4529,6 +4530,8 @@ def sort_index(
45294530

45304531
inplace = validate_bool_kwarg(inplace, "inplace")
45314532
axis = self._get_axis_number(axis)
4533+
ascending = validate_ascending(ascending)
4534+
45324535
target = self._get_axis(axis)
45334536

45344537
indexer = get_indexer_indexer(
@@ -6438,6 +6441,7 @@ def fillna(
64386441
return result.__finalize__(self, method="fillna")
64396442

64406443
@final
6444+
@doc(klass=_shared_doc_kwargs["klass"])
64416445
def ffill(
64426446
self: FrameOrSeries,
64436447
axis=None,
@@ -6460,6 +6464,7 @@ def ffill(
64606464
pad = ffill
64616465

64626466
@final
6467+
@doc(klass=_shared_doc_kwargs["klass"])
64636468
def bfill(
64646469
self: FrameOrSeries,
64656470
axis=None,

pandas/core/groupby/generic.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -1078,11 +1078,18 @@ def py_fallback(bvalues: ArrayLike) -> ArrayLike:
10781078
# in the operation. We un-split here.
10791079
result = result._consolidate()
10801080
assert isinstance(result, (Series, DataFrame)) # for mypy
1081-
assert len(result._mgr.blocks) == 1
1081+
mgr = result._mgr
1082+
assert isinstance(mgr, BlockManager)
10821083

10831084
# unwrap DataFrame to get array
1084-
result = result._mgr.blocks[0].values
1085-
return result
1085+
if len(mgr.blocks) != 1:
1086+
# We've split an object block! Everything we've assumed
1087+
# about a single block input returning a single block output
1088+
# is a lie. See eg GH-39329
1089+
return mgr.as_array()
1090+
else:
1091+
result = mgr.blocks[0].values
1092+
return result
10861093

10871094
def blk_func(bvalues: ArrayLike) -> ArrayLike:
10881095

pandas/core/indexes/multi.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -1286,16 +1286,18 @@ def _format_native_types(self, na_rep="nan", **kwargs):
12861286

12871287
# go through the levels and format them
12881288
for level, level_codes in zip(self.levels, self.codes):
1289-
level = level._format_native_types(na_rep=na_rep, **kwargs)
1289+
level_strs = level._format_native_types(na_rep=na_rep, **kwargs)
12901290
# add nan values, if there are any
12911291
mask = level_codes == -1
12921292
if mask.any():
1293-
nan_index = len(level)
1294-
level = np.append(level, na_rep)
1293+
nan_index = len(level_strs)
1294+
# numpy 1.21 deprecated implicit string casting
1295+
level_strs = level_strs.astype(str)
1296+
level_strs = np.append(level_strs, na_rep)
12951297
assert not level_codes.flags.writeable # i.e. copy is needed
12961298
level_codes = level_codes.copy() # make writeable
12971299
level_codes[mask] = nan_index
1298-
new_levels.append(level)
1300+
new_levels.append(level_strs)
12991301
new_codes.append(level_codes)
13001302

13011303
if len(new_levels) == 1:

0 commit comments

Comments
 (0)