
Commit b8495c3
Merge branch 'master' of https://github.com/pandas-dev/pandas into HEAD
2 parents: 5d60908 + 6b21e3f

File tree: 26 files changed, +427 / -234 lines


doc/source/whatsnew/v1.1.5.rst (+1)

@@ -20,6 +20,7 @@ Fixed regressions
 - Fixed regression in inplace operations on :class:`Series` with ``ExtensionDtype`` with NumPy dtyped operand (:issue:`37910`)
 - Fixed regression in metadata propagation for ``groupby`` iterator (:issue:`37343`)
 - Fixed regression in :class:`MultiIndex` constructed from a :class:`DatetimeIndex` not retaining frequency (:issue:`35563`)
+- Fixed regression in :meth:`DataFrame.unstack` with columns with integer dtype (:issue:`37115`)
 - Fixed regression in indexing on a :class:`Series` with ``CategoricalDtype`` after unpickling (:issue:`37631`)
 - Fixed regression in :meth:`DataFrame.groupby` aggregation with out-of-bounds datetime objects in an object-dtype column (:issue:`36003`)
 - Fixed regression in ``df.groupby(..).rolling(..)`` with the resulting :class:`MultiIndex` when grouping by a label that is in the index (:issue:`37641`)

doc/source/whatsnew/v1.2.0.rst (+6)

@@ -660,6 +660,7 @@ Indexing
 - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`MultiIndex` and a level named ``"0"`` (:issue:`37194`)
 - Bug in :meth:`Series.__getitem__` when using an unsigned integer array as an indexer giving incorrect results or segfaulting instead of raising ``KeyError`` (:issue:`37218`)
 - Bug in :meth:`Index.where` incorrectly casting numeric values to strings (:issue:`37591`)
+- Bug in :meth:`DataFrame.loc` returning empty result when indexer is a slice with negative step size (:issue:`38071`)
 - Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` raises when the index was of ``object`` dtype and the given numeric label was in the index (:issue:`26491`)
 - Bug in :meth:`DataFrame.loc` returned requested key plus missing values when ``loc`` was applied to single level from a :class:`MultiIndex` (:issue:`27104`)
 - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using a listlike indexer containing NA values (:issue:`37722`)
@@ -693,6 +694,7 @@ MultiIndex
 - Bug in :meth:`DataFrame.reset_index` with ``NaT`` values in index raises ``ValueError`` with message ``"cannot convert float NaN to integer"`` (:issue:`36541`)
 - Bug in :meth:`DataFrame.combine_first` when used with :class:`MultiIndex` containing string and ``NaN`` values raises ``TypeError`` (:issue:`36562`)
 - Bug in :meth:`MultiIndex.drop` dropped ``NaN`` values when non existing key was given as input (:issue:`18853`)
+- Bug in :meth:`MultiIndex.drop` dropping more values than expected when index has duplicates and is not sorted (:issue:`33494`)
 
 I/O
 ^^^
@@ -828,7 +830,11 @@ Other
 - Bug in :meth:`Index.intersection` with non-matching numeric dtypes casting to ``object`` dtype instead of minimal common dtype (:issue:`38122`)
 - Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError`` rather than a bare ``Exception`` (:issue:`35744`)
 - Bug in ``dir`` where ``dir(obj)`` wouldn't show attributes defined on the instance for pandas objects (:issue:`37173`)
+- Bug in :meth:`Index.drop` raising ``InvalidIndexError`` when index has duplicates (:issue:`38051`)
 - Bug in :meth:`RangeIndex.difference` returning :class:`Int64Index` in some cases where it should return :class:`RangeIndex` (:issue:`38028`)
+- Fixed bug in :func:`assert_series_equal` when comparing a datetime-like array with an equivalent non extension dtype array (:issue:`37609`)
+
+
 
 .. ---------------------------------------------------------------------------
 
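Note: for the :meth:`DataFrame.loc` entry above (:issue:`38071`), the affected call shape is a label slice with a negative step. An illustrative snippet (not taken from the changelog itself):

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3]}, index=[0, 1, 2])
    # A .loc slice with a negative step; in the affected versions this kind of
    # call could come back empty instead of returning the reversed selection.
    print(df.loc[2:0:-1])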

pandas/_libs/reduction.pyx (+2, -2)

@@ -367,9 +367,9 @@ def apply_frame_axis0(object frame, object f, object names,
 
         try:
             piece = f(chunk)
-        except Exception:
+        except Exception as err:
             # We can't be more specific without knowing something about `f`
-            raise InvalidApply('Let this error raise above us')
+            raise InvalidApply("Let this error raise above us") from err
 
         # Need to infer if low level index slider will cause segfaults
         require_slow_apply = i == 0 and piece is chunk
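Note: the ``from err`` form chains the original exception onto the new ``InvalidApply`` as ``__cause__``, so the triggering traceback is preserved when the wrapper is re-raised higher up. A minimal, self-contained sketch of the same chaining pattern (the ``AppError`` class and ``transform`` function are hypothetical, not part of pandas):

    class AppError(Exception):
        """Hypothetical wrapper error, standing in for InvalidApply."""

    def transform(value):
        try:
            return 1 / value
        except Exception as err:
            # "from err" records the original error as __cause__ instead of
            # discarding it, so both tracebacks are visible if this escapes.
            raise AppError("let this error raise above us") from err

    try:
        transform(0)
    except AppError as exc:
        print(type(exc.__cause__))  # <class 'ZeroDivisionError'>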

pandas/_testing.py (+24, -1)

@@ -1456,7 +1456,16 @@ def assert_series_equal(
             check_dtype=check_dtype,
             index_values=np.asarray(left.index),
         )
-    elif needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype):
+    elif is_extension_array_dtype_and_needs_i8_conversion(
+        left.dtype, right.dtype
+    ) or is_extension_array_dtype_and_needs_i8_conversion(right.dtype, left.dtype):
+        assert_extension_array_equal(
+            left._values,
+            right._values,
+            check_dtype=check_dtype,
+            index_values=np.asarray(left.index),
+        )
+    elif needs_i8_conversion(left.dtype) and needs_i8_conversion(right.dtype):
         # DatetimeArray or TimedeltaArray
         assert_extension_array_equal(
             left._values,
@@ -1866,6 +1875,20 @@ def assert_copy(iter1, iter2, **eql_kwargs):
         assert elem1 is not elem2, msg
 
 
+def is_extension_array_dtype_and_needs_i8_conversion(left_dtype, right_dtype) -> bool:
+    """
+    Checks that we have the combination of an ExtensionArray dtype and
+    a dtype that should be converted to int64
+
+    Returns
+    -------
+    bool
+
+    Related to issue #37609
+    """
+    return is_extension_array_dtype(left_dtype) and needs_i8_conversion(right_dtype)
+
+
 def getCols(k):
     return string.ascii_uppercase[:k]
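Note: the new helper returns True only for the mixed case, where the left dtype is backed by an ExtensionArray (for example tz-aware datetimes) while the right dtype is a plain datetime64/timedelta64 that pandas stores as int64 internally. A small sketch of that check using the same functions the diff relies on (the import path is pandas-internal as of this commit, not public API):

    import numpy as np
    import pandas as pd
    from pandas.core.dtypes.common import is_extension_array_dtype, needs_i8_conversion

    left = pd.Series(pd.to_datetime(["2020-01-01"]).tz_localize("UTC"))  # DatetimeTZDtype (extension)
    right = pd.Series(np.array(["2020-01-01"], dtype="datetime64[ns]"))  # plain numpy-backed dtype

    # Mirrors is_extension_array_dtype_and_needs_i8_conversion(left.dtype, right.dtype):
    print(is_extension_array_dtype(left.dtype) and needs_i8_conversion(right.dtype))   # True
    # Swapped order: the plain datetime64 dtype is not an extension dtype.
    print(is_extension_array_dtype(right.dtype) and needs_i8_conversion(left.dtype))   # False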

pandas/core/arrays/datetimelike.py (+2, -1)

@@ -936,7 +936,8 @@ def _cmp_method(self, other, op):
             return result
 
         other_vals = self._unbox(other)
-        result = op(self._ndarray, other_vals)
+        # GH#37462 comparison on i8 values is almost 2x faster than M8/m8
+        result = op(self._ndarray.view("i8"), other_vals.view("i8"))
 
         o_mask = isna(other)
         if self._hasnans | np.any(o_mask):
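Note: viewing the datetime64/timedelta64 data as int64 ("i8") compares the raw ordinal values, which per the GH#37462 comment is roughly 2x faster and gives the same element-wise answer as the M8/m8 comparison when no NaT is involved (NaT positions are masked separately via ``o_mask`` just below). A standalone NumPy sketch of the idea (illustrative only):

    import numpy as np

    left = np.array(["2020-01-01", "2020-01-03"], dtype="datetime64[ns]")
    right = np.array(["2020-01-02", "2020-01-02"], dtype="datetime64[ns]")

    # Comparing the underlying int64 nanosecond counts is equivalent to comparing
    # the datetime64 values directly (for NaT-free data), but skips datetime dispatch.
    direct = left < right
    via_i8 = left.view("i8") < right.view("i8")
    assert (direct == via_i8).all()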

pandas/core/arrays/interval.py (+41, -39)

@@ -57,9 +57,11 @@
 
 _interval_shared_docs = {}
 
-_shared_docs_kwargs = dict(
-    klass="IntervalArray", qualname="arrays.IntervalArray", name=""
-)
+_shared_docs_kwargs = {
+    "klass": "IntervalArray",
+    "qualname": "arrays.IntervalArray",
+    "name": "",
+}
 
 
 _interval_shared_docs[
@@ -127,14 +129,14 @@
 
 @Appender(
     _interval_shared_docs["class"]
-    % dict(
-        klass="IntervalArray",
-        summary="Pandas array for interval data that are closed on the same side.",
-        versionadded="0.24.0",
-        name="",
-        extra_attributes="",
-        extra_methods="",
-        examples=textwrap.dedent(
+    % {
+        "klass": "IntervalArray",
+        "summary": "Pandas array for interval data that are closed on the same side.",
+        "versionadded": "0.24.0",
+        "name": "",
+        "extra_attributes": "",
+        "extra_methods": "",
+        "examples": textwrap.dedent(
             """\
     Examples
     --------
@@ -151,7 +153,7 @@
     :meth:`IntervalArray.from_breaks`, and :meth:`IntervalArray.from_tuples`.
     """
         ),
-    )
+    }
 )
 class IntervalArray(IntervalMixin, ExtensionArray):
     ndim = 1
@@ -319,9 +321,9 @@ def _from_factorized(cls, values, original):
     @classmethod
     @Appender(
         _interval_shared_docs["from_breaks"]
-        % dict(
-            klass="IntervalArray",
-            examples=textwrap.dedent(
+        % {
+            "klass": "IntervalArray",
+            "examples": textwrap.dedent(
                 """\
         Examples
         --------
@@ -331,7 +333,7 @@ def _from_factorized(cls, values, original):
         Length: 3, closed: right, dtype: interval[int64]
         """
             ),
-        )
+        }
     )
     def from_breaks(cls, breaks, closed="right", copy=False, dtype=None):
         breaks = maybe_convert_platform_interval(breaks)
@@ -390,17 +392,17 @@ def from_breaks(cls, breaks, closed="right", copy=False, dtype=None):
     @classmethod
     @Appender(
         _interval_shared_docs["from_arrays"]
-        % dict(
-            klass="IntervalArray",
-            examples=textwrap.dedent(
+        % {
+            "klass": "IntervalArray",
+            "examples": textwrap.dedent(
                 """\
         >>> pd.arrays.IntervalArray.from_arrays([0, 1, 2], [1, 2, 3])
         <IntervalArray>
         [(0, 1], (1, 2], (2, 3]]
         Length: 3, closed: right, dtype: interval[int64]
         """
             ),
-        )
+        }
     )
     def from_arrays(cls, left, right, closed="right", copy=False, dtype=None):
         left = maybe_convert_platform_interval(left)
@@ -445,9 +447,9 @@ def from_arrays(cls, left, right, closed="right", copy=False, dtype=None):
     @classmethod
     @Appender(
         _interval_shared_docs["from_tuples"]
-        % dict(
-            klass="IntervalArray",
-            examples=textwrap.dedent(
+        % {
+            "klass": "IntervalArray",
+            "examples": textwrap.dedent(
                 """\
         Examples
         --------
@@ -457,7 +459,7 @@ def from_arrays(cls, left, right, closed="right", copy=False, dtype=None):
         Length: 2, closed: right, dtype: interval[int64]
         """
             ),
-        )
+        }
     )
     def from_tuples(cls, data, closed="right", copy=False, dtype=None):
         if len(data):
@@ -904,7 +906,7 @@ def take(self, indices, *, allow_fill=False, fill_value=None, axis=None, **kwargs):
             When `indices` contains negative values other than ``-1``
             and `allow_fill` is True.
         """
-        nv.validate_take(tuple(), kwargs)
+        nv.validate_take((), kwargs)
 
         fill_left = fill_right = fill_value
         if allow_fill:
@@ -1144,9 +1146,9 @@ def mid(self):
 
     @Appender(
         _interval_shared_docs["overlaps"]
-        % dict(
-            klass="IntervalArray",
-            examples=textwrap.dedent(
+        % {
+            "klass": "IntervalArray",
+            "examples": textwrap.dedent(
                 """\
         >>> data = [(0, 1), (1, 3), (2, 4)]
        >>> intervals = pd.arrays.IntervalArray.from_tuples(data)
@@ -1156,7 +1158,7 @@ def mid(self):
         Length: 3, closed: right, dtype: interval[int64]
         """
             ),
-        )
+        }
     )
     def overlaps(self, other):
         if isinstance(other, (IntervalArray, ABCIntervalIndex)):
@@ -1207,9 +1209,9 @@ def closed(self):
 
     @Appender(
         _interval_shared_docs["set_closed"]
-        % dict(
-            klass="IntervalArray",
-            examples=textwrap.dedent(
+        % {
+            "klass": "IntervalArray",
+            "examples": textwrap.dedent(
                 """\
         Examples
         --------
@@ -1224,7 +1226,7 @@ def closed(self):
         Length: 3, closed: both, dtype: interval[int64]
         """
             ),
-        )
+        }
     )
     def set_closed(self, closed):
         if closed not in VALID_CLOSED:
@@ -1360,7 +1362,7 @@ def __arrow_array__(self, type=None):
         """
 
     @Appender(
-        _interval_shared_docs["to_tuples"] % dict(return_type="ndarray", examples="")
+        _interval_shared_docs["to_tuples"] % {"return_type": "ndarray", "examples": ""}
     )
     def to_tuples(self, na_tuple=True):
         tuples = com.asarray_tuplesafe(zip(self._left, self._right))
@@ -1373,7 +1375,7 @@ def to_tuples(self, na_tuple=True):
 
     @Appender(_extension_array_shared_docs["repeat"] % _shared_docs_kwargs)
     def repeat(self, repeats, axis=None):
-        nv.validate_repeat(tuple(), dict(axis=axis))
+        nv.validate_repeat((), {"axis": axis})
         left_repeat = self.left.repeat(repeats)
         right_repeat = self.right.repeat(repeats)
         return self._shallow_copy(left=left_repeat, right=right_repeat)
@@ -1412,9 +1414,9 @@ def repeat(self, repeats, axis=None):
 
     @Appender(
         _interval_shared_docs["contains"]
-        % dict(
-            klass="IntervalArray",
-            examples=textwrap.dedent(
+        % {
+            "klass": "IntervalArray",
+            "examples": textwrap.dedent(
                 """\
         >>> intervals = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 3), (2, 4)])
         >>> intervals
@@ -1423,7 +1425,7 @@ def repeat(self, repeats, axis=None):
         Length: 3, closed: right, dtype: interval[int64]
         """
             ),
-        )
+        }
     )
     def contains(self, other):
         if isinstance(other, Interval):
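Note: every change in this file is stylistic, swapping dict(...) and tuple() constructor calls for the equivalent literals ({...} and ()) inside the %-based docstring templating used with @Appender; behavior is unchanged, the literals are simply the preferred, slightly cheaper idiom. A self-contained sketch of that templating pattern (the _shared_doc template below is illustrative, not the actual _interval_shared_docs entry):

    import textwrap

    # A shared docstring template with %-style named placeholders, analogous to
    # the _interval_shared_docs entries filled in above.
    _shared_doc = textwrap.dedent(
        """\
        Construct an %(klass)s.

        Examples
        --------
        %(examples)s
        """
    )

    # Filling the template with a dict literal (the new style) is equivalent to
    # the old % dict(...) call, just without the extra constructor lookup.
    filled = _shared_doc % {
        "klass": "IntervalArray",
        "examples": ">>> pd.arrays.IntervalArray.from_breaks([0, 1, 2])",
    }
    print(filled)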

pandas/core/dtypes/dtypes.py (+6, -2)

@@ -399,10 +399,14 @@ def __eq__(self, other: Any) -> bool:
 
     def __repr__(self) -> str_type:
         if self.categories is None:
-            data = "None, "
+            data = "None"
         else:
             data = self.categories._format_data(name=type(self).__name__)
-        return f"CategoricalDtype(categories={data}ordered={self.ordered})"
+            if data is None:
+                # self.categories is RangeIndex
+                data = str(self.categories._range)
+            data = data.rstrip(", ")
+        return f"CategoricalDtype(categories={data}, ordered={self.ordered})"
 
     @staticmethod
     def _hash_categories(categories, ordered: Ordered = True) -> int:
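Note: the old f-string dropped the comma between the categories and ordered fields and, when the categories were backed by a RangeIndex (where Index._format_data returns None), printed "None" instead of the range. A short illustration of the case the fix targets (constructing the dtype from a RangeIndex here is just for demonstration):

    import pandas as pd

    dtype = pd.CategoricalDtype(categories=pd.RangeIndex(3))

    # Before the fix this repr came out roughly as
    #   CategoricalDtype(categories=Noneordered=False)
    # afterwards the underlying range is shown and the comma is restored, e.g.
    #   CategoricalDtype(categories=range(0, 3), ordered=False)
    print(repr(dtype))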

pandas/core/groupby/generic.py (+3, -3)

@@ -63,7 +63,7 @@
 import pandas.core.common as com
 from pandas.core.construction import create_series_with_explicit_dtype
 from pandas.core.frame import DataFrame
-from pandas.core.generic import ABCDataFrame, ABCSeries, NDFrame
+from pandas.core.generic import NDFrame
 from pandas.core.groupby import base
 from pandas.core.groupby.groupby import (
     GroupBy,
@@ -531,7 +531,7 @@ def _transform_general(self, func, *args, **kwargs):
             object.__setattr__(group, "name", name)
             res = func(group, *args, **kwargs)
 
-            if isinstance(res, (ABCDataFrame, ABCSeries)):
+            if isinstance(res, (DataFrame, Series)):
                 res = res._values
 
             results.append(klass(res, index=group.index))
@@ -1087,7 +1087,7 @@ def py_fallback(bvalues: ArrayLike) -> ArrayLike:
         def blk_func(bvalues: ArrayLike) -> ArrayLike:
 
             try:
-                result, _ = self.grouper._cython_operation(
+                result = self.grouper._cython_operation(
                     "aggregate", bvalues, how, axis=1, min_count=min_count
                 )
             except NotImplementedError:
