Skip to content

Commit 1691065

Browse files
committed
Merge remote-tracking branch 'upstream/master' into corr-with-na
2 parents 8e56a3f + 22cf0f5 commit 1691065

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+753
-272
lines changed

doc/source/whatsnew/v1.1.0.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ Other enhancements
9898
This can be used to set a custom compression level, e.g.,
9999
``df.to_csv(path, compression={'method': 'gzip', 'compresslevel': 1})``
100100
(:issue:`33196`)
101+
- :meth:`Series.update` now accepts objects that can be coerced to a :class:`Series`,
102+
such as ``dict`` and ``list``, mirroring the behavior of :meth:`DataFrame.update` (:issue:`33215`)
101103
- :meth:`~pandas.core.groupby.GroupBy.transform` and :meth:`~pandas.core.groupby.GroupBy.aggregate` have gained ``engine`` and ``engine_kwargs`` arguments that support executing functions with ``Numba`` (:issue:`32854`, :issue:`33388`)
102104
- :meth:`~pandas.core.resample.Resampler.interpolate` now supports SciPy interpolation method :class:`scipy.interpolate.CubicSpline` as method ``cubicspline`` (:issue:`33670`)
103105
-
@@ -566,12 +568,14 @@ Indexing
566568
- Bug in :meth:`DatetimeIndex.insert` and :meth:`TimedeltaIndex.insert` causing index ``freq`` to be lost when setting an element into an empty :class:`Series` (:issue:`33573`)
567569
- Bug in :meth:`Series.__setitem__` with an :class:`IntervalIndex` and a list-like key of integers (:issue:`33473`)
568570
- Bug in :meth:`Series.__getitem__` allowing missing labels with ``np.ndarray``, :class:`Index`, :class:`Series` indexers but not ``list``, these now all raise ``KeyError`` (:issue:`33646`)
571+
- Bug in :meth:`DataFrame.truncate` and :meth:`Series.truncate` where index was assumed to be monotone increasing (:issue:`33756`)
569572

570573
Missing
571574
^^^^^^^
572575
- Calling :meth:`fillna` on an empty Series now correctly returns a shallow copied object. The behaviour is now consistent with :class:`Index`, :class:`DataFrame` and a non-empty :class:`Series` (:issue:`32543`).
573576
- Bug in :meth:`replace` when argument ``to_replace`` is of type dict/list and is used on a :class:`Series` containing ``<NA>`` was raising a ``TypeError``. The method now handles this by ignoring ``<NA>`` values when doing the comparison for the replacement (:issue:`32621`)
574577
- Bug in :meth:`~Series.any` and :meth:`~Series.all` incorrectly returning ``<NA>`` for all ``False`` or all ``True`` values using the nullable boolean dtype and with ``skipna=False`` (:issue:`33253`)
578+
- Clarified documentation on interpolate with ``method="akima"``. The ``der`` parameter must be scalar or ``None`` (:issue:`33426`)
575579

576580
MultiIndex
577581
^^^^^^^^^^
@@ -621,6 +625,9 @@ I/O
621625
unsupported HDF file (:issue:`9539`)
622626
- Bug in :meth:`~DataFrame.to_parquet` was not raising ``PermissionError`` when writing to a private s3 bucket with invalid creds. (:issue:`27679`)
623627
- Bug in :meth:`~DataFrame.to_csv` was silently failing when writing to an invalid s3 bucket. (:issue:`32486`)
628+
- Bug in :meth:`read_feather` was raising an ``ArrowIOError`` when reading an s3 or http file path (:issue:`29055`)
629+
- Bug in :meth:`read_parquet` was raising a ``FileNotFoundError`` when passed an s3 directory path. (:issue:`26388`)
630+
- Bug in :meth:`~DataFrame.to_parquet` was throwing an ``AttributeError`` when writing a partitioned parquet file to s3 (:issue:`27596`)
624631

625632
Plotting
626633
^^^^^^^^

pandas/_testing.py

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1024,7 +1024,12 @@ def _raise(left, right, err_msg):
10241024

10251025

10261026
def assert_extension_array_equal(
1027-
left, right, check_dtype=True, check_less_precise=False, check_exact=False
1027+
left,
1028+
right,
1029+
check_dtype=True,
1030+
check_less_precise=False,
1031+
check_exact=False,
1032+
index_values=None,
10281033
):
10291034
"""
10301035
Check that left and right ExtensionArrays are equal.
@@ -1041,6 +1046,8 @@ def assert_extension_array_equal(
10411046
If int, then specify the digits to compare.
10421047
check_exact : bool, default False
10431048
Whether to compare number exactly.
1049+
index_values : numpy.ndarray, default None
1050+
optional index (shared by both left and right), used in output.
10441051
10451052
Notes
10461053
-----
@@ -1056,24 +1063,31 @@ def assert_extension_array_equal(
10561063
if hasattr(left, "asi8") and type(right) == type(left):
10571064
# Avoid slow object-dtype comparisons
10581065
# np.asarray for case where we have a np.MaskedArray
1059-
assert_numpy_array_equal(np.asarray(left.asi8), np.asarray(right.asi8))
1066+
assert_numpy_array_equal(
1067+
np.asarray(left.asi8), np.asarray(right.asi8), index_values=index_values
1068+
)
10601069
return
10611070

10621071
left_na = np.asarray(left.isna())
10631072
right_na = np.asarray(right.isna())
1064-
assert_numpy_array_equal(left_na, right_na, obj="ExtensionArray NA mask")
1073+
assert_numpy_array_equal(
1074+
left_na, right_na, obj="ExtensionArray NA mask", index_values=index_values
1075+
)
10651076

10661077
left_valid = np.asarray(left[~left_na].astype(object))
10671078
right_valid = np.asarray(right[~right_na].astype(object))
10681079
if check_exact:
1069-
assert_numpy_array_equal(left_valid, right_valid, obj="ExtensionArray")
1080+
assert_numpy_array_equal(
1081+
left_valid, right_valid, obj="ExtensionArray", index_values=index_values
1082+
)
10701083
else:
10711084
_testing.assert_almost_equal(
10721085
left_valid,
10731086
right_valid,
10741087
check_dtype=check_dtype,
10751088
check_less_precise=check_less_precise,
10761089
obj="ExtensionArray",
1090+
index_values=index_values,
10771091
)
10781092

10791093

@@ -1206,12 +1220,17 @@ def assert_series_equal(
12061220
check_less_precise=check_less_precise,
12071221
check_dtype=check_dtype,
12081222
obj=str(obj),
1223+
index_values=np.asarray(left.index),
12091224
)
12101225
elif is_extension_array_dtype(left.dtype) and is_extension_array_dtype(right.dtype):
1211-
assert_extension_array_equal(left._values, right._values)
1226+
assert_extension_array_equal(
1227+
left._values, right._values, index_values=np.asarray(left.index)
1228+
)
12121229
elif needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype):
12131230
# DatetimeArray or TimedeltaArray
1214-
assert_extension_array_equal(left._values, right._values)
1231+
assert_extension_array_equal(
1232+
left._values, right._values, index_values=np.asarray(left.index)
1233+
)
12151234
else:
12161235
_testing.assert_almost_equal(
12171236
left._values,
@@ -1429,6 +1448,8 @@ def assert_equal(left, right, **kwargs):
14291448

14301449
if isinstance(left, pd.Index):
14311450
assert_index_equal(left, right, **kwargs)
1451+
if isinstance(left, (pd.DatetimeIndex, pd.TimedeltaIndex)):
1452+
assert left.freq == right.freq, (left.freq, right.freq)
14321453
elif isinstance(left, pd.Series):
14331454
assert_series_equal(left, right, **kwargs)
14341455
elif isinstance(left, pd.DataFrame):

pandas/core/arrays/categorical.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -463,7 +463,7 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
463463
return self
464464
return self._set_dtype(dtype)
465465
if is_extension_array_dtype(dtype):
466-
return array(self, dtype=dtype, copy=copy) # type: ignore # GH 28770
466+
return array(self, dtype=dtype, copy=copy)
467467
if is_integer_dtype(dtype) and self.isna().any():
468468
raise ValueError("Cannot convert float NaN to integer")
469469
return np.array(self, dtype=dtype, copy=copy)

pandas/core/arrays/datetimelike.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from datetime import datetime, timedelta
22
import operator
3-
from typing import Any, Sequence, Type, Union, cast
3+
from typing import Any, Sequence, Type, TypeVar, Union, cast
44
import warnings
55

66
import numpy as np
@@ -410,15 +410,15 @@ def ceil(self, freq, ambiguous="raise", nonexistent="raise"):
410410

411411
def _with_freq(self, freq):
412412
"""
413-
Helper to set our freq in-place, returning self to allow method chaining.
413+
Helper to get a view on the same data, with a new freq.
414414
415415
Parameters
416416
----------
417417
freq : DateOffset, None, or "infer"
418418
419419
Returns
420420
-------
421-
self
421+
Same type as self
422422
"""
423423
# GH#29843
424424
if freq is None:
@@ -433,8 +433,12 @@ def _with_freq(self, freq):
433433
assert freq == "infer"
434434
freq = frequencies.to_offset(self.inferred_freq)
435435

436-
self._freq = freq
437-
return self
436+
arr = self.view()
437+
arr._freq = freq
438+
return arr
439+
440+
441+
DatetimeLikeArrayT = TypeVar("DatetimeLikeArrayT", bound="DatetimeLikeArrayMixin")
438442

439443

440444
class DatetimeLikeArrayMixin(
@@ -679,7 +683,7 @@ def _concat_same_type(cls, to_concat, axis: int = 0):
679683

680684
return cls._simple_new(values, dtype=dtype, freq=new_freq)
681685

682-
def copy(self):
686+
def copy(self: DatetimeLikeArrayT) -> DatetimeLikeArrayT:
683687
values = self.asi8.copy()
684688
return type(self)._simple_new(values, dtype=self.dtype, freq=self.freq)
685689

pandas/core/arrays/period.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from datetime import timedelta
22
import operator
3-
from typing import Any, Callable, List, Optional, Sequence, Union
3+
from typing import Any, Callable, List, Optional, Sequence, Type, Union
44

55
import numpy as np
66

@@ -20,6 +20,7 @@
2020
period_asfreq_arr,
2121
)
2222
from pandas._libs.tslibs.timedeltas import Timedelta, delta_to_nanoseconds
23+
from pandas._typing import AnyArrayLike
2324
from pandas.util._decorators import cache_readonly
2425

2526
from pandas.core.dtypes.common import (
@@ -172,8 +173,8 @@ def _simple_new(cls, values: np.ndarray, freq=None, **kwargs) -> "PeriodArray":
172173

173174
@classmethod
174175
def _from_sequence(
175-
cls,
176-
scalars: Sequence[Optional[Period]],
176+
cls: Type["PeriodArray"],
177+
scalars: Union[Sequence[Optional[Period]], AnyArrayLike],
177178
dtype: Optional[PeriodDtype] = None,
178179
copy: bool = False,
179180
) -> "PeriodArray":
@@ -186,7 +187,6 @@ def _from_sequence(
186187
validate_dtype_freq(scalars.dtype, freq)
187188
if copy:
188189
scalars = scalars.copy()
189-
assert isinstance(scalars, PeriodArray) # for mypy
190190
return scalars
191191

192192
periods = np.asarray(scalars, dtype=object)
@@ -772,7 +772,7 @@ def raise_on_incompatible(left, right):
772772

773773

774774
def period_array(
775-
data: Sequence[Optional[Period]],
775+
data: Union[Sequence[Optional[Period]], AnyArrayLike],
776776
freq: Optional[Union[str, Tick]] = None,
777777
copy: bool = False,
778778
) -> PeriodArray:

pandas/core/arrays/timedeltas.py

Lines changed: 11 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -27,19 +27,15 @@
2727
pandas_dtype,
2828
)
2929
from pandas.core.dtypes.dtypes import DatetimeTZDtype
30-
from pandas.core.dtypes.generic import (
31-
ABCDataFrame,
32-
ABCIndexClass,
33-
ABCSeries,
34-
ABCTimedeltaIndex,
35-
)
30+
from pandas.core.dtypes.generic import ABCSeries, ABCTimedeltaIndex
3631
from pandas.core.dtypes.missing import isna
3732

3833
from pandas.core import nanops
3934
from pandas.core.algorithms import checked_add_with_arr
4035
from pandas.core.arrays import datetimelike as dtl
4136
import pandas.core.common as com
4237
from pandas.core.construction import extract_array
38+
from pandas.core.ops.common import unpack_zerodim_and_defer
4339

4440
from pandas.tseries.frequencies import to_offset
4541
from pandas.tseries.offsets import Tick
@@ -456,12 +452,8 @@ def _addsub_object_array(self, other, op):
456452
f"Cannot add/subtract non-tick DateOffset to {type(self).__name__}"
457453
) from err
458454

455+
@unpack_zerodim_and_defer("__mul__")
459456
def __mul__(self, other):
460-
other = lib.item_from_zerodim(other)
461-
462-
if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
463-
return NotImplemented
464-
465457
if is_scalar(other):
466458
# numpy will accept float and int, raise TypeError for others
467459
result = self._data * other
@@ -492,12 +484,9 @@ def __mul__(self, other):
492484

493485
__rmul__ = __mul__
494486

487+
@unpack_zerodim_and_defer("__truediv__")
495488
def __truediv__(self, other):
496489
# timedelta / X is well-defined for timedelta-like or numeric X
497-
other = lib.item_from_zerodim(other)
498-
499-
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
500-
return NotImplemented
501490

502491
if isinstance(other, (timedelta, np.timedelta64, Tick)):
503492
other = Timedelta(other)
@@ -553,13 +542,9 @@ def __truediv__(self, other):
553542
result = self._data / other
554543
return type(self)(result)
555544

545+
@unpack_zerodim_and_defer("__rtruediv__")
556546
def __rtruediv__(self, other):
557547
# X / timedelta is defined only for timedelta-like X
558-
other = lib.item_from_zerodim(other)
559-
560-
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
561-
return NotImplemented
562-
563548
if isinstance(other, (timedelta, np.timedelta64, Tick)):
564549
other = Timedelta(other)
565550
if other is NaT:
@@ -599,11 +584,9 @@ def __rtruediv__(self, other):
599584
f"Cannot divide {other.dtype} data by {type(self).__name__}"
600585
)
601586

587+
@unpack_zerodim_and_defer("__floordiv__")
602588
def __floordiv__(self, other):
603-
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
604-
return NotImplemented
605589

606-
other = lib.item_from_zerodim(other)
607590
if is_scalar(other):
608591
if isinstance(other, (timedelta, np.timedelta64, Tick)):
609592
other = Timedelta(other)
@@ -665,11 +648,9 @@ def __floordiv__(self, other):
665648
dtype = getattr(other, "dtype", type(other).__name__)
666649
raise TypeError(f"Cannot divide {dtype} by {type(self).__name__}")
667650

651+
@unpack_zerodim_and_defer("__rfloordiv__")
668652
def __rfloordiv__(self, other):
669-
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
670-
return NotImplemented
671653

672-
other = lib.item_from_zerodim(other)
673654
if is_scalar(other):
674655
if isinstance(other, (timedelta, np.timedelta64, Tick)):
675656
other = Timedelta(other)
@@ -714,45 +695,33 @@ def __rfloordiv__(self, other):
714695
dtype = getattr(other, "dtype", type(other).__name__)
715696
raise TypeError(f"Cannot divide {dtype} by {type(self).__name__}")
716697

698+
@unpack_zerodim_and_defer("__mod__")
717699
def __mod__(self, other):
718700
# Note: This is a naive implementation, can likely be optimized
719-
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
720-
return NotImplemented
721-
722-
other = lib.item_from_zerodim(other)
723701
if isinstance(other, (timedelta, np.timedelta64, Tick)):
724702
other = Timedelta(other)
725703
return self - (self // other) * other
726704

705+
@unpack_zerodim_and_defer("__rmod__")
727706
def __rmod__(self, other):
728707
# Note: This is a naive implementation, can likely be optimized
729-
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
730-
return NotImplemented
731-
732-
other = lib.item_from_zerodim(other)
733708
if isinstance(other, (timedelta, np.timedelta64, Tick)):
734709
other = Timedelta(other)
735710
return other - (other // self) * self
736711

712+
@unpack_zerodim_and_defer("__divmod__")
737713
def __divmod__(self, other):
738714
# Note: This is a naive implementation, can likely be optimized
739-
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
740-
return NotImplemented
741-
742-
other = lib.item_from_zerodim(other)
743715
if isinstance(other, (timedelta, np.timedelta64, Tick)):
744716
other = Timedelta(other)
745717

746718
res1 = self // other
747719
res2 = self - res1 * other
748720
return res1, res2
749721

722+
@unpack_zerodim_and_defer("__rdivmod__")
750723
def __rdivmod__(self, other):
751724
# Note: This is a naive implementation, can likely be optimized
752-
if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)):
753-
return NotImplemented
754-
755-
other = lib.item_from_zerodim(other)
756725
if isinstance(other, (timedelta, np.timedelta64, Tick)):
757726
other = Timedelta(other)
758727

pandas/core/construction.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
from pandas._libs import lib
1515
from pandas._libs.tslibs import IncompatibleFrequency, OutOfBoundsDatetime
16-
from pandas._typing import ArrayLike, Dtype, DtypeObj
16+
from pandas._typing import AnyArrayLike, ArrayLike, Dtype, DtypeObj
1717

1818
from pandas.core.dtypes.cast import (
1919
construct_1d_arraylike_from_scalar,
@@ -54,7 +54,9 @@
5454

5555

5656
def array(
57-
data: Sequence[object], dtype: Optional[Dtype] = None, copy: bool = True,
57+
data: Union[Sequence[object], AnyArrayLike],
58+
dtype: Optional[Dtype] = None,
59+
copy: bool = True,
5860
) -> "ExtensionArray":
5961
"""
6062
Create an array.

pandas/core/generic.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9196,6 +9196,9 @@ def truncate(
91969196
if before > after:
91979197
raise ValueError(f"Truncate: {after} must be after {before}")
91989198

9199+
if ax.is_monotonic_decreasing:
9200+
before, after = after, before
9201+
91999202
slicer = [slice(None, None)] * self._AXIS_LEN
92009203
slicer[axis] = slice(before, after)
92019204
result = self.loc[tuple(slicer)]

0 commit comments

Comments
 (0)