Skip to content

Commit 93b0e97

Browse files
Merge branch 'pandas-dev:main' into csv-write-comments
2 parents 4bcb904 + ef95005 commit 93b0e97

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+590
-690
lines changed

ci/code_checks.sh

-11
Original file line numberDiff line numberDiff line change
@@ -105,17 +105,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
105105
pandas.errors.UnsupportedFunctionCall \
106106
pandas.test \
107107
pandas.NaT \
108-
pandas.arrays.TimedeltaArray \
109-
pandas.Period.asfreq \
110-
pandas.Period.now \
111-
pandas.arrays.PeriodArray \
112-
pandas.CategoricalDtype.categories \
113-
pandas.CategoricalDtype.ordered \
114-
pandas.Categorical.dtype \
115-
pandas.Categorical.categories \
116-
pandas.Categorical.ordered \
117-
pandas.Categorical.codes \
118-
pandas.Categorical.__array__ \
119108
pandas.SparseDtype \
120109
pandas.DatetimeTZDtype.unit \
121110
pandas.DatetimeTZDtype.tz \

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,7 @@ Reshaping
491491
- Bug in :func:`merge_asof` raising ``KeyError`` for extension dtypes (:issue:`52904`)
492492
- Bug in :func:`merge_asof` raising ``ValueError`` for data backed by read-only ndarrays (:issue:`53513`)
493493
- Bug in :meth:`DataFrame.agg` and :meth:`Series.agg` on non-unique columns would return incorrect type when a dict-like argument is passed in (:issue:`51099`)
494+
- Bug in :meth:`DataFrame.combine_first` ignoring the columns of ``other`` if ``other`` is empty (:issue:`53792`)
494495
- Bug in :meth:`DataFrame.idxmin` and :meth:`DataFrame.idxmax`, where the axis dtype would be lost for empty frames (:issue:`53265`)
495496
- Bug in :meth:`DataFrame.merge` not merging correctly when having ``MultiIndex`` with single level (:issue:`52331`)
496497
- Bug in :meth:`DataFrame.stack` losing extension dtypes when columns is a :class:`MultiIndex` and frame contains mixed dtypes (:issue:`45740`)

pandas/_libs/groupby.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -950,7 +950,7 @@ def group_skew(
950950
isna_entry = _treat_as_na(val, False)
951951

952952
if not isna_entry:
953-
# Based on RunningSats::Push from
953+
# Based on RunningStats::Push from
954954
# https://www.johndcook.com/blog/skewness_kurtosis/
955955
n1 = nobs[lab, j]
956956
n = n1 + 1

pandas/_libs/tslibs/period.pyx

+11
Original file line numberDiff line numberDiff line change
@@ -1925,6 +1925,12 @@ cdef class _Period(PeriodMixin):
19251925
Returns
19261926
-------
19271927
resampled : Period
1928+
1929+
Examples
1930+
--------
1931+
>>> period = pd.Period('2023-1-1', freq='D')
1932+
>>> period.asfreq('H')
1933+
Period('2023-01-01 23:00', 'H')
19281934
"""
19291935
freq = self._maybe_convert_freq(freq)
19301936
how = validate_end_alias(how)
@@ -2460,6 +2466,11 @@ cdef class _Period(PeriodMixin):
24602466
----------
24612467
freq : str, BaseOffset
24622468
Frequency to use for the returned period.
2469+
2470+
Examples
2471+
--------
2472+
>>> pd.Period.now('H') # doctest: +SKIP
2473+
Period('2023-06-12 11:00', 'H')
24632474
"""
24642475
return Period(datetime.now(), freq=freq)
24652476

pandas/_libs/tslibs/timedeltas.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -974,7 +974,7 @@ cdef _timedelta_from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso):
974974
"Only resolutions 's', 'ms', 'us', 'ns' are supported."
975975
)
976976

977-
td_base._value= value
977+
td_base._value = value
978978
td_base._is_populated = 0
979979
td_base._creso = reso
980980
return td_base

pandas/core/arrays/arrow/array.py

+1-7
Original file line numberDiff line numberDiff line change
@@ -1131,13 +1131,7 @@ def take(
11311131
it's called by :meth:`Series.reindex`, or any other method
11321132
that causes realignment, with a `fill_value`.
11331133
"""
1134-
# TODO: Remove once we got rid of the (indices < 0) check
1135-
if not is_array_like(indices):
1136-
indices_array = np.asanyarray(indices)
1137-
else:
1138-
# error: Incompatible types in assignment (expression has type
1139-
# "Sequence[int]", variable has type "ndarray")
1140-
indices_array = indices # type: ignore[assignment]
1134+
indices_array = np.asanyarray(indices)
11411135

11421136
if len(self._pa_array) == 0 and (indices_array >= 0).any():
11431137
raise IndexError("cannot do a non-empty take")

pandas/core/arrays/categorical.py

+47
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,15 @@ def __init__(
481481
def dtype(self) -> CategoricalDtype:
482482
"""
483483
The :class:`~pandas.api.types.CategoricalDtype` for this instance.
484+
485+
Examples
486+
--------
487+
>>> cat = pd.Categorical(['a', 'b'], ordered=True)
488+
>>> cat
489+
['a', 'b']
490+
Categories (2, object): ['a' < 'b']
491+
>>> cat.dtype
492+
CategoricalDtype(categories=['a', 'b'], ordered=True, categories_dtype=object)
484493
"""
485494
return self._dtype
486495

@@ -751,6 +760,9 @@ def categories(self) -> Index:
751760
752761
Examples
753762
--------
763+
764+
For Series:
765+
754766
>>> ser = pd.Series(["a", "b", "c", "a"], dtype="category")
755767
>>> ser.cat.categories
756768
Index(['a', 'b', 'c'], dtype='object')
@@ -759,6 +771,12 @@ def categories(self) -> Index:
759771
>>> ser = pd.Series(raw_cat)
760772
>>> ser.cat.categories
761773
Index(['b', 'c', 'd'], dtype='object')
774+
775+
For Categorical:
776+
777+
>>> cat = pd.Categorical(['a', 'b'], ordered=True)
778+
>>> cat.categories
779+
Index(['a', 'b'], dtype='object')
762780
"""
763781
return self.dtype.categories
764782

@@ -769,6 +787,9 @@ def ordered(self) -> Ordered:
769787
770788
Examples
771789
--------
790+
791+
For Series:
792+
772793
>>> ser = pd.Series(["a", "b", "c", "a"], dtype="category")
773794
>>> ser.cat.ordered
774795
False
@@ -777,6 +798,16 @@ def ordered(self) -> Ordered:
777798
>>> ser = pd.Series(raw_cat)
778799
>>> ser.cat.ordered
779800
True
801+
802+
For Categorical:
803+
804+
>>> cat = pd.Categorical(['a', 'b'], ordered=True)
805+
>>> cat.ordered
806+
True
807+
808+
>>> cat = pd.Categorical(['a', 'b'], ordered=False)
809+
>>> cat.ordered
810+
False
780811
"""
781812
return self.dtype.ordered
782813

@@ -795,6 +826,12 @@ def codes(self) -> np.ndarray:
795826
-------
796827
ndarray[int]
797828
A non-writable view of the `codes` array.
829+
830+
Examples
831+
--------
832+
>>> cat = pd.Categorical(['a', 'b'], ordered=True)
833+
>>> cat.codes
834+
array([0, 1], dtype=int8)
798835
"""
799836
v = self._codes.view()
800837
v.flags.writeable = False
@@ -1492,6 +1529,16 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
14921529
A numpy array of either the specified dtype or,
14931530
if dtype==None (default), the same dtype as
14941531
categorical.categories.dtype.
1532+
1533+
Examples
1534+
--------
1535+
1536+
>>> cat = pd.Categorical(['a', 'b'], ordered=True)
1537+
1538+
The following calls ``cat.__array__``
1539+
1540+
>>> np.asarray(cat)
1541+
array(['a', 'b'], dtype=object)
14951542
"""
14961543
ret = take_nd(self.categories._values, self._codes)
14971544
if dtype and np.dtype(dtype) != self.categories.dtype:

pandas/core/arrays/period.py

+8
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,14 @@ class PeriodArray(dtl.DatelikeOps, libperiod.PeriodMixin): # type: ignore[misc]
163163
164164
The `freq` indicates the span covered by each element of the array.
165165
All elements in the PeriodArray have the same `freq`.
166+
167+
Examples
168+
--------
169+
>>> pd.arrays.PeriodArray(pd.PeriodIndex(['2023-01-01',
170+
... '2023-01-02'], freq='D'))
171+
<PeriodArray>
172+
['2023-01-01', '2023-01-02']
173+
Length: 2, dtype: period[D]
166174
"""
167175

168176
# array priority higher than numpy scalars

pandas/core/arrays/sparse/array.py

+10-14
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@
4242
maybe_box_datetimelike,
4343
)
4444
from pandas.core.dtypes.common import (
45-
is_array_like,
4645
is_bool_dtype,
4746
is_integer,
4847
is_list_like,
@@ -428,19 +427,16 @@ def __init__(
428427
# Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]"
429428
data = np.array([], dtype=dtype) # type: ignore[arg-type]
430429

431-
if not is_array_like(data):
432-
try:
433-
# probably shared code in sanitize_series
434-
435-
data = sanitize_array(data, index=None)
436-
except ValueError:
437-
# NumPy may raise a ValueError on data like [1, []]
438-
# we retry with object dtype here.
439-
if dtype is None:
440-
dtype = np.dtype(object)
441-
data = np.atleast_1d(np.asarray(data, dtype=dtype))
442-
else:
443-
raise
430+
try:
431+
data = sanitize_array(data, index=None)
432+
except ValueError:
433+
# NumPy may raise a ValueError on data like [1, []]
434+
# we retry with object dtype here.
435+
if dtype is None:
436+
dtype = np.dtype(object)
437+
data = np.atleast_1d(np.asarray(data, dtype=dtype))
438+
else:
439+
raise
444440

445441
if copy:
446442
# TODO: avoid double copy when dtype forces cast.

pandas/core/arrays/timedeltas.py

+7
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,13 @@ class TimedeltaArray(dtl.TimelikeOps):
132132
Methods
133133
-------
134134
None
135+
136+
Examples
137+
--------
138+
>>> pd.arrays.TimedeltaArray(pd.TimedeltaIndex(['1H', '2H']))
139+
<TimedeltaArray>
140+
['0 days 01:00:00', '0 days 02:00:00']
141+
Length: 2, dtype: timedelta64[ns]
135142
"""
136143

137144
_typ = "timedeltaarray"

pandas/core/dtypes/dtypes.py

+16-11
Original file line numberDiff line numberDiff line change
@@ -610,13 +610,29 @@ def update_dtype(self, dtype: str_type | CategoricalDtype) -> CategoricalDtype:
610610
def categories(self) -> Index:
611611
"""
612612
An ``Index`` containing the unique categories allowed.
613+
614+
Examples
615+
--------
616+
>>> cat_type = pd.CategoricalDtype(categories=['a', 'b'], ordered=True)
617+
>>> cat_type.categories
618+
Index(['a', 'b'], dtype='object')
613619
"""
614620
return self._categories
615621

616622
@property
617623
def ordered(self) -> Ordered:
618624
"""
619625
Whether the categories have an ordered relationship.
626+
627+
Examples
628+
--------
629+
>>> cat_type = pd.CategoricalDtype(categories=['a', 'b'], ordered=True)
630+
>>> cat_type.ordered
631+
True
632+
633+
>>> cat_type = pd.CategoricalDtype(categories=['a', 'b'], ordered=False)
634+
>>> cat_type.ordered
635+
False
620636
"""
621637
return self._ordered
622638

@@ -1665,17 +1681,6 @@ def _check_fill_value(self):
16651681
FutureWarning,
16661682
stacklevel=find_stack_level(),
16671683
)
1668-
elif isinstance(self.subtype, CategoricalDtype):
1669-
# TODO: is this even supported? It is reached in
1670-
# test_dtype_sparse_with_fill_value_not_present_in_data
1671-
if self.subtype.categories is None or val not in self.subtype.categories:
1672-
warnings.warn(
1673-
"Allowing arbitrary scalar fill_value in SparseDtype is "
1674-
"deprecated. In a future version, the fill_value must be "
1675-
"a valid value for the SparseDtype.subtype.",
1676-
FutureWarning,
1677-
stacklevel=find_stack_level(),
1678-
)
16791684
else:
16801685
dummy = np.empty(0, dtype=self.subtype)
16811686
dummy = ensure_wrapped_if_datetimelike(dummy)

pandas/core/frame.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -673,9 +673,9 @@ def __init__(
673673
manager = get_option("mode.data_manager")
674674

675675
# GH47215
676-
if index is not None and isinstance(index, set):
676+
if isinstance(index, set):
677677
raise ValueError("index cannot be a set")
678-
if columns is not None and isinstance(columns, set):
678+
if isinstance(columns, set):
679679
raise ValueError("columns cannot be a set")
680680

681681
if copy is None:
@@ -8344,7 +8344,13 @@ def combiner(x, y):
83448344

83458345
return expressions.where(mask, y_values, x_values)
83468346

8347-
combined = self.combine(other, combiner, overwrite=False)
8347+
if len(other) == 0:
8348+
combined = self.reindex(
8349+
self.columns.append(other.columns.difference(self.columns)), axis=1
8350+
)
8351+
combined = combined.astype(other.dtypes)
8352+
else:
8353+
combined = self.combine(other, combiner, overwrite=False)
83488354

83498355
dtypes = {
83508356
col: find_common_type([self.dtypes[col], other.dtypes[col]])

pandas/core/generic.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
AlignJoin,
4848
AnyArrayLike,
4949
ArrayLike,
50+
Axes,
5051
Axis,
5152
AxisInt,
5253
CompressionOptions,
@@ -271,7 +272,7 @@ def __init__(self, data: Manager) -> None:
271272
def _init_mgr(
272273
cls,
273274
mgr: Manager,
274-
axes,
275+
axes: dict[Literal["index", "columns"], Axes | None],
275276
dtype: DtypeObj | None = None,
276277
copy: bool_t = False,
277278
) -> Manager:
@@ -4006,7 +4007,6 @@ class max_speed
40064007
):
40074008
return self.copy(deep=None)
40084009
elif self.ndim == 1:
4009-
# TODO: be consistent here for DataFrame vs Series
40104010
raise TypeError(
40114011
f"{type(self).__name__}.take requires a sequence of integers, "
40124012
"not slice."

pandas/core/indexes/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -4192,7 +4192,7 @@ def _convert_slice_indexer(self, key: slice, kind: Literal["loc", "getitem"]):
41924192

41934193
# TODO(GH#50617): once Series.__[gs]etitem__ is removed we should be able
41944194
# to simplify this.
4195-
if isinstance(self.dtype, np.dtype) and self.dtype.kind == "f":
4195+
if lib.is_np_dtype(self.dtype, "f"):
41964196
# We always treat __getitem__ slicing as label-based
41974197
# translate to locations
41984198
return self.slice_indexer(start, stop, step)

0 commit comments

Comments
 (0)