Skip to content

Commit 11138d8

Browse files
authored
Merge branch 'pandas-dev:main' into main
2 parents 750a270 + bb0fcc2 commit 11138d8

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+892
-305
lines changed

.github/ISSUE_TEMPLATE/pdep_vote.yaml

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
name: PDEP Vote
2+
description: Call for a vote on a PDEP
3+
title: "VOTE: "
4+
labels: [Vote]
5+
6+
body:
7+
- type: markdown
8+
attributes:
9+
value: >
10+
As per [PDEP-1](https://pandas.pydata.org/pdeps/0001-purpose-and-guidelines.html), the following issue template should be used when a
11+
maintainer has opened a PDEP discussion and is ready to call for a vote.
12+
- type: checkboxes
13+
attributes:
14+
label: Locked issue
15+
options:
16+
- label: >
17+
I locked this voting issue so that only voting members are able to cast their votes or
18+
comment on this issue.
19+
required: true
20+
- type: input
21+
id: PDEP-name
22+
attributes:
23+
label: PDEP number and title
24+
placeholder: >
25+
PDEP-1: Purpose and guidelines
26+
validations:
27+
required: true
28+
- type: input
29+
id: PDEP-link
30+
attributes:
31+
label: Pull request with discussion
32+
description: e.g. https://github.com/pandas-dev/pandas/pull/47444
33+
validations:
34+
required: true
35+
- type: input
36+
id: PDEP-rendered-link
37+
attributes:
38+
label: Rendered PDEP for easy reading
39+
description: e.g. https://github.com/pandas-dev/pandas/pull/47444/files?short_path=7c449e6#diff-7c449e698132205b235c501f7e47ebba38da4d2b7f9492c98f16745dba787041
40+
validations:
41+
required: true
42+
- type: input
43+
id: PDEP-number-of-discussion-participants
44+
attributes:
45+
label: Discussion participants
46+
description: >
47+
You may find it useful to list or total the number of participating members in the
48+
PDEP discussion PR. This would be the maximum possible number of disapprove votes.
49+
placeholder: >
50+
14 voting members participated in the PR discussion thus far.
51+
- type: input
52+
id: PDEP-vote-end
53+
attributes:
54+
label: Voting will close in 15 days.
55+
description: The voting period end date. ('Voting will close in 15 days.' will be automatically written)
56+
- type: markdown
57+
attributes:
58+
value: ---
59+
- type: textarea
60+
id: Vote
61+
attributes:
62+
label: Vote
63+
value: |
64+
Cast your vote in a comment below.
65+
* +1: approve.
66+
* 0: abstain.
67+
* Reason: A one sentence reason is required.
68+
* -1: disapprove
69+
* Reason: A one sentence reason is required.
70+
A disapprove vote requires prior participation in the linked discussion PR.
71+
72+
@pandas-dev/pandas-core
73+
validations:
74+
required: true

asv_bench/benchmarks/frame_methods.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -862,4 +862,28 @@ def time_last_valid_index(self, dtype):
862862
self.df.last_valid_index()
863863

864864

865+
class Update:
866+
def setup(self):
867+
rng = np.random.default_rng()
868+
self.df = DataFrame(rng.uniform(size=(1_000_000, 10)))
869+
870+
idx = rng.choice(range(1_000_000), size=1_000_000, replace=False)
871+
self.df_random = DataFrame(self.df, index=idx)
872+
873+
idx = rng.choice(range(1_000_000), size=100_000, replace=False)
874+
cols = rng.choice(range(10), size=2, replace=False)
875+
self.df_sample = DataFrame(
876+
rng.uniform(size=(100_000, 2)), index=idx, columns=cols
877+
)
878+
879+
def time_to_update_big_frame_small_arg(self):
880+
self.df.update(self.df_sample)
881+
882+
def time_to_update_random_indices(self):
883+
self.df_random.update(self.df_sample)
884+
885+
def time_to_update_small_frame_big_arg(self):
886+
self.df_sample.update(self.df)
887+
888+
865889
from .pandas_vb_common import setup # noqa: F401 isort:skip

ci/code_checks.sh

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -153,11 +153,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
153153
-i "pandas.DatetimeTZDtype SA01" \
154154
-i "pandas.DatetimeTZDtype.tz SA01" \
155155
-i "pandas.DatetimeTZDtype.unit SA01" \
156-
-i "pandas.ExcelFile PR01,SA01" \
157-
-i "pandas.ExcelFile.parse PR01,SA01" \
158-
-i "pandas.ExcelWriter SA01" \
159-
-i "pandas.Float32Dtype SA01" \
160-
-i "pandas.Float64Dtype SA01" \
161156
-i "pandas.Grouper PR02,SA01" \
162157
-i "pandas.HDFStore.append PR01,SA01" \
163158
-i "pandas.HDFStore.get SA01" \

doc/source/getting_started/install.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,8 @@ SciPy 1.10.0 computation Miscellaneous stati
269269
xarray 2022.12.0 computation pandas-like API for N-dimensional data
270270
========================= ================== =============== =============================================================
271271

272+
.. _install.excel_dependencies:
273+
272274
Excel files
273275
^^^^^^^^^^^
274276

doc/source/getting_started/intro_tutorials/02_read_write.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,12 @@ strings (``object``).
111111

112112
My colleague requested the Titanic data as a spreadsheet.
113113

114+
.. note::
115+
If you want to use :meth:`~pandas.DataFrame.to_excel` and :func:`~pandas.read_excel`,
116+
you need to install an Excel reader as outlined in the
117+
:ref:`Excel files <install.excel_dependencies>` section of the
118+
installation documentation.
119+
114120
.. ipython:: python
115121
116122
titanic.to_excel("titanic.xlsx", sheet_name="passengers", index=False)

doc/source/user_guide/missing_data.rst

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,27 @@ Replace NA with a scalar value
386386
df
387387
df.fillna(0)
388388
389+
When the data has object dtype, you can control what type of NA values are present.
390+
391+
.. ipython:: python
392+
393+
df = pd.DataFrame({"a": [pd.NA, np.nan, None]}, dtype=object)
394+
df
395+
df.fillna(None)
396+
df.fillna(np.nan)
397+
df.fillna(pd.NA)
398+
399+
However, when the dtype is not object, these will all be replaced with the proper NA value for the dtype.
400+
401+
.. ipython:: python
402+
403+
data = {"np": [1.0, np.nan, np.nan, 2], "arrow": pd.array([1.0, pd.NA, pd.NA, 2], dtype="float64[pyarrow]")}
404+
df = pd.DataFrame(data)
405+
df
406+
df.fillna(None)
407+
df.fillna(np.nan)
408+
df.fillna(pd.NA)
409+
389410
Fill gaps forward or backward
390411

391412
.. ipython:: python

doc/source/whatsnew/v3.0.0.rst

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ enhancement2
2828

2929
Other enhancements
3030
^^^^^^^^^^^^^^^^^^
31+
- :class:`pandas.api.typing.FrozenList` is available for typing the outputs of :attr:`MultiIndex.names`, :attr:`MultiIndex.codes` and :attr:`MultiIndex.levels` (:issue:`58237`)
3132
- :func:`DataFrame.to_excel` now raises a ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`)
3233
- :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`)
3334
- :meth:`Styler.set_tooltips` provides alternative method to storing tooltips by using title attribute of td elements. (:issue:`56981`)
@@ -37,6 +38,7 @@ Other enhancements
3738
- Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
3839
- :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
3940
- :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`)
41+
- :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
4042

4143
.. ---------------------------------------------------------------------------
4244
.. _whatsnew_300.notable_bug_fixes:
@@ -208,6 +210,8 @@ Removal of prior version deprecations/changes
208210
- :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`)
209211
- All arguments except ``name`` in :meth:`Index.rename` are now keyword only (:issue:`56493`)
210212
- All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`)
213+
- Disallow logical operations (``|``, ``&``, ``^``) between pandas objects and dtype-less sequences (e.g. ``list``, ``tuple``); wrap the objects in :class:`Series`, :class:`Index`, or ``np.array`` first instead (:issue:`52264`)
214+
- Disallow automatic casting to object in :class:`Series` logical operations (``&``, ``^``, ``|``) between series with mismatched indexes and dtypes other than ``object`` or ``bool`` (:issue:`52538`)
211215
- Disallow calling :meth:`Series.replace` or :meth:`DataFrame.replace` without a ``value`` and with non-dict-like ``to_replace`` (:issue:`33302`)
212216
- Disallow constructing a :class:`arrays.SparseArray` with scalar data (:issue:`53039`)
213217
- Disallow non-standard (``np.ndarray``, :class:`Index`, :class:`ExtensionArray`, or :class:`Series`) to :func:`isin`, :func:`unique`, :func:`factorize` (:issue:`52986`)
@@ -327,6 +331,7 @@ Performance improvements
327331
- Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`)
328332
- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`)
329333
- Performance improvement in :func:`merge` if hash-join can be used (:issue:`57970`)
334+
- Performance improvement in :meth:`to_hdf` by avoiding unnecessary reopenings of the HDF5 file, which speeds up adding data to files with a very large number of groups (:issue:`58248`)
330335
- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
331336
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)
332337
- Performance improvement in unary methods on a :class:`RangeIndex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57825`)
@@ -336,19 +341,6 @@ Performance improvements
336341

337342
Bug fixes
338343
~~~~~~~~~
339-
- Fixed bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`)
340-
- Fixed bug in :meth:`.DataFrameGroupBy.median` where nat values gave an incorrect result. (:issue:`57926`)
341-
- Fixed bug in :meth:`DataFrame.cumsum` which was raising ``IndexError`` if dtype is ``timedelta64[ns]`` (:issue:`57956`)
342-
- Fixed bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
343-
- Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
344-
- Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
345-
- Fixed bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`)
346-
- Fixed bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`)
347-
- Fixed bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
348-
- Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
349-
- Fixed bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
350-
- Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
351-
- Fixed bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
352344

353345
Categorical
354346
^^^^^^^^^^^
@@ -357,14 +349,15 @@ Categorical
357349

358350
Datetimelike
359351
^^^^^^^^^^^^
352+
- Bug in :class:`Timestamp` constructor failing to raise when ``tz=None`` is explicitly specified in conjunction with timezone-aware ``tzinfo`` or data (:issue:`48688`)
360353
- Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`)
361354
- Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56382`)
362-
-
355+
- Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`)
363356

364357
Timedelta
365358
^^^^^^^^^
366359
- Accuracy improvement in :meth:`Timedelta.to_pytimedelta` to round microseconds consistently for large nanosecond based Timedelta (:issue:`57841`)
367-
-
360+
- Bug in :meth:`DataFrame.cumsum` which was raising ``IndexError`` if dtype is ``timedelta64[ns]`` (:issue:`57956`)
368361

369362
Timezones
370363
^^^^^^^^^
@@ -378,6 +371,7 @@ Numeric
378371

379372
Conversion
380373
^^^^^^^^^^
374+
- Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`)
381375
- Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`)
382376
- Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`)
383377

@@ -393,7 +387,7 @@ Interval
393387

394388
Indexing
395389
^^^^^^^^
396-
-
390+
- Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`)
397391
-
398392

399393
Missing
@@ -408,10 +402,10 @@ MultiIndex
408402

409403
I/O
410404
^^^
405+
- Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping` elements. (:issue:`57915`)
411406
- Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
412-
- Now all ``Mapping`` s are pretty printed correctly. Before only literal ``dict`` s were. (:issue:`57915`)
413-
-
414-
-
407+
- Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
408+
- Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
415409

416410
Period
417411
^^^^^^
@@ -426,23 +420,25 @@ Plotting
426420
Groupby/resample/rolling
427421
^^^^^^^^^^^^^^^^^^^^^^^^
428422
- Bug in :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupby.groups` that would not respect groupby argument ``dropna`` (:issue:`55919`)
423+
- Bug in :meth:`.DataFrameGroupBy.median` where nat values gave an incorrect result. (:issue:`57926`)
429424
- Bug in :meth:`.DataFrameGroupBy.quantile` when ``interpolation="nearest"`` is inconsistent with :meth:`DataFrame.quantile` (:issue:`47942`)
430425
- Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`)
431-
-
426+
- Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
427+
432428

433429
Reshaping
434430
^^^^^^^^^
435-
-
431+
- Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
436432
-
437433

438434
Sparse
439435
^^^^^^
440-
-
436+
- Bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`)
441437
-
442438

443439
ExtensionArray
444440
^^^^^^^^^^^^^^
445-
- Fixed bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`)
441+
- Bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`)
446442
-
447443

448444
Styler
@@ -452,11 +448,15 @@ Styler
452448
Other
453449
^^^^^
454450
- Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
455-
- Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`)
456451
- Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
452+
- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
457453
- Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`)
454+
- Bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`)
458455
- Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)
459456
- Bug in :meth:`Index.sort_values` when passing a key function that turns values into tuples, e.g. ``key=natsort.natsort_key``, would raise ``TypeError`` (:issue:`56081`)
457+
- Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
458+
- Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
459+
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
460460
- Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)
461461

462462
.. ***DO NOT USE THIS SECTION***

pandas/_libs/tslibs/offsets.pyx

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -219,8 +219,7 @@ cdef _get_calendar(weekmask, holidays, calendar):
219219
holidays = holidays + calendar.holidays().tolist()
220220
except AttributeError:
221221
pass
222-
holidays = [_to_dt64D(dt) for dt in holidays]
223-
holidays = tuple(sorted(holidays))
222+
holidays = tuple(sorted(_to_dt64D(dt) for dt in holidays))
224223

225224
kwargs = {"weekmask": weekmask}
226225
if holidays:
@@ -419,11 +418,10 @@ cdef class BaseOffset:
419418

420419
if "holidays" in all_paras and not all_paras["holidays"]:
421420
all_paras.pop("holidays")
422-
exclude = ["kwds", "name", "calendar"]
423-
attrs = [(k, v) for k, v in all_paras.items()
424-
if (k not in exclude) and (k[0] != "_")]
425-
attrs = sorted(set(attrs))
426-
params = tuple([str(type(self))] + attrs)
421+
exclude = {"kwds", "name", "calendar"}
422+
attrs = {(k, v) for k, v in all_paras.items()
423+
if (k not in exclude) and (k[0] != "_")}
424+
params = tuple([str(type(self))] + sorted(attrs))
427425
return params
428426

429427
@property

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1751,7 +1751,7 @@ class Timestamp(_Timestamp):
17511751
tzinfo_type tzinfo=None,
17521752
*,
17531753
nanosecond=None,
1754-
tz=None,
1754+
tz=_no_input,
17551755
unit=None,
17561756
fold=None,
17571757
):
@@ -1783,6 +1783,10 @@ class Timestamp(_Timestamp):
17831783
_date_attributes = [year, month, day, hour, minute, second,
17841784
microsecond, nanosecond]
17851785

1786+
explicit_tz_none = tz is None
1787+
if tz is _no_input:
1788+
tz = None
1789+
17861790
if tzinfo is not None:
17871791
# GH#17690 tzinfo must be a datetime.tzinfo object, ensured
17881792
# by the cython annotation.
@@ -1883,6 +1887,11 @@ class Timestamp(_Timestamp):
18831887
if ts.value == NPY_NAT:
18841888
return NaT
18851889

1890+
if ts.tzinfo is not None and explicit_tz_none:
1891+
raise ValueError(
1892+
"Passed data is timezone-aware, incompatible with 'tz=None'."
1893+
)
1894+
18861895
return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, ts.fold, ts.creso)
18871896

18881897
def _round(self, freq, mode, ambiguous="raise", nonexistent="raise"):

0 commit comments

Comments
 (0)