Skip to content

Commit 68a4e3f

Browse files
authored
Merge branch 'main' into main
2 parents fa55317 + f3b7985 commit 68a4e3f

File tree

95 files changed

+777
-542
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

95 files changed

+777
-542
lines changed

.pre-commit-config.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ ci:
1919
skip: [pyright, mypy]
2020
repos:
2121
- repo: https://github.com/astral-sh/ruff-pre-commit
22-
rev: v0.7.2
22+
rev: v0.8.1
2323
hooks:
2424
- id: ruff
2525
args: [--exit-non-zero-on-fix]
@@ -47,7 +47,7 @@ repos:
4747
types_or: [python, rst, markdown, cython, c]
4848
additional_dependencies: [tomli]
4949
- repo: https://github.com/MarcoGorelli/cython-lint
50-
rev: v0.16.2
50+
rev: v0.16.6
5151
hooks:
5252
- id: cython-lint
5353
- id: double-quote-cython-strings
@@ -95,7 +95,7 @@ repos:
9595
- id: sphinx-lint
9696
args: ["--enable", "all", "--disable", "line-too-long"]
9797
- repo: https://github.com/pre-commit/mirrors-clang-format
98-
rev: v19.1.3
98+
rev: v19.1.4
9999
hooks:
100100
- id: clang-format
101101
files: ^pandas/_libs/src|^pandas/_libs/include

asv_bench/benchmarks/groupby.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -511,8 +511,7 @@ def setup(self, dtype, method, application, ncols, engine):
511511
# grouping on multiple columns
512512
# and we lack kernels for a bunch of methods
513513
if (
514-
engine == "numba"
515-
and method in _numba_unsupported_methods
514+
(engine == "numba" and method in _numba_unsupported_methods)
516515
or ncols > 1
517516
or application == "transformation"
518517
or dtype == "datetime"

ci/code_checks.sh

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7373
-i "pandas.Period.freq GL08" \
7474
-i "pandas.Period.ordinal GL08" \
7575
-i "pandas.RangeIndex.from_range PR01,SA01" \
76-
-i "pandas.Series.dt.unit GL08" \
77-
-i "pandas.Series.pad PR01,SA01" \
7876
-i "pandas.Timedelta.max PR02" \
7977
-i "pandas.Timedelta.min PR02" \
8078
-i "pandas.Timedelta.resolution PR02" \
@@ -83,40 +81,23 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8381
-i "pandas.Timestamp.resolution PR02" \
8482
-i "pandas.Timestamp.tzinfo GL08" \
8583
-i "pandas.arrays.ArrowExtensionArray PR07,SA01" \
86-
-i "pandas.arrays.IntegerArray SA01" \
8784
-i "pandas.arrays.IntervalArray.length SA01" \
8885
-i "pandas.arrays.NumpyExtensionArray SA01" \
8986
-i "pandas.arrays.TimedeltaArray PR07,SA01" \
9087
-i "pandas.core.groupby.DataFrameGroupBy.boxplot PR07,RT03,SA01" \
91-
-i "pandas.core.groupby.DataFrameGroupBy.get_group RT03,SA01" \
92-
-i "pandas.core.groupby.DataFrameGroupBy.indices SA01" \
93-
-i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
9488
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
95-
-i "pandas.core.groupby.DataFrameGroupBy.sem SA01" \
96-
-i "pandas.core.groupby.SeriesGroupBy.get_group RT03,SA01" \
97-
-i "pandas.core.groupby.SeriesGroupBy.indices SA01" \
9889
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
99-
-i "pandas.core.groupby.SeriesGroupBy.sem SA01" \
100-
-i "pandas.core.resample.Resampler.get_group RT03,SA01" \
101-
-i "pandas.core.resample.Resampler.indices SA01" \
10290
-i "pandas.core.resample.Resampler.max PR01,RT03,SA01" \
10391
-i "pandas.core.resample.Resampler.mean SA01" \
10492
-i "pandas.core.resample.Resampler.min PR01,RT03,SA01" \
10593
-i "pandas.core.resample.Resampler.prod SA01" \
10694
-i "pandas.core.resample.Resampler.quantile PR01,PR07" \
107-
-i "pandas.core.resample.Resampler.sem SA01" \
10895
-i "pandas.core.resample.Resampler.std SA01" \
10996
-i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \
11097
-i "pandas.core.resample.Resampler.var SA01" \
111-
-i "pandas.errors.IntCastingNaNError SA01" \
112-
-i "pandas.errors.NullFrequencyError SA01" \
113-
-i "pandas.errors.NumbaUtilError SA01" \
114-
-i "pandas.errors.PerformanceWarning SA01" \
11598
-i "pandas.errors.UndefinedVariableError PR01,SA01" \
11699
-i "pandas.errors.ValueLabelTypeMismatch SA01" \
117-
-i "pandas.infer_freq SA01" \
118100
-i "pandas.io.json.build_table_schema PR07,RT03,SA01" \
119-
-i "pandas.io.stata.StataWriter.write_file SA01" \
120101
-i "pandas.plotting.andrews_curves RT03,SA01" \
121102
-i "pandas.plotting.scatter_matrix PR07,SA01" \
122103
-i "pandas.tseries.offsets.BDay PR02,SA01" \

doc/source/getting_started/install.rst

Lines changed: 81 additions & 81 deletions
Large diffs are not rendered by default.

doc/source/reference/frame.rst

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,6 @@ Reindexing / selection / label manipulation
185185
DataFrame.duplicated
186186
DataFrame.equals
187187
DataFrame.filter
188-
DataFrame.head
189188
DataFrame.idxmax
190189
DataFrame.idxmin
191190
DataFrame.reindex
@@ -196,7 +195,6 @@ Reindexing / selection / label manipulation
196195
DataFrame.sample
197196
DataFrame.set_axis
198197
DataFrame.set_index
199-
DataFrame.tail
200198
DataFrame.take
201199
DataFrame.truncate
202200

doc/source/user_guide/dsintro.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ This case is handled identically to a dict of arrays.
326326

327327
.. ipython:: python
328328
329-
data = np.zeros((2,), dtype=[("A", "i4"), ("B", "f4"), ("C", "a10")])
329+
data = np.zeros((2,), dtype=[("A", "i4"), ("B", "f4"), ("C", "S10")])
330330
data[:] = [(1, 2.0, "Hello"), (2, 3.0, "World")]
331331
332332
pd.DataFrame(data)

doc/source/whatsnew/v3.0.0.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -667,6 +667,7 @@ Indexing
667667
^^^^^^^^
668668
- Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`)
669669
- Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`)
670+
- Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`)
670671
- Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`)
671672

672673
Missing
@@ -700,6 +701,7 @@ I/O
700701
- Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)
701702
- Bug in :meth:`read_csv` where the order of the ``na_values`` makes an inconsistency when ``na_values`` is a list of non-string values. (:issue:`59303`)
702703
- Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`)
704+
- Bug in :meth:`read_html` where ``rowspan`` in header row causes incorrect conversion to ``DataFrame``. (:issue:`60210`)
703705
- Bug in :meth:`read_json` not validating the ``typ`` argument to not be exactly ``"frame"`` or ``"series"`` (:issue:`59124`)
704706
- Bug in :meth:`read_json` where extreme value integers in string format were incorrectly parsed as a different integer number (:issue:`20608`)
705707
- Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
@@ -731,11 +733,13 @@ Groupby/resample/rolling
731733
- Bug in :meth:`.Resampler.interpolate` on a :class:`DataFrame` with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`)
732734
- Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`)
733735
- Bug in :meth:`DataFrameGroupBy.agg` that raises ``AttributeError`` when there is dictionary input and duplicated columns, instead of returning a DataFrame with the aggregation of all duplicate columns. (:issue:`55041`)
736+
- Bug in :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` for empty data frame with ``group_keys=False`` still creating output index using group keys. (:issue:`60471`)
734737
- Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
735738
- Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`)
736739
- Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. (:issue:`58811`)
737740
- Bug in :meth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`)
738741
- Bug in :meth:`DataFrameGroupBy.transform` and :meth:`SeriesGroupBy.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`)
742+
- Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`)
739743
- Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_periods`` periods if ``method="table"``. (:issue:`58868`)
740744
- Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time. (:issue:`58380`)
741745

@@ -795,6 +799,8 @@ Other
795799
- Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)
796800
- Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)
797801
- Bug in ``_version_meson.py`` when building pandas in Gitpod environment, using an (unusable) untagged version of pandas that breaks the build
802+
- Bug in ``Series.list`` methods not preserving the original name. (:issue:`60522`)
803+
- Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raising a ``ValueError`` (:issue:`60455`)
798804

799805
.. ***DO NOT USE THIS SECTION***
800806

environment.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ dependencies:
3535
- hypothesis>=6.84.0
3636
- gcsfs>=2022.11.0
3737
- ipython
38+
- pickleshare # Needed for IPython Sphinx directive in the docs GH#60429
3839
- jinja2>=3.1.2
3940
- lxml>=4.9.2
4041
- matplotlib>=3.6.3
@@ -87,7 +88,7 @@ dependencies:
8788
- google-auth
8889
- natsort # DataFrame.sort_values doctest
8990
- numpydoc
90-
- pydata-sphinx-theme=0.14
91+
- pydata-sphinx-theme=0.16
9192
- pytest-cython # doctest
9293
- sphinx
9394
- sphinx-design

pandas/__init__.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,7 @@
235235
# Pandas is not (yet) a py.typed library: the public API is determined
236236
# based on the documentation.
237237
__all__ = [
238+
"NA",
238239
"ArrowDtype",
239240
"BooleanDtype",
240241
"Categorical",
@@ -253,15 +254,14 @@
253254
"HDFStore",
254255
"Index",
255256
"IndexSlice",
257+
"Int8Dtype",
256258
"Int16Dtype",
257259
"Int32Dtype",
258260
"Int64Dtype",
259-
"Int8Dtype",
260261
"Interval",
261262
"IntervalDtype",
262263
"IntervalIndex",
263264
"MultiIndex",
264-
"NA",
265265
"NaT",
266266
"NamedAgg",
267267
"Period",
@@ -274,10 +274,10 @@
274274
"Timedelta",
275275
"TimedeltaIndex",
276276
"Timestamp",
277+
"UInt8Dtype",
277278
"UInt16Dtype",
278279
"UInt32Dtype",
279280
"UInt64Dtype",
280-
"UInt8Dtype",
281281
"api",
282282
"array",
283283
"arrays",
@@ -290,8 +290,8 @@
290290
"errors",
291291
"eval",
292292
"factorize",
293-
"get_dummies",
294293
"from_dummies",
294+
"get_dummies",
295295
"get_option",
296296
"infer_freq",
297297
"interval_range",

pandas/_config/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,13 @@
88

99
__all__ = [
1010
"config",
11+
"describe_option",
1112
"detect_console_encoding",
1213
"get_option",
13-
"set_option",
14-
"reset_option",
15-
"describe_option",
1614
"option_context",
1715
"options",
16+
"reset_option",
17+
"set_option",
1818
]
1919
from pandas._config import config
2020
from pandas._config import dates # pyright: ignore[reportUnusedImport] # noqa: F401

pandas/_config/config.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,11 @@ def set_option(*args) -> None:
188188
"""
189189
Set the value of the specified option or options.
190190
191+
This method allows fine-grained control over the behavior and display settings
192+
of pandas. Options affect various functionalities such as output formatting,
193+
display limits, and operational behavior. Settings can be modified at runtime
194+
without requiring changes to global configurations or environment variables.
195+
191196
Parameters
192197
----------
193198
*args : str | object

pandas/_libs/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
__all__ = [
2+
"Interval",
23
"NaT",
34
"NaTType",
45
"OutOfBoundsDatetime",
56
"Period",
67
"Timedelta",
78
"Timestamp",
89
"iNaT",
9-
"Interval",
1010
]
1111

1212

pandas/_libs/tslibs/__init__.py

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,39 @@
11
__all__ = [
2-
"dtypes",
3-
"localize_pydatetime",
2+
"BaseOffset",
3+
"IncompatibleFrequency",
44
"NaT",
55
"NaTType",
6-
"iNaT",
7-
"nat_strings",
86
"OutOfBoundsDatetime",
97
"OutOfBoundsTimedelta",
10-
"IncompatibleFrequency",
118
"Period",
129
"Resolution",
10+
"Tick",
1311
"Timedelta",
14-
"normalize_i8_timestamps",
15-
"is_date_array_normalized",
16-
"dt64arr_to_periodarr",
12+
"Timestamp",
13+
"add_overflowsafe",
14+
"astype_overflowsafe",
1715
"delta_to_nanoseconds",
16+
"dt64arr_to_periodarr",
17+
"dtypes",
18+
"get_resolution",
19+
"get_supported_dtype",
20+
"get_unit_from_dtype",
21+
"guess_datetime_format",
22+
"iNaT",
1823
"ints_to_pydatetime",
1924
"ints_to_pytimedelta",
20-
"get_resolution",
21-
"Timestamp",
22-
"tz_convert_from_utc_single",
23-
"tz_convert_from_utc",
24-
"to_offset",
25-
"Tick",
26-
"BaseOffset",
27-
"tz_compare",
25+
"is_date_array_normalized",
26+
"is_supported_dtype",
2827
"is_unitless",
29-
"astype_overflowsafe",
30-
"get_unit_from_dtype",
28+
"localize_pydatetime",
29+
"nat_strings",
30+
"normalize_i8_timestamps",
3131
"periods_per_day",
3232
"periods_per_second",
33-
"guess_datetime_format",
34-
"add_overflowsafe",
35-
"get_supported_dtype",
36-
"is_supported_dtype",
33+
"to_offset",
34+
"tz_compare",
35+
"tz_convert_from_utc",
36+
"tz_convert_from_utc_single",
3737
]
3838

3939
from pandas._libs.tslibs import dtypes

0 commit comments

Comments
 (0)