Skip to content

Commit 27f6cfb

Browse files
Merge branch 'pandas-dev:main' into methods_test_drop_duplicates_fixture_docs
2 parents 4bc6e84 + 288af5f commit 27f6cfb

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+398
-244
lines changed

.github/workflows/unit-tests.yml

+1-2
Original file line numberDiff line numberDiff line change
@@ -388,9 +388,8 @@ jobs:
388388
389389
- name: Run Tests
390390
uses: ./.github/actions/run-tests
391-
env:
392-
PYTHON_GIL: 0
393391

392+
# NOTE: this job must be kept in sync with the Pyodide build job in wheels.yml
394393
emscripten:
395394
# Note: the Python version, Emscripten toolchain version are determined
396395
# by the Pyodide version. The appropriate versions can be found in the

.github/workflows/wheels.yml

+8
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,13 @@ jobs:
100100
- [windows-2022, win_amd64]
101101
# TODO: support PyPy?
102102
python: [["cp310", "3.10"], ["cp311", "3.11"], ["cp312", "3.12"]]
103+
104+
# Build Pyodide wheels and upload them to Anaconda.org
105+
# NOTE: this job is similar to the one in unit-tests.yml except for the fact
106+
# that it uses cibuildwheel instead of a standard Pyodide xbuildenv setup.
107+
include:
108+
- buildplat: [ubuntu-22.04, pyodide_wasm32]
109+
python: ["cp312", "3.12"]
103110
env:
104111
IS_PUSH: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') }}
105112
IS_SCHEDULE_DISPATCH: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
@@ -146,6 +153,7 @@ jobs:
146153
env:
147154
CIBW_PRERELEASE_PYTHONS: True
148155
CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}
156+
CIBW_PLATFORM: ${{ matrix.buildplat[1] == 'pyodide_wasm32' && 'pyodide' || 'auto' }}
149157

150158
- name: Set up Python
151159
uses: mamba-org/setup-micromamba@v1

ci/code_checks.sh

+1-11
Original file line numberDiff line numberDiff line change
@@ -70,15 +70,8 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7070
--format=actions \
7171
-i ES01 `# For now it is ok if docstrings are missing the extended summary` \
7272
-i "pandas.Series.dt PR01" `# Accessors are implemented as classes, but we do not document the Parameters section` \
73-
-i "pandas.DataFrame.max RT03" \
74-
-i "pandas.DataFrame.mean RT03" \
75-
-i "pandas.DataFrame.median RT03" \
76-
-i "pandas.DataFrame.min RT03" \
77-
-i "pandas.DataFrame.plot PR02" \
78-
-i "pandas.Grouper PR02" \
7973
-i "pandas.MultiIndex.append PR07,SA01" \
8074
-i "pandas.MultiIndex.copy PR07,RT03,SA01" \
81-
-i "pandas.MultiIndex.drop PR07,RT03,SA01" \
8275
-i "pandas.MultiIndex.get_level_values SA01" \
8376
-i "pandas.MultiIndex.get_loc PR07" \
8477
-i "pandas.MultiIndex.get_loc_level PR07" \
@@ -160,13 +153,12 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
160153
-i "pandas.Series.lt SA01" \
161154
-i "pandas.Series.ne SA01" \
162155
-i "pandas.Series.pad PR01,SA01" \
163-
-i "pandas.Series.plot PR02" \
164156
-i "pandas.Series.pop SA01" \
165157
-i "pandas.Series.prod RT03" \
166158
-i "pandas.Series.product RT03" \
167159
-i "pandas.Series.reorder_levels RT03,SA01" \
168160
-i "pandas.Series.sem PR01,RT03,SA01" \
169-
-i "pandas.Series.skew RT03,SA01" \
161+
-i "pandas.Series.skew SA01" \
170162
-i "pandas.Series.sparse PR01,SA01" \
171163
-i "pandas.Series.sparse.density SA01" \
172164
-i "pandas.Series.sparse.fill_value SA01" \
@@ -314,11 +306,9 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
314306
-i "pandas.api.types.is_period_dtype SA01" \
315307
-i "pandas.api.types.is_re PR07,SA01" \
316308
-i "pandas.api.types.is_re_compilable PR07,SA01" \
317-
-i "pandas.api.types.is_signed_integer_dtype SA01" \
318309
-i "pandas.api.types.is_sparse SA01" \
319310
-i "pandas.api.types.is_string_dtype SA01" \
320311
-i "pandas.api.types.is_timedelta64_ns_dtype SA01" \
321-
-i "pandas.api.types.is_unsigned_integer_dtype SA01" \
322312
-i "pandas.api.types.pandas_dtype PR07,RT03,SA01" \
323313
-i "pandas.api.types.union_categoricals RT03,SA01" \
324314
-i "pandas.arrays.ArrowExtensionArray PR07,SA01" \

doc/source/getting_started/intro_tutorials/04_plotting.rst

+4-2
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@ How do I create plots in pandas?
3232
air_quality.head()
3333
3434
.. note::
35-
The usage of the ``index_col`` and ``parse_dates`` parameters of the ``read_csv`` function to define the first (0th) column as
36-
index of the resulting ``DataFrame`` and convert the dates in the column to :class:`Timestamp` objects, respectively.
35+
The ``index_col=0`` and ``parse_dates=True`` parameters passed to the ``read_csv`` function define
36+
the first (0th) column as index of the resulting ``DataFrame`` and convert the dates in the column
37+
to :class:`Timestamp` objects, respectively.
38+
3739

3840
.. raw:: html
3941

doc/source/whatsnew/v3.0.0.rst

+7-2
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ Other enhancements
3232
- :class:`pandas.api.typing.SASReader` is available for typing the output of :func:`read_sas` (:issue:`55689`)
3333
- :func:`DataFrame.to_excel` now raises a ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`)
3434
- :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`)
35+
- :meth:`DataFrame.agg` called with ``axis=1`` and a ``func`` which relabels the result index now raises a ``NotImplementedError`` (:issue:`58807`).
3536
- :meth:`Styler.set_tooltips` provides alternative method to storing tooltips by using title attribute of td elements. (:issue:`56981`)
3637
- Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`)
3738
- Support passing a :class:`Series` input to :func:`json_normalize` that retains the :class:`Series` :class:`Index` (:issue:`51452`)
@@ -502,11 +503,13 @@ Datetimelike
502503
- Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`)
503504
- Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56382`)
504505
- Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`)
506+
- Bug in :func:`tseries.frequencies.to_offset` would fail to parse frequency strings starting with "LWOM" (:issue:`59218`)
505507
- Bug in :meth:`DataFrame.agg` where a :class:`DataFrame` with missing values resulted in an ``IndexError`` (:issue:`58810`)
506508
- Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` not raising on custom business day frequencies bigger than "1C" (:issue:`58664`)
507509
- Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` returning ``False`` on double-digit frequencies (:issue:`58523`)
508510
- Bug in :meth:`DatetimeIndex.union` when ``unit`` was non-nanosecond (:issue:`59036`)
509511
- Bug in :meth:`Series.dt.microsecond` producing incorrect results for pyarrow backed :class:`Series`. (:issue:`59154`)
512+
- Bug in :meth:`to_datetime` not respecting dayfirst if an uncommon date string was passed. (:issue:`58859`)
510513
- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)
511514

512515
Timedelta
@@ -556,6 +559,7 @@ MultiIndex
556559
- :func:`DataFrame.loc` with ``axis=0`` and :class:`MultiIndex` when setting a value adds extra columns (:issue:`58116`)
557560
- :meth:`DataFrame.melt` would not accept multiple names in ``var_name`` when the columns were a :class:`MultiIndex` (:issue:`58033`)
558561
- :meth:`MultiIndex.insert` would not insert NA value correctly at unified location of index -1 (:issue:`59003`)
562+
- :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`)
559563
-
560564

561565
I/O
@@ -581,9 +585,9 @@ Period
581585

582586
Plotting
583587
^^^^^^^^
584-
- Bug in :meth:`.DataFrameGroupBy.boxplot` failed when there were multiple groupings (:issue:`14701`)
588+
- Bug in :meth:`.DataFrameGroupBy.boxplot` failed when there were multiple groupings (:issue:`14701`)
585589
- Bug in :meth:`DataFrame.plot` that causes a shift to the right when the frequency multiplier is greater than one. (:issue:`57587`)
586-
-
590+
- Bug in :meth:`Series.plot` with ``kind="pie"`` with :class:`ArrowDtype` (:issue:`59192`)
587591

588592
Groupby/resample/rolling
589593
^^^^^^^^^^^^^^^^^^^^^^^^
@@ -615,6 +619,7 @@ ExtensionArray
615619
^^^^^^^^^^^^^^
616620
- Bug in :meth:`.arrays.ArrowExtensionArray.__setitem__` which caused wrong behavior when using an integer array with repeated values as a key (:issue:`58530`)
617621
- Bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`)
622+
- Bug in various :class:`DataFrame` reductions for pyarrow temporal dtypes returning incorrect dtype when result was null (:issue:`59234`)
618623

619624
Styler
620625
^^^^^^

meson.build

+5
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,11 @@ else
4444
meson.add_dist_script(py, versioneer, '-o', '_version_meson.py')
4545
endif
4646

47+
cy = meson.get_compiler('cython')
48+
if cy.version().version_compare('>=3.1.0')
49+
add_project_arguments('-Xfreethreading_compatible=true', language : 'cython')
50+
endif
51+
4752
# Needed by pandas.test() when it looks for the pytest ini options
4853
py.install_sources(
4954
'pyproject.toml',

pandas/_libs/src/vendored/ujson/python/ujson.c

+4
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,10 @@ PyMODINIT_FUNC PyInit_json(void) {
384384
return NULL;
385385
}
386386

387+
#ifdef Py_GIL_DISABLED
388+
PyUnstable_Module_SetGIL(module, Py_MOD_GIL_NOT_USED);
389+
#endif
390+
387391
#ifndef PYPY_VERSION
388392
PyObject *mod_decimal = PyImport_ImportModule("decimal");
389393
if (mod_decimal) {

pandas/_libs/tslibs/conversion.pyx

+35-30
Original file line numberDiff line numberDiff line change
@@ -606,37 +606,42 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz,
606606
# equiv: datetime.today().replace(tzinfo=tz)
607607
return convert_datetime_to_tsobject(dt, tz, nanos=0, reso=NPY_FR_us)
608608
else:
609-
string_to_dts_failed = string_to_dts(
610-
ts, &dts, &out_bestunit, &out_local,
611-
&out_tzoffset, False
612-
)
613-
if not string_to_dts_failed:
614-
reso = get_supported_reso(out_bestunit)
615-
check_dts_bounds(&dts, reso)
616-
obj = _TSObject()
617-
obj.dts = dts
618-
obj.creso = reso
619-
ival = npy_datetimestruct_to_datetime(reso, &dts)
620-
621-
if out_local == 1:
622-
obj.tzinfo = timezone(timedelta(minutes=out_tzoffset))
623-
obj.value = tz_localize_to_utc_single(
624-
ival, obj.tzinfo, ambiguous="raise", nonexistent=None, creso=reso
625-
)
626-
if tz is None:
627-
check_overflows(obj, reso)
628-
return obj
629-
_adjust_tsobject_tz_using_offset(obj, tz)
630-
return obj
631-
else:
632-
if tz is not None:
633-
# shift for _localize_tso
634-
ival = tz_localize_to_utc_single(
635-
ival, tz, ambiguous="raise", nonexistent=None, creso=reso
609+
if not dayfirst: # GH 58859
610+
string_to_dts_failed = string_to_dts(
611+
ts, &dts, &out_bestunit, &out_local,
612+
&out_tzoffset, False
613+
)
614+
if not string_to_dts_failed:
615+
reso = get_supported_reso(out_bestunit)
616+
check_dts_bounds(&dts, reso)
617+
obj = _TSObject()
618+
obj.dts = dts
619+
obj.creso = reso
620+
ival = npy_datetimestruct_to_datetime(reso, &dts)
621+
622+
if out_local == 1:
623+
obj.tzinfo = timezone(timedelta(minutes=out_tzoffset))
624+
obj.value = tz_localize_to_utc_single(
625+
ival,
626+
obj.tzinfo,
627+
ambiguous="raise",
628+
nonexistent=None,
629+
creso=reso,
636630
)
637-
obj.value = ival
638-
maybe_localize_tso(obj, tz, obj.creso)
639-
return obj
631+
if tz is None:
632+
check_overflows(obj, reso)
633+
return obj
634+
_adjust_tsobject_tz_using_offset(obj, tz)
635+
return obj
636+
else:
637+
if tz is not None:
638+
# shift for _localize_tso
639+
ival = tz_localize_to_utc_single(
640+
ival, tz, ambiguous="raise", nonexistent=None, creso=reso
641+
)
642+
obj.value = ival
643+
maybe_localize_tso(obj, tz, obj.creso)
644+
return obj
640645

641646
dt = parse_datetime_string(
642647
ts,

pandas/_libs/tslibs/offsets.pyx

+2-1
Original file line numberDiff line numberDiff line change
@@ -4676,6 +4676,7 @@ prefix_mapping = {
46764676
Hour, # 'h'
46774677
Day, # 'D'
46784678
WeekOfMonth, # 'WOM'
4679+
LastWeekOfMonth, # 'LWOM'
46794680
FY5253,
46804681
FY5253Quarter,
46814682
]
@@ -4894,7 +4895,7 @@ cpdef to_offset(freq, bint is_period=False):
48944895
f"\'{name}\' is deprecated and will be removed "
48954896
f"in a future version, please use "
48964897
f"\'{c_PERIOD_AND_OFFSET_DEPR_FREQSTR.get(name)}\' "
4897-
f" instead.",
4898+
f"instead.",
48984899
FutureWarning,
48994900
stacklevel=find_stack_level(),
49004901
)

pandas/_libs/tslibs/parsing.pyx

+25-24
Original file line numberDiff line numberDiff line change
@@ -377,32 +377,33 @@ def parse_datetime_string_with_reso(
377377
raise ValueError(f'Given date string "{date_string}" not likely a datetime')
378378

379379
# Try iso8601 first, as it handles nanoseconds
380-
string_to_dts_failed = string_to_dts(
381-
date_string, &dts, &out_bestunit, &out_local,
382-
&out_tzoffset, False
383-
)
384-
if not string_to_dts_failed:
385-
# Match Timestamp and drop picoseconds, femtoseconds, attoseconds
386-
# The new resolution will just be nano
387-
# GH#50417
388-
if out_bestunit in _timestamp_units:
389-
out_bestunit = NPY_DATETIMEUNIT.NPY_FR_ns
390-
391-
if out_bestunit == NPY_DATETIMEUNIT.NPY_FR_ns:
392-
# TODO: avoid circular import
393-
from pandas import Timestamp
394-
parsed = Timestamp(date_string)
395-
else:
396-
if out_local:
397-
tz = timezone(timedelta(minutes=out_tzoffset))
380+
if not dayfirst: # GH 58859
381+
string_to_dts_failed = string_to_dts(
382+
date_string, &dts, &out_bestunit, &out_local,
383+
&out_tzoffset, False
384+
)
385+
if not string_to_dts_failed:
386+
# Match Timestamp and drop picoseconds, femtoseconds, attoseconds
387+
# The new resolution will just be nano
388+
# GH#50417
389+
if out_bestunit in _timestamp_units:
390+
out_bestunit = NPY_DATETIMEUNIT.NPY_FR_ns
391+
392+
if out_bestunit == NPY_DATETIMEUNIT.NPY_FR_ns:
393+
# TODO: avoid circular import
394+
from pandas import Timestamp
395+
parsed = Timestamp(date_string)
398396
else:
399-
tz = None
400-
parsed = datetime_new(
401-
dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz
402-
)
397+
if out_local:
398+
tz = timezone(timedelta(minutes=out_tzoffset))
399+
else:
400+
tz = None
401+
parsed = datetime_new(
402+
dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz
403+
)
403404

404-
reso = npy_unit_to_attrname[out_bestunit]
405-
return parsed, reso
405+
reso = npy_unit_to_attrname[out_bestunit]
406+
return parsed, reso
406407

407408
parsed = _parse_delimited_date(date_string, dayfirst, &out_bestunit)
408409
if parsed is not None:

pandas/conftest.py

+3
Original file line numberDiff line numberDiff line change
@@ -951,6 +951,9 @@ def rand_series_with_duplicate_datetimeindex() -> Series:
951951
]
952952
)
953953
def ea_scalar_and_dtype(request):
954+
"""
955+
Fixture that tests each scalar and datetime type.
956+
"""
954957
return request.param
955958

956959

pandas/core/apply.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -90,16 +90,19 @@ def frame_apply(
9090
kwargs=None,
9191
) -> FrameApply:
9292
"""construct and return a row or column based frame apply object"""
93+
_, func, columns, _ = reconstruct_func(func, **kwargs)
94+
9395
axis = obj._get_axis_number(axis)
9496
klass: type[FrameApply]
9597
if axis == 0:
9698
klass = FrameRowApply
9799
elif axis == 1:
100+
if columns:
101+
raise NotImplementedError(
102+
f"Named aggregation is not supported when {axis=}."
103+
)
98104
klass = FrameColumnApply
99105

100-
_, func, _, _ = reconstruct_func(func, **kwargs)
101-
assert func is not None
102-
103106
return klass(
104107
obj,
105108
func,

pandas/core/arrays/arrow/array.py

-2
Original file line numberDiff line numberDiff line change
@@ -1706,8 +1706,6 @@ def pyarrow_meth(data, skip_nulls, **kwargs):
17061706
if name == "median":
17071707
# GH 52679: Use quantile instead of approximate_median; returns array
17081708
result = result[0]
1709-
if pc.is_null(result).as_py():
1710-
return result
17111709

17121710
if name in ["min", "max", "sum"] and pa.types.is_duration(pa_type):
17131711
result = result.cast(pa_type)

pandas/core/arrays/base.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1986,7 +1986,10 @@ def _reduce(
19861986
)
19871987
result = meth(skipna=skipna, **kwargs)
19881988
if keepdims:
1989-
result = np.array([result])
1989+
if name in ["min", "max"]:
1990+
result = self._from_sequence([result], dtype=self.dtype)
1991+
else:
1992+
result = np.array([result])
19901993

19911994
return result
19921995

pandas/core/arrays/datetimelike.py

+22
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
ScalarIndexer,
6666
Self,
6767
SequenceIndexer,
68+
TakeIndexer,
6869
TimeAmbiguous,
6970
TimeNonexistent,
7071
npt,
@@ -2340,6 +2341,27 @@ def interpolate(
23402341
return self
23412342
return type(self)._simple_new(out_data, dtype=self.dtype)
23422343

2344+
def take(
2345+
self,
2346+
indices: TakeIndexer,
2347+
*,
2348+
allow_fill: bool = False,
2349+
fill_value: Any = None,
2350+
axis: AxisInt = 0,
2351+
) -> Self:
2352+
result = super().take(
2353+
indices=indices, allow_fill=allow_fill, fill_value=fill_value, axis=axis
2354+
)
2355+
2356+
indices = np.asarray(indices, dtype=np.intp)
2357+
maybe_slice = lib.maybe_indices_to_slice(indices, len(self))
2358+
2359+
if isinstance(maybe_slice, slice):
2360+
freq = self._get_getitem_freq(maybe_slice)
2361+
result._freq = freq
2362+
2363+
return result
2364+
23432365
# --------------------------------------------------------------
23442366
# Unsorted
23452367

0 commit comments

Comments
 (0)