Skip to content

Commit 7532cc3

Browse files
author
Khor Chean Wei
authored
Merge branch 'main' into numeric_to_df_cum
2 parents f7f3f49 + b1525c4 commit 7532cc3

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+866
-707
lines changed

asv_bench/asv.conf.json

+1
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
// pip (with all the conda available packages installed first,
4242
// followed by the pip installed packages).
4343
"matrix": {
44+
"pip+build": [],
4445
"Cython": ["3.0"],
4546
"matplotlib": [],
4647
"sqlalchemy": [],

ci/deps/actions-310.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ dependencies:
2424

2525
# optional dependencies
2626
- beautifulsoup4>=4.11.2
27+
# https://github.com/conda-forge/pytables-feedstock/issues/97
28+
- c-blosc2=2.13.2
2729
- blosc>=1.21.3
2830
- bottleneck>=1.3.6
2931
- fastparquet>=2023.10.0

ci/deps/actions-311-downstream_compat.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ dependencies:
2626

2727
# optional dependencies
2828
- beautifulsoup4>=4.11.2
29+
# https://github.com/conda-forge/pytables-feedstock/issues/97
30+
- c-blosc2=2.13.2
2931
- blosc>=1.21.3
3032
- bottleneck>=1.3.6
3133
- fastparquet>=2023.10.0

ci/deps/actions-311.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ dependencies:
2424

2525
# optional dependencies
2626
- beautifulsoup4>=4.11.2
27+
# https://github.com/conda-forge/pytables-feedstock/issues/97
28+
- c-blosc2=2.13.2
2729
- blosc>=1.21.3
2830
- bottleneck>=1.3.6
2931
- fastparquet>=2023.10.0

ci/deps/actions-312.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ dependencies:
2424

2525
# optional dependencies
2626
- beautifulsoup4>=4.11.2
27+
# https://github.com/conda-forge/pytables-feedstock/issues/97
28+
- c-blosc2=2.13.2
2729
- blosc>=1.21.3
2830
- bottleneck>=1.3.6
2931
- fastparquet>=2023.10.0

ci/deps/actions-39-minimum_versions.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ dependencies:
2727

2828
# optional dependencies
2929
- beautifulsoup4=4.11.2
30+
# https://github.com/conda-forge/pytables-feedstock/issues/97
31+
- c-blosc2=2.13.2
3032
- blosc=1.21.3
3133
- bottleneck=1.3.6
3234
- fastparquet=2023.10.0

ci/deps/actions-39.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ dependencies:
2424

2525
# optional dependencies
2626
- beautifulsoup4>=4.11.2
27+
# https://github.com/conda-forge/pytables-feedstock/issues/97
28+
- c-blosc2=2.13.2
2729
- blosc>=1.21.3
2830
- bottleneck>=1.3.6
2931
- fastparquet>=2023.10.0

ci/deps/circle-310-arm64.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ dependencies:
2525

2626
# optional dependencies
2727
- beautifulsoup4>=4.11.2
28+
# https://github.com/conda-forge/pytables-feedstock/issues/97
29+
- c-blosc2=2.13.2
2830
- blosc>=1.21.3
2931
- bottleneck>=1.3.6
3032
- fastparquet>=2023.10.0

doc/source/user_guide/basics.rst

+5-5
Original file line numberDiff line numberDiff line change
@@ -476,15 +476,15 @@ For example:
476476
.. ipython:: python
477477
478478
df
479-
df.mean(0)
480-
df.mean(1)
479+
df.mean(axis=0)
480+
df.mean(axis=1)
481481
482482
All such methods have a ``skipna`` option signaling whether to exclude missing
483483
data (``True`` by default):
484484

485485
.. ipython:: python
486486
487-
df.sum(0, skipna=False)
487+
df.sum(axis=0, skipna=False)
488488
df.sum(axis=1, skipna=True)
489489
490490
Combined with the broadcasting / arithmetic behavior, one can describe various
@@ -495,8 +495,8 @@ standard deviation of 1), very concisely:
495495
496496
ts_stand = (df - df.mean()) / df.std()
497497
ts_stand.std()
498-
xs_stand = df.sub(df.mean(1), axis=0).div(df.std(1), axis=0)
499-
xs_stand.std(1)
498+
xs_stand = df.sub(df.mean(axis=1), axis=0).div(df.std(axis=1), axis=0)
499+
xs_stand.std(axis=1)
500500
501501
Note that methods like :meth:`~DataFrame.cumsum` and :meth:`~DataFrame.cumprod`
502502
preserve the location of ``NaN`` values. This is somewhat different from

doc/source/user_guide/indexing.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -952,7 +952,7 @@ To select a row where each column meets its own criterion:
952952
953953
values = {'ids': ['a', 'b'], 'ids2': ['a', 'c'], 'vals': [1, 3]}
954954
955-
row_mask = df.isin(values).all(1)
955+
row_mask = df.isin(values).all(axis=1)
956956
957957
df[row_mask]
958958

doc/source/whatsnew/v2.2.1.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,4 @@ Other
8787
Contributors
8888
~~~~~~~~~~~~
8989

90-
.. contributors:: v2.2.0..v2.2.1|HEAD
90+
.. contributors:: v2.2.0..v2.2.1

doc/source/whatsnew/v2.2.2.rst

+18-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
.. _whatsnew_222:
22

3-
What's new in 2.2.2 (April XX, 2024)
3+
What's new in 2.2.2 (April 10, 2024)
44
---------------------------------------
55

66
These are the changes in pandas 2.2.2. See :ref:`release` for a full changelog
@@ -9,6 +9,21 @@ including other versions of pandas.
99
{{ header }}
1010

1111
.. ---------------------------------------------------------------------------
12+
13+
.. _whatsnew_222.np2_compat:
14+
15+
Pandas 2.2.2 is now compatible with numpy 2.0
16+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
17+
18+
Pandas 2.2.2 is the first version of pandas that is generally compatible with the upcoming
19+
numpy 2.0 release, and wheels for pandas 2.2.2 will work with both numpy 1.x and 2.x.
20+
21+
One major caveat is that arrays created with numpy 2.0's new ``StringDtype`` will convert
22+
to ``object`` dtyped arrays upon :class:`Series`/:class:`DataFrame` creation.
23+
Full support for numpy 2.0's ``StringDtype`` is expected to land in pandas 3.0.
24+
25+
As usual, please report any bugs discovered to our `issue tracker <https://github.com/pandas-dev/pandas/issues/new/choose>`_.
26+
1227
.. _whatsnew_222.regressions:
1328

1429
Fixed regressions
@@ -40,3 +55,5 @@ Other
4055

4156
Contributors
4257
~~~~~~~~~~~~
58+
59+
.. contributors:: v2.2.1..v2.2.2|HEAD

doc/source/whatsnew/v3.0.0.rst

+3
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ Other enhancements
3333
- :meth:`Styler.set_tooltips` provides an alternative method of storing tooltips by using the ``title`` attribute of ``td`` elements. (:issue:`56981`)
3434
- Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`)
3535
- Support passing a :class:`Series` input to :func:`json_normalize` that retains the :class:`Series` :class:`Index` (:issue:`51452`)
36+
- Support reading value labels from Stata 108-format (Stata 6) and earlier files (:issue:`58154`)
3637
- Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
3738
- :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
3839
- :meth:`DataFrame.cum*` methods now have a ``numeric_only`` parameter (:issue:`53072`)
@@ -190,6 +191,7 @@ Other Deprecations
190191

191192
- Deprecated :meth:`Timestamp.utcfromtimestamp`, use ``Timestamp.fromtimestamp(ts, "UTC")`` instead (:issue:`56680`)
192193
- Deprecated :meth:`Timestamp.utcnow`, use ``Timestamp.now("UTC")`` instead (:issue:`56680`)
194+
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.all`, :meth:`DataFrame.min`, :meth:`DataFrame.max`, :meth:`DataFrame.sum`, :meth:`DataFrame.prod`, :meth:`DataFrame.mean`, :meth:`DataFrame.median`, :meth:`DataFrame.sem`, :meth:`DataFrame.var`, :meth:`DataFrame.std`, :meth:`DataFrame.skew`, :meth:`DataFrame.kurt`, :meth:`Series.all`, :meth:`Series.min`, :meth:`Series.max`, :meth:`Series.sum`, :meth:`Series.prod`, :meth:`Series.mean`, :meth:`Series.median`, :meth:`Series.sem`, :meth:`Series.var`, :meth:`Series.std`, :meth:`Series.skew`, and :meth:`Series.kurt`. (:issue:`57087`)
193195
- Deprecated allowing non-keyword arguments in :meth:`Series.to_markdown` except ``buf``. (:issue:`57280`)
194196
- Deprecated allowing non-keyword arguments in :meth:`Series.to_string` except ``buf``. (:issue:`57280`)
195197
-
@@ -347,6 +349,7 @@ Bug fixes
347349
- Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
348350
- Fixed bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
349351
- Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
352+
- Fixed bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
350353

351354
Categorical
352355
^^^^^^^^^^^

environment.yml

+2
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ dependencies:
2828

2929
# optional dependencies
3030
- beautifulsoup4>=4.11.2
31+
# https://github.com/conda-forge/pytables-feedstock/issues/97
32+
- c-blosc2=2.13.2
3133
- blosc
3234
- bottleneck>=1.3.6
3335
- fastparquet>=2023.10.0

pandas/_libs/tslib.pyx

-34
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@ from pandas._libs.tslibs.conversion cimport (
7070
from pandas._libs.tslibs.dtypes cimport npy_unit_to_abbrev
7171
from pandas._libs.tslibs.nattype cimport (
7272
NPY_NAT,
73-
c_NaT as NaT,
7473
c_nat_strings as nat_strings,
7574
)
7675
from pandas._libs.tslibs.timestamps cimport _Timestamp
@@ -346,39 +345,6 @@ def array_with_unit_to_datetime(
346345
return result, tz
347346

348347

349-
cdef _array_with_unit_to_datetime_object_fallback(ndarray[object] values, str unit):
350-
cdef:
351-
Py_ssize_t i, n = len(values)
352-
ndarray[object] oresult
353-
tzinfo tz = None
354-
355-
# TODO: fix subtle differences between this and no-unit code
356-
oresult = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0)
357-
for i in range(n):
358-
val = values[i]
359-
360-
if checknull_with_nat_and_na(val):
361-
oresult[i] = <object>NaT
362-
elif is_integer_object(val) or is_float_object(val):
363-
364-
if val != val or val == NPY_NAT:
365-
oresult[i] = <object>NaT
366-
else:
367-
try:
368-
oresult[i] = Timestamp(val, unit=unit)
369-
except OutOfBoundsDatetime:
370-
oresult[i] = val
371-
372-
elif isinstance(val, str):
373-
if len(val) == 0 or val in nat_strings:
374-
oresult[i] = <object>NaT
375-
376-
else:
377-
oresult[i] = val
378-
379-
return oresult, tz
380-
381-
382348
@cython.wraparound(False)
383349
@cython.boundscheck(False)
384350
def first_non_null(values: ndarray) -> int:

pandas/core/apply.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1710,9 +1710,9 @@ def normalize_keyword_aggregation(
17101710
# TODO: aggspec type: typing.Dict[str, List[AggScalar]]
17111711
aggspec = defaultdict(list)
17121712
order = []
1713-
columns, pairs = list(zip(*kwargs.items()))
1713+
columns = tuple(kwargs.keys())
17141714

1715-
for column, aggfunc in pairs:
1715+
for column, aggfunc in kwargs.values():
17161716
aggspec[column].append(aggfunc)
17171717
order.append((column, com.get_callable_name(aggfunc) or aggfunc))
17181718

pandas/core/frame.py

+16-2
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
from pandas.util._decorators import (
6565
Appender,
6666
Substitution,
67+
deprecate_nonkeyword_arguments,
6768
doc,
6869
set_module,
6970
)
@@ -6167,12 +6168,13 @@ class max type
61676168
names = self.index._get_default_index_names(names, default)
61686169

61696170
if isinstance(self.index, MultiIndex):
6170-
to_insert = zip(self.index.levels, self.index.codes)
6171+
to_insert = zip(reversed(self.index.levels), reversed(self.index.codes))
61716172
else:
61726173
to_insert = ((self.index, None),)
61736174

61746175
multi_col = isinstance(self.columns, MultiIndex)
6175-
for i, (lev, lab) in reversed(list(enumerate(to_insert))):
6176+
for j, (lev, lab) in enumerate(to_insert, start=1):
6177+
i = self.index.nlevels - j
61766178
if level is not None and i not in level:
61776179
continue
61786180
name = names[i]
@@ -11543,6 +11545,7 @@ def all(
1154311545
**kwargs,
1154411546
) -> Series | bool: ...
1154511547

11548+
@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="all")
1154611549
@doc(make_doc("all", ndim=1))
1154711550
def all(
1154811551
self,
@@ -11589,6 +11592,7 @@ def min(
1158911592
**kwargs,
1159011593
) -> Series | Any: ...
1159111594

11595+
@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="min")
1159211596
@doc(make_doc("min", ndim=2))
1159311597
def min(
1159411598
self,
@@ -11635,6 +11639,7 @@ def max(
1163511639
**kwargs,
1163611640
) -> Series | Any: ...
1163711641

11642+
@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="max")
1163811643
@doc(make_doc("max", ndim=2))
1163911644
def max(
1164011645
self,
@@ -11650,6 +11655,7 @@ def max(
1165011655
result = result.__finalize__(self, method="max")
1165111656
return result
1165211657

11658+
@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sum")
1165311659
@doc(make_doc("sum", ndim=2))
1165411660
def sum(
1165511661
self,
@@ -11670,6 +11676,7 @@ def sum(
1167011676
result = result.__finalize__(self, method="sum")
1167111677
return result
1167211678

11679+
@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="prod")
1167311680
@doc(make_doc("prod", ndim=2))
1167411681
def prod(
1167511682
self,
@@ -11721,6 +11728,7 @@ def mean(
1172111728
**kwargs,
1172211729
) -> Series | Any: ...
1172311730

11731+
@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="mean")
1172411732
@doc(make_doc("mean", ndim=2))
1172511733
def mean(
1172611734
self,
@@ -11767,6 +11775,7 @@ def median(
1176711775
**kwargs,
1176811776
) -> Series | Any: ...
1176911777

11778+
@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="median")
1177011779
@doc(make_doc("median", ndim=2))
1177111780
def median(
1177211781
self,
@@ -11816,6 +11825,7 @@ def sem(
1181611825
**kwargs,
1181711826
) -> Series | Any: ...
1181811827

11828+
@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sem")
1181911829
@doc(make_doc("sem", ndim=2))
1182011830
def sem(
1182111831
self,
@@ -11866,6 +11876,7 @@ def var(
1186611876
**kwargs,
1186711877
) -> Series | Any: ...
1186811878

11879+
@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="var")
1186911880
@doc(make_doc("var", ndim=2))
1187011881
def var(
1187111882
self,
@@ -11916,6 +11927,7 @@ def std(
1191611927
**kwargs,
1191711928
) -> Series | Any: ...
1191811929

11930+
@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="std")
1191911931
@doc(make_doc("std", ndim=2))
1192011932
def std(
1192111933
self,
@@ -11963,6 +11975,7 @@ def skew(
1196311975
**kwargs,
1196411976
) -> Series | Any: ...
1196511977

11978+
@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="skew")
1196611979
@doc(make_doc("skew", ndim=2))
1196711980
def skew(
1196811981
self,
@@ -12009,6 +12022,7 @@ def kurt(
1200912022
**kwargs,
1201012023
) -> Series | Any: ...
1201112024

12025+
@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="kurt")
1201212026
@doc(make_doc("kurt", ndim=2))
1201312027
def kurt(
1201412028
self,

pandas/core/groupby/grouper.py

-5
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,6 @@ def __init__(
263263
self.sort = sort
264264
self.dropna = dropna
265265

266-
self._grouper_deprecated = None
267266
self._indexer_deprecated: npt.NDArray[np.intp] | None = None
268267
self.binner = None
269268
self._grouper = None
@@ -292,10 +291,6 @@ def _get_grouper(
292291
validate=validate,
293292
dropna=self.dropna,
294293
)
295-
# Without setting this, subsequent lookups to .groups raise
296-
# error: Incompatible types in assignment (expression has type "BaseGrouper",
297-
# variable has type "None")
298-
self._grouper_deprecated = grouper # type: ignore[assignment]
299294

300295
return grouper, obj
301296

pandas/core/groupby/ops.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -706,7 +706,7 @@ def groups(self) -> dict[Hashable, Index]:
706706
return self.groupings[0].groups
707707
result_index, ids = self.result_index_and_ids
708708
values = result_index._values
709-
categories = Categorical(ids, categories=np.arange(len(result_index)))
709+
categories = Categorical(ids, categories=range(len(result_index)))
710710
result = {
711711
# mypy is not aware that group has to be an integer
712712
values[group]: self.axis.take(axis_ilocs) # type: ignore[call-overload]

0 commit comments

Comments
 (0)