Skip to content

Commit df231f0

Browse files
committed
Merge remote-tracking branch 'upstream/main' into string_array_numpy_semantics
# Conflicts: # pandas/core/arrays/_arrow_string_mixins.py # pandas/tests/strings/__init__.py # pandas/tests/strings/test_strings.py
2 parents 3188c25 + fc30823 commit df231f0

File tree

12 files changed

+361
-20
lines changed

12 files changed

+361
-20
lines changed

.github/workflows/code-checks.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@ on:
44
push:
55
branches:
66
- main
7-
- 2.0.x
7+
- 2.1.x
88
pull_request:
99
branches:
1010
- main
11-
- 2.0.x
11+
- 2.1.x
1212

1313
env:
1414
ENV_FILE: environment.yml

.github/workflows/docbuild-and-upload.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@ on:
44
push:
55
branches:
66
- main
7-
- 2.0.x
7+
- 2.1.x
88
tags:
99
- '*'
1010
pull_request:
1111
branches:
1212
- main
13-
- 2.0.x
13+
- 2.1.x
1414

1515
env:
1616
ENV_FILE: environment.yml

.github/workflows/package-checks.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@ on:
44
push:
55
branches:
66
- main
7-
- 2.0.x
7+
- 2.1.x
88
pull_request:
99
branches:
1010
- main
11-
- 2.0.x
11+
- 2.1.x
1212
types: [ labeled, opened, synchronize, reopened ]
1313

1414
permissions:

.github/workflows/unit-tests.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@ on:
44
push:
55
branches:
66
- main
7-
- 2.0.x
7+
- 2.1.x
88
pull_request:
99
branches:
1010
- main
11-
- 2.0.x
11+
- 2.1.x
1212
paths-ignore:
1313
- "doc/**"
1414
- "web/**"

doc/source/whatsnew/index.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,14 @@ This is the list of changes to pandas between each release. For full details,
1010
see the `commit logs <https://github.com/pandas-dev/pandas/commits/>`_. For install and
1111
upgrade instructions, see :ref:`install`.
1212

13+
Version 2.2
14+
-----------
15+
16+
.. toctree::
17+
:maxdepth: 2
18+
19+
v2.2.0
20+
1321
Version 2.1
1422
-----------
1523

doc/source/whatsnew/v2.1.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -614,6 +614,7 @@ Performance improvements
614614
- :class:`Period`'s default formatter (`period_format`) is now significantly (~twice) faster. This improves performance of ``str(Period)``, ``repr(Period)``, and :meth:`Period.strftime(fmt=None)`, as well as ``PeriodArray.strftime(fmt=None)``, ``PeriodIndex.strftime(fmt=None)`` and ``PeriodIndex.format(fmt=None)``. Finally, ``to_csv`` operations involving :class:`PeriodArray` or :class:`PeriodIndex` with default ``date_format`` are also significantly accelerated. (:issue:`51459`)
615615
- Performance improvement accessing :attr:`arrays.IntegerArray.dtype` & :attr:`arrays.FloatingArray.dtype` (:issue:`52998`)
616616
- Performance improvement for :class:`DataFrameGroupBy`/:class:`SeriesGroupBy` aggregations (e.g. :meth:`DataFrameGroupBy.sum`) with ``engine="numba"`` (:issue:`53731`)
617+
- Performance improvement in :class:`DataFrame` reductions with ``axis=1`` and extension dtypes (:issue:`54341`)
617618
- Performance improvement in :class:`DataFrame` reductions with ``axis=None`` and extension dtypes (:issue:`54308`)
618619
- Performance improvement in :class:`MultiIndex` and multi-column operations (e.g. :meth:`DataFrame.sort_values`, :meth:`DataFrame.groupby`, :meth:`Series.unstack`) when index/column values are already sorted (:issue:`53806`)
619620
- Performance improvement in :class:`Series` reductions (:issue:`52341`)

doc/source/whatsnew/v2.2.0.rst

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
.. _whatsnew_220:
2+
3+
What's new in 2.2.0 (Month XX, 2024)
4+
------------------------------------
5+
6+
These are the changes in pandas 2.2.0. See :ref:`release` for a full changelog
7+
including other versions of pandas.
8+
9+
{{ header }}
10+
11+
.. ---------------------------------------------------------------------------
12+
.. _whatsnew_220.enhancements:
13+
14+
Enhancements
15+
~~~~~~~~~~~~
16+
17+
.. _whatsnew_220.enhancements.enhancement1:
18+
19+
enhancement1
20+
^^^^^^^^^^^^
21+
22+
.. _whatsnew_220.enhancements.enhancement2:
23+
24+
enhancement2
25+
^^^^^^^^^^^^
26+
27+
.. _whatsnew_220.enhancements.other:
28+
29+
Other enhancements
30+
^^^^^^^^^^^^^^^^^^
31+
-
32+
-
33+
34+
.. ---------------------------------------------------------------------------
35+
.. _whatsnew_220.notable_bug_fixes:
36+
37+
Notable bug fixes
38+
~~~~~~~~~~~~~~~~~
39+
40+
These are bug fixes that might have notable behavior changes.
41+
42+
.. _whatsnew_220.notable_bug_fixes.notable_bug_fix1:
43+
44+
notable_bug_fix1
45+
^^^^^^^^^^^^^^^^
46+
47+
.. _whatsnew_220.notable_bug_fixes.notable_bug_fix2:
48+
49+
notable_bug_fix2
50+
^^^^^^^^^^^^^^^^
51+
52+
.. ---------------------------------------------------------------------------
53+
.. _whatsnew_220.api_breaking:
54+
55+
Backwards incompatible API changes
56+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
57+
58+
.. _whatsnew_220.api_breaking.deps:
59+
60+
Increased minimum versions for dependencies
61+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
62+
Some minimum supported versions of dependencies were updated.
63+
If installed, we now require:
64+
65+
+-----------------+-----------------+----------+---------+
66+
| Package | Minimum Version | Required | Changed |
67+
+=================+=================+==========+=========+
68+
| | | X | X |
69+
+-----------------+-----------------+----------+---------+
70+
71+
For `optional libraries <https://pandas.pydata.org/docs/getting_started/install.html>`_ the general recommendation is to use the latest version.
72+
The following table lists the lowest version per library that is currently being tested throughout the development of pandas.
73+
Optional libraries below the lowest tested version may still work, but are not considered supported.
74+
75+
+-----------------+-----------------+---------+
76+
| Package | Minimum Version | Changed |
77+
+=================+=================+=========+
78+
| | | X |
79+
+-----------------+-----------------+---------+
80+
81+
See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more.
82+
83+
.. _whatsnew_220.api_breaking.other:
84+
85+
Other API changes
86+
^^^^^^^^^^^^^^^^^
87+
-
88+
-
89+
90+
.. ---------------------------------------------------------------------------
91+
.. _whatsnew_220.deprecations:
92+
93+
Deprecations
94+
~~~~~~~~~~~~
95+
-
96+
-
97+
98+
.. ---------------------------------------------------------------------------
99+
.. _whatsnew_220.performance:
100+
101+
Performance improvements
102+
~~~~~~~~~~~~~~~~~~~~~~~~
103+
-
104+
-
105+
106+
.. ---------------------------------------------------------------------------
107+
.. _whatsnew_220.bug_fixes:
108+
109+
Bug fixes
110+
~~~~~~~~~
111+
112+
Categorical
113+
^^^^^^^^^^^
114+
-
115+
-
116+
117+
Datetimelike
118+
^^^^^^^^^^^^
119+
-
120+
-
121+
122+
Timedelta
123+
^^^^^^^^^
124+
-
125+
-
126+
127+
Timezones
128+
^^^^^^^^^
129+
-
130+
-
131+
132+
Numeric
133+
^^^^^^^
134+
-
135+
-
136+
137+
Conversion
138+
^^^^^^^^^^
139+
-
140+
-
141+
142+
Strings
143+
^^^^^^^
144+
-
145+
-
146+
147+
Interval
148+
^^^^^^^^
149+
-
150+
-
151+
152+
Indexing
153+
^^^^^^^^
154+
-
155+
-
156+
157+
Missing
158+
^^^^^^^
159+
-
160+
-
161+
162+
MultiIndex
163+
^^^^^^^^^^
164+
-
165+
-
166+
167+
I/O
168+
^^^
169+
-
170+
-
171+
172+
Period
173+
^^^^^^
174+
-
175+
-
176+
177+
Plotting
178+
^^^^^^^^
179+
-
180+
-
181+
182+
Groupby/resample/rolling
183+
^^^^^^^^^^^^^^^^^^^^^^^^
184+
-
185+
-
186+
187+
Reshaping
188+
^^^^^^^^^
189+
-
190+
-
191+
192+
Sparse
193+
^^^^^^
194+
-
195+
-
196+
197+
ExtensionArray
198+
^^^^^^^^^^^^^^
199+
-
200+
-
201+
202+
Styler
203+
^^^^^^
204+
-
205+
-
206+
207+
Other
208+
^^^^^
209+
210+
.. ***DO NOT USE THIS SECTION***
211+
212+
-
213+
-
214+
215+
.. ---------------------------------------------------------------------------
216+
.. _whatsnew_220.contributors:
217+
218+
Contributors
219+
~~~~~~~~~~~~

pandas/core/arrays/_arrow_string_mixins.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@
1212

1313

1414
class ArrowStringArrayMixin:
15+
_pa_array = None
16+
17+
def __init__(self, *args, **kwargs) -> None:
18+
raise NotImplementedError
19+
1520
def _str_pad(
1621
self,
1722
width: int,
@@ -46,7 +51,9 @@ def _str_get(self, i: int):
4651
selected = pc.utf8_slice_codeunits(
4752
self._pa_array, start=start, stop=stop, step=step
4853
)
49-
null_value = pa.scalar(None, type=self._pa_array.type)
54+
null_value = pa.scalar(
55+
None, type=self._pa_array.type # type: ignore[attr-defined]
56+
)
5057
result = pc.if_else(not_out_of_bounds, selected, null_value)
5158
return type(self)(result)
5259

pandas/core/arrays/arrow/array.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1024,12 +1024,11 @@ def factorize(
10241024
) -> tuple[np.ndarray, ExtensionArray]:
10251025
null_encoding = "mask" if use_na_sentinel else "encode"
10261026

1027-
pa_type = self._pa_array.type
1028-
if pa.types.is_duration(pa_type):
1027+
data = self._pa_array
1028+
pa_type = data.type
1029+
if pa_version_under11p0 and pa.types.is_duration(pa_type):
10291030
# https://github.com/apache/arrow/issues/15226#issuecomment-1376578323
1030-
data = self._pa_array.cast(pa.int64())
1031-
else:
1032-
data = self._pa_array
1031+
data = data.cast(pa.int64())
10331032

10341033
if pa.types.is_dictionary(data.type):
10351034
encoded = data
@@ -1047,7 +1046,7 @@ def factorize(
10471046
)
10481047
uniques = type(self)(encoded.chunk(0).dictionary)
10491048

1050-
if pa.types.is_duration(pa_type):
1049+
if pa_version_under11p0 and pa.types.is_duration(pa_type):
10511050
uniques = cast(ArrowExtensionArray, uniques.astype(self.dtype))
10521051
return indices, uniques
10531052

@@ -1286,15 +1285,15 @@ def unique(self) -> Self:
12861285
"""
12871286
pa_type = self._pa_array.type
12881287

1289-
if pa.types.is_duration(pa_type):
1288+
if pa_version_under11p0 and pa.types.is_duration(pa_type):
12901289
# https://github.com/apache/arrow/issues/15226#issuecomment-1376578323
12911290
data = self._pa_array.cast(pa.int64())
12921291
else:
12931292
data = self._pa_array
12941293

12951294
pa_result = pc.unique(data)
12961295

1297-
if pa.types.is_duration(pa_type):
1296+
if pa_version_under11p0 and pa.types.is_duration(pa_type):
12981297
pa_result = pa_result.cast(pa_type)
12991298

13001299
return type(self)(pa_result)
@@ -1317,7 +1316,7 @@ def value_counts(self, dropna: bool = True) -> Series:
13171316
Series.value_counts
13181317
"""
13191318
pa_type = self._pa_array.type
1320-
if pa.types.is_duration(pa_type):
1319+
if pa_version_under11p0 and pa.types.is_duration(pa_type):
13211320
# https://github.com/apache/arrow/issues/15226#issuecomment-1376578323
13221321
data = self._pa_array.cast(pa.int64())
13231322
else:
@@ -1337,7 +1336,7 @@ def value_counts(self, dropna: bool = True) -> Series:
13371336
values = values.filter(mask)
13381337
counts = counts.filter(mask)
13391338

1340-
if pa.types.is_duration(pa_type):
1339+
if pa_version_under11p0 and pa.types.is_duration(pa_type):
13411340
values = values.cast(pa_type)
13421341

13431342
counts = ArrowExtensionArray(counts)

0 commit comments

Comments
 (0)