Skip to content

Commit c781b59

Browse files
committed
Merge remote-tracking branch 'upstream/2.3.x' into remove-read_json-datetime-deprecation-warning
2 parents 51ad07d + a24a653 commit c781b59

File tree

302 files changed

+4521
-2373
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

302 files changed

+4521
-2373
lines changed

.github/actions/setup-conda/action.yml

+6
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,9 @@ runs:
1616
condarc-file: ci/.condarc
1717
cache-environment: true
1818
cache-downloads: true
19+
20+
- name: Uninstall pyarrow
21+
if: ${{ env.REMOVE_PYARROW == '1' }}
22+
run: |
23+
micromamba remove -y pyarrow
24+
shell: bash -el {0}

.github/workflows/unit-tests.yml

+13-2
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ jobs:
2929
env_file: [actions-39.yaml, actions-310.yaml, actions-311.yaml, actions-312.yaml]
3030
# Prevent the include jobs from overriding other jobs
3131
pattern: [""]
32+
pandas_future_infer_string: ["0"]
3233
include:
3334
- name: "Downstream Compat"
3435
env_file: actions-311-downstream_compat.yaml
@@ -85,6 +86,14 @@ jobs:
8586
env_file: actions-39.yaml
8687
pattern: "not slow and not network and not single_cpu"
8788
pandas_copy_on_write: "warn"
89+
- name: "Future infer strings"
90+
env_file: actions-312.yaml
91+
pandas_future_infer_string: "1"
92+
pandas_copy_on_write: "1"
93+
- name: "Future infer strings (without pyarrow)"
94+
env_file: actions-311.yaml
95+
pandas_future_infer_string: "1"
96+
pandas_copy_on_write: "1"
8897
- name: "Pypy"
8998
env_file: actions-pypy-39.yaml
9099
pattern: "not slow and not network and not single_cpu"
@@ -103,16 +112,18 @@ jobs:
103112
LANG: ${{ matrix.lang || 'C.UTF-8' }}
104113
LC_ALL: ${{ matrix.lc_all || '' }}
105114
PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }}
106-
PANDAS_CI: ${{ matrix.pandas_ci || '1' }}
115+
PANDAS_CI: '1'
116+
PANDAS_FUTURE_INFER_STRING: ${{ matrix.pandas_future_infer_string || '0' }}
107117
TEST_ARGS: ${{ matrix.test_args || '' }}
108118
PYTEST_WORKERS: ${{ matrix.pytest_workers || 'auto' }}
109119
PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
110120
NPY_PROMOTION_STATE: ${{ matrix.env_file == 'actions-311-numpydev.yaml' && 'weak' || 'legacy' }}
111121
# Clipboard tests
112122
QT_QPA_PLATFORM: offscreen
123+
REMOVE_PYARROW: ${{ matrix.name == 'Future infer strings (without pyarrow)' && '1' || '0' }}
113124
concurrency:
114125
# https://github.community/t/concurrecy-not-work-for-push/183068/7
115-
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_copy_on_write || '' }}
126+
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_copy_on_write || '' }}-${{ matrix.pandas_future_infer_string }}
116127
cancel-in-progress: true
117128

118129
services:

.github/workflows/wheels.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ jobs:
150150
run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"
151151

152152
- name: Build wheels
153-
uses: pypa/cibuildwheel@v2.20.0
153+
uses: pypa/cibuildwheel@v2.21.0
154154
with:
155155
package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
156156
env:

.pre-commit-config.yaml

-7
Original file line numberDiff line numberDiff line change
@@ -274,13 +274,6 @@ repos:
274274
language: python
275275
types: [rst]
276276
files: ^doc/source/(development|reference)/
277-
- id: unwanted-patterns-bare-pytest-raises
278-
name: Check for use of bare pytest raises
279-
language: python
280-
entry: python scripts/validate_unwanted_patterns.py --validation-type="bare_pytest_raises"
281-
types: [python]
282-
files: ^pandas/tests/
283-
exclude: ^pandas/tests/extension/
284277
- id: unwanted-patterns-private-function-across-module
285278
name: Check for use of private functions across modules
286279
language: python

doc/source/whatsnew/index.rst

+8
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,14 @@ This is the list of changes to pandas between each release. For full details,
1010
see the `commit logs <https://github.com/pandas-dev/pandas/commits/>`_. For install and
1111
upgrade instructions, see :ref:`install`.
1212

13+
Version 2.3
14+
-----------
15+
16+
.. toctree::
17+
:maxdepth: 2
18+
19+
v2.3.0
20+
1321
Version 2.2
1422
-----------
1523

doc/source/whatsnew/v2.3.0.rst

+180
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
.. _whatsnew_230:
2+
3+
What's new in 2.3.0 (Month XX, 2024)
4+
------------------------------------
5+
6+
These are the changes in pandas 2.3.0. See :ref:`release` for a full changelog
7+
including other versions of pandas.
8+
9+
{{ header }}
10+
11+
.. ---------------------------------------------------------------------------
12+
13+
.. _whatsnew_230.upcoming_changes:
14+
15+
Upcoming changes in pandas 3.0
16+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
17+
18+
19+
.. _whatsnew_230.enhancements:
20+
21+
Enhancements
22+
~~~~~~~~~~~~
23+
24+
.. _whatsnew_230.enhancements.enhancement1:
25+
26+
enhancement1
27+
^^^^^^^^^^^^
28+
29+
30+
.. _whatsnew_230.enhancements.other:
31+
32+
Other enhancements
33+
^^^^^^^^^^^^^^^^^^
34+
35+
-
36+
-
37+
38+
.. ---------------------------------------------------------------------------
39+
.. _whatsnew_230.notable_bug_fixes:
40+
41+
Notable bug fixes
42+
~~~~~~~~~~~~~~~~~
43+
44+
These are bug fixes that might have notable behavior changes.
45+
46+
.. _whatsnew_230.notable_bug_fixes.notable_bug_fix1:
47+
48+
notable_bug_fix1
49+
^^^^^^^^^^^^^^^^
50+
51+
.. ---------------------------------------------------------------------------
52+
.. _whatsnew_230.deprecations:
53+
54+
Deprecations
55+
~~~~~~~~~~~~
56+
- Deprecated allowing non-``bool`` values for ``na`` in :meth:`.str.contains`, :meth:`.str.startswith`, and :meth:`.str.endswith` for dtypes that do not already disallow these (:issue:`59615`)
57+
-
58+
59+
.. ---------------------------------------------------------------------------
60+
.. _whatsnew_230.performance:
61+
62+
Performance improvements
63+
~~~~~~~~~~~~~~~~~~~~~~~~
64+
-
65+
-
66+
67+
.. ---------------------------------------------------------------------------
68+
.. _whatsnew_230.bug_fixes:
69+
70+
Bug fixes
71+
~~~~~~~~~
72+
73+
Categorical
74+
^^^^^^^^^^^
75+
-
76+
-
77+
78+
Datetimelike
79+
^^^^^^^^^^^^
80+
-
81+
-
82+
83+
Timedelta
84+
^^^^^^^^^
85+
-
86+
-
87+
88+
Timezones
89+
^^^^^^^^^
90+
-
91+
-
92+
93+
Numeric
94+
^^^^^^^
95+
-
96+
-
97+
98+
Conversion
99+
^^^^^^^^^^
100+
-
101+
-
102+
103+
Strings
104+
^^^^^^^
105+
- Bug in :meth:`Series.rank` for :class:`StringDtype` with ``storage="pyarrow"`` incorrectly returning integer results in case of ``method="average"`` and raising an error if it would truncate results (:issue:`59768`)
106+
- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`)
107+
- Bug in ``ser.str.slice`` with negative ``step`` with :class:`ArrowDtype` and :class:`StringDtype` with ``storage="pyarrow"`` giving incorrect results (:issue:`59710`)
108+
- Bug in the ``center`` method on :class:`Series` and :class:`Index` object ``str`` accessors with pyarrow-backed dtype not matching the Python behavior in corner cases with an odd number of fill characters (:issue:`54792`)
109+
-
110+
111+
Interval
112+
^^^^^^^^
113+
-
114+
-
115+
116+
Indexing
117+
^^^^^^^^
118+
-
119+
-
120+
121+
Missing
122+
^^^^^^^
123+
-
124+
-
125+
126+
MultiIndex
127+
^^^^^^^^^^
128+
-
129+
-
130+
131+
I/O
132+
^^^
133+
-
134+
-
135+
136+
Period
137+
^^^^^^
138+
-
139+
-
140+
141+
Plotting
142+
^^^^^^^^
143+
-
144+
-
145+
146+
Groupby/resample/rolling
147+
^^^^^^^^^^^^^^^^^^^^^^^^
148+
-
149+
-
150+
151+
Reshaping
152+
^^^^^^^^^
153+
-
154+
-
155+
156+
Sparse
157+
^^^^^^
158+
-
159+
-
160+
161+
ExtensionArray
162+
^^^^^^^^^^^^^^
163+
-
164+
-
165+
166+
Styler
167+
^^^^^^
168+
-
169+
-
170+
171+
Other
172+
^^^^^
173+
-
174+
-
175+
176+
.. ---------------------------------------------------------------------------
177+
.. _whatsnew_230.contributors:
178+
179+
Contributors
180+
~~~~~~~~~~~~

pandas/_config/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,6 @@ def using_nullable_dtypes() -> bool:
5252
return _mode_options["nullable_dtypes"]
5353

5454

55-
def using_pyarrow_string_dtype() -> bool:
55+
def using_string_dtype() -> bool:
5656
_mode_options = _global_config["future"]
5757
return _mode_options["infer_string"]

pandas/_libs/arrays.pyx

+4
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ cdef class NDArrayBacked:
6767
"""
6868
Construct a new ExtensionArray `new_array` with `arr` as its _ndarray.
6969
70+
The returned array has the same dtype as self.
71+
72+
Caller is responsible for ensuring `values.dtype == self._ndarray.dtype`.
73+
7074
This should round-trip:
7175
self == self._from_backing_data(self._ndarray)
7276
"""

pandas/_libs/hashtable.pyx

+4-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,10 @@ from pandas._libs.khash cimport (
3333
kh_python_hash_func,
3434
khiter_t,
3535
)
36-
from pandas._libs.missing cimport checknull
36+
from pandas._libs.missing cimport (
37+
checknull,
38+
is_matching_na,
39+
)
3740

3841

3942
def get_hashtable_trace_domain():

pandas/_libs/hashtable_class_helper.pxi.in

+15-3
Original file line numberDiff line numberDiff line change
@@ -1121,11 +1121,13 @@ cdef class StringHashTable(HashTable):
11211121
const char **vecs
11221122
khiter_t k
11231123
bint use_na_value
1124+
bint non_null_na_value
11241125

11251126
if return_inverse:
11261127
labels = np.zeros(n, dtype=np.intp)
11271128
uindexer = np.empty(n, dtype=np.int64)
11281129
use_na_value = na_value is not None
1130+
non_null_na_value = not checknull(na_value)
11291131

11301132
# assign pointers and pre-filter out missing (if ignore_na)
11311133
vecs = <const char **>malloc(n * sizeof(char *))
@@ -1134,7 +1136,12 @@ cdef class StringHashTable(HashTable):
11341136

11351137
if (ignore_na
11361138
and (not isinstance(val, str)
1137-
or (use_na_value and val == na_value))):
1139+
or (use_na_value and (
1140+
(non_null_na_value and val == na_value) or
1141+
(not non_null_na_value and is_matching_na(val, na_value)))
1142+
)
1143+
)
1144+
):
11381145
# if missing values do not count as unique values (i.e. if
11391146
# ignore_na is True), we can skip the actual value, and
11401147
# replace the label with na_sentinel directly
@@ -1400,18 +1407,23 @@ cdef class PyObjectHashTable(HashTable):
14001407
object val
14011408
khiter_t k
14021409
bint use_na_value
1403-
1410+
bint non_null_na_value
14041411
if return_inverse:
14051412
labels = np.empty(n, dtype=np.intp)
14061413
use_na_value = na_value is not None
1414+
non_null_na_value = not checknull(na_value)
14071415

14081416
for i in range(n):
14091417
val = values[i]
14101418
hash(val)
14111419

14121420
if ignore_na and (
14131421
checknull(val)
1414-
or (use_na_value and val == na_value)
1422+
or (use_na_value and (
1423+
(non_null_na_value and val == na_value) or
1424+
(not non_null_na_value and is_matching_na(val, na_value))
1425+
)
1426+
)
14151427
):
14161428
# if missing values do not count as unique values (i.e. if
14171429
# ignore_na is True), skip the hashtable entry for them, and

0 commit comments

Comments
 (0)