Skip to content

Commit 54e3d02

Browse files
authored
Merge branch 'main' into issue#26195
2 parents 9c231d0 + 0e8c730 commit 54e3d02

File tree

204 files changed

+3629
-1356
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

204 files changed

+3629
-1356
lines changed

.circleci/config.yml

+4-3
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ jobs:
4747
- run:
4848
name: Build aarch64 wheels
4949
command: |
50-
pip3 install cibuildwheel==2.12.1
51-
cibuildwheel --output-dir wheelhouse
50+
pip3 install cibuildwheel==2.14.1
51+
cibuildwheel --prerelease-pythons --output-dir wheelhouse
5252
environment:
5353
CIBW_BUILD: << parameters.cibw-build >>
5454

@@ -91,4 +91,5 @@ workflows:
9191
only: /^v.*/
9292
matrix:
9393
parameters:
94-
cibw-build: ["cp39-manylinux_aarch64", "cp310-manylinux_aarch64", "cp311-manylinux_aarch64"]
94+
# TODO: Enable Python 3.12 wheels when numpy releases a version that supports Python 3.12
95+
cibw-build: ["cp39-manylinux_aarch64", "cp310-manylinux_aarch64", "cp311-manylinux_aarch64"]#, "cp312-manylinux_aarch64"]

.github/CODEOWNERS

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ web/ @datapythonista
99

1010
# docs
1111
doc/cheatsheet @Dr-Irv
12+
doc/source/development @noatamir
1213

1314
# pandas
1415
pandas/_libs/ @WillAyd

.github/workflows/unit-tests.yml

+16-11
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ jobs:
103103

104104
services:
105105
mysql:
106-
image: mysql
106+
image: mysql:8.0.33
107107
env:
108108
MYSQL_ALLOW_EMPTY_PASSWORD: yes
109109
MYSQL_DATABASE: pandas
@@ -116,8 +116,9 @@ jobs:
116116
- 3306:3306
117117

118118
postgres:
119-
image: postgres
119+
image: postgres:13
120120
env:
121+
PGUSER: postgres
121122
POSTGRES_USER: postgres
122123
POSTGRES_PASSWORD: postgres
123124
POSTGRES_DB: pandas
@@ -130,7 +131,7 @@ jobs:
130131
- 5432:5432
131132

132133
moto:
133-
image: motoserver/moto:4.1.12
134+
image: motoserver/moto:4.1.13
134135
env:
135136
AWS_ACCESS_KEY_ID: foobar_key
136137
AWS_SECRET_ACCESS_KEY: foobar_secret
@@ -237,7 +238,7 @@ jobs:
237238
run: |
238239
/opt/python/cp311-cp311/bin/python -m venv ~/virtualenvs/pandas-dev
239240
. ~/virtualenvs/pandas-dev/bin/activate
240-
python -m pip install -U pip wheel setuptools meson[ninja]==1.0.1 meson-python==0.13.1
241+
python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.0.1 meson-python==0.13.1
241242
python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.46.1
242243
python -m pip install --no-cache-dir --no-build-isolation -e .
243244
python -m pip list --no-cache-dir
@@ -275,7 +276,7 @@ jobs:
275276
run: |
276277
/opt/python/cp311-cp311/bin/python -m venv ~/virtualenvs/pandas-dev
277278
. ~/virtualenvs/pandas-dev/bin/activate
278-
python -m pip install -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.0.1
279+
python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.0.1
279280
python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.46.1
280281
python -m pip install --no-cache-dir --no-build-isolation -e .
281282
python -m pip list --no-cache-dir
@@ -310,12 +311,16 @@ jobs:
310311
# To freeze this file, uncomment the ``if: false`` condition, and migrate the jobs
311312
# to the corresponding posix/windows-macos/sdist etc. workflows.
312313
# Feel free to modify this comment as necessary.
313-
if: false # Uncomment this to freeze the workflow, comment it to unfreeze
314+
#if: false # Uncomment this to freeze the workflow, comment it to unfreeze
314315
runs-on: ${{ matrix.os }}
315316
strategy:
316317
fail-fast: false
317318
matrix:
318-
os: [ubuntu-22.04, macOS-latest, windows-latest]
319+
# TODO: Re-enable macOS; disabled for now due to a GitHub Actions bug where python is not
320+
# symlinked correctly to 3.12
321+
# xref https://github.com/actions/setup-python/issues/701
322+
#os: [ubuntu-22.04, macOS-latest, windows-latest]
323+
os: [ubuntu-22.04, windows-latest]
319324

320325
timeout-minutes: 180
321326

@@ -339,21 +344,21 @@ jobs:
339344
- name: Set up Python Dev Version
340345
uses: actions/setup-python@v4
341346
with:
342-
python-version: '3.11-dev'
347+
python-version: '3.12-dev'
343348

344349
- name: Install dependencies
345350
run: |
346351
python --version
347-
python -m pip install --upgrade pip setuptools wheel
352+
python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.0.1 meson-python==0.13.1
348353
python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy
349354
python -m pip install git+https://github.com/nedbat/coveragepy.git
350355
python -m pip install versioneer[toml]
351-
python -m pip install python-dateutil pytz cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
356+
python -m pip install python-dateutil pytz tzdata cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
352357
python -m pip list
353358
354359
- name: Build Pandas
355360
run: |
356-
python -m pip install -e . --no-build-isolation --no-index
361+
python -m pip install -ve . --no-build-isolation --no-index
357362
358363
- name: Build Version
359364
run: |

.github/workflows/wheels.yml

+3-1
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,8 @@ jobs:
9393
- [macos-12, macosx_*]
9494
- [windows-2022, win_amd64]
9595
# TODO: support PyPy?
96-
python: [["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11"]]
96+
# TODO: Enable Python 3.12 wheels when numpy releases a version that supports Python 3.12
97+
python: [["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11"]]#, ["cp312", "3.12"]]
9798
env:
9899
IS_PUSH: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') }}
99100
IS_SCHEDULE_DISPATCH: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
@@ -117,6 +118,7 @@ jobs:
117118
#with:
118119
# package-dir: ./dist/${{ needs.build_sdist.outputs.sdist_file }}
119120
env:
121+
CIBW_PRERELEASE_PYTHONS: True
120122
CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}
121123

122124
- name: Set up Python

CITATION.cff

+4
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,12 @@ title: 'pandas-dev/pandas: Pandas'
33
message: 'If you use this software, please cite it as below.'
44
authors:
55
- name: "The pandas development team"
6+
abstract: "Pandas provides powerful data structures for data analysis, time series, and statistics."
67
license: BSD-3-Clause
78
license-url: "https://github.com/pandas-dev/pandas/blob/main/LICENSE"
89
repository-code: "https://github.com/pandas-dev/pandas"
10+
keywords:
11+
- python
12+
- data science
913
type: software
1014
url: "https://github.com/pandas-dev/pandas"

asv_bench/benchmarks/frame_methods.py

+5-8
Original file line numberDiff line numberDiff line change
@@ -565,10 +565,7 @@ def time_frame_object_unequal(self):
565565

566566

567567
class Interpolate:
568-
params = [None, "infer"]
569-
param_names = ["downcast"]
570-
571-
def setup(self, downcast):
568+
def setup(self):
572569
N = 10000
573570
# this is the worst case, where every column has NaNs.
574571
arr = np.random.randn(N, 100)
@@ -589,11 +586,11 @@ def setup(self, downcast):
589586
self.df2.loc[1::5, "A"] = np.nan
590587
self.df2.loc[1::5, "C"] = np.nan
591588

592-
def time_interpolate(self, downcast):
593-
self.df.interpolate(downcast=downcast)
589+
def time_interpolate(self):
590+
self.df.interpolate()
594591

595-
def time_interpolate_some_good(self, downcast):
596-
self.df2.interpolate(downcast=downcast)
592+
def time_interpolate_some_good(self):
593+
self.df2.interpolate()
597594

598595

599596
class Shift:

asv_bench/benchmarks/io/csv.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,7 @@ def setup(self, sep, thousands, engine):
341341
if thousands is not None:
342342
fmt = f":{thousands}"
343343
fmt = "{" + fmt + "}"
344-
df = df.applymap(lambda x: fmt.format(x))
344+
df = df.map(lambda x: fmt.format(x))
345345
df.to_csv(self.fname, sep=sep)
346346

347347
def time_thousands(self, sep, thousands, engine):

asv_bench/benchmarks/io/excel.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,9 @@ def time_write_excel_style(self, engine):
5757
bio.seek(0)
5858
with ExcelWriter(bio, engine=engine) as writer:
5959
df_style = self.df.style
60-
df_style.applymap(lambda x: "border: red 1px solid;")
61-
df_style.applymap(lambda x: "color: blue")
62-
df_style.applymap(lambda x: "border-color: green black", subset=["float1"])
60+
df_style.map(lambda x: "border: red 1px solid;")
61+
df_style.map(lambda x: "color: blue")
62+
df_style.map(lambda x: "border-color: green black", subset=["float1"])
6363
df_style.to_excel(writer, sheet_name="Sheet1")
6464

6565

asv_bench/benchmarks/io/style.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def _apply_func(s):
6666
self.st = self.df.style.apply(_apply_func, axis=1)
6767

6868
def _style_classes(self):
69-
classes = self.df.applymap(lambda v: ("cls-1" if v > 0 else ""))
69+
classes = self.df.map(lambda v: ("cls-1" if v > 0 else ""))
7070
classes.index, classes.columns = self.df.index, self.df.columns
7171
self.st = self.df.style.set_td_classes(classes)
7272

@@ -80,7 +80,7 @@ def _style_format(self):
8080
)
8181

8282
def _style_apply_format_hide(self):
83-
self.st = self.df.style.applymap(lambda v: "color: red;")
83+
self.st = self.df.style.map(lambda v: "color: red;")
8484
self.st.format("{:.3f}")
8585
self.st.hide(self.st.index[1:], axis=0)
8686
self.st.hide(self.st.columns[1:], axis=1)

ci/code_checks.sh

-48
Original file line numberDiff line numberDiff line change
@@ -63,74 +63,26 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
6363

6464
MSG='Partially validate docstrings (EX01)' ; echo $MSG
6565
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX01 --ignore_functions \
66-
pandas.errors.IncompatibilityWarning \
67-
pandas.errors.InvalidComparison \
68-
pandas.errors.IntCastingNaNError \
69-
pandas.errors.LossySetitemError \
70-
pandas.errors.MergeError \
71-
pandas.errors.NoBufferPresent \
72-
pandas.errors.NullFrequencyError \
73-
pandas.errors.NumbaUtilError \
74-
pandas.errors.OptionError \
75-
pandas.errors.OutOfBoundsDatetime \
76-
pandas.errors.OutOfBoundsTimedelta \
77-
pandas.errors.ParserError \
7866
pandas.errors.PerformanceWarning \
7967
pandas.errors.PyperclipException \
8068
pandas.errors.PyperclipWindowsException \
8169
pandas.errors.UnsortedIndexError \
8270
pandas.errors.UnsupportedFunctionCall \
83-
pandas.test \
8471
pandas.NaT \
85-
pandas.read_feather \
86-
pandas.DataFrame.to_feather \
87-
pandas.read_orc \
88-
pandas.read_sas \
89-
pandas.read_spss \
90-
pandas.read_sql_query \
9172
pandas.io.stata.StataReader.data_label \
9273
pandas.io.stata.StataReader.value_labels \
9374
pandas.io.stata.StataReader.variable_labels \
9475
pandas.io.stata.StataWriter.write_file \
9576
pandas.plotting.deregister_matplotlib_converters \
96-
pandas.plotting.plot_params \
9777
pandas.plotting.register_matplotlib_converters \
98-
pandas.plotting.table \
99-
pandas.util.hash_array \
100-
pandas.util.hash_pandas_object \
101-
pandas_object \
102-
pandas.api.interchange.from_dataframe \
103-
pandas.DatetimeIndex.snap \
104-
pandas.api.indexers.BaseIndexer \
105-
pandas.api.indexers.VariableOffsetWindowIndexer \
10678
pandas.api.extensions.ExtensionDtype \
10779
pandas.api.extensions.ExtensionArray \
10880
pandas.arrays.NumpyExtensionArray \
109-
pandas.api.extensions.ExtensionArray._accumulate \
110-
pandas.api.extensions.ExtensionArray._concat_same_type \
111-
pandas.api.extensions.ExtensionArray._formatter \
112-
pandas.api.extensions.ExtensionArray._from_factorized \
113-
pandas.api.extensions.ExtensionArray._from_sequence \
11481
pandas.api.extensions.ExtensionArray._from_sequence_of_strings \
11582
pandas.api.extensions.ExtensionArray._hash_pandas_object \
11683
pandas.api.extensions.ExtensionArray._reduce \
11784
pandas.api.extensions.ExtensionArray._values_for_factorize \
118-
pandas.api.extensions.ExtensionArray.dropna \
119-
pandas.api.extensions.ExtensionArray.equals \
120-
pandas.api.extensions.ExtensionArray.factorize \
121-
pandas.api.extensions.ExtensionArray.fillna \
122-
pandas.api.extensions.ExtensionArray.insert \
12385
pandas.api.extensions.ExtensionArray.interpolate \
124-
pandas.api.extensions.ExtensionArray.isin \
125-
pandas.api.extensions.ExtensionArray.isna \
126-
pandas.api.extensions.ExtensionArray.ravel \
127-
pandas.api.extensions.ExtensionArray.searchsorted \
128-
pandas.api.extensions.ExtensionArray.shift \
129-
pandas.api.extensions.ExtensionArray.unique \
130-
pandas.api.extensions.ExtensionArray.ndim \
131-
pandas.api.extensions.ExtensionArray.shape \
132-
pandas.api.extensions.ExtensionArray.tolist \
133-
pandas.DataFrame.__dataframe__
13486
RET=$(($RET + $?)) ; echo $MSG "DONE"
13587

13688
fi

ci/meta.yaml

+2-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ requirements:
3434
- tomli # [py<311]
3535
run:
3636
- python
37-
- {{ pin_compatible('numpy') }}
37+
- numpy >=1.21.6 # [py<311]
38+
- numpy >=1.23.2 # [py>=311]
3839
- python-dateutil >=2.8.2
3940
- pytz >=2020.1
4041
- python-tzdata >=2022.1

doc/source/conf.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -348,10 +348,8 @@
348348
methods = [
349349
x for x in dir(klass) if not x.startswith("_") or x in ("__iter__", "__array__")
350350
]
351-
352-
for method in methods:
353-
# ... and each of its public methods
354-
moved_api_pages.append((f"{old}.{method}", f"{new}.{method}"))
351+
# ... and each of its public methods
352+
moved_api_pages.extend((f"{old}.{method}", f"{new}.{method}") for method in methods)
355353

356354
if include_api:
357355
html_additional_pages = {

doc/source/user_guide/categorical.rst

+1
Original file line numberDiff line numberDiff line change
@@ -779,6 +779,7 @@ Setting values by assigning categorical data will also check that the ``categori
779779
Assigning a ``Categorical`` to parts of a column of other types will use the values:
780780

781781
.. ipython:: python
782+
:okwarning:
782783
783784
df = pd.DataFrame({"a": [1, 1, 1, 1, 1], "b": ["a", "a", "a", "a", "a"]})
784785
df.loc[1:2, "a"] = pd.Categorical(["b", "b"], categories=["a", "b"])

doc/source/user_guide/indexing.rst

+12-1
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ columns.
155155

156156
.. warning::
157157

158-
pandas aligns all AXES when setting ``Series`` and ``DataFrame`` from ``.loc``, and ``.iloc``.
158+
pandas aligns all AXES when setting ``Series`` and ``DataFrame`` from ``.loc``.
159159

160160
This will **not** modify ``df`` because the column alignment is before value assignment.
161161

@@ -172,6 +172,17 @@ columns.
172172
df.loc[:, ['B', 'A']] = df[['A', 'B']].to_numpy()
173173
df[['A', 'B']]
174174
175+
However, pandas does not align AXES when setting ``Series`` and ``DataFrame`` from ``.iloc``
176+
because ``.iloc`` operates by position.
177+
178+
This will modify ``df`` because the column alignment is not done before value assignment.
179+
180+
.. ipython:: python
181+
182+
df[['A', 'B']]
183+
df.iloc[:, [1, 0]] = df[['A', 'B']]
184+
df[['A','B']]
185+
175186
176187
Attribute access
177188
----------------

doc/source/user_guide/io.rst

+2-8
Original file line numberDiff line numberDiff line change
@@ -931,6 +931,8 @@ Parsing a CSV with mixed timezones
931931
pandas cannot natively represent a column or index with mixed timezones. If your CSV
932932
file contains columns with a mixture of timezones, the default result will be
933933
an object-dtype column with strings, even with ``parse_dates``.
934+
To parse the mixed-timezone values as a datetime column, read in as ``object`` dtype and
935+
then call :func:`to_datetime` with ``utc=True``.
934936

935937

936938
.. ipython:: python
@@ -939,14 +941,6 @@ an object-dtype column with strings, even with ``parse_dates``.
939941
a
940942
2000-01-01T00:00:00+05:00
941943
2000-01-01T00:00:00+06:00"""
942-
df = pd.read_csv(StringIO(content), parse_dates=["a"])
943-
df["a"]
944-
945-
To parse the mixed-timezone values as a datetime column, read in as ``object`` dtype and
946-
then call :func:`to_datetime` with ``utc=True``.
947-
948-
.. ipython:: python
949-
950944
df = pd.read_csv(StringIO(content))
951945
df["a"] = pd.to_datetime(df["a"], utc=True)
952946
df["a"]

0 commit comments

Comments
 (0)