Skip to content

Commit fd9ca30

Browse files
authored
Merge branch 'main' into 57884-add-kwargs-to-pivot_table
2 parents f5f62c0 + 9e7abc8 commit fd9ca30

File tree

87 files changed

+1106
-886
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

87 files changed

+1106
-886
lines changed

.circleci/config.yml

+45-27
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,64 @@
11
version: 2.1
22

33
jobs:
4-
test-arm:
4+
test-linux-arm:
55
machine:
66
image: default
77
resource_class: arm.large
88
environment:
9-
ENV_FILE: ci/deps/circle-310-arm64.yaml
9+
ENV_FILE: ci/deps/circle-311-arm64.yaml
1010
PYTEST_WORKERS: auto
1111
PATTERN: "not single_cpu and not slow and not network and not clipboard and not arm_slow and not db"
1212
PYTEST_TARGET: "pandas"
1313
PANDAS_CI: "1"
1414
steps:
1515
- checkout
16-
- run: .circleci/setup_env.sh
17-
- run: >
18-
PATH=$HOME/miniconda3/envs/pandas-dev/bin:$HOME/miniconda3/condabin:$PATH
19-
LD_PRELOAD=$HOME/miniconda3/envs/pandas-dev/lib/libgomp.so.1:$LD_PRELOAD
20-
ci/run_tests.sh
21-
linux-musl:
16+
- run:
17+
name: Install Environment and Run Tests
18+
shell: /bin/bash -exuo pipefail
19+
command: |
20+
MAMBA_URL="https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Mambaforge-24.3.0-0-Linux-aarch64.sh"
21+
wget -q $MAMBA_URL -O minimamba.sh
22+
chmod +x minimamba.sh
23+
MAMBA_DIR="$HOME/miniconda3"
24+
rm -rf $MAMBA_DIR
25+
./minimamba.sh -b -p $MAMBA_DIR
26+
export PATH=$MAMBA_DIR/bin:$PATH
27+
conda info -a
28+
conda env create -q -n pandas-dev -f $ENV_FILE
29+
conda list -n pandas-dev
30+
source activate pandas-dev
31+
if pip show pandas 1>/dev/null; then
32+
pip uninstall -y pandas
33+
fi
34+
python -m pip install --no-build-isolation -ve . --config-settings=setup-args="--werror"
35+
PATH=$HOME/miniconda3/envs/pandas-dev/bin:$HOME/miniconda3/condabin:$PATH
36+
ci/run_tests.sh
37+
test-linux-musl:
2238
docker:
2339
- image: quay.io/pypa/musllinux_1_1_aarch64
2440
resource_class: arm.large
2541
steps:
2642
# Install pkgs first to have git in the image
2743
# (needed for checkout)
28-
- run: |
29-
apk update
30-
apk add git
31-
apk add musl-locales
44+
- run:
45+
name: Install System Packages
46+
command: |
47+
apk update
48+
apk add git
49+
apk add musl-locales
3250
- checkout
33-
- run: |
34-
/opt/python/cp311-cp311/bin/python -m venv ~/virtualenvs/pandas-dev
35-
. ~/virtualenvs/pandas-dev/bin/activate
36-
python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1
37-
python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1
38-
python -m pip install --no-cache-dir --no-build-isolation -e . --config-settings=setup-args="--werror"
39-
python -m pip list --no-cache-dir
40-
- run: |
41-
. ~/virtualenvs/pandas-dev/bin/activate
42-
export PANDAS_CI=1
43-
python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml
51+
- run:
52+
name: Install Environment and Run Tests
53+
command: |
54+
/opt/python/cp311-cp311/bin/python -m venv ~/virtualenvs/pandas-dev
55+
. ~/virtualenvs/pandas-dev/bin/activate
56+
python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1
57+
python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1
58+
python -m pip install --no-cache-dir --no-build-isolation -e . --config-settings=setup-args="--werror"
59+
python -m pip list --no-cache-dir
60+
export PANDAS_CI=1
61+
python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml
4462
build-aarch64:
4563
parameters:
4664
cibw-build:
@@ -71,7 +89,7 @@ jobs:
7189
name: Build aarch64 wheels
7290
no_output_timeout: 30m # Sometimes the tests won't generate any output, make sure the job doesn't get killed by that
7391
command: |
74-
pip3 install cibuildwheel==2.15.0
92+
pip3 install cibuildwheel==2.18.1
7593
cibuildwheel --prerelease-pythons --output-dir wheelhouse
7694
7795
environment:
@@ -81,7 +99,7 @@ jobs:
8199
name: Install Anaconda Client & Upload Wheels
82100
command: |
83101
echo "Install Mambaforge"
84-
MAMBA_URL="https://github.com/conda-forge/miniforge/releases/download/23.1.0-0/Mambaforge-23.1.0-0-Linux-aarch64.sh"
102+
MAMBA_URL="https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Mambaforge-24.3.0-0-Linux-aarch64.sh"
85103
echo "Downloading $MAMBA_URL"
86104
wget -q $MAMBA_URL -O minimamba.sh
87105
chmod +x minimamba.sh
@@ -107,14 +125,14 @@ workflows:
107125
not:
108126
equal: [ scheduled_pipeline, << pipeline.trigger_source >> ]
109127
jobs:
110-
- test-arm
128+
- test-linux-arm
111129
test-musl:
112130
# Don't run trigger this one when scheduled pipeline runs
113131
when:
114132
not:
115133
equal: [ scheduled_pipeline, << pipeline.trigger_source >> ]
116134
jobs:
117-
- linux-musl
135+
- test-linux-musl
118136
build-wheels:
119137
jobs:
120138
- build-aarch64:

.circleci/setup_env.sh

-60
This file was deleted.

.pre-commit-config.yaml

+3-3
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ ci:
1919
skip: [pyright, mypy]
2020
repos:
2121
- repo: https://github.com/astral-sh/ruff-pre-commit
22-
rev: v0.4.3
22+
rev: v0.4.7
2323
hooks:
2424
- id: ruff
2525
args: [--exit-non-zero-on-fix]
@@ -40,7 +40,7 @@ repos:
4040
pass_filenames: true
4141
require_serial: false
4242
- repo: https://github.com/codespell-project/codespell
43-
rev: v2.2.6
43+
rev: v2.3.0
4444
hooks:
4545
- id: codespell
4646
types_or: [python, rst, markdown, cython, c]
@@ -92,7 +92,7 @@ repos:
9292
- id: sphinx-lint
9393
args: ["--enable", "all", "--disable", "line-too-long"]
9494
- repo: https://github.com/pre-commit/mirrors-clang-format
95-
rev: v18.1.4
95+
rev: v18.1.5
9696
hooks:
9797
- id: clang-format
9898
files: ^pandas/_libs/src|^pandas/_libs/include

ci/code_checks.sh

-12
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
343343
-i "pandas.arrays.NumpyExtensionArray SA01" \
344344
-i "pandas.arrays.SparseArray PR07,SA01" \
345345
-i "pandas.arrays.TimedeltaArray PR07,SA01" \
346-
-i "pandas.bdate_range RT03,SA01" \
347346
-i "pandas.core.groupby.DataFrameGroupBy.__iter__ RT03,SA01" \
348347
-i "pandas.core.groupby.DataFrameGroupBy.agg RT03" \
349348
-i "pandas.core.groupby.DataFrameGroupBy.aggregate RT03" \
@@ -404,7 +403,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
404403
-i "pandas.core.window.rolling.Window.std PR01" \
405404
-i "pandas.core.window.rolling.Window.var PR01" \
406405
-i "pandas.date_range RT03" \
407-
-i "pandas.describe_option SA01" \
408406
-i "pandas.errors.AbstractMethodError PR01,SA01" \
409407
-i "pandas.errors.AttributeConflictWarning SA01" \
410408
-i "pandas.errors.CSSWarning SA01" \
@@ -470,24 +468,15 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
470468
-i "pandas.merge PR07" \
471469
-i "pandas.merge_asof PR07,RT03" \
472470
-i "pandas.merge_ordered PR07" \
473-
-i "pandas.option_context SA01" \
474471
-i "pandas.period_range RT03,SA01" \
475472
-i "pandas.pivot PR07" \
476-
-i "pandas.pivot_table PR07" \
477473
-i "pandas.plotting.andrews_curves RT03,SA01" \
478-
-i "pandas.plotting.autocorrelation_plot RT03,SA01" \
479474
-i "pandas.plotting.lag_plot RT03,SA01" \
480-
-i "pandas.plotting.parallel_coordinates PR07,RT03,SA01" \
481475
-i "pandas.plotting.scatter_matrix PR07,SA01" \
482-
-i "pandas.plotting.table PR07,RT03,SA01" \
483476
-i "pandas.qcut PR07,SA01" \
484-
-i "pandas.read_orc SA01" \
485477
-i "pandas.read_spss SA01" \
486-
-i "pandas.reset_option SA01" \
487478
-i "pandas.set_eng_float_format RT03,SA01" \
488-
-i "pandas.show_versions SA01" \
489479
-i "pandas.testing.assert_extension_array_equal SA01" \
490-
-i "pandas.testing.assert_series_equal PR07,SA01" \
491480
-i "pandas.tseries.offsets.BDay PR02,SA01" \
492481
-i "pandas.tseries.offsets.BQuarterBegin PR02" \
493482
-i "pandas.tseries.offsets.BQuarterBegin.freqstr SA01" \
@@ -779,7 +768,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
779768
-i "pandas.tseries.offsets.YearEnd.nanos GL08" \
780769
-i "pandas.tseries.offsets.YearEnd.normalize GL08" \
781770
-i "pandas.tseries.offsets.YearEnd.rule_code GL08" \
782-
-i "pandas.unique PR07" \
783771
-i "pandas.util.hash_pandas_object PR07,SA01" # There should be no backslash in the final line, please keep this comment in the last ignored function
784772

785773
RET=$(($RET + $?)) ; echo $MSG "DONE"

ci/deps/circle-310-arm64.yaml renamed to ci/deps/circle-311-arm64.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ name: pandas-dev
22
channels:
33
- conda-forge
44
dependencies:
5-
- python=3.10
5+
- python=3.11
66

77
# build dependencies
88
- versioneer[toml]

doc/source/getting_started/index.rst

+19-19
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,8 @@ to explore, clean, and process your data. In pandas, a data table is called a :c
134134
<div id="collapseTwo" class="collapse" data-parent="#accordion">
135135
<div class="card-body">
136136

137-
pandas supports the integration with many file formats or data sources out of the box (csv, excel, sql, json, parquet,…). Importing data from each of these
138-
data sources is provided by function with the prefix ``read_*``. Similarly, the ``to_*`` methods are used to store data.
137+
pandas supports the integration with many file formats or data sources out of the box (csv, excel, sql, json, parquet,…). The ability to import data from each of these
138+
data sources is provided by functions with the prefix ``read_*``. Similarly, the ``to_*`` methods are used to store data.
139139

140140
.. image:: ../_static/schemas/02_io_readwrite.svg
141141
:align: center
@@ -181,7 +181,7 @@ data sources is provided by function with the prefix ``read_*``. Similarly, the
181181
<div id="collapseThree" class="collapse" data-parent="#accordion">
182182
<div class="card-body">
183183

184-
Selecting or filtering specific rows and/or columns? Filtering the data on a condition? Methods for slicing, selecting, and extracting the
184+
Selecting or filtering specific rows and/or columns? Filtering the data on a particular condition? Methods for slicing, selecting, and extracting the
185185
data you need are available in pandas.
186186

187187
.. image:: ../_static/schemas/03_subset_columns_rows.svg
@@ -228,7 +228,7 @@ data you need are available in pandas.
228228
<div id="collapseFour" class="collapse" data-parent="#accordion">
229229
<div class="card-body">
230230

231-
pandas provides plotting your data out of the box, using the power of Matplotlib. You can pick the plot type (scatter, bar, boxplot,...)
231+
pandas provides plotting for your data right out of the box with the power of Matplotlib. Simply pick the plot type (scatter, bar, boxplot,...)
232232
corresponding to your data.
233233

234234
.. image:: ../_static/schemas/04_plot_overview.svg
@@ -275,7 +275,7 @@ corresponding to your data.
275275
<div id="collapseFive" class="collapse" data-parent="#accordion">
276276
<div class="card-body">
277277

278-
There is no need to loop over all rows of your data table to do calculations. Data manipulations on a column work elementwise.
278+
There's no need to loop over all rows of your data table to do calculations. Column data manipulations work elementwise in pandas.
279279
Adding a column to a :class:`DataFrame` based on existing data in other columns is straightforward.
280280

281281
.. image:: ../_static/schemas/05_newcolumn_2.svg
@@ -322,7 +322,7 @@ Adding a column to a :class:`DataFrame` based on existing data in other columns
322322
<div id="collapseSix" class="collapse" data-parent="#accordion">
323323
<div class="card-body">
324324

325-
Basic statistics (mean, median, min, max, counts...) are easily calculable. These or custom aggregations can be applied on the entire
325+
Basic statistics (mean, median, min, max, counts...) are easily calculable across data frames. These, or even custom aggregations, can be applied on the entire
326326
data set, a sliding window of the data, or grouped by categories. The latter is also known as the split-apply-combine approach.
327327

328328
.. image:: ../_static/schemas/06_groupby.svg
@@ -369,8 +369,8 @@ data set, a sliding window of the data, or grouped by categories. The latter is
369369
<div id="collapseSeven" class="collapse" data-parent="#accordion">
370370
<div class="card-body">
371371

372-
Change the structure of your data table in multiple ways. You can :func:`~pandas.melt` your data table from wide to long/tidy form or :func:`~pandas.pivot`
373-
from long to wide format. With aggregations built-in, a pivot table is created with a single command.
372+
Change the structure of your data table in a variety of ways. You can use :func:`~pandas.melt` to reshape your data from a wide format to a long and tidy one. Use :func:`~pandas.pivot`
373+
to go from long to wide format. With aggregations built-in, a pivot table can be created with a single command.
374374

375375
.. image:: ../_static/schemas/07_melt.svg
376376
:align: center
@@ -416,7 +416,7 @@ from long to wide format. With aggregations built-in, a pivot table is created w
416416
<div id="collapseEight" class="collapse" data-parent="#accordion">
417417
<div class="card-body">
418418

419-
Multiple tables can be concatenated both column wise and row wise as database-like join/merge operations are provided to combine multiple tables of data.
419+
Multiple tables can be concatenated column wise or row wise with pandas' database-like join and merge operations.
420420

421421
.. image:: ../_static/schemas/08_concat_row.svg
422422
:align: center
@@ -505,7 +505,7 @@ pandas has great support for time series and has an extensive set of tools for w
505505
<div id="collapseTen" class="collapse" data-parent="#accordion">
506506
<div class="card-body">
507507

508-
Data sets do not only contain numerical data. pandas provides a wide range of functions to clean textual data and extract useful information from it.
508+
Data sets often contain more than just numerical data. pandas provides a wide range of functions to clean textual data and extract useful information from it.
509509

510510
.. raw:: html
511511

@@ -551,9 +551,9 @@ the pandas-equivalent operations compared to software you already know:
551551
:class-card: comparison-card
552552
:shadow: md
553553

554-
The `R programming language <https://www.r-project.org/>`__ provides the
555-
``data.frame`` data structure and multiple packages, such as
556-
`tidyverse <https://www.tidyverse.org>`__ use and extend ``data.frame``
554+
The `R programming language <https://www.r-project.org/>`__ provides a
555+
``data.frame`` data structure as well as packages like
556+
`tidyverse <https://www.tidyverse.org>`__ which use and extend ``data.frame``
557557
for convenient data handling functionalities similar to pandas.
558558

559559
+++
@@ -572,8 +572,8 @@ the pandas-equivalent operations compared to software you already know:
572572
:class-card: comparison-card
573573
:shadow: md
574574

575-
Already familiar to ``SELECT``, ``GROUP BY``, ``JOIN``, etc.?
576-
Most of these SQL manipulations do have equivalents in pandas.
575+
Already familiar with ``SELECT``, ``GROUP BY``, ``JOIN``, etc.?
576+
Many SQL manipulations have equivalents in pandas.
577577

578578
+++
579579

@@ -631,10 +631,10 @@ the pandas-equivalent operations compared to software you already know:
631631
:class-card: comparison-card
632632
:shadow: md
633633

634-
The `SAS <https://en.wikipedia.org/wiki/SAS_(software)>`__ statistical software suite
635-
also provides the ``data set`` corresponding to the pandas ``DataFrame``.
636-
Also SAS vectorized operations, filtering, string processing operations,
637-
and more have similar functions in pandas.
634+
`SAS <https://en.wikipedia.org/wiki/SAS_(software)>`__, the statistical software suite,
635+
uses the ``data set`` structure, which closely corresponds to the pandas ``DataFrame``.
636+
Also, SAS vectorized operations, such as filtering or string processing,
637+
have similar functions in pandas.
638638

639639
+++
640640

doc/source/user_guide/missing_data.rst

+2-4
Original file line numberDiff line numberDiff line change
@@ -337,10 +337,8 @@ When taking the product, NA values or empty data will be treated as 1.
337337
pd.Series([], dtype="float64").prod()
338338
339339
Cumulative methods like :meth:`~DataFrame.cumsum` and :meth:`~DataFrame.cumprod`
340-
ignore NA values by default preserve them in the result. This behavior can be changed
341-
with ``skipna``
342-
343-
* Cumulative methods like :meth:`~DataFrame.cumsum` and :meth:`~DataFrame.cumprod` ignore NA values by default, but preserve them in the resulting arrays. To override this behaviour and include NA values, use ``skipna=False``.
340+
ignore NA values by default, but preserve them in the resulting array. To override
341+
this behaviour and include NA values in the calculation, use ``skipna=False``.
344342

345343

346344
.. ipython:: python

0 commit comments

Comments
 (0)