Skip to content

Commit 40f2b24

Browse files
Merge remote-tracking branch 'upstream/main' into blockmanager-cow
2 parents 4b1ccf6 + b99ec4a commit 40f2b24

File tree

318 files changed

+5907
-2983
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

318 files changed

+5907
-2983
lines changed

.circleci/config.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ jobs:
1313
PANDAS_CI: "1"
1414
steps:
1515
- checkout
16-
- run: ci/setup_env.sh
16+
- run: .circleci/setup_env.sh
1717
- run: PATH=$HOME/miniconda3/envs/pandas-dev/bin:$HOME/miniconda3/condabin:$PATH ci/run_tests.sh
1818

1919
workflows:

ci/setup_env.sh renamed to .circleci/setup_env.sh

+1-4
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,7 @@ echo
5151
echo "update conda"
5252
conda config --set ssl_verify false
5353
conda config --set quiet true --set always_yes true --set changeps1 false
54-
# TODO: GH#44980 https://github.com/pypa/setuptools/issues/2941
55-
conda install -y -c conda-forge -n base 'mamba>=0.21.2' pip
54+
conda install -y -c conda-forge -n base 'mamba>=0.21.2' pip setuptools
5655

5756
echo "conda info -a"
5857
conda info -a
@@ -67,8 +66,6 @@ echo "mamba env update --file=${ENV_FILE}"
6766
# See https://github.com/mamba-org/mamba/issues/633
6867
mamba create -q -n pandas-dev
6968
time mamba env update -n pandas-dev --file="${ENV_FILE}"
70-
# TODO: GH#44980 https://github.com/pypa/setuptools/issues/2941
71-
mamba install -n pandas-dev 'setuptools<60'
7269

7370
echo "conda list -n pandas-dev"
7471
conda list -n pandas-dev

.github/PULL_REQUEST_TEMPLATE.md

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
- [ ] closes #xxxx (Replace xxxx with the GitHub issue number)
22
- [ ] [Tests added and passed](https://pandas.pydata.org/pandas-docs/dev/development/contributing_codebase.html#writing-tests) if fixing a bug or adding a new feature
33
- [ ] All [code checks passed](https://pandas.pydata.org/pandas-docs/dev/development/contributing_codebase.html#pre-commit).
4+
- [ ] Added [type annotations](https://pandas.pydata.org/pandas-docs/dev/development/contributing_codebase.html#type-hints) to new arguments/methods/functions.
45
- [ ] Added an entry in the latest `doc/source/whatsnew/vX.X.X.rst` file if fixing a bug or adding a new feature.
+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
name: Set up Conda environment
2+
inputs:
3+
environment-file:
4+
description: Conda environment file to use.
5+
default: environment.yml
6+
pyarrow-version:
7+
description: If set, overrides the PyArrow version in the Conda environment to the given string.
8+
required: false
9+
runs:
10+
using: composite
11+
steps:
12+
- name: Set Arrow version in ${{ inputs.environment-file }} to ${{ inputs.pyarrow-version }}
13+
run: |
14+
grep -q ' - pyarrow' ${{ inputs.environment-file }}
15+
sed -i"" -e "s/ - pyarrow/ - pyarrow=${{ inputs.pyarrow-version }}/" ${{ inputs.environment-file }}
16+
cat ${{ inputs.environment-file }}
17+
shell: bash
18+
if: ${{ inputs.pyarrow-version }}
19+
20+
- name: Install ${{ inputs.environment-file }}
21+
uses: conda-incubator/setup-miniconda@v2
22+
with:
23+
environment-file: ${{ inputs.environment-file }}
24+
channel-priority: ${{ runner.os == 'macOS' && 'flexible' || 'strict' }}
25+
channels: conda-forge
26+
mamba-version: "0.23"
27+
use-mamba: true

.github/actions/setup/action.yml

-12
This file was deleted.

.github/workflows/32-bit-linux.yml

+6
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,15 @@ jobs:
2323

2424
- name: Run 32-bit manylinux2014 Docker Build / Tests
2525
run: |
26+
# Without this (line 34), versioneer will not be able to determine the pandas version.
27+
# This is because of a security update to git that blocks it from reading the config folder if
28+
# it is not owned by the current user. We hit this since the "mounted" folder is not owned by the user inside the
29+
# Docker container.
30+
# xref https://github.com/pypa/manylinux/issues/1309
2631
docker pull quay.io/pypa/manylinux2014_i686
2732
docker run --platform linux/386 -v $(pwd):/pandas quay.io/pypa/manylinux2014_i686 \
2833
/bin/bash -xc "cd pandas && \
34+
git config --global --add safe.directory /pandas && \
2935
/opt/python/cp38-cp38/bin/python -m venv ~/virtualenvs/pandas-dev && \
3036
. ~/virtualenvs/pandas-dev/bin/activate && \
3137
python -m pip install --no-deps -U pip wheel 'setuptools<60.0.0' && \

.github/workflows/docbuild-and-upload.yml

+14-11
Original file line numberDiff line numberDiff line change
@@ -24,43 +24,46 @@ jobs:
2424
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-web-docs
2525
cancel-in-progress: true
2626

27+
defaults:
28+
run:
29+
shell: bash -el {0}
30+
2731
steps:
2832
- name: Checkout
2933
uses: actions/checkout@v3
3034
with:
3135
fetch-depth: 0
3236

33-
- name: Set up pandas
34-
uses: ./.github/actions/setup
37+
- name: Set up Conda
38+
uses: ./.github/actions/setup-conda
39+
40+
- name: Build Pandas
41+
uses: ./.github/actions/build_pandas
3542

3643
- name: Build website
37-
run: |
38-
source activate pandas-dev
39-
python web/pandas_web.py web/pandas --target-path=web/build
44+
run: python web/pandas_web.py web/pandas --target-path=web/build
4045

4146
- name: Build documentation
42-
run: |
43-
source activate pandas-dev
44-
doc/make.py --warnings-are-errors
47+
run: doc/make.py --warnings-are-errors
4548

4649
- name: Install ssh key
4750
run: |
4851
mkdir -m 700 -p ~/.ssh
4952
echo "${{ secrets.server_ssh_key }}" > ~/.ssh/id_rsa
5053
chmod 600 ~/.ssh/id_rsa
5154
echo "${{ secrets.server_ip }} ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBE1Kkopomm7FHG5enATf7SgnpICZ4W2bw+Ho+afqin+w7sMcrsa0je7sbztFAV8YchDkiBKnWTG4cRT+KZgZCaY=" > ~/.ssh/known_hosts
52-
if: ${{github.event_name == 'push' && github.ref == 'refs/heads/main'}}
55+
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
5356

5457
- name: Copy cheatsheets into site directory
5558
run: cp doc/cheatsheet/Pandas_Cheat_Sheet* web/build/
5659

5760
- name: Upload web
5861
run: rsync -az --delete --exclude='pandas-docs' --exclude='docs' web/build/ docs@${{ secrets.server_ip }}:/usr/share/nginx/pandas
59-
if: ${{github.event_name == 'push' && github.ref == 'refs/heads/main'}}
62+
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
6063

6164
- name: Upload dev docs
6265
run: rsync -az --delete doc/build/html/ docs@${{ secrets.server_ip }}:/usr/share/nginx/pandas/pandas-docs/dev
63-
if: ${{github.event_name == 'push' && github.ref == 'refs/heads/main'}}
66+
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
6467

6568
- name: Move docs into site directory
6669
run: mv doc/build/html web/build/docs

.github/workflows/macos-windows.yml

+3-17
Original file line numberDiff line numberDiff line change
@@ -43,32 +43,18 @@ jobs:
4343
with:
4444
fetch-depth: 0
4545

46-
- name: Install Dependencies
47-
uses: conda-incubator/setup-miniconda@v2.1.1
46+
- name: Set up Conda
47+
uses: ./.github/actions/setup-conda
4848
with:
49-
mamba-version: "*"
50-
channels: conda-forge
51-
activate-environment: pandas-dev
52-
channel-priority: ${{ matrix.os == 'macos-latest' && 'flexible' || 'strict' }}
5349
environment-file: ci/deps/${{ matrix.env_file }}
54-
use-only-tar-bz2: true
55-
56-
# ImportError: 2): Library not loaded: @rpath/libssl.1.1.dylib
57-
# Referenced from: /Users/runner/miniconda3/envs/pandas-dev/lib/libthrift.0.13.0.dylib
58-
# Reason: image not found
59-
- name: Upgrade pyarrow on MacOS
60-
run: conda install -n pandas-dev -c conda-forge --no-update-deps pyarrow=6
61-
if: ${{ matrix.os == 'macos-latest' }}
50+
pyarrow-version: ${{ matrix.os == 'macos-latest' && '6' || '' }}
6251

6352
- name: Build Pandas
6453
uses: ./.github/actions/build_pandas
6554

6655
- name: Test
6756
run: ci/run_tests.sh
6857

69-
- name: Build Version
70-
run: conda list
71-
7258
- name: Publish test results
7359
uses: actions/upload-artifact@v3
7460
with:

.github/workflows/python-dev.yml

+1-2
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,10 @@ jobs:
5454
with:
5555
python-version: '3.11-dev'
5656

57-
# TODO: GH#44980 https://github.com/pypa/setuptools/issues/2941
5857
- name: Install dependencies
5958
shell: bash -el {0}
6059
run: |
61-
python -m pip install --upgrade pip "setuptools<60.0.0" wheel
60+
python -m pip install --upgrade pip setuptools wheel
6261
pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy
6362
pip install git+https://github.com/nedbat/coveragepy.git
6463
pip install cython python-dateutil pytz hypothesis pytest>=6.2.5 pytest-xdist pytest-cov

.github/workflows/sdist.yml

+3-6
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,9 @@ jobs:
4141
with:
4242
python-version: ${{ matrix.python-version }}
4343

44-
# TODO: GH#44980 https://github.com/pypa/setuptools/issues/2941
4544
- name: Install dependencies
4645
run: |
47-
python -m pip install --upgrade pip "setuptools<60.0.0" wheel
46+
python -m pip install --upgrade pip setuptools wheel
4847
4948
# GH 39416
5049
pip install numpy
@@ -66,20 +65,18 @@ jobs:
6665
channels: conda-forge
6766
python-version: '${{ matrix.python-version }}'
6867

69-
# TODO: GH#44980 https://github.com/pypa/setuptools/issues/2941
7068
- name: Install pandas from sdist
7169
run: |
72-
python -m pip install --upgrade "setuptools<60.0.0"
7370
pip list
7471
python -m pip install dist/*.gz
7572
7673
- name: Force oldest supported NumPy
7774
run: |
7875
case "${{matrix.python-version}}" in
7976
3.8)
80-
pip install numpy==1.18.5 ;;
77+
pip install numpy==1.19.5 ;;
8178
3.9)
82-
pip install numpy==1.19.3 ;;
79+
pip install numpy==1.19.5 ;;
8380
3.10)
8481
pip install numpy==1.21.2 ;;
8582
esac

.pre-commit-config.yaml

+16-5
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ repos:
1111
- id: absolufy-imports
1212
files: ^pandas/
1313
- repo: https://github.com/jendrikseipp/vulture
14-
rev: 'v2.3'
14+
rev: 'v2.4'
1515
hooks:
1616
- id: vulture
1717
entry: python scripts/run_vulture.py
@@ -26,7 +26,6 @@ repos:
2626
hooks:
2727
- id: codespell
2828
types_or: [python, rst, markdown]
29-
files: ^(pandas|doc)/
3029
- repo: https://github.com/pre-commit/pre-commit-hooks
3130
rev: v4.2.0
3231
hooks:
@@ -60,7 +59,7 @@ repos:
6059
hooks:
6160
- id: isort
6261
- repo: https://github.com/asottile/pyupgrade
63-
rev: v2.32.0
62+
rev: v2.32.1
6463
hooks:
6564
- id: pyupgrade
6665
args: [--py38-plus]
@@ -75,7 +74,7 @@ repos:
7574
types: [text] # overwrite types: [rst]
7675
types_or: [python, rst]
7776
- repo: https://github.com/sphinx-contrib/sphinx-lint
78-
rev: v0.4.1
77+
rev: v0.6
7978
hooks:
8079
- id: sphinx-lint
8180
- repo: https://github.com/asottile/yesqa
@@ -93,7 +92,19 @@ repos:
9392
pass_filenames: false
9493
types: [python]
9594
stages: [manual]
96-
additional_dependencies: ['[email protected]']
95+
additional_dependencies: &pyright_dependencies
96+
97+
- repo: local
98+
hooks:
99+
- id: pyright_reportGeneralTypeIssues
100+
name: pyright reportGeneralTypeIssues
101+
entry: pyright --skipunannotated -p pyright_reportGeneralTypeIssues.json
102+
# note: assumes python env is setup and activated
103+
language: node
104+
pass_filenames: false
105+
types: [python]
106+
stages: [manual]
107+
additional_dependencies: *pyright_dependencies
97108
- repo: local
98109
hooks:
99110
- id: mypy

asv_bench/asv.conf.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
// followed by the pip installed packages).
4343
"matrix": {
4444
"numpy": [],
45-
"Cython": ["0.29.24"],
45+
"Cython": ["0.29.30"],
4646
"matplotlib": [],
4747
"sqlalchemy": [],
4848
"scipy": [],

asv_bench/benchmarks/frame_ctor.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def setup(self):
3737
self.dict_list = frame.to_dict(orient="records")
3838
self.data2 = {i: {j: float(j) for j in range(100)} for i in range(2000)}
3939

40-
# arrays which we wont consolidate
40+
# arrays which we won't consolidate
4141
self.dict_of_categoricals = {i: Categorical(np.arange(N)) for i in range(K)}
4242

4343
def time_list_of_dict(self):
@@ -60,7 +60,7 @@ def time_nested_dict_int64(self):
6060
DataFrame(self.data2)
6161

6262
def time_dict_of_categoricals(self):
63-
# dict of arrays that we wont consolidate
63+
# dict of arrays that we won't consolidate
6464
DataFrame(self.dict_of_categoricals)
6565

6666

asv_bench/benchmarks/frame_methods.py

+20
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,26 @@ def time_values_mixed_wide(self):
288288
self.df_mixed_wide.values
289289

290290

291+
class ToRecords:
292+
def setup(self):
293+
N = 100_000
294+
data = np.random.randn(N, 2)
295+
mi = MultiIndex.from_arrays(
296+
[
297+
np.arange(N),
298+
date_range("1970-01-01", periods=N, freq="ms"),
299+
]
300+
)
301+
self.df = DataFrame(data)
302+
self.df_mi = DataFrame(data, index=mi)
303+
304+
def time_to_records(self):
305+
self.df.to_records(index=True)
306+
307+
def time_to_records_multiindex(self):
308+
self.df_mi.to_records(index=True)
309+
310+
291311
class Repr:
292312
def setup(self):
293313
nrows = 10000

asv_bench/benchmarks/groupby.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from pandas import (
88
Categorical,
99
DataFrame,
10+
Index,
1011
MultiIndex,
1112
Series,
1213
Timestamp,
@@ -111,6 +112,18 @@ def time_copy_overhead_single_col(self, factor):
111112
self.df.groupby("key").apply(self.df_copy_function)
112113

113114

115+
class ApplyNonUniqueUnsortedIndex:
116+
def setup(self):
117+
# GH 46527
118+
# unsorted and non-unique index
119+
idx = np.arange(100)[::-1]
120+
idx = Index(np.repeat(idx, 200), name="key")
121+
self.df = DataFrame(np.random.randn(len(idx), 10), index=idx)
122+
123+
def time_groupby_apply_non_unique_unsorted_index(self):
124+
self.df.groupby("key", group_keys=False).apply(lambda x: x)
125+
126+
114127
class Groups:
115128

116129
param_names = ["key"]
@@ -514,7 +527,7 @@ def time_dtype_as_field(self, dtype, method, application, ncols):
514527

515528
class GroupByCythonAgg:
516529
"""
517-
Benchmarks specifically targetting our cython aggregation algorithms
530+
Benchmarks specifically targeting our cython aggregation algorithms
518531
(using a big enough dataframe with simple key, so a large part of the
519532
time is actually spent in the grouped aggregation).
520533
"""

asv_bench/benchmarks/io/excel.py

+11
Original file line numberDiff line numberDiff line change
@@ -86,4 +86,15 @@ def time_read_excel(self, engine):
8686
read_excel(fname, engine=engine)
8787

8888

89+
class ReadExcelNRows(ReadExcel):
90+
def time_read_excel(self, engine):
91+
if engine == "xlrd":
92+
fname = self.fname_excel_xls
93+
elif engine == "odf":
94+
fname = self.fname_odf
95+
else:
96+
fname = self.fname_excel
97+
read_excel(fname, engine=engine, nrows=10)
98+
99+
89100
from ..pandas_vb_common import setup # noqa: F401 isort:skip

0 commit comments

Comments
 (0)