Skip to content

Commit 14dd5a2

Browse files
Merge remote-tracking branch 'upstream/master' into bisect
2 parents 1cb4ec8 + f1286a7 commit 14dd5a2

File tree

479 files changed

+14035
-9213
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

479 files changed

+14035
-9213
lines changed

.github/actions/build_pandas/action.yml

+1-1
Original file line number | Diff line number | Diff line change
@@ -13,5 +13,5 @@ runs:
1313
- name: Build Pandas
1414
run: |
1515
python setup.py build_ext -j 2
16-
python -m pip install -e . --no-build-isolation --no-use-pep517
16+
python -m pip install -e . --no-build-isolation --no-use-pep517 --no-index
1717
shell: bash -l {0}

.github/workflows/ci.yml

+14-1
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,15 @@ jobs:
4545
environment-file: ${{ env.ENV_FILE }}
4646
use-only-tar-bz2: true
4747

48+
- name: Install node.js (for pyright)
49+
uses: actions/setup-node@v2
50+
with:
51+
node-version: "16"
52+
53+
- name: Install pyright
54+
# note: keep version in sync with .pre-commit-config.yaml
55+
run: npm install -g pyright@<version>
56+
4857
- name: Build Pandas
4958
uses: ./.github/actions/build_pandas
5059

@@ -127,8 +136,11 @@ jobs:
127136
echo "${{ secrets.server_ip }} ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBE1Kkopomm7FHG5enATf7SgnpICZ4W2bw+Ho+afqin+w7sMcrsa0je7sbztFAV8YchDkiBKnWTG4cRT+KZgZCaY=" > ~/.ssh/known_hosts
128137
if: ${{github.event_name == 'push' && github.ref == 'refs/heads/master'}}
129138

139+
- name: Copy cheatsheets into site directory
140+
run: cp doc/cheatsheet/Pandas_Cheat_Sheet* web/build/
141+
130142
- name: Upload web
131-
run: rsync -az --delete --exclude='pandas-docs' --exclude='docs' --exclude='Pandas_Cheat_Sheet*' web/build/ docs@${{ secrets.server_ip }}:/usr/share/nginx/pandas
143+
run: rsync -az --delete --exclude='pandas-docs' --exclude='docs' web/build/ docs@${{ secrets.server_ip }}:/usr/share/nginx/pandas
132144
if: ${{github.event_name == 'push' && github.ref == 'refs/heads/master'}}
133145

134146
- name: Upload dev docs
@@ -137,6 +149,7 @@ jobs:
137149

138150
- name: Move docs into site directory
139151
run: mv doc/build/html web/build/docs
152+
140153
- name: Save website as an artifact
141154
uses: actions/upload-artifact@v2
142155
with:

.github/workflows/database.yml

+2-1
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ on:
44
push:
55
branches:
66
- master
7+
- 1.3.x
78
pull_request:
89
branches:
910
- master
@@ -79,7 +80,7 @@ jobs:
7980
- uses: conda-incubator/setup-miniconda@v2
8081
with:
8182
activate-environment: pandas-dev
82-
channel-priority: flexible
83+
channel-priority: strict
8384
environment-file: ${{ matrix.ENV_FILE }}
8485
use-only-tar-bz2: true
8586

.github/workflows/pre-commit.yml

+2
Original file line number | Diff line number | Diff line change
@@ -16,4 +16,6 @@ jobs:
1616
steps:
1717
- uses: actions/checkout@v2
1818
- uses: actions/setup-python@v2
19+
with:
20+
python-version: '3.9.7'
1921
- uses: pre-commit/action@<version>

.github/workflows/python-dev.yml

+18-8
Original file line number | Diff line number | Diff line change
@@ -17,16 +17,25 @@ env:
1717
PANDAS_CI: 1
1818
PATTERN: "not slow and not network and not clipboard"
1919
COVERAGE: true
20-
PYTEST_TARGET: pandas
2120

2221
jobs:
2322
build:
24-
runs-on: ubuntu-latest
23+
runs-on: ${{ matrix.os }}
24+
strategy:
25+
fail-fast: false
26+
matrix:
27+
os: [ubuntu-latest, macOS-latest]
28+
pytest_target: ["pandas/tests/[a-h]*", "pandas/tests/[i-z]*"]
29+
include:
30+
# No need to split tests on windows
31+
- os: windows-latest
32+
pytest_target: pandas
33+
2534
name: actions-310-dev
26-
timeout-minutes: 60
35+
timeout-minutes: 80
2736

2837
concurrency:
29-
group: ${{ github.ref }}-dev
38+
group: ${{ github.ref }}-${{ matrix.os }}-${{ matrix.pytest_target }}-dev
3039
cancel-in-progress: ${{github.event_name == 'pull_request'}}
3140

3241
steps:
@@ -40,12 +49,12 @@ jobs:
4049
python-version: '3.10-dev'
4150

4251
- name: Install dependencies
52+
shell: bash
4353
run: |
4454
python -m pip install --upgrade pip setuptools wheel
4555
pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy
46-
pip install git+https://github.com/pytest-dev/pytest.git
4756
pip install git+https://github.com/nedbat/coveragepy.git
48-
pip install cython python-dateutil pytz hypothesis pytest-xdist pytest-cov
57+
pip install cython python-dateutil pytz hypothesis pytest>=6.2.5 pytest-xdist pytest-cov
4958
pip list
5059
5160
- name: Build Pandas
@@ -58,10 +67,11 @@ jobs:
5867
python -c "import pandas; pandas.show_versions();"
5968
6069
- name: Test with pytest
70+
env:
71+
PYTEST_TARGET: ${{ matrix.pytest_target }}
72+
shell: bash
6173
run: |
6274
ci/run_tests.sh
63-
# GH 41935
64-
continue-on-error: true
6575
6676
- name: Publish test results
6777
uses: actions/upload-artifact@master

.github/workflows/sdist.yml

+14-3
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ jobs:
2323
strategy:
2424
fail-fast: false
2525
matrix:
26-
python-version: ["3.8", "3.9"]
26+
python-version: ["3.8", "3.9", "3.10"]
2727
concurrency:
2828
group: ${{github.ref}}-${{matrix.python-version}}-sdist
2929
cancel-in-progress: ${{github.event_name == 'pull_request'}}
@@ -53,13 +53,24 @@ jobs:
5353
- uses: conda-incubator/setup-miniconda@v2
5454
with:
5555
activate-environment: pandas-sdist
56-
python-version: ${{ matrix.python-version }}
56+
python-version: '${{ matrix.python-version }}'
5757

5858
- name: Install pandas from sdist
5959
run: |
60-
conda list
60+
pip list
6161
python -m pip install dist/*.gz
6262
63+
- name: Force oldest supported NumPy
64+
run: |
65+
case "${{matrix.python-version}}" in
66+
3.8)
67+
pip install numpy==1.18.5 ;;
68+
3.9)
69+
pip install numpy==1.19.3 ;;
70+
3.10)
71+
pip install numpy==1.21.2 ;;
72+
esac
73+
6374
- name: Import pandas
6475
run: |
6576
cd ..

.pre-commit-config.yaml

+25-11
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ repos:
99
- id: absolufy-imports
1010
files: ^pandas/
1111
- repo: https://github.com/python/black
12-
rev: 21.7b0
12+
rev: 21.9b0
1313
hooks:
1414
- id: black
1515
- repo: https://github.com/codespell-project/codespell
@@ -39,10 +39,11 @@ repos:
3939
rev: 3.9.2
4040
hooks:
4141
- id: flake8
42-
additional_dependencies:
43-
- flake8-comprehensions==3.1.0
44-
- flake8-bugbear==21.3.2
45-
- pandas-dev-flaker==0.2.0
42+
additional_dependencies: &flake8_dependencies
43+
- flake8==3.9.2
44+
- flake8-comprehensions==3.1.0
45+
- flake8-bugbear==21.3.2
46+
- pandas-dev-flaker==0.2.0
4647
- id: flake8
4748
alias: flake8-cython
4849
name: flake8 (cython)
@@ -58,7 +59,7 @@ repos:
5859
hooks:
5960
- id: isort
6061
- repo: https://github.com/asottile/pyupgrade
61-
rev: v2.23.3
62+
rev: v2.29.0
6263
hooks:
6364
- id: pyupgrade
6465
args: [--py38-plus]
@@ -76,11 +77,18 @@ repos:
7677
rev: v1.2.3
7778
hooks:
7879
- id: yesqa
79-
additional_dependencies:
80-
- flake8==3.9.2
81-
- flake8-comprehensions==3.1.0
82-
- flake8-bugbear==21.3.2
83-
- pandas-dev-flaker==0.2.0
80+
additional_dependencies: *flake8_dependencies
81+
- repo: local
82+
hooks:
83+
- id: pyright
84+
name: pyright
85+
entry: pyright
86+
language: node
87+
pass_filenames: false
88+
types: [python]
89+
stages: [manual]
90+
# note: keep version in sync with .github/workflows/ci.yml
91+
additional_dependencies: ['pyright@<version>']
8492
- repo: local
8593
hooks:
8694
- id: flake8-rst
@@ -124,6 +132,12 @@ repos:
124132
entry: 'np\.random\.seed'
125133
files: ^asv_bench/benchmarks
126134
exclude: ^asv_bench/benchmarks/pandas_vb_common\.py
135+
- id: np-testing-array-equal
136+
name: Check for usage of numpy testing or array_equal
137+
language: pygrep
138+
entry: '(numpy|np)(\.testing|\.array_equal)'
139+
files: ^pandas/tests/
140+
types: [python]
127141
- id: invalid-ea-testing
128142
name: Check for invalid EA testing
129143
language: pygrep

asv_bench/benchmarks/groupby.py

+32
Original file line number | Diff line number | Diff line change
@@ -603,6 +603,38 @@ def time_sum(self):
603603
self.df.groupby(["a"])["b"].sum()
604604

605605

606+
class String:
607+
# GH#41596
608+
param_names = ["dtype", "method"]
609+
params = [
610+
["str", "string[python]"],
611+
[
612+
"sum",
613+
"prod",
614+
"min",
615+
"max",
616+
"mean",
617+
"median",
618+
"var",
619+
"first",
620+
"last",
621+
"any",
622+
"all",
623+
],
624+
]
625+
626+
def setup(self, dtype, method):
627+
cols = list("abcdefghjkl")
628+
self.df = DataFrame(
629+
np.random.randint(0, 100, size=(1_000_000, len(cols))),
630+
columns=cols,
631+
dtype=dtype,
632+
)
633+
634+
def time_str_func(self, dtype, method):
635+
self.df.groupby("a")[self.df.columns[1:]].agg(method)
636+
637+
606638
class Categories:
607639
def setup(self):
608640
N = 10 ** 5

asv_bench/benchmarks/index_object.py

+6
Original file line number | Diff line number | Diff line change
@@ -86,6 +86,12 @@ def time_iter_dec(self):
8686
for _ in self.idx_dec:
8787
pass
8888

89+
def time_sort_values_asc(self):
90+
self.idx_inc.sort_values()
91+
92+
def time_sort_values_des(self):
93+
self.idx_inc.sort_values(ascending=False)
94+
8995

9096
class IndexEquals:
9197
def setup(self):

asv_bench/benchmarks/indexing_engines.py

+36-12
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
"""
2-
Benchmarks in this fiel depend exclusively on code in _libs/
2+
Benchmarks in this file depend exclusively on code in _libs/
33
44
If a PR does not edit anything in _libs, it is very unlikely that benchmarks
55
in this file will be affected.
@@ -35,25 +35,49 @@ class NumericEngineIndexing:
3535
params = [
3636
_get_numeric_engines(),
3737
["monotonic_incr", "monotonic_decr", "non_monotonic"],
38+
[True, False],
39+
[10 ** 5, 2 * 10 ** 6], # 2e6 is above SIZE_CUTOFF
3840
]
39-
param_names = ["engine_and_dtype", "index_type"]
41+
param_names = ["engine_and_dtype", "index_type", "unique", "N"]
4042

41-
def setup(self, engine_and_dtype, index_type):
43+
def setup(self, engine_and_dtype, index_type, unique, N):
4244
engine, dtype = engine_and_dtype
43-
N = 10 ** 5
44-
values = list([1] * N + [2] * N + [3] * N)
45-
arr = {
46-
"monotonic_incr": np.array(values, dtype=dtype),
47-
"monotonic_decr": np.array(list(reversed(values)), dtype=dtype),
48-
"non_monotonic": np.array([1, 2, 3] * N, dtype=dtype),
49-
}[index_type]
45+
46+
if index_type == "monotonic_incr":
47+
if unique:
48+
arr = np.arange(N * 3, dtype=dtype)
49+
else:
50+
values = list([1] * N + [2] * N + [3] * N)
51+
arr = np.array(values, dtype=dtype)
52+
elif index_type == "monotonic_decr":
53+
if unique:
54+
arr = np.arange(N * 3, dtype=dtype)[::-1]
55+
else:
56+
values = list([1] * N + [2] * N + [3] * N)
57+
arr = np.array(values, dtype=dtype)[::-1]
58+
else:
59+
assert index_type == "non_monotonic"
60+
if unique:
61+
arr = np.empty(N * 3, dtype=dtype)
62+
arr[:N] = np.arange(N * 2, N * 3, dtype=dtype)
63+
arr[N:] = np.arange(N * 2, dtype=dtype)
64+
else:
65+
arr = np.array([1, 2, 3] * N, dtype=dtype)
5066

5167
self.data = engine(arr)
5268
# code below avoids populating the mapping etc. while timing.
5369
self.data.get_loc(2)
5470

55-
def time_get_loc(self, engine_and_dtype, index_type):
56-
self.data.get_loc(2)
71+
self.key_middle = arr[len(arr) // 2]
72+
self.key_early = arr[2]
73+
74+
def time_get_loc(self, engine_and_dtype, index_type, unique, N):
75+
self.data.get_loc(self.key_early)
76+
77+
def time_get_loc_near_middle(self, engine_and_dtype, index_type, unique, N):
78+
# searchsorted performance may be different near the middle of a range
79+
# vs near an endpoint
80+
self.data.get_loc(self.key_middle)
5781

5882

5983
class ObjectEngineIndexing:

asv_bench/benchmarks/inference.py

+10
Original file line number | Diff line number | Diff line change
@@ -277,6 +277,16 @@ def time_dup_string_tzoffset_dates(self, cache):
277277
to_datetime(self.dup_string_with_tz, cache=cache)
278278

279279

280+
# GH 43901
281+
class ToDatetimeInferDatetimeFormat:
282+
def setup(self):
283+
rng = date_range(start="1/1/2000", periods=100000, freq="H")
284+
self.strings = rng.strftime("%Y-%m-%d %H:%M:%S").tolist()
285+
286+
def time_infer_datetime_format(self):
287+
to_datetime(self.strings, infer_datetime_format=True)
288+
289+
280290
class ToTimedelta:
281291
def setup(self):
282292
self.ints = np.random.randint(0, 60, size=10000)

0 commit comments

Comments
 (0)