
Commit 3319811

Merge branch 'main' into include-pyproject

2 parents: b0d0d6a + cc6c957

298 files changed (+7,651 / −3,814 lines)


.circleci/config.yml (+4 −1)

@@ -14,7 +14,10 @@ jobs:
     steps:
       - checkout
       - run: .circleci/setup_env.sh
-      - run: PATH=$HOME/miniconda3/envs/pandas-dev/bin:$HOME/miniconda3/condabin:$PATH ci/run_tests.sh
+      - run: >
+          PATH=$HOME/miniconda3/envs/pandas-dev/bin:$HOME/miniconda3/condabin:$PATH
+          LD_PRELOAD=$HOME/miniconda3/envs/pandas-dev/lib/libgomp.so.1:$LD_PRELOAD
+          ci/run_tests.sh
 
 workflows:
   test:

.github/ISSUE_TEMPLATE/bug_report.yaml (+2 −2)

@@ -17,8 +17,8 @@ body:
             [latest version](https://pandas.pydata.org/docs/whatsnew/index.html) of pandas.
           required: true
         - label: >
-            I have confirmed this bug exists on the [main branch]
-            (https://pandas.pydata.org/docs/dev/getting_started/install.html#installing-the-development-version-of-pandas)
+            I have confirmed this bug exists on the
+            [main branch](https://pandas.pydata.org/docs/dev/getting_started/install.html#installing-the-development-version-of-pandas)
             of pandas.
   - type: textarea
     id: example

.github/workflows/docbuild-and-upload.yml (−6)

@@ -46,12 +46,6 @@ jobs:
       - name: Build Pandas
         uses: ./.github/actions/build_pandas
 
-      - name: Set up maintainers cache
-        uses: actions/cache@v3
-        with:
-          path: maintainers.json
-          key: maintainers
-
       - name: Build website
         run: python web/pandas_web.py web/pandas --target-path=web/build

.github/workflows/macos-windows.yml (+1 −1)

@@ -31,7 +31,7 @@ jobs:
     strategy:
       matrix:
         os: [macos-latest, windows-latest]
-        env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml]
+        env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml, actions-311.yaml]
       fail-fast: false
     runs-on: ${{ matrix.os }}
     name: ${{ format('{0} {1}', matrix.os, matrix.env_file) }}

.github/workflows/python-dev.yml (+6 −4)

@@ -23,12 +23,14 @@ name: Python Dev
 on:
   push:
     branches:
-      - main
-      - 1.5.x
+      # - main
+      # - 1.5.x
+      - None
   pull_request:
     branches:
-      - main
-      - 1.5.x
+      # - main
+      # - 1.5.x
+      - None
     paths-ignore:
       - "doc/**"

.github/workflows/sdist.yml (−1)

@@ -92,5 +92,4 @@ jobs:
       - name: Import pandas
         run: |
           cd ..
-          conda list
           python -c "import pandas; pandas.show_versions();"

.github/workflows/ubuntu.yml (+8 −3)

@@ -27,7 +27,7 @@ jobs:
     timeout-minutes: 180
     strategy:
       matrix:
-        env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml]
+        env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml, actions-311.yaml]
         pattern: ["not single_cpu", "single_cpu"]
         pyarrow_version: ["7", "8", "9", "10"]
         include:
@@ -73,11 +73,10 @@ jobs:
           - env_file: actions-pypy-38.yaml
             pattern: "not slow and not network and not single_cpu"
            test_args: "--max-worker-restart 0"
-            error_on_warnings: "0"
           - name: "Numpy Dev"
             env_file: actions-310-numpydev.yaml
             pattern: "not slow and not network and not single_cpu"
-            test_args: "-W error::DeprecationWarning:numpy -W error::FutureWarning:numpy"
+            test_args: "-W error::DeprecationWarning -W error::FutureWarning"
             error_on_warnings: "0"
         exclude:
           - env_file: actions-38.yaml
@@ -92,6 +91,12 @@ jobs:
             pyarrow_version: "8"
           - env_file: actions-39.yaml
             pyarrow_version: "9"
+          - env_file: actions-311.yaml
+            pyarrow_version: "7"
+          - env_file: actions-311.yaml
+            pyarrow_version: "8"
+          - env_file: actions-311.yaml
+            pyarrow_version: "9"
       fail-fast: false
     name: ${{ matrix.name || format('{0} pyarrow={1} {2}', matrix.env_file, matrix.pyarrow_version, matrix.pattern) }}
     env:

.github/workflows/wheels.yml (+4 −2)

@@ -86,7 +86,8 @@ jobs:
           activate-environment: test
           channels: conda-forge, anaconda
           channel-priority: true
-          mamba-version: "*"
+          # mamba fails to solve, also we really don't need this since we're just installing python
+          # mamba-version: "*"
 
       - name: Test wheels (Windows 64-bit only)
         if: ${{ matrix.buildplat[1] == 'win_amd64' }}
@@ -154,7 +155,8 @@ jobs:
           python-version: '3.8'
           channels: conda-forge
           channel-priority: true
-          mamba-version: "*"
+          # mamba fails to solve, also we really don't need this since we're just installing python
+          # mamba-version: "*"
 
       - name: Build sdist
         run: |

.pre-commit-config.yaml (+1 −4)

@@ -92,7 +92,7 @@ repos:
         args: [--disable=all, --enable=redefined-outer-name]
         stages: [manual]
 -   repo: https://github.com/PyCQA/isort
-    rev: 5.11.4
+    rev: 5.12.0
     hooks:
     -   id: isort
 -   repo: https://github.com/asottile/pyupgrade
@@ -443,6 +443,3 @@ repos:
         types: [python]
         files: ^pandas/tests
         language: python
-        exclude: |
-            (?x)
-            ^pandas/tests/generic/test_generic.py  # GH50380

LICENSE (+1 −1)

@@ -3,7 +3,7 @@ BSD 3-Clause License
 Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
 All rights reserved.
 
-Copyright (c) 2011-2022, Open source contributors.
+Copyright (c) 2011-2023, Open source contributors.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:

asv_bench/benchmarks/array.py (+1 −1)

@@ -93,7 +93,7 @@ def time_setitem(self, multiple_chunks):
         self.array[i] = "foo"
 
     def time_setitem_list(self, multiple_chunks):
-        indexer = list(range(0, 50)) + list(range(-50, 0))
+        indexer = list(range(0, 50)) + list(range(-1000, 0, 50))
         self.array[indexer] = ["foo"] * len(indexer)
 
     def time_setitem_slice(self, multiple_chunks):

asv_bench/benchmarks/indexing.py (+32)

@@ -8,6 +8,7 @@
 import numpy as np
 
 from pandas import (
+    NA,
     CategoricalIndex,
     DataFrame,
     Index,
@@ -83,6 +84,37 @@ def time_loc_slice(self, index, index_structure):
         self.data.loc[:800000]
 
 
+class NumericMaskedIndexing:
+    monotonic_list = list(range(10**6))
+    non_monotonic_list = (
+        list(range(50)) + [54, 53, 52, 51] + list(range(55, 10**6 - 1))
+    )
+
+    params = [
+        ("Int64", "UInt64", "Float64"),
+        (True, False),
+    ]
+    param_names = ["dtype", "monotonic"]
+
+    def setup(self, dtype, monotonic):
+
+        indices = {
+            True: Index(self.monotonic_list, dtype=dtype),
+            False: Index(self.non_monotonic_list, dtype=dtype).append(
+                Index([NA], dtype=dtype)
+            ),
+        }
+        self.data = indices[monotonic]
+        self.indexer = np.arange(300, 1_000)
+        self.data_dups = self.data.append(self.data)
+
+    def time_get_indexer(self, dtype, monotonic):
+        self.data.get_indexer(self.indexer)
+
+    def time_get_indexer_dups(self, dtype, monotonic):
+        self.data.get_indexer_for(self.indexer)
+
+
 class NonNumericSeriesIndexing:
 
     params = [
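The new benchmark above times Index.get_indexer on nullable ("masked") numeric indexes. As a quick orientation, here is a minimal sketch of the API being exercised; it is not part of the commit and the values are illustrative. get_indexer returns the position of each requested label, with -1 for labels that are not present.

    import numpy as np
    import pandas as pd

    idx = pd.Index([1, 2, 3, pd.NA], dtype="Int64")   # nullable Int64 index, as in the benchmark
    print(idx.get_indexer(np.array([2, 3, 50])))      # [ 1  2 -1]; -1 marks "not found"
    print(idx.get_indexer_for(np.array([2, 3, 50])))  # same positions; also works on non-unique indexes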

asv_bench/benchmarks/indexing_engines.py (+81 −1)

@@ -1,5 +1,8 @@
 """
-Benchmarks in this file depend exclusively on code in _libs/
+Benchmarks in this file depend mostly on code in _libs/
+
+We have to created masked arrays to test the masked engine though. The
+array is unpacked on the Cython level.
 
 If a PR does not edit anything in _libs, it is very unlikely that benchmarks
 in this file will be affected.
@@ -9,6 +12,8 @@
 
 from pandas._libs import index as libindex
 
+from pandas.core.arrays import BaseMaskedArray
+
 
 def _get_numeric_engines():
     engine_names = [
@@ -30,6 +35,26 @@ def _get_numeric_engines():
     ]
 
 
+def _get_masked_engines():
+    engine_names = [
+        ("MaskedInt64Engine", "Int64"),
+        ("MaskedInt32Engine", "Int32"),
+        ("MaskedInt16Engine", "Int16"),
+        ("MaskedInt8Engine", "Int8"),
+        ("MaskedUInt64Engine", "UInt64"),
+        ("MaskedUInt32Engine", "UInt32"),
+        ("MaskedUInt16engine", "UInt16"),
+        ("MaskedUInt8Engine", "UInt8"),
+        ("MaskedFloat64Engine", "Float64"),
+        ("MaskedFloat32Engine", "Float32"),
+    ]
+    return [
+        (getattr(libindex, engine_name), dtype)
+        for engine_name, dtype in engine_names
+        if hasattr(libindex, engine_name)
+    ]
+
+
 class NumericEngineIndexing:
 
     params = [
@@ -80,6 +105,61 @@ def time_get_loc_near_middle(self, engine_and_dtype, index_type, unique, N):
         self.data.get_loc(self.key_middle)
 
 
+class MaskedNumericEngineIndexing:
+
+    params = [
+        _get_masked_engines(),
+        ["monotonic_incr", "monotonic_decr", "non_monotonic"],
+        [True, False],
+        [10**5, 2 * 10**6],  # 2e6 is above SIZE_CUTOFF
+    ]
+    param_names = ["engine_and_dtype", "index_type", "unique", "N"]
+
+    def setup(self, engine_and_dtype, index_type, unique, N):
+        engine, dtype = engine_and_dtype
+
+        if index_type == "monotonic_incr":
+            if unique:
+                arr = np.arange(N * 3, dtype=dtype.lower())
+            else:
+                values = list([1] * N + [2] * N + [3] * N)
+                arr = np.array(values, dtype=dtype.lower())
+            mask = np.zeros(N * 3, dtype=np.bool_)
+        elif index_type == "monotonic_decr":
+            if unique:
+                arr = np.arange(N * 3, dtype=dtype.lower())[::-1]
+            else:
+                values = list([1] * N + [2] * N + [3] * N)
+                arr = np.array(values, dtype=dtype.lower())[::-1]
+            mask = np.zeros(N * 3, dtype=np.bool_)
+        else:
+            assert index_type == "non_monotonic"
+            if unique:
+                arr = np.zeros(N * 3, dtype=dtype.lower())
+                arr[:N] = np.arange(N * 2, N * 3, dtype=dtype.lower())
+                arr[N:] = np.arange(N * 2, dtype=dtype.lower())
+
+            else:
+                arr = np.array([1, 2, 3] * N, dtype=dtype.lower())
+            mask = np.zeros(N * 3, dtype=np.bool_)
+            mask[-1] = True
+
+        self.data = engine(BaseMaskedArray(arr, mask))
+        # code belows avoids populating the mapping etc. while timing.
+        self.data.get_loc(2)
+
+        self.key_middle = arr[len(arr) // 2]
+        self.key_early = arr[2]
+
+    def time_get_loc(self, engine_and_dtype, index_type, unique, N):
+        self.data.get_loc(self.key_early)
+
+    def time_get_loc_near_middle(self, engine_and_dtype, index_type, unique, N):
+        # searchsorted performance may be different near the middle of a range
+        # vs near an endpoint
+        self.data.get_loc(self.key_middle)
+
+
 class ObjectEngineIndexing:
 
     params = [("monotonic_incr", "monotonic_decr", "non_monotonic")]
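For readers unfamiliar with the masked engines named above, a rough sketch follows; it is illustrative only and not from the commit. A nullable array pairs a plain NumPy buffer with a boolean NA mask, the same (values, mask) pair the benchmark wraps in BaseMaskedArray before handing it to the engine. pd.arrays.IntegerArray is the public counterpart used here.

    import numpy as np
    import pandas as pd

    values = np.array([1, 2, 3], dtype="int64")
    mask = np.array([False, False, True])        # True marks a missing entry
    arr = pd.arrays.IntegerArray(values, mask)   # a BaseMaskedArray subclass
    print(arr)                                   # [1, 2, <NA>]
    print(pd.Index(arr).get_loc(2))              # 1, the kind of lookup the get_loc benchmarks time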

asv_bench/benchmarks/io/hdf.py (+8)

@@ -128,9 +128,17 @@ def setup(self, format):
         self.df["object"] = tm.makeStringIndex(N)
         self.df.to_hdf(self.fname, "df", format=format)
 
+        # Numeric df
+        self.df1 = self.df.copy()
+        self.df1 = self.df1.reset_index()
+        self.df1.to_hdf(self.fname, "df1", format=format)
+
     def time_read_hdf(self, format):
         read_hdf(self.fname, "df")
 
+    def peakmem_read_hdf(self, format):
+        read_hdf(self.fname, "df")
+
     def time_write_hdf(self, format):
         self.df.to_hdf(self.fname, "df", format=format)
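The peakmem_read_hdf addition works because asv dispatches on method prefixes: time_* methods are wall-clock benchmarks, while peakmem_* methods report the peak memory of one call. A minimal sketch of the round-trip being measured follows; the path and key are illustrative, not from the commit, and the PyTables optional dependency is assumed to be installed.

    import pandas as pd

    df = pd.DataFrame({"a": range(5)})
    df.to_hdf("store.h5", key="df1", format="table")   # write under key "df1"
    print(pd.read_hdf("store.h5", key="df1"))          # read it back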

asv_bench/benchmarks/io/json.py (+7 −1)

@@ -294,7 +294,8 @@ def time_float_longint_str_lines(self):
 class ToJSONMem:
     def setup_cache(self):
         df = DataFrame([[1]])
-        frames = {"int": df, "float": df.astype(float)}
+        df2 = DataFrame(range(8), date_range("1/1/2000", periods=8, freq="T"))
+        frames = {"int": df, "float": df.astype(float), "datetime": df2}
 
         return frames
 
@@ -308,5 +309,10 @@ def peakmem_float(self, frames):
         for _ in range(100_000):
             df.to_json()
 
+    def peakmem_time(self, frames):
+        df = frames["datetime"]
+        for _ in range(10_000):
+            df.to_json(orient="table")
+
 
 from ..pandas_vb_common import setup  # noqa: F401 isort:skip
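For context, a small sketch of the orient="table" output that the new peakmem_time benchmark stresses: a Table Schema header plus the data records, which is noticeably heavier to serialize than the default orient. This is not from the commit; it simply mirrors the datetime-indexed frame the benchmark builds.

    import pandas as pd

    df = pd.DataFrame(range(3), index=pd.date_range("1/1/2000", periods=3, freq="T"))
    print(df.to_json(orient="table"))
    # -> {"schema": {"fields": [...], "primaryKey": ["index"], ...}, "data": [...]}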
