Skip to content

Commit 57ed028

Browse files
committed
Merge branch 'main' into enh-stata-non-nano
2 parents a500311 + e2aa710 commit 57ed028

File tree

370 files changed

+6820
-4118
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

370 files changed

+6820
-4118
lines changed

.github/workflows/unit-tests.yml

+19-6
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ jobs:
2626
timeout-minutes: 90
2727
strategy:
2828
matrix:
29-
env_file: [actions-39.yaml, actions-310.yaml, actions-311.yaml]
29+
env_file: [actions-39.yaml, actions-310.yaml, actions-311.yaml, actions-312.yaml]
3030
# Prevent the include jobs from overriding other jobs
3131
pattern: [""]
3232
include:
@@ -69,10 +69,22 @@ jobs:
6969
env_file: actions-311.yaml
7070
pattern: "not slow and not network and not single_cpu"
7171
pandas_copy_on_write: "1"
72+
- name: "Copy-on-Write 3.12"
73+
env_file: actions-312.yaml
74+
pattern: "not slow and not network and not single_cpu"
75+
pandas_copy_on_write: "1"
7276
- name: "Copy-on-Write 3.11 (warnings)"
7377
env_file: actions-311.yaml
7478
pattern: "not slow and not network and not single_cpu"
7579
pandas_copy_on_write: "warn"
80+
- name: "Copy-on-Write 3.10 (warnings)"
81+
env_file: actions-310.yaml
82+
pattern: "not slow and not network and not single_cpu"
83+
pandas_copy_on_write: "warn"
84+
- name: "Copy-on-Write 3.9 (warnings)"
85+
env_file: actions-39.yaml
86+
pattern: "not slow and not network and not single_cpu"
87+
pandas_copy_on_write: "warn"
7688
- name: "Pypy"
7789
env_file: actions-pypy-39.yaml
7890
pattern: "not slow and not network and not single_cpu"
@@ -88,14 +100,15 @@ jobs:
88100
name: ${{ matrix.name || format('ubuntu-latest {0}', matrix.env_file) }}
89101
env:
90102
PATTERN: ${{ matrix.pattern }}
91-
EXTRA_APT: ${{ matrix.extra_apt || '' }}
92103
LANG: ${{ matrix.lang || 'C.UTF-8' }}
93104
LC_ALL: ${{ matrix.lc_all || '' }}
94105
PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }}
95106
PANDAS_CI: ${{ matrix.pandas_ci || '1' }}
96107
TEST_ARGS: ${{ matrix.test_args || '' }}
97108
PYTEST_WORKERS: 'auto'
98109
PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
110+
# Clipboard tests
111+
QT_QPA_PLATFORM: offscreen
99112
concurrency:
100113
# https://github.community/t/concurrecy-not-work-for-push/183068/7
101114
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_copy_on_write || '' }}
@@ -145,8 +158,8 @@ jobs:
145158
fetch-depth: 0
146159

147160
- name: Extra installs
148-
# xsel for clipboard tests
149-
run: sudo apt-get update && sudo apt-get install -y xsel ${{ env.EXTRA_APT }}
161+
run: sudo apt-get update && sudo apt-get install -y ${{ matrix.extra_apt }}
162+
if: ${{ matrix.extra_apt }}
150163

151164
- name: Generate extra locales
152165
# These extra locales will be available for locale.setlocale() calls in tests
@@ -181,7 +194,7 @@ jobs:
181194
strategy:
182195
matrix:
183196
os: [macos-latest, windows-latest]
184-
env_file: [actions-39.yaml, actions-310.yaml, actions-311.yaml]
197+
env_file: [actions-39.yaml, actions-310.yaml, actions-311.yaml, actions-312.yaml]
185198
fail-fast: false
186199
runs-on: ${{ matrix.os }}
187200
name: ${{ format('{0} {1}', matrix.os, matrix.env_file) }}
@@ -312,7 +325,7 @@ jobs:
312325
# To freeze this file, uncomment the ``if: false`` condition, and migrate the jobs
313326
# to the corresponding posix/windows-macos/sdist etc. workflows.
314327
# Feel free to modify this comment as necessary.
315-
#if: false # Uncomment this to freeze the workflow, comment it to unfreeze
328+
if: false # Uncomment this to freeze the workflow, comment it to unfreeze
316329
defaults:
317330
run:
318331
shell: bash -eou pipefail {0}

.pre-commit-config.yaml

+2-1
Original file line numberDiff line numberDiff line change
@@ -240,8 +240,9 @@ repos:
240240
# pytest raises without context
241241
|\s\ pytest.raises
242242
243+
# TODO: re-enable the pytest.warns check that is commented out below
243244
# pytest.warns (use tm.assert_produces_warning instead)
244-
|pytest\.warns
245+
# |pytest\.warns
245246
246247
# os.remove
247248
|os\.remove

asv_bench/benchmarks/algorithms.py

+9-6
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44

55
import pandas as pd
66

7-
from .pandas_vb_common import tm
8-
97
for imp in ["pandas.util", "pandas.tools.hashing"]:
108
try:
119
hashing = import_module(imp)
@@ -47,9 +45,12 @@ def setup(self, unique, sort, dtype):
4745
elif dtype == "datetime64[ns, tz]":
4846
data = pd.date_range("2011-01-01", freq="h", periods=N, tz="Asia/Tokyo")
4947
elif dtype == "object_str":
50-
data = tm.makeStringIndex(N)
48+
data = pd.Index([f"i-{i}" for i in range(N)], dtype=object)
5149
elif dtype == "string[pyarrow]":
52-
data = pd.array(tm.makeStringIndex(N), dtype="string[pyarrow]")
50+
data = pd.array(
51+
pd.Index([f"i-{i}" for i in range(N)], dtype=object),
52+
dtype="string[pyarrow]",
53+
)
5354
else:
5455
raise NotImplementedError
5556

@@ -88,7 +89,7 @@ def setup(self, unique, keep, dtype):
8889
elif dtype == "float64":
8990
data = pd.Index(np.random.randn(N), dtype="float64")
9091
elif dtype == "string":
91-
data = tm.makeStringIndex(N)
92+
data = pd.Index([f"i-{i}" for i in range(N)], dtype=object)
9293
elif dtype == "datetime64[ns]":
9394
data = pd.date_range("2011-01-01", freq="h", periods=N)
9495
elif dtype == "datetime64[ns, tz]":
@@ -136,7 +137,9 @@ def setup_cache(self):
136137
df = pd.DataFrame(
137138
{
138139
"strings": pd.Series(
139-
tm.makeStringIndex(10000).take(np.random.randint(0, 10000, size=N))
140+
pd.Index([f"i-{i}" for i in range(10000)], dtype=object).take(
141+
np.random.randint(0, 10000, size=N)
142+
)
140143
),
141144
"floats": np.random.randn(N),
142145
"ints": np.arange(N),

asv_bench/benchmarks/algos/isin.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88
date_range,
99
)
1010

11-
from ..pandas_vb_common import tm
12-
1311

1412
class IsIn:
1513
params = [
@@ -60,7 +58,9 @@ def setup(self, dtype):
6058

6159
elif dtype in ["str", "string[python]", "string[pyarrow]"]:
6260
try:
63-
self.series = Series(tm.makeStringIndex(N), dtype=dtype)
61+
self.series = Series(
62+
Index([f"i-{i}" for i in range(N)], dtype=object), dtype=dtype
63+
)
6464
except ImportError:
6565
raise NotImplementedError
6666
self.values = list(self.series[:2])

asv_bench/benchmarks/arithmetic.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@
66
import pandas as pd
77
from pandas import (
88
DataFrame,
9+
Index,
910
Series,
1011
Timestamp,
1112
date_range,
1213
to_timedelta,
1314
)
14-
import pandas._testing as tm
1515
from pandas.core.algorithms import checked_add_with_arr
1616

1717
from .pandas_vb_common import numeric_dtypes
@@ -323,8 +323,10 @@ class IndexArithmetic:
323323

324324
def setup(self, dtype):
325325
N = 10**6
326-
indexes = {"int": "makeIntIndex", "float": "makeFloatIndex"}
327-
self.index = getattr(tm, indexes[dtype])(N)
326+
if dtype == "float":
327+
self.index = Index(np.arange(N), dtype=np.float64)
328+
elif dtype == "int":
329+
self.index = Index(np.arange(N), dtype=np.int64)
328330

329331
def time_add(self, dtype):
330332
self.index + 2

asv_bench/benchmarks/categoricals.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66

77
import pandas as pd
88

9-
from .pandas_vb_common import tm
10-
119
try:
1210
from pandas.api.types import union_categoricals
1311
except ImportError:
@@ -189,7 +187,7 @@ def setup(self):
189187
N = 10**5
190188
ncats = 15
191189

192-
self.s_str = pd.Series(tm.makeCategoricalIndex(N, ncats)).astype(str)
190+
self.s_str = pd.Series(np.random.randint(0, ncats, size=N).astype(str))
193191
self.s_str_cat = pd.Series(self.s_str, dtype="category")
194192
with warnings.catch_warnings(record=True):
195193
str_cat_type = pd.CategoricalDtype(set(self.s_str), ordered=True)
@@ -242,7 +240,7 @@ def time_categorical_series_is_monotonic_decreasing(self):
242240
class Contains:
243241
def setup(self):
244242
N = 10**5
245-
self.ci = tm.makeCategoricalIndex(N)
243+
self.ci = pd.CategoricalIndex(np.arange(N))
246244
self.c = self.ci.values
247245
self.key = self.ci.categories[0]
248246

@@ -325,7 +323,7 @@ def time_sort_values(self):
325323
class SearchSorted:
326324
def setup(self):
327325
N = 10**5
328-
self.ci = tm.makeCategoricalIndex(N).sort_values()
326+
self.ci = pd.CategoricalIndex(np.arange(N)).sort_values()
329327
self.c = self.ci.values
330328
self.key = self.ci.categories[1]
331329

asv_bench/benchmarks/ctors.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@
99
date_range,
1010
)
1111

12-
from .pandas_vb_common import tm
13-
1412

1513
def no_change(arr):
1614
return arr
@@ -115,7 +113,7 @@ def time_dtindex_from_index_with_series(self):
115113
class MultiIndexConstructor:
116114
def setup(self):
117115
N = 10**4
118-
self.iterables = [tm.makeStringIndex(N), range(20)]
116+
self.iterables = [Index([f"i-{i}" for i in range(N)], dtype=object), range(20)]
119117

120118
def time_multiindex_from_iterables(self):
121119
MultiIndex.from_product(self.iterables)

asv_bench/benchmarks/dtypes.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
import numpy as np
44

55
import pandas as pd
6-
from pandas import DataFrame
6+
from pandas import (
7+
DataFrame,
8+
Index,
9+
)
710
import pandas._testing as tm
811
from pandas.api.types import (
912
is_extension_array_dtype,
@@ -73,8 +76,8 @@ class SelectDtypes:
7376

7477
def setup(self, dtype):
7578
N, K = 5000, 50
76-
self.index = tm.makeStringIndex(N)
77-
self.columns = tm.makeStringIndex(K)
79+
self.index = Index([f"i-{i}" for i in range(N)], dtype=object)
80+
self.columns = Index([f"i-{i}" for i in range(K)], dtype=object)
7881

7982
def create_df(data):
8083
return DataFrame(data, index=self.index, columns=self.columns)

asv_bench/benchmarks/frame_ctor.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
date_range,
1313
)
1414

15-
from .pandas_vb_common import tm
16-
1715
try:
1816
from pandas.tseries.offsets import (
1917
Hour,
@@ -30,8 +28,8 @@
3028
class FromDicts:
3129
def setup(self):
3230
N, K = 5000, 50
33-
self.index = tm.makeStringIndex(N)
34-
self.columns = tm.makeStringIndex(K)
31+
self.index = pd.Index([f"i-{i}" for i in range(N)], dtype=object)
32+
self.columns = pd.Index([f"i-{i}" for i in range(K)], dtype=object)
3533
frame = DataFrame(np.random.randn(N, K), index=self.index, columns=self.columns)
3634
self.data = frame.to_dict()
3735
self.dict_list = frame.to_dict(orient="records")

asv_bench/benchmarks/frame_methods.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from pandas import (
77
DataFrame,
8+
Index,
89
MultiIndex,
910
NaT,
1011
Series,
@@ -14,8 +15,6 @@
1415
timedelta_range,
1516
)
1617

17-
from .pandas_vb_common import tm
18-
1918

2019
class AsType:
2120
params = [
@@ -703,8 +702,12 @@ def setup(self, monotonic):
703702
K = 10
704703
df = DataFrame(
705704
{
706-
"key1": tm.makeStringIndex(N).values.repeat(K),
707-
"key2": tm.makeStringIndex(N).values.repeat(K),
705+
"key1": Index([f"i-{i}" for i in range(N)], dtype=object).values.repeat(
706+
K
707+
),
708+
"key2": Index([f"i-{i}" for i in range(N)], dtype=object).values.repeat(
709+
K
710+
),
708711
"value": np.random.randn(N * K),
709712
}
710713
)

asv_bench/benchmarks/gil.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,14 @@
55

66
from pandas import (
77
DataFrame,
8+
Index,
89
Series,
910
date_range,
1011
factorize,
1112
read_csv,
1213
)
1314
from pandas.core.algorithms import take_nd
1415

15-
from .pandas_vb_common import tm
16-
1716
try:
1817
from pandas import (
1918
rolling_kurt,
@@ -34,7 +33,6 @@
3433
except ImportError:
3534
from pandas import algos
3635

37-
3836
from .pandas_vb_common import BaseIO # isort:skip
3937

4038

@@ -305,7 +303,7 @@ class ParallelFactorize:
305303
param_names = ["threads"]
306304

307305
def setup(self, threads):
308-
strings = tm.makeStringIndex(100000)
306+
strings = Index([f"i-{i}" for i in range(100000)], dtype=object)
309307

310308
@test_parallel(num_threads=threads)
311309
def parallel():

asv_bench/benchmarks/groupby.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@
1717
to_timedelta,
1818
)
1919

20-
from .pandas_vb_common import tm
21-
2220
method_blocklist = {
2321
"object": {
2422
"diff",
@@ -167,10 +165,14 @@ def setup_cache(self):
167165
"int64_small": Series(np.random.randint(0, 100, size=size)),
168166
"int64_large": Series(np.random.randint(0, 10000, size=size)),
169167
"object_small": Series(
170-
tm.makeStringIndex(100).take(np.random.randint(0, 100, size=size))
168+
Index([f"i-{i}" for i in range(100)], dtype=object).take(
169+
np.random.randint(0, 100, size=size)
170+
)
171171
),
172172
"object_large": Series(
173-
tm.makeStringIndex(10000).take(np.random.randint(0, 10000, size=size))
173+
Index([f"i-{i}" for i in range(10000)], dtype=object).take(
174+
np.random.randint(0, 10000, size=size)
175+
)
174176
),
175177
}
176178
return data
@@ -912,7 +914,7 @@ def setup(self):
912914
n1 = 400
913915
n2 = 250
914916
index = MultiIndex(
915-
levels=[np.arange(n1), tm.makeStringIndex(n2)],
917+
levels=[np.arange(n1), Index([f"i-{i}" for i in range(n2)], dtype=object)],
916918
codes=[np.repeat(range(n1), n2).tolist(), list(range(n2)) * n1],
917919
names=["lev1", "lev2"],
918920
)

0 commit comments

Comments
 (0)