Skip to content

Commit 8c52003

Browse files
authored
TST: Parameterize & make tests more performant (#55830)
* use _SIZE_CUTOFF in test_indexer_caching
* use _SIZE_CUTOFF in test_loc_getitem_large_series
* Parameterize test_align_fill_method
* Mark numba engine as single_cpu
* Reduce data size for test_concat_copies
* Use _SIZE_CUTOFF in test_large_mi_contains
* Parameterize
* Parameterize test_round_trip_current
* Remove str call
* Param over limit
1 parent 0d761ef commit 8c52003

File tree

9 files changed, with 116 additions (+116) and 115 deletions (-115).

9 files changed

+116
-115
lines changed

pandas/tests/apply/conftest.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def int_frame_const_col():
1818
return df
1919

2020

21-
@pytest.fixture(params=["python", "numba"])
21+
@pytest.fixture(params=["python", pytest.param("numba", marks=pytest.mark.single_cpu)])
2222
def engine(request):
2323
if request.param == "numba":
2424
pytest.importorskip("numba")

pandas/tests/frame/methods/test_align.py

+36-42
Original file line numberDiff line numberDiff line change
@@ -392,27 +392,57 @@ def test_missing_axis_specification_exception(self):
392392
with pytest.raises(ValueError, match=r"axis=0 or 1"):
393393
df.align(series)
394394

395-
def _check_align(self, a, b, axis, fill_axis, how, method, limit=None):
395+
@pytest.mark.parametrize("method", ["pad", "bfill"])
396+
@pytest.mark.parametrize("axis", [0, 1, None])
397+
@pytest.mark.parametrize("fill_axis", [0, 1])
398+
@pytest.mark.parametrize("how", ["inner", "outer", "left", "right"])
399+
@pytest.mark.parametrize(
400+
"left_slice",
401+
[
402+
[slice(4), slice(10)],
403+
[slice(0), slice(0)],
404+
],
405+
)
406+
@pytest.mark.parametrize(
407+
"right_slice",
408+
[
409+
[slice(2, None), slice(6, None)],
410+
[slice(0), slice(0)],
411+
],
412+
)
413+
@pytest.mark.parametrize("limit", [1, None])
414+
def test_align_fill_method(
415+
self, how, method, axis, fill_axis, float_frame, left_slice, right_slice, limit
416+
):
417+
frame = float_frame
418+
left = frame.iloc[left_slice[0], left_slice[1]]
419+
right = frame.iloc[right_slice[0], right_slice[1]]
420+
396421
msg = (
397422
"The 'method', 'limit', and 'fill_axis' keywords in DataFrame.align "
398423
"are deprecated"
399424
)
400425

401426
with tm.assert_produces_warning(FutureWarning, match=msg):
402-
aa, ab = a.align(
403-
b, axis=axis, join=how, method=method, limit=limit, fill_axis=fill_axis
427+
aa, ab = left.align(
428+
right,
429+
axis=axis,
430+
join=how,
431+
method=method,
432+
limit=limit,
433+
fill_axis=fill_axis,
404434
)
405435

406436
join_index, join_columns = None, None
407437

408-
ea, eb = a, b
438+
ea, eb = left, right
409439
if axis is None or axis == 0:
410-
join_index = a.index.join(b.index, how=how)
440+
join_index = left.index.join(right.index, how=how)
411441
ea = ea.reindex(index=join_index)
412442
eb = eb.reindex(index=join_index)
413443

414444
if axis is None or axis == 1:
415-
join_columns = a.columns.join(b.columns, how=how)
445+
join_columns = left.columns.join(right.columns, how=how)
416446
ea = ea.reindex(columns=join_columns)
417447
eb = eb.reindex(columns=join_columns)
418448

@@ -424,42 +454,6 @@ def _check_align(self, a, b, axis, fill_axis, how, method, limit=None):
424454
tm.assert_frame_equal(aa, ea)
425455
tm.assert_frame_equal(ab, eb)
426456

427-
@pytest.mark.parametrize("meth", ["pad", "bfill"])
428-
@pytest.mark.parametrize("ax", [0, 1, None])
429-
@pytest.mark.parametrize("fax", [0, 1])
430-
@pytest.mark.parametrize("how", ["inner", "outer", "left", "right"])
431-
def test_align_fill_method(self, how, meth, ax, fax, float_frame):
432-
df = float_frame
433-
self._check_align_fill(df, how, meth, ax, fax)
434-
435-
def _check_align_fill(self, frame, kind, meth, ax, fax):
436-
left = frame.iloc[0:4, :10]
437-
right = frame.iloc[2:, 6:]
438-
empty = frame.iloc[:0, :0]
439-
440-
self._check_align(left, right, axis=ax, fill_axis=fax, how=kind, method=meth)
441-
self._check_align(
442-
left, right, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1
443-
)
444-
445-
# empty left
446-
self._check_align(empty, right, axis=ax, fill_axis=fax, how=kind, method=meth)
447-
self._check_align(
448-
empty, right, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1
449-
)
450-
451-
# empty right
452-
self._check_align(left, empty, axis=ax, fill_axis=fax, how=kind, method=meth)
453-
self._check_align(
454-
left, empty, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1
455-
)
456-
457-
# both empty
458-
self._check_align(empty, empty, axis=ax, fill_axis=fax, how=kind, method=meth)
459-
self._check_align(
460-
empty, empty, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1
461-
)
462-
463457
def test_align_series_check_copy(self):
464458
# GH#
465459
df = DataFrame({0: [1, 2]})

pandas/tests/indexes/multi/test_indexing.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import numpy as np
55
import pytest
66

7+
from pandas._libs import index as libindex
78
from pandas.errors import (
89
InvalidIndexError,
910
PerformanceWarning,
@@ -843,11 +844,12 @@ def test_contains_td64_level(self):
843844
assert "element_not_exit" not in idx
844845
assert "0 day 09:30:00" in idx
845846

846-
@pytest.mark.slow
847-
def test_large_mi_contains(self):
847+
def test_large_mi_contains(self, monkeypatch):
848848
# GH#10645
849-
result = MultiIndex.from_arrays([range(10**6), range(10**6)])
850-
assert (10**6, 0) not in result
849+
with monkeypatch.context():
850+
monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 10)
851+
result = MultiIndex.from_arrays([range(10), range(10)])
852+
assert (10, 0) not in result
851853

852854

853855
def test_timestamp_multiindex_indexer():

pandas/tests/indexing/interval/test_interval.py

+13-9
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import numpy as np
22
import pytest
33

4+
from pandas._libs import index as libindex
45
from pandas.compat import IS64
56

67
import pandas as pd
@@ -72,15 +73,18 @@ def test_getitem_non_matching(self, series_with_interval_index, indexer_sl):
7273
with pytest.raises(KeyError, match=r"\[-1\] not in index"):
7374
indexer_sl(ser)[[-1, 3]]
7475

75-
@pytest.mark.slow
76-
def test_loc_getitem_large_series(self):
77-
ser = Series(
78-
np.arange(1000000), index=IntervalIndex.from_breaks(np.arange(1000001))
79-
)
80-
81-
result1 = ser.loc[:80000]
82-
result2 = ser.loc[0:80000]
83-
result3 = ser.loc[0:80000:1]
76+
def test_loc_getitem_large_series(self, monkeypatch):
77+
size_cutoff = 20
78+
with monkeypatch.context():
79+
monkeypatch.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
80+
ser = Series(
81+
np.arange(size_cutoff),
82+
index=IntervalIndex.from_breaks(np.arange(size_cutoff + 1)),
83+
)
84+
85+
result1 = ser.loc[:8]
86+
result2 = ser.loc[0:8]
87+
result3 = ser.loc[0:8:1]
8488
tm.assert_series_equal(result1, result2)
8589
tm.assert_series_equal(result1, result3)
8690

pandas/tests/indexing/multiindex/test_chaining_and_caching.py

+11-9
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import numpy as np
22
import pytest
33

4+
from pandas._libs import index as libindex
45
from pandas.errors import SettingWithCopyError
56
import pandas.util._test_decorators as td
67

@@ -69,15 +70,16 @@ def test_cache_updating(using_copy_on_write):
6970
assert result == 2
7071

7172

72-
@pytest.mark.slow
73-
def test_indexer_caching():
73+
def test_indexer_caching(monkeypatch):
7474
# GH5727
7575
# make sure that indexers are in the _internal_names_set
76-
n = 1000001
77-
index = MultiIndex.from_arrays([np.arange(n), np.arange(n)])
78-
ser = Series(np.zeros(n), index=index)
76+
size_cutoff = 20
77+
with monkeypatch.context():
78+
monkeypatch.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
79+
index = MultiIndex.from_arrays([np.arange(size_cutoff), np.arange(size_cutoff)])
80+
s = Series(np.zeros(size_cutoff), index=index)
7981

80-
# setitem
81-
expected = Series(np.ones(n), index=index)
82-
ser[ser == 0] = 1
83-
tm.assert_series_equal(ser, expected)
82+
# setitem
83+
s[s == 0] = 1
84+
expected = Series(np.ones(size_cutoff), index=index)
85+
tm.assert_series_equal(s, expected)

pandas/tests/io/generate_legacy_storage_files.py

+1-7
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ def _create_sp_frame():
124124
return DataFrame(data, index=dates).apply(SparseArray)
125125

126126

127-
def create_data():
127+
def create_pickle_data():
128128
"""create the pickle data"""
129129
data = {
130130
"A": [0.0, 1.0, 2.0, 3.0, np.nan],
@@ -282,12 +282,6 @@ def create_data():
282282
}
283283

284284

285-
def create_pickle_data():
286-
data = create_data()
287-
288-
return data
289-
290-
291285
def platform_name():
292286
return "_".join(
293287
[

pandas/tests/io/parser/test_textreader.py

+14-13
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,8 @@ def test_empty_field_eof(self):
298298
}
299299
assert_array_dicts_equal(result, expected)
300300

301+
@pytest.mark.parametrize("repeat", range(10))
302+
def test_empty_field_eof_mem_access_bug(self, repeat):
301303
# GH5664
302304
a = DataFrame([["b"], [np.nan]], columns=["a"], index=["a", "c"])
303305
b = DataFrame([[1, 1, 1, 0], [1, 1, 1, 0]], columns=list("abcd"), index=[1, 1])
@@ -312,21 +314,20 @@ def test_empty_field_eof(self):
312314
index=[0, 5, 7, 12],
313315
)
314316

315-
for _ in range(100):
316-
df = read_csv(StringIO("a,b\nc\n"), skiprows=0, names=["a"], engine="c")
317-
tm.assert_frame_equal(df, a)
317+
df = read_csv(StringIO("a,b\nc\n"), skiprows=0, names=["a"], engine="c")
318+
tm.assert_frame_equal(df, a)
318319

319-
df = read_csv(
320-
StringIO("1,1,1,1,0\n" * 2 + "\n" * 2), names=list("abcd"), engine="c"
321-
)
322-
tm.assert_frame_equal(df, b)
320+
df = read_csv(
321+
StringIO("1,1,1,1,0\n" * 2 + "\n" * 2), names=list("abcd"), engine="c"
322+
)
323+
tm.assert_frame_equal(df, b)
323324

324-
df = read_csv(
325-
StringIO("0,1,2,3,4\n5,6\n7,8,9,10,11\n12,13,14"),
326-
names=list("abcd"),
327-
engine="c",
328-
)
329-
tm.assert_frame_equal(df, c)
325+
df = read_csv(
326+
StringIO("0,1,2,3,4\n5,6\n7,8,9,10,11\n12,13,14"),
327+
names=list("abcd"),
328+
engine="c",
329+
)
330+
tm.assert_frame_equal(df, c)
330331

331332
def test_empty_csv_input(self):
332333
# GH14867

pandas/tests/io/test_pickle.py

+33-29
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
1111
3. Move the created pickle to "data/legacy_pickle/<version>" directory.
1212
"""
13+
from __future__ import annotations
14+
1315
from array import array
1416
import bz2
1517
import datetime
@@ -22,6 +24,7 @@
2224
import pickle
2325
import shutil
2426
import tarfile
27+
from typing import Any
2528
import uuid
2629
import zipfile
2730

@@ -52,12 +55,6 @@
5255
)
5356

5457

55-
@pytest.fixture
56-
def current_pickle_data():
57-
# our current version pickle data
58-
return create_pickle_data()
59-
60-
6158
# ---------------------
6259
# comparison functions
6360
# ---------------------
@@ -173,6 +170,15 @@ def python_unpickler(path):
173170
return pickle.load(fh)
174171

175172

173+
def flatten(data: dict) -> list[tuple[str, Any]]:
174+
"""Flatten create_pickle_data"""
175+
return [
176+
(typ, example)
177+
for typ, examples in data.items()
178+
for example in examples.values()
179+
]
180+
181+
176182
@pytest.mark.parametrize(
177183
"pickle_writer",
178184
[
@@ -190,29 +196,27 @@ def python_unpickler(path):
190196
],
191197
)
192198
@pytest.mark.parametrize("writer", [pd.to_pickle, python_pickler])
193-
def test_round_trip_current(current_pickle_data, pickle_writer, writer):
194-
data = current_pickle_data
195-
for typ, dv in data.items():
196-
for dt, expected in dv.items():
197-
with tm.ensure_clean() as path:
198-
# test writing with each pickler
199-
pickle_writer(expected, path)
200-
201-
# test reading with each unpickler
202-
result = pd.read_pickle(path)
203-
compare_element(result, expected, typ)
204-
205-
result = python_unpickler(path)
206-
compare_element(result, expected, typ)
207-
208-
# and the same for file objects (GH 35679)
209-
with open(path, mode="wb") as handle:
210-
writer(expected, path)
211-
handle.seek(0) # shouldn't close file handle
212-
with open(path, mode="rb") as handle:
213-
result = pd.read_pickle(handle)
214-
handle.seek(0) # shouldn't close file handle
215-
compare_element(result, expected, typ)
199+
@pytest.mark.parametrize("typ, expected", flatten(create_pickle_data()))
200+
def test_round_trip_current(typ, expected, pickle_writer, writer):
201+
with tm.ensure_clean() as path:
202+
# test writing with each pickler
203+
pickle_writer(expected, path)
204+
205+
# test reading with each unpickler
206+
result = pd.read_pickle(path)
207+
compare_element(result, expected, typ)
208+
209+
result = python_unpickler(path)
210+
compare_element(result, expected, typ)
211+
212+
# and the same for file objects (GH 35679)
213+
with open(path, mode="wb") as handle:
214+
writer(expected, path)
215+
handle.seek(0) # shouldn't close file handle
216+
with open(path, mode="rb") as handle:
217+
result = pd.read_pickle(handle)
218+
handle.seek(0) # shouldn't close file handle
219+
compare_element(result, expected, typ)
216220

217221

218222
def test_pickle_path_pathlib():

pandas/tests/reshape/concat/test_dataframe.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ def test_concat_duplicates_in_index_with_keys(self):
197197
@pytest.mark.parametrize("axis", [0, 1])
198198
def test_concat_copies(self, axis, order, ignore_index, using_copy_on_write):
199199
# based on asv ConcatDataFrames
200-
df = DataFrame(np.zeros((10000, 200), dtype=np.float32, order=order))
200+
df = DataFrame(np.zeros((10, 5), dtype=np.float32, order=order))
201201

202202
res = concat([df] * 5, axis=axis, ignore_index=ignore_index, copy=True)
203203

0 commit comments

Comments
 (0)