Skip to content

Commit e973b42

Browse files
authored
TST/CLN: Remove makeCategoricalIndex (#56186)
* TST/CLN: Remove makeCategoricalIndex
* Remove usage in asv_bench
* Adjust xarray test for more categories
* Remove rands_array
1 parent 5ad9abd commit e973b42

File tree

7 files changed: +22 additions, -51 deletions (lines changed)

asv_bench/benchmarks/categoricals.py

+3 -5
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,6 @@
66

77
import pandas as pd
88

9-
from .pandas_vb_common import tm
10-
119
try:
1210
from pandas.api.types import union_categoricals
1311
except ImportError:
@@ -189,7 +187,7 @@ def setup(self):
189187
N = 10**5
190188
ncats = 15
191189

192-
self.s_str = pd.Series(tm.makeCategoricalIndex(N, ncats)).astype(str)
190+
self.s_str = pd.Series(np.random.randint(0, ncats, size=N).astype(str))
193191
self.s_str_cat = pd.Series(self.s_str, dtype="category")
194192
with warnings.catch_warnings(record=True):
195193
str_cat_type = pd.CategoricalDtype(set(self.s_str), ordered=True)
@@ -242,7 +240,7 @@ def time_categorical_series_is_monotonic_decreasing(self):
242240
class Contains:
243241
def setup(self):
244242
N = 10**5
245-
self.ci = tm.makeCategoricalIndex(N)
243+
self.ci = pd.CategoricalIndex(np.arange(N))
246244
self.c = self.ci.values
247245
self.key = self.ci.categories[0]
248246

@@ -325,7 +323,7 @@ def time_sort_values(self):
325323
class SearchSorted:
326324
def setup(self):
327325
N = 10**5
328-
self.ci = tm.makeCategoricalIndex(N).sort_values()
326+
self.ci = pd.CategoricalIndex(np.arange(N)).sort_values()
329327
self.c = self.ci.values
330328
self.key = self.ci.categories[1]
331329

pandas/_testing/__init__.py

-28
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,6 @@
3939
from pandas import (
4040
ArrowDtype,
4141
Categorical,
42-
CategoricalIndex,
4342
DataFrame,
4443
DatetimeIndex,
4544
Index,
@@ -348,36 +347,10 @@ def to_array(obj):
348347
# Others
349348

350349

351-
def rands_array(
352-
nchars, size: int, dtype: NpDtype = "O", replace: bool = True
353-
) -> np.ndarray:
354-
"""
355-
Generate an array of byte strings.
356-
"""
357-
chars = np.array(list(string.ascii_letters + string.digits), dtype=(np.str_, 1))
358-
retval = (
359-
np.random.default_rng(2)
360-
.choice(chars, size=nchars * np.prod(size), replace=replace)
361-
.view((np.str_, nchars))
362-
.reshape(size)
363-
)
364-
return retval.astype(dtype)
365-
366-
367350
def getCols(k) -> str:
368351
return string.ascii_uppercase[:k]
369352

370353

371-
def makeCategoricalIndex(
372-
k: int = 10, n: int = 3, name=None, **kwargs
373-
) -> CategoricalIndex:
374-
"""make a length k index or n categories"""
375-
x = rands_array(nchars=4, size=n, replace=False)
376-
return CategoricalIndex(
377-
Categorical.from_codes(np.arange(k) % n, categories=x), name=name, **kwargs
378-
)
379-
380-
381354
def makeNumericIndex(k: int = 10, *, name=None, dtype: Dtype | None) -> Index:
382355
dtype = pandas_dtype(dtype)
383356
assert isinstance(dtype, np.dtype)
@@ -998,7 +971,6 @@ def shares_memory(left, right) -> bool:
998971
"iat",
999972
"iloc",
1000973
"loc",
1001-
"makeCategoricalIndex",
1002974
"makeCustomDataframe",
1003975
"makeCustomIndex",
1004976
"makeDataFrame",

pandas/conftest.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,7 @@
5959

6060
import pandas as pd
6161
from pandas import (
62+
CategoricalIndex,
6263
DataFrame,
6364
Interval,
6465
IntervalIndex,
@@ -632,7 +633,7 @@ def _create_mi_with_dt64tz_level():
632633
"bool-dtype": Index(np.random.default_rng(2).standard_normal(10) < 0),
633634
"complex64": tm.makeNumericIndex(100, dtype="float64").astype("complex64"),
634635
"complex128": tm.makeNumericIndex(100, dtype="float64").astype("complex128"),
635-
"categorical": tm.makeCategoricalIndex(100),
636+
"categorical": CategoricalIndex(list("abcd") * 25),
636637
"interval": IntervalIndex.from_breaks(np.linspace(0, 100, num=101)),
637638
"empty": Index([]),
638639
"tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])),

pandas/tests/frame/methods/test_set_index.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212

1313
from pandas import (
1414
Categorical,
15+
CategoricalIndex,
1516
DataFrame,
1617
DatetimeIndex,
1718
Index,
@@ -398,8 +399,7 @@ def test_set_index_pass_multiindex(self, frame_of_index_cols, drop, append):
398399
tm.assert_frame_equal(result, expected)
399400

400401
def test_construction_with_categorical_index(self):
401-
ci = tm.makeCategoricalIndex(10)
402-
ci.name = "B"
402+
ci = CategoricalIndex(list("ab") * 5, name="B")
403403

404404
# with Categorical
405405
df = DataFrame(

pandas/tests/generic/test_to_xarray.py

+12 -12
Original file line number | Diff line number | Diff line change
@@ -18,14 +18,14 @@ class TestDataFrameToXArray:
1818
def df(self):
1919
return DataFrame(
2020
{
21-
"a": list("abc"),
22-
"b": list(range(1, 4)),
23-
"c": np.arange(3, 6).astype("u1"),
24-
"d": np.arange(4.0, 7.0, dtype="float64"),
25-
"e": [True, False, True],
26-
"f": Categorical(list("abc")),
27-
"g": date_range("20130101", periods=3),
28-
"h": date_range("20130101", periods=3, tz="US/Eastern"),
21+
"a": list("abcd"),
22+
"b": list(range(1, 5)),
23+
"c": np.arange(3, 7).astype("u1"),
24+
"d": np.arange(4.0, 8.0, dtype="float64"),
25+
"e": [True, False, True, False],
26+
"f": Categorical(list("abcd")),
27+
"g": date_range("20130101", periods=4),
28+
"h": date_range("20130101", periods=4, tz="US/Eastern"),
2929
}
3030
)
3131

@@ -37,11 +37,11 @@ def test_to_xarray_index_types(self, index_flat, df, using_infer_string):
3737

3838
from xarray import Dataset
3939

40-
df.index = index[:3]
40+
df.index = index[:4]
4141
df.index.name = "foo"
4242
df.columns.name = "bar"
4343
result = df.to_xarray()
44-
assert result.dims["foo"] == 3
44+
assert result.dims["foo"] == 4
4545
assert len(result.coords) == 1
4646
assert len(result.data_vars) == 8
4747
tm.assert_almost_equal(list(result.coords.keys()), ["foo"])
@@ -69,10 +69,10 @@ def test_to_xarray_with_multiindex(self, df, using_infer_string):
6969
from xarray import Dataset
7070

7171
# MultiIndex
72-
df.index = MultiIndex.from_product([["a"], range(3)], names=["one", "two"])
72+
df.index = MultiIndex.from_product([["a"], range(4)], names=["one", "two"])
7373
result = df.to_xarray()
7474
assert result.dims["one"] == 1
75-
assert result.dims["two"] == 3
75+
assert result.dims["two"] == 4
7676
assert len(result.coords) == 2
7777
assert len(result.data_vars) == 8
7878
tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"])

pandas/tests/indexes/categorical/test_category.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -248,7 +248,7 @@ def test_ensure_copied_data(self):
248248
#
249249
# Must be tested separately from other indexes because
250250
# self.values is not an ndarray.
251-
index = tm.makeCategoricalIndex(10)
251+
index = CategoricalIndex(list("ab") * 5)
252252

253253
result = CategoricalIndex(index.values, copy=True)
254254
tm.assert_index_equal(index, result)
@@ -261,7 +261,7 @@ def test_ensure_copied_data(self):
261261
class TestCategoricalIndex2:
262262
def test_view_i8(self):
263263
# GH#25464
264-
ci = tm.makeCategoricalIndex(100)
264+
ci = CategoricalIndex(list("ab") * 50)
265265
msg = "When changing to a larger dtype, its size must be a divisor"
266266
with pytest.raises(ValueError, match=msg):
267267
ci.view("i8")

pandas/tests/series/test_api.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -69,8 +69,8 @@ def test_tab_completion_with_categorical(self):
6969
@pytest.mark.parametrize(
7070
"index",
7171
[
72+
Index(list("ab") * 5, dtype="category"),
7273
Index([str(i) for i in range(10)]),
73-
tm.makeCategoricalIndex(10),
7474
Index(["foo", "bar", "baz"] * 2),
7575
tm.makeDateIndex(10),
7676
tm.makePeriodIndex(10),

0 commit comments

Comments
 (0)