Skip to content

TST/CLN: Remove makeCategoricalIndex #56186

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Nov 28, 2023
8 changes: 3 additions & 5 deletions asv_bench/benchmarks/categoricals.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@

import pandas as pd

from .pandas_vb_common import tm

try:
from pandas.api.types import union_categoricals
except ImportError:
Expand Down Expand Up @@ -189,7 +187,7 @@ def setup(self):
N = 10**5
ncats = 15

self.s_str = pd.Series(tm.makeCategoricalIndex(N, ncats)).astype(str)
self.s_str = pd.Series(np.random.randint(0, ncats, size=N).astype(str))
self.s_str_cat = pd.Series(self.s_str, dtype="category")
with warnings.catch_warnings(record=True):
str_cat_type = pd.CategoricalDtype(set(self.s_str), ordered=True)
Expand Down Expand Up @@ -242,7 +240,7 @@ def time_categorical_series_is_monotonic_decreasing(self):
class Contains:
def setup(self):
N = 10**5
self.ci = tm.makeCategoricalIndex(N)
self.ci = pd.CategoricalIndex(np.arange(N))
self.c = self.ci.values
self.key = self.ci.categories[0]

Expand Down Expand Up @@ -325,7 +323,7 @@ def time_sort_values(self):
class SearchSorted:
def setup(self):
N = 10**5
self.ci = tm.makeCategoricalIndex(N).sort_values()
self.ci = pd.CategoricalIndex(np.arange(N)).sort_values()
self.c = self.ci.values
self.key = self.ci.categories[1]

Expand Down
28 changes: 0 additions & 28 deletions pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
from pandas import (
ArrowDtype,
Categorical,
CategoricalIndex,
DataFrame,
DatetimeIndex,
Index,
Expand Down Expand Up @@ -350,36 +349,10 @@ def to_array(obj):
# Others


def rands_array(
    nchars, size: int, dtype: NpDtype = "O", replace: bool = True
) -> np.ndarray:
    """
    Generate an array of fixed-length alphanumeric (unicode) strings.

    Parameters
    ----------
    nchars : int
        Number of characters in every generated string.
    size : int
        Number of strings to generate. It is forwarded to ``np.prod`` and
        ``reshape``, so a shape tuple is accepted as well.
    dtype : NpDtype, default "O"
        Dtype the result is cast to before it is returned.
    replace : bool, default True
        Whether characters are sampled with replacement. With
        ``replace=False`` every character is used at most once across the
        whole array, so ``nchars * size`` cannot exceed the 62 available
        characters (ASCII letters plus digits).

    Returns
    -------
    np.ndarray
        Array of shape ``size`` holding strings of length ``nchars``.
    """
    chars = np.array(list(string.ascii_letters + string.digits), dtype=(np.str_, 1))
    # The generator is seeded with a constant, so repeated calls with the same
    # arguments produce the same array.
    rng = np.random.default_rng(2)
    flat = rng.choice(chars, size=nchars * np.prod(size), replace=replace)
    # Reinterpret each run of ``nchars`` single characters as one string.
    retval = flat.view((np.str_, nchars)).reshape(size)
    return retval.astype(dtype)


def getCols(k) -> str:
    """
    Return the first ``k`` uppercase ASCII letters as a single string.

    The result is a plain slice of ``string.ascii_uppercase``, so asking for
    more than 26 letters simply returns the whole alphabet.
    """
    return string.ascii_uppercase[:k]


def makeCategoricalIndex(
    k: int = 10, n: int = 3, name=None, **kwargs
) -> CategoricalIndex:
    """
    Make a length ``k`` CategoricalIndex of ``n`` categories.

    Parameters
    ----------
    k : int, default 10
        Length of the returned index.
    n : int, default 3
        Number of categories; each is a 4-character string produced by
        ``rands_array`` with ``replace=False``.
    name : optional
        Passed through as the name of the resulting index.
    **kwargs
        Forwarded to the ``CategoricalIndex`` constructor.

    Returns
    -------
    CategoricalIndex
    """
    categories = rands_array(nchars=4, size=n, replace=False)
    # Codes cycle 0, 1, ..., n - 1, 0, 1, ... across the k positions.
    codes = np.arange(k) % n
    return CategoricalIndex(
        Categorical.from_codes(codes, categories=categories), name=name, **kwargs
    )


def makeNumericIndex(k: int = 10, *, name=None, dtype: Dtype | None) -> Index:
dtype = pandas_dtype(dtype)
assert isinstance(dtype, np.dtype)
Expand Down Expand Up @@ -1017,7 +990,6 @@ def shares_memory(left, right) -> bool:
"iat",
"iloc",
"loc",
"makeCategoricalIndex",
"makeCustomDataframe",
"makeCustomIndex",
"makeDataFrame",
Expand Down
3 changes: 2 additions & 1 deletion pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@

import pandas as pd
from pandas import (
CategoricalIndex,
DataFrame,
Interval,
IntervalIndex,
Expand Down Expand Up @@ -630,7 +631,7 @@ def _create_mi_with_dt64tz_level():
"bool-dtype": Index(np.random.default_rng(2).standard_normal(10) < 0),
"complex64": tm.makeNumericIndex(100, dtype="float64").astype("complex64"),
"complex128": tm.makeNumericIndex(100, dtype="float64").astype("complex128"),
"categorical": tm.makeCategoricalIndex(100),
"categorical": CategoricalIndex(list("abcd") * 25),
"interval": IntervalIndex.from_breaks(np.linspace(0, 100, num=101)),
"empty": Index([]),
"tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])),
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/frame/methods/test_set_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from pandas import (
Categorical,
CategoricalIndex,
DataFrame,
DatetimeIndex,
Index,
Expand Down Expand Up @@ -398,8 +399,7 @@ def test_set_index_pass_multiindex(self, frame_of_index_cols, drop, append):
tm.assert_frame_equal(result, expected)

def test_construction_with_categorical_index(self):
ci = tm.makeCategoricalIndex(10)
ci.name = "B"
ci = CategoricalIndex(list("ab") * 5, name="B")

# with Categorical
df = DataFrame(
Expand Down
24 changes: 12 additions & 12 deletions pandas/tests/generic/test_to_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@ class TestDataFrameToXArray:
def df(self):
return DataFrame(
{
"a": list("abc"),
"b": list(range(1, 4)),
"c": np.arange(3, 6).astype("u1"),
"d": np.arange(4.0, 7.0, dtype="float64"),
"e": [True, False, True],
"f": Categorical(list("abc")),
"g": date_range("20130101", periods=3),
"h": date_range("20130101", periods=3, tz="US/Eastern"),
"a": list("abcd"),
"b": list(range(1, 5)),
"c": np.arange(3, 7).astype("u1"),
"d": np.arange(4.0, 8.0, dtype="float64"),
"e": [True, False, True, False],
"f": Categorical(list("abcd")),
"g": date_range("20130101", periods=4),
"h": date_range("20130101", periods=4, tz="US/Eastern"),
}
)

Expand All @@ -37,11 +37,11 @@ def test_to_xarray_index_types(self, index_flat, df, using_infer_string):

from xarray import Dataset

df.index = index[:3]
df.index = index[:4]
df.index.name = "foo"
df.columns.name = "bar"
result = df.to_xarray()
assert result.dims["foo"] == 3
assert result.dims["foo"] == 4
assert len(result.coords) == 1
assert len(result.data_vars) == 8
tm.assert_almost_equal(list(result.coords.keys()), ["foo"])
Expand Down Expand Up @@ -69,10 +69,10 @@ def test_to_xarray_with_multiindex(self, df, using_infer_string):
from xarray import Dataset

# MultiIndex
df.index = MultiIndex.from_product([["a"], range(3)], names=["one", "two"])
df.index = MultiIndex.from_product([["a"], range(4)], names=["one", "two"])
result = df.to_xarray()
assert result.dims["one"] == 1
assert result.dims["two"] == 3
assert result.dims["two"] == 4
assert len(result.coords) == 2
assert len(result.data_vars) == 8
tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"])
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexes/categorical/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ def test_ensure_copied_data(self):
#
# Must be tested separately from other indexes because
# self.values is not an ndarray.
index = tm.makeCategoricalIndex(10)
index = CategoricalIndex(list("ab") * 5)

result = CategoricalIndex(index.values, copy=True)
tm.assert_index_equal(index, result)
Expand All @@ -261,7 +261,7 @@ def test_ensure_copied_data(self):
class TestCategoricalIndex2:
def test_view_i8(self):
# GH#25464
ci = tm.makeCategoricalIndex(100)
ci = CategoricalIndex(list("ab") * 50)
msg = "When changing to a larger dtype, its size must be a divisor"
with pytest.raises(ValueError, match=msg):
ci.view("i8")
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/series/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ def test_tab_completion_with_categorical(self):
@pytest.mark.parametrize(
"index",
[
Index(list("ab") * 5, dtype="category"),
Index([str(i) for i in range(10)]),
tm.makeCategoricalIndex(10),
Index(["foo", "bar", "baz"] * 2),
tm.makeDateIndex(10),
tm.makePeriodIndex(10),
Expand Down