Skip to content

Commit a38ecd5

Browse files
authored
TST/CLN: Remove makeStringIndex (#56155)
* TST/CLN: Remove makeStringIndex
* Fix failures
* Fix one more test
* Remove makeBoolIndex too
* Remove name
* Adjust test
* Fix benchmarks
* Fix another benchmark
1 parent 27ec887 commit a38ecd5

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

48 files changed

+253
-274
lines changed

asv_bench/benchmarks/algorithms.py

+9-6
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,6 @@
44

55
import pandas as pd
66

7-
from .pandas_vb_common import tm
8-
97
for imp in ["pandas.util", "pandas.tools.hashing"]:
108
try:
119
hashing = import_module(imp)
@@ -47,9 +45,12 @@ def setup(self, unique, sort, dtype):
4745
elif dtype == "datetime64[ns, tz]":
4846
data = pd.date_range("2011-01-01", freq="h", periods=N, tz="Asia/Tokyo")
4947
elif dtype == "object_str":
50-
data = tm.makeStringIndex(N)
48+
data = pd.Index([f"i-{i}" for i in range(N)], dtype=object)
5149
elif dtype == "string[pyarrow]":
52-
data = pd.array(tm.makeStringIndex(N), dtype="string[pyarrow]")
50+
data = pd.array(
51+
pd.Index([f"i-{i}" for i in range(N)], dtype=object),
52+
dtype="string[pyarrow]",
53+
)
5354
else:
5455
raise NotImplementedError
5556

@@ -88,7 +89,7 @@ def setup(self, unique, keep, dtype):
8889
elif dtype == "float64":
8990
data = pd.Index(np.random.randn(N), dtype="float64")
9091
elif dtype == "string":
91-
data = tm.makeStringIndex(N)
92+
data = pd.Index([f"i-{i}" for i in range(N)], dtype=object)
9293
elif dtype == "datetime64[ns]":
9394
data = pd.date_range("2011-01-01", freq="h", periods=N)
9495
elif dtype == "datetime64[ns, tz]":
@@ -136,7 +137,9 @@ def setup_cache(self):
136137
df = pd.DataFrame(
137138
{
138139
"strings": pd.Series(
139-
tm.makeStringIndex(10000).take(np.random.randint(0, 10000, size=N))
140+
pd.Index([f"i-{i}" for i in range(10000)], dtype=object).take(
141+
np.random.randint(0, 10000, size=N)
142+
)
140143
),
141144
"floats": np.random.randn(N),
142145
"ints": np.arange(N),

asv_bench/benchmarks/algos/isin.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,6 @@
88
date_range,
99
)
1010

11-
from ..pandas_vb_common import tm
12-
1311

1412
class IsIn:
1513
params = [
@@ -60,7 +58,9 @@ def setup(self, dtype):
6058

6159
elif dtype in ["str", "string[python]", "string[pyarrow]"]:
6260
try:
63-
self.series = Series(tm.makeStringIndex(N), dtype=dtype)
61+
self.series = Series(
62+
Index([f"i-{i}" for i in range(N)], dtype=object), dtype=dtype
63+
)
6464
except ImportError:
6565
raise NotImplementedError
6666
self.values = list(self.series[:2])

asv_bench/benchmarks/ctors.py

+1-3
Original file line number | Diff line number | Diff line change
@@ -9,8 +9,6 @@
99
date_range,
1010
)
1111

12-
from .pandas_vb_common import tm
13-
1412

1513
def no_change(arr):
1614
return arr
@@ -115,7 +113,7 @@ def time_dtindex_from_index_with_series(self):
115113
class MultiIndexConstructor:
116114
def setup(self):
117115
N = 10**4
118-
self.iterables = [tm.makeStringIndex(N), range(20)]
116+
self.iterables = [Index([f"i-{i}" for i in range(N)], dtype=object), range(20)]
119117

120118
def time_multiindex_from_iterables(self):
121119
MultiIndex.from_product(self.iterables)

asv_bench/benchmarks/dtypes.py

+6-3
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,10 @@
33
import numpy as np
44

55
import pandas as pd
6-
from pandas import DataFrame
6+
from pandas import (
7+
DataFrame,
8+
Index,
9+
)
710
import pandas._testing as tm
811
from pandas.api.types import (
912
is_extension_array_dtype,
@@ -73,8 +76,8 @@ class SelectDtypes:
7376

7477
def setup(self, dtype):
7578
N, K = 5000, 50
76-
self.index = tm.makeStringIndex(N)
77-
self.columns = tm.makeStringIndex(K)
79+
self.index = Index([f"i-{i}" for i in range(N)], dtype=object)
80+
self.columns = Index([f"i-{i}" for i in range(K)], dtype=object)
7881

7982
def create_df(data):
8083
return DataFrame(data, index=self.index, columns=self.columns)

asv_bench/benchmarks/frame_ctor.py

+2-4
Original file line number | Diff line number | Diff line change
@@ -12,8 +12,6 @@
1212
date_range,
1313
)
1414

15-
from .pandas_vb_common import tm
16-
1715
try:
1816
from pandas.tseries.offsets import (
1917
Hour,
@@ -30,8 +28,8 @@
3028
class FromDicts:
3129
def setup(self):
3230
N, K = 5000, 50
33-
self.index = tm.makeStringIndex(N)
34-
self.columns = tm.makeStringIndex(K)
31+
self.index = pd.Index([f"i-{i}" for i in range(N)], dtype=object)
32+
self.columns = pd.Index([f"i-{i}" for i in range(K)], dtype=object)
3533
frame = DataFrame(np.random.randn(N, K), index=self.index, columns=self.columns)
3634
self.data = frame.to_dict()
3735
self.dict_list = frame.to_dict(orient="records")

asv_bench/benchmarks/frame_methods.py

+7-4
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55

66
from pandas import (
77
DataFrame,
8+
Index,
89
MultiIndex,
910
NaT,
1011
Series,
@@ -14,8 +15,6 @@
1415
timedelta_range,
1516
)
1617

17-
from .pandas_vb_common import tm
18-
1918

2019
class AsType:
2120
params = [
@@ -703,8 +702,12 @@ def setup(self, monotonic):
703702
K = 10
704703
df = DataFrame(
705704
{
706-
"key1": tm.makeStringIndex(N).values.repeat(K),
707-
"key2": tm.makeStringIndex(N).values.repeat(K),
705+
"key1": Index([f"i-{i}" for i in range(N)], dtype=object).values.repeat(
706+
K
707+
),
708+
"key2": Index([f"i-{i}" for i in range(N)], dtype=object).values.repeat(
709+
K
710+
),
708711
"value": np.random.randn(N * K),
709712
}
710713
)

asv_bench/benchmarks/gil.py

+2-4
Original file line number | Diff line number | Diff line change
@@ -5,15 +5,14 @@
55

66
from pandas import (
77
DataFrame,
8+
Index,
89
Series,
910
date_range,
1011
factorize,
1112
read_csv,
1213
)
1314
from pandas.core.algorithms import take_nd
1415

15-
from .pandas_vb_common import tm
16-
1716
try:
1817
from pandas import (
1918
rolling_kurt,
@@ -34,7 +33,6 @@
3433
except ImportError:
3534
from pandas import algos
3635

37-
3836
from .pandas_vb_common import BaseIO # isort:skip
3937

4038

@@ -305,7 +303,7 @@ class ParallelFactorize:
305303
param_names = ["threads"]
306304

307305
def setup(self, threads):
308-
strings = tm.makeStringIndex(100000)
306+
strings = Index([f"i-{i}" for i in range(100000)], dtype=object)
309307

310308
@test_parallel(num_threads=threads)
311309
def parallel():

asv_bench/benchmarks/groupby.py

+7-5
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,6 @@
1717
to_timedelta,
1818
)
1919

20-
from .pandas_vb_common import tm
21-
2220
method_blocklist = {
2321
"object": {
2422
"diff",
@@ -167,10 +165,14 @@ def setup_cache(self):
167165
"int64_small": Series(np.random.randint(0, 100, size=size)),
168166
"int64_large": Series(np.random.randint(0, 10000, size=size)),
169167
"object_small": Series(
170-
tm.makeStringIndex(100).take(np.random.randint(0, 100, size=size))
168+
Index([f"i-{i}" for i in range(100)], dtype=object).take(
169+
np.random.randint(0, 100, size=size)
170+
)
171171
),
172172
"object_large": Series(
173-
tm.makeStringIndex(10000).take(np.random.randint(0, 10000, size=size))
173+
Index([f"i-{i}" for i in range(10000)], dtype=object).take(
174+
np.random.randint(0, 10000, size=size)
175+
)
174176
),
175177
}
176178
return data
@@ -912,7 +914,7 @@ def setup(self):
912914
n1 = 400
913915
n2 = 250
914916
index = MultiIndex(
915-
levels=[np.arange(n1), tm.makeStringIndex(n2)],
917+
levels=[np.arange(n1), Index([f"i-{i}" for i in range(n2)], dtype=object)],
916918
codes=[np.repeat(range(n1), n2).tolist(), list(range(n2)) * n1],
917919
names=["lev1", "lev2"],
918920
)

asv_bench/benchmarks/index_object.py

+7-4
Original file line number | Diff line number | Diff line change
@@ -12,8 +12,6 @@
1212
date_range,
1313
)
1414

15-
from .pandas_vb_common import tm
16-
1715

1816
class SetOperations:
1917
params = (
@@ -30,7 +28,7 @@ def setup(self, index_structure, dtype, method):
3028
date_str_left = Index(dates_left.strftime(fmt))
3129
int_left = Index(np.arange(N))
3230
ea_int_left = Index(np.arange(N), dtype="Int64")
33-
str_left = tm.makeStringIndex(N)
31+
str_left = Index([f"i-{i}" for i in range(N)], dtype=object)
3432

3533
data = {
3634
"datetime": dates_left,
@@ -155,7 +153,12 @@ class Indexing:
155153

156154
def setup(self, dtype):
157155
N = 10**6
158-
self.idx = getattr(tm, f"make{dtype}Index")(N)
156+
if dtype == "String":
157+
self.idx = Index([f"i-{i}" for i in range(N)], dtype=object)
158+
elif dtype == "Float":
159+
self.idx = Index(np.arange(N), dtype=np.float64)
160+
elif dtype == "Int":
161+
self.idx = Index(np.arange(N), dtype=np.int64)
159162
self.array_mask = (np.arange(N) % 3) == 0
160163
self.series_mask = Series(self.array_mask)
161164
self.sorted = self.idx.sort_values()

asv_bench/benchmarks/indexing.py

+3-5
Original file line number | Diff line number | Diff line change
@@ -22,8 +22,6 @@
2222
period_range,
2323
)
2424

25-
from .pandas_vb_common import tm
26-
2725

2826
class NumericSeriesIndexing:
2927
params = [
@@ -124,7 +122,7 @@ class NonNumericSeriesIndexing:
124122
def setup(self, index, index_structure):
125123
N = 10**6
126124
if index == "string":
127-
index = tm.makeStringIndex(N)
125+
index = Index([f"i-{i}" for i in range(N)], dtype=object)
128126
elif index == "datetime":
129127
index = date_range("1900", periods=N, freq="s")
130128
elif index == "period":
@@ -156,8 +154,8 @@ def time_getitem_list_like(self, index, index_structure):
156154

157155
class DataFrameStringIndexing:
158156
def setup(self):
159-
index = tm.makeStringIndex(1000)
160-
columns = tm.makeStringIndex(30)
157+
index = Index([f"i-{i}" for i in range(1000)], dtype=object)
158+
columns = Index([f"i-{i}" for i in range(30)], dtype=object)
161159
with warnings.catch_warnings(record=True):
162160
self.df = DataFrame(np.random.randn(1000, 30), index=index, columns=columns)
163161
self.idx_scalar = index[100]

asv_bench/benchmarks/inference.py

+3-5
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
import numpy as np
1010

1111
from pandas import (
12+
Index,
1213
NaT,
1314
Series,
1415
date_range,
@@ -17,10 +18,7 @@
1718
to_timedelta,
1819
)
1920

20-
from .pandas_vb_common import (
21-
lib,
22-
tm,
23-
)
21+
from .pandas_vb_common import lib
2422

2523

2624
class ToNumeric:
@@ -31,7 +29,7 @@ def setup(self, errors):
3129
N = 10000
3230
self.float = Series(np.random.randn(N))
3331
self.numstr = self.float.astype("str")
34-
self.str = Series(tm.makeStringIndex(N))
32+
self.str = Series(Index([f"i-{i}" for i in range(N)], dtype=object))
3533

3634
def time_from_float(self, errors):
3735
to_numeric(self.float, errors=errors)

asv_bench/benchmarks/io/csv.py

+3-5
Original file line number | Diff line number | Diff line change
@@ -10,17 +10,15 @@
1010
from pandas import (
1111
Categorical,
1212
DataFrame,
13+
Index,
1314
concat,
1415
date_range,
1516
period_range,
1617
read_csv,
1718
to_datetime,
1819
)
1920

20-
from ..pandas_vb_common import (
21-
BaseIO,
22-
tm,
23-
)
21+
from ..pandas_vb_common import BaseIO
2422

2523

2624
class ToCSV(BaseIO):
@@ -288,7 +286,7 @@ class ReadCSVSkipRows(BaseIO):
288286

289287
def setup(self, skiprows, engine):
290288
N = 20000
291-
index = tm.makeStringIndex(N)
289+
index = Index([f"i-{i}" for i in range(N)], dtype=object)
292290
df = DataFrame(
293291
{
294292
"float1": np.random.randn(N),

asv_bench/benchmarks/io/excel.py

+2-3
Original file line number | Diff line number | Diff line change
@@ -12,12 +12,11 @@
1212
from pandas import (
1313
DataFrame,
1414
ExcelWriter,
15+
Index,
1516
date_range,
1617
read_excel,
1718
)
1819

19-
from ..pandas_vb_common import tm
20-
2120

2221
def _generate_dataframe():
2322
N = 2000
@@ -27,7 +26,7 @@ def _generate_dataframe():
2726
columns=[f"float{i}" for i in range(C)],
2827
index=date_range("20000101", periods=N, freq="h"),
2928
)
30-
df["object"] = tm.makeStringIndex(N)
29+
df["object"] = Index([f"i-{i}" for i in range(N)], dtype=object)
3130
return df
3231

3332

0 commit comments

Comments
 (0)