Skip to content

Commit 0b9e784

Browse files
authored
TST/ CLN: Remove makeCustomIndex/DataFrame (#56331)
* Remove makeCustom * Fix mismatches * Remove makeCustomIndex in all * Fix excel
1 parent dc4c474 commit 0b9e784

File tree

19 files changed

+311
-403
lines changed

19 files changed

+311
-403
lines changed

doc/source/user_guide/io.rst

+3-3
Original file line numberDiff line numberDiff line change
@@ -1490,9 +1490,9 @@ rows will skip the intervening rows.
14901490

14911491
.. ipython:: python
14921492
1493-
from pandas._testing import makeCustomDataframe as mkdf
1494-
1495-
df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4)
1493+
mi_idx = pd.MultiIndex.from_arrays([[1, 2, 3, 4], list("abcd")], names=list("ab"))
1494+
mi_col = pd.MultiIndex.from_arrays([[1, 2], list("ab")], names=list("cd"))
1495+
df = pd.DataFrame(np.ones((4, 2)), index=mi_idx, columns=mi_col)
14961496
df.to_csv("mi.csv")
14971497
print(open("mi.csv").read())
14981498
pd.read_csv("mi.csv", header=[0, 1, 2, 3], index_col=[0, 1])

doc/source/whatsnew/v0.12.0.rst

+3-3
Original file line numberDiff line numberDiff line change
@@ -250,9 +250,9 @@ IO enhancements
250250

251251
.. ipython:: python
252252
253-
from pandas._testing import makeCustomDataframe as mkdf
254-
255-
df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4)
253+
mi_idx = pd.MultiIndex.from_arrays([[1, 2, 3, 4], list("abcd")], names=list("ab"))
254+
mi_col = pd.MultiIndex.from_arrays([[1, 2], list("ab")], names=list("cd"))
255+
df = pd.DataFrame(np.ones((4, 2)), index=mi_idx, columns=mi_col)
256256
df.to_csv("mi.csv")
257257
print(open("mi.csv").read())
258258
pd.read_csv("mi.csv", header=[0, 1, 2, 3], index_col=[0, 1])

pandas/_testing/__init__.py

+1-234
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
from __future__ import annotations
22

3-
import collections
4-
from collections import Counter
53
from decimal import Decimal
64
import operator
75
import os
@@ -24,10 +22,7 @@
2422

2523
from pandas.compat import pa_version_under10p1
2624

27-
from pandas.core.dtypes.common import (
28-
is_sequence,
29-
is_string_dtype,
30-
)
25+
from pandas.core.dtypes.common import is_string_dtype
3126

3227
import pandas as pd
3328
from pandas import (
@@ -38,9 +33,6 @@
3833
MultiIndex,
3934
RangeIndex,
4035
Series,
41-
date_range,
42-
period_range,
43-
timedelta_range,
4436
)
4537
from pandas._testing._io import (
4638
round_trip_localpath,
@@ -332,229 +324,6 @@ def to_array(obj):
332324
return extract_array(obj, extract_numpy=True)
333325

334326

335-
# -----------------------------------------------------------------------------
336-
# Others
337-
338-
339-
def makeCustomIndex(
340-
nentries,
341-
nlevels,
342-
prefix: str = "#",
343-
names: bool | str | list[str] | None = False,
344-
ndupe_l=None,
345-
idx_type=None,
346-
) -> Index:
347-
"""
348-
Create an index/MultiIndex with given dimensions, levels, names, etc.
349-
350-
nentries - number of entries in index
351-
nlevels - number of levels (> 1 produces MultiIndex)
352-
prefix - a string prefix for labels
353-
names - (Optional), bool or list of strings. if True will use default
354-
names, if false will use no names, if a list is given, the name of
355-
each level in the index will be taken from the list.
356-
ndupe_l - (Optional), list of ints, the number of rows for which the
357-
label will be repeated at the corresponding level, you can specify just
358-
the first few, the rest will use the default ndupe_l of 1.
359-
len(ndupe_l) <= nlevels.
360-
idx_type - "i"/"f"/"s"/"dt"/"p"/"td".
361-
If idx_type is not None, `idx_nlevels` must be 1.
362-
"i"/"f" creates an integer/float index,
363-
"s" creates a string index
364-
"dt" creates a datetime index.
365-
"td" creates a timedelta index.
366-
367-
if unspecified, string labels will be generated.
368-
"""
369-
if ndupe_l is None:
370-
ndupe_l = [1] * nlevels
371-
assert is_sequence(ndupe_l) and len(ndupe_l) <= nlevels
372-
assert names is None or names is False or names is True or len(names) is nlevels
373-
assert idx_type is None or (
374-
idx_type in ("i", "f", "s", "u", "dt", "p", "td") and nlevels == 1
375-
)
376-
377-
if names is True:
378-
# build default names
379-
names = [prefix + str(i) for i in range(nlevels)]
380-
if names is False:
381-
# pass None to index constructor for no name
382-
names = None
383-
384-
# make singleton case uniform
385-
if isinstance(names, str) and nlevels == 1:
386-
names = [names]
387-
388-
# specific 1D index type requested?
389-
idx_func_dict: dict[str, Callable[..., Index]] = {
390-
"i": lambda n: Index(np.arange(n), dtype=np.int64),
391-
"f": lambda n: Index(np.arange(n), dtype=np.float64),
392-
"s": lambda n: Index([f"{i}_{chr(i)}" for i in range(97, 97 + n)]),
393-
"dt": lambda n: date_range("2020-01-01", periods=n),
394-
"td": lambda n: timedelta_range("1 day", periods=n),
395-
"p": lambda n: period_range("2020-01-01", periods=n, freq="D"),
396-
}
397-
idx_func = idx_func_dict.get(idx_type)
398-
if idx_func:
399-
idx = idx_func(nentries)
400-
# but we need to fill in the name
401-
if names:
402-
idx.name = names[0]
403-
return idx
404-
elif idx_type is not None:
405-
raise ValueError(
406-
f"{repr(idx_type)} is not a legal value for `idx_type`, "
407-
"use 'i'/'f'/'s'/'dt'/'p'/'td'."
408-
)
409-
410-
if len(ndupe_l) < nlevels:
411-
ndupe_l.extend([1] * (nlevels - len(ndupe_l)))
412-
assert len(ndupe_l) == nlevels
413-
414-
assert all(x > 0 for x in ndupe_l)
415-
416-
list_of_lists = []
417-
for i in range(nlevels):
418-
419-
def keyfunc(x):
420-
numeric_tuple = re.sub(r"[^\d_]_?", "", x).split("_")
421-
return [int(num) for num in numeric_tuple]
422-
423-
# build a list of lists to create the index from
424-
div_factor = nentries // ndupe_l[i] + 1
425-
426-
# Deprecated since version 3.9: collections.Counter now supports []. See PEP 585
427-
# and Generic Alias Type.
428-
cnt: Counter[str] = collections.Counter()
429-
for j in range(div_factor):
430-
label = f"{prefix}_l{i}_g{j}"
431-
cnt[label] = ndupe_l[i]
432-
# cute Counter trick
433-
result = sorted(cnt.elements(), key=keyfunc)[:nentries]
434-
list_of_lists.append(result)
435-
436-
tuples = list(zip(*list_of_lists))
437-
438-
# convert tuples to index
439-
if nentries == 1:
440-
# we have a single level of tuples, i.e. a regular Index
441-
name = None if names is None else names[0]
442-
index = Index(tuples[0], name=name)
443-
elif nlevels == 1:
444-
name = None if names is None else names[0]
445-
index = Index((x[0] for x in tuples), name=name)
446-
else:
447-
index = MultiIndex.from_tuples(tuples, names=names)
448-
return index
449-
450-
451-
def makeCustomDataframe(
452-
nrows,
453-
ncols,
454-
c_idx_names: bool | list[str] = True,
455-
r_idx_names: bool | list[str] = True,
456-
c_idx_nlevels: int = 1,
457-
r_idx_nlevels: int = 1,
458-
data_gen_f=None,
459-
c_ndupe_l=None,
460-
r_ndupe_l=None,
461-
dtype=None,
462-
c_idx_type=None,
463-
r_idx_type=None,
464-
) -> DataFrame:
465-
"""
466-
Create a DataFrame using supplied parameters.
467-
468-
Parameters
469-
----------
470-
nrows, ncols - number of data rows/cols
471-
c_idx_names, r_idx_names - False/True/list of strings, yields no names,
472-
default names or uses the provided names for the levels of the
473-
corresponding index. You can provide a single string when
474-
c_idx_nlevels ==1.
475-
c_idx_nlevels - number of levels in columns index. > 1 will yield MultiIndex
476-
r_idx_nlevels - number of levels in rows index. > 1 will yield MultiIndex
477-
data_gen_f - a function f(row,col) which returns the data value
478-
at that position, the default generator used yields values of the form
479-
"RxCy" based on position.
480-
c_ndupe_l, r_ndupe_l - list of integers, determines the number
481-
of duplicates for each label at a given level of the corresponding
482-
index. The default `None` value produces a multiplicity of 1 across
483-
all levels, i.e. a unique index. Will accept a partial list of length
484-
N < idx_nlevels, for just the first N levels. If ndupe doesn't divide
485-
nrows/ncol, the last label might have lower multiplicity.
486-
dtype - passed to the DataFrame constructor as is, in case you wish to
487-
have more control in conjunction with a custom `data_gen_f`
488-
r_idx_type, c_idx_type - "i"/"f"/"s"/"dt"/"td".
489-
If idx_type is not None, `idx_nlevels` must be 1.
490-
"i"/"f" creates an integer/float index,
491-
"s" creates a string index
492-
"dt" create a datetime index.
493-
"td" create a timedelta index.
494-
495-
if unspecified, string labels will be generated.
496-
497-
Examples
498-
--------
499-
# 5 row, 3 columns, default names on both, single index on both axis
500-
>> makeCustomDataframe(5,3)
501-
502-
# make the data a random int between 1 and 100
503-
>> mkdf(5,3,data_gen_f=lambda r,c:randint(1,100))
504-
505-
# 2-level multiindex on rows with each label duplicated
506-
# twice on first level, default names on both axis, single
507-
# index on both axis
508-
>> a=makeCustomDataframe(5,3,r_idx_nlevels=2,r_ndupe_l=[2])
509-
510-
# DatetimeIndex on row, index with unicode labels on columns
511-
# no names on either axis
512-
>> a=makeCustomDataframe(5,3,c_idx_names=False,r_idx_names=False,
513-
r_idx_type="dt",c_idx_type="u")
514-
515-
# 4-level MultiIndex on rows with names provided, 2-level MultiIndex
516-
# on columns with default labels and default names.
517-
>> a=makeCustomDataframe(5,3,r_idx_nlevels=4,
518-
r_idx_names=["FEE","FIH","FOH","FUM"],
519-
c_idx_nlevels=2)
520-
521-
>> a=mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4)
522-
"""
523-
assert c_idx_nlevels > 0
524-
assert r_idx_nlevels > 0
525-
assert r_idx_type is None or (
526-
r_idx_type in ("i", "f", "s", "dt", "p", "td") and r_idx_nlevels == 1
527-
)
528-
assert c_idx_type is None or (
529-
c_idx_type in ("i", "f", "s", "dt", "p", "td") and c_idx_nlevels == 1
530-
)
531-
532-
columns = makeCustomIndex(
533-
ncols,
534-
nlevels=c_idx_nlevels,
535-
prefix="C",
536-
names=c_idx_names,
537-
ndupe_l=c_ndupe_l,
538-
idx_type=c_idx_type,
539-
)
540-
index = makeCustomIndex(
541-
nrows,
542-
nlevels=r_idx_nlevels,
543-
prefix="R",
544-
names=r_idx_names,
545-
ndupe_l=r_ndupe_l,
546-
idx_type=r_idx_type,
547-
)
548-
549-
# by default, generate data based on location
550-
if data_gen_f is None:
551-
data_gen_f = lambda r, c: f"R{r}C{c}"
552-
553-
data = [[data_gen_f(r, c) for c in range(ncols)] for r in range(nrows)]
554-
555-
return DataFrame(data, index, columns, dtype=dtype)
556-
557-
558327
class SubclassedSeries(Series):
559328
_metadata = ["testattr", "name"]
560329

@@ -868,8 +637,6 @@ def shares_memory(left, right) -> bool:
868637
"iat",
869638
"iloc",
870639
"loc",
871-
"makeCustomDataframe",
872-
"makeCustomIndex",
873640
"maybe_produces_warning",
874641
"NARROW_NP_DTYPES",
875642
"NP_NAT_OBJECTS",

0 commit comments

Comments
 (0)