|
| 1 | +import numpy as np |
| 2 | +import pytest |
| 3 | + |
| 4 | +from pandas import Series |
| 5 | +from pandas.core import strings as strings |
| 6 | + |
# Sample invocations of ``StringMethods`` methods, stored as
# ``(method_name, args, kwargs)`` triples.  A method may be listed several
# times to exercise different argument combinations.
_any_string_method = [
    ("cat", (), {"sep": ","}),
    ("cat", (Series(list("zyx")),), {"sep": ",", "join": "left"}),
    ("center", (10,), {}),
    ("contains", ("a",), {}),
    ("count", ("a",), {}),
    ("decode", ("UTF-8",), {}),
    ("encode", ("UTF-8",), {}),
    ("endswith", ("a",), {}),
    ("endswith", ("a",), {"na": True}),
    ("endswith", ("a",), {"na": False}),
    ("extract", ("([a-z]*)",), {"expand": False}),
    ("extract", ("([a-z]*)",), {"expand": True}),
    ("extractall", ("([a-z]*)",), {}),
    ("find", ("a",), {}),
    ("findall", ("a",), {}),
    ("get", (0,), {}),
    # because "index" (and "rindex") fail intentionally
    # if the string is not found, search only for empty string
    ("index", ("",), {}),
    ("join", (",",), {}),
    ("ljust", (10,), {}),
    ("match", ("a",), {}),
    ("fullmatch", ("a",), {}),
    ("normalize", ("NFC",), {}),
    ("pad", (10,), {}),
    ("partition", (" ",), {"expand": False}),
    ("partition", (" ",), {"expand": True}),
    ("repeat", (3,), {}),
    ("replace", ("a", "z"), {}),
    ("rfind", ("a",), {}),
    ("rindex", ("",), {}),
    ("rjust", (10,), {}),
    ("rpartition", (" ",), {"expand": False}),
    ("rpartition", (" ",), {"expand": True}),
    ("slice", (0, 1), {}),
    ("slice_replace", (0, 1, "z"), {}),
    ("split", (" ",), {"expand": False}),
    ("split", (" ",), {"expand": True}),
    ("startswith", ("a",), {}),
    ("startswith", ("a",), {"na": True}),
    ("startswith", ("a",), {"na": False}),
    # translating unicode points of "a" to "d"
    ("translate", ({97: 100},), {}),
    ("wrap", (2,), {}),
    ("zfill", (10,), {}),
] + [
    # methods called without any arguments: pair each name with an empty
    # tuple and its *own* empty dict, so that no kwargs object is shared
    # between entries and the list can grow without a hidden length cap
    (method_name, (), {})
    for method_name in [
        "capitalize",
        "cat",
        "get_dummies",
        "isalnum",
        "isalpha",
        "isdecimal",
        "isdigit",
        "islower",
        "isnumeric",
        "isspace",
        "istitle",
        "isupper",
        "len",
        "lower",
        "lstrip",
        "partition",
        "rpartition",
        "rsplit",
        "rstrip",
        "slice",
        "slice_replace",
        "split",
        "strip",
        "swapcase",
        "title",
        "upper",
        "casefold",
    ]
]
# use method name as fixture-id
ids = tuple(
    method_name for method_name, _args, _kwargs in _any_string_method
)
# Public (non-underscore) attribute names of ``StringMethods`` that have no
# sample invocation listed in ``ids``.
missing_methods = {
    attr for attr in dir(strings.StringMethods) if not attr.startswith("_")
} - set(ids)

# test that the above list captures all methods of StringMethods;
# name the uncovered ones so that a failure at import time is actionable
assert not missing_methods, (
    f"_any_string_method has no entry for: {sorted(missing_methods)}"
)
| 96 | + |
| 97 | + |
@pytest.fixture(params=_any_string_method, ids=ids)
def any_string_method(request):
    """
    Parametrize a test over sample calls to `StringMethods` methods.

    Each parameter is one ``(method_name, args, kwargs)`` entry of
    ``_any_string_method``; the method name doubles as the test id.

    Returns
    -------
    method_name : str
        Name of the method to look up on the ``.str`` accessor
    args : tuple
        Positional arguments for a sample call
    kwargs : dict
        Keyword arguments for a sample call

    Examples
    --------
    >>> def test_something(any_string_method):
    ...     s = Series(['a', 'b', np.nan, 'd'])
    ...
    ...     method_name, args, kwargs = any_string_method
    ...     method = getattr(s.str, method_name)
    ...     # will not raise
    ...     method(*args, **kwargs)
    """
    method_name, args, kwargs = request.param
    return method_name, args, kwargs
| 126 | + |
| 127 | + |
# ``(inferred_dtype, sample_values)`` pairs;
# a subset of the full set from pandas/conftest.py
_any_allowed_skipna_inferred_dtype = [
    ("string", ["a", np.nan, "c"]),
    ("bytes", [b"a", np.nan, b"c"]),
    ("empty", [np.nan, np.nan, np.nan]),
    ("empty", []),
    ("mixed-integer", ["a", np.nan, 2]),
]
# use inferred type as id
ids = tuple(
    inferred for inferred, _values in _any_allowed_skipna_inferred_dtype
)
| 137 | + |
| 138 | + |
@pytest.fixture(params=_any_allowed_skipna_inferred_dtype, ids=ids)
def any_allowed_skipna_inferred_dtype(request):
    """
    Fixture of sample values paired with the dtype they are inferred as.

    The (inferred) types present in the parameter list are:
    * 'string'
    * 'bytes'
    * 'empty'
    * 'mixed-integer'

    Returns
    -------
    inferred_dtype : str
        The inferred dtype label attached to the sample (see the example
        below for how it relates to ``lib.infer_dtype``)
    values : np.ndarray
        The sample values as an array of object dtype

    Examples
    --------
    >>> import pandas._libs.lib as lib
    >>>
    >>> def test_something(any_allowed_skipna_inferred_dtype):
    ...     inferred_dtype, values = any_allowed_skipna_inferred_dtype
    ...     # will pass
    ...     assert lib.infer_dtype(values, skipna=True) == inferred_dtype
    ...
    ...     # constructor for .str-accessor will also pass
    ...     Series(values).str
    """
    expected_dtype, raw_values = request.param

    # correctness of inference tested in tests/dtypes/test_inference.py;
    # building with object dtype keeps the sample values from being cast
    return expected_dtype, np.array(raw_values, dtype=object)
0 commit comments