Skip to content

Commit 6b4b956

Browse files
h-vetinari authored and jreback committed
TST: add method/dtype coverage to str-accessor; precursor to #23167 (#23582)
1 parent ca70fe6 commit 6b4b956

File tree

3 files changed

+235
-7
lines changed

3 files changed

+235
-7
lines changed

pandas/conftest.py

+81-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from datetime import date, time, timedelta
2+
from decimal import Decimal
13
import importlib
24
import os
35

@@ -8,7 +10,7 @@
810
import pytest
911
from pytz import FixedOffset, utc
1012

11-
from pandas.compat import PY3
13+
from pandas.compat import PY3, u
1214
import pandas.util._test_decorators as td
1315

1416
import pandas as pd
@@ -514,6 +516,84 @@ def any_numpy_dtype(request):
514516
return request.param
515517

516518

519+
# categoricals are handled separately
520+
# Pairs of (inferred dtype label, sample values). The label is what
# lib.infer_dtype(values, skipna=True) is expected to return for the sample,
# and it doubles as the pytest id of the parametrized fixture below.
_any_skipna_inferred_dtype = [
    ('string', ['a', np.nan, 'c']),
    # the label for text / binary literals depends on the Python version
    ('string' if PY3 else 'unicode', [u('a'), np.nan, u('c')]),
    ('bytes' if PY3 else 'string', [b'a', np.nan, b'c']),
    ('empty', [np.nan, np.nan, np.nan]),
    ('empty', []),
    ('mixed-integer', ['a', np.nan, 2]),
    ('mixed', ['a', np.nan, 2.0]),
    ('floating', [1.0, np.nan, 2.0]),
    ('integer', [1, np.nan, 2]),
    ('mixed-integer-float', [1, np.nan, 2.0]),
    ('decimal', [Decimal(1), np.nan, Decimal(2)]),
    ('boolean', [True, np.nan, False]),
    (
        'datetime64',
        [np.datetime64('2013-01-01'), np.nan, np.datetime64('2018-01-01')],
    ),
    (
        'datetime',
        [pd.Timestamp('20130101'), np.nan, pd.Timestamp('20180101')],
    ),
    ('date', [date(2013, 1, 1), np.nan, date(2018, 1, 1)]),
    # The following two dtypes are commented out due to GH 23554
    # ('complex', [1 + 1j, np.nan, 2 + 2j]),
    # ('timedelta64', [np.timedelta64(1, 'D'),
    #                  np.nan, np.timedelta64(2, 'D')]),
    ('timedelta', [timedelta(1), np.nan, timedelta(2)]),
    ('time', [time(1), np.nan, time(2)]),
    ('period', [pd.Period(2013), pd.NaT, pd.Period(2018)]),
    ('interval', [pd.Interval(0, 1), np.nan, pd.Interval(0, 2)]),
]
# use the inferred type as fixture-id; the sample values are discarded here
ids, _ = zip(*_any_skipna_inferred_dtype)
546+
547+
548+
@pytest.fixture(params=_any_skipna_inferred_dtype, ids=ids)
def any_skipna_inferred_dtype(request):
    """
    Parametrized fixture over inferred dtypes from _libs.lib.infer_dtype

    Each parameter pairs an inferred-dtype label with sample values that are
    expected to be inferred as that label when missing values are skipped.

    The covered (inferred) types are:
    * 'string'
    * 'unicode' (if PY2)
    * 'empty'
    * 'bytes' (if PY3)
    * 'mixed'
    * 'mixed-integer'
    * 'mixed-integer-float'
    * 'floating'
    * 'integer'
    * 'decimal'
    * 'boolean'
    * 'datetime64'
    * 'datetime'
    * 'date'
    * 'timedelta'
    * 'time'
    * 'period'
    * 'interval'

    Returns
    -------
    inferred_dtype : str
        The string for the inferred dtype from _libs.lib.infer_dtype
    values : np.ndarray
        An array of object dtype that will be inferred to have
        `inferred_dtype`

    Examples
    --------
    >>> import pandas._libs.lib as lib
    >>>
    >>> def test_something(any_skipna_inferred_dtype):
    ...     inferred_dtype, values = any_skipna_inferred_dtype
    ...     # will pass
    ...     assert lib.infer_dtype(values, skipna=True) == inferred_dtype
    """
    label, samples = request.param

    # Wrap in an object-dtype array so numpy does not cast the samples.
    # correctness of inference tested in tests/dtypes/test_inference.py
    return label, np.array(samples, dtype=object)
595+
596+
517597
@pytest.fixture
518598
def mock():
519599
"""

pandas/tests/dtypes/test_inference.py

+7
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,13 @@ class TestTypeInference(object):
496496
class Dummy():
497497
pass
498498

499+
def test_inferred_dtype_fixture(self, any_skipna_inferred_dtype):
    """Check that the fixture's label matches what infer_dtype reports."""
    # the fixture is defined in pandas/conftest.py
    expected, values = any_skipna_inferred_dtype

    # make sure the inferred dtype of the fixture is as requested
    result = lib.infer_dtype(values, skipna=True)
    assert expected == result
505+
499506
def test_length_zero(self):
500507
result = lib.infer_dtype(np.array([], dtype='i4'))
501508
assert result == 'integer'

pandas/tests/test_strings.py

+147-6
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import numpy as np
1010
from numpy.random import randint
1111

12-
from pandas.compat import range, u
12+
from pandas.compat import range, u, PY3
1313
import pandas.compat as compat
1414
from pandas import Index, Series, DataFrame, isna, MultiIndex, notna, concat
1515

@@ -118,6 +118,55 @@ def any_string_method(request):
118118
return request.param
119119

120120

121+
# subset of the full set from pandas/conftest.py
122+
# Pairs of (inferred dtype label, sample values); the label doubles as the
# pytest id of the parametrized fixture below.
_any_allowed_skipna_inferred_dtype = [
    ('string', ['a', np.nan, 'c']),
    # the label for text / binary literals depends on the Python version
    ('string' if PY3 else 'unicode', [u('a'), np.nan, u('c')]),
    ('bytes' if PY3 else 'string', [b'a', np.nan, b'c']),
    ('empty', [np.nan, np.nan, np.nan]),
    ('empty', []),
    ('mixed-integer', ['a', np.nan, 2]),
]
# use the inferred type as id; the sample values are discarded here
ids, _ = zip(*_any_allowed_skipna_inferred_dtype)
131+
132+
133+
@pytest.fixture(params=_any_allowed_skipna_inferred_dtype, ids=ids)
def any_allowed_skipna_inferred_dtype(request):
    """
    Fixture for (inferred) dtypes allowed in StringMethods.__init__

    The covered (inferred) types are:
    * 'string'
    * 'unicode' (if PY2)
    * 'empty'
    * 'bytes' (if PY3)
    * 'mixed-integer'

    Note that no 'mixed' sample is among the parameters of this fixture.

    Returns
    -------
    inferred_dtype : str
        The string for the inferred dtype from _libs.lib.infer_dtype
    values : np.ndarray
        An array of object dtype that will be inferred to have
        `inferred_dtype`

    Examples
    --------
    >>> import pandas._libs.lib as lib
    >>>
    >>> def test_something(any_allowed_skipna_inferred_dtype):
    ...     inferred_dtype, values = any_allowed_skipna_inferred_dtype
    ...     # will pass
    ...     assert lib.infer_dtype(values, skipna=True) == inferred_dtype
    """
    inferred_dtype, values = request.param
    values = np.array(values, dtype=object)  # object dtype to avoid casting

    # correctness of inference tested in tests/dtypes/test_inference.py
    return inferred_dtype, values
168+
169+
121170
class TestStringMethods(object):
122171

123172
def test_api(self):
@@ -126,11 +175,103 @@ def test_api(self):
126175
assert Series.str is strings.StringMethods
127176
assert isinstance(Series(['']).str, strings.StringMethods)
128177

129-
# GH 9184
130-
invalid = Series([1])
131-
with pytest.raises(AttributeError, match="only use .str accessor"):
132-
invalid.str
133-
assert not hasattr(invalid, 'str')
178+
@pytest.mark.parametrize('dtype', [object, 'category'])
@pytest.mark.parametrize('box', [Series, Index])
def test_api_per_dtype(self, box, dtype, any_skipna_inferred_dtype):
    """
    Check that ``.str`` is available exactly for the accepted inferred dtypes.

    For every combination of box (Series / Index), dtype (object /
    category) and inferred dtype, the accessor must either be a
    StringMethods instance or raise AttributeError on access.
    """
    # one instance of parametrized fixture
    inferred_dtype, values = any_skipna_inferred_dtype

    t = box(values, dtype=dtype)  # explicit dtype to avoid casting

    types_passing_constructor = ['string', 'unicode', 'empty',
                                 'bytes', 'mixed', 'mixed-integer']
    passes_constructor = inferred_dtype in types_passing_constructor

    is_index = box == Index
    is_object = dtype == object
    is_category = dtype == 'category'

    # TODO: get rid of these xfails
    if is_category and inferred_dtype in ['period', 'interval']:
        pytest.xfail(reason='Conversion to numpy array fails because '
                     'the ._values-attribute is not a numpy array for '
                     'PeriodArray/IntervalArray; see GH 23553')
    if is_index and inferred_dtype in ['empty', 'bytes']:
        pytest.xfail(reason='Raising too restrictively; '
                     'solved by GH 23167')
    if is_index and is_object and inferred_dtype in ['boolean', 'date',
                                                     'time']:
        pytest.xfail(reason='Inferring incorrectly because of NaNs; '
                     'solved by GH 23167')

    # NOTE(review): in the combined condition `and` binds tighter than
    # `or`, so the categorical clause applies to Series *and* Index --
    # confirm this is intended.
    series_object_not_accepted = (box == Series and is_object
                                  and not passes_constructor)
    category_not_raising = (is_category
                            and inferred_dtype in ['decimal', 'boolean',
                                                   'time'])
    if series_object_not_accepted or category_not_raising:
        pytest.xfail(reason='Not raising correctly; solved by GH 23167')

    if not passes_constructor:
        # GH 9184, GH 23011, GH 23163
        with pytest.raises(AttributeError, match='Can only use .str '
                           'accessor with string values.*'):
            t.str
        assert not hasattr(t, 'str')
    else:
        # GH 6106
        assert isinstance(t.str, strings.StringMethods)
217+
218+
@pytest.mark.parametrize('dtype', [object, 'category'])
@pytest.mark.parametrize('box', [Series, Index])
def test_api_per_method(self, box, dtype,
                        any_allowed_skipna_inferred_dtype,
                        any_string_method):
    """
    Check each ``.str`` method against each allowed inferred dtype.

    This test does not check correctness of the different methods, just
    that a method runs on the (inferred) dtypes it supports and raises
    TypeError on the others.
    """
    # one instance of each parametrized fixture
    inferred_dtype, values = any_allowed_skipna_inferred_dtype
    method_name, args, kwargs = any_string_method

    # TODO: get rid of these xfails
    if (method_name not in ['encode', 'decode', 'len']
            and inferred_dtype == 'bytes'):
        pytest.xfail(reason='Not raising for "bytes", see GH 23011; '
                     'Also: malformed method names, see GH 23551; '
                     'solved by GH 23167')
    if (method_name == 'cat'
            and inferred_dtype in ['mixed', 'mixed-integer']):
        pytest.xfail(reason='Bad error message; should raise better; '
                     'solved by GH 23167')
    if box == Index and inferred_dtype in ['empty', 'bytes']:
        pytest.xfail(reason='Raising too restrictively; '
                     'solved by GH 23167')
    if (box == Index and dtype == object
            and inferred_dtype in ['boolean', 'date', 'time']):
        pytest.xfail(reason='Inferring incorrectly because of NaNs; '
                     'solved by GH 23167')
    if box == Index and dtype == 'category':
        pytest.xfail(reason='Broken methods on CategoricalIndex; '
                     'see GH 23556')

    t = box(values, dtype=dtype)  # explicit dtype to avoid casting
    method = getattr(t.str, method_name)

    bytes_allowed = method_name in ['encode', 'decode', 'len']
    # as of v0.23.4, all methods except 'cat' are very lenient with the
    # allowed data types, just returning NaN for entries that error.
    # This could be changed with an 'errors'-kwarg to the `str`-accessor,
    # see discussion in GH 13877
    mixed_allowed = method_name not in ['cat']

    # build the list explicitly instead of multiplying lists by booleans
    allowed_types = ['string', 'unicode', 'empty']
    if bytes_allowed:
        allowed_types.append('bytes')
    if mixed_allowed:
        allowed_types.extend(['mixed', 'mixed-integer'])

    if inferred_dtype in allowed_types:
        method(*args, **kwargs)  # works!
    else:
        # GH 23011, GH 23163
        msg = ('Cannot use .str.{name} with values of inferred dtype '
               '{inferred_dtype!r}.'.format(name=method_name,
                                            inferred_dtype=inferred_dtype))
        with pytest.raises(TypeError, match=msg):
            method(*args, **kwargs)
134275

135276
def test_api_for_categorical(self, any_string_method):
136277
# https://github.com/pandas-dev/pandas/issues/10661

0 commit comments

Comments
 (0)