Skip to content

COMPAT: avoid invalid buffer comparisons #18252

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 13, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions pandas/_libs/src/inference.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,7 @@ cdef class Validator:
self.dtype = dtype
self.skipna = skipna

cdef bint validate(self, object[:] values) except -1:
cdef bint validate(self, ndarray values) except -1:
if not self.n:
return False

Expand All @@ -629,7 +629,7 @@ cdef class Validator:

@cython.wraparound(False)
@cython.boundscheck(False)
cdef bint _validate(self, object[:] values) except -1:
cdef bint _validate(self, ndarray values) except -1:
cdef:
Py_ssize_t i
Py_ssize_t n = self.n
Expand All @@ -642,7 +642,7 @@ cdef class Validator:

@cython.wraparound(False)
@cython.boundscheck(False)
cdef bint _validate_skipna(self, object[:] values) except -1:
cdef bint _validate_skipna(self, ndarray values) except -1:
cdef:
Py_ssize_t i
Py_ssize_t n = self.n
Expand Down Expand Up @@ -852,7 +852,7 @@ cdef class DatetimeValidator(TemporalValidator):
return is_null_datetime64(value)


cpdef bint is_datetime_array(ndarray[object] values):
cpdef bint is_datetime_array(ndarray values):
cdef:
DatetimeValidator validator = DatetimeValidator(
len(values),
Expand All @@ -876,7 +876,7 @@ cpdef bint is_datetime64_array(ndarray values):
return validator.validate(values)


cpdef bint is_datetime_with_singletz_array(ndarray[object] values):
cpdef bint is_datetime_with_singletz_array(ndarray values):
"""
Check values have the same tzinfo attribute.
Doesn't check values are datetime-like types.
Expand Down Expand Up @@ -959,7 +959,7 @@ cdef class DateValidator(Validator):
return is_date(value)


cpdef bint is_date_array(ndarray[object] values, bint skipna=False):
cpdef bint is_date_array(ndarray values, bint skipna=False):
cdef DateValidator validator = DateValidator(len(values), skipna=skipna)
return validator.validate(values)

Expand All @@ -970,7 +970,7 @@ cdef class TimeValidator(Validator):
return is_time(value)


cpdef bint is_time_array(ndarray[object] values, bint skipna=False):
cpdef bint is_time_array(ndarray values, bint skipna=False):
cdef TimeValidator validator = TimeValidator(len(values), skipna=skipna)
return validator.validate(values)

Expand All @@ -984,7 +984,7 @@ cdef class PeriodValidator(TemporalValidator):
return is_null_period(value)


cpdef bint is_period_array(ndarray[object] values):
cpdef bint is_period_array(ndarray values):
cdef PeriodValidator validator = PeriodValidator(len(values), skipna=True)
return validator.validate(values)

Expand All @@ -995,7 +995,7 @@ cdef class IntervalValidator(Validator):
return is_interval(value)


cpdef bint is_interval_array(ndarray[object] values):
cpdef bint is_interval_array(ndarray values):
cdef:
IntervalValidator validator = IntervalValidator(
len(values),
Expand Down
142 changes: 100 additions & 42 deletions pandas/tests/dtypes/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
from pandas.util import testing as tm


@pytest.fixture(params=[True, False], ids=lambda val: str(val))
@pytest.fixture(params=[True, False], ids=str)
def coerce(request):
return request.param

Expand All @@ -60,16 +60,20 @@ def __getitem__(self):
assert (not is_seq(A()))


def test_is_list_like():
passes = ([], [1], (1, ), (1, 2), {'a': 1}, set([1, 'a']), Series([1]),
Series([]), Series(['a']).str)
fails = (1, '2', object(), str)
@pytest.mark.parametrize(
"ll",
[
[], [1], (1, ), (1, 2), {'a': 1},
set([1, 'a']), Series([1]),
Series([]), Series(['a']).str])
def test_is_list_like_passes(ll):
assert inference.is_list_like(ll)

for p in passes:
assert inference.is_list_like(p)

for f in fails:
assert not inference.is_list_like(f)
@pytest.mark.parametrize(
"ll", [1, '2', object(), str])
def test_is_list_like_fails(ll):
assert not inference.is_list_like(ll)


@pytest.mark.parametrize('inner', [
Expand All @@ -93,15 +97,16 @@ def test_is_nested_list_like_fails(obj):
assert not inference.is_nested_list_like(obj)


def test_is_dict_like():
passes = [{}, {'A': 1}, Series([1])]
fails = ['1', 1, [1, 2], (1, 2), range(2), Index([1])]
@pytest.mark.parametrize(
"ll", [{}, {'A': 1}, Series([1])])
def test_is_dict_like_passes(ll):
assert inference.is_dict_like(ll)

for p in passes:
assert inference.is_dict_like(p)

for f in fails:
assert not inference.is_dict_like(f)
@pytest.mark.parametrize(
"ll", ['1', 1, [1, 2], (1, 2), range(2), Index([1])])
def test_is_dict_like_fails(ll):
assert not inference.is_dict_like(ll)


def test_is_file_like():
Expand Down Expand Up @@ -148,15 +153,16 @@ class MockFile(object):
assert not is_file(mock.Mock())


def test_is_named_tuple():
passes = (collections.namedtuple('Test', list('abc'))(1, 2, 3), )
fails = ((1, 2, 3), 'a', Series({'pi': 3.14}))
@pytest.mark.parametrize(
"ll", [collections.namedtuple('Test', list('abc'))(1, 2, 3)])
def test_is_names_tuple_passes(ll):
assert inference.is_named_tuple(ll)

for p in passes:
assert inference.is_named_tuple(p)

for f in fails:
assert not inference.is_named_tuple(f)
@pytest.mark.parametrize(
"ll", [(1, 2, 3), 'a', Series({'pi': 3.14})])
def test_is_names_tuple_fails(ll):
assert not inference.is_named_tuple(ll)


def test_is_hashable():
Expand Down Expand Up @@ -208,27 +214,32 @@ class OldStyleClass():
hash(c) # this will not raise


def test_is_re():
passes = re.compile('ad'),
fails = 'x', 2, 3, object()
@pytest.mark.parametrize(
"ll", [re.compile('ad')])
def test_is_re_passes(ll):
assert inference.is_re(ll)

for p in passes:
assert inference.is_re(p)

for f in fails:
assert not inference.is_re(f)
@pytest.mark.parametrize(
"ll", ['x', 2, 3, object()])
def test_is_re_fails(ll):
assert not inference.is_re(ll)


def test_is_recompilable():
passes = (r'a', u('x'), r'asdf', re.compile('adsf'), u(r'\u2233\s*'),
re.compile(r''))
fails = 1, [], object()
@pytest.mark.parametrize(
"ll", [r'a', u('x'),
r'asdf',
re.compile('adsf'),
u(r'\u2233\s*'),
re.compile(r'')])
def test_is_recompilable_passes(ll):
assert inference.is_re_compilable(ll)

for p in passes:
assert inference.is_re_compilable(p)

for f in fails:
assert not inference.is_re_compilable(f)
@pytest.mark.parametrize(
"ll", [1, [], object()])
def test_is_recompilable_fails(ll):
assert not inference.is_re_compilable(ll)


class TestInference(object):
Expand Down Expand Up @@ -300,15 +311,14 @@ def test_maybe_convert_numeric_infinities(self):
np.array(['foo_' + infinity], dtype=object),
na_values, maybe_int)

def test_maybe_convert_numeric_post_floatify_nan(self):
def test_maybe_convert_numeric_post_floatify_nan(self, coerce):
# see gh-13314
data = np.array(['1.200', '-999.000', '4.500'], dtype=object)
expected = np.array([1.2, np.nan, 4.5], dtype=np.float64)
nan_values = set([-999, -999.0])

for coerce_type in (True, False):
out = lib.maybe_convert_numeric(data, nan_values, coerce_type)
tm.assert_numpy_array_equal(out, expected)
out = lib.maybe_convert_numeric(data, nan_values, coerce)
tm.assert_numpy_array_equal(out, expected)

def test_convert_infs(self):
arr = np.array(['inf', 'inf', 'inf'], dtype='O')
Expand Down Expand Up @@ -739,6 +749,36 @@ def test_is_datetimelike_array_all_nan_nat_like(self):
assert not lib.is_timedelta64_array(arr)
assert not lib.is_timedelta_or_timedelta64_array(arr)

assert lib.is_datetime_with_singletz_array(
np.array([pd.Timestamp('20130101', tz='US/Eastern'),
pd.Timestamp('20130102', tz='US/Eastern')],
dtype=object))
assert not lib.is_datetime_with_singletz_array(
np.array([pd.Timestamp('20130101', tz='US/Eastern'),
pd.Timestamp('20130102', tz='CET')],
dtype=object))

@pytest.mark.parametrize(
"func",
[
'is_datetime_array',
'is_datetime64_array',
'is_bool_array',
'is_timedelta_array',
'is_timedelta64_array',
'is_timedelta_or_timedelta64_array',
'is_date_array',
'is_time_array',
'is_interval_array',
'is_period_array'])
def test_other_dtypes_for_array(self, func):
func = getattr(lib, func)
arr = np.array(['foo', 'bar'])
assert not func(arr)

arr = np.array([1, 2])
assert not func(arr)

def test_date(self):

dates = [date(2012, 1, day) for day in range(1, 20)]
Expand All @@ -752,6 +792,24 @@ def test_date(self):
result = lib.infer_dtype(dates, skipna=True)
assert result == 'date'

def test_is_numeric_array(self):

assert lib.is_float_array(np.array([1, 2.0]))
assert lib.is_float_array(np.array([1, 2.0, np.nan]))
assert not lib.is_float_array(np.array([1, 2]))

assert lib.is_integer_array(np.array([1, 2]))
assert not lib.is_integer_array(np.array([1, 2.0]))

def test_is_string_array(self):

assert lib.is_string_array(np.array(['foo', 'bar']))
assert not lib.is_string_array(
np.array(['foo', 'bar', np.nan], dtype=object), skipna=False)
assert lib.is_string_array(
np.array(['foo', 'bar', np.nan], dtype=object), skipna=True)
assert not lib.is_string_array(np.array([1, 2]))

def test_to_object_array_tuples(self):
r = (5, 6)
values = [r]
Expand Down