diff --git a/pandas/tests/extension/integer/test_integer.py b/pandas/tests/arrays/test_integer.py similarity index 70% rename from pandas/tests/extension/integer/test_integer.py rename to pandas/tests/arrays/test_integer.py index 3af127091d2d8..349a6aee5701e 100644 --- a/pandas/tests/extension/integer/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -1,11 +1,10 @@ +# -*- coding: utf-8 -*- import numpy as np import pandas as pd import pandas.util.testing as tm import pytest -from pandas.tests.extension import base -from pandas.api.types import ( - is_integer, is_scalar, is_float, is_float_dtype) +from pandas.api.types import is_integer, is_float, is_float_dtype, is_scalar from pandas.core.dtypes.generic import ABCIndexClass from pandas.core.arrays import ( @@ -14,6 +13,8 @@ Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype) +from pandas.tests.extension.base import BaseOpsUtil + def make_data(): return (list(range(8)) + @@ -39,42 +40,13 @@ def data_missing(dtype): return integer_array([np.nan, 1], dtype=dtype) -@pytest.fixture -def data_repeated(data): - def gen(count): - for _ in range(count): - yield data - yield gen - - -@pytest.fixture -def data_for_sorting(dtype): - return integer_array([1, 2, 0], dtype=dtype) - - -@pytest.fixture -def data_missing_for_sorting(dtype): - return integer_array([1, np.nan, 0], dtype=dtype) - - -@pytest.fixture -def na_cmp(): - # we are np.nan - return lambda x, y: np.isnan(x) and np.isnan(y) - - -@pytest.fixture -def na_value(): - return np.nan - - -@pytest.fixture -def data_for_grouping(dtype): - b = 1 - a = 0 - c = 2 - na = np.nan - return integer_array([b, b, na, na, a, a, b, c], dtype=dtype) +@pytest.fixture(params=['data', 'data_missing']) +def all_data(request, data, data_missing): + """Parametrized fixture giving 'data' and 'data_missing'""" + if request.param == 'data': + return data + elif request.param == 'data_missing': + return data_missing def test_dtypes(dtype): @@ -87,61 +59,50 @@ def test_dtypes(dtype): assert dtype.name is not None -class BaseInteger(object): - - def assert_index_equal(self, left, right, *args, **kwargs): - - left_na = left.isna() - right_na = right.isna() +class TestInterface(object): - tm.assert_numpy_array_equal(left_na, right_na) - return tm.assert_index_equal(left[~left_na], - right[~right_na], - *args, **kwargs) - - def assert_series_equal(self, left, right, *args, **kwargs): + def test_repr_array(self, data): + result = repr(data) - left_na = left.isna() - right_na = right.isna() + # not long + assert '...' not in result - tm.assert_series_equal(left_na, right_na) - return tm.assert_series_equal(left[~left_na], - right[~right_na], - *args, **kwargs) + assert 'dtype=' in result + assert 'IntegerArray' in result - def assert_frame_equal(self, left, right, *args, **kwargs): - # TODO(EA): select_dtypes - tm.assert_index_equal( - left.columns, right.columns, - exact=kwargs.get('check_column_type', 'equiv'), - check_names=kwargs.get('check_names', True), - check_exact=kwargs.get('check_exact', False), - check_categorical=kwargs.get('check_categorical', True), - obj='{obj}.columns'.format(obj=kwargs.get('obj', 'DataFrame'))) + def test_repr_array_long(self, data): + # some arrays may be able to assert a ... in the repr + with pd.option_context('display.max_seq_items', 1): + result = repr(data) - integers = (left.dtypes == 'integer').index + assert '...' in result + assert 'length' in result - for col in integers: - self.assert_series_equal(left[col], right[col], - *args, **kwargs) - left = left.drop(columns=integers) - right = right.drop(columns=integers) - tm.assert_frame_equal(left, right, *args, **kwargs) +class TestConstructors(object): + def test_from_dtype_from_float(self, data): + # construct from our dtype & string dtype + dtype = data.dtype -class TestDtype(BaseInteger, base.BaseDtypeTests): + # from float + expected = pd.Series(data) + result = pd.Series(np.array(data).astype('float'), dtype=str(dtype)) + tm.assert_series_equal(result, expected) - @pytest.mark.skip(reason="using multiple dtypes") - def test_is_dtype_unboxes_dtype(self): - # we have multiple dtypes, so skip - pass + # from int / list + expected = pd.Series(data) + result = pd.Series(np.array(data).tolist(), dtype=str(dtype)) + tm.assert_series_equal(result, expected) - def test_array_type_with_arg(self, data, dtype): - assert dtype.construct_array_type() is IntegerArray + # from int / array + expected = pd.Series(data).dropna().reset_index(drop=True) + dropped = np.array(data.dropna()).astype(np.dtype((dtype.type))) + result = pd.Series(dropped, dtype=str(dtype)) + tm.assert_series_equal(result, expected) -class TestArithmeticOps(BaseInteger, base.BaseArithmeticOpsTests): +class TestArithmeticOps(BaseOpsUtil): def _check_divmod_op(self, s, op, other, exc=None): super(TestArithmeticOps, self)._check_divmod_op(s, op, other, None) @@ -178,7 +139,7 @@ def _check_op_float(self, result, expected, mask, s, op_name, other): # check comparisions that are resulting in float dtypes expected[mask] = np.nan - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def _check_op_integer(self, result, expected, mask, s, op_name, other): # check comparisions that are resulting in integer dtypes @@ -231,10 +192,10 @@ def _check_op_integer(self, result, expected, mask, s, op_name, other): original = original.astype('float') original[mask] = np.nan - self.assert_series_equal(original, expected.astype('float')) + tm.assert_series_equal(original, expected.astype('float')) # assert our expected result - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_arith_integer_array(self, data, all_arithmetic_operators): # we operate with a rhs of an integer array @@ -319,7 +280,7 @@ def test_error(self, data, all_arithmetic_operators): opa(np.arange(len(s)).reshape(-1, len(s))) -class TestComparisonOps(BaseInteger, base.BaseComparisonOpsTests): +class TestComparisonOps(BaseOpsUtil): def _compare_other(self, s, data, op_name, other): op = self.get_op_from_name(op_name) @@ -345,144 +306,21 @@ def _compare_other(self, s, data, op_name, other): tm.assert_series_equal(result, expected) + def test_compare_scalar(self, data, all_compare_operators): + op_name = all_compare_operators + s = pd.Series(data) + self._compare_other(s, data, op_name, 0) -class TestInterface(BaseInteger, base.BaseInterfaceTests): - - def test_repr_array(self, data): - result = repr(data) - - # not long - assert '...' not in result - - assert 'dtype=' in result - assert 'IntegerArray' in result - - def test_repr_array_long(self, data): - # some arrays may be able to assert a ... in the repr - with pd.option_context('display.max_seq_items', 1): - result = repr(data) - - assert '...' in result - assert 'length' in result - - -class TestConstructors(BaseInteger, base.BaseConstructorsTests): - - def test_from_dtype_from_float(self, data): - # construct from our dtype & string dtype - dtype = data.dtype - - # from float - expected = pd.Series(data) - result = pd.Series(np.array(data).astype('float'), dtype=str(dtype)) - self.assert_series_equal(result, expected) - - # from int / list - expected = pd.Series(data) - result = pd.Series(np.array(data).tolist(), dtype=str(dtype)) - self.assert_series_equal(result, expected) - - # from int / array - expected = pd.Series(data).dropna().reset_index(drop=True) - dropped = np.array(data.dropna()).astype(np.dtype((dtype.type))) - result = pd.Series(dropped, dtype=str(dtype)) - self.assert_series_equal(result, expected) - - -class TestReshaping(BaseInteger, base.BaseReshapingTests): - - def test_concat_mixed_dtypes(self, data): - # https://github.com/pandas-dev/pandas/issues/20762 - df1 = pd.DataFrame({'A': data[:3]}) - df2 = pd.DataFrame({"A": [1, 2, 3]}) - df3 = pd.DataFrame({"A": ['a', 'b', 'c']}).astype('category') - df4 = pd.DataFrame({"A": pd.SparseArray([1, 2, 3])}) - dfs = [df1, df2, df3, df4] - - # dataframes - result = pd.concat(dfs) - expected = pd.concat([x.astype(object) for x in dfs]) - self.assert_frame_equal(result, expected) - - # series - result = pd.concat([x['A'] for x in dfs]) - expected = pd.concat([x['A'].astype(object) for x in dfs]) - self.assert_series_equal(result, expected) - - result = pd.concat([df1, df2]) - expected = pd.concat([df1.astype('object'), df2.astype('object')]) - self.assert_frame_equal(result, expected) - - # concat of an Integer and Int coerces to object dtype - # TODO(jreback) once integrated this would - # be a result of Integer - result = pd.concat([df1['A'], df2['A']]) - expected = pd.concat([df1['A'].astype('object'), - df2['A'].astype('object')]) - self.assert_series_equal(result, expected) - - -class TestGetitem(BaseInteger, base.BaseGetitemTests): - pass + def test_compare_array(self, data, all_compare_operators): + op_name = all_compare_operators + s = pd.Series(data) + other = pd.Series([0] * len(data)) + self._compare_other(s, data, op_name, other) -class TestMissing(BaseInteger, base.BaseMissingTests): +class TestCasting(object): pass - -class TestMethods(BaseInteger, base.BaseMethodsTests): - - @pytest.mark.parametrize('dropna', [True, False]) - def test_value_counts(self, all_data, dropna): - all_data = all_data[:10] - if dropna: - other = np.array(all_data[~all_data.isna()]) - else: - other = all_data - - result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() - expected = pd.Series(other).value_counts( - dropna=dropna).sort_index() - expected.index = expected.index.astype(all_data.dtype) - - self.assert_series_equal(result, expected) - - def test_combine_add(self, data_repeated): - # GH 20825 - orig_data1, orig_data2 = data_repeated(2) - s1 = pd.Series(orig_data1) - s2 = pd.Series(orig_data2) - - # fundamentally this is not a great operation - # as overflow / underflow can easily happen here - # e.g. int8 + int8 - def scalar_add(a, b): - - # TODO; should really be a type specific NA - if pd.isna(a) or pd.isna(b): - return np.nan - if is_integer(a): - a = int(a) - elif is_integer(b): - b = int(b) - return a + b - - result = s1.combine(s2, scalar_add) - expected = pd.Series( - orig_data1._from_sequence([scalar_add(a, b) for (a, b) in - zip(orig_data1, - orig_data2)])) - self.assert_series_equal(result, expected) - - val = s1.iloc[0] - result = s1.combine(val, lambda x1, x2: x1 + x2) - expected = pd.Series( - orig_data1._from_sequence([a + val for a in list(orig_data1)])) - self.assert_series_equal(result, expected) - - -class TestCasting(BaseInteger, base.BaseCastingTests): - @pytest.mark.parametrize('dropna', [True, False]) def test_construct_index(self, all_data, dropna): # ensure that we do not coerce to Float64Index, rather @@ -497,7 +335,7 @@ def test_construct_index(self, all_data, dropna): result = pd.Index(integer_array(other, dtype=all_data.dtype)) expected = pd.Index(other, dtype=object) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) @pytest.mark.parametrize('dropna', [True, False]) def test_astype_index(self, all_data, dropna): @@ -515,7 +353,7 @@ def test_astype_index(self, all_data, dropna): result = idx.astype(dtype) expected = idx.astype(object).astype(dtype) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) def test_astype(self, all_data): all_data = all_data[:10] @@ -528,13 +366,13 @@ def test_astype(self, all_data): s = pd.Series(ints) result = s.astype(all_data.dtype) expected = pd.Series(ints) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # coerce to same other - ints s = pd.Series(ints) result = s.astype(dtype) expected = pd.Series(ints, dtype=dtype) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # coerce to same numpy_dtype - ints s = pd.Series(ints) @@ -547,13 +385,13 @@ def test_astype(self, all_data): s = pd.Series(mixed) result = s.astype(all_data.dtype) expected = pd.Series(mixed) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # coerce to same other - mixed s = pd.Series(mixed) result = s.astype(dtype) expected = pd.Series(mixed, dtype=dtype) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # coerce to same numpy_dtype - mixed s = pd.Series(mixed) @@ -572,12 +410,12 @@ def test_astype_specific_casting(self, dtype): s = pd.Series([1, 2, 3], dtype='Int64') result = s.astype(dtype) expected = pd.Series([1, 2, 3], dtype=dtype) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) s = pd.Series([1, 2, 3, None], dtype='Int64') result = s.astype(dtype) expected = pd.Series([1, 2, 3, None], dtype=dtype) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_construct_cast_invalid(self, dtype): @@ -597,24 +435,6 @@ def test_construct_cast_invalid(self, dtype): pd.Series(arr).astype(dtype) -class TestGroupby(BaseInteger, base.BaseGroupbyTests): - - @pytest.mark.xfail(reason="groupby not working", strict=True) - def test_groupby_extension_no_sort(self, data_for_grouping): - super(TestGroupby, self).test_groupby_extension_no_sort( - data_for_grouping) - - @pytest.mark.parametrize('as_index', [ - pytest.param(True, - marks=pytest.mark.xfail(reason="groupby not working", - strict=True)), - False - ]) - def test_groupby_extension_agg(self, as_index, data_for_grouping): - super(TestGroupby, self).test_groupby_extension_agg( - as_index, data_for_grouping) - - def test_frame_repr(data_missing): df = pd.DataFrame({'A': data_missing}) diff --git a/pandas/tests/arrays/test_interval.py b/pandas/tests/arrays/test_interval.py new file mode 100644 index 0000000000000..bcf4cea795978 --- /dev/null +++ b/pandas/tests/arrays/test_interval.py @@ -0,0 +1,72 @@ +# -*- coding: utf-8 -*- +import pytest +import numpy as np + +from pandas import Index, IntervalIndex, date_range, timedelta_range +from pandas.core.arrays import IntervalArray +import pandas.util.testing as tm + + +@pytest.fixture(params=[ + (Index([0, 2, 4]), Index([1, 3, 5])), + (Index([0., 1., 2.]), Index([1., 2., 3.])), + (timedelta_range('0 days', periods=3), + timedelta_range('1 day', periods=3)), + (date_range('20170101', periods=3), date_range('20170102', periods=3)), + (date_range('20170101', periods=3, tz='US/Eastern'), + date_range('20170102', periods=3, tz='US/Eastern'))], + ids=lambda x: str(x[0].dtype)) +def left_right_dtypes(request): + """ + Fixture for building an IntervalArray from various dtypes + """ + return request.param + + +class TestMethods(object): + + @pytest.mark.parametrize('repeats', [0, 1, 5]) + def test_repeat(self, left_right_dtypes, repeats): + left, right = left_right_dtypes + result = IntervalArray.from_arrays(left, right).repeat(repeats) + expected = IntervalArray.from_arrays( + left.repeat(repeats), right.repeat(repeats)) + tm.assert_extension_array_equal(result, expected) + + @pytest.mark.parametrize('bad_repeats, msg', [ + (-1, 'negative dimensions are not allowed'), + ('foo', r'invalid literal for (int|long)\(\) with base 10')]) + def test_repeat_errors(self, bad_repeats, msg): + array = IntervalArray.from_breaks(range(4)) + with tm.assert_raises_regex(ValueError, msg): + array.repeat(bad_repeats) + + @pytest.mark.parametrize('new_closed', [ + 'left', 'right', 'both', 'neither']) + def test_set_closed(self, closed, new_closed): + # GH 21670 + array = IntervalArray.from_breaks(range(10), closed=closed) + result = array.set_closed(new_closed) + expected = IntervalArray.from_breaks(range(10), closed=new_closed) + tm.assert_extension_array_equal(result, expected) + + +class TestSetitem(object): + + def test_set_na(self, left_right_dtypes): + left, right = left_right_dtypes + result = IntervalArray.from_arrays(left, right) + result[0] = np.nan + + expected_left = Index([left._na_value] + list(left[1:])) + expected_right = Index([right._na_value] + list(right[1:])) + expected = IntervalArray.from_arrays(expected_left, expected_right) + + tm.assert_extension_array_equal(result, expected) + + +def test_repr_matches(): + idx = IntervalIndex.from_breaks([1, 2, 3]) + a = repr(idx) + b = repr(idx.values) + assert a.replace("Index", "Array") == b diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index c8656808739c4..4e7886dd2e943 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -127,10 +127,11 @@ def test_combine_add(self, data_repeated): s1 = pd.Series(orig_data1) s2 = pd.Series(orig_data2) result = s1.combine(s2, lambda x1, x2: x1 + x2) - expected = pd.Series( - orig_data1._from_sequence([a + b for (a, b) in - zip(list(orig_data1), - list(orig_data2))])) + with np.errstate(over='ignore'): + expected = pd.Series( + orig_data1._from_sequence([a + b for (a, b) in + zip(list(orig_data1), + list(orig_data2))])) self.assert_series_equal(result, expected) val = s1.iloc[0] diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index f7bfdb8ec218a..05351c56862b8 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -23,9 +23,9 @@ def get_op_from_name(self, op_name): def check_opname(self, s, op_name, other, exc=NotImplementedError): op = self.get_op_from_name(op_name) - self._check_op(s, op, other, exc) + self._check_op(s, op, other, op_name, exc) - def _check_op(self, s, op, other, exc=NotImplementedError): + def _check_op(self, s, op, other, op_name, exc=NotImplementedError): if exc is None: result = op(s, other) expected = s.combine(other, op) @@ -69,7 +69,8 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators): # ndarray & other series op_name = all_arithmetic_operators s = pd.Series(data) - self.check_opname(s, op_name, [s.iloc[0]] * len(s), exc=TypeError) + self.check_opname(s, op_name, pd.Series([s.iloc[0]] * len(s)), + exc=TypeError) def test_divmod(self, data): s = pd.Series(data) @@ -113,5 +114,5 @@ def test_compare_scalar(self, data, all_compare_operators): def test_compare_array(self, data, all_compare_operators): op_name = all_compare_operators s = pd.Series(data) - other = [0] * len(data) + other = pd.Series([data[0]] * len(data)) self._compare_other(s, data, op_name, other) diff --git a/pandas/tests/extension/category/__init__.py b/pandas/tests/extension/category/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/pandas/tests/extension/integer/__init__.py b/pandas/tests/extension/integer/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/pandas/tests/extension/interval/__init__.py b/pandas/tests/extension/interval/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/test_categorical.py similarity index 85% rename from pandas/tests/extension/category/test_categorical.py rename to pandas/tests/extension/test_categorical.py index 76f6b03907ef8..b8c73a9efdae8 100644 --- a/pandas/tests/extension/category/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -1,3 +1,18 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. + +""" import string import pytest @@ -204,10 +219,14 @@ class TestComparisonOps(base.BaseComparisonOpsTests): def _compare_other(self, s, data, op_name, other): op = self.get_op_from_name(op_name) if op_name == '__eq__': - assert not op(data, other).all() + result = op(s, other) + expected = s.combine(other, lambda x, y: x == y) + assert (result == expected).all() elif op_name == '__ne__': - assert op(data, other).all() + result = op(s, other) + expected = s.combine(other, lambda x, y: x != y) + assert (result == expected).all() else: with pytest.raises(TypeError): diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py new file mode 100644 index 0000000000000..50c0e6dd8b347 --- /dev/null +++ b/pandas/tests/extension/test_integer.py @@ -0,0 +1,229 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. + +""" +import numpy as np +import pandas as pd +import pytest + +from pandas.tests.extension import base +from pandas.core.dtypes.common import is_extension_array_dtype + +from pandas.core.arrays import IntegerArray, integer_array +from pandas.core.arrays.integer import ( + Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, + UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype) + + +def make_data(): + return (list(range(1, 9)) + [np.nan] + list(range(10, 98)) + + [np.nan] + [99, 100]) + + +@pytest.fixture(params=[Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, + UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype]) +def dtype(request): + return request.param() + + +@pytest.fixture +def data(dtype): + return integer_array(make_data(), dtype=dtype) + + +@pytest.fixture +def data_missing(dtype): + return integer_array([np.nan, 1], dtype=dtype) + + +@pytest.fixture +def data_repeated(data): + def gen(count): + for _ in range(count): + yield data + yield gen + + +@pytest.fixture +def data_for_sorting(dtype): + return integer_array([1, 2, 0], dtype=dtype) + + +@pytest.fixture +def data_missing_for_sorting(dtype): + return integer_array([1, np.nan, 0], dtype=dtype) + + +@pytest.fixture +def na_cmp(): + # we are np.nan + return lambda x, y: np.isnan(x) and np.isnan(y) + + +@pytest.fixture +def na_value(): + return np.nan + + +@pytest.fixture +def data_for_grouping(dtype): + b = 1 + a = 0 + c = 2 + na = np.nan + return integer_array([b, b, na, na, a, a, b, c], dtype=dtype) + + +class TestDtype(base.BaseDtypeTests): + + @pytest.mark.skip(reason="using multiple dtypes") + def test_is_dtype_unboxes_dtype(self): + # we have multiple dtypes, so skip + pass + + def test_array_type_with_arg(self, data, dtype): + assert dtype.construct_array_type() is IntegerArray + + +class TestArithmeticOps(base.BaseArithmeticOpsTests): + + def check_opname(self, s, op_name, other, exc=None): + # overwriting to indicate ops don't raise an error + super(TestArithmeticOps, self).check_opname(s, op_name, + other, exc=None) + + def _check_op(self, s, op, other, op_name, exc=NotImplementedError): + if exc is None: + if s.dtype.is_unsigned_integer and (op_name == '__rsub__'): + # TODO see https://github.com/pandas-dev/pandas/issues/22023 + pytest.skip("unsigned subtraction gives negative values") + + if (hasattr(other, 'dtype') + and not is_extension_array_dtype(other.dtype) + and pd.api.types.is_integer_dtype(other.dtype)): + # other is np.int64 and would therefore always result in + # upcasting, so keeping other as same numpy_dtype + other = other.astype(s.dtype.numpy_dtype) + + result = op(s, other) + expected = s.combine(other, op) + + if op_name == '__rdiv__': + # combine is not giving the correct result for this case + pytest.skip("skipping reverse div in python 2") + elif op_name in ('__rtruediv__', '__truediv__', '__div__'): + expected = expected.astype(float) + if op_name == '__rtruediv__': + # TODO reverse operators result in object dtype + result = result.astype(float) + elif op_name.startswith('__r'): + # TODO reverse operators result in object dtype + # see https://github.com/pandas-dev/pandas/issues/22024 + expected = expected.astype(s.dtype) + result = result.astype(s.dtype) + else: + # combine method result in 'biggest' (int64) dtype + expected = expected.astype(s.dtype) + pass + if (op_name == '__rpow__') and isinstance(other, pd.Series): + # TODO pow on Int arrays gives different result with NA + # see https://github.com/pandas-dev/pandas/issues/22022 + result = result.fillna(1) + + self.assert_series_equal(result, expected) + else: + with pytest.raises(exc): + op(s, other) + + def _check_divmod_op(self, s, op, other, exc=None): + super(TestArithmeticOps, self)._check_divmod_op(s, op, other, None) + + @pytest.mark.skip(reason="intNA does not error on ops") + def test_error(self, data, all_arithmetic_operators): + # other specific errors tested in the integer array specific tests + pass + + +class TestComparisonOps(base.BaseComparisonOpsTests): + + def check_opname(self, s, op_name, other, exc=None): + super(TestComparisonOps, self).check_opname(s, op_name, + other, exc=None) + + def _compare_other(self, s, data, op_name, other): + self.check_opname(s, op_name, other) + + +class TestInterface(base.BaseInterfaceTests): + pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestReshaping(base.BaseReshapingTests): + pass + + # for test_concat_mixed_dtypes test + # concat of an Integer and Int coerces to object dtype + # TODO(jreback) once integrated this would + + +class TestGetitem(base.BaseGetitemTests): + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class TestMethods(base.BaseMethodsTests): + + @pytest.mark.parametrize('dropna', [True, False]) + def test_value_counts(self, all_data, dropna): + all_data = all_data[:10] + if dropna: + other = np.array(all_data[~all_data.isna()]) + else: + other = all_data + + result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() + expected = pd.Series(other).value_counts( + dropna=dropna).sort_index() + expected.index = expected.index.astype(all_data.dtype) + + self.assert_series_equal(result, expected) + + +class TestCasting(base.BaseCastingTests): + pass + + +class TestGroupby(base.BaseGroupbyTests): + + @pytest.mark.xfail(reason="groupby not working", strict=True) + def test_groupby_extension_no_sort(self, data_for_grouping): + super(TestGroupby, self).test_groupby_extension_no_sort( + data_for_grouping) + + @pytest.mark.parametrize('as_index', [ + pytest.param(True, + marks=pytest.mark.xfail(reason="groupby not working", + strict=True)), + False + ]) + def test_groupby_extension_agg(self, as_index, data_for_grouping): + super(TestGroupby, self).test_groupby_extension_agg( + as_index, data_for_grouping) diff --git a/pandas/tests/extension/interval/test_interval.py b/pandas/tests/extension/test_interval.py similarity index 54% rename from pandas/tests/extension/interval/test_interval.py rename to pandas/tests/extension/test_interval.py index a10a56ddfdfac..625619a90ed4c 100644 --- a/pandas/tests/extension/interval/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -1,7 +1,22 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. + +""" import pytest import numpy as np -from pandas import Index, Interval, IntervalIndex, date_range, timedelta_range +from pandas import Interval from pandas.core.arrays import IntervalArray from pandas.core.dtypes.dtypes import IntervalDtype from pandas.tests.extension import base @@ -15,22 +30,6 @@ def make_data(): return [Interval(l, r) for l, r in zip(left, right)] -@pytest.fixture(params=[ - (Index([0, 2, 4]), Index([1, 3, 5])), - (Index([0., 1., 2.]), Index([1., 2., 3.])), - (timedelta_range('0 days', periods=3), - timedelta_range('1 day', periods=3)), - (date_range('20170101', periods=3), date_range('20170102', periods=3)), - (date_range('20170101', periods=3, tz='US/Eastern'), - date_range('20170102', periods=3, tz='US/Eastern'))], - ids=lambda x: str(x[0].dtype)) -def left_right_dtypes(request): - """ - Fixture for building an IntervalArray from various dtypes - """ - return request.param - - @pytest.fixture def dtype(): return IntervalDtype() @@ -111,30 +110,6 @@ class TestInterface(BaseInterval, base.BaseInterfaceTests): class TestMethods(BaseInterval, base.BaseMethodsTests): - @pytest.mark.parametrize('repeats', [0, 1, 5]) - def test_repeat(self, left_right_dtypes, repeats): - left, right = left_right_dtypes - result = IntervalArray.from_arrays(left, right).repeat(repeats) - expected = IntervalArray.from_arrays( - left.repeat(repeats), right.repeat(repeats)) - tm.assert_extension_array_equal(result, expected) - - @pytest.mark.parametrize('bad_repeats, msg', [ - (-1, 'negative dimensions are not allowed'), - ('foo', r'invalid literal for (int|long)\(\) with base 10')]) - def test_repeat_errors(self, bad_repeats, msg): - array = IntervalArray.from_breaks(range(4)) - with tm.assert_raises_regex(ValueError, msg): - array.repeat(bad_repeats) - - @pytest.mark.parametrize('new_closed', [ - 'left', 'right', 'both', 'neither']) - def test_set_closed(self, closed, new_closed): - # GH 21670 - array = IntervalArray.from_breaks(range(10), closed=closed) - result = array.set_closed(new_closed) - expected = IntervalArray.from_breaks(range(10), closed=new_closed) - tm.assert_extension_array_equal(result, expected) @pytest.mark.skip(reason='addition is not defined for intervals') def test_combine_add(self, data_repeated): @@ -173,21 +148,4 @@ class TestReshaping(BaseInterval, base.BaseReshapingTests): class TestSetitem(BaseInterval, base.BaseSetitemTests): - - def test_set_na(self, left_right_dtypes): - left, right = left_right_dtypes - result = IntervalArray.from_arrays(left, right) - result[0] = np.nan - - expected_left = Index([left._na_value] + list(left[1:])) - expected_right = Index([right._na_value] + list(right[1:])) - expected = IntervalArray.from_arrays(expected_left, expected_right) - - self.assert_extension_array_equal(result, expected) - - -def test_repr_matches(): - idx = IntervalIndex.from_breaks([1, 2, 3]) - a = repr(idx) - b = repr(idx.values) - assert a.replace("Index", "Array") == b + pass