From f3e30859769974d62ed297d885e15b43c7c74626 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 23 Jul 2018 16:39:12 +0200 Subject: [PATCH 01/13] Split integer array tests in tests/arrays/integer and tests/extension/integer --- pandas/tests/arrays/test_integer.py | 525 ++++++++++++++++ pandas/tests/extension/base/methods.py | 9 +- pandas/tests/extension/base/ops.py | 9 +- .../extension/category/test_categorical.py | 8 +- .../tests/extension/integer/test_integer.py | 564 ++---------------- 5 files changed, 596 insertions(+), 519 deletions(-) create mode 100644 pandas/tests/arrays/test_integer.py diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py new file mode 100644 index 0000000000000..c4a42c61b0df9 --- /dev/null +++ b/pandas/tests/arrays/test_integer.py @@ -0,0 +1,525 @@ +# -*- coding: utf-8 -*- +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest + +from pandas.api.types import is_integer, is_float, is_float_dtype, is_scalar +from pandas.core.dtypes.generic import ABCIndexClass + +from pandas.core.arrays import ( + to_integer_array, IntegerArray) +from pandas.core.arrays.integer import ( + Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, + UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype) + +from ..extension.integer.test_integer import BaseInteger +from ..extension.base import BaseOpsUtil + + +def make_data(): + return (list(range(8)) + + [np.nan] + + list(range(10, 98)) + + [np.nan] + + [99, 100]) + + +@pytest.fixture(params=[Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, + UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype]) +def dtype(request): + return request.param() + + +@pytest.fixture +def data(dtype): + return IntegerArray(make_data(), dtype=dtype) + + +@pytest.fixture +def data_missing(dtype): + return IntegerArray([np.nan, 1], dtype=dtype) + + +@pytest.fixture(params=['data', 'data_missing']) +def all_data(request, data, data_missing): + """Parametrized fixture giving 'data' and 'data_missing'""" + if request.param == 'data': + return data + elif request.param == 'data_missing': + return data_missing + + +def test_dtypes(dtype): + # smoke tests on auto dtype construction + + if dtype.is_signed_integer: + assert np.dtype(dtype.type).kind == 'i' + else: + assert np.dtype(dtype.type).kind == 'u' + assert dtype.name is not None + + +class TestInterface: + + def test_repr_array(self, data): + result = repr(data) + + # not long + assert '...' not in result + + assert 'dtype=' in result + assert 'IntegerArray' in result + + def test_repr_array_long(self, data): + # some arrays may be able to assert a ... in the repr + with pd.option_context('display.max_seq_items', 1): + result = repr(data) + + assert '...' in result + assert 'length' in result + + +class TestConstructors(BaseInteger): + + def test_from_dtype_from_float(self, data): + # construct from our dtype & string dtype + dtype = data.dtype + + # from float + expected = pd.Series(data) + result = pd.Series(np.array(data).astype('float'), dtype=str(dtype)) + self.assert_series_equal(result, expected) + + # from int / list + expected = pd.Series(data) + result = pd.Series(np.array(data).tolist(), dtype=str(dtype)) + self.assert_series_equal(result, expected) + + # from int / array + expected = pd.Series(data).dropna().reset_index(drop=True) + dropped = np.array(data.dropna()).astype(np.dtype((dtype.type))) + result = pd.Series(dropped, dtype=str(dtype)) + self.assert_series_equal(result, expected) + + +class TestArithmeticOps(BaseOpsUtil, BaseInteger): + + def _check_divmod_op(self, s, op, other, exc=None): + super(TestArithmeticOps, self)._check_divmod_op(s, op, other, None) + + def _check_op(self, s, op_name, other, exc=None): + op = self.get_op_from_name(op_name) + result = op(s, other) + + # compute expected + mask = s.isna() + + # other array is an Integer + if isinstance(other, IntegerArray): + omask = getattr(other, 'mask', None) + mask = getattr(other, 'data', other) + if omask is not None: + mask |= omask + + # float result type or float op + if ((is_float_dtype(other) or is_float(other) or + op_name in ['__rtruediv__', '__truediv__', + '__rdiv__', '__div__'])): + rs = s.astype('float') + expected = op(rs, other) + self._check_op_float(result, expected, mask, s, op_name, other) + + # integer result type + else: + rs = pd.Series(s.values._data) + expected = op(rs, other) + self._check_op_integer(result, expected, mask, s, op_name, other) + + def _check_op_float(self, result, expected, mask, s, op_name, other): + # check comparisions that are resulting in float dtypes + + expected[mask] = np.nan + self.assert_series_equal(result, expected) + + def _check_op_integer(self, result, expected, mask, s, op_name, other): + # check comparisions that are resulting in integer dtypes + + # to compare properly, we convert the expected + # to float, mask to nans and convert infs + # if we have uints then we process as uints + # then conert to float + # and we ultimately want to create a IntArray + # for comparisons + + fill_value = 0 + + # mod/rmod turn floating 0 into NaN while + # integer works as expected (no nan) + if op_name in ['__mod__', '__rmod__']: + if is_scalar(other): + if other == 0: + expected[s.values == 0] = 0 + else: + expected = expected.fillna(0) + else: + expected[(s.values == 0) & + ((expected == 0) | expected.isna())] = 0 + + try: + expected[(expected == np.inf) | (expected == -np.inf)] = fill_value + original = expected + expected = expected.astype(s.dtype) + + except ValueError: + + expected = expected.astype(float) + expected[(expected == np.inf) | (expected == -np.inf)] = fill_value + original = expected + expected = expected.astype(s.dtype) + + expected[mask] = np.nan + + # assert that the expected astype is ok + # (skip for unsigned as they have wrap around) + if not s.dtype.is_unsigned_integer: + original = pd.Series(original) + + # we need to fill with 0's to emulate what an astype('int') does + # (truncation) for certain ops + if op_name in ['__rtruediv__', '__rdiv__']: + mask |= original.isna() + original = original.fillna(0).astype('int') + + original = original.astype('float') + original[mask] = np.nan + self.assert_series_equal(original, expected.astype('float')) + + # assert our expected result + self.assert_series_equal(result, expected) + + def test_arith_integer_array(self, data, all_arithmetic_operators): + # we operate with a rhs of an integer array + + op = all_arithmetic_operators + + s = pd.Series(data) + rhs = pd.Series([1] * len(data), dtype=data.dtype) + rhs.iloc[-1] = np.nan + + self._check_op(s, op, rhs) + + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + # scalar + op = all_arithmetic_operators + + s = pd.Series(data) + self._check_op(s, op, 1, exc=TypeError) + + @pytest.mark.xfail(run=False, reason="_reduce needs implementation") + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): + # frame & scalar + op = all_arithmetic_operators + + df = pd.DataFrame({'A': data}) + self._check_op(df, op, 1, exc=TypeError) + + def test_arith_series_with_array(self, data, all_arithmetic_operators): + # ndarray & other series + op = all_arithmetic_operators + + s = pd.Series(data) + other = np.ones(len(s), dtype=s.dtype.type) + self._check_op(s, op, other, exc=TypeError) + + def test_arith_coerce_scalar(self, data, all_arithmetic_operators): + + op = all_arithmetic_operators + s = pd.Series(data) + + other = 0.01 + self._check_op(s, op, other) + + @pytest.mark.parametrize("other", [1., 1.0, np.array(1.), np.array([1.])]) + def test_arithmetic_conversion(self, all_arithmetic_operators, other): + # if we have a float operand we should have a float result + # if if that is equal to an integer + op = self.get_op_from_name(all_arithmetic_operators) + + s = pd.Series([1, 2, 3], dtype='Int64') + result = op(s, other) + assert result.dtype is np.dtype('float') + + def test_error(self, data, all_arithmetic_operators): + # invalid ops + + op = all_arithmetic_operators + s = pd.Series(data) + ops = getattr(s, op) + opa = getattr(data, op) + + # invalid scalars + with pytest.raises(TypeError): + ops('foo') + with pytest.raises(TypeError): + ops(pd.Timestamp('20180101')) + + # invalid array-likes + with pytest.raises(TypeError): + ops(pd.Series('foo', index=s.index)) + + if op != '__rpow__': + # TODO(extension) + # rpow with a datetimelike coerces the integer array incorrectly + with pytest.raises(TypeError): + ops(pd.Series(pd.date_range('20180101', periods=len(s)))) + + # 2d + with pytest.raises(NotImplementedError): + opa(pd.DataFrame({'A': s})) + with pytest.raises(NotImplementedError): + opa(np.arange(len(s)).reshape(-1, len(s))) + + +class TestComparisonOps(BaseOpsUtil, BaseInteger): + + def _compare_other(self, s, data, op_name, other): + op = self.get_op_from_name(op_name) + + # array + result = op(s, other) + expected = pd.Series(op(data._data, other)) + + # fill the nan locations + expected[data._mask] = True if op_name == '__ne__' else False + + tm.assert_series_equal(result, expected) + + # series + s = pd.Series(data) + result = op(s, other) + + expected = pd.Series(data._data) + expected = op(expected, other) + + # fill the nan locations + expected[data._mask] = True if op_name == '__ne__' else False + + tm.assert_series_equal(result, expected) + + def test_compare_scalar(self, data, all_compare_operators): + op_name = all_compare_operators + s = pd.Series(data) + self._compare_other(s, data, op_name, 0) + + def test_compare_array(self, data, all_compare_operators): + op_name = all_compare_operators + s = pd.Series(data) + other = pd.Series([0] * len(data)) + self._compare_other(s, data, op_name, other) + + +class TestCasting(BaseInteger): + pass + + @pytest.mark.parametrize('dropna', [True, False]) + def test_construct_index(self, all_data, dropna): + # ensure that we do not coerce to Float64Index, rather + # keep as Index + + all_data = all_data[:10] + if dropna: + other = np.array(all_data[~all_data.isna()]) + else: + other = all_data + + result = pd.Index(IntegerArray(other, + dtype=all_data.dtype)) + expected = pd.Index(other, dtype=object) + + self.assert_index_equal(result, expected) + + @pytest.mark.parametrize('dropna', [True, False]) + def test_astype_index(self, all_data, dropna): + # as an int/uint index to Index + + all_data = all_data[:10] + if dropna: + other = all_data[~all_data.isna()] + else: + other = all_data + + dtype = all_data.dtype + idx = pd.Index(np.array(other)) + assert isinstance(idx, ABCIndexClass) + + result = idx.astype(dtype) + expected = idx.astype(object).astype(dtype) + self.assert_index_equal(result, expected) + + def test_astype(self, all_data): + all_data = all_data[:10] + + ints = all_data[~all_data.isna()] + mixed = all_data + dtype = Int8Dtype() + + # coerce to same type - ints + s = pd.Series(ints) + result = s.astype(all_data.dtype) + expected = pd.Series(ints) + self.assert_series_equal(result, expected) + + # coerce to same other - ints + s = pd.Series(ints) + result = s.astype(dtype) + expected = pd.Series(ints, dtype=dtype) + self.assert_series_equal(result, expected) + + # coerce to same numpy_dtype - ints + s = pd.Series(ints) + result = s.astype(all_data.dtype.numpy_dtype) + expected = pd.Series(ints._data.astype( + all_data.dtype.numpy_dtype)) + tm.assert_series_equal(result, expected) + + # coerce to same type - mixed + s = pd.Series(mixed) + result = s.astype(all_data.dtype) + expected = pd.Series(mixed) + self.assert_series_equal(result, expected) + + # coerce to same other - mixed + s = pd.Series(mixed) + result = s.astype(dtype) + expected = pd.Series(mixed, dtype=dtype) + self.assert_series_equal(result, expected) + + # coerce to same numpy_dtype - mixed + s = pd.Series(mixed) + with pytest.raises(ValueError): + s.astype(all_data.dtype.numpy_dtype) + + # coerce to object + s = pd.Series(mixed) + result = s.astype('object') + expected = pd.Series(np.asarray(mixed)) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('dtype', [Int8Dtype(), 'Int8']) + def test_astype_specific_casting(self, dtype): + s = pd.Series([1, 2, 3], dtype='Int64') + result = s.astype(dtype) + expected = pd.Series([1, 2, 3], dtype='Int8') + self.assert_series_equal(result, expected) + + s = pd.Series([1, 2, 3, None], dtype='Int64') + result = s.astype(dtype) + expected = pd.Series([1, 2, 3, None], dtype='Int8') + self.assert_series_equal(result, expected) + + def test_construct_cast_invalid(self, dtype): + + msg = "cannot safely" + arr = [1.2, 2.3, 3.7] + with tm.assert_raises_regex(TypeError, msg): + IntegerArray(arr, dtype=dtype) + + with tm.assert_raises_regex(TypeError, msg): + pd.Series(arr).astype(dtype) + + arr = [1.2, 2.3, 3.7, np.nan] + with tm.assert_raises_regex(TypeError, msg): + IntegerArray(arr, dtype=dtype) + + with tm.assert_raises_regex(TypeError, msg): + pd.Series(arr).astype(dtype) + + +def test_frame_repr(data_missing): + + df = pd.DataFrame({'A': data_missing}) + result = repr(df) + expected = ' A\n0 NaN\n1 1' + assert result == expected + + +def test_conversions(data_missing): + + # astype to object series + df = pd.DataFrame({'A': data_missing}) + result = df['A'].astype('object') + expected = pd.Series(np.array([np.nan, 1], dtype=object), name='A') + tm.assert_series_equal(result, expected) + + # convert to object ndarray + # we assert that we are exactly equal + # including type conversions of scalars + result = df['A'].astype('object').values + expected = np.array([np.nan, 1], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + for r, e in zip(result, expected): + if pd.isnull(r): + assert pd.isnull(e) + elif is_integer(r): + # PY2 can be int or long + assert r == e + assert is_integer(e) + else: + assert r == e + assert type(r) == type(e) + + +@pytest.mark.parametrize( + 'values', + [ + ['foo', 'bar'], + 'foo', + 1, + 1.0, + pd.date_range('20130101', periods=2), + np.array(['foo'])]) +def test_to_integer_array_error(values): + # error in converting existing arrays to IntegerArrays + with pytest.raises(TypeError): + to_integer_array(values) + + +@pytest.mark.parametrize( + 'values, to_dtype, result_dtype', + [ + (np.array([1], dtype='int64'), None, Int64Dtype), + (np.array([1, np.nan]), None, Int64Dtype), + (np.array([1, np.nan]), 'int8', Int8Dtype)]) +def test_to_integer_array(values, to_dtype, result_dtype): + # convert existing arrays to IntegerArrays + result = to_integer_array(values, dtype=to_dtype) + expected = IntegerArray(values, dtype=result_dtype()) + tm.assert_extension_array_equal(result, expected) + + +def test_cross_type_arithmetic(): + + df = pd.DataFrame({'A': pd.Series([1, 2, np.nan], dtype='Int64'), + 'B': pd.Series([1, np.nan, 3], dtype='UInt8'), + 'C': [1, 2, 3]}) + + result = df.A + df.C + expected = pd.Series([2, 4, np.nan], dtype='Int64') + tm.assert_series_equal(result, expected) + + result = (df.A + df.C) * 3 == 12 + expected = pd.Series([False, True, False]) + tm.assert_series_equal(result, expected) + + result = df.A + df.B + expected = pd.Series([2, np.nan, np.nan], dtype='Int64') + tm.assert_series_equal(result, expected) + + +# TODO(jreback) - these need testing / are broken + +# shift + +# set_index (destroys type) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index c660687f16590..b9275be1956e0 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -127,10 +127,11 @@ def test_combine_add(self, data_repeated): s1 = pd.Series(orig_data1) s2 = pd.Series(orig_data2) result = s1.combine(s2, lambda x1, x2: x1 + x2) - expected = pd.Series( - orig_data1._from_sequence([a + b for (a, b) in - zip(list(orig_data1), - list(orig_data2))])) + with np.errstate(over='ignore'): + expected = pd.Series( + orig_data1._from_sequence([a + b for (a, b) in + zip(list(orig_data1), + list(orig_data2))])) self.assert_series_equal(result, expected) val = s1.iloc[0] diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index f7bfdb8ec218a..05351c56862b8 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -23,9 +23,9 @@ def get_op_from_name(self, op_name): def check_opname(self, s, op_name, other, exc=NotImplementedError): op = self.get_op_from_name(op_name) - self._check_op(s, op, other, exc) + self._check_op(s, op, other, op_name, exc) - def _check_op(self, s, op, other, exc=NotImplementedError): + def _check_op(self, s, op, other, op_name, exc=NotImplementedError): if exc is None: result = op(s, other) expected = s.combine(other, op) @@ -69,7 +69,8 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators): # ndarray & other series op_name = all_arithmetic_operators s = pd.Series(data) - self.check_opname(s, op_name, [s.iloc[0]] * len(s), exc=TypeError) + self.check_opname(s, op_name, pd.Series([s.iloc[0]] * len(s)), + exc=TypeError) def test_divmod(self, data): s = pd.Series(data) @@ -113,5 +114,5 @@ def test_compare_scalar(self, data, all_compare_operators): def test_compare_array(self, data, all_compare_operators): op_name = all_compare_operators s = pd.Series(data) - other = [0] * len(data) + other = pd.Series([data[0]] * len(data)) self._compare_other(s, data, op_name, other) diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py index 76f6b03907ef8..f8a2f09065dbc 100644 --- a/pandas/tests/extension/category/test_categorical.py +++ b/pandas/tests/extension/category/test_categorical.py @@ -204,10 +204,14 @@ class TestComparisonOps(base.BaseComparisonOpsTests): def _compare_other(self, s, data, op_name, other): op = self.get_op_from_name(op_name) if op_name == '__eq__': - assert not op(data, other).all() + result = op(s, other) + expected = s.combine(other, lambda x, y: x == y) + assert (result == expected).all() elif op_name == '__ne__': - assert op(data, other).all() + result = op(s, other) + expected = s.combine(other, lambda x, y: x != y) + assert (result == expected).all() else: with pytest.raises(TypeError): diff --git a/pandas/tests/extension/integer/test_integer.py b/pandas/tests/extension/integer/test_integer.py index 451f7488bd38a..5745248ee3564 100644 --- a/pandas/tests/extension/integer/test_integer.py +++ b/pandas/tests/extension/integer/test_integer.py @@ -4,19 +4,15 @@ import pytest from pandas.tests.extension import base -from pandas.api.types import ( - is_integer, is_scalar, is_float, is_float_dtype) -from pandas.core.dtypes.generic import ABCIndexClass -from pandas.core.arrays import ( - to_integer_array, IntegerArray) +from pandas.core.arrays import IntegerArray from pandas.core.arrays.integer import ( Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype) def make_data(): - return (list(range(8)) + + return (list(range(1, 9)) + [np.nan] + list(range(10, 98)) + [np.nan] + @@ -77,16 +73,6 @@ def data_for_grouping(dtype): return IntegerArray([b, b, na, na, a, a, b, c], dtype=dtype) -def test_dtypes(dtype): - # smoke tests on auto dtype construction - - if dtype.is_signed_integer: - assert np.dtype(dtype.type).kind == 'i' - else: - assert np.dtype(dtype.type).kind == 'u' - assert dtype.name is not None - - class BaseInteger(object): def assert_index_equal(self, left, right, *args, **kwargs): @@ -143,283 +129,77 @@ def test_array_type_with_arg(self, data, dtype): class TestArithmeticOps(BaseInteger, base.BaseArithmeticOpsTests): - def _check_divmod_op(self, s, op, other, exc=None): - super(TestArithmeticOps, self)._check_divmod_op(s, op, other, None) - - def _check_op(self, s, op_name, other, exc=None): - op = self.get_op_from_name(op_name) - result = op(s, other) - - # compute expected - mask = s.isna() - - # other array is an Integer - if isinstance(other, IntegerArray): - omask = getattr(other, 'mask', None) - mask = getattr(other, 'data', other) - if omask is not None: - mask |= omask - - # float result type or float op - if ((is_float_dtype(other) or is_float(other) or - op_name in ['__rtruediv__', '__truediv__', - '__rdiv__', '__div__'])): - rs = s.astype('float') - expected = op(rs, other) - self._check_op_float(result, expected, mask, s, op_name, other) - - # integer result type - else: - rs = pd.Series(s.values._data) - expected = op(rs, other) - self._check_op_integer(result, expected, mask, s, op_name, other) - - def _check_op_float(self, result, expected, mask, s, op_name, other): - # check comparisions that are resulting in float dtypes - - expected[mask] = np.nan - self.assert_series_equal(result, expected) - - def _check_op_integer(self, result, expected, mask, s, op_name, other): - # check comparisions that are resulting in integer dtypes - - # to compare properly, we convert the expected - # to float, mask to nans and convert infs - # if we have uints then we process as uints - # then conert to float - # and we ultimately want to create a IntArray - # for comparisons - - fill_value = 0 - - # mod/rmod turn floating 0 into NaN while - # integer works as expected (no nan) - if op_name in ['__mod__', '__rmod__']: - if is_scalar(other): - if other == 0: - expected[s.values == 0] = 0 - else: - expected = expected.fillna(0) + def check_opname(self, s, op_name, other, exc=None): + # overwriting to indicate ops don't raise an error + super(TestArithmeticOps, self).check_opname(s, op_name, + other, exc=None) + + def _check_op(self, s, op, other, op_name, exc=NotImplementedError): + if exc is None: + if s.dtype.is_unsigned_integer and (op_name == '__rsub__'): + # TODO see https://github.com/pandas-dev/pandas/issues/22023 + pytest.skip("unsigned subtraction gives negative values") + + result = op(s, other) + expected = s.combine(other, op) + + if op_name in ('__rtruediv__', '__truediv__'): + expected = expected.astype(float) + if op_name == '__rtruediv__': + # TODO reverse operators result in object dtype + result = result.astype(float) + elif op_name.startswith('__r'): + # TODO reverse operators result in object dtype + # see https://github.com/pandas-dev/pandas/issues/22024 + expected = expected.astype(s.dtype) + result = result.astype(s.dtype) else: - expected[(s.values == 0) & - ((expected == 0) | expected.isna())] = 0 - - try: - expected[(expected == np.inf) | (expected == -np.inf)] = fill_value - original = expected - expected = expected.astype(s.dtype) - - except ValueError: - - expected = expected.astype(float) - expected[(expected == np.inf) | (expected == -np.inf)] = fill_value - original = expected - expected = expected.astype(s.dtype) - - expected[mask] = np.nan - - # assert that the expected astype is ok - # (skip for unsigned as they have wrap around) - if not s.dtype.is_unsigned_integer: - original = pd.Series(original) - - # we need to fill with 0's to emulate what an astype('int') does - # (truncation) for certain ops - if op_name in ['__rtruediv__', '__rdiv__']: - mask |= original.isna() - original = original.fillna(0).astype('int') - - original = original.astype('float') - original[mask] = np.nan - self.assert_series_equal(original, expected.astype('float')) - - # assert our expected result - self.assert_series_equal(result, expected) - - def test_arith_integer_array(self, data, all_arithmetic_operators): - # we operate with a rhs of an integer array - - op = all_arithmetic_operators - - s = pd.Series(data) - rhs = pd.Series([1] * len(data), dtype=data.dtype) - rhs.iloc[-1] = np.nan - - self._check_op(s, op, rhs) - - def test_arith_series_with_scalar(self, data, all_arithmetic_operators): - # scalar - op = all_arithmetic_operators - - s = pd.Series(data) - self._check_op(s, op, 1, exc=TypeError) - - @pytest.mark.xfail(run=False, reason="_reduce needs implementation") - def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): - # frame & scalar - op = all_arithmetic_operators - - df = pd.DataFrame({'A': data}) - self._check_op(df, op, 1, exc=TypeError) - - def test_arith_series_with_array(self, data, all_arithmetic_operators): - # ndarray & other series - op = all_arithmetic_operators - - s = pd.Series(data) - other = np.ones(len(s), dtype=s.dtype.type) - self._check_op(s, op, other, exc=TypeError) - - def test_arith_coerce_scalar(self, data, all_arithmetic_operators): - - op = all_arithmetic_operators - s = pd.Series(data) - - other = 0.01 - self._check_op(s, op, other) - - @pytest.mark.parametrize("other", [1., 1.0, np.array(1.), np.array([1.])]) - def test_arithmetic_conversion(self, all_arithmetic_operators, other): - # if we have a float operand we should have a float result - # if if that is equal to an integer - op = self.get_op_from_name(all_arithmetic_operators) + # combine method result in 'biggest' (int64) dtype + expected = expected.astype(s.dtype) + pass + if (op_name == '__rpow__') and isinstance(other, pd.Series): + # TODO pow on Int arrays gives different result with NA + # see https://github.com/pandas-dev/pandas/issues/22022 + result = result.fillna(1) + + self.assert_series_equal(result, expected) + else: + with pytest.raises(exc): + op(s, other) - s = pd.Series([1, 2, 3], dtype='Int64') - result = op(s, other) - assert result.dtype is np.dtype('float') + def _check_divmod_op(self, s, op, other, exc=None): + super(TestArithmeticOps, self)._check_divmod_op(s, op, other, None) + @pytest.mark.skip(reason="intNA does not error on ops") def test_error(self, data, all_arithmetic_operators): - # invalid ops - - op = all_arithmetic_operators - s = pd.Series(data) - ops = getattr(s, op) - opa = getattr(data, op) - - # invalid scalars - with pytest.raises(TypeError): - ops('foo') - with pytest.raises(TypeError): - ops(pd.Timestamp('20180101')) - - # invalid array-likes - with pytest.raises(TypeError): - ops(pd.Series('foo', index=s.index)) - - if op != '__rpow__': - # TODO(extension) - # rpow with a datetimelike coerces the integer array incorrectly - with pytest.raises(TypeError): - ops(pd.Series(pd.date_range('20180101', periods=len(s)))) - - # 2d - with pytest.raises(NotImplementedError): - opa(pd.DataFrame({'A': s})) - with pytest.raises(NotImplementedError): - opa(np.arange(len(s)).reshape(-1, len(s))) + # other specific errors tested in the integer array specific tests + pass class TestComparisonOps(BaseInteger, base.BaseComparisonOpsTests): - def _compare_other(self, s, data, op_name, other): - op = self.get_op_from_name(op_name) + def check_opname(self, s, op_name, other, exc=None): + super(TestComparisonOps, self).check_opname(s, op_name, + other, exc=None) - # array - result = op(s, other) - expected = pd.Series(op(data._data, other)) - - # fill the nan locations - expected[data._mask] = True if op_name == '__ne__' else False - - tm.assert_series_equal(result, expected) - - # series - s = pd.Series(data) - result = op(s, other) - - expected = pd.Series(data._data) - expected = op(expected, other) - - # fill the nan locations - expected[data._mask] = True if op_name == '__ne__' else False - - tm.assert_series_equal(result, expected) + def _compare_other(self, s, data, op_name, other): + self.check_opname(s, op_name, other) class TestInterface(BaseInteger, base.BaseInterfaceTests): - - def test_repr_array(self, data): - result = repr(data) - - # not long - assert '...' not in result - - assert 'dtype=' in result - assert 'IntegerArray' in result - - def test_repr_array_long(self, data): - # some arrays may be able to assert a ... in the repr - with pd.option_context('display.max_seq_items', 1): - result = repr(data) - - assert '...' in result - assert 'length' in result + pass class TestConstructors(BaseInteger, base.BaseConstructorsTests): - - def test_from_dtype_from_float(self, data): - # construct from our dtype & string dtype - dtype = data.dtype - - # from float - expected = pd.Series(data) - result = pd.Series(np.array(data).astype('float'), dtype=str(dtype)) - self.assert_series_equal(result, expected) - - # from int / list - expected = pd.Series(data) - result = pd.Series(np.array(data).tolist(), dtype=str(dtype)) - self.assert_series_equal(result, expected) - - # from int / array - expected = pd.Series(data).dropna().reset_index(drop=True) - dropped = np.array(data.dropna()).astype(np.dtype((dtype.type))) - result = pd.Series(dropped, dtype=str(dtype)) - self.assert_series_equal(result, expected) + pass class TestReshaping(BaseInteger, base.BaseReshapingTests): + pass - def test_concat_mixed_dtypes(self, data): - # https://github.com/pandas-dev/pandas/issues/20762 - df1 = pd.DataFrame({'A': data[:3]}) - df2 = pd.DataFrame({"A": [1, 2, 3]}) - df3 = pd.DataFrame({"A": ['a', 'b', 'c']}).astype('category') - df4 = pd.DataFrame({"A": pd.SparseArray([1, 2, 3])}) - dfs = [df1, df2, df3, df4] - - # dataframes - result = pd.concat(dfs) - expected = pd.concat([x.astype(object) for x in dfs]) - self.assert_frame_equal(result, expected) - - # series - result = pd.concat([x['A'] for x in dfs]) - expected = pd.concat([x['A'].astype(object) for x in dfs]) - self.assert_series_equal(result, expected) - - result = pd.concat([df1, df2]) - expected = pd.concat([df1.astype('object'), df2.astype('object')]) - self.assert_frame_equal(result, expected) - - # concat of an Integer and Int coerces to object dtype - # TODO(jreback) once integrated this would - # be a result of Integer - result = pd.concat([df1['A'], df2['A']]) - expected = pd.concat([df1['A'].astype('object'), - df2['A'].astype('object')]) - self.assert_series_equal(result, expected) + # for test_concat_mixed_dtypes test + # concat of an Integer and Int coerces to object dtype + # TODO(jreback) once integrated this would class TestGetitem(BaseInteger, base.BaseGetitemTests): @@ -447,154 +227,9 @@ def test_value_counts(self, all_data, dropna): self.assert_series_equal(result, expected) - def test_combine_add(self, data_repeated): - # GH 20825 - orig_data1, orig_data2 = data_repeated(2) - s1 = pd.Series(orig_data1) - s2 = pd.Series(orig_data2) - - # fundamentally this is not a great operation - # as overflow / underflow can easily happen here - # e.g. int8 + int8 - def scalar_add(a, b): - - # TODO; should really be a type specific NA - if pd.isna(a) or pd.isna(b): - return np.nan - if is_integer(a): - a = int(a) - elif is_integer(b): - b = int(b) - return a + b - - result = s1.combine(s2, scalar_add) - expected = pd.Series( - orig_data1._from_sequence([scalar_add(a, b) for (a, b) in - zip(orig_data1, - orig_data2)])) - self.assert_series_equal(result, expected) - - val = s1.iloc[0] - result = s1.combine(val, lambda x1, x2: x1 + x2) - expected = pd.Series( - orig_data1._from_sequence([a + val for a in list(orig_data1)])) - self.assert_series_equal(result, expected) - class TestCasting(BaseInteger, base.BaseCastingTests): - - @pytest.mark.parametrize('dropna', [True, False]) - def test_construct_index(self, all_data, dropna): - # ensure that we do not coerce to Float64Index, rather - # keep as Index - - all_data = all_data[:10] - if dropna: - other = np.array(all_data[~all_data.isna()]) - else: - other = all_data - - result = pd.Index(IntegerArray(other, - dtype=all_data.dtype)) - expected = pd.Index(other, dtype=object) - - self.assert_index_equal(result, expected) - - @pytest.mark.parametrize('dropna', [True, False]) - def test_astype_index(self, all_data, dropna): - # as an int/uint index to Index - - all_data = all_data[:10] - if dropna: - other = all_data[~all_data.isna()] - else: - other = all_data - - dtype = all_data.dtype - idx = pd.Index(np.array(other)) - assert isinstance(idx, ABCIndexClass) - - result = idx.astype(dtype) - expected = idx.astype(object).astype(dtype) - self.assert_index_equal(result, expected) - - def test_astype(self, all_data): - all_data = all_data[:10] - - ints = all_data[~all_data.isna()] - mixed = all_data - dtype = Int8Dtype() - - # coerce to same type - ints - s = pd.Series(ints) - result = s.astype(all_data.dtype) - expected = pd.Series(ints) - self.assert_series_equal(result, expected) - - # coerce to same other - ints - s = pd.Series(ints) - result = s.astype(dtype) - expected = pd.Series(ints, dtype=dtype) - self.assert_series_equal(result, expected) - - # coerce to same numpy_dtype - ints - s = pd.Series(ints) - result = s.astype(all_data.dtype.numpy_dtype) - expected = pd.Series(ints._data.astype( - all_data.dtype.numpy_dtype)) - tm.assert_series_equal(result, expected) - - # coerce to same type - mixed - s = pd.Series(mixed) - result = s.astype(all_data.dtype) - expected = pd.Series(mixed) - self.assert_series_equal(result, expected) - - # coerce to same other - mixed - s = pd.Series(mixed) - result = s.astype(dtype) - expected = pd.Series(mixed, dtype=dtype) - self.assert_series_equal(result, expected) - - # coerce to same numpy_dtype - mixed - s = pd.Series(mixed) - with pytest.raises(ValueError): - s.astype(all_data.dtype.numpy_dtype) - - # coerce to object - s = pd.Series(mixed) - result = s.astype('object') - expected = pd.Series(np.asarray(mixed)) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize('dtype', [Int8Dtype(), 'Int8']) - def test_astype_specific_casting(self, dtype): - s = pd.Series([1, 2, 3], dtype='Int64') - result = s.astype(dtype) - expected = pd.Series([1, 2, 3], dtype='Int8') - self.assert_series_equal(result, expected) - - s = pd.Series([1, 2, 3, None], dtype='Int64') - result = s.astype(dtype) - expected = pd.Series([1, 2, 3, None], dtype='Int8') - self.assert_series_equal(result, expected) - - def test_construct_cast_invalid(self, dtype): - - msg = "cannot safely" - arr = [1.2, 2.3, 3.7] - with tm.assert_raises_regex(TypeError, msg): - IntegerArray(arr, dtype=dtype) - - with tm.assert_raises_regex(TypeError, msg): - pd.Series(arr).astype(dtype) - - arr = [1.2, 2.3, 3.7, np.nan] - with tm.assert_raises_regex(TypeError, msg): - IntegerArray(arr, dtype=dtype) - - with tm.assert_raises_regex(TypeError, msg): - pd.Series(arr).astype(dtype) + pass class TestGroupby(BaseInteger, base.BaseGroupbyTests): @@ -609,92 +244,3 @@ def test_groupby_extension_no_sort(self, data_for_grouping): def test_groupby_extension_agg(self, as_index, data_for_grouping): super(TestGroupby, self).test_groupby_extension_agg( as_index, data_for_grouping) - - -def test_frame_repr(data_missing): - - df = pd.DataFrame({'A': data_missing}) - result = repr(df) - expected = ' A\n0 NaN\n1 1' - assert result == expected - - -def test_conversions(data_missing): - - # astype to object series - df = pd.DataFrame({'A': data_missing}) - result = df['A'].astype('object') - expected = pd.Series(np.array([np.nan, 1], dtype=object), name='A') - tm.assert_series_equal(result, expected) - - # convert to object ndarray - # we assert that we are exactly equal - # including type conversions of scalars - result = df['A'].astype('object').values - expected = np.array([np.nan, 1], dtype=object) - tm.assert_numpy_array_equal(result, expected) - - for r, e in zip(result, expected): - if pd.isnull(r): - assert pd.isnull(e) - elif is_integer(r): - # PY2 can be int or long - assert r == e - assert is_integer(e) - else: - assert r == e - assert type(r) == type(e) - - -@pytest.mark.parametrize( - 'values', - [ - ['foo', 'bar'], - 'foo', - 1, - 1.0, - pd.date_range('20130101', periods=2), - np.array(['foo'])]) -def test_to_integer_array_error(values): - # error in converting existing arrays to IntegerArrays - with pytest.raises(TypeError): - to_integer_array(values) - - -@pytest.mark.parametrize( - 'values, to_dtype, result_dtype', - [ - (np.array([1], dtype='int64'), None, Int64Dtype), - (np.array([1, np.nan]), None, Int64Dtype), - (np.array([1, np.nan]), 'int8', Int8Dtype)]) -def test_to_integer_array(values, to_dtype, result_dtype): - # convert existing arrays to IntegerArrays - result = to_integer_array(values, dtype=to_dtype) - expected = IntegerArray(values, dtype=result_dtype()) - tm.assert_extension_array_equal(result, expected) - - -def test_cross_type_arithmetic(): - - df = pd.DataFrame({'A': pd.Series([1, 2, np.nan], dtype='Int64'), - 'B': pd.Series([1, np.nan, 3], dtype='UInt8'), - 'C': [1, 2, 3]}) - - result = df.A + df.C - expected = pd.Series([2, 4, np.nan], dtype='Int64') - tm.assert_series_equal(result, expected) - - result = (df.A + df.C) * 3 == 12 - expected = pd.Series([False, True, False]) - tm.assert_series_equal(result, expected) - - result = df.A + df.B - expected = pd.Series([2, np.nan, np.nan], dtype='Int64') - tm.assert_series_equal(result, expected) - - -# TODO(jreback) - these need testing / are broken - -# shift - -# set_index (destroys type) From 6408ee0a378f0a4bad989c43df76ded1f8401982 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 23 Jul 2018 16:49:31 +0200 Subject: [PATCH 02/13] split interval tests --- pandas/tests/arrays/test_interval.py | 72 +++++++++++++++++++ .../tests/extension/interval/test_interval.py | 61 +--------------- 2 files changed, 74 insertions(+), 59 deletions(-) create mode 100644 pandas/tests/arrays/test_interval.py diff --git a/pandas/tests/arrays/test_interval.py b/pandas/tests/arrays/test_interval.py new file mode 100644 index 0000000000000..5e57679a76b60 --- /dev/null +++ b/pandas/tests/arrays/test_interval.py @@ -0,0 +1,72 @@ +# -*- coding: utf-8 -*- +import pytest +import numpy as np + +from pandas import Index, IntervalIndex, date_range, timedelta_range +from pandas.core.arrays import IntervalArray +import pandas.util.testing as tm + + +@pytest.fixture(params=[ + (Index([0, 2, 4]), Index([1, 3, 5])), + (Index([0., 1., 2.]), Index([1., 2., 3.])), + (timedelta_range('0 days', periods=3), + timedelta_range('1 day', periods=3)), + (date_range('20170101', periods=3), date_range('20170102', periods=3)), + (date_range('20170101', periods=3, tz='US/Eastern'), + date_range('20170102', periods=3, tz='US/Eastern'))], + ids=lambda x: str(x[0].dtype)) +def left_right_dtypes(request): + """ + Fixture for building an IntervalArray from various dtypes + """ + return request.param + + +class TestMethods: + + @pytest.mark.parametrize('repeats', [0, 1, 5]) + def test_repeat(self, left_right_dtypes, repeats): + left, right = left_right_dtypes + result = IntervalArray.from_arrays(left, right).repeat(repeats) + expected = IntervalArray.from_arrays( + left.repeat(repeats), right.repeat(repeats)) + tm.assert_extension_array_equal(result, expected) + + @pytest.mark.parametrize('bad_repeats, msg', [ + (-1, 'negative dimensions are not allowed'), + ('foo', r'invalid literal for (int|long)\(\) with base 10')]) + def test_repeat_errors(self, bad_repeats, msg): + array = IntervalArray.from_breaks(range(4)) + with tm.assert_raises_regex(ValueError, msg): + array.repeat(bad_repeats) + + @pytest.mark.parametrize('new_closed', [ + 'left', 'right', 'both', 'neither']) + def test_set_closed(self, closed, new_closed): + # GH 21670 + array = IntervalArray.from_breaks(range(10), closed=closed) + result = array.set_closed(new_closed) + expected = IntervalArray.from_breaks(range(10), closed=new_closed) + tm.assert_extension_array_equal(result, expected) + + +class TestSetitem: + + def test_set_na(self, left_right_dtypes): + left, right = left_right_dtypes + result = IntervalArray.from_arrays(left, right) + result[0] = np.nan + + expected_left = Index([left._na_value] + list(left[1:])) + expected_right = Index([right._na_value] + list(right[1:])) + expected = IntervalArray.from_arrays(expected_left, expected_right) + + tm.assert_extension_array_equal(result, expected) + + +def test_repr_matches(): + idx = IntervalIndex.from_breaks([1, 2, 3]) + a = repr(idx) + b = repr(idx.values) + assert a.replace("Index", "Array") == b diff --git a/pandas/tests/extension/interval/test_interval.py b/pandas/tests/extension/interval/test_interval.py index a10a56ddfdfac..c4606e85cfb3f 100644 --- a/pandas/tests/extension/interval/test_interval.py +++ b/pandas/tests/extension/interval/test_interval.py @@ -1,7 +1,7 @@ import pytest import numpy as np -from pandas import Index, Interval, IntervalIndex, date_range, timedelta_range +from pandas import Interval from pandas.core.arrays import IntervalArray from pandas.core.dtypes.dtypes import IntervalDtype from pandas.tests.extension import base @@ -15,22 +15,6 @@ def make_data(): return [Interval(l, r) for l, r in zip(left, right)] -@pytest.fixture(params=[ - (Index([0, 2, 4]), Index([1, 3, 5])), - (Index([0., 1., 2.]), Index([1., 2., 3.])), - (timedelta_range('0 days', periods=3), - timedelta_range('1 day', periods=3)), - (date_range('20170101', periods=3), date_range('20170102', periods=3)), - (date_range('20170101', periods=3, tz='US/Eastern'), - date_range('20170102', periods=3, tz='US/Eastern'))], - ids=lambda x: str(x[0].dtype)) -def left_right_dtypes(request): - """ - Fixture for building an IntervalArray from various dtypes - """ - return request.param - - @pytest.fixture def dtype(): return IntervalDtype() @@ -111,30 +95,6 @@ class TestInterface(BaseInterval, base.BaseInterfaceTests): class TestMethods(BaseInterval, base.BaseMethodsTests): - @pytest.mark.parametrize('repeats', [0, 1, 5]) - def test_repeat(self, left_right_dtypes, repeats): - left, right = left_right_dtypes - result = IntervalArray.from_arrays(left, right).repeat(repeats) - expected = IntervalArray.from_arrays( - left.repeat(repeats), right.repeat(repeats)) - tm.assert_extension_array_equal(result, expected) - - @pytest.mark.parametrize('bad_repeats, msg', [ - (-1, 'negative dimensions are not allowed'), - ('foo', r'invalid literal for (int|long)\(\) with base 10')]) - def test_repeat_errors(self, bad_repeats, msg): - array = IntervalArray.from_breaks(range(4)) - with tm.assert_raises_regex(ValueError, msg): - array.repeat(bad_repeats) - - @pytest.mark.parametrize('new_closed', [ - 'left', 'right', 'both', 'neither']) - def test_set_closed(self, closed, new_closed): - # GH 21670 - array = IntervalArray.from_breaks(range(10), closed=closed) - result = array.set_closed(new_closed) - expected = IntervalArray.from_breaks(range(10), closed=new_closed) - tm.assert_extension_array_equal(result, expected) @pytest.mark.skip(reason='addition is not defined for intervals') def test_combine_add(self, data_repeated): @@ -173,21 +133,4 @@ class TestReshaping(BaseInterval, base.BaseReshapingTests): class TestSetitem(BaseInterval, base.BaseSetitemTests): - - def test_set_na(self, left_right_dtypes): - left, right = left_right_dtypes - result = IntervalArray.from_arrays(left, right) - result[0] = np.nan - - expected_left = Index([left._na_value] + list(left[1:])) - expected_right = Index([right._na_value] + list(right[1:])) - expected = IntervalArray.from_arrays(expected_left, expected_right) - - self.assert_extension_array_equal(result, expected) - - -def test_repr_matches(): - idx = IntervalIndex.from_breaks([1, 2, 3]) - a = repr(idx) - b = repr(idx.values) - assert a.replace("Index", "Array") == b + pass From de105918e7032f780020ac47af23fcfb9a193284 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 24 Jul 2018 10:38:23 +0200 Subject: [PATCH 03/13] use tm.assert methods instead of class methods --- pandas/tests/arrays/test_integer.py | 37 ++++++++++++++--------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index c4a42c61b0df9..82c1d40cdd4d8 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -13,7 +13,6 @@ Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype) -from ..extension.integer.test_integer import BaseInteger from ..extension.base import BaseOpsUtil @@ -80,7 +79,7 @@ def test_repr_array_long(self, data): assert 'length' in result -class TestConstructors(BaseInteger): +class TestConstructors: def test_from_dtype_from_float(self, data): # construct from our dtype & string dtype @@ -89,21 +88,21 @@ def test_from_dtype_from_float(self, data): # from float expected = pd.Series(data) result = pd.Series(np.array(data).astype('float'), dtype=str(dtype)) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # from int / list expected = pd.Series(data) result = pd.Series(np.array(data).tolist(), dtype=str(dtype)) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # from int / array expected = pd.Series(data).dropna().reset_index(drop=True) dropped = np.array(data.dropna()).astype(np.dtype((dtype.type))) result = pd.Series(dropped, dtype=str(dtype)) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) -class TestArithmeticOps(BaseOpsUtil, BaseInteger): +class TestArithmeticOps(BaseOpsUtil): def _check_divmod_op(self, s, op, other, exc=None): super(TestArithmeticOps, self)._check_divmod_op(s, op, other, None) @@ -140,7 +139,7 @@ def _check_op_float(self, result, expected, mask, s, op_name, other): # check comparisions that are resulting in float dtypes expected[mask] = np.nan - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def _check_op_integer(self, result, expected, mask, s, op_name, other): # check comparisions that are resulting in integer dtypes @@ -193,10 +192,10 @@ def _check_op_integer(self, result, expected, mask, s, op_name, other): original = original.astype('float') original[mask] = np.nan - self.assert_series_equal(original, expected.astype('float')) + tm.assert_series_equal(original, expected.astype('float')) # assert our expected result - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_arith_integer_array(self, data, all_arithmetic_operators): # we operate with a rhs of an integer array @@ -281,7 +280,7 @@ def test_error(self, data, all_arithmetic_operators): opa(np.arange(len(s)).reshape(-1, len(s))) -class TestComparisonOps(BaseOpsUtil, BaseInteger): +class TestComparisonOps(BaseOpsUtil): def _compare_other(self, s, data, op_name, other): op = self.get_op_from_name(op_name) @@ -319,7 +318,7 @@ def test_compare_array(self, data, all_compare_operators): self._compare_other(s, data, op_name, other) -class TestCasting(BaseInteger): +class TestCasting: pass @pytest.mark.parametrize('dropna', [True, False]) @@ -337,7 +336,7 @@ def test_construct_index(self, all_data, dropna): dtype=all_data.dtype)) expected = pd.Index(other, dtype=object) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) @pytest.mark.parametrize('dropna', [True, False]) def test_astype_index(self, all_data, dropna): @@ -355,7 +354,7 @@ def test_astype_index(self, all_data, dropna): result = idx.astype(dtype) expected = idx.astype(object).astype(dtype) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) def test_astype(self, all_data): all_data = all_data[:10] @@ -368,13 +367,13 @@ def test_astype(self, all_data): s = pd.Series(ints) result = s.astype(all_data.dtype) expected = pd.Series(ints) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # coerce to same other - ints s = pd.Series(ints) result = s.astype(dtype) expected = pd.Series(ints, dtype=dtype) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # coerce to same numpy_dtype - ints s = pd.Series(ints) @@ -387,13 +386,13 @@ def test_astype(self, all_data): s = pd.Series(mixed) result = s.astype(all_data.dtype) expected = pd.Series(mixed) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # coerce to same other - mixed s = pd.Series(mixed) result = s.astype(dtype) expected = pd.Series(mixed, dtype=dtype) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # coerce to same numpy_dtype - mixed s = pd.Series(mixed) @@ -411,12 +410,12 @@ def test_astype_specific_casting(self, dtype): s = pd.Series([1, 2, 3], dtype='Int64') result = s.astype(dtype) expected = pd.Series([1, 2, 3], dtype='Int8') - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) s = pd.Series([1, 2, 3, None], dtype='Int64') result = s.astype(dtype) expected = pd.Series([1, 2, 3, None], dtype='Int8') - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_construct_cast_invalid(self, dtype): From 1c40fd7b4cd3b9c9fc668745453a6b9a1414db53 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 24 Jul 2018 11:17:45 +0200 Subject: [PATCH 04/13] fix py2 --- pandas/tests/extension/integer/test_integer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/integer/test_integer.py b/pandas/tests/extension/integer/test_integer.py index 5745248ee3564..b5bdd02d516b4 100644 --- a/pandas/tests/extension/integer/test_integer.py +++ b/pandas/tests/extension/integer/test_integer.py @@ -143,7 +143,10 @@ def _check_op(self, s, op, other, op_name, exc=NotImplementedError): result = op(s, other) expected = s.combine(other, op) - if op_name in ('__rtruediv__', '__truediv__'): + if op_name == '__rdiv__': + # combine is not giving the correct result for this case + pytest.skip("skipping reverse div in python 2") + elif op_name in ('__rtruediv__', '__truediv__', '__div__'): expected = expected.astype(float) if op_name == '__rtruediv__': # TODO reverse operators result in object dtype From facfc445f7861641eb888f07cfa1fcb5d44d1ffc Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 24 Jul 2018 13:12:41 +0200 Subject: [PATCH 05/13] linter --- pandas/tests/arrays/test_integer.py | 6 +++--- pandas/tests/arrays/test_interval.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 82c1d40cdd4d8..9b74b33d21d12 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -59,7 +59,7 @@ def test_dtypes(dtype): assert dtype.name is not None -class TestInterface: +class TestInterface(object): def test_repr_array(self, data): result = repr(data) @@ -79,7 +79,7 @@ def test_repr_array_long(self, data): assert 'length' in result -class TestConstructors: +class TestConstructors(object): def test_from_dtype_from_float(self, data): # construct from our dtype & string dtype @@ -318,7 +318,7 @@ def test_compare_array(self, data, all_compare_operators): self._compare_other(s, data, op_name, other) -class TestCasting: +class TestCasting(object): pass @pytest.mark.parametrize('dropna', [True, False]) diff --git a/pandas/tests/arrays/test_interval.py b/pandas/tests/arrays/test_interval.py index 5e57679a76b60..bcf4cea795978 100644 --- a/pandas/tests/arrays/test_interval.py +++ b/pandas/tests/arrays/test_interval.py @@ -23,7 +23,7 @@ def left_right_dtypes(request): return request.param -class TestMethods: +class TestMethods(object): @pytest.mark.parametrize('repeats', [0, 1, 5]) def test_repeat(self, left_right_dtypes, repeats): @@ -51,7 +51,7 @@ def test_set_closed(self, closed, new_closed): tm.assert_extension_array_equal(result, expected) -class TestSetitem: +class TestSetitem(object): def test_set_na(self, left_right_dtypes): left, right = left_right_dtypes From 6edec385ec6453091529f11773485aec22784c14 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 26 Jul 2018 13:29:06 +0200 Subject: [PATCH 06/13] move test_integer/test_interval/test_categorical to main tests/extension dir instead of each in own subdir --- pandas/tests/extension/category/__init__.py | 0 pandas/tests/extension/integer/__init__.py | 0 pandas/tests/extension/interval/__init__.py | 0 pandas/tests/extension/{category => }/test_categorical.py | 0 pandas/tests/extension/{integer => }/test_integer.py | 0 pandas/tests/extension/{interval => }/test_interval.py | 0 6 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 pandas/tests/extension/category/__init__.py delete mode 100644 pandas/tests/extension/integer/__init__.py delete mode 100644 pandas/tests/extension/interval/__init__.py rename pandas/tests/extension/{category => }/test_categorical.py (100%) rename pandas/tests/extension/{integer => }/test_integer.py (100%) rename pandas/tests/extension/{interval => }/test_interval.py (100%) diff --git a/pandas/tests/extension/category/__init__.py b/pandas/tests/extension/category/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/pandas/tests/extension/integer/__init__.py b/pandas/tests/extension/integer/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/pandas/tests/extension/interval/__init__.py b/pandas/tests/extension/interval/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/test_categorical.py similarity index 100% rename from pandas/tests/extension/category/test_categorical.py rename to pandas/tests/extension/test_categorical.py diff --git a/pandas/tests/extension/integer/test_integer.py b/pandas/tests/extension/test_integer.py similarity index 100% rename from pandas/tests/extension/integer/test_integer.py rename to pandas/tests/extension/test_integer.py diff --git a/pandas/tests/extension/interval/test_interval.py b/pandas/tests/extension/test_interval.py similarity index 100% rename from pandas/tests/extension/interval/test_interval.py rename to pandas/tests/extension/test_interval.py From a17deda7ef289fe4cc68dffc4d2b373af48f4324 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 21 Aug 2018 17:24:36 +0200 Subject: [PATCH 07/13] get tests passing --- pandas/core/arrays/integer.py | 3 +-- pandas/tests/extension/test_integer.py | 8 ++++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 3dffabbe473d3..5f6a96833c4f8 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -409,8 +409,7 @@ def astype(self, dtype, copy=True): # if we are astyping to an existing IntegerDtype we can fastpath if isinstance(dtype, _IntegerDtype): - result = self._data.astype(dtype.numpy_dtype, - casting='same_kind', copy=False) + result = self._data.astype(dtype.numpy_dtype, copy=False) return type(self)(result, mask=self._mask, copy=False) # coerce diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index fd197b3eb04fe..c8fbd19524dcf 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -4,6 +4,7 @@ import pytest from pandas.tests.extension import base +from pandas.core.dtypes.common import is_extension_array_dtype from pandas.core.arrays import IntegerArray, integer_array from pandas.core.arrays.integer import ( @@ -140,6 +141,13 @@ def _check_op(self, s, op, other, op_name, exc=NotImplementedError): # TODO see https://github.com/pandas-dev/pandas/issues/22023 pytest.skip("unsigned subtraction gives negative values") + if (hasattr(other, 'dtype') + and not is_extension_array_dtype(other.dtype) + and pd.api.types.is_integer_dtype(other.dtype)): + # other is np.int64 and would therefore always result in + # upcasting, so keeping other as same numpy_dtype + other = other.astype(s.dtype.numpy_dtype) + result = op(s, other) expected = s.combine(other, op) From 8cf71e9a7b63fe86e3f957d99640eed5d03398d3 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 22 Aug 2018 16:42:25 +0200 Subject: [PATCH 08/13] add module docstring to each test file --- pandas/tests/extension/test_categorical.py | 15 +++++++++++++++ pandas/tests/extension/test_integer.py | 15 +++++++++++++++ pandas/tests/extension/test_interval.py | 15 +++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index f8a2f09065dbc..b8c73a9efdae8 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -1,3 +1,18 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. + +""" import string import pytest diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index c8fbd19524dcf..539a965c331b2 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -1,3 +1,18 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. + +""" import numpy as np import pandas as pd import pandas.util.testing as tm diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index c4606e85cfb3f..625619a90ed4c 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -1,3 +1,18 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. + +""" import pytest import numpy as np From edcb3dad0e116b81396f29fbb0bacc77c3e6c710 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 23 Aug 2018 10:38:00 +0200 Subject: [PATCH 09/13] fix rebase mistake --- pandas/tests/arrays/test_integer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 7ae3969e5b07a..5c336d6df738c 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -410,12 +410,12 @@ def test_astype_specific_casting(self, dtype): s = pd.Series([1, 2, 3], dtype='Int64') result = s.astype(dtype) expected = pd.Series([1, 2, 3], dtype=dtype) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) s = pd.Series([1, 2, 3, None], dtype='Int64') result = s.astype(dtype) expected = pd.Series([1, 2, 3, None], dtype=dtype) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_construct_cast_invalid(self, dtype): From 96a9a8bc17264957088430f68d738d53e6959d32 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 23 Aug 2018 13:53:06 +0200 Subject: [PATCH 10/13] absolute import --- pandas/tests/arrays/test_integer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 5c336d6df738c..349a6aee5701e 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -13,7 +13,7 @@ Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype) -from ..extension.base import BaseOpsUtil +from pandas.tests.extension.base import BaseOpsUtil def make_data(): From 700c4051547f1eb77657d6932720974e28c730bb Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 23 Aug 2018 13:57:31 +0200 Subject: [PATCH 11/13] remove BaseInteger with assert overwrites --- pandas/tests/extension/test_integer.py | 65 +++++--------------------- 1 file changed, 11 insertions(+), 54 deletions(-) diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index 539a965c331b2..40f36e96b2123 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -89,50 +89,7 @@ def data_for_grouping(dtype): return integer_array([b, b, na, na, a, a, b, c], dtype=dtype) -class BaseInteger(object): - - def assert_index_equal(self, left, right, *args, **kwargs): - - left_na = left.isna() - right_na = right.isna() - - tm.assert_numpy_array_equal(left_na, right_na) - return tm.assert_index_equal(left[~left_na], - right[~right_na], - *args, **kwargs) - - def assert_series_equal(self, left, right, *args, **kwargs): - - left_na = left.isna() - right_na = right.isna() - - tm.assert_series_equal(left_na, right_na) - return tm.assert_series_equal(left[~left_na], - right[~right_na], - *args, **kwargs) - - def assert_frame_equal(self, left, right, *args, **kwargs): - # TODO(EA): select_dtypes - tm.assert_index_equal( - left.columns, right.columns, - exact=kwargs.get('check_column_type', 'equiv'), - check_names=kwargs.get('check_names', True), - check_exact=kwargs.get('check_exact', False), - check_categorical=kwargs.get('check_categorical', True), - obj='{obj}.columns'.format(obj=kwargs.get('obj', 'DataFrame'))) - - integers = (left.dtypes == 'integer').index - - for col in integers: - self.assert_series_equal(left[col], right[col], - *args, **kwargs) - - left = left.drop(columns=integers) - right = right.drop(columns=integers) - tm.assert_frame_equal(left, right, *args, **kwargs) - - -class TestDtype(BaseInteger, base.BaseDtypeTests): +class TestDtype(base.BaseDtypeTests): @pytest.mark.skip(reason="using multiple dtypes") def test_is_dtype_unboxes_dtype(self): @@ -143,7 +100,7 @@ def test_array_type_with_arg(self, data, dtype): assert dtype.construct_array_type() is IntegerArray -class TestArithmeticOps(BaseInteger, base.BaseArithmeticOpsTests): +class TestArithmeticOps(base.BaseArithmeticOpsTests): def check_opname(self, s, op_name, other, exc=None): # overwriting to indicate ops don't raise an error @@ -202,7 +159,7 @@ def test_error(self, data, all_arithmetic_operators): pass -class TestComparisonOps(BaseInteger, base.BaseComparisonOpsTests): +class TestComparisonOps(base.BaseComparisonOpsTests): def check_opname(self, s, op_name, other, exc=None): super(TestComparisonOps, self).check_opname(s, op_name, @@ -212,15 +169,15 @@ def _compare_other(self, s, data, op_name, other): self.check_opname(s, op_name, other) -class TestInterface(BaseInteger, base.BaseInterfaceTests): +class TestInterface(base.BaseInterfaceTests): pass -class TestConstructors(BaseInteger, base.BaseConstructorsTests): +class TestConstructors(base.BaseConstructorsTests): pass -class TestReshaping(BaseInteger, base.BaseReshapingTests): +class TestReshaping(base.BaseReshapingTests): pass # for test_concat_mixed_dtypes test @@ -228,15 +185,15 @@ class TestReshaping(BaseInteger, base.BaseReshapingTests): # TODO(jreback) once integrated this would -class TestGetitem(BaseInteger, base.BaseGetitemTests): +class TestGetitem(base.BaseGetitemTests): pass -class TestMissing(BaseInteger, base.BaseMissingTests): +class TestMissing(base.BaseMissingTests): pass -class TestMethods(BaseInteger, base.BaseMethodsTests): +class TestMethods(base.BaseMethodsTests): @pytest.mark.parametrize('dropna', [True, False]) def test_value_counts(self, all_data, dropna): @@ -254,11 +211,11 @@ def test_value_counts(self, all_data, dropna): self.assert_series_equal(result, expected) -class TestCasting(BaseInteger, base.BaseCastingTests): +class TestCasting(base.BaseCastingTests): pass -class TestGroupby(BaseInteger, base.BaseGroupbyTests): +class TestGroupby(base.BaseGroupbyTests): @pytest.mark.xfail(reason="groupby not working", strict=True) def test_groupby_extension_no_sort(self, data_for_grouping): From c28cab91f0125dac7233ee7d0355bc20da271356 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 23 Aug 2018 16:26:52 +0200 Subject: [PATCH 12/13] fix linter --- pandas/tests/extension/test_integer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index 40f36e96b2123..4b27fdde7e2c0 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -15,7 +15,6 @@ """ import numpy as np import pandas as pd -import pandas.util.testing as tm import pytest from pandas.tests.extension import base From db2836c2be2bf7a8cd352449ffc02e5664b4e721 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 5 Sep 2018 14:46:34 +0200 Subject: [PATCH 13/13] pep8 --- pandas/tests/extension/test_integer.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index 4b27fdde7e2c0..50c0e6dd8b347 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -27,11 +27,8 @@ def make_data(): - return (list(range(1, 9)) + - [np.nan] + - list(range(10, 98)) + - [np.nan] + - [99, 100]) + return (list(range(1, 9)) + [np.nan] + list(range(10, 98)) + + [np.nan] + [99, 100]) @pytest.fixture(params=[Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype,