From 5c6b7b4def271709f19bbb4b5e2b6d0b935eb143 Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Sat, 7 Dec 2019 13:19:19 +0100 Subject: [PATCH] split test_base into multiple files --- pandas/core/base.py | 2 +- pandas/tests/base/__init__.py | 0 pandas/tests/base/test_construction.py | 142 +++++ pandas/tests/base/test_conversion.py | 403 ++++++++++++ .../tests/{test_base.py => base/test_ops.py} | 572 +----------------- pandas/tests/indexes/datetimes/test_ops.py | 2 +- pandas/tests/indexes/period/test_ops.py | 2 +- pandas/tests/indexes/timedeltas/test_ops.py | 2 +- pandas/tests/series/test_analytics.py | 6 +- 9 files changed, 574 insertions(+), 557 deletions(-) create mode 100644 pandas/tests/base/__init__.py create mode 100644 pandas/tests/base/test_construction.py create mode 100644 pandas/tests/base/test_conversion.py rename pandas/tests/{test_base.py => base/test_ops.py} (66%) diff --git a/pandas/core/base.py b/pandas/core/base.py index b7216d2a70ee6..88b8fe405c8e4 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -93,7 +93,7 @@ class NoNewAttributesMixin: Prevents additional attributes via xxx.attribute = "something" after a call to `self.__freeze()`. Mainly used to prevent the user from using - wrong attributes on a accessor (`Series.cat/.str/.dt`). + wrong attributes on an accessor (`Series.cat/.str/.dt`). If you really want to add a new attribute at a later time, you need to use `object.__setattr__(self, key, value)`. diff --git a/pandas/tests/base/__init__.py b/pandas/tests/base/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/base/test_construction.py b/pandas/tests/base/test_construction.py new file mode 100644 index 0000000000000..a9e0473ac067a --- /dev/null +++ b/pandas/tests/base/test_construction.py @@ -0,0 +1,142 @@ +from datetime import datetime +import sys + +import numpy as np +import pytest + +from pandas.compat import PYPY + +import pandas as pd +from pandas import DataFrame, Index, Series +from pandas.core.accessor import PandasDelegate +from pandas.core.base import NoNewAttributesMixin, PandasObject +import pandas.util.testing as tm + + +class TestPandasDelegate: + class Delegator: + _properties = ["foo"] + _methods = ["bar"] + + def _set_foo(self, value): + self.foo = value + + def _get_foo(self): + return self.foo + + foo = property(_get_foo, _set_foo, doc="foo property") + + def bar(self, *args, **kwargs): + """ a test bar method """ + pass + + class Delegate(PandasDelegate, PandasObject): + def __init__(self, obj): + self.obj = obj + + def setup_method(self, method): + pass + + def test_invalid_delegation(self): + # these show that in order for the delegation to work + # the _delegate_* methods need to be overridden to not raise + # a TypeError + + self.Delegate._add_delegate_accessors( + delegate=self.Delegator, + accessors=self.Delegator._properties, + typ="property", + ) + self.Delegate._add_delegate_accessors( + delegate=self.Delegator, accessors=self.Delegator._methods, typ="method" + ) + + delegate = self.Delegate(self.Delegator()) + + with pytest.raises(TypeError): + delegate.foo + + with pytest.raises(TypeError): + delegate.foo = 5 + + with pytest.raises(TypeError): + delegate.foo() + + @pytest.mark.skipif(PYPY, reason="not relevant for PyPy") + def test_memory_usage(self): + # Delegate does not implement memory_usage. 
+ # Check that we fall back to in-built `__sizeof__` + # GH 12924 + delegate = self.Delegate(self.Delegator()) + sys.getsizeof(delegate) + + +class TestNoNewAttributesMixin: + def test_mixin(self): + class T(NoNewAttributesMixin): + pass + + t = T() + assert not hasattr(t, "__frozen") + + t.a = "test" + assert t.a == "test" + + t._freeze() + assert "__frozen" in dir(t) + assert getattr(t, "__frozen") + + with pytest.raises(AttributeError): + t.b = "test" + + assert not hasattr(t, "b") + + +class TestConstruction: + # test certain constructor behaviours on dtype inference across Series, + # Index and DataFrame + + @pytest.mark.parametrize( + "klass", + [ + Series, + lambda x, **kwargs: DataFrame({"a": x}, **kwargs)["a"], + pytest.param( + lambda x, **kwargs: DataFrame(x, **kwargs)[0], marks=pytest.mark.xfail + ), + Index, + ], + ) + @pytest.mark.parametrize( + "a", + [ + np.array(["2263-01-01"], dtype="datetime64[D]"), + np.array([datetime(2263, 1, 1)], dtype=object), + np.array([np.datetime64("2263-01-01", "D")], dtype=object), + np.array(["2263-01-01"], dtype=object), + ], + ids=[ + "datetime64[D]", + "object-datetime.datetime", + "object-numpy-scalar", + "object-string", + ], + ) + def test_constructor_datetime_outofbound(self, a, klass): + # GH-26853 (+ bug GH-26206 out of bound non-ns unit) + + # No dtype specified (dtype inference) + # datetime64[non-ns] raise error, other cases result in object dtype + # and preserve original data + if a.dtype.kind == "M": + with pytest.raises(pd.errors.OutOfBoundsDatetime): + klass(a) + else: + result = klass(a) + assert result.dtype == "object" + tm.assert_numpy_array_equal(result.to_numpy(), a) + + # Explicit dtype specified + # Forced conversion fails for all -> all cases raise error + with pytest.raises(pd.errors.OutOfBoundsDatetime): + klass(a, dtype="datetime64[ns]") diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py new file mode 100644 index 0000000000000..8fa52af832907 --- /dev/null +++ b/pandas/tests/base/test_conversion.py @@ -0,0 +1,403 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_datetime64_dtype, is_timedelta64_dtype +from pandas.core.dtypes.dtypes import DatetimeTZDtype + +import pandas as pd +from pandas import CategoricalIndex, Series, Timedelta, Timestamp +from pandas.core.arrays import DatetimeArray, PandasArray, TimedeltaArray +import pandas.util.testing as tm + + +class TestToIterable: + # test that we convert an iterable to python types + + dtypes = [ + ("int8", int), + ("int16", int), + ("int32", int), + ("int64", int), + ("uint8", int), + ("uint16", int), + ("uint32", int), + ("uint64", int), + ("float16", float), + ("float32", float), + ("float64", float), + ("datetime64[ns]", Timestamp), + ("datetime64[ns, US/Eastern]", Timestamp), + ("timedelta64[ns]", Timedelta), + ] + + @pytest.mark.parametrize("dtype, rdtype", dtypes) + @pytest.mark.parametrize( + "method", + [ + lambda x: x.tolist(), + lambda x: x.to_list(), + lambda x: list(x), + lambda x: list(x.__iter__()), + ], + ids=["tolist", "to_list", "list", "iter"], + ) + @pytest.mark.filterwarnings("ignore:\\n Passing:FutureWarning") + # TODO(GH-24559): Remove the filterwarnings + def test_iterable(self, index_or_series, method, dtype, rdtype): + # gh-10904 + # gh-13258 + # coerce iteration to underlying python / pandas types + typ = index_or_series + s = typ([1], dtype=dtype) + result = method(s)[0] + assert isinstance(result, rdtype) + + @pytest.mark.parametrize( + "dtype, rdtype, obj", + [ + 
("object", object, "a"), + ("object", int, 1), + ("category", object, "a"), + ("category", int, 1), + ], + ) + @pytest.mark.parametrize( + "method", + [ + lambda x: x.tolist(), + lambda x: x.to_list(), + lambda x: list(x), + lambda x: list(x.__iter__()), + ], + ids=["tolist", "to_list", "list", "iter"], + ) + def test_iterable_object_and_category( + self, index_or_series, method, dtype, rdtype, obj + ): + # gh-10904 + # gh-13258 + # coerce iteration to underlying python / pandas types + typ = index_or_series + s = typ([obj], dtype=dtype) + result = method(s)[0] + assert isinstance(result, rdtype) + + @pytest.mark.parametrize("dtype, rdtype", dtypes) + def test_iterable_items(self, dtype, rdtype): + # gh-13258 + # test if items yields the correct boxed scalars + # this only applies to series + s = Series([1], dtype=dtype) + _, result = list(s.items())[0] + assert isinstance(result, rdtype) + + _, result = list(s.items())[0] + assert isinstance(result, rdtype) + + @pytest.mark.parametrize( + "dtype, rdtype", dtypes + [("object", int), ("category", int)] + ) + @pytest.mark.filterwarnings("ignore:\\n Passing:FutureWarning") + # TODO(GH-24559): Remove the filterwarnings + def test_iterable_map(self, index_or_series, dtype, rdtype): + # gh-13236 + # coerce iteration to underlying python / pandas types + typ = index_or_series + s = typ([1], dtype=dtype) + result = s.map(type)[0] + if not isinstance(rdtype, tuple): + rdtype = tuple([rdtype]) + assert result in rdtype + + @pytest.mark.parametrize( + "method", + [ + lambda x: x.tolist(), + lambda x: x.to_list(), + lambda x: list(x), + lambda x: list(x.__iter__()), + ], + ids=["tolist", "to_list", "list", "iter"], + ) + def test_categorial_datetimelike(self, method): + i = CategoricalIndex([Timestamp("1999-12-31"), Timestamp("2000-12-31")]) + + result = method(i)[0] + assert isinstance(result, Timestamp) + + def test_iter_box(self): + vals = [Timestamp("2011-01-01"), Timestamp("2011-01-02")] + s = Series(vals) + assert s.dtype == "datetime64[ns]" + for res, exp in zip(s, vals): + assert isinstance(res, Timestamp) + assert res.tz is None + assert res == exp + + vals = [ + Timestamp("2011-01-01", tz="US/Eastern"), + Timestamp("2011-01-02", tz="US/Eastern"), + ] + s = Series(vals) + + assert s.dtype == "datetime64[ns, US/Eastern]" + for res, exp in zip(s, vals): + assert isinstance(res, Timestamp) + assert res.tz == exp.tz + assert res == exp + + # timedelta + vals = [Timedelta("1 days"), Timedelta("2 days")] + s = Series(vals) + assert s.dtype == "timedelta64[ns]" + for res, exp in zip(s, vals): + assert isinstance(res, Timedelta) + assert res == exp + + # period + vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")] + s = Series(vals) + assert s.dtype == "Period[M]" + for res, exp in zip(s, vals): + assert isinstance(res, pd.Period) + assert res.freq == "M" + assert res == exp + + +@pytest.mark.parametrize( + "array, expected_type, dtype", + [ + (np.array([0, 1], dtype=np.int64), np.ndarray, "int64"), + (np.array(["a", "b"]), np.ndarray, "object"), + (pd.Categorical(["a", "b"]), pd.Categorical, "category"), + ( + pd.DatetimeIndex(["2017", "2018"], tz="US/Central"), + DatetimeArray, + "datetime64[ns, US/Central]", + ), + ( + pd.PeriodIndex([2018, 2019], freq="A"), + pd.core.arrays.PeriodArray, + pd.core.dtypes.dtypes.PeriodDtype("A-DEC"), + ), + ( + pd.IntervalIndex.from_breaks([0, 1, 2]), + pd.core.arrays.IntervalArray, + "interval", + ), + # This test is currently failing for datetime64[ns] and timedelta64[ns]. 
+        # The NumPy type system is sufficient for representing these types, so
+        # we just use NumPy for Series / DataFrame columns of these types (so
+        # we get consolidation and so on).
+        # However, DatetimeIndex and TimedeltaIndex use the DateLikeArray
+        # abstraction for code reuse.
+        # At the moment, we've judged that allowing this test to fail is more
+        # practical than overriding Series._values to special case
+        # Series[M8[ns]] and Series[m8[ns]] to return a DateLikeArray.
+        pytest.param(
+            pd.DatetimeIndex(["2017", "2018"]),
+            np.ndarray,
+            "datetime64[ns]",
+            marks=[pytest.mark.xfail(reason="datetime _values", strict=True)],
+        ),
+        pytest.param(
+            pd.TimedeltaIndex([10 ** 10]),
+            np.ndarray,
+            "m8[ns]",
+            marks=[pytest.mark.xfail(reason="timedelta _values", strict=True)],
+        ),
+    ],
+)
+def test_values_consistent(array, expected_type, dtype):
+    l_values = pd.Series(array)._values
+    r_values = pd.Index(array)._values
+    assert type(l_values) is expected_type
+    assert type(l_values) is type(r_values)
+
+    tm.assert_equal(l_values, r_values)
+
+
+@pytest.mark.parametrize(
+    "array, expected",
+    [
+        (np.array([0, 1], dtype=np.int64), np.array([0, 1], dtype=np.int64)),
+        (np.array(["0", "1"]), np.array(["0", "1"], dtype=object)),
+        (pd.Categorical(["a", "a"]), np.array([0, 0], dtype="int8")),
+        (
+            pd.DatetimeIndex(["2017-01-01T00:00:00"]),
+            np.array(["2017-01-01T00:00:00"], dtype="M8[ns]"),
+        ),
+        (
+            pd.DatetimeIndex(["2017-01-01T00:00:00"], tz="US/Eastern"),
+            np.array(["2017-01-01T05:00:00"], dtype="M8[ns]"),
+        ),
+        (pd.TimedeltaIndex([10 ** 10]), np.array([10 ** 10], dtype="m8[ns]")),
+        (
+            pd.PeriodIndex(["2017", "2018"], freq="D"),
+            np.array([17167, 17532], dtype=np.int64),
+        ),
+    ],
+)
+def test_ndarray_values(array, expected):
+    l_values = pd.Series(array)._ndarray_values
+    r_values = pd.Index(array)._ndarray_values
+    tm.assert_numpy_array_equal(l_values, r_values)
+    tm.assert_numpy_array_equal(l_values, expected)
+
+
+@pytest.mark.parametrize("arr", [np.array([1, 2, 3])])
+def test_numpy_array(arr):
+    ser = pd.Series(arr)
+    result = ser.array
+    expected = PandasArray(arr)
+    tm.assert_extension_array_equal(result, expected)
+
+
+def test_numpy_array_all_dtypes(any_numpy_dtype):
+    ser = pd.Series(dtype=any_numpy_dtype)
+    result = ser.array
+    if is_datetime64_dtype(any_numpy_dtype):
+        assert isinstance(result, DatetimeArray)
+    elif is_timedelta64_dtype(any_numpy_dtype):
+        assert isinstance(result, TimedeltaArray)
+    else:
+        assert isinstance(result, PandasArray)
+
+
+@pytest.mark.parametrize(
+    "array, attr",
+    [
+        (pd.Categorical(["a", "b"]), "_codes"),
+        (pd.core.arrays.period_array(["2000", "2001"], freq="D"), "_data"),
+        (pd.core.arrays.integer_array([0, np.nan]), "_data"),
+        (pd.core.arrays.IntervalArray.from_breaks([0, 1]), "_left"),
+        (pd.SparseArray([0, 1]), "_sparse_values"),
+        (DatetimeArray(np.array([1, 2], dtype="datetime64[ns]")), "_data"),
+        # tz-aware Datetime
+        (
+            DatetimeArray(
+                np.array(
+                    ["2000-01-01T12:00:00", "2000-01-02T12:00:00"], dtype="M8[ns]"
+                ),
+                dtype=DatetimeTZDtype(tz="US/Central"),
+            ),
+            "_data",
+        ),
+    ],
+)
+def test_array(array, attr, index_or_series):
+    box = index_or_series
+    if array.dtype.name in ("Int64", "Sparse[int64, 0]") and box is pd.Index:
+        pytest.skip("No index type for {}".format(array.dtype))
+    result = box(array, copy=False).array
+
+    if attr:
+        array = getattr(array, attr)
+        result = getattr(result, attr)
+
+    assert result is array
+
+
+def test_array_multiindex_raises():
+    idx = pd.MultiIndex.from_product([["A"], ["a", "b"]])
+ with pytest.raises(ValueError, match="MultiIndex"): + idx.array + + +@pytest.mark.parametrize( + "array, expected", + [ + (np.array([1, 2], dtype=np.int64), np.array([1, 2], dtype=np.int64)), + (pd.Categorical(["a", "b"]), np.array(["a", "b"], dtype=object)), + ( + pd.core.arrays.period_array(["2000", "2001"], freq="D"), + np.array([pd.Period("2000", freq="D"), pd.Period("2001", freq="D")]), + ), + ( + pd.core.arrays.integer_array([0, np.nan]), + np.array([0, np.nan], dtype=object), + ), + ( + pd.core.arrays.IntervalArray.from_breaks([0, 1, 2]), + np.array([pd.Interval(0, 1), pd.Interval(1, 2)], dtype=object), + ), + (pd.SparseArray([0, 1]), np.array([0, 1], dtype=np.int64)), + # tz-naive datetime + ( + DatetimeArray(np.array(["2000", "2001"], dtype="M8[ns]")), + np.array(["2000", "2001"], dtype="M8[ns]"), + ), + # tz-aware stays tz`-aware + ( + DatetimeArray( + np.array( + ["2000-01-01T06:00:00", "2000-01-02T06:00:00"], dtype="M8[ns]" + ), + dtype=DatetimeTZDtype(tz="US/Central"), + ), + np.array( + [ + pd.Timestamp("2000-01-01", tz="US/Central"), + pd.Timestamp("2000-01-02", tz="US/Central"), + ] + ), + ), + # Timedelta + ( + TimedeltaArray(np.array([0, 3600000000000], dtype="i8"), freq="H"), + np.array([0, 3600000000000], dtype="m8[ns]"), + ), + ], +) +def test_to_numpy(array, expected, index_or_series): + box = index_or_series + thing = box(array) + + if array.dtype.name in ("Int64", "Sparse[int64, 0]") and box is pd.Index: + pytest.skip("No index type for {}".format(array.dtype)) + + result = thing.to_numpy() + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("as_series", [True, False]) +@pytest.mark.parametrize( + "arr", [np.array([1, 2, 3], dtype="int64"), np.array(["a", "b", "c"], dtype=object)] +) +def test_to_numpy_copy(arr, as_series): + obj = pd.Index(arr, copy=False) + if as_series: + obj = pd.Series(obj.values, copy=False) + + # no copy by default + result = obj.to_numpy() + assert np.shares_memory(arr, result) is True + + result = obj.to_numpy(copy=False) + assert np.shares_memory(arr, result) is True + + # copy=True + result = obj.to_numpy(copy=True) + assert np.shares_memory(arr, result) is False + + +@pytest.mark.parametrize("as_series", [True, False]) +def test_to_numpy_dtype(as_series): + tz = "US/Eastern" + obj = pd.DatetimeIndex(["2000", "2001"], tz=tz) + if as_series: + obj = pd.Series(obj) + + # preserve tz by default + result = obj.to_numpy() + expected = np.array( + [pd.Timestamp("2000", tz=tz), pd.Timestamp("2001", tz=tz)], dtype=object + ) + tm.assert_numpy_array_equal(result, expected) + + result = obj.to_numpy(dtype="object") + tm.assert_numpy_array_equal(result, expected) + + result = obj.to_numpy(dtype="M8[ns]") + expected = np.array(["2000-01-01T05", "2001-01-01T05"], dtype="M8[ns]") + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/test_base.py b/pandas/tests/base/test_ops.py similarity index 66% rename from pandas/tests/test_base.py rename to pandas/tests/base/test_ops.py index 6e5f5b729d102..bcd6b931a0f85 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/base/test_ops.py @@ -13,14 +13,11 @@ is_datetime64_dtype, is_datetime64tz_dtype, is_object_dtype, - is_timedelta64_dtype, needs_i8_conversion, ) -from pandas.core.dtypes.dtypes import DatetimeTZDtype import pandas as pd from pandas import ( - CategoricalIndex, DataFrame, DatetimeIndex, Index, @@ -32,71 +29,10 @@ TimedeltaIndex, Timestamp, ) -from pandas.core.accessor import PandasDelegate -from pandas.core.arrays import DatetimeArray, 
PandasArray, TimedeltaArray -from pandas.core.base import NoNewAttributesMixin, PandasObject from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin import pandas.util.testing as tm -class TestPandasDelegate: - class Delegator: - _properties = ["foo"] - _methods = ["bar"] - - def _set_foo(self, value): - self.foo = value - - def _get_foo(self): - return self.foo - - foo = property(_get_foo, _set_foo, doc="foo property") - - def bar(self, *args, **kwargs): - """ a test bar method """ - pass - - class Delegate(PandasDelegate, PandasObject): - def __init__(self, obj): - self.obj = obj - - def setup_method(self, method): - pass - - def test_invalid_delegation(self): - # these show that in order for the delegation to work - # the _delegate_* methods need to be overridden to not raise - # a TypeError - - self.Delegate._add_delegate_accessors( - delegate=self.Delegator, - accessors=self.Delegator._properties, - typ="property", - ) - self.Delegate._add_delegate_accessors( - delegate=self.Delegator, accessors=self.Delegator._methods, typ="method" - ) - - delegate = self.Delegate(self.Delegator()) - - with pytest.raises(TypeError): - delegate.foo - - with pytest.raises(TypeError): - delegate.foo = 5 - - with pytest.raises(TypeError): - delegate.foo() - - @pytest.mark.skipif(PYPY, reason="not relevant for PyPy") - def test_memory_usage(self): - # Delegate does not implement memory_usage. - # Check that we fall back to in-built `__sizeof__` - # GH 12924 - delegate = self.Delegate(self.Delegator()) - sys.getsizeof(delegate) - - class Ops: def _allow_na_ops(self, obj): """Whether to skip test cases including NaN""" @@ -217,6 +153,28 @@ def test_binary_ops_docs(self, klass): assert expected_str in getattr(klass, "r" + op_name).__doc__ +class TestTranspose(Ops): + errmsg = "the 'axes' parameter is not supported" + + def test_transpose(self): + for obj in self.objs: + tm.assert_equal(obj.transpose(), obj) + + def test_transpose_non_default_axes(self): + for obj in self.objs: + with pytest.raises(ValueError, match=self.errmsg): + obj.transpose(1) + with pytest.raises(ValueError, match=self.errmsg): + obj.transpose(axes=1) + + def test_numpy_transpose(self): + for obj in self.objs: + tm.assert_equal(np.transpose(obj), obj) + + with pytest.raises(ValueError, match=self.errmsg): + np.transpose(obj, axes=1) + + class TestIndexOps(Ops): def setup_method(self, method): super().setup_method(method) @@ -943,489 +901,3 @@ def test_get_indexer_non_unique_dtype_mismatch(self): indexes, missing = pd.Index(["A", "B"]).get_indexer_non_unique(pd.Index([0])) tm.assert_numpy_array_equal(np.array([-1], dtype=np.intp), indexes) tm.assert_numpy_array_equal(np.array([0], dtype=np.int64), missing) - - -class TestTranspose(Ops): - errmsg = "the 'axes' parameter is not supported" - - def test_transpose(self): - for obj in self.objs: - tm.assert_equal(obj.transpose(), obj) - - def test_transpose_non_default_axes(self): - for obj in self.objs: - with pytest.raises(ValueError, match=self.errmsg): - obj.transpose(1) - with pytest.raises(ValueError, match=self.errmsg): - obj.transpose(axes=1) - - def test_numpy_transpose(self): - for obj in self.objs: - tm.assert_equal(np.transpose(obj), obj) - - with pytest.raises(ValueError, match=self.errmsg): - np.transpose(obj, axes=1) - - -class TestNoNewAttributesMixin: - def test_mixin(self): - class T(NoNewAttributesMixin): - pass - - t = T() - assert not hasattr(t, "__frozen") - - t.a = "test" - assert t.a == "test" - - t._freeze() - assert "__frozen" in dir(t) - assert getattr(t, 
"__frozen") - - with pytest.raises(AttributeError): - t.b = "test" - - assert not hasattr(t, "b") - - -class TestToIterable: - # test that we convert an iterable to python types - - dtypes = [ - ("int8", int), - ("int16", int), - ("int32", int), - ("int64", int), - ("uint8", int), - ("uint16", int), - ("uint32", int), - ("uint64", int), - ("float16", float), - ("float32", float), - ("float64", float), - ("datetime64[ns]", Timestamp), - ("datetime64[ns, US/Eastern]", Timestamp), - ("timedelta64[ns]", Timedelta), - ] - - @pytest.mark.parametrize("dtype, rdtype", dtypes) - @pytest.mark.parametrize( - "method", - [ - lambda x: x.tolist(), - lambda x: x.to_list(), - lambda x: list(x), - lambda x: list(x.__iter__()), - ], - ids=["tolist", "to_list", "list", "iter"], - ) - @pytest.mark.filterwarnings("ignore:\\n Passing:FutureWarning") - # TODO(GH-24559): Remove the filterwarnings - def test_iterable(self, index_or_series, method, dtype, rdtype): - # gh-10904 - # gh-13258 - # coerce iteration to underlying python / pandas types - typ = index_or_series - s = typ([1], dtype=dtype) - result = method(s)[0] - assert isinstance(result, rdtype) - - @pytest.mark.parametrize( - "dtype, rdtype, obj", - [ - ("object", object, "a"), - ("object", int, 1), - ("category", object, "a"), - ("category", int, 1), - ], - ) - @pytest.mark.parametrize( - "method", - [ - lambda x: x.tolist(), - lambda x: x.to_list(), - lambda x: list(x), - lambda x: list(x.__iter__()), - ], - ids=["tolist", "to_list", "list", "iter"], - ) - def test_iterable_object_and_category( - self, index_or_series, method, dtype, rdtype, obj - ): - # gh-10904 - # gh-13258 - # coerce iteration to underlying python / pandas types - typ = index_or_series - s = typ([obj], dtype=dtype) - result = method(s)[0] - assert isinstance(result, rdtype) - - @pytest.mark.parametrize("dtype, rdtype", dtypes) - def test_iterable_items(self, dtype, rdtype): - # gh-13258 - # test if items yields the correct boxed scalars - # this only applies to series - s = Series([1], dtype=dtype) - _, result = list(s.items())[0] - assert isinstance(result, rdtype) - - _, result = list(s.items())[0] - assert isinstance(result, rdtype) - - @pytest.mark.parametrize( - "dtype, rdtype", dtypes + [("object", int), ("category", int)] - ) - @pytest.mark.filterwarnings("ignore:\\n Passing:FutureWarning") - # TODO(GH-24559): Remove the filterwarnings - def test_iterable_map(self, index_or_series, dtype, rdtype): - # gh-13236 - # coerce iteration to underlying python / pandas types - typ = index_or_series - s = typ([1], dtype=dtype) - result = s.map(type)[0] - if not isinstance(rdtype, tuple): - rdtype = tuple([rdtype]) - assert result in rdtype - - @pytest.mark.parametrize( - "method", - [ - lambda x: x.tolist(), - lambda x: x.to_list(), - lambda x: list(x), - lambda x: list(x.__iter__()), - ], - ids=["tolist", "to_list", "list", "iter"], - ) - def test_categorial_datetimelike(self, method): - i = CategoricalIndex([Timestamp("1999-12-31"), Timestamp("2000-12-31")]) - - result = method(i)[0] - assert isinstance(result, Timestamp) - - def test_iter_box(self): - vals = [Timestamp("2011-01-01"), Timestamp("2011-01-02")] - s = Series(vals) - assert s.dtype == "datetime64[ns]" - for res, exp in zip(s, vals): - assert isinstance(res, Timestamp) - assert res.tz is None - assert res == exp - - vals = [ - Timestamp("2011-01-01", tz="US/Eastern"), - Timestamp("2011-01-02", tz="US/Eastern"), - ] - s = Series(vals) - - assert s.dtype == "datetime64[ns, US/Eastern]" - for res, exp in zip(s, vals): - 
assert isinstance(res, Timestamp) - assert res.tz == exp.tz - assert res == exp - - # timedelta - vals = [Timedelta("1 days"), Timedelta("2 days")] - s = Series(vals) - assert s.dtype == "timedelta64[ns]" - for res, exp in zip(s, vals): - assert isinstance(res, Timedelta) - assert res == exp - - # period - vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")] - s = Series(vals) - assert s.dtype == "Period[M]" - for res, exp in zip(s, vals): - assert isinstance(res, pd.Period) - assert res.freq == "M" - assert res == exp - - -@pytest.mark.parametrize( - "array, expected_type, dtype", - [ - (np.array([0, 1], dtype=np.int64), np.ndarray, "int64"), - (np.array(["a", "b"]), np.ndarray, "object"), - (pd.Categorical(["a", "b"]), pd.Categorical, "category"), - ( - pd.DatetimeIndex(["2017", "2018"], tz="US/Central"), - DatetimeArray, - "datetime64[ns, US/Central]", - ), - ( - pd.PeriodIndex([2018, 2019], freq="A"), - pd.core.arrays.PeriodArray, - pd.core.dtypes.dtypes.PeriodDtype("A-DEC"), - ), - ( - pd.IntervalIndex.from_breaks([0, 1, 2]), - pd.core.arrays.IntervalArray, - "interval", - ), - # This test is currently failing for datetime64[ns] and timedelta64[ns]. - # The NumPy type system is sufficient for representing these types, so - # we just use NumPy for Series / DataFrame columns of these types (so - # we get consolidation and so on). - # However, DatetimeIndex and TimedeltaIndex use the DateLikeArray - # abstraction to for code reuse. - # At the moment, we've judged that allowing this test to fail is more - # practical that overriding Series._values to special case - # Series[M8[ns]] and Series[m8[ns]] to return a DateLikeArray. - pytest.param( - pd.DatetimeIndex(["2017", "2018"]), - np.ndarray, - "datetime64[ns]", - marks=[pytest.mark.xfail(reason="datetime _values", strict=True)], - ), - pytest.param( - pd.TimedeltaIndex([10 ** 10]), - np.ndarray, - "m8[ns]", - marks=[pytest.mark.xfail(reason="timedelta _values", strict=True)], - ), - ], -) -def test_values_consistent(array, expected_type, dtype): - l_values = pd.Series(array)._values - r_values = pd.Index(array)._values - assert type(l_values) is expected_type - assert type(l_values) is type(r_values) - - tm.assert_equal(l_values, r_values) - - -@pytest.mark.parametrize( - "array, expected", - [ - (np.array([0, 1], dtype=np.int64), np.array([0, 1], dtype=np.int64)), - (np.array(["0", "1"]), np.array(["0", "1"], dtype=object)), - (pd.Categorical(["a", "a"]), np.array([0, 0], dtype="int8")), - ( - pd.DatetimeIndex(["2017-01-01T00:00:00"]), - np.array(["2017-01-01T00:00:00"], dtype="M8[ns]"), - ), - ( - pd.DatetimeIndex(["2017-01-01T00:00:00"], tz="US/Eastern"), - np.array(["2017-01-01T05:00:00"], dtype="M8[ns]"), - ), - (pd.TimedeltaIndex([10 ** 10]), np.array([10 ** 10], dtype="m8[ns]")), - ( - pd.PeriodIndex(["2017", "2018"], freq="D"), - np.array([17167, 17532], dtype=np.int64), - ), - ], -) -def test_ndarray_values(array, expected): - l_values = pd.Series(array)._ndarray_values - r_values = pd.Index(array)._ndarray_values - tm.assert_numpy_array_equal(l_values, r_values) - tm.assert_numpy_array_equal(l_values, expected) - - -@pytest.mark.parametrize("arr", [np.array([1, 2, 3])]) -def test_numpy_array(arr): - ser = pd.Series(arr) - result = ser.array - expected = PandasArray(arr) - tm.assert_extension_array_equal(result, expected) - - -def test_numpy_array_all_dtypes(any_numpy_dtype): - ser = pd.Series(dtype=any_numpy_dtype) - result = ser.array - if is_datetime64_dtype(any_numpy_dtype): - assert 
isinstance(result, DatetimeArray) - elif is_timedelta64_dtype(any_numpy_dtype): - assert isinstance(result, TimedeltaArray) - else: - assert isinstance(result, PandasArray) - - -@pytest.mark.parametrize( - "array, attr", - [ - (pd.Categorical(["a", "b"]), "_codes"), - (pd.core.arrays.period_array(["2000", "2001"], freq="D"), "_data"), - (pd.core.arrays.integer_array([0, np.nan]), "_data"), - (pd.core.arrays.IntervalArray.from_breaks([0, 1]), "_left"), - (pd.SparseArray([0, 1]), "_sparse_values"), - (DatetimeArray(np.array([1, 2], dtype="datetime64[ns]")), "_data"), - # tz-aware Datetime - ( - DatetimeArray( - np.array( - ["2000-01-01T12:00:00", "2000-01-02T12:00:00"], dtype="M8[ns]" - ), - dtype=DatetimeTZDtype(tz="US/Central"), - ), - "_data", - ), - ], -) -def test_array(array, attr, index_or_series): - box = index_or_series - if array.dtype.name in ("Int64", "Sparse[int64, 0]") and box is pd.Index: - pytest.skip("No index type for {}".format(array.dtype)) - result = box(array, copy=False).array - - if attr: - array = getattr(array, attr) - result = getattr(result, attr) - - assert result is array - - -def test_array_multiindex_raises(): - idx = pd.MultiIndex.from_product([["A"], ["a", "b"]]) - with pytest.raises(ValueError, match="MultiIndex"): - idx.array - - -@pytest.mark.parametrize( - "array, expected", - [ - (np.array([1, 2], dtype=np.int64), np.array([1, 2], dtype=np.int64)), - (pd.Categorical(["a", "b"]), np.array(["a", "b"], dtype=object)), - ( - pd.core.arrays.period_array(["2000", "2001"], freq="D"), - np.array([pd.Period("2000", freq="D"), pd.Period("2001", freq="D")]), - ), - ( - pd.core.arrays.integer_array([0, np.nan]), - np.array([0, np.nan], dtype=object), - ), - ( - pd.core.arrays.IntervalArray.from_breaks([0, 1, 2]), - np.array([pd.Interval(0, 1), pd.Interval(1, 2)], dtype=object), - ), - (pd.SparseArray([0, 1]), np.array([0, 1], dtype=np.int64)), - # tz-naive datetime - ( - DatetimeArray(np.array(["2000", "2001"], dtype="M8[ns]")), - np.array(["2000", "2001"], dtype="M8[ns]"), - ), - # tz-aware stays tz`-aware - ( - DatetimeArray( - np.array( - ["2000-01-01T06:00:00", "2000-01-02T06:00:00"], dtype="M8[ns]" - ), - dtype=DatetimeTZDtype(tz="US/Central"), - ), - np.array( - [ - pd.Timestamp("2000-01-01", tz="US/Central"), - pd.Timestamp("2000-01-02", tz="US/Central"), - ] - ), - ), - # Timedelta - ( - TimedeltaArray(np.array([0, 3600000000000], dtype="i8"), freq="H"), - np.array([0, 3600000000000], dtype="m8[ns]"), - ), - ], -) -def test_to_numpy(array, expected, index_or_series): - box = index_or_series - thing = box(array) - - if array.dtype.name in ("Int64", "Sparse[int64, 0]") and box is pd.Index: - pytest.skip("No index type for {}".format(array.dtype)) - - result = thing.to_numpy() - tm.assert_numpy_array_equal(result, expected) - - -@pytest.mark.parametrize("as_series", [True, False]) -@pytest.mark.parametrize( - "arr", [np.array([1, 2, 3], dtype="int64"), np.array(["a", "b", "c"], dtype=object)] -) -def test_to_numpy_copy(arr, as_series): - obj = pd.Index(arr, copy=False) - if as_series: - obj = pd.Series(obj.values, copy=False) - - # no copy by default - result = obj.to_numpy() - assert np.shares_memory(arr, result) is True - - result = obj.to_numpy(copy=False) - assert np.shares_memory(arr, result) is True - - # copy=True - result = obj.to_numpy(copy=True) - assert np.shares_memory(arr, result) is False - - -@pytest.mark.parametrize("as_series", [True, False]) -def test_to_numpy_dtype(as_series): - tz = "US/Eastern" - obj = pd.DatetimeIndex(["2000", "2001"], 
tz=tz) - if as_series: - obj = pd.Series(obj) - - # preserve tz by default - result = obj.to_numpy() - expected = np.array( - [pd.Timestamp("2000", tz=tz), pd.Timestamp("2001", tz=tz)], dtype=object - ) - tm.assert_numpy_array_equal(result, expected) - - result = obj.to_numpy(dtype="object") - tm.assert_numpy_array_equal(result, expected) - - result = obj.to_numpy(dtype="M8[ns]") - expected = np.array(["2000-01-01T05", "2001-01-01T05"], dtype="M8[ns]") - tm.assert_numpy_array_equal(result, expected) - - -class TestConstruction: - # test certain constructor behaviours on dtype inference across Series, - # Index and DataFrame - - @pytest.mark.parametrize( - "klass", - [ - Series, - lambda x, **kwargs: DataFrame({"a": x}, **kwargs)["a"], - pytest.param( - lambda x, **kwargs: DataFrame(x, **kwargs)[0], marks=pytest.mark.xfail - ), - Index, - ], - ) - @pytest.mark.parametrize( - "a", - [ - np.array(["2263-01-01"], dtype="datetime64[D]"), - np.array([datetime(2263, 1, 1)], dtype=object), - np.array([np.datetime64("2263-01-01", "D")], dtype=object), - np.array(["2263-01-01"], dtype=object), - ], - ids=[ - "datetime64[D]", - "object-datetime.datetime", - "object-numpy-scalar", - "object-string", - ], - ) - def test_constructor_datetime_outofbound(self, a, klass): - # GH-26853 (+ bug GH-26206 out of bound non-ns unit) - - # No dtype specified (dtype inference) - # datetime64[non-ns] raise error, other cases result in object dtype - # and preserve original data - if a.dtype.kind == "M": - with pytest.raises(pd.errors.OutOfBoundsDatetime): - klass(a) - else: - result = klass(a) - assert result.dtype == "object" - tm.assert_numpy_array_equal(result.to_numpy(), a) - - # Explicit dtype specified - # Forced conversion fails for all -> all cases raise error - with pytest.raises(pd.errors.OutOfBoundsDatetime): - klass(a, dtype="datetime64[ns]") diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index c9c5963e5590c..5d3c541a4def6 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -16,7 +16,7 @@ bdate_range, date_range, ) -from pandas.tests.test_base import Ops +from pandas.tests.base.test_ops import Ops import pandas.util.testing as tm from pandas.tseries.offsets import BDay, BMonthEnd, CDay, Day, Hour diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 6690a8207eb58..962e674fa607f 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -4,7 +4,7 @@ import pandas as pd from pandas import DatetimeIndex, Index, NaT, PeriodIndex, Series from pandas.core.arrays import PeriodArray -from pandas.tests.test_base import Ops +from pandas.tests.base.test_ops import Ops import pandas.util.testing as tm diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index df448f4332d38..56043cf3edb2d 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -7,7 +7,7 @@ import pandas as pd from pandas import Series, TimedeltaIndex, timedelta_range -from pandas.tests.test_base import Ops +from pandas.tests.base.test_ops import Ops import pandas.util.testing as tm from pandas.tseries.offsets import Day, Hour diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index c6c960910214a..9b4610e61e3d5 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ 
-1116,7 +1116,7 @@ def test_unstack(self): tm.assert_frame_equal(ts.unstack(level=0), right) def test_value_counts_datetime(self): - # most dtypes are tested in test_base.py + # most dtypes are tested in tests/base values = [ pd.Timestamp("2011-01-01 09:00"), pd.Timestamp("2011-01-01 10:00"), @@ -1192,7 +1192,7 @@ def test_value_counts_period(self): tm.assert_series_equal(idx.value_counts(normalize=True), exp) def test_value_counts_categorical_ordered(self): - # most dtypes are tested in test_base.py + # most dtypes are tested in tests/base values = pd.Categorical([1, 2, 3, 1, 1, 3], ordered=True) exp_idx = pd.CategoricalIndex([1, 3, 2], categories=[1, 2, 3], ordered=True) @@ -1506,7 +1506,7 @@ def test_value_counts(self): tm.assert_series_equal(res, exp) # check object dtype handles the Series.name as the same - # (tested in test_base.py) + # (tested in tests/base) s = Series(["a", "b", "c", "c", "c", "b"], name="xxx") res = s.value_counts() exp = Series([3, 2, 1], name="xxx", index=["c", "b", "a"])