From 23696b248c22aa044b566bce37ddcd9878f0f9a6 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Fri, 3 Jan 2020 10:16:44 -0500 Subject: [PATCH 1/9] DOC: Deprecate pandas.SparseArray --- .../development/contributing_docstring.rst | 2 +- doc/source/getting_started/basics.rst | 2 +- doc/source/getting_started/dsintro.rst | 2 +- doc/source/reference/arrays.rst | 4 ++-- doc/source/user_guide/sparse.rst | 16 ++++++++-------- doc/source/whatsnew/v1.0.0.rst | 1 + 6 files changed, 14 insertions(+), 13 deletions(-) diff --git a/doc/source/development/contributing_docstring.rst b/doc/source/development/contributing_docstring.rst index 34bc5f44eb0c0..d897889ed9eff 100644 --- a/doc/source/development/contributing_docstring.rst +++ b/doc/source/development/contributing_docstring.rst @@ -399,7 +399,7 @@ DataFrame: * DataFrame * pandas.Index * pandas.Categorical -* pandas.SparseArray +* pandas.arrays.SparseArray If the exact type is not relevant, but must be compatible with a numpy array, array-like can be specified. If Any type that can be iterated is diff --git a/doc/source/getting_started/basics.rst b/doc/source/getting_started/basics.rst index f47fa48eb6202..4fef5efbd1551 100644 --- a/doc/source/getting_started/basics.rst +++ b/doc/source/getting_started/basics.rst @@ -1951,7 +1951,7 @@ documentation sections for more on each type. | period | :class:`PeriodDtype` | :class:`Period` | :class:`arrays.PeriodArray` | ``'period[]'``, | :ref:`timeseries.periods` | | (time spans) | | | | ``'Period[]'`` | | +-------------------+---------------------------+--------------------+-------------------------------+-----------------------------------------+-------------------------------+ -| sparse | :class:`SparseDtype` | (none) | :class:`SparseArray` | ``'Sparse'``, ``'Sparse[int]'``, | :ref:`sparse` | +| sparse | :class:`SparseDtype` | (none) | :class:`arrays.SparseArray` | ``'Sparse'``, ``'Sparse[int]'``, | :ref:`sparse` | | | | | | ``'Sparse[float]'`` | | +-------------------+---------------------------+--------------------+-------------------------------+-----------------------------------------+-------------------------------+ | intervals | :class:`IntervalDtype` | :class:`Interval` | :class:`arrays.IntervalArray` | ``'interval'``, ``'Interval'``, | :ref:`advanced.intervalindex` | diff --git a/doc/source/getting_started/dsintro.rst b/doc/source/getting_started/dsintro.rst index a07fcbd8b67c4..82d4b5e34e4f8 100644 --- a/doc/source/getting_started/dsintro.rst +++ b/doc/source/getting_started/dsintro.rst @@ -741,7 +741,7 @@ implementation takes precedence and a Series is returned. np.maximum(ser, idx) NumPy ufuncs are safe to apply to :class:`Series` backed by non-ndarray arrays, -for example :class:`SparseArray` (see :ref:`sparse.calculation`). If possible, +for example :class:`arrays.SparseArray` (see :ref:`sparse.calculation`). If possible, the ufunc is applied without converting the underlying data to an ndarray. Console display diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst index 2c8382e916ed8..c71350ecd73b3 100644 --- a/doc/source/reference/arrays.rst +++ b/doc/source/reference/arrays.rst @@ -444,13 +444,13 @@ Sparse data ----------- Data where a single value is repeated many times (e.g. ``0`` or ``NaN``) may -be stored efficiently as a :class:`SparseArray`. +be stored efficiently as a :class:`arrays.SparseArray`. .. autosummary:: :toctree: api/ :template: autosummary/class_without_autosummary.rst - SparseArray + arrays.SparseArray .. autosummary:: :toctree: api/ diff --git a/doc/source/user_guide/sparse.rst b/doc/source/user_guide/sparse.rst index c258a8840b714..8588fac4a18d0 100644 --- a/doc/source/user_guide/sparse.rst +++ b/doc/source/user_guide/sparse.rst @@ -15,7 +15,7 @@ can be chosen, including 0) is omitted. The compressed values are not actually s arr = np.random.randn(10) arr[2:-2] = np.nan - ts = pd.Series(pd.SparseArray(arr)) + ts = pd.Series(pd.arrays.SparseArray(arr)) ts Notice the dtype, ``Sparse[float64, nan]``. The ``nan`` means that elements in the @@ -51,7 +51,7 @@ identical to their dense counterparts. SparseArray ----------- -:class:`SparseArray` is a :class:`~pandas.api.extensions.ExtensionArray` +:class:`arrays.SparseArray` is a :class:`~pandas.api.extensions.ExtensionArray` for storing an array of sparse values (see :ref:`basics.dtypes` for more on extension arrays). It is a 1-dimensional ndarray-like object storing only values distinct from the ``fill_value``: @@ -61,7 +61,7 @@ only values distinct from the ``fill_value``: arr = np.random.randn(10) arr[2:5] = np.nan arr[7:8] = np.nan - sparr = pd.SparseArray(arr) + sparr = pd.arrays.SparseArray(arr) sparr A sparse array can be converted to a regular (dense) ndarray with :meth:`numpy.asarray` @@ -144,7 +144,7 @@ to ``SparseArray`` and get a ``SparseArray`` as a result. .. ipython:: python - arr = pd.SparseArray([1., np.nan, np.nan, -2., np.nan]) + arr = pd.arrays.SparseArray([1., np.nan, np.nan, -2., np.nan]) np.abs(arr) @@ -153,7 +153,7 @@ the correct dense result. .. ipython:: python - arr = pd.SparseArray([1., -1, -1, -2., -1], fill_value=-1) + arr = pd.arrays.SparseArray([1., -1, -1, -2., -1], fill_value=-1) np.abs(arr) np.abs(arr).to_dense() @@ -194,7 +194,7 @@ From an array-like, use the regular :class:`Series` or .. ipython:: python # New way - pd.DataFrame({"A": pd.SparseArray([0, 1])}) + pd.DataFrame({"A": pd.arrays.SparseArray([0, 1])}) From a SciPy sparse matrix, use :meth:`DataFrame.sparse.from_spmatrix`, @@ -256,10 +256,10 @@ Instead, you'll need to ensure that the values being assigned are sparse .. ipython:: python - df = pd.DataFrame({"A": pd.SparseArray([0, 1])}) + df = pd.DataFrame({"A": pd.arrays.SparseArray([0, 1])}) df['B'] = [0, 0] # remains dense df['B'].dtype - df['B'] = pd.SparseArray([0, 0]) + df['B'] = pd.arrays.SparseArray([0, 0]) df['B'].dtype The ``SparseDataFrame.default_kind`` and ``SparseDataFrame.default_fill_value`` attributes diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index a5ea60d0a0d19..59ce49b9144c7 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -577,6 +577,7 @@ Deprecations it is recommended to use ``json_normalize`` as :func:`pandas.json_normalize` instead (:issue:`27586`). - :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`) - The deprecated internal attributes ``_start``, ``_stop`` and ``_step`` of :class:`RangeIndex` now raise a ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`26581`) +- ``pandas.SparseArray`` has been deprecated. Use ``pandas.arrays.SparseArray`` (:class:`arrays.SparseArray`) instead. (:issue:`30642`) **Selecting Columns from a Grouped DataFrame** From 72f702eb6954da086e7e37b0be0ba39df1aa6e7f Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Fri, 3 Jan 2020 14:31:49 -0500 Subject: [PATCH 2/9] CLN: Deprecate pandas.SparseArray for pandas.arrays.SparseArray --- pandas/__init__.py | 18 +++++++- pandas/core/arrays/sparse/accessor.py | 6 +-- pandas/core/arrays/sparse/array.py | 4 +- pandas/core/dtypes/common.py | 10 ++--- pandas/tests/api/test_api.py | 3 +- pandas/tests/arrays/sparse/test_accessor.py | 13 +++--- .../tests/arrays/sparse/test_arithmetics.py | 41 ++++++++++--------- pandas/tests/arrays/sparse/test_array.py | 22 ++++++---- .../arrays/sparse/test_combine_concat.py | 12 +++--- pandas/tests/arrays/test_array.py | 2 +- pandas/tests/base/test_conversion.py | 4 +- pandas/tests/dtypes/test_common.py | 8 ++-- pandas/tests/dtypes/test_dtypes.py | 4 +- pandas/tests/dtypes/test_generic.py | 2 +- pandas/tests/extension/test_sparse.py | 11 +++-- pandas/tests/frame/indexing/test_indexing.py | 8 ++-- pandas/tests/frame/methods/test_quantile.py | 4 +- pandas/tests/frame/test_constructors.py | 2 +- pandas/tests/internals/test_internals.py | 3 +- pandas/tests/reshape/test_reshape.py | 30 +++++++------- pandas/tests/series/test_missing.py | 4 +- pandas/tests/series/test_ufunc.py | 22 +++++----- pandas/util/testing.py | 2 +- 23 files changed, 129 insertions(+), 106 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index f9de17a2e3914..7ce81b57a74fc 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -115,7 +115,7 @@ DataFrame, ) -from pandas.core.arrays.sparse import SparseArray, SparseDtype +from pandas.core.arrays.sparse import SparseDtype from pandas.tseries.api import infer_freq from pandas.tseries import offsets @@ -245,6 +245,19 @@ class Panel: return type(name, (), {}) + elif name == "SparseArray": + + warnings.warn( + "The pandas.SparseArray class is deprecated " + "and will be removed from pandas in a future version. " + "Use pandas.arrays.SparseArray instead.", + FutureWarning, + stacklevel=2, + ) + from pandas.core.arrays.sparse import SparseArray + + return SparseArray + raise AttributeError(f"module 'pandas' has no attribute '{name}'") @@ -307,6 +320,9 @@ def __getattr__(self, item): datetime = __Datetime().datetime + class SparseArray: + pass + # module level doc-string __doc__ = """ diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py index c207b96a8d308..eb4d7cdf2709f 100644 --- a/pandas/core/arrays/sparse/accessor.py +++ b/pandas/core/arrays/sparse/accessor.py @@ -163,7 +163,7 @@ def to_dense(self): Examples -------- - >>> series = pd.Series(pd.SparseArray([0, 1, 0])) + >>> series = pd.Series(pd.arrays.SparseArray([0, 1, 0])) >>> series 0 0 1 1 @@ -216,7 +216,7 @@ def from_spmatrix(cls, data, index=None, columns=None): ------- DataFrame Each column of the DataFrame is stored as a - :class:`SparseArray`. + :class:`arrays.SparseArray`. Examples -------- @@ -251,7 +251,7 @@ def to_dense(self): Examples -------- - >>> df = pd.DataFrame({"A": pd.SparseArray([0, 1, 0])}) + >>> df = pd.DataFrame({"A": pd.arrays.SparseArray([0, 1, 0])}) >>> df.sparse.to_dense() A 0 0 diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index adf10642f337a..9838cdfabbb95 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -403,7 +403,7 @@ def from_spmatrix(cls, data): -------- >>> import scipy.sparse >>> mat = scipy.sparse.coo_matrix((4, 1)) - >>> pd.SparseArray.from_spmatrix(mat) + >>> pd.arrays.SparseArray.from_spmatrix(mat) [0.0, 0.0, 0.0, 0.0] Fill: 0.0 IntIndex @@ -1079,7 +1079,7 @@ def map(self, mapper): Examples -------- - >>> arr = pd.SparseArray([0, 1, 2]) + >>> arr = pd.arrays.SparseArray([0, 1, 2]) >>> arr.apply(lambda x: x + 10) [10, 11, 12] Fill: 10 diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 8fc8b8300d21c..a716bc8e0a337 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -269,9 +269,9 @@ def is_sparse(arr) -> bool: -------- Returns `True` if the parameter is a 1-D pandas sparse array. - >>> is_sparse(pd.SparseArray([0, 0, 1, 0])) + >>> is_sparse(pd.arrays.SparseArray([0, 0, 1, 0])) True - >>> is_sparse(pd.Series(pd.SparseArray([0, 0, 1, 0]))) + >>> is_sparse(pd.Series(pd.arrays.SparseArray([0, 0, 1, 0]))) True Returns `False` if the parameter is not sparse. @@ -318,7 +318,7 @@ def is_scipy_sparse(arr) -> bool: >>> from scipy.sparse import bsr_matrix >>> is_scipy_sparse(bsr_matrix([1, 2, 3])) True - >>> is_scipy_sparse(pd.SparseArray([1, 2, 3])) + >>> is_scipy_sparse(pd.arrays.SparseArray([1, 2, 3])) False """ @@ -1467,7 +1467,7 @@ def is_bool_dtype(arr_or_dtype) -> bool: True >>> is_bool_dtype(pd.Categorical([True, False])) True - >>> is_bool_dtype(pd.SparseArray([True, False])) + >>> is_bool_dtype(pd.arrays.SparseArray([True, False])) True """ if arr_or_dtype is None: @@ -1529,7 +1529,7 @@ def is_extension_type(arr) -> bool: True >>> is_extension_type(pd.Series(cat)) True - >>> is_extension_type(pd.SparseArray([1, 2, 3])) + >>> is_extension_type(pd.arrays.SparseArray([1, 2, 3])) True >>> from scipy.sparse import bsr_matrix >>> is_extension_type(bsr_matrix([1, 2, 3])) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index d865f26983579..f928b0af835e8 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -67,7 +67,6 @@ class TestPDApi(Base): "RangeIndex", "UInt64Index", "Series", - "SparseArray", "SparseDtype", "StringDtype", "Timedelta", @@ -91,7 +90,7 @@ class TestPDApi(Base): "NamedAgg", ] if not compat.PY37: - classes.extend(["Panel", "SparseSeries", "SparseDataFrame"]) + classes.extend(["Panel", "SparseSeries", "SparseDataFrame", "SparseArray"]) deprecated_modules.extend(["np", "datetime"]) # these are already deprecated; awaiting removal diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py index eab174862818c..a8943c075f83e 100644 --- a/pandas/tests/arrays/sparse/test_accessor.py +++ b/pandas/tests/arrays/sparse/test_accessor.py @@ -67,9 +67,10 @@ def test_to_coo(self): def test_to_dense(self): df = pd.DataFrame( { - "A": pd.SparseArray([1, 0], dtype=pd.SparseDtype("int64", 0)), - "B": pd.SparseArray([1, 0], dtype=pd.SparseDtype("int64", 1)), - "C": pd.SparseArray([1.0, 0.0], dtype=pd.SparseDtype("float64", 0.0)), + "A": pd.arrays.SparseArray([1, 0], dtype=pd.SparseDtype("int64", 0)), + "B": pd.arrays.SparseArray([1, 0], dtype=pd.SparseDtype("int64", 1)), + "C": pd.arrays.SparseArray([1.0, 0.0], + dtype=pd.SparseDtype("float64", 0.0)), }, index=["b", "a"], ) @@ -82,8 +83,8 @@ def test_to_dense(self): def test_density(self): df = pd.DataFrame( { - "A": pd.SparseArray([1, 0, 2, 1], fill_value=0), - "B": pd.SparseArray([0, 1, 1, 1], fill_value=0), + "A": pd.arrays.SparseArray([1, 0, 2, 1], fill_value=0), + "B": pd.arrays.SparseArray([0, 1, 1, 1], fill_value=0), } ) res = df.sparse.density @@ -100,7 +101,7 @@ def test_series_from_coo(self, dtype, dense_index): result = pd.Series.sparse.from_coo(A, dense_index=dense_index) index = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)]) expected = pd.Series( - pd.SparseArray(np.array([1, 1, 1], dtype=dtype)), index=index + pd.arrays.SparseArray(np.array([1, 1, 1], dtype=dtype)), index=index ) if dense_index: expected = expected.reindex(pd.MultiIndex.from_product(index.levels)) diff --git a/pandas/tests/arrays/sparse/test_arithmetics.py b/pandas/tests/arrays/sparse/test_arithmetics.py index f1d2803ce5505..6fcc6d762b1e0 100644 --- a/pandas/tests/arrays/sparse/test_arithmetics.py +++ b/pandas/tests/arrays/sparse/test_arithmetics.py @@ -24,7 +24,7 @@ def mix(request): class TestSparseArrayArithmetics: _base = np.array - _klass = pd.SparseArray + _klass = pd.arrays.SparseArray def _assert(self, a, b): tm.assert_numpy_array_equal(a, b) @@ -391,15 +391,15 @@ def test_mixed_array_comparison(self, kind): @pytest.mark.parametrize("op", [operator.eq, operator.add]) def test_with_list(op): - arr = pd.SparseArray([0, 1], fill_value=0) + arr = pd.arrays.SparseArray([0, 1], fill_value=0) result = op(arr, [0, 1]) - expected = op(arr, pd.SparseArray([0, 1])) + expected = op(arr, pd.arrays.SparseArray([0, 1])) tm.assert_sp_array_equal(result, expected) def test_with_dataframe(): # GH#27910 - arr = pd.SparseArray([0, 1], fill_value=0) + arr = pd.arrays.SparseArray([0, 1], fill_value=0) df = pd.DataFrame([[1, 2], [3, 4]]) result = arr.__add__(df) assert result is NotImplemented @@ -407,7 +407,7 @@ def test_with_dataframe(): def test_with_zerodim_ndarray(): # GH#27910 - arr = pd.SparseArray([0, 1], fill_value=0) + arr = pd.arrays.SparseArray([0, 1], fill_value=0) result = arr * np.array(2) expected = arr * 2 @@ -416,23 +416,24 @@ def test_with_zerodim_ndarray(): @pytest.mark.parametrize("ufunc", [np.abs, np.exp]) @pytest.mark.parametrize( - "arr", [pd.SparseArray([0, 0, -1, 1]), pd.SparseArray([None, None, -1, 1])] + "arr", [pd.arrays.SparseArray([0, 0, -1, 1]), + pd.arrays.SparseArray([None, None, -1, 1])] ) def test_ufuncs(ufunc, arr): result = ufunc(arr) fill_value = ufunc(arr.fill_value) - expected = pd.SparseArray(ufunc(np.asarray(arr)), fill_value=fill_value) + expected = pd.arrays.SparseArray(ufunc(np.asarray(arr)), fill_value=fill_value) tm.assert_sp_array_equal(result, expected) @pytest.mark.parametrize( "a, b", [ - (pd.SparseArray([0, 0, 0]), np.array([0, 1, 2])), - (pd.SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), - (pd.SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), - (pd.SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), - (pd.SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), + (pd.arrays.SparseArray([0, 0, 0]), np.array([0, 1, 2])), + (pd.arrays.SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), + (pd.arrays.SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), + (pd.arrays.SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), + (pd.arrays.SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), ], ) @pytest.mark.parametrize("ufunc", [np.add, np.greater]) @@ -440,12 +441,12 @@ def test_binary_ufuncs(ufunc, a, b): # can't say anything about fill value here. result = ufunc(a, b) expected = ufunc(np.asarray(a), np.asarray(b)) - assert isinstance(result, pd.SparseArray) + assert isinstance(result, pd.arrays.SparseArray) tm.assert_numpy_array_equal(np.asarray(result), expected) def test_ndarray_inplace(): - sparray = pd.SparseArray([0, 2, 0, 0]) + sparray = pd.arrays.SparseArray([0, 2, 0, 0]) ndarray = np.array([0, 1, 2, 3]) ndarray += sparray expected = np.array([0, 3, 2, 3]) @@ -453,19 +454,19 @@ def test_ndarray_inplace(): def test_sparray_inplace(): - sparray = pd.SparseArray([0, 2, 0, 0]) + sparray = pd.arrays.SparseArray([0, 2, 0, 0]) ndarray = np.array([0, 1, 2, 3]) sparray += ndarray - expected = pd.SparseArray([0, 3, 2, 3], fill_value=0) + expected = pd.arrays.SparseArray([0, 3, 2, 3], fill_value=0) tm.assert_sp_array_equal(sparray, expected) @pytest.mark.parametrize("fill_value", [True, False]) def test_invert(fill_value): arr = np.array([True, False, False, True]) - sparray = pd.SparseArray(arr, fill_value=fill_value) + sparray = pd.arrays.SparseArray(arr, fill_value=fill_value) result = ~sparray - expected = pd.SparseArray(~arr, fill_value=not fill_value) + expected = pd.arrays.SparseArray(~arr, fill_value=not fill_value) tm.assert_sp_array_equal(result, expected) @@ -473,7 +474,7 @@ def test_invert(fill_value): @pytest.mark.parametrize("op", [operator.pos, operator.neg]) def test_unary_op(op, fill_value): arr = np.array([0, 1, np.nan, 2]) - sparray = pd.SparseArray(arr, fill_value=fill_value) + sparray = pd.arrays.SparseArray(arr, fill_value=fill_value) result = op(sparray) - expected = pd.SparseArray(op(arr), fill_value=op(fill_value)) + expected = pd.arrays.SparseArray(op(arr), fill_value=op(fill_value)) tm.assert_sp_array_equal(result, expected) diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 0aaf294378bf7..088317067201d 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -470,7 +470,7 @@ def test_astype(self): arr.astype("Sparse[i8]") def test_astype_bool(self): - a = pd.SparseArray([1, 0, 0, 1], dtype=SparseDtype(int, 0)) + a = pd.arrays.SparseArray([1, 0, 0, 1], dtype=SparseDtype(int, 0)) result = a.astype(bool) expected = SparseArray([True, 0, 0, True], dtype=SparseDtype(bool, 0)) tm.assert_sp_array_equal(result, expected) @@ -682,7 +682,7 @@ def test_getslice_tuple(self): dense[4:, :] def test_boolean_slice_empty(self): - arr = pd.SparseArray([0, 1, 2]) + arr = pd.arrays.SparseArray([0, 1, 2]) res = arr[[False, False, False]] assert res.dtype == arr.dtype @@ -828,12 +828,14 @@ def test_fillna_overlap(self): def test_nonzero(self): # Tests regression #21172. - sa = pd.SparseArray([float("nan"), float("nan"), 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]) + sa = pd.arrays.SparseArray( + [float("nan"), float("nan"), 1, 0, 0, 2, 0, 0, 0, 3, 0, 0] + ) expected = np.array([2, 5, 9], dtype=np.int32) (result,) = sa.nonzero() tm.assert_numpy_array_equal(expected, result) - sa = pd.SparseArray([0, 0, 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]) + sa = pd.arrays.SparseArray([0, 0, 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]) (result,) = sa.nonzero() tm.assert_numpy_array_equal(expected, result) @@ -1086,11 +1088,13 @@ def test_ufunc_args(self): @pytest.mark.parametrize("fill_value", [0.0, np.nan]) def test_modf(self, fill_value): # https://github.com/pandas-dev/pandas/issues/26946 - sparse = pd.SparseArray([fill_value] * 10 + [1.1, 2.2], fill_value=fill_value) + sparse = pd.arrays.SparseArray( + [fill_value] * 10 + [1.1, 2.2], fill_value=fill_value + ) r1, r2 = np.modf(sparse) e1, e2 = np.modf(np.asarray(sparse)) - tm.assert_sp_array_equal(r1, pd.SparseArray(e1, fill_value=fill_value)) - tm.assert_sp_array_equal(r2, pd.SparseArray(e2, fill_value=fill_value)) + tm.assert_sp_array_equal(r1, pd.arrays.SparseArray(e1, fill_value=fill_value)) + tm.assert_sp_array_equal(r2, pd.arrays.SparseArray(e2, fill_value=fill_value)) def test_nbytes_integer(self): arr = SparseArray([1, 0, 0, 0, 2], kind="integer") @@ -1106,7 +1110,7 @@ def test_nbytes_block(self): assert result == 24 def test_asarray_datetime64(self): - s = pd.SparseArray(pd.to_datetime(["2012", None, None, "2013"])) + s = pd.arrays.SparseArray(pd.to_datetime(["2012", None, None, "2013"])) np.asarray(s) def test_density(self): @@ -1208,7 +1212,7 @@ def test_first_fill_value_loc(arr, loc): ) @pytest.mark.parametrize("fill_value", [np.nan, 0, 1]) def test_unique_na_fill(arr, fill_value): - a = pd.SparseArray(arr, fill_value=fill_value).unique() + a = pd.arrays.SparseArray(arr, fill_value=fill_value).unique() b = pd.Series(arr).unique() assert isinstance(a, SparseArray) a = np.asarray(a) diff --git a/pandas/tests/arrays/sparse/test_combine_concat.py b/pandas/tests/arrays/sparse/test_combine_concat.py index 4ad1aa60e7b4f..90b0345c96633 100644 --- a/pandas/tests/arrays/sparse/test_combine_concat.py +++ b/pandas/tests/arrays/sparse/test_combine_concat.py @@ -8,10 +8,10 @@ class TestSparseArrayConcat: @pytest.mark.parametrize("kind", ["integer", "block"]) def test_basic(self, kind): - a = pd.SparseArray([1, 0, 0, 2], kind=kind) - b = pd.SparseArray([1, 0, 2, 2], kind=kind) + a = pd.arrays.SparseArray([1, 0, 0, 2], kind=kind) + b = pd.arrays.SparseArray([1, 0, 2, 2], kind=kind) - result = pd.SparseArray._concat_same_type([a, b]) + result = pd.arrays.SparseArray._concat_same_type([a, b]) # Can't make any assertions about the sparse index itself # since we aren't don't merge sparse blocs across arrays # in to_concat @@ -22,10 +22,10 @@ def test_basic(self, kind): @pytest.mark.parametrize("kind", ["integer", "block"]) def test_uses_first_kind(self, kind): other = "integer" if kind == "block" else "block" - a = pd.SparseArray([1, 0, 0, 2], kind=kind) - b = pd.SparseArray([1, 0, 2, 2], kind=other) + a = pd.arrays.SparseArray([1, 0, 0, 2], kind=kind) + b = pd.arrays.SparseArray([1, 0, 2, 2], kind=other) - result = pd.SparseArray._concat_same_type([a, b]) + result = pd.arrays.SparseArray._concat_same_type([a, b]) expected = np.array([1, 2, 1, 2, 2], dtype="int64") tm.assert_numpy_array_equal(result.sp_values, expected) assert result.kind == kind diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index f2a4e73e7b6ad..a6dbb34b6960d 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -113,7 +113,7 @@ pd.arrays.IntervalArray.from_tuples([(1, 2), (3, 4)]), ), # Sparse - ([0, 1], "Sparse[int64]", pd.SparseArray([0, 1], dtype="int64")), + ([0, 1], "Sparse[int64]", pd.arrays.SparseArray([0, 1], dtype="int64")), # IntegerNA ([1, None], "Int16", integer_array([1, None], dtype="Int16")), (pd.Series([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))), diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index 4295d89869a72..f01d315571ea5 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -271,7 +271,7 @@ def test_numpy_array_all_dtypes(any_numpy_dtype): (pd.core.arrays.period_array(["2000", "2001"], freq="D"), "_data"), (pd.core.arrays.integer_array([0, np.nan]), "_data"), (pd.core.arrays.IntervalArray.from_breaks([0, 1]), "_left"), - (pd.SparseArray([0, 1]), "_sparse_values"), + (pd.arrays.SparseArray([0, 1]), "_sparse_values"), (DatetimeArray(np.array([1, 2], dtype="datetime64[ns]")), "_data"), # tz-aware Datetime ( @@ -321,7 +321,7 @@ def test_array_multiindex_raises(): pd.core.arrays.IntervalArray.from_breaks([0, 1, 2]), np.array([pd.Interval(0, 1), pd.Interval(1, 2)], dtype=object), ), - (pd.SparseArray([0, 1]), np.array([0, 1], dtype=np.int64)), + (pd.arrays.SparseArray([0, 1]), np.array([0, 1], dtype=np.int64)), # tz-naive datetime ( DatetimeArray(np.array(["2000", "2001"], dtype="M8[ns]")), diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 7abb43bb25e14..3b2bb1b6c7853 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -182,7 +182,7 @@ def test_is_object(): "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)] ) def test_is_sparse(check_scipy): - assert com.is_sparse(pd.SparseArray([1, 2, 3])) + assert com.is_sparse(pd.arrays.SparseArray([1, 2, 3])) assert not com.is_sparse(np.array([1, 2, 3])) @@ -198,7 +198,7 @@ def test_is_scipy_sparse(): assert com.is_scipy_sparse(bsr_matrix([1, 2, 3])) - assert not com.is_scipy_sparse(pd.SparseArray([1, 2, 3])) + assert not com.is_scipy_sparse(pd.arrays.SparseArray([1, 2, 3])) def test_is_categorical(): @@ -576,7 +576,7 @@ def test_is_extension_type(check_scipy): cat = pd.Categorical([1, 2, 3]) assert com.is_extension_type(cat) assert com.is_extension_type(pd.Series(cat)) - assert com.is_extension_type(pd.SparseArray([1, 2, 3])) + assert com.is_extension_type(pd.arrays.SparseArray([1, 2, 3])) assert com.is_extension_type(pd.DatetimeIndex(["2000"], tz="US/Eastern")) dtype = DatetimeTZDtype("ns", tz="US/Eastern") @@ -605,7 +605,7 @@ def test_is_extension_array_dtype(check_scipy): cat = pd.Categorical([1, 2, 3]) assert com.is_extension_array_dtype(cat) assert com.is_extension_array_dtype(pd.Series(cat)) - assert com.is_extension_array_dtype(pd.SparseArray([1, 2, 3])) + assert com.is_extension_array_dtype(pd.arrays.SparseArray([1, 2, 3])) assert com.is_extension_array_dtype(pd.DatetimeIndex(["2000"], tz="US/Eastern")) dtype = DatetimeTZDtype("ns", tz="US/Eastern") diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 3a933a5ca8cdc..47dd34700fd6d 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -914,7 +914,7 @@ def test_registry_find(dtype, expected): (pd.Series([1, 2]), False), (np.array([True, False]), True), (pd.Series([True, False]), True), - (pd.SparseArray([True, False]), True), + (pd.arrays.SparseArray([True, False]), True), (SparseDtype(bool), True), ], ) @@ -924,7 +924,7 @@ def test_is_bool_dtype(dtype, expected): def test_is_bool_dtype_sparse(): - result = is_bool_dtype(pd.Series(pd.SparseArray([True, False]))) + result = is_bool_dtype(pd.Series(pd.arrays.SparseArray([True, False]))) assert result is True diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index c17a8997a9b8f..8802131c12440 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -17,7 +17,7 @@ class TestABCClasses: categorical = pd.Categorical([1, 2, 3], categories=[2, 3, 1]) categorical_df = pd.DataFrame({"values": [1, 2, 3]}, index=categorical) df = pd.DataFrame({"names": ["a", "b", "c"]}, index=multi_index) - sparse_array = pd.SparseArray(np.random.randn(10)) + sparse_array = pd.arrays.SparseArray(np.random.randn(10)) datetime_array = pd.core.arrays.DatetimeArray(datetime_index) timedelta_array = pd.core.arrays.TimedeltaArray(timedelta_index) diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 6ebe71e173ec2..c01dd0444c5a2 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -4,7 +4,8 @@ from pandas.errors import PerformanceWarning import pandas as pd -from pandas import SparseArray, SparseDtype +from pandas import SparseDtype +from pandas.arrays import SparseArray from pandas.tests.extension import base import pandas.util.testing as tm @@ -231,7 +232,7 @@ def test_combine_le(self, data_repeated): s2 = pd.Series(orig_data2) result = s1.combine(s2, lambda x1, x2: x1 <= x2) expected = pd.Series( - pd.SparseArray( + pd.arrays.SparseArray( [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))], fill_value=False, ) @@ -241,7 +242,9 @@ def test_combine_le(self, data_repeated): val = s1.iloc[0] result = s1.combine(val, lambda x1, x2: x1 <= x2) expected = pd.Series( - pd.SparseArray([a <= val for a in list(orig_data1)], fill_value=False) + pd.arrays.SparseArray( + [a <= val for a in list(orig_data1)], fill_value=False + ) ) self.assert_series_equal(result, expected) @@ -346,7 +349,7 @@ def _compare_other(self, s, data, op_name, other): with np.errstate(all="ignore"): expected = pd.Series( - pd.SparseArray( + pd.arrays.SparseArray( op(np.asarray(data), np.asarray(other)), fill_value=result.values.fill_value, ) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 9a53caa491970..479200eef418f 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1776,7 +1776,7 @@ def test_getitem_ix_float_duplicates(self): def test_getitem_sparse_column(self): # https://github.com/pandas-dev/pandas/issues/23559 - data = pd.SparseArray([0, 1]) + data = pd.arrays.SparseArray([0, 1]) df = pd.DataFrame({"A": data}) expected = pd.Series(data, name="A") result = df["A"] @@ -1791,7 +1791,7 @@ def test_getitem_sparse_column(self): def test_setitem_with_sparse_value(self): # GH8131 df = pd.DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]}) - sp_array = pd.SparseArray([0, 0, 1]) + sp_array = pd.arrays.SparseArray([0, 0, 1]) df["new_column"] = sp_array tm.assert_series_equal( df["new_column"], pd.Series(sp_array, name="new_column"), check_names=False @@ -1799,9 +1799,9 @@ def test_setitem_with_sparse_value(self): def test_setitem_with_unaligned_sparse_value(self): df = pd.DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]}) - sp_series = pd.Series(pd.SparseArray([0, 0, 1]), index=[2, 1, 0]) + sp_series = pd.Series(pd.arrays.SparseArray([0, 0, 1]), index=[2, 1, 0]) df["new_column"] = sp_series - exp = pd.Series(pd.SparseArray([1, 0, 0]), name="new_column") + exp = pd.Series(pd.arrays.SparseArray([1, 0, 0]), name="new_column") tm.assert_series_equal(df["new_column"], exp) def test_setitem_with_unaligned_tz_aware_datetime_column(self): diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index c25b24121d481..59a2cbf7608be 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -9,8 +9,8 @@ class TestDataFrameQuantile: def test_quantile_sparse(self): # GH#17198 - s = pd.Series(pd.SparseArray([1, 2])) - s1 = pd.Series(pd.SparseArray([3, 4])) + s = pd.Series(pd.arrays.SparseArray([1, 2])) + s1 = pd.Series(pd.arrays.SparseArray([3, 4])) df = pd.DataFrame({0: s, 1: s1}) result = df.quantile() diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 4e7d8c3054cf2..396dcf46e47ec 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2414,7 +2414,7 @@ class List(list): "extension_arr", [ Categorical(list("aabbc")), - pd.SparseArray([1, np.nan, np.nan, np.nan]), + pd.arrays.SparseArray([1, np.nan, np.nan, np.nan]), IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]), PeriodArray(pd.period_range(start="1/1/2017", end="1/1/2018", freq="M")), ], diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 551782d0b363a..cc39e4e4df823 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -17,10 +17,9 @@ Index, MultiIndex, Series, - SparseArray, ) import pandas.core.algorithms as algos -from pandas.core.arrays import DatetimeArray, TimedeltaArray +from pandas.core.arrays import DatetimeArray, TimedeltaArray, SparseArray from pandas.core.internals import BlockManager, SingleBlockManager, make_block import pandas.util.testing as tm diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index b695b05c7c7db..7c893fa88b62b 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -45,7 +45,7 @@ def test_basic(self, sparse, dtype): dtype=self.effective_dtype(dtype), ) if sparse: - expected = expected.apply(pd.SparseArray, fill_value=0.0) + expected = expected.apply(pd.arrays.SparseArray, fill_value=0.0) result = get_dummies(s_list, sparse=sparse, dtype=dtype) tm.assert_frame_equal(result, expected) @@ -132,7 +132,7 @@ def test_include_na(self, sparse, dtype): {"a": [1, 0, 0], "b": [0, 1, 0]}, dtype=self.effective_dtype(dtype) ) if sparse: - exp = exp.apply(pd.SparseArray, fill_value=0.0) + exp = exp.apply(pd.arrays.SparseArray, fill_value=0.0) tm.assert_frame_equal(res, exp) # Sparse dataframes do not allow nan labelled columns, see #GH8822 @@ -145,7 +145,7 @@ def test_include_na(self, sparse, dtype): # hack (NaN handling in assert_index_equal) exp_na.columns = res_na.columns if sparse: - exp_na = exp_na.apply(pd.SparseArray, fill_value=0.0) + exp_na = exp_na.apply(pd.arrays.SparseArray, fill_value=0.0) tm.assert_frame_equal(res_na, exp_na) res_just_na = get_dummies([np.nan], dummy_na=True, sparse=sparse, dtype=dtype) @@ -167,7 +167,7 @@ def test_unicode(self, sparse): dtype=np.uint8, ) if sparse: - exp = exp.apply(pd.SparseArray, fill_value=0) + exp = exp.apply(pd.arrays.SparseArray, fill_value=0) tm.assert_frame_equal(res, exp) def test_dataframe_dummies_all_obj(self, df, sparse): @@ -180,10 +180,10 @@ def test_dataframe_dummies_all_obj(self, df, sparse): if sparse: expected = pd.DataFrame( { - "A_a": pd.SparseArray([1, 0, 1], dtype="uint8"), - "A_b": pd.SparseArray([0, 1, 0], dtype="uint8"), - "B_b": pd.SparseArray([1, 1, 0], dtype="uint8"), - "B_c": pd.SparseArray([0, 0, 1], dtype="uint8"), + "A_a": pd.arrays.SparseArray([1, 0, 1], dtype="uint8"), + "A_b": pd.arrays.SparseArray([0, 1, 0], dtype="uint8"), + "B_b": pd.arrays.SparseArray([1, 1, 0], dtype="uint8"), + "B_c": pd.arrays.SparseArray([0, 0, 1], dtype="uint8"), } ) @@ -226,7 +226,7 @@ def test_dataframe_dummies_prefix_list(self, df, sparse): cols = ["from_A_a", "from_A_b", "from_B_b", "from_B_c"] expected = expected[["C"] + cols] - typ = pd.SparseArray if sparse else pd.Series + typ = pd.arrays.SparseArray if sparse else pd.Series expected[cols] = expected[cols].apply(lambda x: typ(x)) tm.assert_frame_equal(result, expected) @@ -423,7 +423,7 @@ def test_basic_drop_first(self, sparse): result = get_dummies(s_list, drop_first=True, sparse=sparse) if sparse: - expected = expected.apply(pd.SparseArray, fill_value=0) + expected = expected.apply(pd.arrays.SparseArray, fill_value=0) tm.assert_frame_equal(result, expected) result = get_dummies(s_series, drop_first=True, sparse=sparse) @@ -457,7 +457,7 @@ def test_basic_drop_first_NA(self, sparse): res = get_dummies(s_NA, drop_first=True, sparse=sparse) exp = DataFrame({"b": [0, 1, 0]}, dtype=np.uint8) if sparse: - exp = exp.apply(pd.SparseArray, fill_value=0) + exp = exp.apply(pd.arrays.SparseArray, fill_value=0) tm.assert_frame_equal(res, exp) @@ -466,7 +466,7 @@ def test_basic_drop_first_NA(self, sparse): ["b", np.nan], axis=1 ) if sparse: - exp_na = exp_na.apply(pd.SparseArray, fill_value=0) + exp_na = exp_na.apply(pd.arrays.SparseArray, fill_value=0) tm.assert_frame_equal(res_na, exp_na) res_just_na = get_dummies( @@ -480,7 +480,7 @@ def test_dataframe_dummies_drop_first(self, df, sparse): result = get_dummies(df, drop_first=True, sparse=sparse) expected = DataFrame({"A_b": [0, 1, 0], "B_c": [0, 0, 1]}, dtype=np.uint8) if sparse: - expected = expected.apply(pd.SparseArray, fill_value=0) + expected = expected.apply(pd.arrays.SparseArray, fill_value=0) tm.assert_frame_equal(result, expected) def test_dataframe_dummies_drop_first_with_categorical(self, df, sparse, dtype): @@ -494,7 +494,7 @@ def test_dataframe_dummies_drop_first_with_categorical(self, df, sparse, dtype): expected = expected[["C", "A_b", "B_c", "cat_y"]] if sparse: for col in cols: - expected[col] = pd.SparseArray(expected[col]) + expected[col] = pd.arrays.SparseArray(expected[col]) tm.assert_frame_equal(result, expected) def test_dataframe_dummies_drop_first_with_na(self, df, sparse): @@ -516,7 +516,7 @@ def test_dataframe_dummies_drop_first_with_na(self, df, sparse): expected = expected.sort_index(axis=1) if sparse: for col in cols: - expected[col] = pd.SparseArray(expected[col]) + expected[col] = pd.arrays.SparseArray(expected[col]) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index c49cd6930781e..cf1592bee8c9a 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -457,9 +457,9 @@ def test_fillna_consistency(self): def test_where_sparse(self): # GH#17198 make sure we dont get an AttributeError for sp_index - ser = pd.Series(pd.SparseArray([1, 2])) + ser = pd.Series(pd.arrays.SparseArray([1, 2])) result = ser.where(ser >= 2, 0) - expected = pd.Series(pd.SparseArray([0, 2])) + expected = pd.Series(pd.arrays.SparseArray([0, 2])) tm.assert_series_equal(result, expected) def test_datetime64tz_fillna_round_issue(self): diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 120eaeaf785b0..580f9625ce2c4 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -33,7 +33,7 @@ def test_unary_ufunc(ufunc, sparse): array = np.random.randint(0, 10, 10, dtype="int64") array[::2] = 0 if sparse: - array = pd.SparseArray(array, dtype=pd.SparseDtype("int64", 0)) + array = pd.arrays.SparseArray(array, dtype=pd.SparseDtype("int64", 0)) index = list(string.ascii_letters[:10]) name = "name" @@ -51,8 +51,8 @@ def test_binary_ufunc_with_array(flip, sparse, ufunc, arrays_for_binary_ufunc): # Test that ufunc(Series(a), array) == Series(ufunc(a, b)) a1, a2 = arrays_for_binary_ufunc if sparse: - a1 = pd.SparseArray(a1, dtype=pd.SparseDtype("int64", 0)) - a2 = pd.SparseArray(a2, dtype=pd.SparseDtype("int64", 0)) + a1 = pd.arrays.SparseArray(a1, dtype=pd.SparseDtype("int64", 0)) + a2 = pd.arrays.SparseArray(a2, dtype=pd.SparseDtype("int64", 0)) name = "name" # op(Series, array) preserves the name. series = pd.Series(a1, name=name) @@ -79,8 +79,8 @@ def test_binary_ufunc_with_index(flip, sparse, ufunc, arrays_for_binary_ufunc): # * ufunc(Index, Series) dispatches to Series (returns a Series) a1, a2 = arrays_for_binary_ufunc if sparse: - a1 = pd.SparseArray(a1, dtype=pd.SparseDtype("int64", 0)) - a2 = pd.SparseArray(a2, dtype=pd.SparseDtype("int64", 0)) + a1 = pd.arrays.SparseArray(a1, dtype=pd.SparseDtype("int64", 0)) + a2 = pd.arrays.SparseArray(a2, dtype=pd.SparseDtype("int64", 0)) name = "name" # op(Series, array) preserves the name. series = pd.Series(a1, name=name) @@ -110,8 +110,8 @@ def test_binary_ufunc_with_series( # with alignment between the indices a1, a2 = arrays_for_binary_ufunc if sparse: - a1 = pd.SparseArray(a1, dtype=pd.SparseDtype("int64", 0)) - a2 = pd.SparseArray(a2, dtype=pd.SparseDtype("int64", 0)) + a1 = pd.arrays.SparseArray(a1, dtype=pd.SparseDtype("int64", 0)) + a2 = pd.arrays.SparseArray(a2, dtype=pd.SparseDtype("int64", 0)) name = "name" # op(Series, array) preserves the name. series = pd.Series(a1, name=name) @@ -149,7 +149,7 @@ def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc): # * ufunc(Series, scalar) == ufunc(scalar, Series) array, _ = arrays_for_binary_ufunc if sparse: - array = pd.SparseArray(array) + array = pd.arrays.SparseArray(array) other = 2 series = pd.Series(array, name="name") @@ -183,8 +183,8 @@ def test_multiple_ouput_binary_ufuncs(ufunc, sparse, shuffle, arrays_for_binary_ a2[a2 == 0] = 1 if sparse: - a1 = pd.SparseArray(a1, dtype=pd.SparseDtype("int64", 0)) - a2 = pd.SparseArray(a2, dtype=pd.SparseDtype("int64", 0)) + a1 = pd.arrays.SparseArray(a1, dtype=pd.SparseDtype("int64", 0)) + a2 = pd.arrays.SparseArray(a2, dtype=pd.SparseDtype("int64", 0)) s1 = pd.Series(a1) s2 = pd.Series(a2) @@ -209,7 +209,7 @@ def test_multiple_ouput_ufunc(sparse, arrays_for_binary_ufunc): array, _ = arrays_for_binary_ufunc if sparse: - array = pd.SparseArray(array) + array = pd.arrays.SparseArray(array) series = pd.Series(array, name="name") result = np.modf(series) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 2e201339d4d77..c78f39613dedf 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1490,7 +1490,7 @@ def assert_sp_array_equal( block indices. """ - _check_isinstance(left, right, pd.SparseArray) + _check_isinstance(left, right, pd.arrays.SparseArray) assert_numpy_array_equal(left.sp_values, right.sp_values, check_dtype=check_dtype) From 0b415837af1e16e016b72b959cc4ae06ebaad43a Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Fri, 3 Jan 2020 15:33:00 -0500 Subject: [PATCH 3/9] Fix up isort issues. type issue in __init__. black formatting. whatsnew okwarning --- doc/source/whatsnew/v0.19.0.rst | 2 ++ doc/source/whatsnew/v0.25.0.rst | 2 ++ pandas/__init__.py | 4 ++-- pandas/tests/arrays/sparse/test_accessor.py | 3 +-- pandas/tests/arrays/sparse/test_arithmetics.py | 3 +-- pandas/tests/extension/test_sparse.py | 2 +- pandas/tests/internals/test_internals.py | 2 +- 7 files changed, 10 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst index 6f6446c3f74e1..e7ff7491c95fe 100644 --- a/doc/source/whatsnew/v0.19.0.rst +++ b/doc/source/whatsnew/v0.19.0.rst @@ -1198,6 +1198,7 @@ Sparse data structures now gained enhanced support of ``int64`` and ``bool`` ``d Previously, sparse data were ``float64`` dtype by default, even if all inputs were of ``int`` or ``bool`` dtype. You had to specify ``dtype`` explicitly to create sparse data with ``int64`` dtype. Also, ``fill_value`` had to be specified explicitly because the default was ``np.nan`` which doesn't appear in ``int64`` or ``bool`` data. .. code-block:: ipython + :okwarning: In [1]: pd.SparseArray([1, 2, 0, 0]) Out[1]: @@ -1225,6 +1226,7 @@ Previously, sparse data were ``float64`` dtype by default, even if all inputs we As of v0.19.0, sparse data keeps the input dtype, and uses more appropriate ``fill_value`` defaults (``0`` for ``int64`` dtype, ``False`` for ``bool`` dtype). .. ipython:: python + :okwarning: pd.SparseArray([1, 2, 0, 0], dtype=np.int64) pd.SparseArray([True, False, False, False]) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index b6b91983b8267..b18d022349001 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -354,6 +354,7 @@ When passed DataFrames whose values are sparse, :func:`concat` will now return a :class:`Series` or :class:`DataFrame` with sparse values, rather than a :class:`SparseDataFrame` (:issue:`25702`). .. ipython:: python + :okwarning: df = pd.DataFrame({"A": pd.SparseArray([0, 1])}) @@ -910,6 +911,7 @@ by a ``Series`` or ``DataFrame`` with sparse values. **New way** .. ipython:: python + :okwarning: df = pd.DataFrame({"A": pd.SparseArray([0, 0, 1, 2])}) df.dtypes diff --git a/pandas/__init__.py b/pandas/__init__.py index 360e95c4874a9..10d65e41d3030 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -255,9 +255,9 @@ class Panel: FutureWarning, stacklevel=2, ) - from pandas.core.arrays.sparse import SparseArray + from pandas.core.arrays.sparse import SparseArray as _SparseArray - return SparseArray + return _SparseArray raise AttributeError(f"module 'pandas' has no attribute '{name}'") diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py index ae33398528f4d..edb23c3f90aea 100644 --- a/pandas/tests/arrays/sparse/test_accessor.py +++ b/pandas/tests/arrays/sparse/test_accessor.py @@ -69,8 +69,7 @@ def test_to_dense(self): { "A": pd.arrays.SparseArray([1, 0], dtype=pd.SparseDtype("int64", 0)), "B": pd.arrays.SparseArray([1, 0], dtype=pd.SparseDtype("int64", 1)), - "C": pd.arrays.SparseArray([1.0, 0.0], - dtype=pd.SparseDtype("float64", 0.0)), + "C": pd.arrays.SparseArray([1.0, 0.0], dtype=pd.SparseDtype("float64", 0.0)), }, index=["b", "a"], ) diff --git a/pandas/tests/arrays/sparse/test_arithmetics.py b/pandas/tests/arrays/sparse/test_arithmetics.py index e1a15055f5cea..12c5aa8a2ab73 100644 --- a/pandas/tests/arrays/sparse/test_arithmetics.py +++ b/pandas/tests/arrays/sparse/test_arithmetics.py @@ -416,8 +416,7 @@ def test_with_zerodim_ndarray(): @pytest.mark.parametrize("ufunc", [np.abs, np.exp]) @pytest.mark.parametrize( - "arr", [pd.arrays.SparseArray([0, 0, -1, 1]), - pd.arrays.SparseArray([None, None, -1, 1])] + "arr", [pd.arrays.SparseArray([0, 0, -1, 1]), pd.arrays.SparseArray([None, None, -1, 1])] ) def test_ufuncs(ufunc, arr): result = ufunc(arr) diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 58af2c2727fa6..eec18028a60c6 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -5,8 +5,8 @@ import pandas as pd from pandas import SparseDtype -from pandas.arrays import SparseArray import pandas._testing as tm +from pandas.arrays import SparseArray from pandas.tests.extension import base diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index bc440c3618917..6d9c1644a6ac0 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -20,7 +20,7 @@ ) import pandas._testing as tm import pandas.core.algorithms as algos -from pandas.core.arrays import DatetimeArray, TimedeltaArray, SparseArray +from pandas.core.arrays import DatetimeArray, SparseArray, TimedeltaArray from pandas.core.internals import BlockManager, SingleBlockManager, make_block From d5f365d165e48482e312a8801aa4b744749da777 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Fri, 3 Jan 2020 16:10:52 -0500 Subject: [PATCH 4/9] remove extraneous okwarning. Use black correctly. --- doc/source/whatsnew/v0.19.0.rst | 1 - pandas/tests/arrays/sparse/test_accessor.py | 4 +++- pandas/tests/arrays/sparse/test_arithmetics.py | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst index e7ff7491c95fe..6eb509a258430 100644 --- a/doc/source/whatsnew/v0.19.0.rst +++ b/doc/source/whatsnew/v0.19.0.rst @@ -1198,7 +1198,6 @@ Sparse data structures now gained enhanced support of ``int64`` and ``bool`` ``d Previously, sparse data were ``float64`` dtype by default, even if all inputs were of ``int`` or ``bool`` dtype. You had to specify ``dtype`` explicitly to create sparse data with ``int64`` dtype. Also, ``fill_value`` had to be specified explicitly because the default was ``np.nan`` which doesn't appear in ``int64`` or ``bool`` data. .. code-block:: ipython - :okwarning: In [1]: pd.SparseArray([1, 2, 0, 0]) Out[1]: diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py index edb23c3f90aea..7b397301db24a 100644 --- a/pandas/tests/arrays/sparse/test_accessor.py +++ b/pandas/tests/arrays/sparse/test_accessor.py @@ -69,7 +69,9 @@ def test_to_dense(self): { "A": pd.arrays.SparseArray([1, 0], dtype=pd.SparseDtype("int64", 0)), "B": pd.arrays.SparseArray([1, 0], dtype=pd.SparseDtype("int64", 1)), - "C": pd.arrays.SparseArray([1.0, 0.0], dtype=pd.SparseDtype("float64", 0.0)), + "C": pd.arrays.SparseArray( + [1.0, 0.0], dtype=pd.SparseDtype("float64", 0.0) + ), }, index=["b", "a"], ) diff --git a/pandas/tests/arrays/sparse/test_arithmetics.py b/pandas/tests/arrays/sparse/test_arithmetics.py index 12c5aa8a2ab73..46863da8f61c7 100644 --- a/pandas/tests/arrays/sparse/test_arithmetics.py +++ b/pandas/tests/arrays/sparse/test_arithmetics.py @@ -416,7 +416,8 @@ def test_with_zerodim_ndarray(): @pytest.mark.parametrize("ufunc", [np.abs, np.exp]) @pytest.mark.parametrize( - "arr", [pd.arrays.SparseArray([0, 0, -1, 1]), pd.arrays.SparseArray([None, None, -1, 1])] + "arr", + [pd.arrays.SparseArray([0, 0, -1, 1]), pd.arrays.SparseArray([None, None, -1, 1])], ) def test_ufuncs(ufunc, arr): result = ufunc(arr) From dfef0e5d6b1e5adc30d09958c4c06aaa9bf1b0ea Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Fri, 3 Jan 2020 16:35:31 -0500 Subject: [PATCH 5/9] fix isort issue --- pandas/tests/internals/test_internals.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 6d9c1644a6ac0..15b1434f8629f 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -10,14 +10,7 @@ from pandas._libs.internals import BlockPlacement import pandas as pd -from pandas import ( - Categorical, - DataFrame, - DatetimeIndex, - Index, - MultiIndex, - Series, -) +from pandas import Categorical, DataFrame, DatetimeIndex, Index, MultiIndex, Series import pandas._testing as tm import pandas.core.algorithms as algos from pandas.core.arrays import DatetimeArray, SparseArray, TimedeltaArray From b17143cc8f5c2a3a17cb71b79ac8ea0e6dbb5d80 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sat, 4 Jan 2020 14:03:49 -0500 Subject: [PATCH 6/9] in sparse tests, import SparseArray --- pandas/tests/arrays/sparse/test_accessor.py | 21 ++++----- .../tests/arrays/sparse/test_arithmetics.py | 43 +++++++++---------- pandas/tests/arrays/sparse/test_array.py | 22 ++++------ .../arrays/sparse/test_combine_concat.py | 13 +++--- 4 files changed, 46 insertions(+), 53 deletions(-) diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py index 7b397301db24a..4615eca837393 100644 --- a/pandas/tests/arrays/sparse/test_accessor.py +++ b/pandas/tests/arrays/sparse/test_accessor.py @@ -7,6 +7,7 @@ import pandas as pd import pandas._testing as tm +from pandas.core.arrays.sparse import SparseArray, SparseDtype class TestSeriesAccessor: @@ -31,7 +32,7 @@ def test_accessor_raises(self): def test_from_spmatrix(self, format, labels, dtype): import scipy.sparse - sp_dtype = pd.SparseDtype(dtype, np.array(0, dtype=dtype).item()) + sp_dtype = SparseDtype(dtype, np.array(0, dtype=dtype).item()) mat = scipy.sparse.eye(10, format=format, dtype=dtype) result = pd.DataFrame.sparse.from_spmatrix(mat, index=labels, columns=labels) @@ -48,7 +49,7 @@ def test_from_spmatrix(self, format, labels, dtype): def test_from_spmatrix_columns(self, columns): import scipy.sparse - dtype = pd.SparseDtype("float64", 0.0) + dtype = SparseDtype("float64", 0.0) mat = scipy.sparse.random(10, 2, density=0.5) result = pd.DataFrame.sparse.from_spmatrix(mat, columns=columns) @@ -67,11 +68,9 @@ def test_to_coo(self): def test_to_dense(self): df = pd.DataFrame( { - "A": pd.arrays.SparseArray([1, 0], dtype=pd.SparseDtype("int64", 0)), - "B": pd.arrays.SparseArray([1, 0], dtype=pd.SparseDtype("int64", 1)), - "C": pd.arrays.SparseArray( - [1.0, 0.0], dtype=pd.SparseDtype("float64", 0.0) - ), + "A": SparseArray([1, 0], dtype=SparseDtype("int64", 0)), + "B": SparseArray([1, 0], dtype=SparseDtype("int64", 1)), + "C": SparseArray([1.0, 0.0], dtype=SparseDtype("float64", 0.0)), }, index=["b", "a"], ) @@ -84,8 +83,8 @@ def test_to_dense(self): def test_density(self): df = pd.DataFrame( { - "A": pd.arrays.SparseArray([1, 0, 2, 1], fill_value=0), - "B": pd.arrays.SparseArray([0, 1, 1, 1], fill_value=0), + "A": SparseArray([1, 0, 2, 1], fill_value=0), + "B": SparseArray([0, 1, 1, 1], fill_value=0), } ) res = df.sparse.density @@ -101,9 +100,7 @@ def test_series_from_coo(self, dtype, dense_index): A = scipy.sparse.eye(3, format="coo", dtype=dtype) result = pd.Series.sparse.from_coo(A, dense_index=dense_index) index = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)]) - expected = pd.Series( - pd.arrays.SparseArray(np.array([1, 1, 1], dtype=dtype)), index=index - ) + expected = pd.Series(SparseArray(np.array([1, 1, 1], dtype=dtype)), index=index) if dense_index: expected = expected.reindex(pd.MultiIndex.from_product(index.levels)) diff --git a/pandas/tests/arrays/sparse/test_arithmetics.py b/pandas/tests/arrays/sparse/test_arithmetics.py index 46863da8f61c7..76442a63ccb0f 100644 --- a/pandas/tests/arrays/sparse/test_arithmetics.py +++ b/pandas/tests/arrays/sparse/test_arithmetics.py @@ -6,7 +6,7 @@ import pandas as pd import pandas._testing as tm from pandas.core import ops -from pandas.core.arrays.sparse import SparseDtype +from pandas.core.arrays.sparse import SparseArray, SparseDtype @pytest.fixture(params=["integer", "block"]) @@ -24,7 +24,7 @@ def mix(request): class TestSparseArrayArithmetics: _base = np.array - _klass = pd.arrays.SparseArray + _klass = SparseArray def _assert(self, a, b): tm.assert_numpy_array_equal(a, b) @@ -391,15 +391,15 @@ def test_mixed_array_comparison(self, kind): @pytest.mark.parametrize("op", [operator.eq, operator.add]) def test_with_list(op): - arr = pd.arrays.SparseArray([0, 1], fill_value=0) + arr = SparseArray([0, 1], fill_value=0) result = op(arr, [0, 1]) - expected = op(arr, pd.arrays.SparseArray([0, 1])) + expected = op(arr, SparseArray([0, 1])) tm.assert_sp_array_equal(result, expected) def test_with_dataframe(): # GH#27910 - arr = pd.arrays.SparseArray([0, 1], fill_value=0) + arr = SparseArray([0, 1], fill_value=0) df = pd.DataFrame([[1, 2], [3, 4]]) result = arr.__add__(df) assert result is NotImplemented @@ -407,7 +407,7 @@ def test_with_dataframe(): def test_with_zerodim_ndarray(): # GH#27910 - arr = pd.arrays.SparseArray([0, 1], fill_value=0) + arr = SparseArray([0, 1], fill_value=0) result = arr * np.array(2) expected = arr * 2 @@ -416,24 +416,23 @@ def test_with_zerodim_ndarray(): @pytest.mark.parametrize("ufunc", [np.abs, np.exp]) @pytest.mark.parametrize( - "arr", - [pd.arrays.SparseArray([0, 0, -1, 1]), pd.arrays.SparseArray([None, None, -1, 1])], + "arr", [SparseArray([0, 0, -1, 1]), SparseArray([None, None, -1, 1])] ) def test_ufuncs(ufunc, arr): result = ufunc(arr) fill_value = ufunc(arr.fill_value) - expected = pd.arrays.SparseArray(ufunc(np.asarray(arr)), fill_value=fill_value) + expected = SparseArray(ufunc(np.asarray(arr)), fill_value=fill_value) tm.assert_sp_array_equal(result, expected) @pytest.mark.parametrize( "a, b", [ - (pd.arrays.SparseArray([0, 0, 0]), np.array([0, 1, 2])), - (pd.arrays.SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), - (pd.arrays.SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), - (pd.arrays.SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), - (pd.arrays.SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), + (SparseArray([0, 0, 0]), np.array([0, 1, 2])), + (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), + (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), + (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), + (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), ], ) @pytest.mark.parametrize("ufunc", [np.add, np.greater]) @@ -441,12 +440,12 @@ def test_binary_ufuncs(ufunc, a, b): # can't say anything about fill value here. result = ufunc(a, b) expected = ufunc(np.asarray(a), np.asarray(b)) - assert isinstance(result, pd.arrays.SparseArray) + assert isinstance(result, SparseArray) tm.assert_numpy_array_equal(np.asarray(result), expected) def test_ndarray_inplace(): - sparray = pd.arrays.SparseArray([0, 2, 0, 0]) + sparray = SparseArray([0, 2, 0, 0]) ndarray = np.array([0, 1, 2, 3]) ndarray += sparray expected = np.array([0, 3, 2, 3]) @@ -454,19 +453,19 @@ def test_ndarray_inplace(): def test_sparray_inplace(): - sparray = pd.arrays.SparseArray([0, 2, 0, 0]) + sparray = SparseArray([0, 2, 0, 0]) ndarray = np.array([0, 1, 2, 3]) sparray += ndarray - expected = pd.arrays.SparseArray([0, 3, 2, 3], fill_value=0) + expected = SparseArray([0, 3, 2, 3], fill_value=0) tm.assert_sp_array_equal(sparray, expected) @pytest.mark.parametrize("fill_value", [True, False]) def test_invert(fill_value): arr = np.array([True, False, False, True]) - sparray = pd.arrays.SparseArray(arr, fill_value=fill_value) + sparray = SparseArray(arr, fill_value=fill_value) result = ~sparray - expected = pd.arrays.SparseArray(~arr, fill_value=not fill_value) + expected = SparseArray(~arr, fill_value=not fill_value) tm.assert_sp_array_equal(result, expected) @@ -474,7 +473,7 @@ def test_invert(fill_value): @pytest.mark.parametrize("op", [operator.pos, operator.neg]) def test_unary_op(op, fill_value): arr = np.array([0, 1, np.nan, 2]) - sparray = pd.arrays.SparseArray(arr, fill_value=fill_value) + sparray = SparseArray(arr, fill_value=fill_value) result = op(sparray) - expected = pd.arrays.SparseArray(op(arr), fill_value=op(fill_value)) + expected = SparseArray(op(arr), fill_value=op(fill_value)) tm.assert_sp_array_equal(result, expected) diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index df88ae629cbdd..baca18239b929 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -470,7 +470,7 @@ def test_astype(self): arr.astype("Sparse[i8]") def test_astype_bool(self): - a = pd.arrays.SparseArray([1, 0, 0, 1], dtype=SparseDtype(int, 0)) + a = SparseArray([1, 0, 0, 1], dtype=SparseDtype(int, 0)) result = a.astype(bool) expected = SparseArray([True, 0, 0, True], dtype=SparseDtype(bool, 0)) tm.assert_sp_array_equal(result, expected) @@ -682,7 +682,7 @@ def test_getslice_tuple(self): dense[4:, :] def test_boolean_slice_empty(self): - arr = pd.arrays.SparseArray([0, 1, 2]) + arr = SparseArray([0, 1, 2]) res = arr[[False, False, False]] assert res.dtype == arr.dtype @@ -828,14 +828,12 @@ def test_fillna_overlap(self): def test_nonzero(self): # Tests regression #21172. - sa = pd.arrays.SparseArray( - [float("nan"), float("nan"), 1, 0, 0, 2, 0, 0, 0, 3, 0, 0] - ) + sa = SparseArray([float("nan"), float("nan"), 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]) expected = np.array([2, 5, 9], dtype=np.int32) (result,) = sa.nonzero() tm.assert_numpy_array_equal(expected, result) - sa = pd.arrays.SparseArray([0, 0, 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]) + sa = SparseArray([0, 0, 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]) (result,) = sa.nonzero() tm.assert_numpy_array_equal(expected, result) @@ -1088,13 +1086,11 @@ def test_ufunc_args(self): @pytest.mark.parametrize("fill_value", [0.0, np.nan]) def test_modf(self, fill_value): # https://github.com/pandas-dev/pandas/issues/26946 - sparse = pd.arrays.SparseArray( - [fill_value] * 10 + [1.1, 2.2], fill_value=fill_value - ) + sparse = SparseArray([fill_value] * 10 + [1.1, 2.2], fill_value=fill_value) r1, r2 = np.modf(sparse) e1, e2 = np.modf(np.asarray(sparse)) - tm.assert_sp_array_equal(r1, pd.arrays.SparseArray(e1, fill_value=fill_value)) - tm.assert_sp_array_equal(r2, pd.arrays.SparseArray(e2, fill_value=fill_value)) + tm.assert_sp_array_equal(r1, SparseArray(e1, fill_value=fill_value)) + tm.assert_sp_array_equal(r2, SparseArray(e2, fill_value=fill_value)) def test_nbytes_integer(self): arr = SparseArray([1, 0, 0, 0, 2], kind="integer") @@ -1110,7 +1106,7 @@ def test_nbytes_block(self): assert result == 24 def test_asarray_datetime64(self): - s = pd.arrays.SparseArray(pd.to_datetime(["2012", None, None, "2013"])) + s = SparseArray(pd.to_datetime(["2012", None, None, "2013"])) np.asarray(s) def test_density(self): @@ -1212,7 +1208,7 @@ def test_first_fill_value_loc(arr, loc): ) @pytest.mark.parametrize("fill_value", [np.nan, 0, 1]) def test_unique_na_fill(arr, fill_value): - a = pd.arrays.SparseArray(arr, fill_value=fill_value).unique() + a = SparseArray(arr, fill_value=fill_value).unique() b = pd.Series(arr).unique() assert isinstance(a, SparseArray) a = np.asarray(a) diff --git a/pandas/tests/arrays/sparse/test_combine_concat.py b/pandas/tests/arrays/sparse/test_combine_concat.py index 72447147e32ff..9951470178114 100644 --- a/pandas/tests/arrays/sparse/test_combine_concat.py +++ b/pandas/tests/arrays/sparse/test_combine_concat.py @@ -3,15 +3,16 @@ import pandas as pd import pandas._testing as tm +from pandas.core.arrays.sparse import SparseArray class TestSparseArrayConcat: @pytest.mark.parametrize("kind", ["integer", "block"]) def test_basic(self, kind): - a = pd.arrays.SparseArray([1, 0, 0, 2], kind=kind) - b = pd.arrays.SparseArray([1, 0, 2, 2], kind=kind) + a = SparseArray([1, 0, 0, 2], kind=kind) + b = SparseArray([1, 0, 2, 2], kind=kind) - result = pd.arrays.SparseArray._concat_same_type([a, b]) + result = SparseArray._concat_same_type([a, b]) # Can't make any assertions about the sparse index itself # since we aren't don't merge sparse blocs across arrays # in to_concat @@ -22,10 +23,10 @@ def test_basic(self, kind): @pytest.mark.parametrize("kind", ["integer", "block"]) def test_uses_first_kind(self, kind): other = "integer" if kind == "block" else "block" - a = pd.arrays.SparseArray([1, 0, 0, 2], kind=kind) - b = pd.arrays.SparseArray([1, 0, 2, 2], kind=other) + a = SparseArray([1, 0, 0, 2], kind=kind) + b = SparseArray([1, 0, 2, 2], kind=other) - result = pd.arrays.SparseArray._concat_same_type([a, b]) + result = SparseArray._concat_same_type([a, b]) expected = np.array([1, 2, 1, 2, 2], dtype="int64") tm.assert_numpy_array_equal(result.sp_values, expected) assert result.kind == kind From 627251b7c21d10d2cf9a2d3c50f9f8de115ddb77 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sat, 4 Jan 2020 14:08:30 -0500 Subject: [PATCH 7/9] fix extension/test_sparse.py --- pandas/tests/extension/test_sparse.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index eec18028a60c6..e22f74005db5a 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -232,7 +232,7 @@ def test_combine_le(self, data_repeated): s2 = pd.Series(orig_data2) result = s1.combine(s2, lambda x1, x2: x1 <= x2) expected = pd.Series( - pd.arrays.SparseArray( + SparseArray( [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))], fill_value=False, ) @@ -242,9 +242,7 @@ def test_combine_le(self, data_repeated): val = s1.iloc[0] result = s1.combine(val, lambda x1, x2: x1 <= x2) expected = pd.Series( - pd.arrays.SparseArray( - [a <= val for a in list(orig_data1)], fill_value=False - ) + SparseArray([a <= val for a in list(orig_data1)], fill_value=False) ) self.assert_series_equal(result, expected) @@ -349,7 +347,7 @@ def _compare_other(self, s, data, op_name, other): with np.errstate(all="ignore"): expected = pd.Series( - pd.arrays.SparseArray( + SparseArray( op(np.asarray(data), np.asarray(other)), fill_value=result.values.fill_value, ) From 31b96df7179ce80d8307e0872539acc83fe863de Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sat, 4 Jan 2020 14:26:44 -0500 Subject: [PATCH 8/9] use imports where possible in test_conversion --- pandas/tests/base/test_conversion.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index 35a7d70a40d03..e328cc223c8f2 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -7,7 +7,14 @@ import pandas as pd from pandas import CategoricalIndex, Series, Timedelta, Timestamp import pandas._testing as tm -from pandas.core.arrays import DatetimeArray, PandasArray, TimedeltaArray +from pandas.core.arrays import ( + DatetimeArray, + IntervalArray, + PandasArray, + PeriodArray, + SparseArray, + TimedeltaArray, +) class TestToIterable: @@ -177,14 +184,10 @@ def test_iter_box(self): ), ( pd.PeriodIndex([2018, 2019], freq="A"), - pd.core.arrays.PeriodArray, + PeriodArray, pd.core.dtypes.dtypes.PeriodDtype("A-DEC"), ), - ( - pd.IntervalIndex.from_breaks([0, 1, 2]), - pd.core.arrays.IntervalArray, - "interval", - ), + (pd.IntervalIndex.from_breaks([0, 1, 2]), IntervalArray, "interval",), # This test is currently failing for datetime64[ns] and timedelta64[ns]. # The NumPy type system is sufficient for representing these types, so # we just use NumPy for Series / DataFrame columns of these types (so @@ -270,8 +273,8 @@ def test_numpy_array_all_dtypes(any_numpy_dtype): (pd.Categorical(["a", "b"]), "_codes"), (pd.core.arrays.period_array(["2000", "2001"], freq="D"), "_data"), (pd.core.arrays.integer_array([0, np.nan]), "_data"), - (pd.core.arrays.IntervalArray.from_breaks([0, 1]), "_left"), - (pd.arrays.SparseArray([0, 1]), "_sparse_values"), + (IntervalArray.from_breaks([0, 1]), "_left"), + (SparseArray([0, 1]), "_sparse_values"), (DatetimeArray(np.array([1, 2], dtype="datetime64[ns]")), "_data"), # tz-aware Datetime ( @@ -318,10 +321,10 @@ def test_array_multiindex_raises(): np.array([0, pd.NA], dtype=object), ), ( - pd.core.arrays.IntervalArray.from_breaks([0, 1, 2]), + IntervalArray.from_breaks([0, 1, 2]), np.array([pd.Interval(0, 1), pd.Interval(1, 2)], dtype=object), ), - (pd.arrays.SparseArray([0, 1]), np.array([0, 1], dtype=np.int64)), + (SparseArray([0, 1]), np.array([0, 1], dtype=np.int64)), # tz-naive datetime ( DatetimeArray(np.array(["2000", "2001"], dtype="M8[ns]")), From 0b163ee83b08db0752a7c29f08ebe9b43aa20785 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Sat, 4 Jan 2020 15:06:48 -0500 Subject: [PATCH 9/9] fix lint issue in test_combine_concat. Fix issue in core/generic from master --- pandas/core/generic.py | 2 +- pandas/tests/arrays/sparse/test_combine_concat.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 21a22322daece..5eafa965ca30f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -461,7 +461,7 @@ def _get_index_resolvers(self) -> Dict[str, ABCSeries]: for axis_name in self._AXIS_ORDERS: d.update(self._get_axis_resolvers(axis_name)) - return {clean_column_name(k): v for k, v in d.items() if k is not int} + return {clean_column_name(k): v for k, v in d.items()} def _get_cleaned_column_resolvers(self) -> Dict[str, ABCSeries]: """ diff --git a/pandas/tests/arrays/sparse/test_combine_concat.py b/pandas/tests/arrays/sparse/test_combine_concat.py index 9951470178114..f1697dc9ff7ce 100644 --- a/pandas/tests/arrays/sparse/test_combine_concat.py +++ b/pandas/tests/arrays/sparse/test_combine_concat.py @@ -1,7 +1,6 @@ import numpy as np import pytest -import pandas as pd import pandas._testing as tm from pandas.core.arrays.sparse import SparseArray