REF: SparseArray imports #23329

Merged · 5 commits · Oct 26, 2018

76 changes: 34 additions & 42 deletions pandas/core/arrays/sparse.py
@@ -2,57 +2,47 @@
SparseArray data structure
"""
from __future__ import division
# pylint: disable=E1101,E1103,W0231

import re
import operator
import numbers
import numpy as np
import operator
import re
import warnings

import pandas as pd
from pandas.core.base import PandasObject
import numpy as np

import pandas._libs.sparse as splib
import pandas.core.algorithms as algos
import pandas.core.common as com
import pandas.io.formats.printing as printing
from pandas import compat
from pandas.errors import PerformanceWarning
from pandas._libs import index as libindex, lib
from pandas._libs.sparse import BlockIndex, IntIndex
from pandas._libs.tslibs import NaT
from pandas.compat.numpy import function as nv

from pandas.core.accessor import PandasDelegate, delegate_names
from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin
import pandas.core.common as com
from pandas.core.base import PandasObject
from pandas.core.dtypes.base import ExtensionDtype
from pandas.core.dtypes.cast import (
astype_nansafe, construct_1d_arraylike_from_scalar, find_common_type,
infer_dtype_from_scalar, maybe_convert_platform
)
from pandas.core.dtypes.common import (
is_array_like, is_bool_dtype, is_datetime64_any_dtype, is_dtype_equal,
is_integer, is_list_like, is_object_dtype, is_scalar, is_string_dtype,
pandas_dtype
)
from pandas.core.dtypes.dtypes import register_extension_dtype
from pandas.core.dtypes.generic import (
ABCSparseSeries, ABCSeries, ABCIndexClass
ABCIndexClass, ABCSeries, ABCSparseSeries
)
from pandas.core.dtypes.common import (
is_datetime64_any_dtype,
is_integer,
is_object_dtype,
is_array_like,
pandas_dtype,
is_bool_dtype,
is_list_like,
is_string_dtype,
is_scalar, is_dtype_equal)
from pandas.core.dtypes.cast import (
maybe_convert_platform,
astype_nansafe, find_common_type, infer_dtype_from_scalar,
construct_1d_arraylike_from_scalar)
from pandas.core.dtypes.missing import isna, notna, na_value_for_dtype
from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna
from pandas.core.missing import interpolate_2d

import pandas._libs.sparse as splib
from pandas._libs.sparse import BlockIndex, IntIndex
from pandas._libs import index as libindex
from pandas._libs import lib
import pandas.core.algorithms as algos
import pandas.io.formats.printing as printing
from pandas.errors import PerformanceWarning


# ----------------------------------------------------------------------------
# Dtype

@register_extension_dtype
class SparseDtype(ExtensionDtype):
"""
@@ -620,7 +610,7 @@ def __array__(self, dtype=None, copy=True):
if is_datetime64_any_dtype(self.sp_values.dtype):
# However, we *do* special-case the common case of
# a datetime64 with pandas NaT.
if fill_value is pd.NaT:
if fill_value is NaT:
# Can't put pd.NaT in a datetime64[ns]
fill_value = np.datetime64('NaT')
try:
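
A minimal sketch (not the actual code path) of why the hunk above compares against the NaT imported from pandas._libs.tslibs and then falls back to np.datetime64('NaT'): a datetime64[ns] ndarray can only be filled with a native NumPy datetime scalar, so the pandas NaT singleton has to be translated before densifying.

    import numpy as np
    from pandas._libs.tslibs import NaT

    fill_value = NaT
    if fill_value is NaT:
        # Can't put pandas NaT directly into a datetime64[ns] ndarray,
        # so fall back to the native NumPy missing-value sentinel.
        fill_value = np.datetime64('NaT')

    dense = np.full(3, fill_value, dtype='datetime64[ns]')
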
@@ -710,7 +700,7 @@ def _null_fill_value(self):

def _fill_value_matches(self, fill_value):
if self._null_fill_value:
return pd.isna(fill_value)
return isna(fill_value)
else:
return self.fill_value == fill_value

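The change above only swaps pd.isna for the isna imported from pandas.core.dtypes.missing; the logic stays the same. A standalone sketch of that logic using the same helper (the free function name here is hypothetical):

    import numpy as np
    from pandas.core.dtypes.missing import isna

    def fill_value_matches(fill_value, other):
        # NA-like fill values (NaN, None, NaT) are compared via isna()
        # because np.nan != np.nan; everything else uses plain equality.
        if isna(fill_value):
            return isna(other)
        return fill_value == other

    fill_value_matches(np.nan, None)  # True
    fill_value_matches(0, 0.0)        # True
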
@@ -855,7 +845,7 @@ def _first_fill_value_loc(self):
return np.searchsorted(diff, 2) + 1

def unique(self):
uniques = list(pd.unique(self.sp_values))
uniques = list(algos.unique(self.sp_values))
fill_loc = self._first_fill_value_loc()
if fill_loc >= 0:
uniques.insert(fill_loc, self.fill_value)
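
For context, a usage sketch of the behaviour this hunk keeps intact: unique() is computed on the stored sp_values and the fill value is spliced back in at the position of its first occurrence, so it appears in the result even though it is never stored. (Exact return type and ordering depend on the pandas version.)

    import pandas as pd

    arr = pd.SparseArray([0, 1, 0, 2], fill_value=0)
    arr.sp_values   # only the non-fill values, e.g. array([1, 2])
    arr.unique()    # the fill value 0 is inserted back at its first location
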
@@ -871,8 +861,8 @@ def factorize(self, na_sentinel=-1):
# ExtensionArray.factorize -> Tuple[EA, EA]
# Given that we have to return a dense array of labels, why bother
# implementing an efficient factorize?
labels, uniques = pd.factorize(np.asarray(self),
na_sentinel=na_sentinel)
labels, uniques = algos.factorize(np.asarray(self),
na_sentinel=na_sentinel)
uniques = SparseArray(uniques, dtype=self.dtype)
return labels, uniques

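As the comment in the diff says, factorize has to return dense labels anyway, so it densifies, runs the generic factorizer, and re-wraps the uniques. A hedged usage sketch:

    import numpy as np
    import pandas as pd

    arr = pd.SparseArray([1.0, np.nan, 1.0, 2.0])
    labels, uniques = arr.factorize()
    # labels is a plain ndarray with np.nan mapped to the na_sentinel (-1);
    # uniques is wrapped back into a SparseArray with the original dtype.
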
@@ -889,6 +879,8 @@ def value_counts(self, dropna=True):
-------
counts : Series
"""
from pandas import Index, Series

keys, counts = algos._value_counts_arraylike(self.sp_values,
dropna=dropna)
fcounts = self.sp_index.ngaps
@@ -897,7 +889,7 @@ def value_counts(self, dropna=True):
pass
else:
if self._null_fill_value:
mask = pd.isna(keys)
mask = isna(keys)
else:
mask = keys == self.fill_value

@@ -907,9 +899,9 @@ def value_counts(self, dropna=True):
keys = np.insert(keys, 0, self.fill_value)
counts = np.insert(counts, 0, fcounts)

if not isinstance(keys, pd.Index):
keys = pd.Index(keys)
result = pd.Series(counts, index=keys)
if not isinstance(keys, ABCIndexClass):
keys = Index(keys)
result = Series(counts, index=keys)
return result

# --------
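
A usage sketch of what the value_counts hunks above compute: counts for the stored values come from algos._value_counts_arraylike, while the count for the fill value is derived from sp_index.ngaps, so the fill value is reported without ever being materialised.

    import pandas as pd

    arr = pd.SparseArray([0, 0, 1, 2, 1], fill_value=0)
    arr.value_counts()
    # The fill value 0 is reported with count 2, taken from the number of
    # gaps in sp_index rather than from the stored sp_values ([1, 2, 1]).
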
3 changes: 1 addition & 2 deletions pandas/core/series.py
@@ -24,7 +24,7 @@
from pandas.compat.numpy import function as nv
from pandas.core import base, generic
from pandas.core.accessor import CachedAccessor
from pandas.core.arrays import ExtensionArray, period_array
from pandas.core.arrays import ExtensionArray, SparseArray, period_array
from pandas.core.arrays.categorical import Categorical, CategoricalAccessor
from pandas.core.arrays.sparse import SparseAccessor
from pandas.core.config import get_option
@@ -1367,7 +1367,6 @@ def to_sparse(self, kind='block', fill_value=None):
"""
# TODO: deprecate
from pandas.core.sparse.series import SparseSeries
from pandas.core.arrays import SparseArray

values = SparseArray(self, kind=kind, fill_value=fill_value)
return SparseSeries(
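
For context, to_sparse was, at the time of this PR, the user-facing way to build a SparseSeries (it has since been deprecated); a usage sketch of the path that now relies on the module-level SparseArray import in series.py:

    import pandas as pd

    s = pd.Series([0.0, 1.0, 0.0, 2.0])
    ss = s.to_sparse(kind='block', fill_value=0.0)
    # Wraps the values in a SparseArray and hands them to SparseSeries.
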
2 changes: 0 additions & 2 deletions setup.cfg
@@ -175,8 +175,6 @@ skip=
pandas/core/reshape/merge.py,
pandas/core/reshape/reshape.py,
pandas/core/reshape/pivot.py,
pandas/core/sparse/array.py,
pandas/core/arrays/sparse.py,
pandas/core/sparse/api.py,
pandas/core/sparse/series.py,
pandas/core/sparse/frame.py,
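
Dropping pandas/core/sparse/array.py and pandas/core/arrays/sparse.py from the isort skip list in setup.cfg means import ordering is now enforced for those files; locally that check can be reproduced with something like isort --check-only pandas/core/arrays/sparse.py (assuming the isort version configured for the repo at the time).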