2
2
SparseArray data structure
3
3
"""
4
4
from __future__ import division
5
- # pylint: disable=E1101,E1103,W0231
6
5
7
- import re
8
- import operator
9
6
import numbers
10
- import numpy as np
7
+ import operator
8
+ import re
11
9
import warnings
12
10
13
- import pandas as pd
14
- from pandas .core .base import PandasObject
11
+ import numpy as np
15
12
13
+ import pandas ._libs .sparse as splib
14
+ import pandas .core .algorithms as algos
15
+ import pandas .core .common as com
16
+ import pandas .io .formats .printing as printing
16
17
from pandas import compat
17
- from pandas .errors import PerformanceWarning
18
+ from pandas ._libs import index as libindex , lib
19
+ from pandas ._libs .sparse import BlockIndex , IntIndex
20
+ from pandas ._libs .tslibs import NaT
18
21
from pandas .compat .numpy import function as nv
19
-
20
22
from pandas .core .accessor import PandasDelegate , delegate_names
21
23
from pandas .core .arrays import ExtensionArray , ExtensionOpsMixin
22
- import pandas .core .common as com
24
+ from pandas .core .base import PandasObject
23
25
from pandas .core .dtypes .base import ExtensionDtype
26
+ from pandas .core .dtypes .cast import (
27
+ astype_nansafe , construct_1d_arraylike_from_scalar , find_common_type ,
28
+ infer_dtype_from_scalar , maybe_convert_platform
29
+ )
30
+ from pandas .core .dtypes .common import (
31
+ is_array_like , is_bool_dtype , is_datetime64_any_dtype , is_dtype_equal ,
32
+ is_integer , is_list_like , is_object_dtype , is_scalar , is_string_dtype ,
33
+ pandas_dtype
34
+ )
24
35
from pandas .core .dtypes .dtypes import register_extension_dtype
25
36
from pandas .core .dtypes .generic import (
26
- ABCSparseSeries , ABCSeries , ABCIndexClass
37
+ ABCIndexClass , ABCSeries , ABCSparseSeries
27
38
)
28
- from pandas .core .dtypes .common import (
29
- is_datetime64_any_dtype ,
30
- is_integer ,
31
- is_object_dtype ,
32
- is_array_like ,
33
- pandas_dtype ,
34
- is_bool_dtype ,
35
- is_list_like ,
36
- is_string_dtype ,
37
- is_scalar , is_dtype_equal )
38
- from pandas .core .dtypes .cast import (
39
- maybe_convert_platform ,
40
- astype_nansafe , find_common_type , infer_dtype_from_scalar ,
41
- construct_1d_arraylike_from_scalar )
42
- from pandas .core .dtypes .missing import isna , notna , na_value_for_dtype
39
+ from pandas .core .dtypes .missing import isna , na_value_for_dtype , notna
43
40
from pandas .core .missing import interpolate_2d
44
-
45
- import pandas ._libs .sparse as splib
46
- from pandas ._libs .sparse import BlockIndex , IntIndex
47
- from pandas ._libs import index as libindex
48
- from pandas ._libs import lib
49
- import pandas .core .algorithms as algos
50
- import pandas .io .formats .printing as printing
41
+ from pandas .errors import PerformanceWarning
51
42
52
43
53
44
# ----------------------------------------------------------------------------
54
45
# Dtype
55
-
56
46
@register_extension_dtype
57
47
class SparseDtype (ExtensionDtype ):
58
48
"""
@@ -620,7 +610,7 @@ def __array__(self, dtype=None, copy=True):
620
610
if is_datetime64_any_dtype (self .sp_values .dtype ):
621
611
# However, we *do* special-case the common case of
622
612
# a datetime64 with pandas NaT.
623
- if fill_value is pd . NaT :
613
+ if fill_value is NaT :
624
614
# Can't put pd.NaT in a datetime64[ns]
625
615
fill_value = np .datetime64 ('NaT' )
626
616
try :
@@ -710,7 +700,7 @@ def _null_fill_value(self):
710
700
711
701
def _fill_value_matches (self , fill_value ):
712
702
if self ._null_fill_value :
713
- return pd . isna (fill_value )
703
+ return isna (fill_value )
714
704
else :
715
705
return self .fill_value == fill_value
716
706
@@ -855,7 +845,7 @@ def _first_fill_value_loc(self):
855
845
return np .searchsorted (diff , 2 ) + 1
856
846
857
847
def unique (self ):
858
- uniques = list (pd .unique (self .sp_values ))
848
+ uniques = list (algos .unique (self .sp_values ))
859
849
fill_loc = self ._first_fill_value_loc ()
860
850
if fill_loc >= 0 :
861
851
uniques .insert (fill_loc , self .fill_value )
@@ -871,8 +861,8 @@ def factorize(self, na_sentinel=-1):
871
861
# ExtensionArray.factorize -> Tuple[EA, EA]
872
862
# Given that we have to return a dense array of labels, why bother
873
863
# implementing an efficient factorize?
874
- labels , uniques = pd .factorize (np .asarray (self ),
875
- na_sentinel = na_sentinel )
864
+ labels , uniques = algos .factorize (np .asarray (self ),
865
+ na_sentinel = na_sentinel )
876
866
uniques = SparseArray (uniques , dtype = self .dtype )
877
867
return labels , uniques
878
868
@@ -889,6 +879,8 @@ def value_counts(self, dropna=True):
889
879
-------
890
880
counts : Series
891
881
"""
882
+ from pandas import Index , Series
883
+
892
884
keys , counts = algos ._value_counts_arraylike (self .sp_values ,
893
885
dropna = dropna )
894
886
fcounts = self .sp_index .ngaps
@@ -897,7 +889,7 @@ def value_counts(self, dropna=True):
897
889
pass
898
890
else :
899
891
if self ._null_fill_value :
900
- mask = pd . isna (keys )
892
+ mask = isna (keys )
901
893
else :
902
894
mask = keys == self .fill_value
903
895
@@ -907,9 +899,9 @@ def value_counts(self, dropna=True):
907
899
keys = np .insert (keys , 0 , self .fill_value )
908
900
counts = np .insert (counts , 0 , fcounts )
909
901
910
- if not isinstance (keys , pd . Index ):
911
- keys = pd . Index (keys )
912
- result = pd . Series (counts , index = keys )
902
+ if not isinstance (keys , ABCIndexClass ):
903
+ keys = Index (keys )
904
+ result = Series (counts , index = keys )
913
905
return result
914
906
915
907
# --------
0 commit comments