17
17
from pandas .types .common import (is_numeric_dtype ,
18
18
is_timedelta64_dtype , is_datetime64_dtype ,
19
19
is_categorical_dtype ,
20
+ is_interval_dtype ,
20
21
is_datetimelike ,
21
22
is_datetime64_any_dtype ,
22
23
is_bool , is_integer_dtype ,
39
40
40
41
from pandas .core .base import (PandasObject , SelectionMixin , GroupByError ,
41
42
DataError , SpecificationError )
43
+ from pandas .core .index import (Index , MultiIndex ,
44
+ CategoricalIndex , _ensure_index )
42
45
from pandas .core .categorical import Categorical
43
46
from pandas .core .frame import DataFrame
44
47
from pandas .core .generic import NDFrame
45
- from pandas .core .interval import IntervalIndex
46
48
from pandas .core .internals import BlockManager , make_block
47
49
from pandas .core .series import Series
48
50
from pandas .core .panel import Panel
@@ -2579,7 +2581,7 @@ def _convert_grouper(axis, grouper):
2579
2581
return grouper .reindex (axis )._values
2580
2582
elif isinstance (grouper , (list , Series , Index , np .ndarray )):
2581
2583
if len (grouper ) != len (axis ):
2582
- raise AssertionError ('Grouper and axis must be same length' )
2584
+ raise ValueError ('Grouper and axis must be same length' )
2583
2585
return grouper
2584
2586
else :
2585
2587
return grouper
@@ -3063,36 +3065,41 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
3063
3065
3064
3066
if bins is None :
3065
3067
lab , lev = algos .factorize (val , sort = True )
3068
+ llab = lambda lab , inc : lab [inc ]
3066
3069
else :
3067
- raise NotImplementedError ('this is broken' )
3068
- lab , bins = cut (val , bins , retbins = True )
3069
- # bins[:-1] for backward compat;
3070
- # o.w. cat.categories could be better
3071
- # cat = Categorical(cat)
3072
- # lab, lev, dropna = cat.codes, bins[:-1], False
3073
-
3074
- if (lab .dtype == object
3075
- and lib .is_interval_array_fixed_closed (lab [notnull (lab )])):
3076
- lab_index = Index (lab )
3077
- assert isinstance (lab , IntervalIndex )
3078
- sorter = np .lexsort ((lab_index .left , lab_index .right , ids ))
3070
+
3071
+ # lab is an IntervalIndex
3072
+ # we get our last level of labels from the
3073
+ # II indexer
3074
+ # TODO: make this a method on II
3075
+ lab , _ , lev = cut (val , bins , retbins = True , include_lowest = True )
3076
+
3077
+ # we compute the levels here rather than use the bins
3078
+ # because we may have adjusted them with include_lowest
3079
+ llab = lambda lab , inc : lab [inc ]._multiindex .labels [- 1 ]
3080
+
3081
+ if is_interval_dtype (lab ):
3082
+ # TODO: should we do this inside II?
3083
+ sorter = np .lexsort ((lab .left , lab .right , ids ))
3079
3084
else :
3080
3085
sorter = np .lexsort ((lab , ids ))
3086
+
3081
3087
ids , lab = ids [sorter ], lab [sorter ]
3082
3088
3083
3089
# group boundaries are where group ids change
3084
3090
idx = np .r_ [0 , 1 + np .nonzero (ids [1 :] != ids [:- 1 ])[0 ]]
3085
3091
3086
3092
# new values are where sorted labels change
3087
- inc = np .r_ [True , lab [1 :] != lab [:- 1 ]]
3093
+ lchanges = llab (lab , slice (1 , None )) != llab (lab , slice (None , - 1 ))
3094
+ inc = np .r_ [True , lchanges ]
3088
3095
inc [idx ] = True # group boundaries are also new values
3089
3096
out = np .diff (np .nonzero (np .r_ [inc , True ])[0 ]) # value counts
3090
3097
3091
3098
# num. of times each group should be repeated
3092
3099
rep = partial (np .repeat , repeats = np .add .reduceat (inc , idx ))
3093
3100
3094
3101
# multi-index components
3095
- labels = list (map (rep , self .grouper .recons_labels )) + [lab [ inc ] ]
3102
+ labels = list (map (rep , self .grouper .recons_labels )) + [llab ( lab , inc ) ]
3096
3103
levels = [ping .group_index for ping in self .grouper .groupings ] + [lev ]
3097
3104
names = self .grouper .names + [self .name ]
3098
3105
@@ -3118,13 +3125,12 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
3118
3125
acc = rep (d )
3119
3126
out /= acc
3120
3127
3121
- if sort : # and bins is None:
3128
+ if sort and bins is None :
3122
3129
cat = ids [inc ][mask ] if dropna else ids [inc ]
3123
3130
sorter = np .lexsort ((out if ascending else - out , cat ))
3124
3131
out , labels [- 1 ] = out [sorter ], labels [- 1 ][sorter ]
3125
3132
3126
- # if bins is None:
3127
- if True :
3133
+ if bins is None :
3128
3134
mi = MultiIndex (levels = levels , labels = labels , names = names ,
3129
3135
verify_integrity = False )
3130
3136
0 commit comments