@@ -285,18 +285,18 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
285
285
note: an array of Periods will ignore sort as it returns an always sorted
286
286
PeriodIndex
287
287
"""
288
- from pandas import Index , Series , DatetimeIndex
288
+ from pandas import Index , Series , DatetimeIndex , PeriodIndex
289
289
290
290
vals = np .asarray (values )
291
291
292
- # localize to UTC
293
292
is_datetimetz_type = is_datetimetz (values )
294
- if is_datetimetz_type :
295
- values = DatetimeIndex ( values )
296
- vals = values .asi8
293
+ is_period_type = is_period_dtype ( values )
294
+ if is_datetimetz_type or is_period_type :
295
+ vals = values .view ( 'i8' )
297
296
298
297
is_datetime = is_datetime64_dtype (vals )
299
298
is_timedelta = is_timedelta64_dtype (vals )
299
+
300
300
(hash_klass , vec_klass ), vals = _get_data_algo (vals , _hashtables )
301
301
302
302
table = hash_klass (size_hint or len (vals ))
@@ -318,6 +318,8 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
318
318
uniques = uniques .astype ('M8[ns]' )
319
319
elif is_timedelta :
320
320
uniques = uniques .astype ('m8[ns]' )
321
+ elif is_period_type :
322
+ uniques = PeriodIndex (uniques , freq = values .freq )
321
323
if isinstance (values , Index ):
322
324
uniques = values ._shallow_copy (uniques , name = None )
323
325
elif isinstance (values , Series ):
@@ -362,7 +364,8 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
362
364
raise TypeError ("bins argument only works with numeric data." )
363
365
values = cat .codes
364
366
365
- if is_extension_type (values ) and not is_datetimetz (values ):
367
+ if (is_extension_type (values ) and
368
+ not (is_datetimetz (values ) or is_period (values ))):
366
369
# handle Categorical and sparse,
367
370
# datetime tz can be handled in ndarray path
368
371
result = Series (values ).values .value_counts (dropna = dropna )
@@ -394,25 +397,14 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
394
397
395
398
def _value_counts_arraylike (values , dropna = True ):
396
399
is_datetimetz_type = is_datetimetz (values )
397
- is_period_type = (is_period_dtype (values ) or
398
- is_period_arraylike (values ))
399
-
400
+ is_period_type = is_period_dtype (values )
400
401
orig = values
401
402
402
403
from pandas .core .series import Series
403
- values = Series (values ).values
404
+ values = Series (values )._values
404
405
dtype = values .dtype
405
406
406
- if needs_i8_conversion (dtype ) or is_period_type :
407
-
408
- from pandas .tseries .index import DatetimeIndex
409
- from pandas .tseries .period import PeriodIndex
410
-
411
- if is_period_type :
412
- # values may be an object
413
- values = PeriodIndex (values )
414
- freq = values .freq
415
-
407
+ if needs_i8_conversion (dtype ):
416
408
values = values .view (np .int64 )
417
409
keys , counts = htable .value_count_int64 (values , dropna )
418
410
@@ -421,13 +413,14 @@ def _value_counts_arraylike(values, dropna=True):
421
413
keys , counts = keys [msk ], counts [msk ]
422
414
423
415
# convert the keys back to the dtype we came in
424
- keys = keys .astype (dtype )
425
-
426
- # dtype handling
427
416
if is_datetimetz_type :
417
+ from pandas .tseries .index import DatetimeIndex
428
418
keys = DatetimeIndex ._simple_new (keys , tz = orig .dtype .tz )
429
- if is_period_type :
430
- keys = PeriodIndex ._simple_new (keys , freq = freq )
419
+ elif is_period_type :
420
+ from pandas .tseries .period import PeriodIndex
421
+ keys = PeriodIndex ._simple_new (keys , freq = orig .dtype .freq )
422
+ else :
423
+ keys = keys .astype (dtype )
431
424
432
425
elif is_integer_dtype (dtype ):
433
426
values = _ensure_int64 (values )
@@ -471,9 +464,6 @@ def duplicated(values, keep='first'):
471
464
# no need to revert to original type
472
465
if needs_i8_conversion (dtype ):
473
466
values = values .view (np .int64 )
474
- elif is_period_arraylike (values ):
475
- from pandas .tseries .period import PeriodIndex
476
- values = PeriodIndex (values ).asi8
477
467
elif is_categorical_dtype (dtype ):
478
468
values = values .values .codes
479
469
elif isinstance (values , (ABCSeries , ABCIndex )):
@@ -1010,8 +1000,9 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None,
1010
1000
if is_categorical (arr ):
1011
1001
return arr .take_nd (indexer , fill_value = fill_value ,
1012
1002
allow_fill = allow_fill )
1013
- elif is_datetimetz (arr ):
1014
- return arr .take (indexer , fill_value = fill_value , allow_fill = allow_fill )
1003
+ elif is_extension_type (arr ):
1004
+ return arr .take (indexer , fill_value = fill_value ,
1005
+ allow_fill = allow_fill )
1015
1006
1016
1007
if indexer is None :
1017
1008
indexer = np .arange (arr .shape [axis ], dtype = np .int64 )
0 commit comments