@@ -285,18 +285,27 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
285
285
note: an array of Periods will ignore sort as it returns an always sorted
286
286
PeriodIndex
287
287
"""
288
- from pandas import Index , Series , DatetimeIndex
289
-
290
- vals = np .asarray (values )
291
-
292
- # localize to UTC
293
- is_datetimetz_type = is_datetimetz (values )
294
- if is_datetimetz_type :
295
- values = DatetimeIndex (values )
296
- vals = values .asi8
288
+ from pandas import Index , Series , DatetimeIndex , PeriodIndex
289
+
290
+ # handling two possibilities here
291
+ # - for a numpy datetimelike simply view as i8 then cast back
292
+ # - for an extension datetimelike view as i8 then
293
+ # reconstruct from boxed values to transfer metadata
294
+ dtype = None
295
+ if needs_i8_conversion (values ):
296
+ if is_period_dtype (values ):
297
+ values = PeriodIndex (values )
298
+ vals = values .asi8
299
+ elif is_datetimetz (values ):
300
+ values = DatetimeIndex (values )
301
+ vals = values .asi8
302
+ else :
303
+ # numpy dtype
304
+ dtype = values .dtype
305
+ vals = values .view (np .int64 )
306
+ else :
307
+ vals = np .asarray (values )
297
308
298
- is_datetime = is_datetime64_dtype (vals )
299
- is_timedelta = is_timedelta64_dtype (vals )
300
309
(hash_klass , vec_klass ), vals = _get_data_algo (vals , _hashtables )
301
310
302
311
table = hash_klass (size_hint or len (vals ))
@@ -311,13 +320,9 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
311
320
uniques , labels = safe_sort (uniques , labels , na_sentinel = na_sentinel ,
312
321
assume_unique = True )
313
322
314
- if is_datetimetz_type :
315
- # reset tz
316
- uniques = values ._shallow_copy (uniques )
317
- elif is_datetime :
318
- uniques = uniques .astype ('M8[ns]' )
319
- elif is_timedelta :
320
- uniques = uniques .astype ('m8[ns]' )
323
+ if dtype is not None :
324
+ uniques = uniques .astype (dtype )
325
+
321
326
if isinstance (values , Index ):
322
327
uniques = values ._shallow_copy (uniques , name = None )
323
328
elif isinstance (values , Series ):
0 commit comments