@@ -262,64 +262,6 @@ def __init__(self, values, dtype=None, copy=False):
             copy = False
         elif not isinstance(values, pd.core.arrays.numeric.NumericArray):
             values = pd.array(values, copy=copy)
-        else:  # not isinstance(values, np.ndarray):
-            if HAS_UNCERTAINTIES and dtype.kind == "O":
-                values = np.array(values, dtype=object, copy=copy)
-            else:
-                values = np.array(values, copy=copy)
-            copy = False
-        if HAS_UNCERTAINTIES:
-            if np.issubdtype(values.dtype, np.floating) or len(values) == 0:
-                pass
-            else:
-                value_notna = [
-                    isinstance(v, UFloat)
-                    for v in values
-                    if not (pd.isna(v) or unp.isnan(v))
-                ]
-                if value_notna == []:
-                    # all NaNs, either from our own data, or from Pint/Pandas internals
-                    pa_nan = _ufloat_nan if dtype.kind == "O" else np.nan
-                    for i in range(len(values)):
-                        # Promote/demote NaNs to match non-NaN magnitudes
-                        values[i] = pa_nan
-                    copy = False
-                else:
-                    any_UFloats = any(value_notna)
-                    all_UFloats = all(value_notna)
-                    if any_UFloats != all_UFloats:
-                        # warnings.warn(
-                        #     f"pint-pandas does not support certain magnitudes of {values.dtype}. Converting magnitudes to ufloat.",
-                        #     category=RuntimeWarning,
-                        # )
-                        for i, v in enumerate(values):
-                            # List comprehensions are great, but they are not np.arrays!
-                            if not isinstance(v, UFloat):
-                                if pd.isna(v):
-                                    values[i] = _ufloat_nan
-                                else:
-                                    values[i] = ufloat(v, 0)
-                            elif unp.isnan(v):
-                                # Do we need to canonicalize our NaNs?
-                                values[i] = _ufloat_nan
-                        copy = False
-                    else:
-                        pa_nan = _ufloat_nan if any_UFloats else np.nan
-                        for i, v in enumerate(values):
-                            # Promote/demote NaNs to match non-NaN magnitudes
-                            if pd.isna(v) or unp.isnan(v):
-                                values[i] = pa_nan
-                        copy = False
-                    if not any_UFloats:
-                        values = values.astype(float)
-                        copy = False
-        elif not np.issubdtype(values.dtype, np.floating):
-            warnings.warn(
-                f"pint-pandas does not support magnitudes of {values.dtype}. Converting magnitudes to float.",
-                category=RuntimeWarning,
-            )
-            values = values.astype(float)
-            copy = False
         if copy:
             values = values.copy()
         self._data = values
@@ -438,10 +380,11 @@ def isna(self):
         """
         if HAS_UNCERTAINTIES:
             # GH https://github.com/lebigot/uncertainties/issues/164
-            if isinstance(self._data, np.ndarray) and len(self._data) == 0:
+            if len(self._data) == 0:
                 # True or False doesn't matter--we just need the value for the type
                 return np.full((0), True)
-            return unp.isnan(self._data)
+            elif isinstance(self._data[0], UFloat):
+                return unp.isnan(self._data)
         return self._data.isna()

     def astype(self, dtype, copy=True):
@@ -533,7 +476,8 @@ def take(self, indices, allow_fill=False, fill_value=None):
         Examples
         --------
         """
-        from pandas.core.algorithms import take, is_scalar
+        from pandas.core.algorithms import take
+        from pandas.core.dtypes.common import is_scalar

         data = self._data
         if allow_fill and fill_value is None:
@@ -592,8 +536,8 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
         if dtype is None and isinstance(master_scalar, _Quantity):
             dtype = PintType(master_scalar.units)

-        def quantify_nan(item):
-            if HAS_UNCERTAINTIES:
+        def quantify_nan(item, promote_to_ufloat):
+            if promote_to_ufloat:
                 if type(item) is UFloat:
                     return item * dtype.units
                 if type(item) is float:
@@ -607,11 +551,19 @@ def quantify_nan(item):
             return item

         if isinstance(master_scalar, _Quantity):
-            scalars = [quantify_nan(item) for item in scalars]
+            if HAS_UNCERTAINTIES:
+                promote_to_ufloat = any([isinstance(item.m, UFloat) for item in scalars])
+            else:
+                promote_to_ufloat = False
+            scalars = [quantify_nan(item, promote_to_ufloat) for item in scalars]
             scalars = [
                 (item.to(dtype.units).magnitude if hasattr(item, "to") else item)
                 for item in scalars
             ]
+        if HAS_UNCERTAINTIES:
+            promote_to_ufloat = any([isinstance(item, UFloat) for item in scalars])
+            if promote_to_ufloat:
+                scalars = [item if isinstance(item, UFloat) else _ufloat_nan if np.isnan(item) else ufloat(item, 0) for item in scalars]
         return cls(scalars, dtype=dtype, copy=copy)

     @classmethod
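As an aside on the promotion rule in the hunk above: once any magnitude in the incoming sequence is a `UFloat`, every plain float (including NaN) gets promoted so the resulting array has a single magnitude type. Below is a minimal standalone sketch of that rule, not part of the patch, assuming only `numpy` and `uncertainties` are installed; `promote_magnitudes` is a hypothetical local helper and `_ufloat_nan` is recreated here because in pint-pandas it lives at module level.

```python
# Standalone sketch of the promotion pass used in _from_sequence above.
# Illustrative only; names below are local to this example.
import numpy as np
from uncertainties import ufloat, UFloat

_ufloat_nan = ufloat(np.nan, 0)  # stand-in for pint-pandas' module-level NaN ufloat

def promote_magnitudes(scalars):
    """Promote plain floats (and NaNs) to UFloat once any UFloat is present."""
    promote_to_ufloat = any(isinstance(item, UFloat) for item in scalars)
    if not promote_to_ufloat:
        return scalars
    return [
        item
        if isinstance(item, UFloat)
        else _ufloat_nan
        if np.isnan(item)
        else ufloat(item, 0)
        for item in scalars
    ]

print(promote_magnitudes([1.0, float("nan"), ufloat(2.0, 0.1)]))
# roughly: [1.0+/-0, nan+/-0, 2.0+/-0.1]
```

In the diff the check runs twice because the scalars may arrive either as Quantities (checked through `item.m`) or as bare magnitudes after the unit conversion step.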
@@ -620,15 +572,90 @@ def _from_sequence_of_strings(cls, scalars, dtype=None, copy=False):
             dtype = PintType.construct_from_quantity_string(scalars[0])
         return cls._from_sequence([dtype.ureg.Quantity(x) for x in scalars])

+    def factorize(
+        self,
+        use_na_sentinel: bool = True,
+    ) -> tuple[np.ndarray, ExtensionArray]:
+        """
+        Encode the extension array as an enumerated type.
+
+        Parameters
+        ----------
+        use_na_sentinel : bool, default True
+            If True, the sentinel -1 will be used for NaN values. If False,
+            NaN values will be encoded as non-negative integers and will not drop the
+            NaN from the uniques of the values.
+
+            .. versionadded:: 1.5.0
+
+        Returns
+        -------
+        codes : ndarray
+            An integer NumPy array that's an indexer into the original
+            ExtensionArray.
+        uniques : ExtensionArray
+            An ExtensionArray containing the unique values of `self`.
+
+            .. note::
+
+               uniques will *not* contain an entry for the NA value of
+               the ExtensionArray if there are any missing values present
+               in `self`.
+
+        See Also
+        --------
+        factorize : Top-level factorize method that dispatches here.
+
+        Notes
+        -----
+        :meth:`pandas.factorize` offers a `sort` keyword as well.
+        """
+        # Implementer note: There are two ways to override the behavior of
+        # pandas.factorize
+        # 1. _values_for_factorize and _from_factorize.
+        #    Specify the values passed to pandas' internal factorization
+        #    routines, and how to convert from those values back to the
+        #    original ExtensionArray.
+        # 2. ExtensionArray.factorize.
+        #    Complete control over factorization.
+        if HAS_UNCERTAINTIES and self._data.dtype.kind == 'O':
+            arr, na_value = self._values_for_factorize()
+
+            if not use_na_sentinel:
+                # factorize can now handle differentiating various types of null values.
+                # These can only occur when the array has object dtype.
+                # However, for backwards compatibility we only use the null for the
+                # provided dtype. This may be revisited in the future, see GH#48476.
+                null_mask = isna(arr)
+                if null_mask.any():
+                    # Don't modify (potentially user-provided) array
+                    arr = np.where(null_mask, na_value, arr)
+
+            codes = [-1] * len(self._data)
+            # Note that item is a local variable provided in the loop below
+            vf = np.vectorize(lambda x: x == item, otypes=[bool])
+            for code, item in enumerate(arr):
+                code_mask = vf(self._data)
+                codes = np.where(code_mask, code, codes)
+
+            uniques_ea = self._from_factorized(arr, self)
+            return codes, uniques_ea
+        else:
+            return super(PintArray, self).factorize(use_na_sentinel)
+
     @classmethod
     def _from_factorized(cls, values, original):
         return cls(values, dtype=original.dtype)

     def _values_for_factorize(self):
         arr = self._data
-        if HAS_UNCERTAINTIES:
-            return arr, _ufloat_nan
-        return self._data._values_for_factorize()
+        if HAS_UNCERTAINTIES and arr.dtype.kind == 'O':
+            unique_data = []
+            for item in arr:
+                if item not in unique_data:
+                    unique_data.append(item)
+            return np.array(unique_data), _ufloat_nan
+        return arr._values_for_factorize()

     def value_counts(self, dropna=True):
         """
@@ -654,18 +681,26 @@ def value_counts(self, dropna=True):

         # compute counts on the data with no nans
         data = self._data
-        if HAS_UNCERTAINTIES:
+        if HAS_UNCERTAINTIES and data.dtype.kind == 'O':
             nafilt = unp.isnan(data)
+            na_value = _ufloat_nan
+            data = data[~nafilt]
+            unique_data = []
+            for item in data:
+                if item not in unique_data:
+                    unique_data.append(item)
+            index = list(unique_data)
         else:
             nafilt = np.isnan(data)
-        data = data[~nafilt]
+            na_value = np.nan
+            data = data[~nafilt]
+            index = list(set(data))

         data_list = data.tolist()
-        index = list(set(data))
         array = [data_list.count(item) for item in index]

         if not dropna:
-            index.append(np.nan)
+            index.append(na_value)
             array.append(nafilt.sum())

         return Series(array, index=index)
@@ -679,7 +714,14 @@ def unique(self):
         """
         from pandas import unique

-        return self._from_sequence(unique(self._data), dtype=self.dtype)
+        data = self._data
+        if HAS_UNCERTAINTIES and data.dtype.kind == 'O':
+            unique_data = []
+            for item in data:
+                if item not in unique_data:
+                    unique_data.append(item)
+            return self._from_sequence(pd.array(unique_data, dtype=data.dtype), dtype=self.dtype)
+        return self._from_sequence(unique(data), dtype=self.dtype)

     def __contains__(self, item) -> bool:
         if not isinstance(item, _Quantity):
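The `unique`, `value_counts`, and `_values_for_factorize` hunks above all rely on the same pattern for object-dtype (UFloat) magnitudes: mask NaNs with `unp.isnan` and deduplicate with a first-occurrence membership scan instead of `set()`/`pd.unique()`. The sketch below, not part of the patch and assuming `numpy` and `uncertainties` are installed, shows that pattern in isolation; `x`, `y`, `unique_data`, and `counts` are names local to this example.

```python
# Illustrative sketch of the object-dtype handling used above; not part of the patch.
import numpy as np
from uncertainties import ufloat
from uncertainties import unumpy as unp

x, y = ufloat(1.0, 0.1), ufloat(2.0, 0.2)
data = np.array([x, x, y, ufloat(np.nan, 0)], dtype=object)

nafilt = unp.isnan(data)   # element-wise NaN mask that understands UFloats
data = data[~nafilt]       # drop NaNs before counting

unique_data = []
for item in data:          # keep the first occurrence of each distinct value
    if item not in unique_data:
        unique_data.append(item)

counts = [data.tolist().count(item) for item in unique_data]
print(list(zip(unique_data, counts)))
# roughly: [(1.0+/-0.1, 2), (2.0+/-0.2, 1)]
```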