6
6
import numpy as np
7
7
import pandas as pd
8
8
import pint
9
- from pandas import DataFrame , Series
9
+ from pandas import DataFrame , Series , Index
10
10
from pandas .api .extensions import (
11
11
ExtensionArray ,
12
12
ExtensionDtype ,
27
27
# quantify/dequantify
28
28
NO_UNIT = "No Unit"
29
29
30
+ # from pint.facets.plain.quantity import PlainQuantity as _Quantity
31
+ # from pint.facets.plain.unit import PlainUnit as _Unit
30
32
31
33
class PintType (ExtensionDtype ):
32
34
"""
@@ -65,7 +67,7 @@ def __new__(cls, units=None):
65
67
if not isinstance (units , _Unit ):
66
68
units = cls ._parse_dtype_strict (units )
67
69
# ureg.unit returns a quantity with a magnitude of 1
68
- # eg 1 mm. Initialising a quantity and taking it's unit
70
+ # eg 1 mm. Initialising a quantity and taking its unit
69
71
# TODO: Separate units from quantities in pint
70
72
# to simplify this bit
71
73
units = cls .ureg .Quantity (1 , units ).units
@@ -195,8 +197,8 @@ def __repr__(self):
195
197
float : pd .Float64Dtype (),
196
198
np .float64 : pd .Float64Dtype (),
197
199
np .float32 : pd .Float32Dtype (),
198
- np .complex128 : pd .core .dtypes .dtypes .PandasDtype ("complex128" ),
199
- np .complex64 : pd .core .dtypes .dtypes .PandasDtype ("complex64" ),
200
+ np .complex128 : pd .core .dtypes .dtypes .NumpyEADtype ("complex128" ),
201
+ np .complex64 : pd .core .dtypes .dtypes .NumpyEADtype ("complex64" ),
200
202
# np.float16: pd.Float16Dtype(),
201
203
}
202
204
dtypeunmap = {v : k for k , v in dtypemap .items ()}
@@ -250,7 +252,6 @@ def __init__(self, values, dtype=None, copy=False):
250
252
copy = False
251
253
elif not isinstance (values , pd .core .arrays .numeric .NumericArray ):
252
254
values = pd .array (values , copy = copy )
253
- copy = False
254
255
if copy :
255
256
values = values .copy ()
256
257
self ._data = values
@@ -309,12 +310,22 @@ def __setitem__(self, key, value):
309
310
# doing nothing here seems to be ok
310
311
return
311
312
313
+ master_scalar = None
314
+ try :
315
+ master_scalar = next (i for i in self ._data if pd .notna (i ))
316
+ except StopIteration :
317
+ pass
318
+
312
319
if isinstance (value , _Quantity ):
313
320
value = value .to (self .units ).magnitude
314
- elif is_list_like (value ) and len (value ) > 0 and isinstance (value [0 ], _Quantity ):
315
- value = [item .to (self .units ).magnitude for item in value ]
321
+ elif is_list_like (value ) and len (value ) > 0 :
322
+ if isinstance (value [0 ], _Quantity ):
323
+ value = [item .to (self .units ).magnitude for item in value ]
324
+ if len (value ) == 1 :
325
+ value = value [0 ]
316
326
317
327
key = check_array_indexer (self , key )
328
+ # Filter out invalid values for our array type(s)
318
329
try :
319
330
self ._data [key ] = value
320
331
except IndexError as e :
@@ -458,7 +469,8 @@ def take(self, indices, allow_fill=False, fill_value=None):
458
469
Examples
459
470
--------
460
471
"""
461
- from pandas .core .algorithms import take , is_scalar
472
+ from pandas .core .algorithms import take
473
+ from pandas .core .dtypes .common import is_scalar
462
474
463
475
data = self ._data
464
476
if allow_fill and fill_value is None :
@@ -470,7 +482,10 @@ def take(self, indices, allow_fill=False, fill_value=None):
470
482
# magnitude is in fact an array scalar, which will get rejected by pandas.
471
483
fill_value = fill_value [()]
472
484
473
- result = take (data , indices , fill_value = fill_value , allow_fill = allow_fill )
485
+ with warnings .catch_warnings ():
486
+ warnings .simplefilter ("ignore" )
487
+ # Turn off warning that PandasArray is deprecated for ``take``
488
+ result = take (data , indices , fill_value = fill_value , allow_fill = allow_fill )
474
489
475
490
return PintArray (result , dtype = self .dtype )
476
491
@@ -512,22 +527,17 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
512
527
raise ValueError (
513
528
"Cannot infer dtype. No dtype specified and empty array"
514
529
)
515
- if dtype is None and not isinstance ( master_scalar , _Quantity ) :
516
- raise ValueError ( "No dtype specified and not a sequence of quantities" )
517
- if dtype is None and isinstance ( master_scalar , _Quantity ):
530
+ if dtype is None :
531
+ if not isinstance ( master_scalar , _Quantity ):
532
+ raise ValueError ( "No dtype specified and not a sequence of quantities" )
518
533
dtype = PintType (master_scalar .units )
519
534
520
- def quantify_nan (item ):
521
- if type (item ) is float :
522
- return item * dtype .units
523
- return item
524
-
525
535
if isinstance (master_scalar , _Quantity ):
526
- scalars = [quantify_nan (item ) for item in scalars ]
527
536
scalars = [
528
537
(item .to (dtype .units ).magnitude if hasattr (item , "to" ) else item )
529
538
for item in scalars
530
539
]
540
+ # When creating empty arrays, make them large enough to hold UFloats in case we need to do so later
531
541
return cls (scalars , dtype = dtype , copy = copy )
532
542
533
543
@classmethod
@@ -538,10 +548,21 @@ def _from_sequence_of_strings(cls, scalars, dtype=None, copy=False):
538
548
539
549
@classmethod
540
550
def _from_factorized (cls , values , original ):
551
+ from pandas ._libs .lib import infer_dtype
552
+
553
+ if infer_dtype (values ) != "object" :
554
+ values = pd .array (values , copy = False )
541
555
return cls (values , dtype = original .dtype )
542
556
543
557
def _values_for_factorize (self ):
544
- return self ._data ._values_for_factorize ()
558
+ # factorize can now handle differentiating various types of null values.
559
+ # These can only occur when the array has object dtype.
560
+ # However, for backwards compatibility we only use the null for the
561
+ # provided dtype. This may be revisited in the future, see GH#48476.
562
+ arr = self ._data
563
+ if arr .dtype .kind == "O" :
564
+ return np .array (arr , copy = False ), self .dtype .na_value .m
565
+ return arr ._values_for_factorize ()
545
566
546
567
def value_counts (self , dropna = True ):
547
568
"""
@@ -567,16 +588,17 @@ def value_counts(self, dropna=True):
567
588
568
589
# compute counts on the data with no nans
569
590
data = self ._data
570
- nafilt = np .isnan (data )
591
+ nafilt = pd .isna (data )
592
+ na_value = self .dtype .na_value .m
571
593
data = data [~ nafilt ]
594
+ index = list (set (data ))
572
595
573
596
data_list = data .tolist ()
574
- index = list (set (data ))
575
597
array = [data_list .count (item ) for item in index ]
576
598
577
599
if not dropna :
578
- index .append (np . nan )
579
- array .append (nafilt . sum ( ))
600
+ index .append (na_value )
601
+ array .append (len ( nafilt ))
580
602
581
603
return Series (array , index = index )
582
604
@@ -589,7 +611,8 @@ def unique(self):
589
611
"""
590
612
from pandas import unique
591
613
592
- return self ._from_sequence (unique (self ._data ), dtype = self .dtype )
614
+ data = self ._data
615
+ return self ._from_sequence (unique (data ), dtype = self .dtype )
593
616
594
617
def __contains__ (self , item ) -> bool :
595
618
if not isinstance (item , _Quantity ):
@@ -691,7 +714,7 @@ def convert_values(param):
691
714
else :
692
715
return param
693
716
694
- if isinstance (other , (Series , DataFrame )):
717
+ if isinstance (other , (Series , DataFrame , Index )):
695
718
return NotImplemented
696
719
lvalues = self .quantity
697
720
validate_length (lvalues , other )
@@ -740,7 +763,9 @@ def __array__(self, dtype=None, copy=False):
740
763
741
764
def _to_array_of_quantity (self , copy = False ):
742
765
qtys = [
743
- self ._Q (item , self ._dtype .units ) if not pd .isna (item ) else item
766
+ self ._Q (item , self ._dtype .units )
767
+ if item is not self .dtype .na_value .m
768
+ else self .dtype .na_value
744
769
for item in self ._data
745
770
]
746
771
with warnings .catch_warnings (record = True ):
@@ -798,7 +823,42 @@ def searchsorted(self, value, side="left", sorter=None):
798
823
value = [item .to (self .units ).magnitude for item in value ]
799
824
return arr .searchsorted (value , side = side , sorter = sorter )
800
825
801
- def _reduce (self , name , ** kwds ):
826
+ def map (self , mapper , na_action = None ):
827
+ """
828
+ Map values using an input mapping or function.
829
+
830
+ Parameters
831
+ ----------
832
+ mapper : function, dict, or Series
833
+ Mapping correspondence.
834
+ na_action : {None, 'ignore'}, default None
835
+ If 'ignore', propagate NA values, without passing them to the
836
+ mapping correspondence. If 'ignore' is not supported, a
837
+ ``NotImplementedError`` should be raised.
838
+
839
+ Returns
840
+ -------
841
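+ If mapper is a function and the array is non-empty, apply it to each element of the array (converted to an object array of Quantities) and return a PintArray built from the results; otherwise defer to the superclass ``map``.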
842
+
843
+ """
844
+ if callable (mapper ) and len (self ):
845
+ from pandas ._libs import lib
846
+
847
+ # This converts PintArray into array of Quantities
848
+ values = self .astype (object , copy = False )
849
+ # Using _from_sequence allows for possibility that mapper changes units
850
+ if na_action is None :
851
+ arr = lib .map_infer (values , mapper , convert = True )
852
+ else :
853
+ arr = lib .map_infer_mask (
854
+ values , mapper , mask = pd .isna (values ).view (np .uint8 ), convert = True
855
+ )
856
+ # If mapper doesn't return a Quantity, this will raise a ValueError
857
+ return PintArray ._from_sequence (arr )
858
+ else :
859
+ return super ().map (mapper , na_action = na_action )
860
+
861
+ def _reduce (self , name , * , skipna : bool = True , keepdims : bool = False , ** kwds ):
802
862
"""
803
863
Return a scalar result of performing the reduction operation.
804
864
@@ -842,14 +902,20 @@ def _reduce(self, name, **kwds):
842
902
843
903
if isinstance (self ._data , ExtensionArray ):
844
904
try :
845
- result = self ._data ._reduce (name , ** kwds )
905
+ result = self ._data ._reduce (
906
+ name , skipna = skipna , keepdims = keepdims , ** kwds
907
+ )
846
908
except NotImplementedError :
847
909
result = functions [name ](self .numpy_data , ** kwds )
848
910
849
911
if name in {"all" , "any" , "kurt" , "skew" }:
850
912
return result
851
913
if name == "var" :
914
+ if keepdims :
915
+ return PintArray (result , f"pint[({ self .units } )**2]" )
852
916
return self ._Q (result , self .units ** 2 )
917
+ if keepdims :
918
+ return PintArray (result , self .dtype )
853
919
return self ._Q (result , self .units )
854
920
855
921
def _accumulate (self , name : str , * , skipna : bool = True , ** kwds ):
@@ -866,7 +932,6 @@ def _accumulate(self, name: str, *, skipna: bool = True, **kwds):
866
932
result = self ._data ._accumulate (name , ** kwds )
867
933
except NotImplementedError :
868
934
result = functions [name ](self .numpy_data , ** kwds )
869
- print (result )
870
935
871
936
return self ._from_sequence (result , self .units )
872
937
0 commit comments