Skip to content

Commit e1d367c

Browse files
Update to use pd.NA instead of np.nan / _ufloat_nan
To resolve the question of the proper na_value for EA dtypes (np.nan vs. uncertainties _ufloat_nan), use the dtype-agnostic pd.NA value, which does not depend on the magnitude type. Signed-off-by: Michael Tiemann <[email protected]>
1 parent 6ddf204 commit e1d367c

File tree

2 files changed

+31
-34
lines changed

2 files changed

+31
-34
lines changed

pint_pandas/pint_array.py

+26-26
Original file line numberDiff line numberDiff line change
@@ -148,9 +148,7 @@ def name(self):
148148

149149
@property
150150
def na_value(self):
151-
if HAS_UNCERTAINTIES:
152-
return self.ureg.Quantity(_ufloat_nan, self.units)
153-
return self.ureg.Quantity(np.nan, self.units)
151+
return self.ureg.Quantity(pd.NA, self.units)
154152

155153
def __hash__(self):
156154
# make myself hashable
@@ -383,8 +381,7 @@ def isna(self):
383381
if len(self._data) == 0:
384382
# True or False doesn't matter--we just need the value for the type
385383
return np.full((0), True)
386-
elif isinstance(self._data[0], UFloat):
387-
return unp.isnan(self._data)
384+
return self._data.map(lambda x: pd.isna(x) or (isinstance(x, UFloat) and unp.isnan(x)))
388385
return self._data.isna()
389386

390387
def astype(self, dtype, copy=True):
@@ -537,6 +534,9 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
537534
dtype = PintType(master_scalar.units)
538535

539536
def quantify_nan(item, promote_to_ufloat):
537+
if pd.isna(item):
538+
return dtype.ureg.Quantity(item, dtype.units)
539+
# FIXME: most of this code is never executed (except the final return)
540540
if promote_to_ufloat:
541541
if type(item) is UFloat:
542542
return item * dtype.units
@@ -551,28 +551,31 @@ def quantify_nan(item, promote_to_ufloat):
551551
return item
552552

553553
if isinstance(master_scalar, _Quantity):
554+
# A quantified master_scalar does not guarantee that we don't have NA and/or np.nan values in our scalars
554555
if HAS_UNCERTAINTIES:
555556
promote_to_ufloat = any(
556-
[isinstance(item.m, UFloat) for item in scalars]
557+
[isinstance(item.m, UFloat) for item in scalars if pd.notna(item)]
557558
)
558559
else:
559560
promote_to_ufloat = False
560-
scalars = [quantify_nan(item, promote_to_ufloat) for item in scalars]
561+
scalars = [item if isinstance(item, _Quantity) else quantify_nan(item, promote_to_ufloat) for item in scalars]
561562
scalars = [
562563
(item.to(dtype.units).magnitude if hasattr(item, "to") else item)
563564
for item in scalars
564565
]
565-
if HAS_UNCERTAINTIES:
566+
elif HAS_UNCERTAINTIES:
566567
promote_to_ufloat = any([isinstance(item, UFloat) for item in scalars])
567-
if promote_to_ufloat:
568-
scalars = [
569-
item
570-
if isinstance(item, UFloat)
571-
else _ufloat_nan
572-
if np.isnan(item)
573-
else ufloat(item, 0)
574-
for item in scalars
575-
]
568+
else:
569+
promote_to_ufloat = False
570+
if promote_to_ufloat:
571+
scalars = [
572+
item
573+
if isinstance(item, UFloat)
574+
else _ufloat_nan
575+
if pd.isna(item)
576+
else ufloat(item, 0)
577+
for item in scalars
578+
]
576579
return cls(scalars, dtype=dtype, copy=copy)
577580

578581
@classmethod
@@ -642,7 +645,7 @@ def factorize(
642645

643646
codes = [-1] * len(self.data)
644647
# Note that item is a local variable provided in the loop below
645-
vf = np.vectorize(lambda x: x == item, otypes=[bool])
648+
vf = np.vectorize(lambda x: True if (x_na:=pd.isna(x))*(item_na:=pd.isna(item)) else (x_na==item_na and x==item), otypes=[bool])
646649
for code, item in enumerate(arr):
647650
code_mask = vf(self._data)
648651
codes = np.where(code_mask, code, codes)
@@ -663,7 +666,7 @@ def _values_for_factorize(self):
663666
for item in arr:
664667
if item not in unique_data:
665668
unique_data.append(item)
666-
return np.array(unique_data), _ufloat_nan
669+
return np.array(unique_data), pd.NA
667670
return arr._values_for_factorize()
668671

669672
def value_counts(self, dropna=True):
@@ -690,19 +693,16 @@ def value_counts(self, dropna=True):
690693

691694
# compute counts on the data with no nans
692695
data = self._data
696+
nafilt = data.isna()
697+
na_value = pd.NA
698+
data = data[~nafilt]
693699
if HAS_UNCERTAINTIES and data.dtype.kind == "O":
694-
nafilt = unp.isnan(data)
695-
na_value = _ufloat_nan
696-
data = data[~nafilt]
697700
unique_data = []
698701
for item in data:
699702
if item not in unique_data:
700703
unique_data.append(item)
701704
index = list(unique_data)
702705
else:
703-
nafilt = np.isnan(data)
704-
na_value = np.nan
705-
data = data[~nafilt]
706706
index = list(set(data))
707707

708708
data_list = data.tolist()
@@ -883,7 +883,7 @@ def __array__(self, dtype=None, copy=False):
883883

884884
def _to_array_of_quantity(self, copy=False):
885885
qtys = [
886-
self._Q(item, self._dtype.units) if not pd.isna(item) else item
886+
self._Q(item, self._dtype.units) if item is not pd.NA else item
887887
for item in self._data
888888
]
889889
with warnings.catch_warnings(record=True):

pint_pandas/testsuite/test_pandas_extensiontests.py

+5-8
Original file line numberDiff line numberDiff line change
@@ -193,9 +193,9 @@ def data_missing(numeric_dtype):
193193
numeric_dtype = dtypemap.get(numeric_dtype, numeric_dtype)
194194
if HAS_UNCERTAINTIES:
195195
numeric_dtype = None
196-
dm = [_ufloat_nan, ufloat(1, 0)]
196+
dm = [pd.NA, ufloat(1, 0)]
197197
else:
198-
dm = [np.nan, 1]
198+
dm = [pd.NA, 1]
199199
return PintArray.from_1darray_quantity(
200200
ureg.Quantity(pd.array(dm, dtype=numeric_dtype), ureg.meter)
201201
)
@@ -260,9 +260,9 @@ def data_missing_for_sorting(numeric_dtype):
260260
numeric_dtype = dtypemap.get(numeric_dtype, numeric_dtype)
261261
if HAS_UNCERTAINTIES:
262262
numeric_dtype = None
263-
dms = [ufloat(4, 0), _ufloat_nan, ufloat(-5, 0)]
263+
dms = [ufloat(4, 0), pd.NA, ufloat(-5, 0)]
264264
else:
265-
dms = [4, np.nan, -5]
265+
dms = [4, pd.NA, -5]
266266
return PintArray.from_1darray_quantity(
267267
ureg.Quantity(pd.array(dms, dtype=numeric_dtype), ureg.centimeter)
268268
)
@@ -271,8 +271,6 @@ def data_missing_for_sorting(numeric_dtype):
271271
@pytest.fixture
272272
def na_cmp():
273273
"""Binary operator for comparing NA values."""
274-
if HAS_UNCERTAINTIES:
275-
return lambda x, y: bool(unp.isnan(x.magnitude)) & bool(unp.isnan(y.magnitude))
276274
return lambda x, y: bool(pd.isna(x.magnitude)) & bool(pd.isna(y.magnitude))
277275

278276

@@ -286,12 +284,11 @@ def data_for_grouping(numeric_dtype):
286284
a = 1.0
287285
b = 2.0**32 + 1
288286
c = 2.0**32 + 10
289-
_n = np.nan
287+
_n = pd.NA
290288
if HAS_UNCERTAINTIES:
291289
a = a + ufloat(0, 0)
292290
b = b + ufloat(0, 0)
293291
c = c + ufloat(0, 0)
294-
_n = _ufloat_nan
295292
numeric_dtype = None
296293
else:
297294
numeric_dtype = dtypemap.get(numeric_dtype, numeric_dtype)

0 commit comments

Comments
 (0)