Skip to content

Commit 5b2e162

Browse files
BUG: preserve nullable dtype for float result in IntegerArray/BooleanArray arithmetic ops (#38178)
1 parent 6d0dab4 commit 5b2e162

File tree

15 files changed

+107
-44
lines changed

15 files changed

+107
-44
lines changed

doc/source/whatsnew/v1.2.0.rst

+3
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,9 @@ Alternatively, you can also use the dtype object:
196196
197197
pd.Series([1.5, None], dtype=pd.Float32Dtype())
198198
199+
Operations on the existing nullable integer or boolean data types that
200+
produce float results now also return the nullable floating data types (:issue:`38178`).
201+
199202
.. warning::
200203

201204
Experimental: the new floating data types are currently experimental, and their

pandas/core/arrays/boolean.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -706,10 +706,11 @@ def _maybe_mask_result(self, result, mask, other, op_name: str):
706706
if (is_float_dtype(other) or is_float(other)) or (
707707
op_name in ["rtruediv", "truediv"]
708708
):
709-
result[mask] = np.nan
710-
return result
709+
from pandas.core.arrays import FloatingArray
710+
711+
return FloatingArray(result, mask, copy=False)
711712

712-
if is_bool_dtype(result):
713+
elif is_bool_dtype(result):
713714
return BooleanArray(result, mask, copy=False)
714715

715716
elif is_integer_dtype(result):

pandas/core/arrays/integer.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -539,13 +539,15 @@ def _cmp_method(self, other, op):
539539
return BooleanArray(result, mask)
540540

541541
def _arith_method(self, other, op):
542+
from pandas.core.arrays import FloatingArray
543+
542544
op_name = op.__name__
543545
omask = None
544546

545547
if getattr(other, "ndim", 0) > 1:
546548
raise NotImplementedError("can only perform ops with 1-d structures")
547549

548-
if isinstance(other, IntegerArray):
550+
if isinstance(other, (IntegerArray, FloatingArray)):
549551
other, omask = other._data, other._mask
550552

551553
elif is_list_like(other):
@@ -636,8 +638,9 @@ def _maybe_mask_result(self, result, mask, other, op_name: str):
636638
if (is_float_dtype(other) or is_float(other)) or (
637639
op_name in ["rtruediv", "truediv"]
638640
):
639-
result[mask] = np.nan
640-
return result
641+
from pandas.core.arrays import FloatingArray
642+
643+
return FloatingArray(result, mask, copy=False)
641644

642645
if result.dtype == "timedelta64[ns]":
643646
from pandas.core.arrays import TimedeltaArray

pandas/tests/arrays/boolean/test_arithmetic.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import pandas as pd
77
import pandas._testing as tm
8+
from pandas.arrays import FloatingArray
89

910

1011
@pytest.fixture
@@ -51,13 +52,15 @@ def test_sub(left_array, right_array):
5152

5253

5354
def test_div(left_array, right_array):
54-
# for now division gives a float numpy array
5555
result = left_array / right_array
56-
expected = np.array(
57-
[1.0, np.inf, np.nan, 0.0, np.nan, np.nan, np.nan, np.nan, np.nan],
58-
dtype="float64",
56+
expected = FloatingArray(
57+
np.array(
58+
[1.0, np.inf, np.nan, 0.0, np.nan, np.nan, np.nan, np.nan, np.nan],
59+
dtype="float64",
60+
),
61+
np.array([False, False, True, False, False, True, True, True, True]),
5962
)
60-
tm.assert_numpy_array_equal(result, expected)
63+
tm.assert_extension_array_equal(result, expected)
6164

6265

6366
@pytest.mark.parametrize(

pandas/tests/arrays/boolean/test_function.py

+7
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,13 @@ def test_value_counts_na():
8585
tm.assert_series_equal(result, expected)
8686

8787

88+
def test_value_counts_with_normalize():
89+
s = pd.Series([True, False, pd.NA], dtype="boolean")
90+
result = s.value_counts(normalize=True)
91+
expected = pd.Series([1, 1], index=[False, True], dtype="Float64") / 2
92+
tm.assert_series_equal(result, expected)
93+
94+
8895
def test_diff():
8996
a = pd.array(
9097
[True, True, False, False, True, None, True, None, False], dtype="boolean"

pandas/tests/arrays/floating/test_function.py

+7
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,13 @@ def test_value_counts_empty():
113113
tm.assert_series_equal(result, expected)
114114

115115

116+
def test_value_counts_with_normalize():
117+
s = pd.Series([0.1, 0.2, 0.1, pd.NA], dtype="Float64")
118+
result = s.value_counts(normalize=True)
119+
expected = pd.Series([2, 1], index=[0.1, 0.2], dtype="Float64") / 3
120+
tm.assert_series_equal(result, expected)
121+
122+
116123
@pytest.mark.parametrize("skipna", [True, False])
117124
@pytest.mark.parametrize("min_count", [0, 4])
118125
def test_floating_array_sum(skipna, min_count, dtype):

pandas/tests/arrays/integer/test_arithmetic.py

+31-15
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
import pandas as pd
99
import pandas._testing as tm
10-
from pandas.core.arrays import integer_array
10+
from pandas.core.arrays import FloatingArray, integer_array
1111
import pandas.core.ops as ops
1212

1313
# Basic test for the arithmetic array ops
@@ -45,24 +45,26 @@ def test_sub(dtype):
4545

4646

4747
def test_div(dtype):
48-
# for now division gives a float numpy array
4948
a = pd.array([1, 2, 3, None, 5], dtype=dtype)
5049
b = pd.array([0, 1, None, 3, 4], dtype=dtype)
5150

5251
result = a / b
53-
expected = np.array([np.inf, 2, np.nan, np.nan, 1.25], dtype="float64")
54-
tm.assert_numpy_array_equal(result, expected)
52+
expected = pd.array([np.inf, 2, None, None, 1.25], dtype="Float64")
53+
tm.assert_extension_array_equal(result, expected)
5554

5655

5756
@pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)])
5857
def test_divide_by_zero(zero, negative):
5958
# https://github.com/pandas-dev/pandas/issues/27398
6059
a = pd.array([0, 1, -1, None], dtype="Int64")
6160
result = a / zero
62-
expected = np.array([np.nan, np.inf, -np.inf, np.nan])
61+
expected = FloatingArray(
62+
np.array([np.nan, np.inf, -np.inf, 1], dtype="float64"),
63+
np.array([False, False, False, True]),
64+
)
6365
if negative:
6466
expected *= -1
65-
tm.assert_numpy_array_equal(result, expected)
67+
tm.assert_extension_array_equal(result, expected)
6668

6769

6870
def test_floordiv(dtype):
@@ -99,8 +101,11 @@ def test_pow_scalar():
99101
tm.assert_extension_array_equal(result, expected)
100102

101103
result = a ** np.nan
102-
expected = np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype="float64")
103-
tm.assert_numpy_array_equal(result, expected)
104+
expected = FloatingArray(
105+
np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype="float64"),
106+
np.array([False, False, False, True, False]),
107+
)
108+
tm.assert_extension_array_equal(result, expected)
104109

105110
# reversed
106111
a = a[1:] # Can't raise integers to negative powers.
@@ -118,8 +123,11 @@ def test_pow_scalar():
118123
tm.assert_extension_array_equal(result, expected)
119124

120125
result = np.nan ** a
121-
expected = np.array([1, np.nan, np.nan, np.nan], dtype="float64")
122-
tm.assert_numpy_array_equal(result, expected)
126+
expected = FloatingArray(
127+
np.array([1, np.nan, np.nan, np.nan], dtype="float64"),
128+
np.array([False, False, True, False]),
129+
)
130+
tm.assert_extension_array_equal(result, expected)
123131

124132

125133
def test_pow_array():
@@ -133,10 +141,10 @@ def test_pow_array():
133141
def test_rpow_one_to_na():
134142
# https://github.com/pandas-dev/pandas/issues/22022
135143
# https://github.com/pandas-dev/pandas/issues/29997
136-
arr = integer_array([np.nan, np.nan])
144+
arr = pd.array([np.nan, np.nan], dtype="Int64")
137145
result = np.array([1.0, 2.0]) ** arr
138-
expected = np.array([1.0, np.nan])
139-
tm.assert_numpy_array_equal(result, expected)
146+
expected = pd.array([1.0, np.nan], dtype="Float64")
147+
tm.assert_extension_array_equal(result, expected)
140148

141149

142150
@pytest.mark.parametrize("other", [0, 0.5])
@@ -198,11 +206,19 @@ def test_arith_coerce_scalar(data, all_arithmetic_operators):
198206

199207
result = op(s, other)
200208
expected = op(s.astype(float), other)
209+
expected = expected.astype("Float64")
201210
# rfloordiv results in nan instead of inf
202211
if all_arithmetic_operators == "__rfloordiv__" and _np_version_under1p20:
203212
# for numpy 1.20 https://github.com/numpy/numpy/pull/16161
204213
# updated floordiv, now matches our behavior defined in core.ops
205-
expected[(expected == np.inf) | (expected == -np.inf)] = np.nan
214+
mask = (
215+
((expected == np.inf) | (expected == -np.inf)).fillna(False).to_numpy(bool)
216+
)
217+
expected.array._data[mask] = np.nan
218+
# rmod results in NaN that wasn't NA in original nullable Series -> unmask it
219+
elif all_arithmetic_operators == "__rmod__":
220+
mask = (s == 0).fillna(False).to_numpy(bool)
221+
expected.array._mask[mask] = False
206222

207223
tm.assert_series_equal(result, expected)
208224

@@ -215,7 +231,7 @@ def test_arithmetic_conversion(all_arithmetic_operators, other):
215231

216232
s = pd.Series([1, 2, 3], dtype="Int64")
217233
result = op(s, other)
218-
assert result.dtype is np.dtype("float")
234+
assert result.dtype == "Float64"
219235

220236

221237
def test_cross_type_arithmetic():

pandas/tests/arrays/integer/test_function.py

+8
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,14 @@ def test_value_counts_empty():
127127
tm.assert_series_equal(result, expected)
128128

129129

130+
def test_value_counts_with_normalize():
131+
# GH 33172
132+
s = pd.Series([1, 2, 1, pd.NA], dtype="Int64")
133+
result = s.value_counts(normalize=True)
134+
expected = pd.Series([2, 1], index=[1, 2], dtype="Float64") / 3
135+
tm.assert_series_equal(result, expected)
136+
137+
130138
@pytest.mark.parametrize("skipna", [True, False])
131139
@pytest.mark.parametrize("min_count", [0, 4])
132140
def test_integer_array_sum(skipna, min_count, any_nullable_int_dtype):

pandas/tests/arrays/masked/test_arithmetic.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,7 @@ def test_array_scalar_like_equivalence(data, all_arithmetic_operators):
4343
for scalar in [scalar, data.dtype.type(scalar)]:
4444
result = op(data, scalar)
4545
expected = op(data, scalar_array)
46-
if isinstance(expected, ExtensionArray):
47-
tm.assert_extension_array_equal(result, expected)
48-
else:
49-
# TODO div still gives float ndarray -> remove this once we have Float EA
50-
tm.assert_numpy_array_equal(result, expected)
46+
tm.assert_extension_array_equal(result, expected)
5147

5248

5349
def test_array_NA(data, all_arithmetic_operators):

pandas/tests/arrays/string_/test_string.py

+12
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,18 @@ def test_value_counts_na(dtype, request):
495495
tm.assert_series_equal(result, expected)
496496

497497

498+
def test_value_counts_with_normalize(dtype, request):
499+
if dtype == "arrow_string":
500+
reason = "TypeError: boolean value of NA is ambiguous"
501+
mark = pytest.mark.xfail(reason=reason)
502+
request.node.add_marker(mark)
503+
504+
s = pd.Series(["a", "b", "a", pd.NA], dtype=dtype)
505+
result = s.value_counts(normalize=True)
506+
expected = pd.Series([2, 1], index=["a", "b"], dtype="Float64") / 3
507+
tm.assert_series_equal(result, expected)
508+
509+
498510
@pytest.mark.parametrize(
499511
"values, expected",
500512
[

pandas/tests/extension/test_boolean.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ def _check_op(self, s, op, other, op_name, exc=NotImplementedError):
130130
elif op_name in ("__truediv__", "__rtruediv__"):
131131
# combine with bools does not generate the correct result
132132
# (numpy behaviour for div is to regard the bools as numeric)
133-
expected = s.astype(float).combine(other, op)
133+
expected = s.astype(float).combine(other, op).astype("Float64")
134134
if op_name == "__rpow__":
135135
# for rpow, combine does not propagate NaN
136136
expected[result.isna()] = np.nan
@@ -235,6 +235,10 @@ def test_searchsorted(self, data_for_sorting, as_series):
235235
def test_value_counts(self, all_data, dropna):
236236
return super().test_value_counts(all_data, dropna)
237237

238+
@pytest.mark.skip(reason="uses nullable integer")
239+
def test_value_counts_with_normalize(self, data):
240+
pass
241+
238242
def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting):
239243
# override because there are only 2 unique values
240244

pandas/tests/extension/test_floating.py

+4
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,10 @@ def test_value_counts(self, all_data, dropna):
184184

185185
self.assert_series_equal(result, expected)
186186

187+
@pytest.mark.skip(reason="uses nullable integer")
188+
def test_value_counts_with_normalize(self, data):
189+
pass
190+
187191

188192
class TestCasting(base.BaseCastingTests):
189193
pass

pandas/tests/extension/test_integer.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -130,10 +130,7 @@ def _check_op(self, s, op, other, op_name, exc=NotImplementedError):
130130
expected = s.combine(other, op)
131131

132132
if op_name in ("__rtruediv__", "__truediv__", "__div__"):
133-
expected = expected.fillna(np.nan).astype(float)
134-
if op_name == "__rtruediv__":
135-
# TODO reverse operators result in object dtype
136-
result = result.astype(float)
133+
expected = expected.fillna(np.nan).astype("Float64")
137134
elif op_name.startswith("__r"):
138135
# TODO reverse operators result in object dtype
139136
# see https://github.com/pandas-dev/pandas/issues/22024
@@ -224,6 +221,10 @@ def test_value_counts(self, all_data, dropna):
224221

225222
self.assert_series_equal(result, expected)
226223

224+
@pytest.mark.skip(reason="uses nullable integer")
225+
def test_value_counts_with_normalize(self, data):
226+
pass
227+
227228

228229
class TestCasting(base.BaseCastingTests):
229230
pass

pandas/tests/extension/test_string.py

+4
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,10 @@ class TestMethods(base.BaseMethodsTests):
118118
def test_value_counts(self, all_data, dropna):
119119
return super().test_value_counts(all_data, dropna)
120120

121+
@pytest.mark.skip(reason="returns nullable")
122+
def test_value_counts_with_normalize(self, data):
123+
pass
124+
121125

122126
class TestCasting(base.BaseCastingTests):
123127
pass

pandas/tests/series/test_arithmetic.py

+2-8
Original file line numberDiff line numberDiff line change
@@ -838,14 +838,8 @@ class TestInplaceOperations:
838838
(
839839
("Int64", "Int64", "Int64", "Int64"),
840840
("float", "float", "float", "float"),
841-
("Int64", "float", "float", "float"),
842-
pytest.param(
843-
"Int64",
844-
"Float64",
845-
"Float64",
846-
"Float64",
847-
marks=pytest.mark.xfail(reason="Not implemented yet"),
848-
),
841+
("Int64", "float", "Float64", "Float64"),
842+
("Int64", "Float64", "Float64", "Float64"),
849843
),
850844
)
851845
def test_series_inplace_ops(self, dtype1, dtype2, dtype_expected, dtype_mul):

0 commit comments

Comments
 (0)