Skip to content

Commit 435ca72

Browse files
jorisvandenbosscheKevin D Smith
authored and
Kevin D Smith
committed
ENH: nullable Float32/64 ExtensionArray (pandas-dev#34307)
1 parent f661a2f commit 435ca72

33 files changed

+1992
-47
lines changed

doc/source/whatsnew/v1.2.0.rst

+48
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,54 @@ Beginning with this version, the default is now to use the more accurate parser
109109
``floating_precision="legacy"`` to use the legacy parser. The change to using the higher precision
110110
parser by default should have no impact on performance. (:issue:`17154`)
111111

112+
.. _whatsnew_120.floating:
113+
114+
Experimental nullable data types for float data
115+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
116+
117+
We've added :class:`Float32Dtype` / :class:`Float64Dtype` and :class:`~arrays.FloatingArray`,
118+
an extension data type dedicated to floating point data that can hold the
119+
``pd.NA`` missing value indicator (:issue:`32265`, :issue:`34307`).
120+
121+
While the default float data type already supports missing values using ``np.nan``,
122+
this new data type uses ``pd.NA`` (and its corresponding behaviour) as the missing
123+
value indicator, in line with the already existing nullable :ref:`integer <integer_na>`
124+
and :ref:`boolean <boolean>` data types.
125+
126+
One example where the behaviour of ``np.nan`` and ``pd.NA`` is different is
127+
comparison operations:
128+
129+
.. ipython:: python
130+
131+
# the default numpy float64 dtype
132+
s1 = pd.Series([1.5, None])
133+
s1
134+
s1 > 1
135+
136+
.. ipython:: python
137+
138+
# the new nullable float64 dtype
139+
s2 = pd.Series([1.5, None], dtype="Float64")
140+
s2
141+
s2 > 1
142+
143+
See the :ref:`missing_data.NA` doc section for more details on the behaviour
144+
when using the ``pd.NA`` missing value indicator.
145+
146+
As shown above, the dtype can be specified using the "Float64" or "Float32"
147+
string (capitalized to distinguish it from the default "float64" data type).
148+
Alternatively, you can also use the dtype object:
149+
150+
.. ipython:: python
151+
152+
pd.Series([1.5, None], dtype=pd.Float32Dtype())
153+
154+
.. warning::
155+
156+
Experimental: the new floating data types are currently experimental, and their
157+
behaviour or API may still change without warning. Especially the behaviour
158+
regarding NaN (distinct from NA missing values) is subject to change.
159+
112160
.. _whatsnew_120.enhancements.other:
113161

114162
Other enhancements

pandas/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@
5858
UInt16Dtype,
5959
UInt32Dtype,
6060
UInt64Dtype,
61+
Float32Dtype,
62+
Float64Dtype,
6163
CategoricalDtype,
6264
PeriodDtype,
6365
IntervalDtype,

pandas/_testing.py

+1
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@
8484
ALL_EA_INT_DTYPES = UNSIGNED_EA_INT_DTYPES + SIGNED_EA_INT_DTYPES
8585

8686
FLOAT_DTYPES: List[Dtype] = [float, "float32", "float64"]
87+
FLOAT_EA_DTYPES: List[Dtype] = ["Float32", "Float64"]
8788
COMPLEX_DTYPES: List[Dtype] = [complex, "complex64", "complex128"]
8889
STRING_DTYPES: List[Dtype] = [str, "str", "U"]
8990

pandas/arrays/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
BooleanArray,
88
Categorical,
99
DatetimeArray,
10+
FloatingArray,
1011
IntegerArray,
1112
IntervalArray,
1213
PandasArray,
@@ -20,6 +21,7 @@
2021
"BooleanArray",
2122
"Categorical",
2223
"DatetimeArray",
24+
"FloatingArray",
2325
"IntegerArray",
2426
"IntervalArray",
2527
"PandasArray",

pandas/conftest.py

+11
Original file line numberDiff line numberDiff line change
@@ -978,6 +978,17 @@ def float_dtype(request):
978978
return request.param
979979

980980

981+
@pytest.fixture(params=tm.FLOAT_EA_DTYPES)
982+
def float_ea_dtype(request):
983+
"""
984+
Parameterized fixture for nullable float extension dtypes.
985+
986+
* 'Float32'
987+
* 'Float64'
988+
"""
989+
return request.param
990+
991+
981992
@pytest.fixture(params=tm.COMPLEX_DTYPES)
982993
def complex_dtype(request):
983994
"""

pandas/core/api.py

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from pandas.core.algorithms import factorize, unique, value_counts
1515
from pandas.core.arrays import Categorical
1616
from pandas.core.arrays.boolean import BooleanDtype
17+
from pandas.core.arrays.floating import Float32Dtype, Float64Dtype
1718
from pandas.core.arrays.integer import (
1819
Int8Dtype,
1920
Int16Dtype,

pandas/core/arrays/__init__.py

+4
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@
66
from pandas.core.arrays.boolean import BooleanArray
77
from pandas.core.arrays.categorical import Categorical
88
from pandas.core.arrays.datetimes import DatetimeArray
9+
from pandas.core.arrays.floating import FloatingArray
910
from pandas.core.arrays.integer import IntegerArray, integer_array
1011
from pandas.core.arrays.interval import IntervalArray
12+
from pandas.core.arrays.masked import BaseMaskedArray
1113
from pandas.core.arrays.numpy_ import PandasArray, PandasDtype
1214
from pandas.core.arrays.period import PeriodArray, period_array
1315
from pandas.core.arrays.sparse import SparseArray
@@ -18,9 +20,11 @@
1820
"ExtensionArray",
1921
"ExtensionOpsMixin",
2022
"ExtensionScalarOpsMixin",
23+
"BaseMaskedArray",
2124
"BooleanArray",
2225
"Categorical",
2326
"DatetimeArray",
27+
"FloatingArray",
2428
"IntegerArray",
2529
"integer_array",
2630
"IntervalArray",

pandas/core/arrays/boolean.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,9 @@ class BooleanDtype(BaseMaskedDtype):
5858

5959
name = "boolean"
6060

61+
# mypy: https://github.com/python/mypy/issues/4125
6162
@property
62-
def type(self) -> Type[np.bool_]:
63+
def type(self) -> Type: # type: ignore[override]
6364
return np.bool_
6465

6566
@property
@@ -606,10 +607,9 @@ def logical_method(self, other):
606607
def _create_comparison_method(cls, op):
607608
@ops.unpack_zerodim_and_defer(op.__name__)
608609
def cmp_method(self, other):
609-
from pandas.arrays import IntegerArray
610+
from pandas.arrays import FloatingArray, IntegerArray
610611

611-
if isinstance(other, IntegerArray):
612-
# Rely on pandas to unbox and dispatch to us.
612+
if isinstance(other, (IntegerArray, FloatingArray)):
613613
return NotImplemented
614614

615615
mask = None

0 commit comments

Comments
 (0)