pandas-dev · jreback · Sep 30, 2020 · May 15, 2020 · May 22, 2020 · May 22, 2020
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -254,6 +254,66 @@ If needed you can adjust the bins with the argument ``offset`` (a Timedelta) tha
 
 For a full example, see: :ref:`timeseries.adjust-the-start-of-the-bins`.
 
+.. _whatsnew_110.floating:
+
+Experimental nullable data types for float data
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+We've added :class:`Float32Dtype` / :class:`Float64Dtype` and :class:`~arrays.FloatingArray`,
+an extension data type dedicated to floating point data that can hold the
+``pd.NA`` missing value indicator (:issue:`32265`, :issue:`34307`).
+
+While the default float data type already supports missing values using ``np.nan``,
+this new data type uses ``pd.NA`` (and its corresponding behaviour) as the missing
+value indicator, in line with the already existing nullable :ref:`integer <integer_na>`
+and :ref:`boolean <boolean>` data types.
+
+One example where the behaviour of ``np.nan`` and ``pd.NA`` is different is
+comparison operations:
+
+.. code-block:: python
+
+  # the default numpy float64 dtype
+  >>> s1 = pd.Series([1.5, None])
+  >>> s1
+  0    1.5
+  1    NaN
+  dtype: float64
+
+  >>> s1 > 1
+  0     True
+  1    False
+  dtype: bool
+
+  # the new nullable float64 dtype
+  >>> s2 = pd.Series([1.5, None], dtype="Float64")
+  >>> s2
+  0     1.5
+  1    <NA>
+  dtype: Float64
+
+  >>> s2 > 1
+  0    True
+  1    <NA>
+  dtype: boolean
+
+See the :ref:`missing_data.NA` doc section for more details on the behaviour
+when using the ``pd.NA`` missing value indicator.
+
+As shown above, the dtype can be specified using the "Float64" or "Float32"
+string (capitalized to distinguish it from the default "float64" data type).
+Alternatively, you can also use the dtype object:
+
+.. ipython:: python
+
+   pd.Series([1.5, None], dtype=pd.Float32Dtype())
+
+.. warning::
+
+   Experimental: the new floating data types are currently experimental, and their
+   behaviour or API may still change without warning. Especially the behaviour
+   regarding NaN (distinct from NA missing values) is subject to change.
+
 fsspec now used for filesystem handling
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 

diff --git a/pandas/__init__.py b/pandas/__init__.py
@@ -59,6 +59,8 @@
     UInt16Dtype,
     UInt32Dtype,
     UInt64Dtype,
+    Float32Dtype,
+    Float64Dtype,
     CategoricalDtype,
     PeriodDtype,
     IntervalDtype,

diff --git a/pandas/_testing.py b/pandas/_testing.py
@@ -84,6 +84,7 @@
 ALL_EA_INT_DTYPES = UNSIGNED_EA_INT_DTYPES + SIGNED_EA_INT_DTYPES
 
 FLOAT_DTYPES: List[Dtype] = [float, "float32", "float64"]
+FLOAT_EA_DTYPES: List[Dtype] = ["Float32", "Float64"]
 COMPLEX_DTYPES: List[Dtype] = [complex, "complex64", "complex128"]
 STRING_DTYPES: List[Dtype] = [str, "str", "U"]
 

diff --git a/pandas/arrays/__init__.py b/pandas/arrays/__init__.py
@@ -7,6 +7,7 @@
     BooleanArray,
     Categorical,
     DatetimeArray,
+    FloatingArray,
     IntegerArray,
     IntervalArray,
     PandasArray,
@@ -20,6 +21,7 @@
     "BooleanArray",
     "Categorical",
     "DatetimeArray",
+    "FloatingArray",
     "IntegerArray",
     "IntervalArray",
     "PandasArray",

diff --git a/pandas/core/api.py b/pandas/core/api.py
@@ -14,6 +14,7 @@
 from pandas.core.algorithms import factorize, unique, value_counts
 from pandas.core.arrays import Categorical
 from pandas.core.arrays.boolean import BooleanDtype
+from pandas.core.arrays.floating import Float32Dtype, Float64Dtype
 from pandas.core.arrays.integer import (
     Int8Dtype,
     Int16Dtype,

diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py
@@ -6,8 +6,10 @@
 from pandas.core.arrays.boolean import BooleanArray
 from pandas.core.arrays.categorical import Categorical
 from pandas.core.arrays.datetimes import DatetimeArray
+from pandas.core.arrays.floating import FloatingArray
 from pandas.core.arrays.integer import IntegerArray, integer_array
 from pandas.core.arrays.interval import IntervalArray
+from pandas.core.arrays.masked import BaseMaskedArray
 from pandas.core.arrays.numpy_ import PandasArray, PandasDtype
 from pandas.core.arrays.period import PeriodArray, period_array
 from pandas.core.arrays.sparse import SparseArray
@@ -18,9 +20,11 @@
     "ExtensionArray",
     "ExtensionOpsMixin",
     "ExtensionScalarOpsMixin",
+    "BaseMaskedArray",
     "BooleanArray",
     "Categorical",
     "DatetimeArray",
+    "FloatingArray",
     "IntegerArray",
     "integer_array",
     "IntervalArray",

diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
@@ -606,10 +606,11 @@ def logical_method(self, other):
     @classmethod
     def _create_comparison_method(cls, op):
         def cmp_method(self, other):
-            from pandas.arrays import IntegerArray
+            from pandas.arrays import IntegerArray, FloatingArray
 
             if isinstance(
-                other, (ABCDataFrame, ABCSeries, ABCIndexClass, IntegerArray)
+                other,
+                (ABCDataFrame, ABCSeries, ABCIndexClass, IntegerArray, FloatingArray),
             ):
                 # Rely on pandas to unbox and dispatch to us.
                 return NotImplemented