From 7cfb01b9f3f83f4b994ef764b0cf3a98eff71fcc Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Wed, 2 Aug 2023 06:53:14 +0100 Subject: [PATCH 1/3] BUG: pd.array([]) should return masked array --- doc/source/whatsnew/v2.1.0.rst | 3 ++- pandas/core/construction.py | 2 +- pandas/tests/arrays/test_array.py | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 98465b5686ca1..3c34b3f59df5b 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -628,7 +628,8 @@ Numeric Conversion ^^^^^^^^^^ - Bug in :func:`DataFrame.style.to_latex` and :func:`DataFrame.style.to_html` if the DataFrame contains integers with more digits than can be represented by floating point double precision (:issue:`52272`) -- Bug in :func:`array` when given a ``datetime64`` or ``timedelta64`` dtype with unit of "s", "us", or "ms" returning :class:`PandasArray` instead of :class:`DatetimeArray` or :class:`TimedeltaArray` (:issue:`52859`) +- Bug in :func:`array` when given a ``datetime64`` or ``timedelta64`` dtype with unit of "s", "us", or "ms" returning :class:`NumpyExtensionArray` instead of :class:`DatetimeArray` or :class:`TimedeltaArray` (:issue:`52859`) +- Bug in :func:`array` when given an empty list and no dtype returning :class:`NumpyExtensionArray` instead of :class:`FloatingArray` (:issue:`54371`) - Bug in :meth:`ArrowDtype.numpy_dtype` returning nanosecond units for non-nanosecond ``pyarrow.timestamp`` and ``pyarrow.duration`` types (:issue:`51800`) - Bug in :meth:`DataFrame.__repr__` incorrectly raising a ``TypeError`` when the dtype of a column is ``np.record`` (:issue:`48526`) - Bug in :meth:`DataFrame.info` raising ``ValueError`` when ``use_numba`` is set (:issue:`51922`) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 4ce6c35244e5b..b783c67414696 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -352,7 +352,7 @@ def array( return IntegerArray._from_sequence(data, copy=copy) elif ( - inferred_dtype in ("floating", "mixed-integer-float") + inferred_dtype in ("floating", "mixed-integer-float", "empty") and getattr(data, "dtype", None) != np.float16 ): # GH#44715 Exclude np.float16 bc FloatingArray does not support it; diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index b8b5e3588d48f..e14880774c0cd 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -47,6 +47,7 @@ def test_dt64_array(dtype_unit): "data, dtype, expected", [ # Basic NumPy defaults. + ([], None, FloatingArray._from_sequence([])), ([1, 2], None, IntegerArray._from_sequence([1, 2])), ([1, 2], object, NumpyExtensionArray(np.array([1, 2], dtype=object))), ( From 441311dc04cb3c4c260999c8edd343e301a89d99 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Wed, 2 Aug 2023 07:53:39 +0100 Subject: [PATCH 2/3] BUG: pd.array([]) should return masked array II --- pandas/core/construction.py | 5 +++-- pandas/tests/arrays/test_array.py | 5 +++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index b783c67414696..bdff940b78b1f 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -350,9 +350,10 @@ def array( elif inferred_dtype == "integer": return IntegerArray._from_sequence(data, copy=copy) - + elif inferred_dtype == "empty" and not hasattr(data, "dtype"): + return FloatingArray._from_sequence(data, copy=copy) elif ( - inferred_dtype in ("floating", "mixed-integer-float", "empty") + inferred_dtype in ("floating", "mixed-integer-float") and getattr(data, "dtype", None) != np.float16 ): # GH#44715 Exclude np.float16 bc FloatingArray does not support it; diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index e14880774c0cd..2746cd91963a0 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -55,6 +55,11 @@ def test_dt64_array(dtype_unit): np.dtype("float32"), NumpyExtensionArray(np.array([1.0, 2.0], dtype=np.dtype("float32"))), ), + ( + np.array([], dtype=object), + None, + NumpyExtensionArray(np.array([], dtype=object)), + ), (np.array([1, 2], dtype="int64"), None, IntegerArray._from_sequence([1, 2])), ( np.array([1.0, 2.0], dtype="float64"), From 246991929bd9dca49e7bc40d89196714d62a8b74 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Wed, 2 Aug 2023 08:55:35 +0100 Subject: [PATCH 3/3] BUG: pd.array([]) should return masked array III --- pandas/core/construction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index bdff940b78b1f..5757c69bb6ec7 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -350,7 +350,7 @@ def array( elif inferred_dtype == "integer": return IntegerArray._from_sequence(data, copy=copy) - elif inferred_dtype == "empty" and not hasattr(data, "dtype"): + elif inferred_dtype == "empty" and not hasattr(data, "dtype") and not len(data): return FloatingArray._from_sequence(data, copy=copy) elif ( inferred_dtype in ("floating", "mixed-integer-float")