From 9667a597f591343be13355b1840e5e6796407827 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Tue, 7 Jun 2022 23:27:50 -0400 Subject: [PATCH 1/9] reindex default fill_value dtype --- pandas/core/internals/managers.py | 6 ++++++ pandas/tests/frame/methods/test_reindex.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index f530c58bbd0c0..a30acb81262b7 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -36,6 +36,7 @@ ensure_platform_int, is_1d_only_ea_dtype, is_dtype_equal, + is_float_dtype, is_list_like, ) from pandas.core.dtypes.dtypes import ExtensionDtype @@ -817,6 +818,11 @@ def _make_na_block( if fill_value is None: fill_value = np.nan + dtype = interleaved_dtype([blk.dtype for blk in self.blocks]) + if is_float_dtype(dtype): + # GH45857 avoid unnecessary upcasting + fill_value = dtype.type(fill_value) + block_shape = list(self.shape) block_shape[0] = len(placement) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 8575e7895ae5a..fb2b86dea627a 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -950,6 +950,12 @@ def test_reindex_with_nans(self): expected = df.iloc[[1]] tm.assert_frame_equal(result, expected) + def test_reindex_without_upcasting(self): + # GH45857 + df = DataFrame(np.zeros((10, 10), dtype=np.float32)) + result = df.reindex(columns=np.arange(5, 15)).dtypes + assert result.eq(np.float32).all() + def test_reindex_multi(self): df = DataFrame(np.random.randn(3, 3)) From 95093d621fa02666c4a8f5110a8331c5ffb36878 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Wed, 8 Jun 2022 00:06:15 -0400 Subject: [PATCH 2/9] mypy --- pandas/core/internals/managers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index a30acb81262b7..2ed9f9a685011 100644 --- 
a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -821,6 +821,7 @@ def _make_na_block( dtype = interleaved_dtype([blk.dtype for blk in self.blocks]) if is_float_dtype(dtype): # GH45857 avoid unnecessary upcasting + dtype = cast(np.dtype, dtype) fill_value = dtype.type(fill_value) block_shape = list(self.shape) From c596952c99f4a7c2144d1ee7960b771fb177245a Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Sat, 2 Jul 2022 09:16:29 -0400 Subject: [PATCH 3/9] infer dtype logic --- pandas/core/internals/managers.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index cc040db4fd6af..49cb74751a4c9 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -820,18 +820,22 @@ def _make_na_block( nb = NumpyBlock(vals, placement, ndim=2) return nb + dtype = None if fill_value is None: fill_value = np.nan - dtype = interleaved_dtype([blk.dtype for blk in self.blocks]) - if is_float_dtype(dtype): - # GH45857 avoid unnecessary upcasting + # GH45857 avoid unnecessary upcasting + idtype = interleaved_dtype([blk.dtype for blk in self.blocks]) + if is_float_dtype(idtype): + dtype = idtype dtype = cast(np.dtype, dtype) fill_value = dtype.type(fill_value) + if dtype is None: + dtype, fill_value = infer_dtype_from_scalar(fill_value) + block_shape = list(self.shape) block_shape[0] = len(placement) - dtype, fill_value = infer_dtype_from_scalar(fill_value) # error: Argument "dtype" to "empty" has incompatible type "Union[dtype, # ExtensionDtype]"; expected "Union[dtype, None, type, _SupportsDtype, str, # Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any], _DtypeDict, From a1962129fde041f7c1d2e4af809c81513262a1c7 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Sat, 2 Jul 2022 10:19:05 -0400 Subject: [PATCH 4/9] one last try... 
infer_dtype_from_scalar for floats --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index d356a858a82fb..1a3dbabbbc47a 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -821,7 +821,7 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj, dtype = np.array(val).dtype elif is_float(val): - if isinstance(val, np.floating): + if np.issubdtype(type(val), np.floating): dtype = np.dtype(type(val)) else: dtype = np.dtype(np.float64) From 109eb2d246da015317a0fd0a6505e6ff1b63d4af Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Sat, 2 Jul 2022 12:50:53 -0400 Subject: [PATCH 5/9] expand test --- pandas/core/dtypes/cast.py | 2 +- pandas/tests/frame/methods/test_reindex.py | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 1a3dbabbbc47a..d356a858a82fb 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -821,7 +821,7 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj, dtype = np.array(val).dtype elif is_float(val): - if np.issubdtype(type(val), np.floating): + if isinstance(val, np.floating): dtype = np.dtype(type(val)) else: dtype = np.dtype(np.float64) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index fb2b86dea627a..8164a35fb9ca8 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -953,8 +953,13 @@ def test_reindex_with_nans(self): def test_reindex_without_upcasting(self): # GH45857 df = DataFrame(np.zeros((10, 10), dtype=np.float32)) - result = df.reindex(columns=np.arange(5, 15)).dtypes - assert result.eq(np.float32).all() + fill_value = np.float32(np.nan) + result = df.reindex(columns=np.arange(5, 15), fill_value=fill_value) + assert result.dtypes.eq(np.float32).all() + 
+ df = DataFrame(np.zeros((10, 10), dtype=np.float32)) + result = df.reindex(columns=np.arange(5, 15)) + assert result.dtypes.eq(np.float32).all() def test_reindex_multi(self): df = DataFrame(np.random.randn(3, 3)) From 7e969b1248e5470d3061e39ee7601f7c56a57f2d Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Sat, 2 Jul 2022 15:17:02 -0400 Subject: [PATCH 6/9] try np.issubdtype --- pandas/core/internals/managers.py | 11 +++-------- pandas/tests/frame/methods/test_reindex.py | 5 ----- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 49cb74751a4c9..700f446b4e08f 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -37,7 +37,6 @@ ensure_platform_int, is_1d_only_ea_dtype, is_dtype_equal, - is_float_dtype, is_list_like, ) from pandas.core.dtypes.dtypes import ExtensionDtype @@ -820,18 +819,14 @@ def _make_na_block( nb = NumpyBlock(vals, placement, ndim=2) return nb - dtype = None if fill_value is None: fill_value = np.nan # GH45857 avoid unnecessary upcasting - idtype = interleaved_dtype([blk.dtype for blk in self.blocks]) - if is_float_dtype(idtype): - dtype = idtype - dtype = cast(np.dtype, dtype) + dtype = interleaved_dtype([blk.dtype for blk in self.blocks]) + if dtype is not None and np.issubdtype(dtype.type, np.floating): fill_value = dtype.type(fill_value) - if dtype is None: - dtype, fill_value = infer_dtype_from_scalar(fill_value) + dtype, fill_value = infer_dtype_from_scalar(fill_value) block_shape = list(self.shape) block_shape[0] = len(placement) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 8164a35fb9ca8..34142d9078383 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -952,11 +952,6 @@ def test_reindex_with_nans(self): def test_reindex_without_upcasting(self): # GH45857 - df = DataFrame(np.zeros((10, 10), 
dtype=np.float32)) - fill_value = np.float32(np.nan) - result = df.reindex(columns=np.arange(5, 15), fill_value=fill_value) - assert result.dtypes.eq(np.float32).all() - df = DataFrame(np.zeros((10, 10), dtype=np.float32)) result = df.reindex(columns=np.arange(5, 15)) assert result.dtypes.eq(np.float32).all() From 39bc9b687844bca8571f7e6a5159a125ec342921 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 2 Feb 2024 14:10:27 -0800 Subject: [PATCH 7/9] Check for nan --- pandas/core/internals/managers.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 6504fb60728cc..e0bd7ea715258 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1119,7 +1119,6 @@ def _make_na_block( self, placement: BlockPlacement, fill_value=None, use_na_proxy: bool = False ) -> Block: # Note: we only get here with self.ndim == 2 - if use_na_proxy: assert fill_value is None shape = (len(placement), self.shape[1]) @@ -1127,7 +1126,7 @@ def _make_na_block( nb = NumpyBlock(vals, placement, ndim=2) return nb - if fill_value is None: + if fill_value is None or fill_value is np.nan: fill_value = np.nan # GH45857 avoid unnecessary upcasting dtype = interleaved_dtype([blk.dtype for blk in self.blocks]) From 3726907b9ed3854a18a44982049a40a32f294805 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 5 Feb 2024 18:43:38 -0800 Subject: [PATCH 8/9] Whitespace --- pandas/core/internals/managers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 06010507ecd23..615bcd4602058 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1042,6 +1042,7 @@ def _make_na_block( self, placement: BlockPlacement, fill_value=None, use_na_proxy: bool = False ) -> Block: # Note: we 
only get here with self.ndim == 2 + if use_na_proxy: assert fill_value is None shape = (len(placement), self.shape[1]) From 95069f3e55f53ccb69912c903014f13bd8c152cd Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 5 Feb 2024 18:45:14 -0800 Subject: [PATCH 9/9] Add whatsnew --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 81c3f88f7e8ad..53d2025448640 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -161,7 +161,7 @@ Numeric Conversion ^^^^^^^^^^ - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`) -- +- Bug in :meth:`DataFrame.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`) Strings ^^^^^^^