Skip to content

DEPR: downcast inferring round floats to ints #53108

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 17 commits into from
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,8 @@ Deprecations
- Deprecated logical operation between two non boolean :class:`Series` with different indexes always coercing the result to bool dtype. In a future version, this will maintain the return type of the inputs. (:issue:`52500`, :issue:`52538`)
- Deprecated allowing ``downcast`` keyword other than ``None``, ``False``, "infer", or a dict with these as values in :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`40988`)
- Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`)
- Deprecated downcasting behavior in :meth:`Series.interpolate`, :meth:`Series.fillna`, :meth:`DataFrame.interpolate`, :meth:`DataFrame.fillna` with ``downcast="infer"`` and floating dtypes; in a future version these will not cast all-round floats to integer dtype. Explicitly cast the result instead (:issue:`40988`)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the keyword doing now? Only casting from int64 -> int8?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just object dtype

- Deprecated downcasting behavior in :meth:`Series.where` and :meth:`DataFrame.where` with floating dtypes; in a future version these will not cast all-round floats to integer dtype. Explicitly cast the result instead (:issue:`40988`)
-

.. ---------------------------------------------------------------------------
Expand Down
4 changes: 4 additions & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,10 @@ def pytest_collection_modifyitems(items, config) -> None:
"(Series|DataFrame).bool is now deprecated and will be removed "
"in future version of pandas",
),
(
"pandas.core.generic.NDFrame.clip",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we add this one to the whatsnew as well?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will update

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated + green

"where downcasting from floating dtype to integer dtype is deprecated",
),
]

for item in items:
Expand Down
38 changes: 31 additions & 7 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
cast,
final,
)
import warnings

import numpy as np

Expand Down Expand Up @@ -41,6 +42,7 @@
)
from pandas.errors import AbstractMethodError
from pandas.util._decorators import cache_readonly
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.astype import (
Expand Down Expand Up @@ -422,7 +424,11 @@ def coerce_to_target_dtype(self, other) -> Block:

@final
def _maybe_downcast(
self, blocks: list[Block], downcast=None, using_cow: bool = False
self,
blocks: list[Block],
downcast=None,
using_cow: bool = False,
caller: str = "fillna",
) -> list[Block]:
if downcast is False:
return blocks
Expand All @@ -441,17 +447,35 @@ def _maybe_downcast(
if downcast is None:
return blocks

return extend_blocks([b._downcast_2d(downcast, using_cow) for b in blocks])
return extend_blocks(
[b._downcast_2d(downcast, using_cow, caller=caller) for b in blocks]
)

@final
@maybe_split
def _downcast_2d(self, dtype, using_cow: bool = False) -> list[Block]:
def _downcast_2d(
self, dtype, using_cow: bool = False, *, caller: str = "fillna"
) -> list[Block]:
"""
downcast specialized to 2D case post-validation.

Refactored to allow use of maybe_split.
"""
new_values = maybe_downcast_to_dtype(self.values, dtype=dtype)
if (
dtype == "infer"
and self.values.dtype.kind == "f"
and new_values.dtype.kind in "iu"
):
# GH#40988
warnings.warn(
f"{caller} downcasting from floating dtype to integer dtype is "
"deprecated. In a future version this will retain floating "
"dtype. To retain the old behavior, explicitly cast the result "
"to integer dtype",
FutureWarning,
stacklevel=find_stack_level(),
)
new_values = maybe_coerce_values(new_values)
refs = self.refs if using_cow and new_values is self.values else None
return [self.make_block(new_values, refs=refs)]
Expand Down Expand Up @@ -1194,7 +1218,7 @@ def where(
block = self.coerce_to_target_dtype(other)
blocks = block.where(orig_other, cond, using_cow=using_cow)
return self._maybe_downcast(
blocks, downcast=_downcast, using_cow=using_cow
blocks, downcast=_downcast, using_cow=using_cow, caller="where"
)

else:
Expand Down Expand Up @@ -1388,7 +1412,7 @@ def interpolate(
)

nb = self.make_block_same_class(data, refs=refs)
return nb._maybe_downcast([nb], downcast, using_cow)
return nb._maybe_downcast([nb], downcast, using_cow, caller="interpolate")

def diff(self, n: int, axis: AxisInt = 1) -> list[Block]:
"""return block for the diff of the values"""
Expand Down Expand Up @@ -1671,7 +1695,7 @@ def where(
blk = self.coerce_to_target_dtype(orig_other)
nbs = blk.where(orig_other, orig_cond, using_cow=using_cow)
return self._maybe_downcast(
nbs, downcast=_downcast, using_cow=using_cow
nbs, downcast=_downcast, using_cow=using_cow, caller="where"
)

elif isinstance(self, NDArrayBackedExtensionBlock):
Expand All @@ -1680,7 +1704,7 @@ def where(
blk = self.coerce_to_target_dtype(orig_other)
nbs = blk.where(orig_other, orig_cond, using_cow=using_cow)
return self._maybe_downcast(
nbs, downcast=_downcast, using_cow=using_cow
nbs, downcast=_downcast, using_cow=using_cow, caller="where"
)

else:
Expand Down
11 changes: 7 additions & 4 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,26 +207,29 @@ def __internal_pivot_table(
to_unstack.append(i)
else:
to_unstack.append(name)
table = agged.unstack(to_unstack)
table = agged.unstack(to_unstack, fill_value=fill_value)

if not dropna:
if isinstance(table.index, MultiIndex):
m = MultiIndex.from_arrays(
cartesian_product(table.index.levels), names=table.index.names
)
table = table.reindex(m, axis=0)
table = table.reindex(m, axis=0, fill_value=fill_value)

if isinstance(table.columns, MultiIndex):
m = MultiIndex.from_arrays(
cartesian_product(table.columns.levels), names=table.columns.names
)
table = table.reindex(m, axis=1)
table = table.reindex(m, axis=1, fill_value=fill_value)

if sort is True and isinstance(table, ABCDataFrame):
table = table.sort_index(axis=1)

if fill_value is not None:
table = table.fillna(fill_value, downcast="infer")
table = table.fillna(fill_value)
table = table.infer_objects()
if aggfunc is len and not observed and lib.is_integer(fill_value):
table = table.astype(np.int64)

if margins:
if dropna:
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/frame/indexing/test_where.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,9 @@ def test_where_bug_transposition(self):
expected = a.copy()
expected[~do_not_replace] = b

result = a.where(do_not_replace, b)
msg = "where downcasting from floating dtype to integer dtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = a.where(do_not_replace, b)
tm.assert_frame_equal(result, expected)

a = DataFrame({0: [4, 6], 1: [1, 0]})
Expand All @@ -358,7 +360,9 @@ def test_where_bug_transposition(self):
expected = a.copy()
expected[~do_not_replace] = b

result = a.where(do_not_replace, b)
msg = "where downcasting from floating dtype to integer dtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = a.where(do_not_replace, b)
tm.assert_frame_equal(result, expected)

def test_where_datetime(self):
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/frame/methods/test_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,9 @@ def test_clip_with_na_args(self, float_frame):
# GH#19992 and adjusted in GH#40420
df = DataFrame({"col_0": [1, 2, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]})

result = df.clip(lower=[4, 5, np.nan], axis=0)
msg = "where downcasting from floating dtype to integer dtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.clip(lower=[4, 5, np.nan], axis=0)
expected = DataFrame(
{"col_0": [4, 5, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]}
)
Expand All @@ -161,7 +163,9 @@ def test_clip_with_na_args(self, float_frame):
data = {"col_0": [9, -3, 0, -1, 5], "col_1": [-2, -7, 6, 8, -5]}
df = DataFrame(data)
t = Series([2, -4, np.NaN, 6, 3])
result = df.clip(lower=t, axis=0)
msg = "where downcasting from floating dtype to integer dtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.clip(lower=t, axis=0)
expected = DataFrame({"col_0": [9, -3, 0, 6, 5], "col_1": [2, -4, 6, 8, 3]})
tm.assert_frame_equal(result, expected)

Expand Down
12 changes: 9 additions & 3 deletions pandas/tests/frame/methods/test_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,13 +274,17 @@ def test_fillna_downcast(self):
# GH#15277
# infer int64 from float64
df = DataFrame({"a": [1.0, np.nan]})
result = df.fillna(0, downcast="infer")
msg = "fillna downcasting from floating dtype to integer dtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.fillna(0, downcast="infer")
expected = DataFrame({"a": [1, 0]})
tm.assert_frame_equal(result, expected)

# infer int64 from float64 when fillna value is a dict
df = DataFrame({"a": [1.0, np.nan]})
result = df.fillna({"a": 0}, downcast="infer")
msg = "fillna downcasting from floating dtype to integer dtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.fillna({"a": 0}, downcast="infer")
expected = DataFrame({"a": [1, 0]})
tm.assert_frame_equal(result, expected)

Expand All @@ -306,7 +310,9 @@ def test_fillna_downcast_noop(self, frame_or_series):
tm.assert_equal(res, expected)

obj2 = obj.astype(np.float64)
res2 = obj2.fillna("foo", downcast="infer")
msg2 = "fillna downcasting from floating dtype to integer dtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg2):
res2 = obj2.fillna("foo", downcast="infer")
expected2 = obj # get back int64
tm.assert_equal(res2, expected2)

Expand Down
18 changes: 15 additions & 3 deletions pandas/tests/frame/methods/test_interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,11 @@ def test_interp_combo(self):
expected = Series([1.0, 2.0, 3.0, 4.0], name="A")
tm.assert_series_equal(result, expected)

result = df["A"].interpolate(downcast="infer")
msg = (
"interpolate downcasting from floating dtype to integer dtype is deprecated"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df["A"].interpolate(downcast="infer")
expected = Series([1, 2, 3, 4], name="A")
tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -228,7 +232,11 @@ def test_interp_alt_scipy(self):
expected.loc[5, "A"] = 6
tm.assert_frame_equal(result, expected)

result = df.interpolate(method="barycentric", downcast="infer")
msg = (
"interpolate downcasting from floating dtype to integer dtype is deprecated"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.interpolate(method="barycentric", downcast="infer")
tm.assert_frame_equal(result, expected.astype(np.int64))

result = df.interpolate(method="krogh")
Expand Down Expand Up @@ -352,7 +360,11 @@ def test_interp_inplace(self, using_copy_on_write):
tm.assert_frame_equal(result, expected)

result = df.copy()
return_value = result["a"].interpolate(inplace=True, downcast="infer")
msg = (
"interpolate downcasting from floating dtype to integer dtype is deprecated"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
return_value = result["a"].interpolate(inplace=True, downcast="infer")
assert return_value is None
if using_copy_on_write:
tm.assert_frame_equal(result, expected_cow)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1235,7 +1235,7 @@ def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation):

expected = Series(data=[2, 4, np.nan, 1, np.nan, 3], index=index, name="C")
if operation == "agg":
expected = expected.fillna(0, downcast="infer")
expected = expected.fillna(0).astype(np.int64)
grouped = df_cat.groupby(["A", "B"], observed=observed)["C"]
result = getattr(grouped, operation)(sum)
tm.assert_series_equal(result, expected)
Expand Down
10 changes: 8 additions & 2 deletions pandas/tests/series/methods/test_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,14 @@ def test_clip_with_na_args(self):
tm.assert_series_equal(s.clip(upper=np.nan, lower=np.nan), Series([1, 2, 3]))

# GH#19992
tm.assert_series_equal(s.clip(lower=[0, 4, np.nan]), Series([1, 4, 3]))
tm.assert_series_equal(s.clip(upper=[1, np.nan, 1]), Series([1, 2, 1]))
msg = "where downcasting from floating dtype to integer dtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
res = s.clip(lower=[0, 4, np.nan])
tm.assert_series_equal(res, Series([1, 4, 3]))

with tm.assert_produces_warning(FutureWarning, match=msg):
res = s.clip(upper=[1, np.nan, 1])
tm.assert_series_equal(res, Series([1, 2, 1]))

# GH#40420
s = Series([1, 2, 3])
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/series/methods/test_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,13 +172,17 @@ def test_fillna_downcast(self):
# GH#15277
# infer int64 from float64
ser = Series([1.0, np.nan])
result = ser.fillna(0, downcast="infer")
msg = "fillna downcasting from floating dtype to integer dtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.fillna(0, downcast="infer")
expected = Series([1, 0])
tm.assert_series_equal(result, expected)

# infer int64 from float64 when fillna value is a dict
ser = Series([1.0, np.nan])
result = ser.fillna({1: 0}, downcast="infer")
msg = "fillna downcasting from floating dtype to integer dtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.fillna({1: 0}, downcast="infer")
expected = Series([1, 0])
tm.assert_series_equal(result, expected)

Expand Down
9 changes: 7 additions & 2 deletions pandas/tests/series/methods/test_interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,14 +297,19 @@ def test_interp_scipy_basic(self):
result = s.interpolate(method="nearest")
tm.assert_series_equal(result, expected.astype("float"))

result = s.interpolate(method="nearest", downcast="infer")
msg = (
"interpolate downcasting from floating dtype to integer dtype is deprecated"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s.interpolate(method="nearest", downcast="infer")
tm.assert_series_equal(result, expected)
# zero
expected = Series([1, 3, 3, 12, 12, 25])
result = s.interpolate(method="zero")
tm.assert_series_equal(result, expected.astype("float"))

result = s.interpolate(method="zero", downcast="infer")
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s.interpolate(method="zero", downcast="infer")
tm.assert_series_equal(result, expected)
# quadratic
# GH #15662.
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/series/methods/test_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,9 @@ def test_reindex_pad():
result = s.reindex(new_index).ffill()
tm.assert_series_equal(result, expected.astype("float64"))

result = s.reindex(new_index).ffill(downcast="infer")
msg = "interpolate downcasting from floating dtype to integer dtype is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s.reindex(new_index).ffill(downcast="infer")
tm.assert_series_equal(result, expected)

expected = Series([1, 5, 3, 5], index=new_index)
Expand Down