Skip to content

Commit 00fa270

Browse files
authored
API: Series(floaty, dtype=inty) (#49609)
1 parent a5cbd1e commit 00fa270

File tree

6 files changed

+36
-73
lines changed

6 files changed

+36
-73
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -476,6 +476,7 @@ Other API changes
476476
- :func:`read_stata` with parameter ``index_col`` set to ``None`` (the default) will now set the index on the returned :class:`DataFrame` to a :class:`RangeIndex` instead of a :class:`Int64Index` (:issue:`49745`)
477477
- Changed behavior of :class:`Index`, :class:`Series`, and :class:`DataFrame` arithmetic methods when working with object-dtypes; the results no longer do type inference on the result of the array operations. Use ``result.infer_objects()`` to do type inference on the result (:issue:`49999`)
478478
- Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing all-``bool`` values or all-complex values, this will now retain object dtype, consistent with the :class:`Series` behavior (:issue:`49594`)
479+
- Changed behavior of :class:`Series` and :class:`DataFrame` constructors when given an integer dtype and floating-point data containing non-round numbers; this now raises ``ValueError`` instead of silently retaining the float dtype. Use ``Series(data)`` or ``DataFrame(data)`` to get the old behavior, and ``Series(data).astype(dtype)`` or ``DataFrame(data).astype(dtype)`` to get the specified dtype (:issue:`49599`)
479480
- Changed behavior of :meth:`DataFrame.shift` with ``axis=1``, an integer ``fill_value``, and homogeneous datetime-like dtype, this now fills new columns with integer dtypes instead of casting to datetimelike (:issue:`49842`)
480481
- Files are now closed when encountering an exception in :func:`read_json` (:issue:`49921`)
481482
- Changed behavior of :func:`read_csv`, :func:`read_json` & :func:`read_fwf`, where the index will now always be a :class:`RangeIndex`, when no index is specified. Previously the index would be a :class:`Index` with dtype ``object`` if the new DataFrame/Series has length 0 (:issue:`49572`)

pandas/core/construction.py

+3-52
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
DtypeObj,
2828
T,
2929
)
30-
from pandas.errors import IntCastingNaNError
3130

3231
from pandas.core.dtypes.base import (
3332
ExtensionDtype,
@@ -46,7 +45,6 @@
4645
is_datetime64_ns_dtype,
4746
is_dtype_equal,
4847
is_extension_array_dtype,
49-
is_float_dtype,
5048
is_integer_dtype,
5149
is_list_like,
5250
is_object_dtype,
@@ -503,7 +501,6 @@ def sanitize_array(
503501
copy: bool = False,
504502
*,
505503
allow_2d: bool = False,
506-
strict_ints: bool = False,
507504
) -> ArrayLike:
508505
"""
509506
Sanitize input data to an ndarray or ExtensionArray, copy if specified,
@@ -517,8 +514,6 @@ def sanitize_array(
517514
copy : bool, default False
518515
allow_2d : bool, default False
519516
If False, raise if we have a 2D Arraylike.
520-
strict_ints : bool, default False
521-
If False, silently ignore failures to cast float data to int dtype.
522517
523518
Returns
524519
-------
@@ -571,32 +566,7 @@ def sanitize_array(
571566
if isinstance(data, np.matrix):
572567
data = data.A
573568

574-
if dtype is not None and is_float_dtype(data.dtype) and is_integer_dtype(dtype):
575-
# possibility of nan -> garbage
576-
try:
577-
# GH 47391 numpy > 1.24 will raise a RuntimeError for nan -> int
578-
# casting aligning with IntCastingNaNError below
579-
with np.errstate(invalid="ignore"):
580-
# GH#15832: Check if we are requesting a numeric dtype and
581-
# that we can convert the data to the requested dtype.
582-
subarr = maybe_cast_to_integer_array(data, dtype)
583-
584-
except IntCastingNaNError:
585-
raise
586-
except ValueError:
587-
# Pre-2.0, we would have different behavior for Series vs DataFrame.
588-
# DataFrame would call np.array(data, dtype=dtype, copy=copy),
589-
# which would cast to the integer dtype even if the cast is lossy.
590-
# See GH#40110.
591-
if strict_ints:
592-
raise
593-
594-
# We ignore the dtype arg and return floating values,
595-
# e.g. test_constructor_floating_data_int_dtype
596-
# TODO: where is the discussion that documents the reason for this?
597-
subarr = np.array(data, copy=copy)
598-
599-
elif dtype is None:
569+
if dtype is None:
600570
subarr = data
601571
if data.dtype == object:
602572
subarr = maybe_infer_to_datetimelike(data)
@@ -629,27 +599,8 @@ def sanitize_array(
629599
subarr = np.array([], dtype=np.float64)
630600

631601
elif dtype is not None:
632-
try:
633-
subarr = _try_cast(data, dtype, copy)
634-
except ValueError:
635-
if is_integer_dtype(dtype):
636-
if strict_ints:
637-
raise
638-
casted = np.array(data, copy=False)
639-
if casted.dtype.kind == "f":
640-
# GH#40110 match the behavior we have if we passed
641-
# a ndarray[float] to begin with
642-
return sanitize_array(
643-
casted,
644-
index,
645-
dtype,
646-
copy=False,
647-
allow_2d=allow_2d,
648-
)
649-
else:
650-
raise
651-
else:
652-
raise
602+
subarr = _try_cast(data, dtype, copy)
603+
653604
else:
654605
subarr = maybe_convert_platform(data)
655606
if subarr.dtype == object:

pandas/core/indexes/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,7 @@ def __new__(
499499
data = com.asarray_tuplesafe(data, dtype=_dtype_obj)
500500

501501
try:
502-
arr = sanitize_array(data, None, dtype=dtype, copy=copy, strict_ints=True)
502+
arr = sanitize_array(data, None, dtype=dtype, copy=copy)
503503
except ValueError as err:
504504
if "index must be specified when data is not list-like" in str(err):
505505
raise cls._raise_scalar_data_error(data) from err

pandas/tests/frame/test_constructors.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -2706,11 +2706,12 @@ def test_floating_values_integer_dtype(self):
27062706

27072707
arr = np.random.randn(10, 5)
27082708

2709-
# as of 2.0, we match Series behavior by retaining float dtype instead
2710-
# of doing a lossy conversion here. Below we _do_ do the conversion
2711-
# since it is lossless.
2712-
df = DataFrame(arr, dtype="i8")
2713-
assert (df.dtypes == "f8").all()
2709+
# GH#49599 in 2.0 we raise instead of either
2710+
# a) silently ignoring dtype and returning float (the old Series behavior) or
2711+
# b) rounding (the old DataFrame behavior)
2712+
msg = "Trying to coerce float values to integers"
2713+
with pytest.raises(ValueError, match=msg):
2714+
DataFrame(arr, dtype="i8")
27142715

27152716
df = DataFrame(arr.round(), dtype="i8")
27162717
assert (df.dtypes == "i8").all()

pandas/tests/series/test_constructors.py

+21-12
Original file line numberDiff line numberDiff line change
@@ -801,11 +801,13 @@ def test_constructor_floating_data_int_dtype(self, frame_or_series):
801801
# not clear if this is what we want long-term
802802
expected = frame_or_series(arr)
803803

804-
res = frame_or_series(arr, dtype="i8")
805-
tm.assert_equal(res, expected)
804+
# GH#49599 as of 2.0 we raise instead of silently retaining float dtype
805+
msg = "Trying to coerce float values to integer"
806+
with pytest.raises(ValueError, match=msg):
807+
frame_or_series(arr, dtype="i8")
806808

807-
res = frame_or_series(list(arr), dtype="i8")
808-
tm.assert_equal(res, expected)
809+
with pytest.raises(ValueError, match=msg):
810+
frame_or_series(list(arr), dtype="i8")
809811

810812
# pre-2.0, when we had NaNs, we silently ignored the integer dtype
811813
arr[0] = np.nan
@@ -815,7 +817,12 @@ def test_constructor_floating_data_int_dtype(self, frame_or_series):
815817
with pytest.raises(IntCastingNaNError, match=msg):
816818
frame_or_series(arr, dtype="i8")
817819

818-
with pytest.raises(IntCastingNaNError, match=msg):
820+
exc = IntCastingNaNError
821+
if frame_or_series is Series:
822+
# TODO: try to align these
823+
exc = ValueError
824+
msg = "cannot convert float NaN to integer"
825+
with pytest.raises(exc, match=msg):
819826
# same behavior if we pass list instead of the ndarray
820827
frame_or_series(list(arr), dtype="i8")
821828

@@ -833,13 +840,14 @@ def test_constructor_coerce_float_fail(self, any_int_numpy_dtype):
833840
# see gh-15832
834841
# Updated: make sure we treat this list the same as we would treat
835842
# the equivalent ndarray
843+
# GH#49599 pre-2.0 we silently retained float dtype, in 2.0 we raise
836844
vals = [1, 2, 3.5]
837845

838-
res = Series(vals, dtype=any_int_numpy_dtype)
839-
expected = Series(np.array(vals), dtype=any_int_numpy_dtype)
840-
tm.assert_series_equal(res, expected)
841-
alt = Series(np.array(vals)) # i.e. we ignore the dtype kwd
842-
tm.assert_series_equal(alt, expected)
846+
msg = "Trying to coerce float values to integer"
847+
with pytest.raises(ValueError, match=msg):
848+
Series(vals, dtype=any_int_numpy_dtype)
849+
with pytest.raises(ValueError, match=msg):
850+
Series(np.array(vals), dtype=any_int_numpy_dtype)
843851

844852
def test_constructor_coerce_float_valid(self, float_numpy_dtype):
845853
s = Series([1, 2, 3.5], dtype=float_numpy_dtype)
@@ -853,9 +861,10 @@ def test_constructor_invalid_coerce_ints_with_float_nan(self, any_int_numpy_dtyp
853861
vals = [1, 2, np.nan]
854862
# pre-2.0 this would return with a float dtype, in 2.0 we raise
855863

856-
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
857-
with pytest.raises(IntCastingNaNError, match=msg):
864+
msg = "cannot convert float NaN to integer"
865+
with pytest.raises(ValueError, match=msg):
858866
Series(vals, dtype=any_int_numpy_dtype)
867+
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
859868
with pytest.raises(IntCastingNaNError, match=msg):
860869
Series(np.array(vals), dtype=any_int_numpy_dtype)
861870

pandas/tests/test_downstream.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,10 @@ def test_construct_dask_float_array_int_dtype_match_ndarray():
9595
expected = Series(arr)
9696
tm.assert_series_equal(res, expected)
9797

98-
res = Series(darr, dtype="i8")
99-
expected = Series(arr, dtype="i8")
100-
tm.assert_series_equal(res, expected)
98+
# GH#49599 in 2.0 we raise instead of silently ignoring the dtype
99+
msg = "Trying to coerce float values to integers"
100+
with pytest.raises(ValueError, match=msg):
101+
Series(darr, dtype="i8")
101102

102103
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
103104
arr[2] = np.nan

0 commit comments

Comments
 (0)