Skip to content

Commit f983159

Browse files
5j9noatamir
authored and committed
BUG: make Series handle dtype='int64' for string array (pandas-dev#48333)
* make sure conversion is not lossy
* add whatsnew and ref issue in test
* raise earlier (note the comment at the last line: `# No known cases that get here`)
* mv whatsnew entry to 1.6.0.rst
1 parent a2ee3ee commit f983159

File tree

3 files changed

+19
-1
lines changed

3 files changed

+19
-1
lines changed

doc/source/whatsnew/v1.6.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ Numeric
143143

144144
Conversion
145145
^^^^^^^^^^
146-
-
146+
- Bug in constructing :class:`Series` with ``int64`` dtype from a string list raising instead of casting (:issue:`44923`)
147147
-
148148

149149
Strings

pandas/core/dtypes/cast.py

+5
Original file line numberDiff line numberDiff line change
@@ -1851,6 +1851,11 @@ def maybe_cast_to_integer_array(
18511851
# doesn't handle `uint64` correctly.
18521852
arr = np.asarray(arr)
18531853

1854+
if np.issubdtype(arr.dtype, str):
1855+
if (casted.astype(str) == arr).all():
1856+
return casted
1857+
raise ValueError(f"string values cannot be losslessly cast to {dtype}")
1858+
18541859
if is_unsigned_integer_dtype(dtype) and (arr < 0).any():
18551860
raise OverflowError("Trying to coerce negative values to unsigned integers")
18561861

pandas/tests/series/test_constructors.py

+13
Original file line numberDiff line numberDiff line change
@@ -1881,6 +1881,19 @@ def test_constructor_bool_dtype_missing_values(self):
18811881
expected = Series(True, index=[0], dtype="bool")
18821882
tm.assert_series_equal(result, expected)
18831883

1884+
def test_constructor_int64_dtype(self, any_int_dtype):
1885+
# GH#44923
1886+
result = Series(["0", "1", "2"], dtype=any_int_dtype)
1887+
expected = Series([0, 1, 2], dtype=any_int_dtype)
1888+
tm.assert_series_equal(result, expected)
1889+
1890+
def test_constructor_raise_on_lossy_conversion_of_strings(self):
1891+
# GH#44923
1892+
with pytest.raises(
1893+
ValueError, match="string values cannot be losslessly cast to int8"
1894+
):
1895+
Series(["128"], dtype="int8")
1896+
18841897
def test_constructor_dtype_timedelta_alternative_construct(self):
18851898
# GH#35465
18861899
result = Series([1000000, 200000, 3000000], dtype="timedelta64[ns]")

0 commit comments

Comments
 (0)