Skip to content

Commit 4d5ff7e

Browse files
BUG: Fix astype from float32 to string (#36464) (#36519)
Co-authored-by: Daniel Saxton <[email protected]>
1 parent d05a9ca commit 4d5ff7e

File tree

5 files changed

+22 -4 lines changed

5 files changed

+22 -4 lines changed

doc/source/whatsnew/v1.1.3.rst

+1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ Bug fixes
4747
- Bug in :class:`Series` constructor where integer overflow would occur for sufficiently large scalar inputs when an index was provided (:issue:`36291`)
4848
- Bug in :meth:`DataFrame.sort_values` raising an ``AttributeError`` when sorting on a key that casts column to categorical dtype (:issue:`36383`)
4949
- Bug in :meth:`DataFrame.stack` raising a ``ValueError`` when stacking :class:`MultiIndex` columns based on position when the levels had duplicate names (:issue:`36353`)
50+
- Bug in :meth:`Series.astype` showing too much precision when casting from ``np.float32`` to string dtype (:issue:`36451`)
5051
- Bug in :meth:`Series.isin` and :meth:`DataFrame.isin` when using ``NaN`` and a row length above 1,000,000 (:issue:`22205`)
5152

5253
.. ---------------------------------------------------------------------------

pandas/_libs/lib.pyx

+2 -1
Original file line numberDiff line numberDiff line change
@@ -650,11 +650,12 @@ cpdef ndarray[object] ensure_string_array(
650650
Py_ssize_t i = 0, n = len(arr)
651651

652652
result = np.asarray(arr, dtype="object")
653+
653654
if copy and result is arr:
654655
result = result.copy()
655656

656657
for i in range(n):
657-
val = result[i]
658+
val = arr[i]
658659
if not checknull(val):
659660
result[i] = str(val)
660661
else:

pandas/core/arrays/string_.py

+1 -3
Original file line numberDiff line numberDiff line change
@@ -199,11 +199,9 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
199199
if dtype:
200200
assert dtype == "string"
201201

202-
result = np.asarray(scalars, dtype="object")
203-
204202
# convert non-na-likes to str, and nan-likes to StringDtype.na_value
205203
result = lib.ensure_string_array(
206-
result, na_value=StringDtype.na_value, copy=copy
204+
scalars, na_value=StringDtype.na_value, copy=copy
207205
)
208206

209207
return cls(result)

pandas/tests/arrays/string_/test_string.py

+9
Original file line numberDiff line numberDiff line change
@@ -336,3 +336,12 @@ def test_memory_usage():
336336
series = pd.Series(["a", "b", "c"], dtype="string")
337337

338338
assert 0 < series.nbytes <= series.memory_usage() < series.memory_usage(deep=True)
339+
340+
341+
@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
342+
def test_astype_from_float_dtype(dtype):
343+
# https://github.com/pandas-dev/pandas/issues/36451
344+
s = pd.Series([0.1], dtype=dtype)
345+
result = s.astype("string")
346+
expected = pd.Series(["0.1"], dtype="string")
347+
tm.assert_series_equal(result, expected)

pandas/tests/series/methods/test_astype.py

+9
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import numpy as np
12
import pytest
23

34
from pandas import Interval, Series, Timestamp, date_range
@@ -46,3 +47,11 @@ def test_astype_ignores_errors_for_extension_dtypes(self, values, errors):
4647
msg = "(Cannot cast)|(could not convert)"
4748
with pytest.raises((ValueError, TypeError), match=msg):
4849
values.astype(float, errors=errors)
50+
51+
@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
52+
def test_astype_from_float_to_str(self, dtype):
53+
# https://github.com/pandas-dev/pandas/issues/36451
54+
s = Series([0.1], dtype=dtype)
55+
result = s.astype(str)
56+
expected = Series(["0.1"])
57+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)