Skip to content

Commit 6df4a3f

Browse files
dsaxton authored and Kevin D Smith committed
BUG: Fix astype from float32 to string (pandas-dev#36464)
1 parent 5fd1983 commit 6df4a3f

File tree

5 files changed

+22
-3
lines changed

5 files changed

+22
-3
lines changed

doc/source/whatsnew/v1.1.3.rst

+1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ Bug fixes
4747
- Bug in :class:`Series` constructor where integer overflow would occur for sufficiently large scalar inputs when an index was provided (:issue:`36291`)
4848
- Bug in :meth:`DataFrame.sort_values` raising an ``AttributeError`` when sorting on a key that casts column to categorical dtype (:issue:`36383`)
4949
- Bug in :meth:`DataFrame.stack` raising a ``ValueError`` when stacking :class:`MultiIndex` columns based on position when the levels had duplicate names (:issue:`36353`)
50+
- Bug in :meth:`Series.astype` showing too much precision when casting from ``np.float32`` to string dtype (:issue:`36451`)
5051
- Bug in :meth:`Series.isin` and :meth:`DataFrame.isin` when using ``NaN`` and a row length above 1,000,000 (:issue:`22205`)
5152

5253
.. ---------------------------------------------------------------------------

pandas/_libs/lib.pyx

+2-1
Original file line numberDiff line numberDiff line change
@@ -659,11 +659,12 @@ cpdef ndarray[object] ensure_string_array(
659659
Py_ssize_t i = 0, n = len(arr)
660660

661661
result = np.asarray(arr, dtype="object")
662+
662663
if copy and result is arr:
663664
result = result.copy()
664665

665666
for i in range(n):
666-
val = result[i]
667+
val = arr[i]
667668

668669
if isinstance(val, str):
669670
continue

pandas/core/arrays/string_.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -198,10 +198,9 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
198198
if dtype:
199199
assert dtype == "string"
200200

201-
result = np.asarray(scalars, dtype="object")
202201
# convert non-na-likes to str, and nan-likes to StringDtype.na_value
203202
result = lib.ensure_string_array(
204-
result, na_value=StringDtype.na_value, copy=copy
203+
scalars, na_value=StringDtype.na_value, copy=copy
205204
)
206205

207206
# Manually creating new array avoids the validation step in the __init__, so is

pandas/tests/arrays/string_/test_string.py

+9
Original file line numberDiff line numberDiff line change
@@ -336,3 +336,12 @@ def test_memory_usage():
336336
series = pd.Series(["a", "b", "c"], dtype="string")
337337

338338
assert 0 < series.nbytes <= series.memory_usage() < series.memory_usage(deep=True)
339+
340+
341+
@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
342+
def test_astype_from_float_dtype(dtype):
343+
# https://github.com/pandas-dev/pandas/issues/36451
344+
s = pd.Series([0.1], dtype=dtype)
345+
result = s.astype("string")
346+
expected = pd.Series(["0.1"], dtype="string")
347+
tm.assert_series_equal(result, expected)

pandas/tests/series/methods/test_astype.py

+9
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import numpy as np
12
import pytest
23

34
from pandas import Interval, Series, Timestamp, date_range
@@ -46,3 +47,11 @@ def test_astype_ignores_errors_for_extension_dtypes(self, values, errors):
4647
msg = "(Cannot cast)|(could not convert)"
4748
with pytest.raises((ValueError, TypeError), match=msg):
4849
values.astype(float, errors=errors)
50+
51+
@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
52+
def test_astype_from_float_to_str(self, dtype):
53+
# https://github.com/pandas-dev/pandas/issues/36451
54+
s = Series([0.1], dtype=dtype)
55+
result = s.astype(str)
56+
expected = Series(["0.1"])
57+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)