diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index 7d658215d7b76..72937141c2870 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -47,6 +47,7 @@ Bug fixes - Bug in :class:`Series` constructor where integer overflow would occur for sufficiently large scalar inputs when an index was provided (:issue:`36291`) - Bug in :meth:`DataFrame.sort_values` raising an ``AttributeError`` when sorting on a key that casts column to categorical dtype (:issue:`36383`) - Bug in :meth:`DataFrame.stack` raising a ``ValueError`` when stacking :class:`MultiIndex` columns based on position when the levels had duplicate names (:issue:`36353`) +- Bug in :meth:`Series.astype` showing too much precision when casting from ``np.float32`` to string dtype (:issue:`36451`) - Bug in :meth:`Series.isin` and :meth:`DataFrame.isin` when using ``NaN`` and a row length above 1,000,000 (:issue:`22205`) .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index a57cf3b523985..61a9634b00211 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -659,11 +659,12 @@ cpdef ndarray[object] ensure_string_array( Py_ssize_t i = 0, n = len(arr) result = np.asarray(arr, dtype="object") + if copy and result is arr: result = result.copy() for i in range(n): - val = result[i] + val = arr[i] if isinstance(val, str): continue diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index cef35f2b1137c..cb1144c18e49c 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -198,10 +198,9 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): if dtype: assert dtype == "string" - result = np.asarray(scalars, dtype="object") # convert non-na-likes to str, and nan-likes to StringDtype.na_value result = lib.ensure_string_array( - result, na_value=StringDtype.na_value, copy=copy + scalars, na_value=StringDtype.na_value, copy=copy ) # Manually creating new array avoids the validation step in the __init__, so is diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index efd5d29ae0717..56a8e21edd004 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -336,3 +336,12 @@ def test_memory_usage(): series = pd.Series(["a", "b", "c"], dtype="string") assert 0 < series.nbytes <= series.memory_usage() < series.memory_usage(deep=True) + + +@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64]) +def test_astype_from_float_dtype(dtype): + # https://github.com/pandas-dev/pandas/issues/36451 + s = pd.Series([0.1], dtype=dtype) + result = s.astype("string") + expected = pd.Series(["0.1"], dtype="string") + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index b9d90a9fc63dd..7449d8d65ef96 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -1,3 +1,4 @@ +import numpy as np import pytest from pandas import Interval, Series, Timestamp, date_range @@ -46,3 +47,11 @@ def test_astype_ignores_errors_for_extension_dtypes(self, values, errors): msg = "(Cannot cast)|(could not convert)" with pytest.raises((ValueError, TypeError), match=msg): values.astype(float, errors=errors) + + @pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64]) + def test_astype_from_float_to_str(self, dtype): + # https://github.com/pandas-dev/pandas/issues/36451 + s = Series([0.1], dtype=dtype) + result = s.astype(str) + expected = Series(["0.1"]) + tm.assert_series_equal(result, expected)