diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index e1fe2f7fe77e2..ebc492a493452 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -165,6 +165,7 @@ I/O - :meth:`read_csv` now accepts binary mode file buffers when using the Python csv engine (:issue:`23779`) - Bug in :meth:`DataFrame.to_json` where using a Tuple as a column or index value and using ``orient="columns"`` or ``orient="index"`` would produce invalid JSON (:issue:`20500`) - Improve infinity parsing. :meth:`read_csv` now interprets ``Infinity``, ``+Infinity``, ``-Infinity`` as floating point values (:issue:`10065`) +- Bug in :meth:`DataFrame.to_csv` where values were truncated when the length of ``na_rep`` was shorter than the text input data (:issue:`25099`) Plotting ^^^^^^^^ diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx index e5d78dae9c023..e76c6874b8b1b 100644 --- a/pandas/_libs/writers.pyx +++ b/pandas/_libs/writers.pyx @@ -120,10 +120,7 @@ def max_len_string_array(pandas_string[:] arr) -> Py_ssize_t: for i in range(length): val = arr[i] - if isinstance(val, str): - l = PyUnicode_GET_SIZE(val) - elif isinstance(val, bytes): - l = PyBytes_GET_SIZE(val) + l = word_len(val) if l > m: m = l @@ -131,6 +128,18 @@ def max_len_string_array(pandas_string[:] arr) -> Py_ssize_t: return m +cpdef inline Py_ssize_t word_len(object val): + """ return the length of a string or bytes value (0 for any other type) """ + cdef: + Py_ssize_t l = 0 + + if isinstance(val, str): + l = PyUnicode_GET_SIZE(val) + elif isinstance(val, bytes): + l = PyBytes_GET_SIZE(val) + + return l + # ------------------------------------------------------------------ # PyTables Helpers diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 2a44177d445df..29d225443d18a 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -7,7 +7,7 @@ import numpy as np -from pandas._libs import NaT, Timestamp, lib, tslib +from 
pandas._libs import NaT, Timestamp, lib, tslib, writers import pandas._libs.internals as libinternals from pandas._libs.tslibs import Timedelta, conversion from pandas._libs.tslibs.timezones import tz_compare @@ -706,7 +706,8 @@ def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs): mask = isna(values) if not self.is_object and not quoting: - values = values.astype(str) + itemsize = writers.word_len(na_rep) + values = values.astype("