From f9dc507aeb1074566fe26b60377989672da55070 Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 29 Sep 2021 21:50:21 +0100 Subject: [PATCH 1/2] PERF: improve perf. of conversion to string arrays --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/_libs/lib.pyx | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index c8fa6d46c3b7e..986c10d824ba0 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -347,6 +347,7 @@ Other Deprecations Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ - Performance improvement in :meth:`.GroupBy.sample`, especially when ``weights`` argument provided (:issue:`34483`) +- Performance improvement when converting non-string arrays to string arrays (:issue:`34483`) - Performance improvement in :meth:`.GroupBy.transform` for user-defined functions (:issue:`41598`) - Performance improvement in constructing :class:`DataFrame` objects (:issue:`42631`) - Performance improvement in :meth:`GroupBy.shift` when ``fill_value`` argument is provided (:issue:`26615`) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index c9548a7e05fc5..5c03720f44548 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -727,14 +727,15 @@ cpdef ndarray[object] ensure_string_array( continue if not checknull(val): - result[i] = str(val) + # f"{val}" is faster than str(val) + result[i] = f"{val}" else: if convert_na_value: val = na_value if skipna: result[i] = val else: - result[i] = str(val) + result[i] = f"{val}" return result From 9e57891bb8dedccaf0a45a959792e3ee95992cbe Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 29 Sep 2021 23:19:32 +0100 Subject: [PATCH 2/2] fix bugs --- pandas/_libs/lib.pyx | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 5c03720f44548..4b1feab63dd1e 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -727,8 +727,12 @@ cpdef ndarray[object] 
ensure_string_array( continue if not checknull(val): - # f"{val}" is faster than str(val) - result[i] = f"{val}" + if not isinstance(val, np.floating): + # f"{val}" is faster than str(val) + result[i] = f"{val}" + else: + # f"{val}" is not always equivalent to str(val) for NumPy floating scalars + result[i] = str(val) else: if convert_na_value: val = na_value