diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index f64f592e109a1..9112cd124f3c9 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1489,7 +1489,7 @@ Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ - Improved performance of ``pd.wide_to_long()`` (:issue:`14779`) -- Improved performance of ``pd.factorize()`` by releasing the GIL with ``object`` dtype when inferred as strings (:issue:`14859`) +- Improved performance of ``pd.factorize()`` by releasing the GIL with ``object`` dtype when inferred as strings (:issue:`14859`, :issue:`16057`) - Improved performance of timeseries plotting with an irregular DatetimeIndex (or with ``compat_x=True``) (:issue:`15073`). - Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`, :issue:`15561`, :issue:`15635`) diff --git a/pandas/_libs/src/numpy_helper.h b/pandas/_libs/src/numpy_helper.h index 5f4db5b2f55d3..a68cb70df21bb 100644 --- a/pandas/_libs/src/numpy_helper.h +++ b/pandas/_libs/src/numpy_helper.h @@ -87,16 +87,11 @@ PANDAS_INLINE PyObject* get_value_1d(PyArrayObject* ap, Py_ssize_t i) { return PyArray_Scalar(item, PyArray_DESCR(ap), (PyObject*)ap); } +// returns ASCII or UTF8 (py3) view on python str +// python object owns memory, should not be freed PANDAS_INLINE char* get_c_string(PyObject* obj) { #if PY_VERSION_HEX >= 0x03000000 - PyObject* enc_str = PyUnicode_AsEncodedString(obj, "utf-8", "error"); - - char* ret; - ret = PyBytes_AS_STRING(enc_str); - - // TODO(general): memory leak here - - return ret; + return PyUnicode_AsUTF8(obj); #else return PyString_AsString(obj); #endif diff --git a/pandas/_libs/src/util.pxd b/pandas/_libs/src/util.pxd index be8d0d4aa6302..076bc1cd56003 100644 --- a/pandas/_libs/src/util.pxd +++ b/pandas/_libs/src/util.pxd @@ -17,7 +17,7 @@ cdef extern from "numpy_helper.h": inline cnp.int64_t get_nat() inline object get_value_1d(ndarray, Py_ssize_t) inline int floatify(object, double*) except -1 - inline char *get_c_string(object) + inline char *get_c_string(object) except NULL inline object char_to_string(char*) inline void transfer_object_column(char *dst, char *src, size_t stride, size_t length)