Skip to content

Commit 527ff39

Browse files
committed
Added Python 2 support
Removed support for unicode strings in Python 2; added a Cython cast from py_string -> const char*
1 parent 5f38e28 commit 527ff39

File tree

4 files changed

+32
-15
lines changed

4 files changed

+32
-15
lines changed

pandas/_libs/lib.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ from pandas._libs.tslibs.nattype import NaT
5959
from pandas._libs.tslibs.conversion cimport convert_to_tsobject
6060
from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64
6161
from pandas._libs.tslibs.timezones cimport get_timezone, tz_compare
62-
62+
from pandas.compat import string_types
6363
from pandas._libs.missing cimport (
6464
checknull, isnaobj, is_null_datetime64, is_null_timedelta64, is_null_period
6565
)

pandas/_libs/src/parser/tokenizer.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1692,7 +1692,6 @@ double precise_xstrtod(const char *str, char **endptr, char decimal,
16921692
int max_digits = 17;
16931693
int n;
16941694

1695-
if (maybe_int != NULL) *maybe_int = 1;
16961695
// Cache powers of 10 in memory.
16971696
static double e[] = {
16981697
1., 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
@@ -1727,6 +1726,7 @@ double precise_xstrtod(const char *str, char **endptr, char decimal,
17271726
1e290, 1e291, 1e292, 1e293, 1e294, 1e295, 1e296, 1e297, 1e298, 1e299,
17281727
1e300, 1e301, 1e302, 1e303, 1e304, 1e305, 1e306, 1e307, 1e308};
17291728

1729+
if (maybe_int != NULL) *maybe_int = 1;
17301730
// Skip leading whitespace.
17311731
while (isspace_ascii(*p)) p++;
17321732

pandas/_libs/tslibs/util.pxd

+30-12
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11

22
from cpython cimport PyTypeObject
3+
from cpython.version cimport PY_MAJOR_VERSION
34

45
cdef extern from *:
56
"""
@@ -14,6 +15,29 @@ cdef extern from *:
1415
object char_to_string(const char* data)
1516

1617

18+
cdef extern from *:
19+
"""
20+
#if PY_VERSION_HEX >= 0x03000000
21+
#define PyBytes_AsStringAndSize(py_string, buffer, length) \
22+
PyBytes_AsStringAndSize(py_string, buffer, length)
23+
24+
#define PyUnicode_AsUTF8AndSize(py_string, buffer, length) \
25+
buffer = PyUnicode_AsUTF8AndSize(py_string, length)
26+
27+
#else
28+
#define PyBytes_AsStringAndSize(py_string, buffer, length) \
29+
PyString_AsStringAndSize(py_string, buffer, length)
30+
31+
#define PyUnicode_AsUTF8AndSize(py_string, buffer, length) \
32+
do { \
33+
buffer = PyUnicode_AS_DATA(py_string); \
34+
*length = PyUnicode_GET_SIZE(py_string); \
35+
} while(0)
36+
#endif
37+
"""
38+
void PyUnicode_AsUTF8AndSize(object py_string, const char* buffer, Py_ssize_t* length)
39+
void PyBytes_AsStringAndSize(object py_string, char** buffer, Py_ssize_t* length)
40+
1741
cdef extern from "Python.h":
1842
# Note: importing extern-style allows us to declare these as nogil
1943
# functions, whereas `from cpython cimport` does not.
@@ -24,15 +48,6 @@ cdef extern from "Python.h":
2448
bint PyComplex_Check(object obj) nogil
2549
bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil
2650

27-
# Note that following functions can potentially raise an exception,
28-
# thus they cannot be declared 'nogil'. Also PyUnicode_AsUTF8AndSize() can
29-
# potentially allocate memory inside in unlikely case of when underlying
30-
# unicode object was stored as non-utf8 and utf8 wasn't requested before.
31-
bint PyBytes_AsStringAndSize(object obj, char** buf,
32-
Py_ssize_t* length) except -1
33-
const char* PyUnicode_AsUTF8AndSize(object obj,
34-
Py_ssize_t* length) except NULL
35-
3651
from numpy cimport int64_t, float64_t
3752

3853
cdef extern from "numpy/arrayobject.h":
@@ -243,7 +258,7 @@ cdef inline bint is_nan(object val):
243258

244259

245260
cdef inline const char* get_c_string_buf_and_size(object py_string,
246-
Py_ssize_t *length):
261+
Py_ssize_t *length) except NULL:
247262
"""
248263
Extract internal char* buffer of unicode or bytes object `py_string` with
249264
getting length of this internal buffer saved in `length`.
@@ -263,10 +278,13 @@ cdef inline const char* get_c_string_buf_and_size(object py_string,
263278
buf : const char*
264279
"""
265280
cdef:
266-
const char *buf
281+
const char *buf = NULL
267282

268283
if PyUnicode_Check(py_string):
269-
buf = PyUnicode_AsUTF8AndSize(py_string, length)
284+
if PY_MAJOR_VERSION > 2:
285+
PyUnicode_AsUTF8AndSize(py_string, buf, length)
286+
else:
287+
buf = py_string
270288
else:
271289
PyBytes_AsStringAndSize(py_string, <char**>&buf, length)
272290
return buf

pandas/tests/io/parser/test_parse_dates.py

-1
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,6 @@ def test_multiple_date_cols_int_cast(all_parsers):
184184
"KORD,19990127, 23:00:00, 22:56:00, -0.5900")
185185
parse_dates = {"actual": [1, 2], "nominal": [1, 3]}
186186
parser = all_parsers
187-
188187
result = parser.read_csv(StringIO(data), header=None,
189188
date_parser=conv.parse_date_time,
190189
parse_dates=parse_dates, prefix="X")

0 commit comments

Comments
 (0)