Skip to content

Commit 36569ac

Browse files
committed
python2 support:
StringIO import from pandas.compat; removed PY_VERSION_HEX; removed cdef extern with PyBytes_AsStringAndSize, ...; removed debugging stuff; changed date_strategy's min_value for python2; using 'string_types' instead of 'str' in lib.pyx
1 parent f6e9be7 commit 36569ac

File tree

4 files changed

+32
-19
lines changed

4 files changed

+32
-19
lines changed

pandas/_libs/lib.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ from pandas._libs.tslibs.nattype import NaT
5757
from pandas._libs.tslibs.conversion cimport convert_to_tsobject
5858
from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64
5959
from pandas._libs.tslibs.timezones cimport get_timezone, tz_compare
60-
60+
from pandas.compat import string_types
6161
from pandas._libs.missing cimport (
6262
checknull, isnaobj, is_null_datetime64, is_null_timedelta64, is_null_period
6363
)
@@ -2378,7 +2378,7 @@ cdef inline object convert_to_unicode(object item,
23782378
if float_item == 0.0 or float_item != float_item:
23792379
do_convert = 0
23802380

2381-
if do_convert and not isinstance(item, str):
2381+
if do_convert and not isinstance(item, string_types):
23822382
item = PyObject_Str(item)
23832383

23842384
return item

pandas/_libs/src/parser/tokenizer.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1692,7 +1692,6 @@ double precise_xstrtod(const char *str, char **endptr, char decimal,
16921692
int max_digits = 17;
16931693
int n;
16941694

1695-
if (maybe_int != NULL) *maybe_int = 1;
16961695
// Cache powers of 10 in memory.
16971696
static double e[] = {
16981697
1., 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
@@ -1727,6 +1726,7 @@ double precise_xstrtod(const char *str, char **endptr, char decimal,
17271726
1e290, 1e291, 1e292, 1e293, 1e294, 1e295, 1e296, 1e297, 1e298, 1e299,
17281727
1e300, 1e301, 1e302, 1e303, 1e304, 1e305, 1e306, 1e307, 1e308};
17291728

1729+
if (maybe_int != NULL) *maybe_int = 1;
17301730
// Skip leading whitespace.
17311731
while (isspace_ascii(*p)) p++;
17321732

pandas/_libs/tslibs/util.pxd

+26-12
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,29 @@ cdef extern from *:
1414
object char_to_string(const char* data)
1515

1616

17+
cdef extern from *:
18+
"""
19+
#if PY_VERSION_HEX >= 0x03000000
20+
#define PyBytes_AsStringAndSize(py_string, buffer, length) \
21+
PyBytes_AsStringAndSize(py_string, buffer, length)
22+
23+
#define PyUnicode_AsUTF8AndSize(py_string, buffer, length) \
24+
buffer = PyUnicode_AsUTF8AndSize(py_string, length)
25+
26+
#else
27+
#define PyBytes_AsStringAndSize(py_string, buffer, length) \
28+
PyString_AsStringAndSize(py_string, buffer, length)
29+
30+
#define PyUnicode_AsUTF8AndSize(py_string, buffer, length) \
31+
do { \
32+
buffer = PyUnicode_AS_DATA(py_string); \
33+
*length = PyUnicode_GET_SIZE(py_string); \
34+
} while(0)
35+
#endif
36+
"""
37+
void PyUnicode_AsUTF8AndSize(object py_string, const char* buffer, Py_ssize_t* length)
38+
void PyBytes_AsStringAndSize(object py_string, char** buffer, Py_ssize_t* length)
39+
1740
cdef extern from "Python.h":
1841
# Note: importing extern-style allows us to declare these as nogil
1942
# functions, whereas `from cpython cimport` does not.
@@ -24,15 +47,6 @@ cdef extern from "Python.h":
2447
bint PyComplex_Check(object obj) nogil
2548
bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil
2649

27-
# Note that following functions can potentially raise an exception,
28-
# thus they cannot be declared 'nogil'. Also PyUnicode_AsUTF8AndSize() can
29-
# potentially allocate memory inside in unlikely case of when underlying
30-
# unicode object was stored as non-utf8 and utf8 wasn't requested before.
31-
bint PyBytes_AsStringAndSize(object obj, char** buf,
32-
Py_ssize_t* length) except -1
33-
const char* PyUnicode_AsUTF8AndSize(object obj,
34-
Py_ssize_t* length) except NULL
35-
3650
from numpy cimport int64_t, float64_t
3751

3852
cdef extern from "numpy/arrayobject.h":
@@ -243,7 +257,7 @@ cdef inline bint is_nan(object val):
243257

244258

245259
cdef inline const char* get_c_string_buf_and_size(object py_string,
246-
Py_ssize_t *length):
260+
Py_ssize_t *length) except NULL:
247261
"""
248262
Extract internal char* buffer of unicode or bytes object `py_string` with
249263
getting length of this internal buffer saved in `length`.
@@ -263,10 +277,10 @@ cdef inline const char* get_c_string_buf_and_size(object py_string,
263277
buf : const char*
264278
"""
265279
cdef:
266-
const char *buf
280+
const char *buf = NULL
267281

268282
if PyUnicode_Check(py_string):
269-
buf = PyUnicode_AsUTF8AndSize(py_string, length)
283+
PyUnicode_AsUTF8AndSize(py_string, buf, length)
270284
else:
271285
PyBytes_AsStringAndSize(py_string, <char**>&buf, length)
272286
return buf

pandas/tests/io/parser/test_parse_dates.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
"""
77

88
from datetime import date, datetime
9-
from io import StringIO
109

1110
from dateutil.parser import parse as du_parse
1211
from hypothesis import given, settings, strategies as st
@@ -17,7 +16,8 @@
1716
from pandas._libs.tslib import Timestamp
1817
from pandas._libs.tslibs import parsing
1918
from pandas._libs.tslibs.parsing import parse_datetime_string
20-
from pandas.compat import is_platform_windows, lrange, parse_date
19+
from pandas.compat import (is_platform_windows, lrange,
20+
parse_date, StringIO, PY2)
2121
from pandas.compat.numpy import np_array_datetime64_compat
2222

2323
import pandas as pd
@@ -32,7 +32,7 @@
3232
_DEFAULT_DATETIME = datetime(1, 1, 1)
3333

3434
# Strategy for hypothesis
35-
if is_platform_windows():
35+
if is_platform_windows() or PY2:
3636
date_strategy = st.datetimes(min_value=datetime(1900, 1, 1))
3737
else:
3838
date_strategy = st.datetimes()
@@ -197,7 +197,6 @@ def test_multiple_date_cols_int_cast(all_parsers):
197197
"KORD,19990127, 23:00:00, 22:56:00, -0.5900")
198198
parse_dates = {"actual": [1, 2], "nominal": [1, 3]}
199199
parser = all_parsers
200-
201200
result = parser.read_csv(StringIO(data), header=None,
202201
date_parser=conv.parse_date_time,
203202
parse_dates=parse_dates, prefix="X")

0 commit comments

Comments
 (0)