Skip to content

Commit 4d1916c

Browse files
committed
Re-instate raising TypeError when trying to get string data of non-string object
1 parent c90635b commit 4d1916c

File tree

4 files changed

+35
-21
lines changed

4 files changed

+35
-21
lines changed

pandas/_libs/hashtable_class_helper.pxi.in

+5-5
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
99
# VectorData
1010
# ----------------------------------------------------------------------
1111

12-
from pandas._libs.tslibs.util cimport get_string_data
12+
from pandas._libs.tslibs.util cimport get_string_data, get_string_data_checked
1313

1414
{{py:
1515

@@ -597,7 +597,7 @@ cdef class StringHashTable(HashTable):
597597
cdef:
598598
khiter_t k
599599
const char *v
600-
get_string_data(val, &v, NULL)
600+
get_string_data_checked(val, &v, NULL)
601601

602602
k = kh_get_str(self.table, v)
603603
if k != self.table.n_buckets:
@@ -611,7 +611,7 @@ cdef class StringHashTable(HashTable):
611611
int ret = 0
612612
const char *v
613613

614-
get_string_data(val, &v, NULL)
614+
get_string_data_checked(val, &v, NULL)
615615

616616
k = kh_put_str(self.table, v, &ret)
617617
self.table.keys[k] = key
@@ -634,7 +634,7 @@ cdef class StringHashTable(HashTable):
634634
vecs = <const char **>malloc(n * sizeof(char *))
635635
for i in range(n):
636636
val = values[i]
637-
get_string_data(val, &v, NULL)
637+
get_string_data_checked(val, &v, NULL)
638638
vecs[i] = v
639639

640640
with nogil:
@@ -778,7 +778,7 @@ cdef class StringHashTable(HashTable):
778778
labels[i] = na_sentinel
779779
else:
780780
# if ignore_na is False, we also stringify NaN/None/etc.
781-
get_string_data(val, &v, NULL)
781+
get_string_data_checked(val, &v, NULL)
782782
vecs[i] = v
783783

784784
# compute

pandas/_libs/tslibs/np_datetime.pyx

+2-4
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ from cpython.datetime cimport (datetime, date,
1212
PyDateTime_IMPORT
1313

1414
from numpy cimport int64_t
15-
from pandas._libs.tslibs.util cimport get_string_data
15+
from pandas._libs.tslibs.util cimport get_string_data_checked
1616

1717
cdef extern from "src/datetime/np_datetime.h":
1818
int cmp_npy_datetimestruct(npy_datetimestruct *a,
@@ -177,8 +177,6 @@ cdef inline int _string_to_dts(object val, npy_datetimestruct* dts,
177177
Py_ssize_t length
178178
const char* tmp
179179

180-
if not get_string_data(val, &tmp, &length):
181-
raise ValueError('Unable to parse %s' % str(val))
180+
get_string_data_checked(val, &tmp, &length)
182181
return parse_iso_8601_datetime(tmp, length,
183182
dts, out_local, out_tzoffset)
184-

pandas/_libs/tslibs/src/datetime/np_datetime_strings.c

+3-2
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ int parse_iso_8601_datetime(const char *str, int len,
7272
int year_leap = 0;
7373
int i, numdigits;
7474
const char *substr;
75-
char sublen;
75+
int sublen;
7676

7777
/* If year-month-day are separated by a valid separator,
7878
* months/days without leading zeroes will be parsed
@@ -587,7 +587,8 @@ int get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) {
587587
*/
588588
int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,
589589
NPY_DATETIMEUNIT base) {
590-
char *substr = outstr, sublen = outlen;
590+
char *substr = outstr;
591+
int sublen = outlen;
591592
int tmplen;
592593

593594
/*

pandas/_libs/tslibs/util.pxd

+25-10
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11

2-
from cpython cimport PyTypeObject
2+
from cpython cimport PyTypeObject, PyErr_BadArgument
33

44
cdef extern from *:
55
"""
@@ -24,9 +24,14 @@ cdef extern from "Python.h":
2424
bint PyFloat_Check(object obj) nogil
2525
bint PyComplex_Check(object obj) nogil
2626
bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil
27+
28+
# Note that the following functions can potentially raise an exception,
29+
# thus they cannot be declared 'nogil'. Also, PyUnicode_AsUTF8AndSize() can
30+
# allocate memory internally in the unlikely case that the underlying
31+
# unicode object is not stored as UTF-8 and UTF-8 was not requested before.
2732
bint PyBytes_AsStringAndSize(object obj, char** buf,
28-
Py_ssize_t* length) nogil
29-
char* PyUnicode_AsUTF8AndSize(object obj, Py_ssize_t* length) nogil
33+
Py_ssize_t* length)
34+
char* PyUnicode_AsUTF8AndSize(object obj, Py_ssize_t* length)
3035

3136
from numpy cimport int64_t
3237

@@ -237,14 +242,15 @@ cdef inline bint get_string_data(object s, const char **buf,
237242
Py_ssize_t *length):
238243
"""
239244
Extract internal char * buffer of unicode or bytes object `s` to `buf` with
240-
getting length of this internal buffer, that save in `length`.
241-
Return `False` if it failed to extract such buffer for whatever reason
242-
otherwise return `True`
245+
the length of this internal buffer saved in `length`.
246+
Returns `False` if it failed to extract such buffer for whatever reason,
247+
otherwise returns `True`.
243248
244-
Note
245-
----
246-
python object owns memory, `buf` should not be freed
247-
`length` can be NULL
249+
Notes
250+
-----
251+
Python object owns memory, `buf` should not be freed.
252+
`length` can be NULL if getting buffer length is not needed.
253+
This function should only raise exceptions in out-of-memory cases.
248254
249255
Parameters
250256
----------
@@ -262,3 +268,12 @@ cdef inline bint get_string_data(object s, const char **buf,
262268
if PyBytes_Check(s):
263269
return PyBytes_AsStringAndSize(s, <char**>buf, length) == 0
264270
return False
271+
272+
cdef inline void get_string_data_checked(object s, const char **buf,
273+
Py_ssize_t *length):
274+
"""
275+
This is a wrapper for get_string_data() that raises TypeError
276+
when supplied with neither a unicode nor a bytes object.
277+
"""
278+
if not get_string_data(s, buf, length):
279+
PyErr_BadArgument()

0 commit comments

Comments
 (0)