Skip to content

Commit 0b6902b

Browse files
committed
Rework get_string_data to cleaner get_c_string_buf_and_size
1 parent 13c8e95 commit 0b6902b

File tree

3 files changed

+23
-36
lines changed

3 files changed

+23
-36
lines changed

pandas/_libs/hashtable_class_helper.pxi.in

+9-9
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
99
# VectorData
1010
# ----------------------------------------------------------------------
1111

12-
from pandas._libs.tslibs.util cimport get_string_data, get_string_data_checked
12+
from pandas._libs.tslibs.util cimport get_c_string
1313

1414
{{py:
1515

@@ -597,7 +597,7 @@ cdef class StringHashTable(HashTable):
597597
cdef:
598598
khiter_t k
599599
const char *v
600-
v = get_string_data_checked(val, NULL)
600+
v = get_c_string(val)
601601

602602
k = kh_get_str(self.table, v)
603603
if k != self.table.n_buckets:
@@ -611,7 +611,7 @@ cdef class StringHashTable(HashTable):
611611
int ret = 0
612612
const char *v
613613

614-
v = get_string_data_checked(val, NULL)
614+
v = get_c_string(val)
615615

616616
k = kh_put_str(self.table, v, &ret)
617617
self.table.keys[k] = key
@@ -634,7 +634,7 @@ cdef class StringHashTable(HashTable):
634634
vecs = <const char **>malloc(n * sizeof(char *))
635635
for i in range(n):
636636
val = values[i]
637-
v = get_string_data_checked(val, NULL)
637+
v = get_c_string(val)
638638
vecs[i] = v
639639

640640
with nogil:
@@ -664,9 +664,9 @@ cdef class StringHashTable(HashTable):
664664
val = values[i]
665665

666666
if isinstance(val, (str, unicode)):
667-
v = get_string_data(val, NULL)
667+
v = get_c_string(val)
668668
else:
669-
v = get_string_data(self.na_string_sentinel, NULL)
669+
v = get_c_string(self.na_string_sentinel)
670670
vecs[i] = v
671671

672672
with nogil:
@@ -697,9 +697,9 @@ cdef class StringHashTable(HashTable):
697697
val = values[i]
698698

699699
if isinstance(val, (str, unicode)):
700-
v = get_string_data(val, NULL)
700+
v = get_c_string(val)
701701
else:
702-
v = get_string_data(self.na_string_sentinel, NULL)
702+
v = get_c_string(self.na_string_sentinel)
703703
vecs[i] = v
704704

705705
with nogil:
@@ -778,7 +778,7 @@ cdef class StringHashTable(HashTable):
778778
labels[i] = na_sentinel
779779
else:
780780
# if ignore_na is False, we also stringify NaN/None/etc.
781-
v = get_string_data_checked(val, NULL)
781+
v = get_c_string(val)
782782
vecs[i] = v
783783

784784
# compute

pandas/_libs/tslibs/np_datetime.pyx

+4-4
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ from cpython.datetime cimport (datetime, date,
1212
PyDateTime_IMPORT
1313

1414
from numpy cimport int64_t
15-
from pandas._libs.tslibs.util cimport get_string_data_checked
15+
from pandas._libs.tslibs.util cimport get_c_string_buf_and_size
1616

1717
cdef extern from "src/datetime/np_datetime.h":
1818
int cmp_npy_datetimestruct(npy_datetimestruct *a,
@@ -175,8 +175,8 @@ cdef inline int _string_to_dts(object val, npy_datetimestruct* dts,
175175
int* out_local, int* out_tzoffset) except? -1:
176176
cdef:
177177
Py_ssize_t length
178-
const char* tmp
178+
const char* buf
179179

180-
tmp = get_string_data_checked(val, &length)
181-
return parse_iso_8601_datetime(tmp, length,
180+
buf = get_c_string_buf_and_size(val, &length)
181+
return parse_iso_8601_datetime(buf, length,
182182
dts, out_local, out_tzoffset)

pandas/_libs/tslibs/util.pxd

+10-23
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ cdef extern from "Python.h":
1818
# Note: importing extern-style allows us to declare these as nogil
1919
# functions, whereas `from cpython cimport` does not.
2020
bint PyUnicode_Check(object obj) nogil
21-
bint PyBytes_Check(object obj) nogil
2221
bint PyString_Check(object obj) nogil
2322
bint PyBool_Check(object obj) nogil
2423
bint PyFloat_Check(object obj) nogil
@@ -30,8 +29,8 @@ cdef extern from "Python.h":
3029
# potentially allocate memory internally in the unlikely case that the underlying
3130
# unicode object was stored as non-utf8 and utf8 wasn't requested before.
3231
bint PyBytes_AsStringAndSize(object obj, char** buf,
33-
Py_ssize_t* length)
34-
char* PyUnicode_AsUTF8AndSize(object obj, Py_ssize_t* length)
32+
Py_ssize_t* length) except -1
33+
char* PyUnicode_AsUTF8AndSize(object obj, Py_ssize_t* length) except NULL
3534

3635
from numpy cimport int64_t
3736

@@ -238,18 +237,16 @@ cdef inline bint is_nan(object val):
238237
return (is_float_object(val) or is_complex_object(val)) and val != val
239238

240239

241-
cdef inline const char* get_string_data(object s, Py_ssize_t *length):
240+
cdef inline const char* get_c_string_buf_and_size(object s,
241+
Py_ssize_t *length):
242242
"""
243-
Extract internal char * buffer of unicode or bytes object `s` to `buf` with
243+
Extract the internal char * buffer of the unicode or bytes object `s`,
244244
storing the length of that buffer in `length`.
245-
Returns `False` if it failed to extract such buffer for whatever reason,
246-
otherwise returns `True`.
247245
248246
Notes
249247
-----
250-
Python object owns memory, `buf` should not be freed.
248+
The Python object owns the memory, so the returned char* must not be freed.
251249
`length` can be NULL if getting buffer length is not needed.
252-
This function should only raise exceptions in out-of-memory cases.
253250
254251
Parameters
255252
----------
@@ -265,20 +262,10 @@ cdef inline const char* get_string_data(object s, Py_ssize_t *length):
265262

266263
if PyUnicode_Check(s):
267264
buf = PyUnicode_AsUTF8AndSize(s, length)
268-
if PyBytes_Check(s):
269-
if PyBytes_AsStringAndSize(s, <char**>&buf, length) != 0:
270-
return NULL
265+
else:
266+
PyBytes_AsStringAndSize(s, <char**>&buf, length)
271267
return buf
272268

273269

274-
cdef inline const char* get_string_data_checked(object s, Py_ssize_t *length):
275-
"""
276-
This is a wrapper for get_string_data() that raises TypeError
277-
when supplied with neither unicode nor bytes object
278-
"""
279-
cdef:
280-
const char *buf = get_string_data(s, length)
281-
282-
if not buf:
283-
PyErr_BadArgument()
284-
return buf
270+
cdef inline const char* get_c_string(object s):
271+
return get_c_string_buf_and_size(s, NULL)

0 commit comments

Comments
 (0)