Skip to content

Commit 2ee36b2

Browse files
committed
Cython version of _concat_date_cols works for all cases
1 parent dc669dd commit 2ee36b2

File tree

1 file changed

+50
-19
lines changed

1 file changed

+50
-19
lines changed

pandas/_libs/lib.pyx

+50-19
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ PyDateTime_IMPORT
2020

2121
import numpy as np
2222
cimport numpy as cnp
23-
from numpy cimport (ndarray, PyArray_GETITEM,
23+
from numpy cimport (ndarray, PyArray_GETITEM, PyArray_Check,
2424
PyArray_ITER_DATA, PyArray_ITER_NEXT, PyArray_IterNew,
2525
flatiter, NPY_OBJECT,
2626
int64_t, float32_t, float64_t,
@@ -2380,53 +2380,84 @@ cdef inline void convert_and_set_item(object item, Py_ssize_t index,
23802380
result[index] = item
23812381

23822382

@cython.wraparound(False)
@cython.boundscheck(False)
cdef inline void put_object_as_unicode(list lst, Py_ssize_t idx,
                                       object item):
    """Store `item` at `lst[idx]`, coercing non-str items via PyObject_Str.

    Strings are stored as-is; anything else is converted to its str()
    representation first, so the list is safe to pass to a join.
    """
    lst[idx] = item if isinstance(item, str) else PyObject_Str(item)
23882390

23892391

@cython.wraparound(False)
@cython.boundscheck(False)
cpdef object _concat_date_cols(object date_cols,
                               object keep_trivial_numbers=False):
    """Concatenate sequences of date-column values into one object ndarray.

    Parameters
    ----------
    date_cols : sequence of array-likes
        The columns to combine. With one column, each element is converted
        individually via ``convert_and_set_item``; with several, the i-th
        elements of all columns are joined with a single space.
    keep_trivial_numbers : object, default False
        Truthy flag forwarded to ``convert_and_set_item`` to keep trivial
        numbers unconverted.

    Returns
    -------
    ndarray of object
        Length 0 for empty input; otherwise min(len(col) for col) entries.
    """
    cdef:
        bint keep_numbers
        Py_ssize_t sequence_size, i, j
        Py_ssize_t array_size, min_size
        object[:] result_view
        object[:, :] arrays_view
        object[:] obj_iter
        int64_t[:] int_iter
        float64_t[:] double_iter

    keep_numbers = keep_trivial_numbers
    sequence_size = len(date_cols)

    if sequence_size == 0:
        result = np.zeros(0, dtype=object)
    elif sequence_size == 1:
        array = date_cols[0]
        array_size = len(array)
        result = np.zeros(array_size, dtype=object)
        result_view = result
        if PyArray_Check(array):
            # Fast typed-memoryview paths for the common numeric dtypes.
            if array.dtype == np.int64:
                int_iter = array
                for i in range(array_size):
                    convert_and_set_item(int_iter[i], i,
                                         result_view, keep_numbers)
            elif array.dtype == np.float64:
                double_iter = array
                for i in range(array_size):
                    convert_and_set_item(double_iter[i], i,
                                         result_view, keep_numbers)
            else:
                if array.dtype == object:
                    obj_iter = array
                else:
                    # BUGFIX: was `np.astype(object)` — numpy has no
                    # module-level astype; convert the array itself.
                    obj_array = array.astype(object)
                    obj_iter = obj_array
                for i in range(array_size):
                    convert_and_set_item(obj_iter[i], i, result_view,
                                         keep_numbers)
        else:
            # Non-ndarray sequences (lists, Index, ...) iterate generically.
            for i, item in enumerate(array):
                convert_and_set_item(item, i, result_view, keep_numbers)
    else:
        # Ragged inputs are truncated to the shortest column.
        min_size = min([len(arr) for arr in date_cols])

        # Copy every column into one 2-D object buffer so the join loop
        # below can use a single typed memoryview.
        arrays = np.zeros((len(date_cols), min_size), dtype=object)
        for idx, array in enumerate(date_cols):
            # BUGFIX: slice each column to min_size; without this,
            # assigning a longer column into a min_size-wide row raises
            # a shape-mismatch error (no-op when lengths already match).
            if PyArray_Check(array):
                if array.dtype == object:
                    arrays[idx] = array[:min_size]
                else:
                    arrays[idx] = array.astype(object)[:min_size]
            else:
                arrays[idx] = np.array(array[:min_size], dtype=object)
        arrays_view = arrays

        result = np.zeros(min_size, dtype=object)
        result_view = result
        # Reusable scratch list for the per-row join.
        list_to_join = [None] * sequence_size

        for i in range(min_size):
            for j in range(sequence_size):
                put_object_as_unicode(list_to_join, j, arrays_view[j, i])
            result_view[i] = PyUnicode_Join(' ', list_to_join)

    return result

0 commit comments

Comments
 (0)