@@ -7,11 +7,19 @@ from io import StringIO
7
7
8
8
from libc.string cimport strchr
9
9
10
+ import cython
11
+
12
+ from cpython cimport PyObject_Str, PyUnicode_Join
13
+
10
14
from cpython.datetime cimport datetime, datetime_new, import_datetime
11
15
from cpython.version cimport PY_VERSION_HEX
12
16
import_datetime()
13
17
14
18
import numpy as np
19
+ cimport numpy as cnp
20
+ from numpy cimport (PyArray_GETITEM, PyArray_ITER_DATA, PyArray_ITER_NEXT,
21
+ PyArray_IterNew, flatiter, float64_t)
22
+ cnp.import_array()
15
23
16
24
# dateutil compat
17
25
from dateutil.tz import (tzoffset,
@@ -26,7 +34,7 @@ from pandas._config import get_option
26
34
27
35
from pandas._libs.tslibs.ccalendar import MONTH_NUMBERS
28
36
from pandas._libs.tslibs.nattype import nat_strings, NaT
29
- from pandas._libs.tslibs.util cimport get_c_string_buf_and_size
37
+ from pandas._libs.tslibs.util cimport is_array, get_c_string_buf_and_size
30
38
31
39
cdef extern from " ../src/headers/portable.h" :
32
40
int getdigit_ascii(char c, int default) nogil
@@ -880,3 +888,117 @@ def _guess_datetime_format(dt_str, dayfirst=False, dt_str_parse=du_parse,
880
888
return guessed_format
881
889
else :
882
890
return None
891
+
892
+
893
@cython.wraparound(False)
@cython.boundscheck(False)
cdef inline object convert_to_unicode(object item,
                                      bint keep_trivial_numbers):
    """
    Convert `item` to str.

    Parameters
    ----------
    item : object
    keep_trivial_numbers : bool
        if True, then an integer zero, a float zero or a float NaN is
        returned unchanged instead of being converted to string

    Returns
    -------
    str or int or float
    """
    cdef:
        float64_t float_item

    if keep_trivial_numbers:
        if isinstance(item, int):
            # compare at the Python-object level: coercing to a C int first
            # raises OverflowError for integers outside the C int range
            if item == 0:
                return item
        elif isinstance(item, float):
            float_item = item
            # `x != x` is true only for NaN
            if float_item == 0.0 or float_item != float_item:
                return item

    if not isinstance(item, str):
        item = PyObject_Str(item)

    return item
927
+
928
+
929
@cython.wraparound(False)
@cython.boundscheck(False)
def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True):
    """
    Build one string per row from the numpy arrays in `date_cols`.

    Elements found at the same position of every array are converted to
    str and joined with a single space. With exactly one array nothing is
    joined: each element is only passed through ``convert_to_unicode``.

    Parameters
    ----------
    date_cols : tuple of numpy arrays
    keep_trivial_numbers : bool, default True
        only used when len(date_cols) == 1; it is forwarded to
        ``convert_to_unicode`` for every element of that single array

    Returns
    -------
    arr_of_rows : ndarray (dtype=object)
        its length is the smallest ``len`` among the input arrays,
        or 0 when `date_cols` is empty

    Raises
    ------
    ValueError
        if ``is_array`` is false for any element of `date_cols`

    Examples
    --------
    >>> dates=np.array(['3/31/2019', '4/31/2019'], dtype=object)
    >>> times=np.array(['11:20', '10:45'], dtype=object)
    >>> result = _concat_date_cols((dates, times))
    >>> result
    array(['3/31/2019 11:20', '4/31/2019 10:45'], dtype=object)
    """
    cdef:
        Py_ssize_t n_cols = len(date_cols), n_rows = 0
        Py_ssize_t row, col
        list parts
        cnp.ndarray[object] col_iters
        object[::1] col_iters_view
        flatiter cursor
        cnp.ndarray[object] out
        object[:] out_view

    # nothing to concatenate
    if n_cols == 0:
        return np.zeros(0, dtype=object)

    for column in date_cols:
        if not is_array(column):
            raise ValueError(" not all elements from date_cols are numpy arrays")

    # only as many rows as the shortest column provides
    n_rows = min(map(len, date_cols))
    out = np.zeros(n_rows, dtype=object)
    out_view = out

    if n_cols == 1:
        # single column: convert element by element, no joining involved
        column = date_cols[0]
        cursor = <flatiter>PyArray_IterNew(column)
        for row in range(n_rows):
            value = PyArray_GETITEM(column, PyArray_ITER_DATA(cursor))
            out_view[row] = convert_to_unicode(value, keep_trivial_numbers)
            PyArray_ITER_NEXT(cursor)
        return out

    # fixed size list that is reused for every row - more efficient
    # memory allocation
    parts = [None] * n_cols

    # one flatiter per column, kept in an object ndarray that is accessed
    # through a memoryview - more efficient indexing
    col_iters = np.zeros(n_cols, dtype=object)
    col_iters_view = col_iters
    for col in range(n_cols):
        col_iters_view[col] = PyArray_IterNew(date_cols[col])

    # elements that sit on the same row are merged into one string
    for row in range(n_rows):
        for col in range(n_cols):
            column = date_cols[col]
            # the cast is required because `flatiter` objects are stored
            # as plain objects inside the ndarray
            cursor = <flatiter>col_iters_view[col]
            value = PyArray_GETITEM(column, PyArray_ITER_DATA(cursor))
            parts[col] = convert_to_unicode(value, False)
            PyArray_ITER_NEXT(cursor)
        out_view[row] = PyUnicode_Join(' ', parts)

    return out
0 commit comments