@@ -23,14 +23,7 @@ from cpython cimport (Py_INCREF, PyTuple_SET_ITEM,
23
23
PyBytes_Check,
24
24
PyUnicode_Check,
25
25
PyTuple_New,
26
- PyObject_RichCompareBool,
27
- PyBytes_GET_SIZE,
28
- PyUnicode_GET_SIZE)
29
-
30
- try :
31
- from cpython cimport PyString_GET_SIZE
32
- except ImportError :
33
- from cpython cimport PyUnicode_GET_SIZE as PyString_GET_SIZE
26
+ PyObject_RichCompareBool)
34
27
35
28
cimport cpython
36
29
@@ -129,28 +122,6 @@ def item_from_zerodim(object val):
129
122
return util.unbox_if_zerodim(val)
130
123
131
124
132
@cython.wraparound(False)
@cython.boundscheck(False)
def fast_unique(ndarray[object] values):
    """
    Return the distinct elements of a 1-dim object array as a list.

    Parameters
    ----------
    values : ndarray[object]
        Elements must be hashable, since membership is tracked by hash.

    Returns
    -------
    list
        Distinct elements, collected in order of first appearance and
        then sorted in place.  If sorting raises (e.g. the elements are
        not mutually comparable), the error is swallowed and the list is
        returned as the failed sort left it.
    """
    cdef:
        Py_ssize_t idx, count = len(values)
        list distinct = []
        set seen = set()
        object item

    for idx in range(count):
        item = values[idx]
        if item not in seen:
            seen.add(item)
            distinct.append(item)

    # Sorting is deliberately best-effort: callers get the uniques even
    # when the elements cannot be ordered.
    try:
        distinct.sort()
    except Exception:
        pass

    return distinct
152
-
153
-
154
125
@ cython.wraparound (False )
155
126
@ cython.boundscheck (False )
156
127
def fast_unique_multiple (list arrays ):
@@ -370,30 +341,6 @@ def has_infs_f8(ndarray[float64_t] arr):
370
341
return False
371
342
372
343
373
def convert_timestamps(ndarray values):
    """
    Map every element of ``values`` through ``datetime.fromtimestamp``.

    Conversions are memoised per distinct input value, so repeated
    timestamps are converted only once and share one result object.

    Parameters
    ----------
    values : ndarray
        Read element-wise through ``util.get_value_1d``.

    Returns
    -------
    ndarray[object]
        Object array of the same length holding the converted values.
    """
    cdef:
        object stamp, to_datetime
        dict converted = {}
        Py_ssize_t pos, size = len(values)
        ndarray[object] result

    # for HDFStore, a bit temporary but...

    from datetime import datetime
    to_datetime = datetime.fromtimestamp

    result = np.empty(size, dtype='O')

    for pos in range(size):
        stamp = util.get_value_1d(values, pos)
        # Convert only on first sight of a value; afterwards reuse the
        # cached object.
        if stamp not in converted:
            converted[stamp] = to_datetime(stamp)
        result[pos] = converted[stamp]

    return result
395
-
396
-
397
344
def maybe_indices_to_slice (ndarray[int64_t] indices , int max_len ):
398
345
cdef:
399
346
Py_ssize_t i, n = len (indices)
@@ -733,145 +680,6 @@ def clean_index_list(list obj):
733
680
return np.asarray(obj), 0
734
681
735
682
736
# Fused type covering the string flavours accepted by
# max_len_string_array.
ctypedef fused pandas_string:
    str
    unicode
    bytes


@cython.boundscheck(False)
@cython.wraparound(False)
cpdef Py_ssize_t max_len_string_array(pandas_string[:] arr):
    """
    Return the maximum size of the elements in a 1-dim string array.

    Parameters
    ----------
    arr : 1-dim memoryview of str, unicode or bytes

    Returns
    -------
    Py_ssize_t
        Largest element size found; 0 for an empty array.
    """
    cdef:
        Py_ssize_t idx, longest = 0, size = 0, count = arr.shape[0]
        pandas_string item

    for idx in range(count):
        item = arr[idx]
        # ``size`` is intentionally not reset: an element matching none
        # of the checks reuses the previous size, which cannot raise the
        # running maximum.
        if PyString_Check(item):
            size = PyString_GET_SIZE(item)
        elif PyBytes_Check(item):
            size = PyBytes_GET_SIZE(item)
        elif PyUnicode_Check(item):
            size = PyUnicode_GET_SIZE(item)

        if size > longest:
            longest = size

    return longest
763
-
764
-
765
@cython.boundscheck(False)
@cython.wraparound(False)
def string_array_replace_from_nan_rep(
        ndarray[object, ndim=1] arr, object nan_rep,
        object replace=None):
    """
    Replace the values in the array with 'replace' if
    they are 'nan_rep'. Return the same array.

    Parameters
    ----------
    arr : ndarray[object, ndim=1]
        Modified in place.
    nan_rep : object
        Marker value; elements comparing equal to it are replaced.
    replace : object, optional
        Replacement value; ``np.nan`` when None.

    Returns
    -------
    ndarray[object, ndim=1]
        The input array itself (not a copy).
    """
    # Py_ssize_t rather than C int so arrays longer than 2**31 - 1
    # elements are indexed without overflow.
    cdef Py_ssize_t i, length = arr.shape[0]

    if replace is None:
        replace = np.nan

    for i in range(length):
        if arr[i] == nan_rep:
            arr[i] = replace

    return arr
784
-
785
-
786
@cython.boundscheck(False)
@cython.wraparound(False)
def convert_json_to_lines(object arr):
    """
    replace comma separated json with line feeds, paying special attention
    to quotes & brackets

    Parameters
    ----------
    arr : str
        JSON text; it is encoded to UTF-8 and scanned byte by byte.

    Returns
    -------
    str
        The same text with every comma that is outside double quotes and
        outside any ``{...}`` replaced by a newline.
    """
    cdef:
        Py_ssize_t i = 0, num_open_brackets_seen = 0, length
        bint in_quotes = False, is_escaping = False
        ndarray[uint8_t] narr
        # Every sentinel is a typed C byte so the comparisons in the loop
        # stay in C instead of going through Python objects.
        unsigned char v, comma, left_bracket, right_bracket, newline
        unsigned char quote, backslash

    newline = ord('\n')
    comma = ord(',')
    left_bracket = ord('{')
    right_bracket = ord('}')
    quote = ord('"')
    backslash = ord('\\')

    # frombuffer yields a read-only view of the bytes; copy to get a
    # writable array.
    narr = np.frombuffer(arr.encode('utf-8'), dtype='u1').copy()
    length = narr.shape[0]
    for i in range(length):
        v = narr[i]
        # An unescaped quote toggles the in-string state (a quote at
        # index 0 is ignored).
        if v == quote and i > 0 and not is_escaping:
            in_quotes = not in_quotes
        # A backslash starts an escape; the escape ends after the next
        # byte.
        if v == backslash or is_escaping:
            is_escaping = not is_escaping
        if v == comma:  # commas that should be \n
            if num_open_brackets_seen == 0 and not in_quotes:
                narr[i] = newline
        elif v == left_bracket:
            if not in_quotes:
                num_open_brackets_seen += 1
        elif v == right_bracket:
            if not in_quotes:
                num_open_brackets_seen -= 1

    # tobytes() replaces the deprecated ndarray.tostring() alias.
    return narr.tobytes().decode('utf-8')
825
-
826
-
827
@cython.boundscheck(False)
@cython.wraparound(False)
def write_csv_rows(list data, ndarray data_index,
                   int nlevels, ndarray cols, object writer):
    """
    Write rows to ``writer`` in batches through a reusable row buffer.

    Parameters
    ----------
    data : list
        One sequence per column; ``data[i][j]`` is column i, row j.
    data_index : ndarray
        One entry per row.  Stored as a single leading cell when
        ``nlevels == 1``, expanded into ``nlevels`` leading cells when
        ``nlevels > 1``, and not written otherwise.
    nlevels : int
        Number of leading index cells per row.
    cols : ndarray
        Only its length is used, as the number of data columns.
    writer : object
        Must provide ``writerows(list_of_rows)``.
    """
    cdef int chunk, j, i, ncols, pending
    cdef list rows

    # In crude testing, N>100 yields little marginal improvement
    chunk = 100

    # pre-allocate rows
    ncols = len(cols)
    rows = [[None] * (nlevels + ncols) for _ in range(chunk)]

    # j stays at -1 when there are no rows, which suppresses the final
    # partial flush below.
    j = -1
    if nlevels == 1:
        for j in range(len(data_index)):
            row = rows[j % chunk]
            row[0] = data_index[j]
            for i in range(ncols):
                row[1 + i] = data[i][j]

            # buffer full: flush it and start overwriting from slot 0
            if j % chunk == chunk - 1:
                writer.writerows(rows)
    elif nlevels > 1:
        for j in range(len(data_index)):
            row = rows[j % chunk]
            row[:nlevels] = list(data_index[j])
            for i in range(ncols):
                row[nlevels + i] = data[i][j]

            if j % chunk == chunk - 1:
                writer.writerows(rows)
    else:
        for j in range(len(data_index)):
            row = rows[j % chunk]
            for i in range(ncols):
                row[i] = data[i][j]

            if j % chunk == chunk - 1:
                writer.writerows(rows)

    # flush whatever is left in a partially filled buffer
    pending = (j + 1) % chunk
    if j >= 0 and pending != 0:
        writer.writerows(rows[:pending])
873
-
874
-
875
683
# ------------------------------------------------------------------------------
876
684
# Groupby-related functions
877
685
0 commit comments