@@ -23,7 +23,7 @@ ctypedef fused pandas_string:
23
23
@ cython.boundscheck (False )
24
24
@ cython.wraparound (False )
25
25
def write_csv_rows (list data , ndarray data_index ,
26
- int nlevels , ndarray cols , object writer ):
26
+ Py_ssize_t nlevels , ndarray cols , object writer ):
27
27
"""
28
28
Write the given data to the writer object, pre-allocating where possible
29
29
for performance improvements.
@@ -36,21 +36,16 @@ def write_csv_rows(list data, ndarray data_index,
36
36
cols : ndarray
37
37
writer : object
38
38
"""
39
+ # In crude testing, N>100 yields little marginal improvement
39
40
cdef:
40
- int N , j, i, ncols
41
+ Py_ssize_t i , j, k = len (data_index), N = 100 , ncols = len (cols)
41
42
list rows
42
- object val
43
-
44
- # In crude testing, N>100 yields little marginal improvement
45
- N = 100
46
43
47
44
# pre-allocate rows
48
- ncols = len (cols)
49
- rows = [[None ] * (nlevels + ncols) for x in range (N)]
45
+ rows = [[None ] * (nlevels + ncols) for _ in range (N)]
50
46
51
- j = - 1
52
47
if nlevels == 1 :
53
- for j in range (len (data_index) ):
48
+ for j in range (k ):
54
49
row = rows[j % N]
55
50
row[0 ] = data_index[j]
56
51
for i in range (ncols):
@@ -59,7 +54,7 @@ def write_csv_rows(list data, ndarray data_index,
59
54
if j >= N - 1 and j % N == N - 1 :
60
55
writer.writerows(rows)
61
56
elif nlevels > 1 :
62
- for j in range (len (data_index) ):
57
+ for j in range (k ):
63
58
row = rows[j % N]
64
59
row[:nlevels] = list (data_index[j])
65
60
for i in range (ncols):
@@ -68,7 +63,7 @@ def write_csv_rows(list data, ndarray data_index,
68
63
if j >= N - 1 and j % N == N - 1 :
69
64
writer.writerows(rows)
70
65
else :
71
- for j in range (len (data_index) ):
66
+ for j in range (k ):
72
67
row = rows[j % N]
73
68
for i in range (ncols):
74
69
row[i] = data[i][j]
@@ -90,8 +85,9 @@ def convert_json_to_lines(object arr):
90
85
cdef:
91
86
Py_ssize_t i = 0 , num_open_brackets_seen = 0 , length
92
87
bint in_quotes = 0 , is_escaping = 0
93
- ndarray[uint8_t] narr
94
- unsigned char v, comma, left_bracket, right_brack, newline
88
+ ndarray[uint8_t, ndim= 1 ] narr
89
+ unsigned char val, newline, comma, left_bracket, right_bracket, quote
90
+ unsigned char backslash
95
91
96
92
newline = ord (' \n ' )
97
93
comma = ord (' ,' )
@@ -103,18 +99,18 @@ def convert_json_to_lines(object arr):
103
99
narr = np.frombuffer(arr.encode(' utf-8' ), dtype = ' u1' ).copy()
104
100
length = narr.shape[0 ]
105
101
for i in range (length):
106
- v = narr[i]
107
- if v == quote and i > 0 and not is_escaping:
102
+ val = narr[i]
103
+ if val == quote and i > 0 and not is_escaping:
108
104
in_quotes = ~ in_quotes
109
- if v == backslash or is_escaping:
105
+ if val == backslash or is_escaping:
110
106
is_escaping = ~ is_escaping
111
- if v == comma: # commas that should be \n
107
+ if val == comma: # commas that should be \n
112
108
if num_open_brackets_seen == 0 and not in_quotes:
113
109
narr[i] = newline
114
- elif v == left_bracket:
110
+ elif val == left_bracket:
115
111
if not in_quotes:
116
112
num_open_brackets_seen += 1
117
- elif v == right_bracket:
113
+ elif val == right_bracket:
118
114
if not in_quotes:
119
115
num_open_brackets_seen -= 1
120
116
@@ -159,7 +155,7 @@ def string_array_replace_from_nan_rep(
159
155
they are 'nan_rep'. Return the same array.
160
156
"""
161
157
cdef:
162
- int length = arr.shape[ 0 ] , i = 0
158
+ Py_ssize_t length = len ( arr) , i = 0
163
159
164
160
if replace is None :
165
161
replace = np.nan
0 commit comments