Skip to content

Commit a37b3e2

Browse files
mroeschke authored and jreback committed
CLN: writers.pyx cdef cleanup (#23880)
1 parent d865e52 commit a37b3e2

File tree

1 file changed

+17
-21
lines changed

1 file changed

+17
-21
lines changed

pandas/_libs/writers.pyx

+17 -21
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ ctypedef fused pandas_string:
2323
@cython.boundscheck(False)
2424
@cython.wraparound(False)
2525
def write_csv_rows(list data, ndarray data_index,
26-
int nlevels, ndarray cols, object writer):
26+
Py_ssize_t nlevels, ndarray cols, object writer):
2727
"""
2828
Write the given data to the writer object, pre-allocating where possible
2929
for performance improvements.
@@ -36,21 +36,16 @@ def write_csv_rows(list data, ndarray data_index,
3636
cols : ndarray
3737
writer : object
3838
"""
39+
# In crude testing, N>100 yields little marginal improvement
3940
cdef:
40-
int N, j, i, ncols
41+
Py_ssize_t i, j, k = len(data_index), N = 100, ncols = len(cols)
4142
list rows
42-
object val
43-
44-
# In crude testing, N>100 yields little marginal improvement
45-
N = 100
4643

4744
# pre-allocate rows
48-
ncols = len(cols)
49-
rows = [[None] * (nlevels + ncols) for x in range(N)]
45+
rows = [[None] * (nlevels + ncols) for _ in range(N)]
5046

51-
j = -1
5247
if nlevels == 1:
53-
for j in range(len(data_index)):
48+
for j in range(k):
5449
row = rows[j % N]
5550
row[0] = data_index[j]
5651
for i in range(ncols):
@@ -59,7 +54,7 @@ def write_csv_rows(list data, ndarray data_index,
5954
if j >= N - 1 and j % N == N - 1:
6055
writer.writerows(rows)
6156
elif nlevels > 1:
62-
for j in range(len(data_index)):
57+
for j in range(k):
6358
row = rows[j % N]
6459
row[:nlevels] = list(data_index[j])
6560
for i in range(ncols):
@@ -68,7 +63,7 @@ def write_csv_rows(list data, ndarray data_index,
6863
if j >= N - 1 and j % N == N - 1:
6964
writer.writerows(rows)
7065
else:
71-
for j in range(len(data_index)):
66+
for j in range(k):
7267
row = rows[j % N]
7368
for i in range(ncols):
7469
row[i] = data[i][j]
@@ -90,8 +85,9 @@ def convert_json_to_lines(object arr):
9085
cdef:
9186
Py_ssize_t i = 0, num_open_brackets_seen = 0, length
9287
bint in_quotes = 0, is_escaping = 0
93-
ndarray[uint8_t] narr
94-
unsigned char v, comma, left_bracket, right_brack, newline
88+
ndarray[uint8_t, ndim=1] narr
89+
unsigned char val, newline, comma, left_bracket, right_bracket, quote
90+
unsigned char backslash
9591

9692
newline = ord('\n')
9793
comma = ord(',')
@@ -103,18 +99,18 @@ def convert_json_to_lines(object arr):
10399
narr = np.frombuffer(arr.encode('utf-8'), dtype='u1').copy()
104100
length = narr.shape[0]
105101
for i in range(length):
106-
v = narr[i]
107-
if v == quote and i > 0 and not is_escaping:
102+
val = narr[i]
103+
if val == quote and i > 0 and not is_escaping:
108104
in_quotes = ~in_quotes
109-
if v == backslash or is_escaping:
105+
if val == backslash or is_escaping:
110106
is_escaping = ~is_escaping
111-
if v == comma: # commas that should be \n
107+
if val == comma: # commas that should be \n
112108
if num_open_brackets_seen == 0 and not in_quotes:
113109
narr[i] = newline
114-
elif v == left_bracket:
110+
elif val == left_bracket:
115111
if not in_quotes:
116112
num_open_brackets_seen += 1
117-
elif v == right_bracket:
113+
elif val == right_bracket:
118114
if not in_quotes:
119115
num_open_brackets_seen -= 1
120116

@@ -159,7 +155,7 @@ def string_array_replace_from_nan_rep(
159155
they are 'nan_rep'. Return the same array.
160156
"""
161157
cdef:
162-
int length = arr.shape[0], i = 0
158+
Py_ssize_t length = len(arr), i = 0
163159

164160
if replace is None:
165161
replace = np.nan

0 commit comments

Comments (0)