Skip to content

Commit 78e16a9

Browse files
committed
refactor out libwriters, fix references to Timestamp, Timedelta
1 parent 8a7aca9 commit 78e16a9

File tree

12 files changed

+267
-238
lines changed

12 files changed

+267
-238
lines changed

pandas/_libs/lib.pyx

Lines changed: 1 addition & 193 deletions
Original file line number | Diff line number | Diff line change
@@ -23,14 +23,7 @@ from cpython cimport (Py_INCREF, PyTuple_SET_ITEM,
23 23
PyBytes_Check,
24 24
PyUnicode_Check,
25 25
PyTuple_New,
26-
PyObject_RichCompareBool,
27-
PyBytes_GET_SIZE,
28-
PyUnicode_GET_SIZE)
29-
30-
try:
31-
from cpython cimport PyString_GET_SIZE
32-
except ImportError:
33-
from cpython cimport PyUnicode_GET_SIZE as PyString_GET_SIZE
26+
PyObject_RichCompareBool)
34 27

35 28
cimport cpython
36 29

@@ -129,28 +122,6 @@ def item_from_zerodim(object val):
129 122
return util.unbox_if_zerodim(val)
130 123

131 124

132-
@cython.wraparound(False)
133-
@cython.boundscheck(False)
134-
def fast_unique(ndarray[object] values):
135-
cdef:
136-
Py_ssize_t i, n = len(values)
137-
list uniques = []
138-
dict table = {}
139-
object val, stub = 0
140-
141-
for i from 0 <= i < n:
142-
val = values[i]
143-
if val not in table:
144-
table[val] = stub
145-
uniques.append(val)
146-
try:
147-
uniques.sort()
148-
except Exception:
149-
pass
150-
151-
return uniques
152-
153-
154 125
@cython.wraparound(False)
155 126
@cython.boundscheck(False)
156 127
def fast_unique_multiple(list arrays):
@@ -370,30 +341,6 @@ def has_infs_f8(ndarray[float64_t] arr):
370 341
return False
371 342

372 343

373-
def convert_timestamps(ndarray values):
374-
cdef:
375-
object val, f, result
376-
dict cache = {}
377-
Py_ssize_t i, n = len(values)
378-
ndarray[object] out
379-
380-
# for HDFStore, a bit temporary but...
381-
382-
from datetime import datetime
383-
f = datetime.fromtimestamp
384-
385-
out = np.empty(n, dtype='O')
386-
387-
for i in range(n):
388-
val = util.get_value_1d(values, i)
389-
if val in cache:
390-
out[i] = cache[val]
391-
else:
392-
cache[val] = out[i] = f(val)
393-
394-
return out
395-
396-
397 344
def maybe_indices_to_slice(ndarray[int64_t] indices, int max_len):
398 345
cdef:
399 346
Py_ssize_t i, n = len(indices)
@@ -733,145 +680,6 @@ def clean_index_list(list obj):
733 680
return np.asarray(obj), 0
734 681

735 682

736-
ctypedef fused pandas_string:
737-
str
738-
unicode
739-
bytes
740-
741-
742-
@cython.boundscheck(False)
743-
@cython.wraparound(False)
744-
cpdef Py_ssize_t max_len_string_array(pandas_string[:] arr):
745-
""" return the maximum size of elements in a 1-dim string array """
746-
cdef:
747-
Py_ssize_t i, m = 0, l = 0, length = arr.shape[0]
748-
pandas_string v
749-
750-
for i in range(length):
751-
v = arr[i]
752-
if PyString_Check(v):
753-
l = PyString_GET_SIZE(v)
754-
elif PyBytes_Check(v):
755-
l = PyBytes_GET_SIZE(v)
756-
elif PyUnicode_Check(v):
757-
l = PyUnicode_GET_SIZE(v)
758-
759-
if l > m:
760-
m = l
761-
762-
return m
763-
764-
765-
@cython.boundscheck(False)
766-
@cython.wraparound(False)
767-
def string_array_replace_from_nan_rep(
768-
ndarray[object, ndim=1] arr, object nan_rep,
769-
object replace=None):
770-
"""
771-
Replace the values in the array with 'replacement' if
772-
they are 'nan_rep'. Return the same array.
773-
"""
774-
775-
cdef int length = arr.shape[0], i = 0
776-
if replace is None:
777-
replace = np.nan
778-
779-
for i from 0 <= i < length:
780-
if arr[i] == nan_rep:
781-
arr[i] = replace
782-
783-
return arr
784-
785-
786-
@cython.boundscheck(False)
787-
@cython.wraparound(False)
788-
def convert_json_to_lines(object arr):
789-
"""
790-
replace comma separated json with line feeds, paying special attention
791-
to quotes & brackets
792-
"""
793-
cdef:
794-
Py_ssize_t i = 0, num_open_brackets_seen = 0, length
795-
bint in_quotes = 0, is_escaping = 0
796-
ndarray[uint8_t] narr
797-
unsigned char v, comma, left_bracket, right_brack, newline
798-
799-
newline = ord('\n')
800-
comma = ord(',')
801-
left_bracket = ord('{')
802-
right_bracket = ord('}')
803-
quote = ord('"')
804-
backslash = ord('\\')
805-
806-
narr = np.frombuffer(arr.encode('utf-8'), dtype='u1').copy()
807-
length = narr.shape[0]
808-
for i in range(length):
809-
v = narr[i]
810-
if v == quote and i > 0 and not is_escaping:
811-
in_quotes = ~in_quotes
812-
if v == backslash or is_escaping:
813-
is_escaping = ~is_escaping
814-
if v == comma: # commas that should be \n
815-
if num_open_brackets_seen == 0 and not in_quotes:
816-
narr[i] = newline
817-
elif v == left_bracket:
818-
if not in_quotes:
819-
num_open_brackets_seen += 1
820-
elif v == right_bracket:
821-
if not in_quotes:
822-
num_open_brackets_seen -= 1
823-
824-
return narr.tostring().decode('utf-8')
825-
826-
827-
@cython.boundscheck(False)
828-
@cython.wraparound(False)
829-
def write_csv_rows(list data, ndarray data_index,
830-
int nlevels, ndarray cols, object writer):
831-
832-
cdef int N, j, i, ncols
833-
cdef list rows
834-
cdef object val
835-
836-
# In crude testing, N>100 yields little marginal improvement
837-
N=100
838-
839-
# pre-allocate rows
840-
ncols = len(cols)
841-
rows = [[None] * (nlevels + ncols) for x in range(N)]
842-
843-
j = -1
844-
if nlevels == 1:
845-
for j in range(len(data_index)):
846-
row = rows[j % N]
847-
row[0] = data_index[j]
848-
for i in range(ncols):
849-
row[1 + i] = data[i][j]
850-
851-
if j >= N - 1 and j % N == N - 1:
852-
writer.writerows(rows)
853-
elif nlevels > 1:
854-
for j in range(len(data_index)):
855-
row = rows[j % N]
856-
row[:nlevels] = list(data_index[j])
857-
for i in range(ncols):
858-
row[nlevels + i] = data[i][j]
859-
860-
if j >= N - 1 and j % N == N - 1:
861-
writer.writerows(rows)
862-
else:
863-
for j in range(len(data_index)):
864-
row = rows[j % N]
865-
for i in range(ncols):
866-
row[i] = data[i][j]
867-
868-
if j >= N - 1 and j % N == N - 1:
869-
writer.writerows(rows)
870-
871-
if j >= 0 and (j < N - 1 or (j % N) != N - 1):
872-
writer.writerows(rows[:((j + 1) % N)])
873-
874-
875 683
# ------------------------------------------------------------------------------
876 684
# Groupby-related functions
877 685

pandas/_libs/src/inference.pyx

Lines changed: 1 addition & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ from tslibs.nattype import NaT
6 6
from tslibs.conversion cimport convert_to_tsobject
7 7
from tslibs.timedeltas cimport convert_to_timedelta64
8 8
from tslibs.timezones cimport get_timezone, tz_compare
9-
from datetime import datetime, timedelta
9+
10 10
iNaT = util.get_nat()
11 11

12 12
cdef bint PY2 = sys.version_info[0] == 2
@@ -1389,30 +1389,6 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
1389 1389
return objects
1390 1390

1391 1391

1392-
def sanitize_objects(ndarray[object] values, set na_values,
1393-
convert_empty=True):
1394-
cdef:
1395-
Py_ssize_t i, n
1396-
object val, onan
1397-
Py_ssize_t na_count = 0
1398-
dict memo = {}
1399-
1400-
n = len(values)
1401-
onan = np.nan
1402-
1403-
for i from 0 <= i < n:
1404-
val = values[i]
1405-
if (convert_empty and val == '') or (val in na_values):
1406-
values[i] = onan
1407-
na_count += 1
1408-
elif val in memo:
1409-
values[i] = memo[val]
1410-
else:
1411-
memo[val] = val
1412-
1413-
return na_count
1414-
1415-
1416 1392
def maybe_convert_bool(ndarray[object] arr,
1417 1393
true_values=None, false_values=None):
1418 1394
cdef:

0 commit comments

Comments
 (0)