@@ -7,9 +7,8 @@ import warnings
7
7
8
8
from csv import QUOTE_MINIMAL, QUOTE_NONNUMERIC, QUOTE_NONE
9
9
10
- from libc.stdio cimport fopen, fclose
11
- from libc.stdlib cimport malloc, free
12
- from libc.string cimport strncpy, strlen, strcmp, strcasecmp
10
+ from libc.stdlib cimport free
11
+ from libc.string cimport strncpy, strlen, strcasecmp
13
12
14
13
cimport cython
15
14
from cython cimport Py_ssize_t
@@ -27,9 +26,6 @@ cdef extern from "Python.h":
27
26
object PyUnicode_Decode(char * v, Py_ssize_t size, char * encoding,
28
27
char * errors)
29
28
30
- cdef extern from " stdlib.h" :
31
- void memcpy(void * dst, void * src, size_t n)
32
-
33
29
34
30
import numpy as np
35
31
cimport numpy as cnp
@@ -50,7 +46,7 @@ from khash cimport (
50
46
51
47
import pandas.compat as compat
52
48
from pandas.core.dtypes.common import (
53
- is_categorical_dtype, CategoricalDtype,
49
+ is_categorical_dtype,
54
50
is_integer_dtype, is_float_dtype,
55
51
is_bool_dtype, is_object_dtype,
56
52
is_datetime64_dtype,
90
86
except NameError :
91
87
basestring = str
92
88
93
- cdef extern from " src/numpy_helper.h" :
94
- void transfer_object_column(char * dst, char * src, size_t stride,
95
- size_t length)
96
89
97
90
cdef extern from " parser/tokenizer.h" :
98
91
@@ -232,8 +225,6 @@ cdef extern from "parser/tokenizer.h":
232
225
233
226
int parser_trim_buffers(parser_t * self )
234
227
235
- void debug_print_parser(parser_t * self )
236
-
237
228
int tokenize_all_rows(parser_t * self ) nogil
238
229
int tokenize_nrows(parser_t * self , size_t nrows) nogil
239
230
@@ -249,7 +240,6 @@ cdef extern from "parser/tokenizer.h":
249
240
double round_trip(const char * p, char ** q, char decimal, char sci,
250
241
char tsep, int skip_trailing) nogil
251
242
252
- int to_longlong(char * item, long long * p_value) nogil
253
243
int to_boolean(const char * item, uint8_t * val) nogil
254
244
255
245
@@ -875,9 +865,6 @@ cdef class TextReader:
875
865
876
866
return header, field_count
877
867
878
- cdef _implicit_index_count(self ):
879
- pass
880
-
881
868
def read (self , rows = None ):
882
869
"""
883
870
rows=None --> read all rows
@@ -997,9 +984,6 @@ cdef class TextReader:
997
984
998
985
return columns
999
986
1000
- def debug_print (self ):
1001
- debug_print_parser(self .parser)
1002
-
1003
987
cdef _start_clock(self ):
1004
988
self .clocks.append(time.time())
1005
989
@@ -1346,6 +1330,7 @@ cdef class TextReader:
1346
1330
else :
1347
1331
return None
1348
1332
1333
+
1349
1334
cdef object _true_values = [b' True' , b' TRUE' , b' true' ]
1350
1335
cdef object _false_values = [b' False' , b' FALSE' , b' false' ]
1351
1336
@@ -1375,21 +1360,6 @@ cdef asbytes(object o):
1375
1360
_NA_VALUES = _ensure_encoded(list (com._NA_VALUES))
1376
1361
1377
1362
1378
- def _is_file_like (obj ):
1379
- if PY3:
1380
- import io
1381
- if isinstance (obj, io.TextIOWrapper):
1382
- raise ParserError(' Cannot handle open unicode files (yet)' )
1383
-
1384
- # BufferedReader is a byte reader for Python 3
1385
- file = io.BufferedReader
1386
- else :
1387
- import __builtin__
1388
- file = __builtin__.file
1389
-
1390
- return isinstance (obj, (basestring , file ))
1391
-
1392
-
1393
1363
def _maybe_upcast (arr ):
1394
1364
"""
1395
1365
@@ -1479,6 +1449,7 @@ cdef _string_box_factorize(parser_t *parser, int64_t col,
1479
1449
1480
1450
return result, na_count
1481
1451
1452
+
1482
1453
cdef _string_box_utf8(parser_t * parser, int64_t col,
1483
1454
int64_t line_start, int64_t line_end,
1484
1455
bint na_filter, kh_str_t * na_hashset):
@@ -1532,6 +1503,7 @@ cdef _string_box_utf8(parser_t *parser, int64_t col,
1532
1503
1533
1504
return result, na_count
1534
1505
1506
+
1535
1507
cdef _string_box_decode(parser_t * parser, int64_t col,
1536
1508
int64_t line_start, int64_t line_end,
1537
1509
bint na_filter, kh_str_t * na_hashset,
@@ -1662,6 +1634,7 @@ cdef _categorical_convert(parser_t *parser, int64_t col,
1662
1634
kh_destroy_str(table)
1663
1635
return np.asarray(codes), result, na_count
1664
1636
1637
+
1665
1638
cdef _to_fw_string(parser_t * parser, int64_t col, int64_t line_start,
1666
1639
int64_t line_end, int64_t width):
1667
1640
cdef:
@@ -1679,6 +1652,7 @@ cdef _to_fw_string(parser_t *parser, int64_t col, int64_t line_start,
1679
1652
1680
1653
return result
1681
1654
1655
+
1682
1656
cdef inline void _to_fw_string_nogil(parser_t * parser, int64_t col,
1683
1657
int64_t line_start, int64_t line_end,
1684
1658
size_t width, char * data) nogil:
@@ -1694,10 +1668,12 @@ cdef inline void _to_fw_string_nogil(parser_t *parser, int64_t col,
1694
1668
strncpy(data, word, width)
1695
1669
data += width
1696
1670
1671
+
1697
1672
cdef char * cinf = b' inf'
1698
1673
cdef char * cposinf = b' +inf'
1699
1674
cdef char * cneginf = b' -inf'
1700
1675
1676
+
1701
1677
cdef _try_double(parser_t * parser, int64_t col,
1702
1678
int64_t line_start, int64_t line_end,
1703
1679
bint na_filter, kh_str_t * na_hashset, object na_flist):
@@ -1738,6 +1714,7 @@ cdef _try_double(parser_t *parser, int64_t col,
1738
1714
return None , None
1739
1715
return result, na_count
1740
1716
1717
+
1741
1718
cdef inline int _try_double_nogil(parser_t * parser,
1742
1719
double (* double_converter)(
1743
1720
const char * , char ** , char ,
@@ -1808,6 +1785,7 @@ cdef inline int _try_double_nogil(parser_t *parser,
1808
1785
1809
1786
return 0
1810
1787
1788
+
1811
1789
cdef _try_uint64(parser_t * parser, int64_t col,
1812
1790
int64_t line_start, int64_t line_end,
1813
1791
bint na_filter, kh_str_t * na_hashset):
@@ -1843,6 +1821,7 @@ cdef _try_uint64(parser_t *parser, int64_t col,
1843
1821
1844
1822
return result
1845
1823
1824
+
1846
1825
cdef inline int _try_uint64_nogil(parser_t * parser, int64_t col,
1847
1826
int64_t line_start,
1848
1827
int64_t line_end, bint na_filter,
@@ -1881,6 +1860,7 @@ cdef inline int _try_uint64_nogil(parser_t *parser, int64_t col,
1881
1860
1882
1861
return 0
1883
1862
1863
+
1884
1864
cdef _try_int64(parser_t * parser, int64_t col,
1885
1865
int64_t line_start, int64_t line_end,
1886
1866
bint na_filter, kh_str_t * na_hashset):
@@ -1909,6 +1889,7 @@ cdef _try_int64(parser_t *parser, int64_t col,
1909
1889
1910
1890
return result, na_count
1911
1891
1892
+
1912
1893
cdef inline int _try_int64_nogil(parser_t * parser, int64_t col,
1913
1894
int64_t line_start,
1914
1895
int64_t line_end, bint na_filter,
@@ -1948,69 +1929,6 @@ cdef inline int _try_int64_nogil(parser_t *parser, int64_t col,
1948
1929
1949
1930
return 0
1950
1931
1951
- cdef _try_bool(parser_t * parser, int64_t col,
1952
- int64_t line_start, int64_t line_end,
1953
- bint na_filter, kh_str_t * na_hashset):
1954
- cdef:
1955
- int na_count
1956
- Py_ssize_t lines = line_end - line_start
1957
- uint8_t * data
1958
- cnp.ndarray[cnp.uint8_t, ndim= 1 ] result
1959
-
1960
- uint8_t NA = na_values[np.bool_]
1961
-
1962
- result = np.empty(lines)
1963
- data = < uint8_t * > result.data
1964
-
1965
- with nogil:
1966
- error = _try_bool_nogil(parser, col, line_start,
1967
- line_end, na_filter,
1968
- na_hashset, NA, data,
1969
- & na_count)
1970
- if error != 0 :
1971
- return None , None
1972
- return result.view(np.bool_), na_count
1973
-
1974
- cdef inline int _try_bool_nogil(parser_t * parser, int64_t col,
1975
- int64_t line_start,
1976
- int64_t line_end, bint na_filter,
1977
- const kh_str_t * na_hashset, uint8_t NA,
1978
- uint8_t * data, int * na_count) nogil:
1979
- cdef:
1980
- int error
1981
- Py_ssize_t i, lines = line_end - line_start
1982
- coliter_t it
1983
- const char * word = NULL
1984
- khiter_t k
1985
- na_count[0 ] = 0
1986
-
1987
- coliter_setup(& it, parser, col, line_start)
1988
-
1989
- if na_filter:
1990
- for i in range (lines):
1991
- COLITER_NEXT(it, word)
1992
-
1993
- k = kh_get_str(na_hashset, word)
1994
- # in the hash table
1995
- if k != na_hashset.n_buckets:
1996
- na_count[0 ] += 1
1997
- data[0 ] = NA
1998
- data += 1
1999
- continue
2000
-
2001
- error = to_boolean(word, data)
2002
- if error != 0 :
2003
- return error
2004
- data += 1
2005
- else :
2006
- for i in range (lines):
2007
- COLITER_NEXT(it, word)
2008
-
2009
- error = to_boolean(word, data)
2010
- if error != 0 :
2011
- return error
2012
- data += 1
2013
- return 0
2014
1932
2015
1933
cdef _try_bool_flex(parser_t * parser, int64_t col,
2016
1934
int64_t line_start, int64_t line_end,
@@ -2039,6 +1957,7 @@ cdef _try_bool_flex(parser_t *parser, int64_t col,
2039
1957
return None , None
2040
1958
return result.view(np.bool_), na_count
2041
1959
1960
+
2042
1961
cdef inline int _try_bool_flex_nogil(parser_t * parser, int64_t col,
2043
1962
int64_t line_start,
2044
1963
int64_t line_end, bint na_filter,
@@ -2131,6 +2050,7 @@ cdef kh_str_t* kset_from_list(list values) except NULL:
2131
2050
2132
2051
return table
2133
2052
2053
+
2134
2054
cdef kh_float64_t* kset_float64_from_list(values) except NULL :
2135
2055
# caller takes responsibility for freeing the hash table
2136
2056
cdef:
0 commit comments