@@ -175,7 +175,7 @@ cdef extern from "parser/tokenizer.h":
175
175
int col
176
176
177
177
void coliter_setup(coliter_t * it, parser_t * parser, int i, int start)
178
- char * COLITER_NEXT(coliter_t it)
178
+ void COLITER_NEXT(coliter_t it, const char * word )
179
179
180
180
parser_t* parser_new()
181
181
@@ -212,7 +212,7 @@ cdef extern from "parser/tokenizer.h":
212
212
inline int to_longlong(char * item, long long * p_value)
213
213
# inline int to_longlong_thousands(char *item, long long *p_value,
214
214
# char tsep)
215
- int to_boolean(char * item, uint8_t * val)
215
+ int to_boolean(const char * item, uint8_t * val)
216
216
217
217
218
218
cdef extern from " parser/io.h" :
@@ -1279,7 +1279,7 @@ cdef _string_box_factorize(parser_t *parser, int col,
1279
1279
Py_ssize_t i
1280
1280
size_t lines
1281
1281
coliter_t it
1282
- char * word
1282
+ const char * word = NULL
1283
1283
ndarray[object ] result
1284
1284
1285
1285
int ret = 0
@@ -1296,7 +1296,7 @@ cdef _string_box_factorize(parser_t *parser, int col,
1296
1296
coliter_setup(& it, parser, col, line_start)
1297
1297
1298
1298
for i in range (lines):
1299
- word = COLITER_NEXT(it)
1299
+ COLITER_NEXT(it, word )
1300
1300
1301
1301
if na_filter:
1302
1302
k = kh_get_str(na_hashset, word)
@@ -1333,7 +1333,7 @@ cdef _string_box_utf8(parser_t *parser, int col,
1333
1333
Py_ssize_t i
1334
1334
size_t lines
1335
1335
coliter_t it
1336
- char * word
1336
+ const char * word = NULL
1337
1337
ndarray[object ] result
1338
1338
1339
1339
int ret = 0
@@ -1350,7 +1350,7 @@ cdef _string_box_utf8(parser_t *parser, int col,
1350
1350
coliter_setup(& it, parser, col, line_start)
1351
1351
1352
1352
for i in range (lines):
1353
- word = COLITER_NEXT(it)
1353
+ COLITER_NEXT(it, word )
1354
1354
1355
1355
if na_filter:
1356
1356
k = kh_get_str(na_hashset, word)
@@ -1388,7 +1388,7 @@ cdef _string_box_decode(parser_t *parser, int col,
1388
1388
Py_ssize_t i, size
1389
1389
size_t lines
1390
1390
coliter_t it
1391
- char * word
1391
+ const char * word = NULL
1392
1392
ndarray[object ] result
1393
1393
1394
1394
int ret = 0
@@ -1407,7 +1407,7 @@ cdef _string_box_decode(parser_t *parser, int col,
1407
1407
coliter_setup(& it, parser, col, line_start)
1408
1408
1409
1409
for i in range (lines):
1410
- word = COLITER_NEXT(it)
1410
+ COLITER_NEXT(it, word )
1411
1411
1412
1412
if na_filter:
1413
1413
k = kh_get_str(na_hashset, word)
@@ -1444,7 +1444,7 @@ cdef _to_fw_string(parser_t *parser, int col, int line_start,
1444
1444
int error
1445
1445
Py_ssize_t i, j
1446
1446
coliter_t it
1447
- char * word
1447
+ const char * word = NULL
1448
1448
char * data
1449
1449
ndarray result
1450
1450
@@ -1454,7 +1454,7 @@ cdef _to_fw_string(parser_t *parser, int col, int line_start,
1454
1454
coliter_setup(& it, parser, col, line_start)
1455
1455
1456
1456
for i in range (line_end - line_start):
1457
- word = COLITER_NEXT(it)
1457
+ COLITER_NEXT(it, word )
1458
1458
strncpy(data, word, width)
1459
1459
data += width
1460
1460
@@ -1469,7 +1469,7 @@ cdef _try_double(parser_t *parser, int col, int line_start, int line_end,
1469
1469
int error, na_count = 0
1470
1470
size_t i, lines
1471
1471
coliter_t it
1472
- char * word
1472
+ const char * word = NULL
1473
1473
char * p_end
1474
1474
double * data
1475
1475
double NA = na_values[np.float64]
@@ -1485,7 +1485,7 @@ cdef _try_double(parser_t *parser, int col, int line_start, int line_end,
1485
1485
1486
1486
if na_filter:
1487
1487
for i in range (lines):
1488
- word = COLITER_NEXT(it)
1488
+ COLITER_NEXT(it, word )
1489
1489
1490
1490
k = kh_get_str(na_hashset, word)
1491
1491
# in the hash table
@@ -1509,7 +1509,7 @@ cdef _try_double(parser_t *parser, int col, int line_start, int line_end,
1509
1509
data += 1
1510
1510
else :
1511
1511
for i in range (lines):
1512
- word = COLITER_NEXT(it)
1512
+ COLITER_NEXT(it, word )
1513
1513
data[0 ] = parser.converter(word, & p_end, parser.decimal, parser.sci,
1514
1514
parser.thousands, 1 )
1515
1515
if errno != 0 or p_end[0 ] or p_end == word:
@@ -1530,7 +1530,7 @@ cdef _try_int64(parser_t *parser, int col, int line_start, int line_end,
1530
1530
int error, na_count = 0
1531
1531
size_t i, lines
1532
1532
coliter_t it
1533
- char * word
1533
+ const char * word = NULL
1534
1534
int64_t * data
1535
1535
ndarray result
1536
1536
@@ -1544,7 +1544,7 @@ cdef _try_int64(parser_t *parser, int col, int line_start, int line_end,
1544
1544
1545
1545
if na_filter:
1546
1546
for i in range (lines):
1547
- word = COLITER_NEXT(it)
1547
+ COLITER_NEXT(it, word )
1548
1548
k = kh_get_str(na_hashset, word)
1549
1549
# in the hash table
1550
1550
if k != na_hashset.n_buckets:
@@ -1561,7 +1561,7 @@ cdef _try_int64(parser_t *parser, int col, int line_start, int line_end,
1561
1561
return None , None
1562
1562
else :
1563
1563
for i in range (lines):
1564
- word = COLITER_NEXT(it)
1564
+ COLITER_NEXT(it, word )
1565
1565
data[i] = str_to_int64(word, INT64_MIN, INT64_MAX,
1566
1566
& error, parser.thousands)
1567
1567
if error != 0 :
@@ -1578,7 +1578,7 @@ cdef _try_bool(parser_t *parser, int col, int line_start, int line_end,
1578
1578
int error, na_count = 0
1579
1579
size_t i, lines
1580
1580
coliter_t it
1581
- char * word
1581
+ const char * word = NULL
1582
1582
uint8_t * data
1583
1583
ndarray result
1584
1584
@@ -1592,7 +1592,7 @@ cdef _try_bool(parser_t *parser, int col, int line_start, int line_end,
1592
1592
1593
1593
if na_filter:
1594
1594
for i in range (lines):
1595
- word = COLITER_NEXT(it)
1595
+ COLITER_NEXT(it, word )
1596
1596
1597
1597
k = kh_get_str(na_hashset, word)
1598
1598
# in the hash table
@@ -1608,7 +1608,7 @@ cdef _try_bool(parser_t *parser, int col, int line_start, int line_end,
1608
1608
data += 1
1609
1609
else :
1610
1610
for i in range (lines):
1611
- word = COLITER_NEXT(it)
1611
+ COLITER_NEXT(it, word )
1612
1612
1613
1613
error = to_boolean(word, data)
1614
1614
if error != 0 :
@@ -1625,7 +1625,7 @@ cdef _try_bool_flex(parser_t *parser, int col, int line_start, int line_end,
1625
1625
int error, na_count = 0
1626
1626
size_t i, lines
1627
1627
coliter_t it
1628
- char * word
1628
+ const char * word = NULL
1629
1629
uint8_t * data
1630
1630
ndarray result
1631
1631
@@ -1639,7 +1639,7 @@ cdef _try_bool_flex(parser_t *parser, int col, int line_start, int line_end,
1639
1639
1640
1640
if na_filter:
1641
1641
for i in range (lines):
1642
- word = COLITER_NEXT(it)
1642
+ COLITER_NEXT(it, word )
1643
1643
1644
1644
k = kh_get_str(na_hashset, word)
1645
1645
# in the hash table
@@ -1667,7 +1667,7 @@ cdef _try_bool_flex(parser_t *parser, int col, int line_start, int line_end,
1667
1667
data += 1
1668
1668
else :
1669
1669
for i in range (lines):
1670
- word = COLITER_NEXT(it)
1670
+ COLITER_NEXT(it, word )
1671
1671
1672
1672
k = kh_get_str(true_hashset, word)
1673
1673
if k != true_hashset.n_buckets:
@@ -1688,33 +1688,6 @@ cdef _try_bool_flex(parser_t *parser, int col, int line_start, int line_end,
1688
1688
1689
1689
return result.view(np.bool_), na_count
1690
1690
1691
- cdef _get_na_mask(parser_t * parser, int col, int line_start, int line_end,
1692
- kh_str_t * na_hashset):
1693
- cdef:
1694
- int error
1695
- Py_ssize_t i
1696
- size_t lines
1697
- coliter_t it
1698
- char * word
1699
- ndarray[uint8_t, cast= True ] result
1700
- khiter_t k
1701
-
1702
- lines = line_end - line_start
1703
- result = np.empty(lines, dtype = np.bool_)
1704
-
1705
- coliter_setup(& it, parser, col, line_start)
1706
- for i in range (lines):
1707
- word = COLITER_NEXT(it)
1708
-
1709
- k = kh_get_str(na_hashset, word)
1710
- # in the hash table
1711
- if k != na_hashset.n_buckets:
1712
- result[i] = 1
1713
- else :
1714
- result[i] = 0
1715
-
1716
- return result
1717
-
1718
1691
cdef kh_str_t* kset_from_list(list values) except NULL :
1719
1692
# caller takes responsibility for freeing the hash table
1720
1693
cdef:
@@ -1897,7 +1870,7 @@ cdef _apply_converter(object f, parser_t *parser, int col,
1897
1870
Py_ssize_t i
1898
1871
size_t lines
1899
1872
coliter_t it
1900
- char * word
1873
+ const char * word = NULL
1901
1874
char * errors = " strict"
1902
1875
ndarray[object ] result
1903
1876
object val
@@ -1909,17 +1882,17 @@ cdef _apply_converter(object f, parser_t *parser, int col,
1909
1882
1910
1883
if not PY3 and c_encoding == NULL :
1911
1884
for i in range (lines):
1912
- word = COLITER_NEXT(it)
1885
+ COLITER_NEXT(it, word )
1913
1886
val = PyBytes_FromString(word)
1914
1887
result[i] = f(val)
1915
1888
elif ((PY3 and c_encoding == NULL ) or c_encoding == b' utf-8' ):
1916
1889
for i in range (lines):
1917
- word = COLITER_NEXT(it)
1890
+ COLITER_NEXT(it, word )
1918
1891
val = PyUnicode_FromString(word)
1919
1892
result[i] = f(val)
1920
1893
else :
1921
1894
for i in range (lines):
1922
- word = COLITER_NEXT(it)
1895
+ COLITER_NEXT(it, word )
1923
1896
val = PyUnicode_Decode(word, strlen(word),
1924
1897
c_encoding, errors)
1925
1898
result[i] = f(val)
0 commit comments