5
5
import platform
6
6
import codecs
7
7
8
- import subprocess
9
-
10
8
import re
11
9
import sys
12
10
from datetime import datetime
@@ -1494,36 +1492,71 @@ def test_memory_map(self):
1494
1492
out = self .read_csv (mmap_file , memory_map = True )
1495
1493
tm .assert_frame_equal (out , expected )
1496
1494
1497
-
1498
1495
def test_parse_trim_buffers (self ):
1499
- # This test is designed to cause a `segfault` with unpatched `tokenizer.c`,
1500
- # Sometimes the test fails on `segfault`, other times it fails due to memory
1501
- # corruption, which causes the loaded DataFrame to differ from the expected
1502
- # one.
1496
+ # This test is designed to cause a `segfault` with unpatched
1497
+ # `tokenizer.c`. Sometimes the test fails on `segfault`, other
1498
+ # times it fails due to memory corruption, which causes the
1499
+ # loaded DataFrame to differ from the expected one.
1503
1500
n_lines , chunksizes = 173 , range (57 , 90 )
1504
1501
1505
1502
# Create the expected output
1506
1503
expected_ = [(chunksize_ , "9999-9" , "9999-9" )
1507
- for chunksize_ in chunksizes
1508
- for _ in range ((n_lines + chunksize_ - 1 ) // chunksize_ )]
1504
+ for chunksize_ in chunksizes
1505
+ for _ in range ((n_lines + chunksize_ - 1 ) // chunksize_ )]
1509
1506
expected = pd .DataFrame (expected_ , columns = None , index = None )
1510
1507
1511
1508
# Generate a large mixed-type CSV file on-the-fly (approx 272 KiB)
1512
- record_ = "9999-9,99:99,,,,ZZ,ZZ,,,ZZZ-ZZZZ,.Z-ZZZZ,-9.99,,,9.99,ZZZZZ,,-99,9,ZZZ-ZZZZ,ZZ-ZZZZ,,9.99,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,999,ZZZ-ZZZZ,,ZZ-ZZZZ,,,,,ZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,,,9,9,9,9,99,99,999,999,ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZ,9,ZZ-ZZZZ,9.99,ZZ-ZZZZ,ZZ-ZZZZ,,,,ZZZZ,,,ZZ,ZZ,,,,,,,,,,,,,9,,,999.99,999.99,,,ZZZZZ,,,Z9,,,,,,,ZZZ,ZZZ,,,,,,,,,,,ZZZZZ,ZZZZZ,ZZZ-ZZZZZZ,ZZZ-ZZZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,,,999999,999999,ZZZ,ZZZ,,,ZZZ,ZZZ,999.99,999.99,,,,ZZZ-ZZZ,ZZZ-ZZZ,-9.99,-9.99,9,9,,99,,9.99,9.99,9,9,9.99,9.99,,,,9.99,9.99,,99,,99,9.99,9.99,,,ZZZ,ZZZ,,999.99,,999.99,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,ZZZZZ,ZZZZZ,ZZZ,ZZZ,9,9,,,,,,ZZZ-ZZZZ,ZZZ999Z,,,999.99,,999.99,ZZZ-ZZZZ,,,9.999,9.999,9.999,9.999,-9.999,-9.999,-9.999,-9.999,9.999,9.999,9.999,9.999,9.999,9.999,9.999,9.999,99999,ZZZ-ZZZZ,,9.99,ZZZ,,,,,,,,ZZZ,,,,,9,,,,9,,,,,,,,,,ZZZ-ZZZZ,ZZZ-ZZZZ,,ZZZZZ,ZZZZZ,ZZZZZ,ZZZZZ,,,9.99,,ZZ-ZZZZ,ZZ-ZZZZ,ZZ,999,,,,ZZ-ZZZZ,ZZZ,ZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,,,99.99,99.99,,,9.99,9.99,9.99,9.99,ZZZ-ZZZZ,,,ZZZ-ZZZZZ,,,,,-9.99,-9.99,-9.99,-9.99,,,,,,,,,ZZZ-ZZZZ,,9,9.99,9.99,99ZZ,,-9.99,-9.99,ZZZ-ZZZZ,,,,,,,ZZZ-ZZZZ,9.99,9.99,9999,,,,,,,,,,-9.9,Z/Z-ZZZZ,999.99,9.99,,999.99,ZZ-ZZZZ,ZZ-ZZZZ,9.99,9.99,9.99,9.99,9.99,9.99,,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ,ZZZ,ZZZ,ZZZ,9.99,,,-9.99,ZZ-ZZZZ,-999.99,,-9999,,999.99,,,,999.99,99.99,,,ZZ-ZZZZZZZZ,ZZ-ZZZZ-ZZZZZZZ,,,,ZZ-ZZ-ZZZZZZZZ,ZZZZZZZZ,ZZZ-ZZZZ,9999,999.99,ZZZ-ZZZZ,-9.99,-9.99,ZZZ-ZZZZ,99:99:99,,99,99,,9.99,,-99.99,,,,,,9.99,ZZZ-ZZZZ,-9.99,-9.99,9.99,9.99,,ZZZ,,,,,,,ZZZ,ZZZ,,,,,"
1509
+ record_ = \
1510
+ """9999-9,99:99,,,,ZZ,ZZ,,,ZZZ-ZZZZ,.Z-ZZZZ,-9.99,,,9.""" \
1511
+ """99,ZZZZZ,,-99,9,ZZZ-ZZZZ,ZZ-ZZZZ,,9.99,ZZZ-ZZZZZ,ZZZ-""" \
1512
+ """ZZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-ZZZZ,ZZZ-""" \
1513
+ """ZZZZ,ZZZ-ZZZZ,999,ZZZ-ZZZZ,,ZZ-ZZZZ,,,,,ZZZZ,ZZZ-""" \
1514
+ """ZZZZZ,ZZZ-ZZZZ,,,9,9,9,9,99,99,999,999,ZZZZZ,ZZZ-""" \
1515
+ """ZZZZZ,ZZZ-ZZZZ,9,ZZ-ZZZZ,9.99,ZZ-ZZZZ,ZZ-""" \
1516
+ """ZZZZ,,,,ZZZZ,,,ZZ,ZZ,,,,,,,,,,,,,9,,,999.99,999.99,,,""" \
1517
+ """ZZZZZ,,,Z9,,,,,,,ZZZ,ZZZ,,,,,,,,,,,ZZZZZ,ZZZZZ,ZZZ-""" \
1518
+ """ZZZZZZ,ZZZ-ZZZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-ZZZZ,ZZ-""" \
1519
+ """ZZZZ,,,999999,999999,ZZZ,ZZZ,,,ZZZ,ZZZ,999.99,999.""" \
1520
+ """99,,,,ZZZ-ZZZ,ZZZ-ZZZ,-9.99,-9.99,9,9,,99,,9.99,9.""" \
1521
+ """99,9,9,9.99,9.99,,,,9.99,9.99,,99,,99,9.99,9.""" \
1522
+ """99,,,ZZZ,ZZZ,,999.99,,999.99,ZZZ,ZZZ-ZZZZ,ZZZ-""" \
1523
+ """ZZZZ,,,ZZZZZ,ZZZZZ,ZZZ,ZZZ,9,9,,,,,,ZZZ-""" \
1524
+ """ZZZZ,ZZZ999Z,,,999.99,,999.99,ZZZ-ZZZZ,,,9.999,9.""" \
1525
+ """999,9.999,9.999,-9.999,-9.999,-9.999,-9.999,9.999,9.""" \
1526
+ """999,9.999,9.999,9.999,9.999,9.999,9.999,99999,ZZZ-""" \
1527
+ """ZZZZ,,9.99,ZZZ,,,,,,,,ZZZ,,,,,9,,,,9,,,,,,,,,,ZZZ-""" \
1528
+ """ZZZZ,ZZZ-ZZZZ,,ZZZZZ,ZZZZZ,ZZZZZ,ZZZZZ,,,9.99,,ZZ-""" \
1529
+ """ZZZZ,ZZ-ZZZZ,ZZ,999,,,,ZZ-ZZZZ,ZZZ,ZZZ,ZZZ-ZZZZ,ZZZ-""" \
1530
+ """ZZZZ,,,99.99,99.99,,,9.99,9.99,9.99,9.99,ZZZ-""" \
1531
+ """ZZZZ,,,ZZZ-ZZZZZ,,,,,-9.99,-9.99,-9.99,-9.""" \
1532
+ """99,,,,,,,,,ZZZ-ZZZZ,,9,9.99,9.99,99ZZ,,-9.99,-9.""" \
1533
+ """99,ZZZ-ZZZZ,,,,,,,ZZZ-ZZZZ,9.99,9.99,9999,,,,,,,,,,-9""" \
1534
+ """.9,Z/Z-ZZZZ,999.99,9.99,,999.99,ZZ-ZZZZ,ZZ-ZZZZ,9.""" \
1535
+ """99,9.99,9.99,9.99,9.99,9.99,,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ-""" \
1536
+ """ZZZZZ,ZZZ-ZZZZZ,ZZZ-ZZZZZ,ZZZ,ZZZ,ZZZ,ZZZ,9.99,,,-9.""" \
1537
+ """99,ZZ-ZZZZ,-999.99,,-9999,,999.99,,,,999.99,99.""" \
1538
+ """99,,,ZZ-ZZZZZZZZ,ZZ-ZZZZ-ZZZZZZZ,,,,ZZ-ZZ-""" \
1539
+ """ZZZZZZZZ,ZZZZZZZZ,ZZZ-ZZZZ,9999,999.99,ZZZ-ZZZZ,-9.""" \
1540
+ """99,-9.99,ZZZ-ZZZZ,99:99:99,,99,99,,9.99,,-99.""" \
1541
+ """99,,,,,,9.99,ZZZ-ZZZZ,-9.99,-9.99,9.99,9.""" \
1542
+ """99,,ZZZ,,,,,,,ZZZ,ZZZ,,,,,"""
1513
1543
csv_data = "\n " .join ([record_ ] * n_lines ) + "\n "
1514
1544
1515
1545
output_ = list ()
1516
1546
for chunksize_ in chunksizes :
1517
1547
try :
1518
- iterator_ = self .read_csv (StringIO (csv_data ), header = None , dtype = object ,
1519
- chunksize = chunksize_ , na_filter = True )
1520
- except ValueError , e :
1548
+ iterator_ = self .read_csv (StringIO (csv_data ), header = None ,
1549
+ dtype = object , chunksize = chunksize_ ,
1550
+ na_filter = True )
1551
+ except ValueError :
1521
1552
# Ignore unsupported dtype=object by engine=python
1522
1553
pass
1523
1554
1524
1555
for chunk_ in iterator_ :
1525
- output_ .append ((chunksize_ , chunk_ .iloc [0 , 0 ], chunk_ .iloc [- 1 , 0 ]))
1556
+ output_ .append ((chunksize_ ,
1557
+ chunk_ .iloc [0 , 0 ],
1558
+ chunk_ .iloc [- 1 , 0 ]))
1526
1559
1527
1560
df = pd .DataFrame (output_ , columns = None , index = None )
1528
1561
1529
- tm .assert_frame_equal (df , expected )
1562
+ tm .assert_frame_equal (df , expected )
0 commit comments