@@ -3442,6 +3442,16 @@ Performance Considerations
3442
3442
3443
3443
This is an informal comparison of various IO methods, using pandas 0.13.1.
3444
3444
3445
+ .. code-block:: python
3446
+
3447
+ In [3]: df = DataFrame(randn(1000000,2),columns=list('AB'))
3448
+ <class 'pandas.core.frame.DataFrame'>
3449
+ Int64Index: 1000000 entries, 0 to 999999
3450
+ Data columns (total 2 columns):
3451
+ A 1000000 non-null values
3452
+ B 1000000 non-null values
3453
+ dtypes: float64(2)
3454
+
3445
3455
3446
3456
Writing
3447
3457
@@ -3453,9 +3463,15 @@ Writing
3453
3463
In [15]: %timeit test_hdf_fixed_write(df)
3454
3464
1 loops, best of 3: 237 ms per loop
3455
3465
3466
+ In [26]: %timeit test_hdf_fixed_write_compress(df)
3467
+ 1 loops, best of 3: 245 ms per loop
3468
+
3456
3469
In [16]: %timeit test_hdf_table_write(df)
3457
3470
1 loops, best of 3: 901 ms per loop
3458
3471
3472
+ In [27]: %timeit test_hdf_table_write_compress(df)
3473
+ 1 loops, best of 3: 952 ms per loop
3474
+
3459
3475
In [17]: %timeit test_csv_write(df)
3460
3476
1 loops, best of 3: 3.44 s per loop
3461
3477
@@ -3469,12 +3485,29 @@ Reading
3469
3485
In [19]: %timeit test_hdf_fixed_read()
3470
3486
10 loops, best of 3: 19.1 ms per loop
3471
3487
3488
+ In [28]: %timeit test_hdf_fixed_read_compress()
3489
+ 10 loops, best of 3: 36.3 ms per loop
3490
+
3472
3491
In [20]: %timeit test_hdf_table_read()
3473
3492
10 loops, best of 3: 39 ms per loop
3474
3493
3494
+ In [29]: %timeit test_hdf_table_read_compress()
3495
+ 10 loops, best of 3: 60.6 ms per loop
3496
+
3475
3497
In [22]: %timeit test_csv_read()
3476
3498
1 loops, best of 3: 620 ms per loop
3477
3499
3500
+ Space on disk (in bytes)
3501
+
3502
+ .. code-block:: python
3503
+
3504
+ 25843712 Apr 8 14:11 test.sql
3505
+ 24007368 Apr 8 14:11 test_fixed.hdf
3506
+ 15580682 Apr 8 14:11 test_fixed_compress.hdf
3507
+ 24458444 Apr 8 14:11 test_table.hdf
3508
+ 16797283 Apr 8 14:11 test_table_compress.hdf
3509
+ 46152810 Apr 8 14:11 test.csv
3510
+
3478
3511
And here's the code:
3479
3512
3480
3513
.. code-block:: python
@@ -3483,13 +3516,7 @@ And here's the code
3483
3516
import os
3484
3517
from pandas.io import sql
3485
3518
3486
- In [3 ]: df = DataFrame(randn(1000000 ,2 ),columns = list (' AB' ))
3487
- < class ' pandas.core.frame.DataFrame' >
3488
- Int64Index: 1000000 entries, 0 to 999999
3489
- Data columns (total 2 columns):
3490
- A 1000000 non- null values
3491
- B 1000000 non- null values
3492
- dtypes: float64(2 )
3519
+ df = DataFrame(randn(1000000 ,2 ),columns = list (' AB' ))
3493
3520
3494
3521
def test_sql_write (df ):
3495
3522
if os.path.exists(' test.sql' ):
@@ -3509,15 +3536,27 @@ And here's the code
3509
3536
def test_hdf_fixed_read ():
3510
3537
pd.read_hdf(' test_fixed.hdf' ,' test' )
3511
3538
3539
+ def test_hdf_fixed_write_compress (df ):
3540
+ df.to_hdf(' test_fixed_compress.hdf' ,' test' ,mode = ' w' ,complib = ' blosc' )
3541
+
3542
+ def test_hdf_fixed_read_compress ():
3543
+ pd.read_hdf(' test_fixed_compress.hdf' ,' test' )
3544
+
3512
3545
def test_hdf_table_write (df ):
3513
3546
df.to_hdf(' test_table.hdf' ,' test' ,mode = ' w' ,format = ' table' )
3514
3547
3515
3548
def test_hdf_table_read ():
3516
3549
pd.read_hdf(' test_table.hdf' ,' test' )
3517
3550
3518
- def test_csv_read ():
3519
- pd.read_csv(' test.csv' ,index_col = 0 )
3551
+ def test_hdf_table_write_compress (df ):
3552
+ df.to_hdf(' test_table_compress.hdf' ,' test' ,mode = ' w' ,complib = ' blosc' ,format = ' table' )
3553
+
3554
+ def test_hdf_table_read_compress ():
3555
+ pd.read_hdf(' test_table_compress.hdf' ,' test' )
3520
3556
3521
3557
def test_csv_write (df ):
3522
3558
df.to_csv(' test.csv' ,mode = ' w' )
3523
3559
3560
+ def test_csv_read ():
3561
+ pd.read_csv(' test.csv' ,index_col = 0 )
3562
+
0 commit comments