@@ -58,6 +58,8 @@ The corresponding ``writer`` functions are object methods that are accessed like
58
58
* :ref:`to_clipboard<io.clipboard>`
59
59
* :ref:`to_pickle<io.pickle>`
60
60
61
+ :ref:`Here <io.perf>` is an informal performance comparison for some of these IO methods.
62
+
61
63
.. note::
62
64
For examples that use the ``StringIO`` class, make sure you import it
63
65
according to your Python version, i.e. ``from StringIO import StringIO`` for
@@ -3432,3 +3434,90 @@ Alternatively, the function :func:`~pandas.io.stata.read_stata` can be used
3432
3434
3433
3435
import os
3434
3436
os.remove(' stata.dta' )
3437
+
3438
+ .. _io.perf:
3439
+
3440
+ Performance Considerations
3441
+ --------------------------
3442
+
3443
+ This is an informal comparison of various IO methods, using pandas 0.13.1.
3444
+
3445
+
3446
+ Writing
3447
+
3448
+ .. code-block:: python
3449
+
3450
+ In [14]: %timeit test_sql_write(df)
3451
+ 1 loops, best of 3: 6.24 s per loop
3452
+
3453
+ In [15]: %timeit test_hdf_fixed_write(df)
3454
+ 1 loops, best of 3: 237 ms per loop
3455
+
3456
+ In [16]: %timeit test_hdf_table_write(df)
3457
+ 1 loops, best of 3: 901 ms per loop
3458
+
3459
+ In [17]: %timeit test_csv_write(df)
3460
+ 1 loops, best of 3: 3.44 s per loop
3461
+
3462
+ Reading
3463
+
3464
+ .. code-block:: python
3465
+
3466
+ In [18]: %timeit test_sql_read()
3467
+ 1 loops, best of 3: 766 ms per loop
3468
+
3469
+ In [19]: %timeit test_hdf_fixed_read()
3470
+ 10 loops, best of 3: 19.1 ms per loop
3471
+
3472
+ In [20]: %timeit test_hdf_table_read()
3473
+ 10 loops, best of 3: 39 ms per loop
3474
+
3475
+ In [22]: %timeit test_csv_read()
3476
+ 1 loops, best of 3: 620 ms per loop
3477
+
3478
+ And here's the code used for these benchmarks:
3479
+
3480
+ .. code-block:: python
3481
+
3482
+ import sqlite3
3483
+ import os
3484
+ from pandas.io import sql
3485
+
3486
+ In [3]: df = DataFrame(randn(1000000, 2), columns=list('AB'))
3487
+ <class 'pandas.core.frame.DataFrame'>
3488
+ Int64Index: 1000000 entries, 0 to 999999
3489
+ Data columns (total 2 columns):
3490
+ A 1000000 non-null values
3491
+ B 1000000 non-null values
3492
+ dtypes: float64(2)
3493
+
3494
def test_sql_write(df):
    """Benchmark: write ``df`` to the table ``test_table`` in ``test.sql``.

    Any existing ``test.sql`` file is deleted first, so each call writes
    into a newly created SQLite database file.

    Parameters
    ----------
    df : DataFrame
        The frame handed to ``sql.write_frame``.

    Returns
    -------
    None
    """
    if os.path.exists('test.sql'):
        os.remove('test.sql')
    sql_db = sqlite3.connect('test.sql')
    try:
        sql.write_frame(df, name='test_table', con=sql_db)
    finally:
        # Close the connection even when the write raises, so a failed
        # run does not leak an open handle on test.sql.
        sql_db.close()


def test_sql_read():
    """Benchmark: read every row of ``test_table`` back from ``test.sql``.

    The frame produced by ``sql.read_frame`` is discarded; the function
    exists to be timed, not to return data.

    Returns
    -------
    None
    """
    sql_db = sqlite3.connect('test.sql')
    try:
        sql.read_frame("select * from test_table", sql_db)
    finally:
        # Mirror test_sql_write: never leave the connection open on error.
        sql_db.close()
3505
+
3506
def test_hdf_fixed_write(df):
    """Benchmark: store ``df`` under the key ``test`` in ``test_fixed.hdf``.

    The file is opened with ``mode='w'`` and no ``format`` argument is
    passed to ``to_hdf``.
    """
    target, key = 'test_fixed.hdf', 'test'
    df.to_hdf(target, key, mode='w')


def test_hdf_fixed_read():
    """Benchmark: load the key ``test`` from ``test_fixed.hdf``.

    The loaded object is discarded; nothing is returned.
    """
    source, key = 'test_fixed.hdf', 'test'
    pd.read_hdf(source, key)
3511
+
3512
def test_hdf_table_write(df):
    """Benchmark: store ``df`` under the key ``test`` in ``test_table.hdf``.

    The file is opened with ``mode='w'`` and written with
    ``format='table'``.
    """
    target, key = 'test_table.hdf', 'test'
    df.to_hdf(target, key, mode='w', format='table')


def test_hdf_table_read():
    """Benchmark: load the key ``test`` from ``test_table.hdf``.

    The loaded object is discarded; nothing is returned.
    """
    source, key = 'test_table.hdf', 'test'
    pd.read_hdf(source, key)
3517
+
3518
def test_csv_read():
    """Benchmark: parse ``test.csv``, using its first column as the index.

    The parsed frame is discarded; nothing is returned.
    """
    source = 'test.csv'
    pd.read_csv(source, index_col=0)


def test_csv_write(df):
    """Benchmark: write ``df`` to ``test.csv`` with ``mode='w'``."""
    target = 'test.csv'
    df.to_csv(target, mode='w')
3523
+
0 commit comments