19
19
import matplotlib.pyplot as plt
20
20
plt.close(' all' )
21
21
22
- from pandas import *
23
22
options.display.max_rows= 15
24
23
import pandas.util.testing as tm
25
- clipdf = DataFrame({' A' :[1 ,2 ,3 ],' B' :[4 ,5 ,6 ],' C' :[' p' ,' q' ,' r' ]},
24
+ clipdf = pd. DataFrame({' A' :[1 ,2 ,3 ],' B' :[4 ,5 ,6 ],' C' :[' p' ,' q' ,' r' ]},
26
25
index = [' x' ,' y' ,' z' ])
27
26
28
27
===============================
@@ -1195,7 +1194,7 @@ class of the csv module. For this, you have to specify ``sep=None``.
1195
1194
.. ipython:: python
1196
1195
:suppress:
1197
1196
1198
- df = DataFrame(np.random.randn(10 , 4 ))
1197
+ df = pd. DataFrame(np.random.randn(10 , 4 ))
1199
1198
df.to_csv(' tmp.sv' , sep = ' |' )
1200
1199
df.to_csv(' tmp2.sv' , sep = ' :' )
1201
1200
@@ -1375,7 +1374,7 @@ Note ``NaN``'s, ``NaT``'s and ``None`` will be converted to ``null`` and ``datet
1375
1374
1376
1375
.. ipython:: python
1377
1376
1378
- dfj = DataFrame(randn(5 , 2 ), columns = list (' AB' ))
1377
+ dfj = pd. DataFrame(randn(5 , 2 ), columns = list (' AB' ))
1379
1378
json = dfj.to_json()
1380
1379
json
1381
1380
@@ -1387,10 +1386,10 @@ file / string. Consider the following DataFrame and Series:
1387
1386
1388
1387
.. ipython:: python
1389
1388
1390
- dfjo = DataFrame(dict (A = range (1 , 4 ), B = range (4 , 7 ), C = range (7 , 10 )),
1389
+ dfjo = pd. DataFrame(dict (A = range (1 , 4 ), B = range (4 , 7 ), C = range (7 , 10 )),
1391
1390
columns = list (' ABC' ), index = list (' xyz' ))
1392
1391
dfjo
1393
- sjo = Series(dict (x = 15 , y = 16 , z = 17 ), name = ' D' )
1392
+ sjo = pd. Series(dict (x = 15 , y = 16 , z = 17 ), name = ' D' )
1394
1393
sjo
1395
1394
1396
1395
**Column oriented** (the default for ``DataFrame``) serializes the data as
@@ -1472,10 +1471,10 @@ Writing to a file, with a date index and a date column
1472
1471
.. ipython:: python
1473
1472
1474
1473
dfj2 = dfj.copy()
1475
- dfj2[' date' ] = Timestamp(' 20130101' )
1474
+ dfj2[' date' ] = pd. Timestamp(' 20130101' )
1476
1475
dfj2[' ints' ] = list (range (5 ))
1477
1476
dfj2[' bools' ] = True
1478
- dfj2.index = date_range(' 20130101' , periods = 5 )
1477
+ dfj2.index = pd. date_range(' 20130101' , periods = 5 )
1479
1478
dfj2.to_json(' test.json' )
1480
1479
open (' test.json' ).read()
1481
1480
@@ -1506,7 +1505,7 @@ problems:
1506
1505
1507
1506
In [141]: from datetime import timedelta
1508
1507
1509
- In [142]: dftd = DataFrame([timedelta(23), timedelta(seconds=5), 42])
1508
+ In [142]: dftd = pd. DataFrame([timedelta(23), timedelta(seconds=5), 42])
1510
1509
1511
1510
In [143]: dftd.to_json()
1512
1511
@@ -1633,7 +1632,7 @@ Preserve string indices:
1633
1632
1634
1633
.. ipython:: python
1635
1634
1636
- si = DataFrame(np.zeros((4 , 4 )),
1635
+ si = pd. DataFrame(np.zeros((4 , 4 )),
1637
1636
columns = list (range (4 )),
1638
1637
index = [str (i) for i in range (4 )])
1639
1638
si
@@ -1681,7 +1680,7 @@ data:
1681
1680
1682
1681
randfloats = np.random.uniform(- 100 , 1000 , 10000 )
1683
1682
randfloats.shape = (1000 , 10 )
1684
- dffloats = DataFrame(randfloats, columns = list (' ABCDEFGHIJ' ))
1683
+ dffloats = pd. DataFrame(randfloats, columns = list (' ABCDEFGHIJ' ))
1685
1684
1686
1685
jsonfloats = dffloats.to_json()
1687
1686
@@ -1884,7 +1883,7 @@ Read in pandas ``to_html`` output (with some loss of floating point precision)
1884
1883
1885
1884
.. code-block:: python
1886
1885
1887
- df = DataFrame(randn(2 , 2 ))
1886
+ df = pd. DataFrame(randn(2 , 2 ))
1888
1887
s = df.to_html(float_format = ' {0:.40g } ' .format)
1889
1888
dfin = read_html(s, index_col = 0 )
1890
1889
@@ -1937,7 +1936,7 @@ in the method ``to_string`` described above.
1937
1936
1938
1937
.. ipython:: python
1939
1938
1940
- df = DataFrame(randn(2 , 2 ))
1939
+ df = pd. DataFrame(randn(2 , 2 ))
1941
1940
df
1942
1941
print (df.to_html()) # raw html
1943
1942
@@ -2013,7 +2012,7 @@ Finally, the ``escape`` argument allows you to control whether the
2013
2012
2014
2013
.. ipython:: python
2015
2014
2016
- df = DataFrame({' a' : list (' &<>' ), ' b' : randn(3 )})
2015
+ df = pd. DataFrame({' a' : list (' &<>' ), ' b' : randn(3 )})
2017
2016
2018
2017
2019
2018
.. ipython:: python
@@ -2367,7 +2366,7 @@ Added support for Openpyxl >= 2.2
2367
2366
bio = BytesIO()
2368
2367
2369
2368
# By setting the 'engine' in the ExcelWriter constructor.
2370
- writer = ExcelWriter(bio, engine = ' xlsxwriter' )
2369
+ writer = pd. ExcelWriter(bio, engine = ' xlsxwriter' )
2371
2370
df.to_excel(writer, sheet_name = ' Sheet1' )
2372
2371
2373
2372
# Save the workbook
@@ -2423,7 +2422,7 @@ argument to ``to_excel`` and to ``ExcelWriter``. The built-in engines are:
2423
2422
df.to_excel(' path_to_file.xlsx' , sheet_name = ' Sheet1' , engine = ' xlsxwriter' )
2424
2423
2425
2424
# By setting the 'engine' in the ExcelWriter constructor.
2426
- writer = ExcelWriter(' path_to_file.xlsx' , engine = ' xlsxwriter' )
2425
+ writer = pd. ExcelWriter(' path_to_file.xlsx' , engine = ' xlsxwriter' )
2427
2426
2428
2427
# Or via pandas configuration.
2429
2428
from pandas import options
@@ -2559,7 +2558,7 @@ both on the writing (serialization), and reading (deserialization).
2559
2558
2560
2559
.. ipython:: python
2561
2560
2562
- df = DataFrame(np.random.rand(5 ,2 ),columns = list (' AB' ))
2561
+ df = pd. DataFrame(np.random.rand(5 ,2 ),columns = list (' AB' ))
2563
2562
df.to_msgpack(' foo.msg' )
2564
2563
pd.read_msgpack(' foo.msg' )
2565
2564
s = Series(np.random.rand(5 ),index = date_range(' 20130101' ,periods = 5 ))
@@ -2647,7 +2646,7 @@ for some advanced strategies
2647
2646
2648
2647
.. ipython:: python
2649
2648
2650
- store = HDFStore(' store.h5' )
2649
+ store = pd. HDFStore(' store.h5' )
2651
2650
print (store)
2652
2651
2653
2652
Objects can be written to the file just like adding key-value pairs to a
@@ -2656,11 +2655,11 @@ dict:
2656
2655
.. ipython:: python
2657
2656
2658
2657
np.random.seed(1234 )
2659
- index = date_range(' 1/1/2000' , periods = 8 )
2660
- s = Series(randn(5 ), index = [' a' , ' b' , ' c' , ' d' , ' e' ])
2661
- df = DataFrame(randn(8 , 3 ), index = index,
2658
+ index = pd. date_range(' 1/1/2000' , periods = 8 )
2659
+ s = pd. Series(randn(5 ), index = [' a' , ' b' , ' c' , ' d' , ' e' ])
2660
+ df = pd. DataFrame(randn(8 , 3 ), index = index,
2662
2661
columns = [' A' , ' B' , ' C' ])
2663
- wp = Panel(randn(2 , 5 , 4 ), items = [' Item1' , ' Item2' ],
2662
+ wp = pd. Panel(randn(2 , 5 , 4 ), items = [' Item1' , ' Item2' ],
2664
2663
major_axis = date_range(' 1/1/2000' , periods = 5 ),
2665
2664
minor_axis = [' A' , ' B' , ' C' , ' D' ])
2666
2665
@@ -2705,7 +2704,7 @@ Closing a Store, Context Manager
2705
2704
2706
2705
# Working with, and automatically closing the store with the context
2707
2706
# manager
2708
- with HDFStore(' store.h5' ) as store:
2707
+ with pd. HDFStore(' store.h5' ) as store:
2709
2708
store.keys()
2710
2709
2711
2710
.. ipython:: python
@@ -2772,7 +2771,7 @@ This is also true for the major axis of a ``Panel``:
2772
2771
[[np.nan, np.nan, np.nan], [np.nan,5 ,6 ]],
2773
2772
[[np.nan, np.nan, np.nan],[np.nan,3 ,np.nan]]]
2774
2773
2775
- panel_with_major_axis_all_missing = Panel(matrix,
2774
+ panel_with_major_axis_all_missing = pd. Panel(matrix,
2776
2775
items = [' Item1' , ' Item2' ,' Item3' ],
2777
2776
major_axis = [1 ,2 ],
2778
2777
minor_axis = [' A' , ' B' , ' C' ])
@@ -2816,7 +2815,7 @@ This format is specified by default when using ``put`` or ``to_hdf`` or by ``for
2816
2815
2817
2816
.. code-block:: python
2818
2817
2819
- DataFrame(randn(10 ,2 )).to_hdf(' test_fixed.h5' ,' df' )
2818
+ pd. DataFrame(randn(10 ,2 )).to_hdf(' test_fixed.h5' ,' df' )
2820
2819
2821
2820
pd.read_hdf(' test_fixed.h5' ,' df' ,where = ' index>5' )
2822
2821
TypeError : cannot pass a where specification when reading a fixed format .
@@ -2848,7 +2847,7 @@ enable ``put/append/to_hdf`` to by default store in the ``table`` format.
2848
2847
2849
2848
.. ipython:: python
2850
2849
2851
- store = HDFStore(' store.h5' )
2850
+ store = pd. HDFStore(' store.h5' )
2852
2851
df1 = df[0 :4 ]
2853
2852
df2 = df[4 :]
2854
2853
@@ -2914,7 +2913,7 @@ defaults to `nan`.
2914
2913
2915
2914
.. ipython:: python
2916
2915
2917
- df_mixed = DataFrame({ ' A' : randn(8 ),
2916
+ df_mixed = pd. DataFrame({ ' A' : randn(8 ),
2918
2917
' B' : randn(8 ),
2919
2918
' C' : np.array(randn(8 ),dtype = ' float32' ),
2920
2919
' string' :' string' ,
@@ -2940,12 +2939,12 @@ storing/selecting from homogeneous index DataFrames.
2940
2939
2941
2940
.. ipython:: python
2942
2941
2943
- index = MultiIndex(levels = [[' foo' , ' bar' , ' baz' , ' qux' ],
2942
+ index = pd. MultiIndex(levels = [[' foo' , ' bar' , ' baz' , ' qux' ],
2944
2943
[' one' , ' two' , ' three' ]],
2945
2944
labels = [[0 , 0 , 0 , 1 , 1 , 2 , 2 , 3 , 3 , 3 ],
2946
2945
[0 , 1 , 2 , 0 , 1 , 1 , 2 , 0 , 1 , 2 ]],
2947
2946
names = [' foo' , ' bar' ])
2948
- df_mi = DataFrame(np.random.randn(10 , 3 ), index = index,
2947
+ df_mi = pd. DataFrame(np.random.randn(10 , 3 ), index = index,
2949
2948
columns = [' A' , ' B' , ' C' ])
2950
2949
df_mi
2951
2950
@@ -3127,7 +3126,7 @@ specified in the format: ``<float>(<unit>)``, where float may be signed (and fra
3127
3126
.. ipython:: python
3128
3127
3129
3128
from datetime import timedelta
3130
- dftd = DataFrame(dict (A = Timestamp(' 20130101' ), B = [ Timestamp(' 20130101' ) + timedelta(days = i,seconds = 10 ) for i in range (10 ) ]))
3129
+ dftd = pd. DataFrame(dict (A = Timestamp(' 20130101' ), B = [ Timestamp(' 20130101' ) + timedelta(days = i,seconds = 10 ) for i in range (10 ) ]))
3131
3130
dftd[' C' ] = dftd[' A' ]- dftd[' B' ]
3132
3131
dftd
3133
3132
store.append(' dftd' ,dftd,data_columns = True )
@@ -3163,8 +3162,8 @@ Oftentimes when appending large amounts of data to a store, it is useful to turn
3163
3162
3164
3163
.. ipython:: python
3165
3164
3166
- df_1 = DataFrame(randn(10 ,2 ),columns = list (' AB' ))
3167
- df_2 = DataFrame(randn(10 ,2 ),columns = list (' AB' ))
3165
+ df_1 = pd. DataFrame(randn(10 ,2 ),columns = list (' AB' ))
3166
+ df_2 = pd. DataFrame(randn(10 ,2 ),columns = list (' AB' ))
3168
3167
3169
3168
st = pd.HDFStore(' appends.h5' ,mode = ' w' )
3170
3169
st.append(' df' , df_1, data_columns = [' B' ], index = False )
@@ -3261,7 +3260,7 @@ chunks.
3261
3260
3262
3261
.. ipython:: python
3263
3262
3264
- dfeq = DataFrame({' number' : np.arange(1 ,11 )})
3263
+ dfeq = pd. DataFrame({' number' : np.arange(1 ,11 )})
3265
3264
dfeq
3266
3265
3267
3266
store.append(' dfeq' , dfeq, data_columns = [' number' ])
@@ -3301,7 +3300,7 @@ Sometimes you want to get the coordinates (a.k.a the index locations) of your qu
3301
3300
3302
3301
.. ipython:: python
3303
3302
3304
- df_coord = DataFrame(np.random.randn(1000 ,2 ),index = date_range(' 20000101' ,periods = 1000 ))
3303
+ df_coord = pd. DataFrame(np.random.randn(1000 ,2 ),index = date_range(' 20000101' ,periods = 1000 ))
3305
3304
store.append(' df_coord' ,df_coord)
3306
3305
c = store.select_as_coordinates(' df_coord' ,' index>20020101' )
3307
3306
c.summary()
@@ -3318,7 +3317,7 @@ a datetimeindex which are 5.
3318
3317
3319
3318
.. ipython:: python
3320
3319
3321
- df_mask = DataFrame(np.random.randn(1000 ,2 ),index = date_range(' 20000101' ,periods = 1000 ))
3320
+ df_mask = pd. DataFrame(np.random.randn(1000 ,2 ),index = date_range(' 20000101' ,periods = 1000 ))
3322
3321
store.append(' df_mask' ,df_mask)
3323
3322
c = store.select_column(' df_mask' ,' index' )
3324
3323
where = c[DatetimeIndex(c).month== 5 ].index
@@ -3366,7 +3365,7 @@ results.
3366
3365
3367
3366
.. ipython:: python
3368
3367
3369
- df_mt = DataFrame(randn(8 , 6 ), index = date_range(' 1/1/2000' , periods = 8 ),
3368
+ df_mt = pd. DataFrame(randn(8 , 6 ), index = date_range(' 1/1/2000' , periods = 8 ),
3370
3369
columns = [' A' , ' B' , ' C' , ' D' , ' E' , ' F' ])
3371
3370
df_mt[' foo' ] = ' bar'
3372
3371
df_mt.ix[1 , (' A' , ' B' )] = np.nan
@@ -3458,7 +3457,7 @@ Compression for all objects within the file
3458
3457
3459
3458
.. code-block:: python
3460
3459
3461
- store_compressed = HDFStore(' store_compressed.h5' , complevel = 9 , complib = ' blosc' )
3460
+ store_compressed = pd. HDFStore(' store_compressed.h5' , complevel = 9 , complib = ' blosc' )
3462
3461
3463
3462
Or on-the-fly compression (this only applies to tables). You can turn
3464
3463
off file compression for a specific table by passing ``complevel=0``
@@ -3556,7 +3555,7 @@ stored in a more efficient manner.
3556
3555
3557
3556
.. ipython:: python
3558
3557
3559
- dfcat = DataFrame({ ' A' : Series(list (' aabbcdba' )).astype(' category' ),
3558
+ dfcat = pd. DataFrame({ ' A' : Series(list (' aabbcdba' )).astype(' category' ),
3560
3559
' B' : np.random.randn(8 ) })
3561
3560
dfcat
3562
3561
dfcat.dtypes
@@ -3614,7 +3613,7 @@ Starting in 0.11.0, passing a ``min_itemsize`` dict will cause all passed column
3614
3613
3615
3614
.. ipython:: python
3616
3615
3617
- dfs = DataFrame(dict (A = ' foo' , B = ' bar' ),index = list (range (5 )))
3616
+ dfs = pd. DataFrame(dict (A = ' foo' , B = ' bar' ),index = list (range (5 )))
3618
3617
dfs
3619
3618
3620
3619
# A and B have a size of 30
@@ -3633,7 +3632,7 @@ You could inadvertently turn an actual ``nan`` value into a missing value.
3633
3632
3634
3633
.. ipython:: python
3635
3634
3636
- dfss = DataFrame(dict (A = [' foo' ,' bar' ,' nan' ]))
3635
+ dfss = pd. DataFrame(dict (A = [' foo' ,' bar' ,' nan' ]))
3637
3636
dfss
3638
3637
3639
3638
store.append(' dfss' , dfss)
@@ -3667,7 +3666,7 @@ It is possible to write an ``HDFStore`` object that can easily be imported into
3667
3666
index = range (100 ))
3668
3667
df_for_r.head()
3669
3668
3670
- store_export = HDFStore(' export.h5' )
3669
+ store_export = pd. HDFStore(' export.h5' )
3671
3670
store_export.append(' df_for_r' , df_for_r, data_columns = df_dc.columns)
3672
3671
store_export
3673
3672
@@ -3756,7 +3755,7 @@ number of options, please see the docstring.
3756
3755
.. ipython:: python
3757
3756
3758
3757
# a legacy store
3759
- legacy_store = HDFStore(legacy_file_path,' r' )
3758
+ legacy_store = pd. HDFStore(legacy_file_path,' r' )
3760
3759
legacy_store
3761
3760
3762
3761
# copy (and return the new handle)
@@ -3920,7 +3919,7 @@ the database using :func:`~pandas.DataFrame.to_sql`.
3920
3919
(42 , datetime.datetime(2010 ,10 ,19 ), ' Y' , - 12.5 , False ),
3921
3920
(63 , datetime.datetime(2010 ,10 ,20 ), ' Z' , 5.73 , True )]
3922
3921
3923
- data = DataFrame(d, columns = c)
3922
+ data = pd. DataFrame(d, columns = c)
3924
3923
3925
3924
.. ipython:: python
3926
3925
@@ -4400,7 +4399,7 @@ into a .dta file. The format version of this file is always 115 (Stata 12).
4400
4399
4401
4400
.. ipython:: python
4402
4401
4403
- df = DataFrame(randn(10 , 2 ), columns = list (' AB' ))
4402
+ df = pd. DataFrame(randn(10 , 2 ), columns = list (' AB' ))
4404
4403
df.to_stata(' stata.dta' )
4405
4404
4406
4405
*Stata* data files have limited data type support; only strings with
@@ -4625,7 +4624,7 @@ This is an informal comparison of various IO methods, using pandas 0.13.1.
4625
4624
4626
4625
.. code-block:: python
4627
4626
4628
- In [1 ]: df = DataFrame(randn(1000000 ,2 ),columns = list (' AB' ))
4627
+ In [1 ]: df = pd. DataFrame(randn(1000000 ,2 ),columns = list (' AB' ))
4629
4628
4630
4629
In [2 ]: df.info()
4631
4630
< class ' pandas.core.frame.DataFrame' >
@@ -4699,7 +4698,7 @@ And here's the code
4699
4698
import os
4700
4699
from pandas.io import sql
4701
4700
4702
- df = DataFrame(randn(1000000 ,2 ),columns = list (' AB' ))
4701
+ df = pd. DataFrame(randn(1000000 ,2 ),columns = list (' AB' ))
4703
4702
4704
4703
def test_sql_write (df ):
4705
4704
if os.path.exists(' test.sql' ):
0 commit comments