From 90c4d534681be5d89aac7c4a6df315fc0b821558 Mon Sep 17 00:00:00 2001 From: DaCoEx Date: Tue, 9 Feb 2016 13:36:39 +0100 Subject: [PATCH] added pd as namespace more consistent with code in the rest of the document added more namespace Generally, also numpy's randn should used as np.random.randn isolated namespace changes removed warning on hdf groups remaining namespace --- doc/source/io.rst | 91 +++++++++++++++++++++++------------------------ 1 file changed, 45 insertions(+), 46 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 64967f1979807..5a42f58e69cc7 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -19,10 +19,9 @@ import matplotlib.pyplot as plt plt.close('all') - from pandas import * options.display.max_rows=15 import pandas.util.testing as tm - clipdf = DataFrame({'A':[1,2,3],'B':[4,5,6],'C':['p','q','r']}, + clipdf = pd.DataFrame({'A':[1,2,3],'B':[4,5,6],'C':['p','q','r']}, index=['x','y','z']) =============================== @@ -1195,7 +1194,7 @@ class of the csv module. For this, you have to specify ``sep=None``. .. ipython:: python :suppress: - df = DataFrame(np.random.randn(10, 4)) + df = pd.DataFrame(np.random.randn(10, 4)) df.to_csv('tmp.sv', sep='|') df.to_csv('tmp2.sv', sep=':') @@ -1375,7 +1374,7 @@ Note ``NaN``'s, ``NaT``'s and ``None`` will be converted to ``null`` and ``datet .. ipython:: python - dfj = DataFrame(randn(5, 2), columns=list('AB')) + dfj = pd.DataFrame(randn(5, 2), columns=list('AB')) json = dfj.to_json() json @@ -1387,10 +1386,10 @@ file / string. Consider the following DataFrame and Series: .. ipython:: python - dfjo = DataFrame(dict(A=range(1, 4), B=range(4, 7), C=range(7, 10)), + dfjo = pd.DataFrame(dict(A=range(1, 4), B=range(4, 7), C=range(7, 10)), columns=list('ABC'), index=list('xyz')) dfjo - sjo = Series(dict(x=15, y=16, z=17), name='D') + sjo = pd.Series(dict(x=15, y=16, z=17), name='D') sjo **Column oriented** (the default for ``DataFrame``) serializes the data as @@ -1472,10 +1471,10 @@ Writing to a file, with a date index and a date column .. ipython:: python dfj2 = dfj.copy() - dfj2['date'] = Timestamp('20130101') + dfj2['date'] = pd.Timestamp('20130101') dfj2['ints'] = list(range(5)) dfj2['bools'] = True - dfj2.index = date_range('20130101', periods=5) + dfj2.index = pd.date_range('20130101', periods=5) dfj2.to_json('test.json') open('test.json').read() @@ -1506,7 +1505,7 @@ problems: In [141]: from datetime import timedelta - In [142]: dftd = DataFrame([timedelta(23), timedelta(seconds=5), 42]) + In [142]: dftd = pd.DataFrame([timedelta(23), timedelta(seconds=5), 42]) In [143]: dftd.to_json() @@ -1633,7 +1632,7 @@ Preserve string indices: .. ipython:: python - si = DataFrame(np.zeros((4, 4)), + si = pd.DataFrame(np.zeros((4, 4)), columns=list(range(4)), index=[str(i) for i in range(4)]) si @@ -1681,7 +1680,7 @@ data: randfloats = np.random.uniform(-100, 1000, 10000) randfloats.shape = (1000, 10) - dffloats = DataFrame(randfloats, columns=list('ABCDEFGHIJ')) + dffloats = pd.DataFrame(randfloats, columns=list('ABCDEFGHIJ')) jsonfloats = dffloats.to_json() @@ -1884,7 +1883,7 @@ Read in pandas ``to_html`` output (with some loss of floating point precision) .. code-block:: python - df = DataFrame(randn(2, 2)) + df = pd.DataFrame(randn(2, 2)) s = df.to_html(float_format='{0:.40g}'.format) dfin = read_html(s, index_col=0) @@ -1937,7 +1936,7 @@ in the method ``to_string`` described above. .. ipython:: python - df = DataFrame(randn(2, 2)) + df = pd.DataFrame(randn(2, 2)) df print(df.to_html()) # raw html @@ -2013,7 +2012,7 @@ Finally, the ``escape`` argument allows you to control whether the .. ipython:: python - df = DataFrame({'a': list('&<>'), 'b': randn(3)}) + df = pd.DataFrame({'a': list('&<>'), 'b': randn(3)}) .. ipython:: python @@ -2367,7 +2366,7 @@ Added support for Openpyxl >= 2.2 bio = BytesIO() # By setting the 'engine' in the ExcelWriter constructor. - writer = ExcelWriter(bio, engine='xlsxwriter') + writer = pd.ExcelWriter(bio, engine='xlsxwriter') df.to_excel(writer, sheet_name='Sheet1') # Save the workbook @@ -2423,7 +2422,7 @@ argument to ``to_excel`` and to ``ExcelWriter``. The built-in engines are: df.to_excel('path_to_file.xlsx', sheet_name='Sheet1', engine='xlsxwriter') # By setting the 'engine' in the ExcelWriter constructor. - writer = ExcelWriter('path_to_file.xlsx', engine='xlsxwriter') + writer = pd.ExcelWriter('path_to_file.xlsx', engine='xlsxwriter') # Or via pandas configuration. from pandas import options @@ -2559,7 +2558,7 @@ both on the writing (serialization), and reading (deserialization). .. ipython:: python - df = DataFrame(np.random.rand(5,2),columns=list('AB')) + df = pd.DataFrame(np.random.rand(5,2),columns=list('AB')) df.to_msgpack('foo.msg') pd.read_msgpack('foo.msg') s = Series(np.random.rand(5),index=date_range('20130101',periods=5)) @@ -2647,7 +2646,7 @@ for some advanced strategies .. ipython:: python - store = HDFStore('store.h5') + store = pd.HDFStore('store.h5') print(store) Objects can be written to the file just like adding key-value pairs to a @@ -2656,11 +2655,11 @@ dict: .. ipython:: python np.random.seed(1234) - index = date_range('1/1/2000', periods=8) - s = Series(randn(5), index=['a', 'b', 'c', 'd', 'e']) - df = DataFrame(randn(8, 3), index=index, + index = pd.date_range('1/1/2000', periods=8) + s = pd.Series(randn(5), index=['a', 'b', 'c', 'd', 'e']) + df = pd.DataFrame(randn(8, 3), index=index, columns=['A', 'B', 'C']) - wp = Panel(randn(2, 5, 4), items=['Item1', 'Item2'], + wp = pd.Panel(randn(2, 5, 4), items=['Item1', 'Item2'], major_axis=date_range('1/1/2000', periods=5), minor_axis=['A', 'B', 'C', 'D']) @@ -2705,7 +2704,7 @@ Closing a Store, Context Manager # Working with, and automatically closing the store with the context # manager - with HDFStore('store.h5') as store: + with pd.HDFStore('store.h5') as store: store.keys() .. ipython:: python @@ -2772,7 +2771,7 @@ This is also true for the major axis of a ``Panel``: [[np.nan, np.nan, np.nan], [np.nan,5,6]], [[np.nan, np.nan, np.nan],[np.nan,3,np.nan]]] - panel_with_major_axis_all_missing = Panel(matrix, + panel_with_major_axis_all_missing = pd.Panel(matrix, items=['Item1', 'Item2','Item3'], major_axis=[1,2], minor_axis=['A', 'B', 'C']) @@ -2816,7 +2815,7 @@ This format is specified by default when using ``put`` or ``to_hdf`` or by ``for .. code-block:: python - DataFrame(randn(10,2)).to_hdf('test_fixed.h5','df') + pd.DataFrame(randn(10,2)).to_hdf('test_fixed.h5','df') pd.read_hdf('test_fixed.h5','df',where='index>5') TypeError: cannot pass a where specification when reading a fixed format. @@ -2848,7 +2847,7 @@ enable ``put/append/to_hdf`` to by default store in the ``table`` format. .. ipython:: python - store = HDFStore('store.h5') + store = pd.HDFStore('store.h5') df1 = df[0:4] df2 = df[4:] @@ -2914,7 +2913,7 @@ defaults to `nan`. .. ipython:: python - df_mixed = DataFrame({ 'A' : randn(8), + df_mixed = pd.DataFrame({ 'A' : randn(8), 'B' : randn(8), 'C' : np.array(randn(8),dtype='float32'), 'string' :'string', @@ -2940,12 +2939,12 @@ storing/selecting from homogeneous index DataFrames. .. ipython:: python - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], + index = pd.MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['foo', 'bar']) - df_mi = DataFrame(np.random.randn(10, 3), index=index, + df_mi = pd.DataFrame(np.random.randn(10, 3), index=index, columns=['A', 'B', 'C']) df_mi @@ -3127,7 +3126,7 @@ specified in the format: ``()``, where float may be signed (and fra .. ipython:: python from datetime import timedelta - dftd = DataFrame(dict(A = Timestamp('20130101'), B = [ Timestamp('20130101') + timedelta(days=i,seconds=10) for i in range(10) ])) + dftd = pd.DataFrame(dict(A = Timestamp('20130101'), B = [ Timestamp('20130101') + timedelta(days=i,seconds=10) for i in range(10) ])) dftd['C'] = dftd['A']-dftd['B'] dftd store.append('dftd',dftd,data_columns=True) @@ -3163,8 +3162,8 @@ Oftentimes when appending large amounts of data to a store, it is useful to turn .. ipython:: python - df_1 = DataFrame(randn(10,2),columns=list('AB')) - df_2 = DataFrame(randn(10,2),columns=list('AB')) + df_1 = pd.DataFrame(randn(10,2),columns=list('AB')) + df_2 = pd.DataFrame(randn(10,2),columns=list('AB')) st = pd.HDFStore('appends.h5',mode='w') st.append('df', df_1, data_columns=['B'], index=False) @@ -3261,7 +3260,7 @@ chunks. .. ipython:: python - dfeq = DataFrame({'number': np.arange(1,11)}) + dfeq = pd.DataFrame({'number': np.arange(1,11)}) dfeq store.append('dfeq', dfeq, data_columns=['number']) @@ -3301,7 +3300,7 @@ Sometimes you want to get the coordinates (a.k.a the index locations) of your qu .. ipython:: python - df_coord = DataFrame(np.random.randn(1000,2),index=date_range('20000101',periods=1000)) + df_coord = pd.DataFrame(np.random.randn(1000,2),index=date_range('20000101',periods=1000)) store.append('df_coord',df_coord) c = store.select_as_coordinates('df_coord','index>20020101') c.summary() @@ -3318,7 +3317,7 @@ a datetimeindex which are 5. .. ipython:: python - df_mask = DataFrame(np.random.randn(1000,2),index=date_range('20000101',periods=1000)) + df_mask = pd.DataFrame(np.random.randn(1000,2),index=date_range('20000101',periods=1000)) store.append('df_mask',df_mask) c = store.select_column('df_mask','index') where = c[DatetimeIndex(c).month==5].index @@ -3366,7 +3365,7 @@ results. .. ipython:: python - df_mt = DataFrame(randn(8, 6), index=date_range('1/1/2000', periods=8), + df_mt = pd.DataFrame(randn(8, 6), index=date_range('1/1/2000', periods=8), columns=['A', 'B', 'C', 'D', 'E', 'F']) df_mt['foo'] = 'bar' df_mt.ix[1, ('A', 'B')] = np.nan @@ -3458,7 +3457,7 @@ Compression for all objects within the file .. code-block:: python - store_compressed = HDFStore('store_compressed.h5', complevel=9, complib='blosc') + store_compressed = pd.HDFStore('store_compressed.h5', complevel=9, complib='blosc') Or on-the-fly compression (this only applies to tables). You can turn off file compression for a specific table by passing ``complevel=0`` @@ -3556,7 +3555,7 @@ stored in a more efficient manner. .. ipython:: python - dfcat = DataFrame({ 'A' : Series(list('aabbcdba')).astype('category'), + dfcat = pd.DataFrame({ 'A' : Series(list('aabbcdba')).astype('category'), 'B' : np.random.randn(8) }) dfcat dfcat.dtypes @@ -3614,7 +3613,7 @@ Starting in 0.11.0, passing a ``min_itemsize`` dict will cause all passed column .. ipython:: python - dfs = DataFrame(dict(A = 'foo', B = 'bar'),index=list(range(5))) + dfs = pd.DataFrame(dict(A = 'foo', B = 'bar'),index=list(range(5))) dfs # A and B have a size of 30 @@ -3633,7 +3632,7 @@ You could inadvertently turn an actual ``nan`` value into a missing value. .. ipython:: python - dfss = DataFrame(dict(A = ['foo','bar','nan'])) + dfss = pd.DataFrame(dict(A = ['foo','bar','nan'])) dfss store.append('dfss', dfss) @@ -3667,7 +3666,7 @@ It is possible to write an ``HDFStore`` object that can easily be imported into index=range(100)) df_for_r.head() - store_export = HDFStore('export.h5') + store_export = pd.HDFStore('export.h5') store_export.append('df_for_r', df_for_r, data_columns=df_dc.columns) store_export @@ -3756,7 +3755,7 @@ number of options, please see the docstring. .. ipython:: python # a legacy store - legacy_store = HDFStore(legacy_file_path,'r') + legacy_store = pd.HDFStore(legacy_file_path,'r') legacy_store # copy (and return the new handle) @@ -3920,7 +3919,7 @@ the database using :func:`~pandas.DataFrame.to_sql`. (42, datetime.datetime(2010,10,19), 'Y', -12.5, False), (63, datetime.datetime(2010,10,20), 'Z', 5.73, True)] - data = DataFrame(d, columns=c) + data = pd.DataFrame(d, columns=c) .. ipython:: python @@ -4400,7 +4399,7 @@ into a .dta file. The format version of this file is always 115 (Stata 12). .. ipython:: python - df = DataFrame(randn(10, 2), columns=list('AB')) + df = pd.DataFrame(randn(10, 2), columns=list('AB')) df.to_stata('stata.dta') *Stata* data files have limited data type support; only strings with @@ -4625,7 +4624,7 @@ This is an informal comparison of various IO methods, using pandas 0.13.1. .. code-block:: python - In [1]: df = DataFrame(randn(1000000,2),columns=list('AB')) + In [1]: df = pd.DataFrame(randn(1000000,2),columns=list('AB')) In [2]: df.info() @@ -4699,7 +4698,7 @@ And here's the code import os from pandas.io import sql - df = DataFrame(randn(1000000,2),columns=list('AB')) + df = pd.DataFrame(randn(1000000,2),columns=list('AB')) def test_sql_write(df): if os.path.exists('test.sql'):