diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst index ff867a2ddfe6d..8650b5ed1ba37 100644 --- a/doc/source/reshaping.rst +++ b/doc/source/reshaping.rst @@ -5,9 +5,10 @@ :suppress: import numpy as np - np.random.seed(123456) import pandas as pd - pd.options.display.max_rows=15 + + np.random.seed(123456) + pd.options.display.max_rows = 15 np.set_printoptions(precision=4, suppress=True) ************************** @@ -19,21 +20,21 @@ Reshaping by pivoting DataFrame objects .. image:: _static/reshaping_pivot.png -.. ipython:: +.. ipython:: python :suppress: - In [1]: import pandas.util.testing as tm; tm.N = 3 + import pandas.util.testing as tm + tm.N = 3 - In [2]: def unpivot(frame): - ...: N, K = frame.shape - ...: data = {'value' : frame.values.ravel('F'), - ...: 'variable' : np.asarray(frame.columns).repeat(N), - ...: 'date' : np.tile(np.asarray(frame.index), K)} - ...: columns = ['date', 'variable', 'value'] - ...: return pd.DataFrame(data, columns=columns) - ...: + def unpivot(frame): + N, K = frame.shape + data = {'value': frame.values.ravel('F'), + 'variable': np.asarray(frame.columns).repeat(N), + 'date': np.tile(np.asarray(frame.index), K)} + columns = ['date', 'variable', 'value'] + return pd.DataFrame(data, columns=columns) - In [3]: df = unpivot(tm.makeTimeDataFrame()) + df = unpivot(tm.makeTimeDataFrame()) Data is often stored in so-called "stacked" or "record" format: @@ -184,7 +185,7 @@ will result in a **sorted** copy of the original ``DataFrame`` or ``Series``: .. ipython:: python - index = pd.MultiIndex.from_product([[2,1], ['a', 'b']]) + index = pd.MultiIndex.from_product([[2, 1], ['a', 'b']]) df = pd.DataFrame(np.random.randn(4), index=index, columns=['A']) df all(df.unstack().stack() == df.sort_index()) @@ -204,9 +205,8 @@ processed individually. .. ipython:: python columns = pd.MultiIndex.from_tuples([ - ('A', 'cat', 'long'), ('B', 'cat', 'long'), - ('A', 'dog', 'short'), ('B', 'dog', 'short') - ], + ('A', 'cat', 'long'), ('B', 'cat', 'long'), + ('A', 'dog', 'short'), ('B', 'dog', 'short')], names=['exp', 'animal', 'hair_length'] ) df = pd.DataFrame(np.random.randn(4, 4), columns=columns) @@ -301,10 +301,10 @@ For instance, .. ipython:: python - cheese = pd.DataFrame({'first' : ['John', 'Mary'], - 'last' : ['Doe', 'Bo'], - 'height' : [5.5, 6.0], - 'weight' : [130, 150]}) + cheese = pd.DataFrame({'first': ['John', 'Mary'], + 'last': ['Doe', 'Bo'], + 'height': [5.5, 6.0], + 'weight': [130, 150]}) cheese cheese.melt(id_vars=['first', 'last']) cheese.melt(id_vars=['first', 'last'], var_name='quantity') @@ -315,11 +315,11 @@ user-friendly. .. ipython:: python - dft = pd.DataFrame({"A1970" : {0 : "a", 1 : "b", 2 : "c"}, - "A1980" : {0 : "d", 1 : "e", 2 : "f"}, - "B1970" : {0 : 2.5, 1 : 1.2, 2 : .7}, - "B1980" : {0 : 3.2, 1 : 1.3, 2 : .1}, - "X" : dict(zip(range(3), np.random.randn(3))) + dft = pd.DataFrame({"A1970": {0: "a", 1: "b", 2: "c"}, + "A1980": {0: "d", 1: "e", 2: "f"}, + "B1970": {0: 2.5, 1: 1.2, 2: .7}, + "B1980": {0: 3.2, 1: 1.3, 2: .1}, + "X": dict(zip(range(3), np.random.randn(3))) }) dft["id"] = dft.index dft @@ -390,7 +390,8 @@ We can produce pivot tables from this data very easily: pd.pivot_table(df, values='D', index=['A', 'B'], columns=['C']) pd.pivot_table(df, values='D', index=['B'], columns=['A', 'C'], aggfunc=np.sum) - pd.pivot_table(df, values=['D','E'], index=['B'], columns=['A', 'C'], aggfunc=np.sum) + pd.pivot_table(df, values=['D', 'E'], index=['B'], columns=['A', 'C'], + aggfunc=np.sum) The result object is a ``DataFrame`` having potentially hierarchical indexes on the rows and columns. If the ``values`` column name is not given, the pivot table @@ -658,7 +659,7 @@ When a column contains only one level, it will be omitted in the result. .. ipython:: python - df = pd.DataFrame({'A':list('aaaaa'),'B':list('ababc')}) + df = pd.DataFrame({'A': list('aaaaa'), 'B': list('ababc')}) pd.get_dummies(df)