5
5
:suppress:
6
6
7
7
import numpy as np
8
- np.random.seed(123456 )
9
8
import pandas as pd
10
- pd.options.display.max_rows= 15
9
+
10
+ np.random.seed(123456 )
11
+ pd.options.display.max_rows = 15
11
12
np.set_printoptions(precision = 4 , suppress = True )
12
13
13
14
**************************
@@ -19,21 +20,21 @@ Reshaping by pivoting DataFrame objects
19
20
20
21
.. image:: _static/reshaping_pivot.png
21
22
22
- .. ipython ::
23
+ .. ipython :: python
23
24
:suppress:
24
25
25
- In [1]: import pandas.util.testing as tm; tm.N = 3
26
+ import pandas.util.testing as tm
27
+ tm.N = 3
26
28
27
- In [2]: def unpivot(frame):
28
- ...: N, K = frame.shape
29
- ...: data = {'value' : frame.values.ravel('F'),
30
- ...: 'variable' : np.asarray(frame.columns).repeat(N),
31
- ...: 'date' : np.tile(np.asarray(frame.index), K)}
32
- ...: columns = ['date', 'variable', 'value']
33
- ...: return pd.DataFrame(data, columns=columns)
34
- ...:
29
def unpivot(frame):
    """Convert a wide DataFrame into "stacked" (record) format.

    Parameters
    ----------
    frame : DataFrame
        Wide table with one column per variable.

    Returns
    -------
    DataFrame
        One row per (index label, column label) pair, with the columns
        ``date`` (the index label), ``variable`` (the column label) and
        ``value`` (the cell contents), in that order.
    """
    N, K = frame.shape
    # Flatten column by column ('F' = column-major) so that each variable's
    # values stay contiguous and line up with the repeated column labels.
    data = {'value': frame.values.ravel('F'),
            # Each column label is repeated once per row ...
            'variable': np.asarray(frame.columns).repeat(N),
            # ... while the whole index is repeated once per column.
            'date': np.tile(np.asarray(frame.index), K)}
    columns = ['date', 'variable', 'value']
    return pd.DataFrame(data, columns=columns)
35
36
36
- In [3]: df = unpivot(tm.makeTimeDataFrame())
37
+ df = unpivot(tm.makeTimeDataFrame())
37
38
38
39
Data is often stored in so-called "stacked" or "record" format:
39
40
@@ -184,7 +185,7 @@ will result in a **sorted** copy of the original ``DataFrame`` or ``Series``:
184
185
185
186
.. ipython:: python
186
187
187
- index = pd.MultiIndex.from_product([[2 ,1 ], [' a' , ' b' ]])
188
+ index = pd.MultiIndex.from_product([[2 , 1 ], [' a' , ' b' ]])
188
189
df = pd.DataFrame(np.random.randn(4 ), index = index, columns = [' A' ])
189
190
df
190
191
all (df.unstack().stack() == df.sort_index())
@@ -204,9 +205,8 @@ processed individually.
204
205
.. ipython:: python
205
206
206
207
columns = pd.MultiIndex.from_tuples([
207
- (' A' , ' cat' , ' long' ), (' B' , ' cat' , ' long' ),
208
- (' A' , ' dog' , ' short' ), (' B' , ' dog' , ' short' )
209
- ],
208
+ (' A' , ' cat' , ' long' ), (' B' , ' cat' , ' long' ),
209
+ (' A' , ' dog' , ' short' ), (' B' , ' dog' , ' short' )],
210
210
names = [' exp' , ' animal' , ' hair_length' ]
211
211
)
212
212
df = pd.DataFrame(np.random.randn(4 , 4 ), columns = columns)
@@ -301,10 +301,10 @@ For instance,
301
301
302
302
.. ipython:: python
303
303
304
- cheese = pd.DataFrame({' first' : [' John' , ' Mary' ],
305
- ' last' : [' Doe' , ' Bo' ],
306
- ' height' : [5.5 , 6.0 ],
307
- ' weight' : [130 , 150 ]})
304
+ cheese = pd.DataFrame({' first' : [' John' , ' Mary' ],
305
+ ' last' : [' Doe' , ' Bo' ],
306
+ ' height' : [5.5 , 6.0 ],
307
+ ' weight' : [130 , 150 ]})
308
308
cheese
309
309
cheese.melt(id_vars = [' first' , ' last' ])
310
310
cheese.melt(id_vars = [' first' , ' last' ], var_name = ' quantity' )
@@ -315,11 +315,11 @@ user-friendly.
315
315
316
316
.. ipython:: python
317
317
318
- dft = pd.DataFrame({" A1970" : {0 : " a" , 1 : " b" , 2 : " c" },
319
- " A1980" : {0 : " d" , 1 : " e" , 2 : " f" },
320
- " B1970" : {0 : 2.5 , 1 : 1.2 , 2 : .7 },
321
- " B1980" : {0 : 3.2 , 1 : 1.3 , 2 : .1 },
322
- " X" : dict (zip (range (3 ), np.random.randn(3 )))
318
+ dft = pd.DataFrame({" A1970" : {0 : " a" , 1 : " b" , 2 : " c" },
319
+ " A1980" : {0 : " d" , 1 : " e" , 2 : " f" },
320
+ " B1970" : {0 : 2.5 , 1 : 1.2 , 2 : .7 },
321
+ " B1980" : {0 : 3.2 , 1 : 1.3 , 2 : .1 },
322
+ " X" : dict (zip (range (3 ), np.random.randn(3 )))
323
323
})
324
324
dft[" id" ] = dft.index
325
325
dft
@@ -390,7 +390,8 @@ We can produce pivot tables from this data very easily:
390
390
391
391
pd.pivot_table(df, values = ' D' , index = [' A' , ' B' ], columns = [' C' ])
392
392
pd.pivot_table(df, values = ' D' , index = [' B' ], columns = [' A' , ' C' ], aggfunc = np.sum)
393
- pd.pivot_table(df, values = [' D' ,' E' ], index = [' B' ], columns = [' A' , ' C' ], aggfunc = np.sum)
393
+ pd.pivot_table(df, values = [' D' , ' E' ], index = [' B' ], columns = [' A' , ' C' ],
394
+ aggfunc = np.sum)
394
395
395
396
The result object is a ``DataFrame`` having potentially hierarchical indexes on the
396
397
rows and columns. If the ``values`` column name is not given, the pivot table
@@ -658,7 +659,7 @@ When a column contains only one level, it will be omitted in the result.
658
659
659
660
.. ipython:: python
660
661
661
- df = pd.DataFrame({' A' :list (' aaaaa' ),' B' :list (' ababc' )})
662
+ df = pd.DataFrame({' A' : list (' aaaaa' ), ' B' : list (' ababc' )})
662
663
663
664
pd.get_dummies(df)
664
665
0 commit comments