import pandas as pd
import numpy as np
- pd.options.display.max_rows=15
+ pd.options.display.max_rows = 15

Comparison with R / R libraries
*******************************
@@ -165,16 +165,15 @@ function.

.. ipython:: python

- df = pd.DataFrame({
-     'v1': [1,3,5,7,8,3,5,np.nan,4,5,7,9],
-     'v2': [11,33,55,77,88,33,55,np.nan,44,55,77,99],
-     'by1': ["red", "blue", 1, 2, np.nan, "big", 1, 2, "red", 1, np.nan, 12],
-     'by2': ["wet", "dry", 99, 95, np.nan, "damp", 95, 99, "red", 99, np.nan,
-             np.nan]
- })
+ df = pd.DataFrame(
+     {'v1': [1, 3, 5, 7, 8, 3, 5, np.nan, 4, 5, 7, 9],
+      'v2': [11, 33, 55, 77, 88, 33, 55, np.nan, 44, 55, 77, 99],
+      'by1': ["red", "blue", 1, 2, np.nan, "big", 1, 2, "red", 1, np.nan, 12],
+      'by2': ["wet", "dry", 99, 95, np.nan, "damp", 95, 99, "red", 99, np.nan,
+              np.nan]})

- g = df.groupby(['by1','by2'])
- g[['v1','v2']].mean()
+ g = df.groupby(['by1', 'by2'])
+ g[['v1', 'v2']].mean()

For more details and examples see :ref:`the groupby documentation
<groupby.split>`.
@@ -195,7 +194,7 @@ The :meth:`~pandas.DataFrame.isin` method is similar to R ``%in%`` operator:

.. ipython:: python

- s = pd.Series(np.arange(5),dtype=np.float32)
+ s = pd.Series(np.arange(5), dtype=np.float32)
s.isin([2, 4])

The ``match`` function returns a vector of the positions of matches
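The hunk ends mid-sentence here, and the pandas counterpart of ``match`` is not
shown. A minimal sketch, assuming :meth:`~pandas.Index.get_indexer` is the
analogue you want (it returns ``-1`` for unmatched values, much as R's
``match`` returns ``NA``), could look like:

.. code-block:: python

   import numpy as np
   import pandas as pd

   s = pd.Series(np.arange(5), dtype=np.float32)
   # Positions of 2 and 4 within s's values; -1 would flag a value
   # with no match, mirroring R's NA from match().
   pd.Index(s).get_indexer([2, 4])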
@@ -234,11 +233,11 @@ In ``pandas`` we may use :meth:`~pandas.pivot_table` method to handle this:

import random
import string

- baseball = pd.DataFrame({
-     'team': ["team %d" % (x+1) for x in range(5)]*5,
-     'player': random.sample(list(string.ascii_lowercase),25),
-     'batting avg': np.random.uniform(.200, .400, 25)
- })
+ baseball = pd.DataFrame(
+     {'team': ["team %d" % (x + 1) for x in range(5)] * 5,
+      'player': random.sample(list(string.ascii_lowercase), 25),
+      'batting avg': np.random.uniform(.200, .400, 25)})
+
baseball.pivot_table(values='batting avg', columns='team', aggfunc=np.max)

For more details and examples see :ref:`the reshaping documentation
@@ -341,15 +340,13 @@ In ``pandas`` the equivalent expression, using the

.. ipython:: python

- df = pd.DataFrame({
-     'x': np.random.uniform(1., 168., 120),
-     'y': np.random.uniform(7., 334., 120),
-     'z': np.random.uniform(1.7, 20.7, 120),
-     'month': [5,6,7,8]*30,
-     'week': np.random.randint(1,4, 120)
- })
+ df = pd.DataFrame({'x': np.random.uniform(1., 168., 120),
+                    'y': np.random.uniform(7., 334., 120),
+                    'z': np.random.uniform(1.7, 20.7, 120),
+                    'month': [5, 6, 7, 8] * 30,
+                    'week': np.random.randint(1, 4, 120)})

- grouped = df.groupby(['month','week'])
+ grouped = df.groupby(['month', 'week'])
grouped['x'].agg([np.mean, np.std])
@@ -374,8 +371,8 @@ In Python, since ``a`` is a list, you can simply use list comprehension.

.. ipython:: python

- a = np.array(list(range(1,24))+[np.NAN]).reshape(2,3, 4)
- pd.DataFrame([tuple(list(x)+[val]) for x, val in np.ndenumerate(a)])
+ a = np.array(list(range(1, 24)) + [np.NAN]).reshape(2, 3, 4)
+ pd.DataFrame([tuple(list(x) + [val]) for x, val in np.ndenumerate(a)])

|meltlist|_
~~~~~~~~~~~~
@@ -393,7 +390,7 @@ In Python, this list would be a list of tuples, so

.. ipython:: python

- a = list(enumerate(list(range(1,5))+[np.NAN]))
+ a = list(enumerate(list(range(1, 5)) + [np.NAN]))
pd.DataFrame(a)

For more details and examples see :ref:`the Intro to Data Structures
@@ -419,12 +416,13 @@ In Python, the :meth:`~pandas.melt` method is the R equivalent:

.. ipython:: python

- cheese = pd.DataFrame({'first': ['John', 'Mary'],
-                        'last': ['Doe', 'Bo'],
-                        'height': [5.5, 6.0],
-                        'weight': [130, 150]})
+ cheese = pd.DataFrame({'first': ['John', 'Mary'],
+                        'last': ['Doe', 'Bo'],
+                        'height': [5.5, 6.0],
+                        'weight': [130, 150]})
+
pd.melt(cheese, id_vars=['first', 'last'])
- cheese.set_index(['first', 'last']).stack() # alternative way
+ cheese.set_index(['first', 'last']).stack()  # alternative way

For more details and examples see :ref:`the reshaping documentation
<reshaping.melt>`.
@@ -452,16 +450,15 @@ In Python the best way is to make use of :meth:`~pandas.pivot_table`:

.. ipython:: python

- df = pd.DataFrame({
-     'x': np.random.uniform(1., 168., 12),
-     'y': np.random.uniform(7., 334., 12),
-     'z': np.random.uniform(1.7, 20.7, 12),
-     'month': [5,6,7]*4,
-     'week': [1,2]*6
- })
+ df = pd.DataFrame({'x': np.random.uniform(1., 168., 12),
+                    'y': np.random.uniform(7., 334., 12),
+                    'z': np.random.uniform(1.7, 20.7, 12),
+                    'month': [5, 6, 7] * 4,
+                    'week': [1, 2] * 6})
+
mdf = pd.melt(df, id_vars=['month', 'week'])
- pd.pivot_table(mdf, values='value', index=['variable','week'],
-                columns=['month'], aggfunc=np.mean)
+ pd.pivot_table(mdf, values='value', index=['variable', 'week'],
+                columns=['month'], aggfunc=np.mean)

Similarly for ``dcast`` which uses a data.frame called ``df`` in R to
aggregate information based on ``Animal`` and ``FeedType``:
@@ -491,13 +488,14 @@ using :meth:`~pandas.pivot_table`:

    'Amount': [10, 7, 4, 2, 5, 6, 2],
})

- df.pivot_table(values='Amount', index='Animal', columns='FeedType', aggfunc='sum')
+ df.pivot_table(values='Amount', index='Animal', columns='FeedType',
+                aggfunc='sum')

The second approach is to use the :meth:`~pandas.DataFrame.groupby` method:

.. ipython:: python

- df.groupby(['Animal','FeedType'])['Amount'].sum()
+ df.groupby(['Animal', 'FeedType'])['Amount'].sum()

For more details and examples see :ref:`the reshaping documentation
<reshaping.pivot>` or :ref:`the groupby documentation <groupby.split>`.
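For reference, both calls in this hunk expect a frame with ``Animal``,
``FeedType`` and ``Amount`` columns; only the ``Amount`` values appear in the
context lines above, so the labels in the sketch below are invented purely for
illustration:

.. code-block:: python

   import pandas as pd

   # Hypothetical stand-in for the frame built earlier in the file;
   # the 'Animal' and 'FeedType' labels are made up, while 'Amount'
   # matches the context line shown above.
   df = pd.DataFrame({
       'Animal': ['A1', 'A2', 'A3', 'A2', 'A1', 'A2', 'A3'],
       'FeedType': ['X', 'Y', 'X', 'X', 'Y', 'Y', 'X'],
       'Amount': [10, 7, 4, 2, 5, 6, 2],
   })

   # Same aggregation two ways: a pivot table and a grouped sum.
   df.pivot_table(values='Amount', index='Animal', columns='FeedType',
                  aggfunc='sum')
   df.groupby(['Animal', 'FeedType'])['Amount'].sum()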
@@ -516,8 +514,8 @@ In pandas this is accomplished with ``pd.cut`` and ``astype("category")``:

.. ipython:: python

- pd.cut(pd.Series([1,2, 3, 4, 5, 6]), 3)
- pd.Series([1,2, 3, 2, 2, 3]).astype("category")
+ pd.cut(pd.Series([1, 2, 3, 4, 5, 6]), 3)
+ pd.Series([1, 2, 3, 2, 2, 3]).astype("category")

For more details and examples see :ref:`categorical introduction <categorical>` and the
:ref:`API documentation <api.categorical>`. There is also documentation regarding the