@@ -122,16 +122,16 @@ Selecting multiple columns by name in ``pandas`` is straightforward
122
122
123
123
.. ipython:: python
124
124
125
- df = pd.DataFrame(np.random.randn(10 , 3 ), columns = list (' abc' ))
126
- df[[' a ' , ' c ' ]]
127
- df.loc[:, [' a ' , ' c ' ]]
125
+ df = pd.DataFrame(np.random.randn(10 , 3 ), columns = list (" abc" ))
126
+ df[[" a " , " c " ]]
127
+ df.loc[:, [" a " , " c " ]]
128
128
129
129
Selecting multiple noncontiguous columns by integer location can be achieved
130
130
with a combination of the ``iloc`` indexer attribute and ``numpy.r_``.
131
131
132
132
.. ipython:: python
133
133
134
- named = list (' abcdefg' )
134
+ named = list (" abcdefg" )
135
135
n = 30
136
136
columns = named + np.arange(len (named), n).tolist()
137
137
df = pd.DataFrame(np.random.randn(n, n), columns = columns)
@@ -160,14 +160,29 @@ function.
160
160
.. ipython:: python
161
161
162
162
df = pd.DataFrame(
163
- {' v1' : [1 , 3 , 5 , 7 , 8 , 3 , 5 , np.nan, 4 , 5 , 7 , 9 ],
164
- ' v2' : [11 , 33 , 55 , 77 , 88 , 33 , 55 , np.nan, 44 , 55 , 77 , 99 ],
165
- ' by1' : [" red" , " blue" , 1 , 2 , np.nan, " big" , 1 , 2 , " red" , 1 , np.nan, 12 ],
166
- ' by2' : [" wet" , " dry" , 99 , 95 , np.nan, " damp" , 95 , 99 , " red" , 99 , np.nan,
167
- np.nan]})
163
+ {
164
+ " v1" : [1 , 3 , 5 , 7 , 8 , 3 , 5 , np.nan, 4 , 5 , 7 , 9 ],
165
+ " v2" : [11 , 33 , 55 , 77 , 88 , 33 , 55 , np.nan, 44 , 55 , 77 , 99 ],
166
+ " by1" : [" red" , " blue" , 1 , 2 , np.nan, " big" , 1 , 2 , " red" , 1 , np.nan, 12 ],
167
+ " by2" : [
168
+ " wet" ,
169
+ " dry" ,
170
+ 99 ,
171
+ 95 ,
172
+ np.nan,
173
+ " damp" ,
174
+ 95 ,
175
+ 99 ,
176
+ " red" ,
177
+ 99 ,
178
+ np.nan,
179
+ np.nan,
180
+ ],
181
+ }
182
+ )
168
183
169
- g = df.groupby([' by1' , ' by2' ])
170
- g[[' v1 ' , ' v2 ' ]].mean()
184
+ g = df.groupby([" by1" , " by2" ])
185
+ g[[" v1 " , " v2 " ]].mean()
171
186
172
187
For more details and examples see :ref:`the groupby documentation
173
188
<groupby.split>`.
@@ -228,11 +243,14 @@ In ``pandas`` we may use :meth:`~pandas.pivot_table` method to handle this:
228
243
import string
229
244
230
245
baseball = pd.DataFrame(
231
- {' team' : [" team %d " % (x + 1 ) for x in range (5 )] * 5 ,
232
- ' player' : random.sample(list (string.ascii_lowercase), 25 ),
233
- ' batting avg' : np.random.uniform(.200 , .400 , 25 )})
246
+ {
247
+ " team" : [" team %d " % (x + 1 ) for x in range (5 )] * 5 ,
248
+ " player" : random.sample(list (string.ascii_lowercase), 25 ),
249
+ " batting avg" : np.random.uniform(0.200 , 0.400 , 25 ),
250
+ }
251
+ )
234
252
235
- baseball.pivot_table(values = ' batting avg' , columns = ' team' , aggfunc = np.max)
253
+ baseball.pivot_table(values = " batting avg" , columns = " team" , aggfunc = np.max)
236
254
237
255
For more details and examples see :ref:`the reshaping documentation
238
256
<reshaping.pivot>`.
@@ -256,10 +274,10 @@ index/slice as well as standard boolean indexing:
256
274
257
275
.. ipython:: python
258
276
259
- df = pd.DataFrame({' a ' : np.random.randn(10 ), ' b ' : np.random.randn(10 )})
260
- df.query(' a <= b' )
261
- df[df[' a ' ] <= df[' b ' ]]
262
- df.loc[df[' a ' ] <= df[' b ' ]]
277
+ df = pd.DataFrame({" a " : np.random.randn(10 ), " b " : np.random.randn(10 )})
278
+ df.query(" a <= b" )
279
+ df[df[" a " ] <= df[" b " ]]
280
+ df.loc[df[" a " ] <= df[" b " ]]
263
281
264
282
For more details and examples see :ref:`the query documentation
265
283
<indexing.query>`.
@@ -282,9 +300,9 @@ In ``pandas`` the equivalent expression, using the
282
300
283
301
.. ipython:: python
284
302
285
- df = pd.DataFrame({' a ' : np.random.randn(10 ), ' b ' : np.random.randn(10 )})
286
- df.eval(' a + b' )
287
- df[' a ' ] + df[' b ' ] # same as the previous expression
303
+ df = pd.DataFrame({" a " : np.random.randn(10 ), " b " : np.random.randn(10 )})
304
+ df.eval(" a + b" )
305
+ df[" a " ] + df[" b " ] # same as the previous expression
288
306
289
307
In certain cases :meth:`~pandas.DataFrame.eval` will be much faster than
290
308
evaluation in pure Python. For more details and examples see :ref:`the eval
@@ -334,14 +352,18 @@ In ``pandas`` the equivalent expression, using the
334
352
335
353
.. ipython:: python
336
354
337
- df = pd.DataFrame({' x' : np.random.uniform(1 ., 168 ., 120 ),
338
- ' y' : np.random.uniform(7 ., 334 ., 120 ),
339
- ' z' : np.random.uniform(1.7 , 20.7 , 120 ),
340
- ' month' : [5 , 6 , 7 , 8 ] * 30 ,
341
- ' week' : np.random.randint(1 , 4 , 120 )})
355
+ df = pd.DataFrame(
356
+ {
357
+ " x" : np.random.uniform(1.0 , 168.0 , 120 ),
358
+ " y" : np.random.uniform(7.0 , 334.0 , 120 ),
359
+ " z" : np.random.uniform(1.7 , 20.7 , 120 ),
360
+ " month" : [5 , 6 , 7 , 8 ] * 30 ,
361
+ " week" : np.random.randint(1 , 4 , 120 ),
362
+ }
363
+ )
342
364
343
- grouped = df.groupby([' month' , ' week' ])
344
- grouped[' x ' ].agg([np.mean, np.std])
365
+ grouped = df.groupby([" month" , " week" ])
366
+ grouped[" x " ].agg([np.mean, np.std])
345
367
346
368
347
369
For more details and examples see :ref:`the groupby documentation
@@ -410,13 +432,17 @@ In Python, the :meth:`~pandas.melt` method is the R equivalent:
410
432
411
433
.. ipython:: python
412
434
413
- cheese = pd.DataFrame({' first' : [' John' , ' Mary' ],
414
- ' last' : [' Doe' , ' Bo' ],
415
- ' height' : [5.5 , 6.0 ],
416
- ' weight' : [130 , 150 ]})
435
+ cheese = pd.DataFrame(
436
+ {
437
+ " first" : [" John" , " Mary" ],
438
+ " last" : [" Doe" , " Bo" ],
439
+ " height" : [5.5 , 6.0 ],
440
+ " weight" : [130 , 150 ],
441
+ }
442
+ )
417
443
418
- pd.melt(cheese, id_vars = [' first' , ' last' ])
419
- cheese.set_index([' first' , ' last' ]).stack() # alternative way
444
+ pd.melt(cheese, id_vars = [" first" , " last" ])
445
+ cheese.set_index([" first" , " last" ]).stack() # alternative way
420
446
421
447
For more details and examples see :ref:`the reshaping documentation
422
448
<reshaping.melt>`.
@@ -444,15 +470,24 @@ In Python the best way is to make use of :meth:`~pandas.pivot_table`:
444
470
445
471
.. ipython:: python
446
472
447
- df = pd.DataFrame({' x' : np.random.uniform(1 ., 168 ., 12 ),
448
- ' y' : np.random.uniform(7 ., 334 ., 12 ),
449
- ' z' : np.random.uniform(1.7 , 20.7 , 12 ),
450
- ' month' : [5 , 6 , 7 ] * 4 ,
451
- ' week' : [1 , 2 ] * 6 })
473
+ df = pd.DataFrame(
474
+ {
475
+ " x" : np.random.uniform(1.0 , 168.0 , 12 ),
476
+ " y" : np.random.uniform(7.0 , 334.0 , 12 ),
477
+ " z" : np.random.uniform(1.7 , 20.7 , 12 ),
478
+ " month" : [5 , 6 , 7 ] * 4 ,
479
+ " week" : [1 , 2 ] * 6 ,
480
+ }
481
+ )
452
482
453
- mdf = pd.melt(df, id_vars = [' month' , ' week' ])
454
- pd.pivot_table(mdf, values = ' value' , index = [' variable' , ' week' ],
455
- columns = [' month' ], aggfunc = np.mean)
483
+ mdf = pd.melt(df, id_vars = [" month" , " week" ])
484
+ pd.pivot_table(
485
+ mdf,
486
+ values = " value" ,
487
+ index = [" variable" , " week" ],
488
+ columns = [" month" ],
489
+ aggfunc = np.mean,
490
+ )
456
491
457
492
Similarly for ``dcast`` which uses a data.frame called ``df`` in R to
458
493
aggregate information based on ``Animal`` and ``FeedType``:
@@ -475,21 +510,29 @@ using :meth:`~pandas.pivot_table`:
475
510
476
511
.. ipython:: python
477
512
478
- df = pd.DataFrame({
479
- ' Animal' : [' Animal1' , ' Animal2' , ' Animal3' , ' Animal2' , ' Animal1' ,
480
- ' Animal2' , ' Animal3' ],
481
- ' FeedType' : [' A' , ' B' , ' A' , ' A' , ' B' , ' B' , ' A' ],
482
- ' Amount' : [10 , 7 , 4 , 2 , 5 , 6 , 2 ],
483
- })
513
+ df = pd.DataFrame(
514
+ {
515
+ " Animal" : [
516
+ " Animal1" ,
517
+ " Animal2" ,
518
+ " Animal3" ,
519
+ " Animal2" ,
520
+ " Animal1" ,
521
+ " Animal2" ,
522
+ " Animal3" ,
523
+ ],
524
+ " FeedType" : [" A" , " B" , " A" , " A" , " B" , " B" , " A" ],
525
+ " Amount" : [10 , 7 , 4 , 2 , 5 , 6 , 2 ],
526
+ }
527
+ )
484
528
485
- df.pivot_table(values = ' Amount' , index = ' Animal' , columns = ' FeedType' ,
486
- aggfunc = ' sum' )
529
+ df.pivot_table(values = " Amount" , index = " Animal" , columns = " FeedType" , aggfunc = " sum" )
487
530
488
531
The second approach is to use the :meth:`~pandas.DataFrame.groupby` method:
489
532
490
533
.. ipython:: python
491
534
492
- df.groupby([' Animal' , ' FeedType' ])[' Amount' ].sum()
535
+ df.groupby([" Animal" , " FeedType" ])[" Amount" ].sum()
493
536
494
537
For more details and examples see :ref:`the reshaping documentation
495
538
<reshaping.pivot>` or :ref:`the groupby documentation<groupby.split>`.
0 commit comments