6
6
7
7
import pandas as pd
8
8
import numpy as np
9
- options.display.max_rows= 15
9
+ pd. options.display.max_rows= 15
10
10
11
11
Comparison with R / R libraries
12
12
*******************************
@@ -51,7 +51,7 @@ Selecting multiple columns by name in ``pandas`` is straightforward
51
51
52
52
.. ipython :: python
53
53
54
- df = DataFrame(np.random.randn(10 , 3 ), columns = list (' abc' ))
54
+ df = pd. DataFrame(np.random.randn(10 , 3 ), columns = list (' abc' ))
55
55
df[[' a' , ' c' ]]
56
56
df.loc[:, [' a' , ' c' ]]
57
57
@@ -63,7 +63,7 @@ with a combination of the ``iloc`` indexer attribute and ``numpy.r_``.
63
63
named = list (' abcdefg' )
64
64
n = 30
65
65
columns = named + np.arange(len (named), n).tolist()
66
- df = DataFrame(np.random.randn(n, n), columns = columns)
66
+ df = pd. DataFrame(np.random.randn(n, n), columns = columns)
67
67
68
68
df.iloc[:, np.r_[:10 , 24 :30 ]]
69
69
@@ -88,8 +88,7 @@ function.
88
88
89
89
.. ipython :: python
90
90
91
- from pandas import DataFrame
92
- df = DataFrame({
91
+ df = pd.DataFrame({
93
92
' v1' : [1 ,3 ,5 ,7 ,8 ,3 ,5 ,np.nan,4 ,5 ,7 ,9 ],
94
93
' v2' : [11 ,33 ,55 ,77 ,88 ,33 ,55 ,np.nan,44 ,55 ,77 ,99 ],
95
94
' by1' : [" red" , " blue" , 1 , 2 , np.nan, " big" , 1 , 2 , " red" , 1 , np.nan, 12 ],
@@ -166,7 +165,7 @@ In ``pandas`` we may use :meth:`~pandas.pivot_table` method to handle this:
166
165
import random
167
166
import string
168
167
169
- baseball = DataFrame({
168
+ baseball = pd. DataFrame({
170
169
' team' : [" team %d " % (x+ 1 ) for x in range (5 )]* 5 ,
171
170
' player' : random.sample(list (string.ascii_lowercase),25 ),
172
171
' batting avg' : np.random.uniform(.200 , .400 , 25 )
@@ -197,7 +196,7 @@ index/slice as well as standard boolean indexing:
197
196
198
197
.. ipython :: python
199
198
200
- df = DataFrame({' a' : np.random.randn(10 ), ' b' : np.random.randn(10 )})
199
+ df = pd. DataFrame({' a' : np.random.randn(10 ), ' b' : np.random.randn(10 )})
201
200
df.query(' a <= b' )
202
201
df[df.a <= df.b]
203
202
df.loc[df.a <= df.b]
@@ -225,7 +224,7 @@ In ``pandas`` the equivalent expression, using the
225
224
226
225
.. ipython :: python
227
226
228
- df = DataFrame({' a' : np.random.randn(10 ), ' b' : np.random.randn(10 )})
227
+ df = pd. DataFrame({' a' : np.random.randn(10 ), ' b' : np.random.randn(10 )})
229
228
df.eval(' a + b' )
230
229
df.a + df.b # same as the previous expression
231
230
@@ -283,7 +282,7 @@ In ``pandas`` the equivalent expression, using the
283
282
284
283
.. ipython :: python
285
284
286
- df = DataFrame({
285
+ df = pd. DataFrame({
287
286
' x' : np.random.uniform(1 ., 168 ., 120 ),
288
287
' y' : np.random.uniform(7 ., 334 ., 120 ),
289
288
' z' : np.random.uniform(1.7 , 20.7 , 120 ),
@@ -317,7 +316,7 @@ In Python, since ``a`` is a list, you can simply use list comprehension.
317
316
.. ipython :: python
318
317
319
318
a = np.array(list (range (1 ,24 ))+ [np.NAN ]).reshape(2 ,3 ,4 )
320
- DataFrame([tuple (list (x)+ [val]) for x, val in np.ndenumerate(a)])
319
+ pd. DataFrame([tuple (list (x)+ [val]) for x, val in np.ndenumerate(a)])
321
320
322
321
|meltlist |_
323
322
~~~~~~~~~~~~
@@ -336,7 +335,7 @@ In Python, this list would be a list of tuples, so
336
335
.. ipython :: python
337
336
338
337
a = list (enumerate (list (range (1 ,5 ))+ [np.NAN ]))
339
- DataFrame(a)
338
+ pd. DataFrame(a)
340
339
341
340
For more details and examples see :ref:`the Intro to Data Structures
342
341
documentation <basics.dataframe.from_items>`.
@@ -361,7 +360,7 @@ In Python, the :meth:`~pandas.melt` method is the R equivalent:
361
360
362
361
.. ipython :: python
363
362
364
- cheese = DataFrame({' first' : [' John' , ' Mary' ],
363
+ cheese = pd. DataFrame({' first' : [' John' , ' Mary' ],
365
364
' last' : [' Doe' , ' Bo' ],
366
365
' height' : [5.5 , 6.0 ],
367
366
' weight' : [130 , 150 ]})
@@ -394,7 +393,7 @@ In Python the best way is to make use of :meth:`~pandas.pivot_table`:
394
393
395
394
.. ipython :: python
396
395
397
- df = DataFrame({
396
+ df = pd. DataFrame({
398
397
' x' : np.random.uniform(1 ., 168 ., 12 ),
399
398
' y' : np.random.uniform(7 ., 334 ., 12 ),
400
399
' z' : np.random.uniform(1.7 , 20.7 , 12 ),
@@ -426,7 +425,7 @@ using :meth:`~pandas.pivot_table`:
426
425
427
426
.. ipython :: python
428
427
429
- df = DataFrame({
428
+ df = pd. DataFrame({
430
429
' Animal' : [' Animal1' , ' Animal2' , ' Animal3' , ' Animal2' , ' Animal1' ,
431
430
' Animal2' , ' Animal3' ],
432
431
' FeedType' : [' A' , ' B' , ' A' , ' A' , ' B' , ' B' , ' A' ],
@@ -444,6 +443,30 @@ The second approach is to use the :meth:`~pandas.DataFrame.groupby` method:
444
443
For more details and examples see :ref: `the reshaping documentation
445
444
<reshaping.pivot>` or :ref:`the groupby documentation <groupby.split>`.
446
445
446
+ |factor |_
447
+ ~~~~~~~~
448
+
449
+ .. versionadded :: 0.15
450
+
451
+ pandas has a data type for categorical data.
452
+
453
+ .. code-block :: r
454
+
455
+ cut(c(1,2,3,4,5,6), 3)
456
+ factor(c(1,2,3,2,2,3))
457
+
458
+ In pandas this is accomplished with ``pd.cut`` and ``astype("category")``:
459
+
460
+ .. ipython :: python
461
+
462
+ pd.cut(pd.Series([1 ,2 ,3 ,4 ,5 ,6 ]), 3 )
463
+ pd.Series([1 ,2 ,3 ,2 ,2 ,3 ]).astype(" category" )
464
+
465
+ For more details and examples see :ref:`categorical introduction <categorical>` and the
466
+ :ref:`API documentation <api.categorical>`. There is also documentation regarding the
467
+ :ref:`differences to R's factor <categorical.rfactor>`.
468
+
469
+
447
470
.. |c | replace :: ``c ``
448
471
.. _c : http://stat.ethz.ch/R-manual/R-patched/library/base/html/c.html
449
472
@@ -477,3 +500,5 @@ For more details and examples see :ref:`the reshaping documentation
477
500
.. |cast | replace :: ``cast ``
478
501
.. _cast: http://www.inside-r.org/packages/cran/reshape2/docs/cast
479
502
503
+ .. |factor | replace :: ``factor ``
504
+ .. _factor : https://stat.ethz.ch/R-manual/R-devel/library/base/html/factor.html
0 commit comments