5
5
.. ipython :: python
6
6
:suppress:
7
7
8
+ import os
8
9
import numpy as np
10
+
9
11
import pandas as pd
10
- import os
12
+
11
13
np.random.seed(123456 )
12
14
np.set_printoptions(precision = 4 , suppress = True )
13
- import matplotlib
14
- # matplotlib.style.use('default')
15
15
pd.options.display.max_rows = 15
16
16
17
- # ### portions of this were borrowed from the
18
- # ### Pandas cheatsheet
19
- # ### created during the PyData Workshop-Sprint 2012
20
- # ### Hannah Chen, Henry Chow, Eric Cox, Robert Mauriello
17
+ # portions of this were borrowed from the
18
+ # Pandas cheatsheet
19
+ # created during the PyData Workshop-Sprint 2012
20
+ # Hannah Chen, Henry Chow, Eric Cox, Robert Mauriello
21
21
22
22
23
23
********************
@@ -31,9 +31,8 @@ Customarily, we import as follows:
31
31
32
32
.. ipython :: python
33
33
34
- import pandas as pd
35
34
import numpy as np
36
- import matplotlib.pyplot as plt
35
+ import pandas as pd
37
36
38
37
Object Creation
39
38
---------------
@@ -55,7 +54,7 @@ and labeled columns:
55
54
56
55
dates = pd.date_range(' 20130101' , periods = 6 )
57
56
dates
58
- df = pd.DataFrame(np.random.randn(6 ,4 ), index = dates, columns = list (' ABCD' ))
57
+ df = pd.DataFrame(np.random.randn(6 , 4 ), index = dates, columns = list (' ABCD' ))
59
58
df
60
59
61
60
Creating a ``DataFrame`` by passing a dict of objects that can be converted to series-like structures.
@@ -64,7 +63,7 @@ Creating a ``DataFrame`` by passing a dict of objects that can be converted to s
64
63
65
64
df2 = pd.DataFrame({' A' : 1 .,
66
65
' B' : pd.Timestamp(' 20130102' ),
67
- ' C' : pd.Series(1 , index = list (range (4 )),dtype = ' float32' ),
66
+ ' C' : pd.Series(1 , index = list (range (4 )), dtype = ' float32' ),
68
67
' D' : np.array([3 ] * 4 , dtype = ' int32' ),
69
68
' E' : pd.Categorical([" test" , " train" , " test" , " train" ]),
70
69
' F' : ' foo' })
@@ -190,31 +189,31 @@ Selecting on a multi-axis by label:
190
189
191
190
.. ipython :: python
192
191
193
- df.loc[:,[' A' ,' B' ]]
192
+ df.loc[:, [' A' , ' B' ]]
194
193
195
194
Showing label slicing; both endpoints are *included*:
196
195
197
196
.. ipython :: python
198
197
199
- df.loc[' 20130102' :' 20130104' ,[' A' ,' B' ]]
198
+ df.loc[' 20130102' :' 20130104' , [' A' , ' B' ]]
200
199
201
200
Reduction in the dimensions of the returned object:
202
201
203
202
.. ipython :: python
204
203
205
- df.loc[' 20130102' ,[' A' ,' B' ]]
204
+ df.loc[' 20130102' , [' A' , ' B' ]]
206
205
207
206
For getting a scalar value:
208
207
209
208
.. ipython :: python
210
209
211
- df.loc[dates[0 ],' A' ]
210
+ df.loc[dates[0 ], ' A' ]
212
211
213
212
For getting fast access to a scalar (equivalent to the prior method):
214
213
215
214
.. ipython :: python
216
215
217
- df.at[dates[0 ],' A' ]
216
+ df.at[dates[0 ], ' A' ]
218
217
219
218
Selection by Position
220
219
~~~~~~~~~~~~~~~~~~~~~
@@ -231,37 +230,37 @@ By integer slices, acting similar to numpy/python:
231
230
232
231
.. ipython :: python
233
232
234
- df.iloc[3 :5 ,0 :2 ]
233
+ df.iloc[3 :5 , 0 :2 ]
235
234
236
235
By lists of integer position locations, similar to the numpy/python style:
237
236
238
237
.. ipython :: python
239
238
240
- df.iloc[[1 ,2 , 4 ],[0 ,2 ]]
239
+ df.iloc[[1 , 2 , 4 ], [0 , 2 ]]
241
240
242
241
For slicing rows explicitly:
243
242
244
243
.. ipython :: python
245
244
246
- df.iloc[1 :3 ,:]
245
+ df.iloc[1 :3 , :]
247
246
248
247
For slicing columns explicitly:
249
248
250
249
.. ipython :: python
251
250
252
- df.iloc[:,1 :3 ]
251
+ df.iloc[:, 1 :3 ]
253
252
254
253
For getting a value explicitly:
255
254
256
255
.. ipython :: python
257
256
258
- df.iloc[1 ,1 ]
257
+ df.iloc[1 , 1 ]
259
258
260
259
For getting fast access to a scalar (equivalent to the prior method):
261
260
262
261
.. ipython :: python
263
262
264
- df.iat[1 ,1 ]
263
+ df.iat[1 , 1 ]
265
264
266
265
Boolean Indexing
267
266
~~~~~~~~~~~~~~~~
@@ -303,19 +302,19 @@ Setting values by label:
303
302
304
303
.. ipython :: python
305
304
306
- df.at[dates[0 ],' A' ] = 0
305
+ df.at[dates[0 ], ' A' ] = 0
307
306
308
307
Setting values by position:
309
308
310
309
.. ipython :: python
311
310
312
- df.iat[0 ,1 ] = 0
311
+ df.iat[0 , 1 ] = 0
313
312
314
313
Setting by assigning with a NumPy array:
315
314
316
315
.. ipython :: python
317
316
318
- df.loc[:,' D' ] = np.array([5 ] * len (df))
317
+ df.loc[:, ' D' ] = np.array([5 ] * len (df))
319
318
320
319
The result of the prior setting operations.
321
320
@@ -345,7 +344,7 @@ returns a copy of the data.
345
344
.. ipython :: python
346
345
347
346
df1 = df.reindex(index = dates[0 :4 ], columns = list (df.columns) + [' E' ])
348
- df1.loc[dates[0 ]:dates[1 ],' E' ] = 1
347
+ df1.loc[dates[0 ]:dates[1 ], ' E' ] = 1
349
348
df1
350
349
351
350
To drop any rows that have missing data.
@@ -653,7 +652,8 @@ pandas can include categorical data in a ``DataFrame``. For full docs, see the
653
652
654
653
.. ipython :: python
655
654
656
- df = pd.DataFrame({" id" :[1 , 2 , 3 , 4 , 5 , 6 ], " raw_grade" :[' a' , ' b' , ' b' , ' a' , ' a' , ' e' ]})
655
+ df = pd.DataFrame({" id" : [1 , 2 , 3 , 4 , 5 , 6 ],
656
+ " raw_grade" : [' a' , ' b' , ' b' , ' a' , ' a' , ' e' ]})
657
657
658
658
Convert the raw grades to a categorical data type.
659
659
@@ -674,7 +674,8 @@ Reorder the categories and simultaneously add the missing categories (methods un
674
674
675
675
.. ipython :: python
676
676
677
- df[" grade" ] = df[" grade" ].cat.set_categories([" very bad" , " bad" , " medium" , " good" , " very good" ])
677
+ df[" grade" ] = df[" grade" ].cat.set_categories([" very bad" , " bad" , " medium" ,
678
+ " good" , " very good" ])
678
679
df[" grade" ]
679
680
680
681
Sorting is per order in the categories, not lexical order.
@@ -703,7 +704,8 @@ See the :ref:`Plotting <visualization>` docs.
703
704
704
705
.. ipython :: python
705
706
706
- ts = pd.Series(np.random.randn(1000 ), index = pd.date_range(' 1/1/2000' , periods = 1000 ))
707
+ ts = pd.Series(np.random.randn(1000 ),
708
+ index = pd.date_range(' 1/1/2000' , periods = 1000 ))
707
709
ts = ts.cumsum()
708
710
709
711
@savefig series_plot_basic.png
@@ -718,8 +720,10 @@ of the columns with labels:
718
720
columns = [' A' , ' B' , ' C' , ' D' ])
719
721
df = df.cumsum()
720
722
723
+ plt.figure()
724
+ df.plot()
721
725
@savefig frame_plot_basic.png
722
- plt.figure(); df.plot(); plt. legend(loc = ' best' )
726
+ plt.legend(loc = ' best' )
723
727
724
728
Getting Data In/Out
725
729
-------------------
0 commit comments