@@ -67,7 +67,7 @@ Creating a ``DataFrame`` by passing a dict of objects that can be converted to s
67
67
' E' : ' foo' })
68
68
df2
69
69
70
- Having specific dtypes
70
+ Having specific :ref:`dtypes <basics.dtypes>`
71
71
72
72
.. ipython :: python
73
73
@@ -83,7 +83,7 @@ See the top & bottom rows of the frame
83
83
.. ipython :: python
84
84
85
85
df.head()
86
- df.tail()
86
+ df.tail(3 )
87
87
88
88
Display the index, columns, and the underlying numpy data
89
89
@@ -99,6 +99,24 @@ Describe shows a quick statistic summary of your data
99
99
100
100
df.describe()
101
101
102
+ Transposing your data
103
+
104
+ .. ipython :: python
105
+
106
+ df.T
107
+
108
+ Sorting by an axis
109
+
110
+ .. ipython :: python
111
+
112
+ df.sort_index(axis = 1 , ascending = False )
113
+
114
+ Sorting by values
115
+
116
+ .. ipython :: python
117
+
118
+ df.sort(columns = ' B' )
119
+
102
120
Selection
103
121
---------
104
122
@@ -112,6 +130,7 @@ Selecting a single column, which yields a ``Series``
112
130
113
131
.. ipython :: python
114
132
133
+ # equivalently ``df.A``
115
134
df[' A' ]
116
135
117
136
Selecting via ``[]``, which slices the rows.
@@ -167,7 +186,6 @@ Select via the position of the passed integers
167
186
168
187
.. ipython :: python
169
188
170
- # this is a cross-section of the object
171
189
df.iloc[3 ]
172
190
173
191
By integer slices, acting similar to numpy/python
@@ -220,7 +238,7 @@ Pandas will detect this and raise ``IndexError``, rather than return an empty st
220
238
221
239
::
222
240
223
- >>> df.iloc[:,3:6 ]
241
+ >>> df.iloc[:,8:10 ]
224
242
IndexError: out-of-bounds on slice (end)
225
243
226
244
Boolean Indexing
@@ -232,7 +250,7 @@ Using a single column's values to select data.
232
250
233
251
df[df.A > 0 ]
234
252
235
- A ``where `` operation.
253
+ A ``where`` operation for getting values.
236
254
237
255
.. ipython :: python
238
256
@@ -270,6 +288,14 @@ Setting by assigning with a numpy array
270
288
df.loc[:,' D' ] = np.array([5 ] * len (df))
271
289
df
272
290
291
+ A ``where`` operation with setting.
292
+
293
+ .. ipython :: python
294
+
295
+ df2 = df.copy()
296
+ df2[df2 > 0 ] = - df2
297
+ df2
298
+
273
299
Missing Data
274
300
------------
275
301
@@ -297,6 +323,12 @@ Filling missing data
297
323
298
324
df1.fillna(value = 5 )
299
325
326
+ To get the boolean mask where values are ``nan``
327
+
328
+ .. ipython :: python
329
+
330
+ pd.isnull(df1)
331
+
300
332
301
333
Operations
302
334
----------
@@ -306,6 +338,8 @@ See the :ref:`Basic section on Binary Ops <basics.binop>`
306
338
Stats
307
339
~~~~~
308
340
341
+ Operations in general *exclude* missing data.
342
+
309
343
Performing a descriptive statistic
310
344
311
345
.. ipython :: python
@@ -318,11 +352,15 @@ Same operation on the other axis
318
352
319
353
df.mean(1 )
320
354
321
- Operations on missing data, exclude the data
355
+ Operating with objects that have different dimensionality and need alignment.
356
+ In addition, pandas automatically broadcasts along the specified dimension.
322
357
323
358
.. ipython :: python
324
359
325
- df1.mean()
360
+ s = pd.Series([1 ,3 ,5 ,np.nan,6 ,8 ],index = dates).shift(2 )
361
+ s
362
+ df.sub(s,axis = ' index' )
363
+
326
364
327
365
Apply
328
366
~~~~~
@@ -334,6 +372,27 @@ Applying functions to the data
334
372
df.apply(np.cumsum)
335
373
df.apply(lambda x : x.max() - x.min())
336
374
375
+ Histogramming
376
+ ~~~~~~~~~~~~~
377
+
378
+ See more at :ref:`Histogramming and Discretization <basics.discretization>`
379
+
380
+ .. ipython :: python
381
+
382
+ s = Series(np.random.randint(0 ,7 ,size = 10 ))
383
+ s
384
+ s.value_counts()
385
+
386
+ String Methods
387
+ ~~~~~~~~~~~~~~
388
+
389
+ See more at :ref:`Vectorized String Methods <basics.string_methods>`
390
+
391
+ .. ipython :: python
392
+
393
+ s = Series([' A' , ' B' , ' C' , ' Aaba' , ' Baca' , np.nan, ' CABA' , ' dog' , ' cat' ])
394
+ s.str.lower()
395
+
337
396
Merge
338
397
-----
339
398
@@ -425,6 +484,9 @@ Reshaping
425
484
See the section on :ref:`Hierarchical Indexing <indexing.hierarchical>` and
426
485
see the section on :ref:`Reshaping <reshaping.stacking>`.
427
486
487
+ Stack
488
+ ~~~~~
489
+
428
490
.. ipython :: python
429
491
430
492
tuples = zip (* [[' bar' , ' bar' , ' baz' , ' baz' ,
@@ -453,6 +515,26 @@ unstacks the **last level**:
453
515
stacked.unstack(1 )
454
516
stacked.unstack(0 )
455
517
518
+ Pivot Tables
519
+ ~~~~~~~~~~~~
520
+ See the section on :ref:`Pivot Tables <reshaping.pivot>`.
521
+
522
+ .. ipython :: python
523
+
524
+ df = DataFrame({' A' : [' one' , ' one' , ' two' , ' three' ] * 3 ,
525
+ ' B' : [' A' , ' B' , ' C' ] * 4 ,
526
+ ' C' : [' foo' , ' foo' , ' foo' , ' bar' , ' bar' , ' bar' ] * 2 ,
527
+ ' D' : np.random.randn(12 ),
528
+ ' E' : np.random.randn(12 )})
529
+ df
530
+
531
+ We can produce pivot tables from this data very easily:
532
+
533
+ .. ipython :: python
534
+
535
+ pivot_table(df, values = ' D' , rows = [' A' , ' B' ], cols = [' C' ])
536
+
537
+
456
538
Time Series
457
539
-----------
458
540
@@ -581,3 +663,25 @@ Reading from a HDF5 Store
581
663
store.close()
582
664
os.remove(' foo.h5' )
583
665
666
+ Excel
667
+ ~~~~~
668
+
669
+ Reading and writing to :ref:`MS Excel <io.excel>`
670
+
671
+ Writing to an excel file
672
+
673
+ .. ipython :: python
674
+
675
+ df.to_excel(' foo.xlsx' , sheet_name = ' sheet1' )
676
+
677
+ Reading from an excel file
678
+
679
+ .. ipython :: python
680
+
681
+ xls = ExcelFile(' foo.xlsx' )
682
+ xls.parse(' sheet1' , index_col = None , na_values = [' NA' ])
683
+
684
+ .. ipython :: python
685
+ :suppress:
686
+
687
+ os.remove(' foo.xlsx' )
0 commit comments