From b6e9fe23e7b0d06658ad9426c1e491a6e891ac35 Mon Sep 17 00:00:00 2001 From: Winterflower Date: Fri, 4 Sep 2015 23:07:21 +0100 Subject: [PATCH] DOC: Improve reindex examples DOC: add more reindex examples DOC: fixing some PEP8 issues DOC: added shoyer suggestions DOC: Fixes to reindex based on comments --- pandas/core/generic.py | 117 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 115 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d3a63f9f5d851..f2a724361df4a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1775,7 +1775,9 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, New labels / index to conform to. Preferably an Index object to avoid duplicating data method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}, optional - Method to use for filling holes in reindexed DataFrame: + Method to use for filling holes in reindexed DataFrame. + Please note: this is only applicable to DataFrames/Series with a + monotonically increasing/decreasing index. * default: don't fill gaps * pad / ffill: propagate last valid observation forward to next valid * backfill / bfill: use next valid observation to fill gap @@ -1799,7 +1801,118 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, Examples -------- - >>> df.reindex(index=[date1, date2, date3], columns=['A', 'B', 'C']) + + Create a dataframe with some fictional data. + + >>> index = ['Firefox', 'Chrome', 'Safari', 'IE10', 'Konqueror'] + >>> df = pd.DataFrame({ + ... 'http_status': [200, 200, 404, 404, 301], + ... 'response_time': [0.04, 0.02, 0.07, 0.08, 1.0]}, + ... index=index) + >>> df + http_status response_time + Firefox 200 0.04 + Chrome 200 0.02 + Safari 404 0.07 + IE10 404 0.08 + Konqueror 301 1.00 + + Create a new index and reindex the dataframe. By default + values in the new index that do not have corresponding + records in the dataframe are assigned ``NaN``. 
+ + >>> new_index = ['Safari', 'Iceweasel', 'Comodo Dragon', 'IE10', + ... 'Chrome'] + >>> df.reindex(new_index) + http_status response_time + Safari 404 0.07 + Iceweasel NaN NaN + Comodo Dragon NaN NaN + IE10 404 0.08 + Chrome 200 0.02 + + We can fill in the missing values by passing a value to + the keyword ``fill_value``. Because the index is not monotonically + increasing or decreasing, we cannot use arguments to the keyword + ``method`` to fill the ``NaN`` values. + + >>> df.reindex(new_index, fill_value=0) + http_status response_time + Safari 404 0.07 + Iceweasel 0 0.00 + Comodo Dragon 0 0.00 + IE10 404 0.08 + Chrome 200 0.02 + + >>> df.reindex(new_index, fill_value='missing') + http_status response_time + Safari 404 0.07 + Iceweasel missing missing + Comodo Dragon missing missing + IE10 404 0.08 + Chrome 200 0.02 + + To further illustrate the filling functionality in + ``reindex``, we will create a dataframe with a + monotonically increasing index (for example, a sequence + of dates). + + >>> date_index = pd.date_range('1/1/2010', periods=6, freq='D') + >>> df2 = pd.DataFrame({"prices": [100, 101, np.nan, 100, 89, 88]}, + ... index=date_index) + >>> df2 + prices + 2010-01-01 100 + 2010-01-02 101 + 2010-01-03 NaN + 2010-01-04 100 + 2010-01-05 89 + 2010-01-06 88 + + Suppose we decide to expand the dataframe to cover a wider + date range. + + >>> date_index2 = pd.date_range('12/29/2009', periods=10, freq='D') + >>> df2.reindex(date_index2) + prices + 2009-12-29 NaN + 2009-12-30 NaN + 2009-12-31 NaN + 2010-01-01 100 + 2010-01-02 101 + 2010-01-03 NaN + 2010-01-04 100 + 2010-01-05 89 + 2010-01-06 88 + 2010-01-07 NaN + + The index entries that did not have a value in the original dataframe + (for example, '2009-12-29') are by default filled with ``NaN``. + If desired, we can fill in the missing values using one of several + options. 
+ + For example, to backpropagate the next valid value to fill the ``NaN`` + values, pass ``bfill`` as an argument to the ``method`` keyword. + + >>> df2.reindex(date_index2, method='bfill') + prices + 2009-12-29 100 + 2009-12-30 100 + 2009-12-31 100 + 2010-01-01 100 + 2010-01-02 101 + 2010-01-03 NaN + 2010-01-04 100 + 2010-01-05 89 + 2010-01-06 88 + 2010-01-07 NaN + + Please note that the ``NaN`` value present in the original dataframe + (at index value 2010-01-03) will not be filled by any of the + value propagation schemes. This is because filling while reindexing + does not look at dataframe values, but only compares the original and + desired indexes. If you do want to fill in the ``NaN`` values present + in the original dataframe, use the ``fillna()`` method. Returns -------