@@ -624,18 +624,160 @@ def backfill(self, limit=None):
624
624
625
625
def fillna (self , method , limit = None ):
626
626
"""
627
- Fill missing values
627
+ Fill missing values introduced by upsampling.
628
+
629
+ In statistics, imputation is the process of replacing missing data with
630
+ substituted values [1]_. When resampling data, missing values may
631
+ appear (e.g., when the resampling frequency is higher than the original
632
+ frequency).
633
+
634
+ Missing values that existed in the original data will
635
+ not be modified.
628
636
629
637
Parameters
630
638
----------
631
- method : str, method of resampling ('ffill', 'bfill')
639
+ method : {'pad', 'backfill', 'ffill', 'bfill', 'nearest'}
640
+ Method to use for filling holes in resampled data.
641
+
642
+ * 'pad' or 'ffill': use previous valid observation to fill gap
643
+ (forward fill).
644
+ * 'backfill' or 'bfill': use next valid observation to fill gap.
645
+ * 'nearest': use nearest valid observation to fill gap.
646
+
632
647
limit : int, optional
633
- limit of how many values to fill
648
+ Limit of how many consecutive missing values to fill.
649
+
650
+ Returns
651
+ -------
652
+ Series or DataFrame
653
+ An upsampled Series or DataFrame with missing values filled.
634
654
635
655
See Also
636
656
--------
637
- Series.fillna
638
- DataFrame.fillna
657
+ backfill : Backward fill NaN values in the resampled data.
658
+ pad : Forward fill NaN values in the resampled data.
659
+ nearest : Fill NaN values in the resampled data
660
+ with nearest neighbor starting from center.
661
+ pandas.Series.fillna : Fill NaN values in the Series using the
662
+ specified method, which can be 'bfill' or 'ffill'.
663
+ pandas.DataFrame.fillna : Fill NaN values in the DataFrame using the
664
+ specified method, which can be 'bfill' or 'ffill'.
665
+
666
+ Examples
667
+ --------
668
+ Resampling a Series:
669
+
670
+ >>> s = pd.Series([1, 2, 3],
671
+ ... index=pd.date_range('20180101', periods=3, freq='h'))
672
+ >>> s
673
+ 2018-01-01 00:00:00 1
674
+ 2018-01-01 01:00:00 2
675
+ 2018-01-01 02:00:00 3
676
+ Freq: H, dtype: int64
677
+
678
+ Without filling the missing values you get:
679
+
680
+ >>> s.resample("30min").asfreq()
681
+ 2018-01-01 00:00:00 1.0
682
+ 2018-01-01 00:30:00 NaN
683
+ 2018-01-01 01:00:00 2.0
684
+ 2018-01-01 01:30:00 NaN
685
+ 2018-01-01 02:00:00 3.0
686
+ Freq: 30T, dtype: float64
687
+
688
+ >>> s.resample('30min').fillna("backfill")
689
+ 2018-01-01 00:00:00 1
690
+ 2018-01-01 00:30:00 2
691
+ 2018-01-01 01:00:00 2
692
+ 2018-01-01 01:30:00 3
693
+ 2018-01-01 02:00:00 3
694
+ Freq: 30T, dtype: int64
695
+
696
+ >>> s.resample('15min').fillna("backfill", limit=2)
697
+ 2018-01-01 00:00:00 1.0
698
+ 2018-01-01 00:15:00 NaN
699
+ 2018-01-01 00:30:00 2.0
700
+ 2018-01-01 00:45:00 2.0
701
+ 2018-01-01 01:00:00 2.0
702
+ 2018-01-01 01:15:00 NaN
703
+ 2018-01-01 01:30:00 3.0
704
+ 2018-01-01 01:45:00 3.0
705
+ 2018-01-01 02:00:00 3.0
706
+ Freq: 15T, dtype: float64
707
+
708
+ >>> s.resample('30min').fillna("pad")
709
+ 2018-01-01 00:00:00 1
710
+ 2018-01-01 00:30:00 1
711
+ 2018-01-01 01:00:00 2
712
+ 2018-01-01 01:30:00 2
713
+ 2018-01-01 02:00:00 3
714
+ Freq: 30T, dtype: int64
715
+
716
+ >>> s.resample('30min').fillna("nearest")
717
+ 2018-01-01 00:00:00 1
718
+ 2018-01-01 00:30:00 2
719
+ 2018-01-01 01:00:00 2
720
+ 2018-01-01 01:30:00 3
721
+ 2018-01-01 02:00:00 3
722
+ Freq: 30T, dtype: int64
723
+
724
+ Missing values present before the upsampling are not affected.
725
+
726
+ >>> sm = pd.Series([1, None, 3],
727
+ ... index=pd.date_range('20180101', periods=3, freq='h'))
728
+ >>> sm
729
+ 2018-01-01 00:00:00 1.0
730
+ 2018-01-01 01:00:00 NaN
731
+ 2018-01-01 02:00:00 3.0
732
+ Freq: H, dtype: float64
733
+
734
+ >>> sm.resample('30min').fillna('backfill')
735
+ 2018-01-01 00:00:00 1.0
736
+ 2018-01-01 00:30:00 NaN
737
+ 2018-01-01 01:00:00 NaN
738
+ 2018-01-01 01:30:00 3.0
739
+ 2018-01-01 02:00:00 3.0
740
+ Freq: 30T, dtype: float64
741
+
742
+ >>> sm.resample('30min').fillna('pad')
743
+ 2018-01-01 00:00:00 1.0
744
+ 2018-01-01 00:30:00 1.0
745
+ 2018-01-01 01:00:00 NaN
746
+ 2018-01-01 01:30:00 NaN
747
+ 2018-01-01 02:00:00 3.0
748
+ Freq: 30T, dtype: float64
749
+
750
+ >>> sm.resample('30min').fillna('nearest')
751
+ 2018-01-01 00:00:00 1.0
752
+ 2018-01-01 00:30:00 NaN
753
+ 2018-01-01 01:00:00 NaN
754
+ 2018-01-01 01:30:00 3.0
755
+ 2018-01-01 02:00:00 3.0
756
+ Freq: 30T, dtype: float64
757
+
758
+ DataFrame resampling is done column-wise. All the same options are
759
+ available.
760
+
761
+ >>> df = pd.DataFrame({'a': [2, np.nan, 6], 'b': [1, 3, 5]},
762
+ ... index=pd.date_range('20180101', periods=3,
763
+ ... freq='h'))
764
+ >>> df
765
+ a b
766
+ 2018-01-01 00:00:00 2.0 1
767
+ 2018-01-01 01:00:00 NaN 3
768
+ 2018-01-01 02:00:00 6.0 5
769
+
770
+ >>> df.resample('30min').fillna("bfill")
771
+ a b
772
+ 2018-01-01 00:00:00 2.0 1
773
+ 2018-01-01 00:30:00 NaN 3
774
+ 2018-01-01 01:00:00 NaN 3
775
+ 2018-01-01 01:30:00 6.0 5
776
+ 2018-01-01 02:00:00 6.0 5
777
+
778
+ References
779
+ ----------
780
+ .. [1] https://en.wikipedia.org/wiki/Imputation_(statistics)
639
781
"""
640
782
return self ._upsample (method , limit = limit )
641
783
0 commit comments