@@ -632,28 +632,37 @@ def _calc_stats(data, samples_per_window, sample_interval, H):
632
632
# shift to get forward difference, .diff() is backward difference instead
633
633
data_diff = data .diff ().shift (- 1 )
634
634
data_slope = data_diff / sample_interval
635
- data_slope_nstd = _calc_windowed_stat (
636
- data , _slope_nstd , samples_per_window , H )
635
+ data_slope_nstd = _slope_nstd_windowed (data , H , samples_per_window )
637
636
data_slope_nstd = data_slope_nstd
638
637
639
638
return data_mean , data_max , data_slope_nstd , data_slope
640
639
641
640
642
- def _slope_nstd (data ):
641
+ def _slope_nstd_windowed (data , H , samples_per_window ):
643
642
with np .errstate (divide = 'ignore' , invalid = 'ignore' ):
644
- return np .diff (data ).std (ddof = 1 ) / data .mean ()
643
+ raw = np .diff (data )
644
+ raw = raw [H [:- 1 , ]].std (ddof = 1 , axis = 0 ) / data [H ].mean (axis = 0 )
645
+ return _to_centered_series (raw , data .index , samples_per_window )
645
646
646
647
647
- def _line_length (data , sample_interval ):
648
- """
649
- Calculates line length where sample_interval is the time step
650
- between data points.
651
- """
652
- return np .sum (np .sqrt (np .diff (data )** 2 + sample_interval ** 2 ))
648
+ def _max_diff_windowed (data , H , samples_per_window ):
649
+ raw = np .diff (data )
650
+ raw = np .abs (raw [H [:- 1 , ]]).max (axis = 0 )
651
+ return _to_centered_series (raw , data .index , samples_per_window )
653
652
654
653
655
- def _max_diff (data ):
656
- return np .abs (np .diff (data )).max ()
654
+ def _line_length_windowed (data , H , samples_per_window ,
655
+ sample_interval ):
656
+ raw = np .sqrt (np .diff (data )** 2. + sample_interval ** 2. )
657
+ raw = np .sum (raw [H [:- 1 , ]], axis = 0 )
658
+ return _to_centered_series (raw , data .index , samples_per_window )
659
+
660
+
661
+ def _to_centered_series (vals , idx , samples_per_window ):
662
+ vals = np .pad (vals , ((0 , len (idx ) - len (vals )),), mode = 'constant' ,
663
+ constant_values = np .nan )
664
+ shift = samples_per_window // 2 # align = 'center' only
665
+ return pd .Series (index = idx , data = vals ).shift (shift )
657
666
658
667
659
668
def _get_sample_intervals (times , win_length ):
@@ -673,26 +682,6 @@ def _get_sample_intervals(times, win_length):
673
682
'times. consider resampling your data.' )
674
683
675
684
676
- def _to_centered_series (vals , idx , H , samples_per_window ):
677
- vals = np .pad (vals , ((0 , len (idx ) - H .shape [1 ]),), mode = 'constant' ,
678
- constant_values = np .nan )
679
- shift = samples_per_window // 2 # align = 'center' only
680
- return pd .Series (index = idx , data = vals ).shift (shift )
681
-
682
-
683
- def _calc_windowed_stat (data , func , samples_per_window , H , args = ()):
684
- """
685
- Applies func to each rolling window in data. data must be Series.
686
- func accepts a 1d array and returns a float. args are passed to func.
687
- H is a Hankel matrix defining the indices for each window.
688
-
689
- Returns a Series of the output of func in each window, aligned to center
690
- of each window.
691
- """
692
- vals = np .apply_along_axis (func , 0 , data .values [H ], * args )
693
- return _to_centered_series (vals , data .index , H , samples_per_window )
694
-
695
-
696
685
def _clear_sample_index (clear_windows , samples_per_window , align , H ):
697
686
"""
698
687
Returns indices of clear samples in clear windows
@@ -856,8 +845,8 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10,
856
845
# calculate measurement statistics
857
846
meas_mean , meas_max , meas_slope_nstd , meas_slope \
858
847
= _calc_stats (meas , samples_per_window , sample_interval , H )
859
- meas_line_length = _calc_windowed_stat (
860
- meas , _line_length , samples_per_window , H , args = ( sample_interval ,) )
848
+ meas_line_length = _line_length_windowed (
849
+ meas , H , samples_per_window , sample_interval )
861
850
862
851
# calculate clear sky statistics
863
852
clear_mean , clear_max , _ , clear_slope \
@@ -872,13 +861,12 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10,
872
861
alpha = 1
873
862
for iteration in range (max_iterations ):
874
863
scaled_clear = alpha * clear
875
- clear_line_length = _calc_windowed_stat (
876
- scaled_clear , _line_length , samples_per_window , H ,
877
- args = (sample_interval ,))
864
+ clear_line_length = _line_length_windowed (
865
+ scaled_clear , H , samples_per_window , sample_interval )
878
866
879
867
line_diff = meas_line_length - clear_line_length
880
- slope_max_diff = _calc_windowed_stat (
881
- meas - scaled_clear , _max_diff , samples_per_window , H )
868
+ slope_max_diff = _max_diff_windowed (
869
+ meas - scaled_clear , H , samples_per_window )
882
870
# evaluate comparison criteria
883
871
c1 = np .abs (meas_mean - alpha * clear_mean ) < mean_diff
884
872
c2 = np .abs (meas_max - alpha * clear_max ) < max_diff
0 commit comments