@@ -455,8 +455,9 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start,
455
455
456
456
457
457
cdef inline float64_t calc_skew(int64_t minp, int64_t nobs,
458
- float64_t x, float64_t xx,
459
- float64_t xxx) nogil:
458
+ float64_t x, float64_t xx, float64_t xxx,
459
+ int64_t num_consecutive_same_value
460
+ ) nogil:
460
461
cdef:
461
462
float64_t result, dnobs
462
463
float64_t A, B, C, R
@@ -467,6 +468,12 @@ cdef inline float64_t calc_skew(int64_t minp, int64_t nobs,
467
468
B = xx / dnobs - A * A
468
469
C = xxx / dnobs - A * A * A - 3 * A * B
469
470
471
+ if nobs < 3 :
472
+ result = NaN
473
+ # GH 42064 46431
474
+ # uniform case, force result to be 0
475
+ elif num_consecutive_same_value >= nobs:
476
+ result = 0.0
470
477
# #18044: with uniform distribution, floating issue will
471
478
# cause B != 0. and cause the result is a very
472
479
# large number.
@@ -476,7 +483,7 @@ cdef inline float64_t calc_skew(int64_t minp, int64_t nobs,
476
483
# if the variance is less than 1e-14, it could be
477
484
# treat as zero, here we follow the original
478
485
# skew/kurt behaviour to check B <= 1e-14
479
- if B <= 1e-14 or nobs < 3 :
486
+ elif B <= 1e-14 :
480
487
result = NaN
481
488
else :
482
489
R = sqrt(B)
@@ -493,7 +500,10 @@ cdef inline void add_skew(float64_t val, int64_t *nobs,
493
500
float64_t * xxx,
494
501
float64_t * compensation_x,
495
502
float64_t * compensation_xx,
496
- float64_t * compensation_xxx) nogil:
503
+ float64_t * compensation_xxx,
504
+ int64_t * num_consecutive_same_value,
505
+ float64_t * prev_value,
506
+ ) nogil:
497
507
""" add a value from the skew calc """
498
508
cdef:
499
509
float64_t y, t
@@ -515,6 +525,14 @@ cdef inline void add_skew(float64_t val, int64_t *nobs,
515
525
compensation_xxx[0 ] = t - xxx[0 ] - y
516
526
xxx[0 ] = t
517
527
528
+ # GH#42064, record num of same values to remove floating point artifacts
529
+ if val == prev_value[0 ]:
530
+ num_consecutive_same_value[0 ] += 1
531
+ else :
532
+ # reset to 1 (include current value itself)
533
+ num_consecutive_same_value[0 ] = 1
534
+ prev_value[0 ] = val
535
+
518
536
519
537
cdef inline void remove_skew(float64_t val, int64_t * nobs,
520
538
float64_t * x, float64_t * xx,
@@ -553,8 +571,9 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
553
571
float64_t compensation_xx_add , compensation_xx_remove
554
572
float64_t compensation_x_add , compensation_x_remove
555
573
float64_t x , xx , xxx
574
+ float64_t prev_value
556
575
int64_t nobs = 0 , N = len (start), V = len (values), nobs_mean = 0
557
- int64_t s , e
576
+ int64_t s , e , num_consecutive_same_value
558
577
ndarray[float64_t] output , mean_array , values_copy
559
578
bint is_monotonic_increasing_bounds
560
579
@@ -588,6 +607,9 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
588
607
# never removed
589
608
if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1 ]:
590
609
610
+ prev_value = values[s]
611
+ num_consecutive_same_value = 0
612
+
591
613
compensation_xxx_add = compensation_xxx_remove = 0
592
614
compensation_xx_add = compensation_xx_remove = 0
593
615
compensation_x_add = compensation_x_remove = 0
@@ -596,7 +618,8 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
596
618
for j in range (s, e):
597
619
val = values_copy[j]
598
620
add_skew(val, & nobs, & x, & xx, & xxx, & compensation_x_add,
599
- & compensation_xx_add, & compensation_xxx_add)
621
+ & compensation_xx_add, & compensation_xxx_add,
622
+ & num_consecutive_same_value, & prev_value)
600
623
601
624
else :
602
625
@@ -612,9 +635,10 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
612
635
for j in range (end[i - 1 ], e):
613
636
val = values_copy[j]
614
637
add_skew(val, & nobs, & x, & xx, & xxx, & compensation_x_add,
615
- & compensation_xx_add, & compensation_xxx_add)
638
+ & compensation_xx_add, & compensation_xxx_add,
639
+ & num_consecutive_same_value, & prev_value)
616
640
617
- output[i] = calc_skew(minp, nobs, x, xx, xxx)
641
+ output[i] = calc_skew(minp, nobs, x, xx, xxx, num_consecutive_same_value )
618
642
619
643
if not is_monotonic_increasing_bounds:
620
644
nobs = 0
@@ -630,35 +654,44 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
630
654
631
655
cdef inline float64_t calc_kurt(int64_t minp, int64_t nobs,
632
656
float64_t x, float64_t xx,
633
- float64_t xxx, float64_t xxxx) nogil:
657
+ float64_t xxx, float64_t xxxx,
658
+ int64_t num_consecutive_same_value,
659
+ ) nogil:
634
660
cdef:
635
661
float64_t result, dnobs
636
662
float64_t A, B, C, D, R, K
637
663
638
664
if nobs >= minp:
639
- dnobs = < float64_t> nobs
640
- A = x / dnobs
641
- R = A * A
642
- B = xx / dnobs - R
643
- R = R * A
644
- C = xxx / dnobs - R - 3 * A * B
645
- R = R * A
646
- D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A
647
-
648
- # #18044: with uniform distribution, floating issue will
649
- # cause B != 0. and cause the result is a very
650
- # large number.
651
- #
652
- # in core/nanops.py nanskew/nankurt call the function
653
- # _zero_out_fperr(m2) to fix floating error.
654
- # if the variance is less than 1e-14, it could be
655
- # treat as zero, here we follow the original
656
- # skew/kurt behaviour to check B <= 1e-14
657
- if B <= 1e-14 or nobs < 4 :
665
+ if nobs < 4 :
658
666
result = NaN
667
+ # GH 42064 46431
668
+ # uniform case, force result to be -3.
669
+ elif num_consecutive_same_value >= nobs:
670
+ result = - 3.
659
671
else :
660
- K = (dnobs * dnobs - 1. ) * D / (B * B) - 3 * ((dnobs - 1. ) ** 2 )
661
- result = K / ((dnobs - 2. ) * (dnobs - 3. ))
672
+ dnobs = < float64_t> nobs
673
+ A = x / dnobs
674
+ R = A * A
675
+ B = xx / dnobs - R
676
+ R = R * A
677
+ C = xxx / dnobs - R - 3 * A * B
678
+ R = R * A
679
+ D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A
680
+
681
+ # #18044: with uniform distribution, floating issue will
682
+ # cause B != 0. and cause the result is a very
683
+ # large number.
684
+ #
685
+ # in core/nanops.py nanskew/nankurt call the function
686
+ # _zero_out_fperr(m2) to fix floating error.
687
+ # if the variance is less than 1e-14, it could be
688
+ # treat as zero, here we follow the original
689
+ # skew/kurt behaviour to check B <= 1e-14
690
+ if B <= 1e-14 :
691
+ result = NaN
692
+ else :
693
+ K = (dnobs * dnobs - 1. ) * D / (B * B) - 3 * ((dnobs - 1. ) ** 2 )
694
+ result = K / ((dnobs - 2. ) * (dnobs - 3. ))
662
695
else :
663
696
result = NaN
664
697
@@ -671,7 +704,10 @@ cdef inline void add_kurt(float64_t val, int64_t *nobs,
671
704
float64_t * compensation_x,
672
705
float64_t * compensation_xx,
673
706
float64_t * compensation_xxx,
674
- float64_t * compensation_xxxx) nogil:
707
+ float64_t * compensation_xxxx,
708
+ int64_t * num_consecutive_same_value,
709
+ float64_t * prev_value
710
+ ) nogil:
675
711
""" add a value from the kurotic calc """
676
712
cdef:
677
713
float64_t y, t
@@ -697,6 +733,14 @@ cdef inline void add_kurt(float64_t val, int64_t *nobs,
697
733
compensation_xxxx[0 ] = t - xxxx[0 ] - y
698
734
xxxx[0 ] = t
699
735
736
+ # GH#42064, record num of same values to remove floating point artifacts
737
+ if val == prev_value[0 ]:
738
+ num_consecutive_same_value[0 ] += 1
739
+ else :
740
+ # reset to 1 (include current value itself)
741
+ num_consecutive_same_value[0 ] = 1
742
+ prev_value[0 ] = val
743
+
700
744
701
745
cdef inline void remove_kurt(float64_t val, int64_t * nobs,
702
746
float64_t * x, float64_t * xx,
@@ -741,7 +785,9 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
741
785
float64_t compensation_xx_remove , compensation_xx_add
742
786
float64_t compensation_x_remove , compensation_x_add
743
787
float64_t x , xx , xxx , xxxx
744
- int64_t nobs , s , e , N = len (start), V = len (values), nobs_mean = 0
788
+ float64_t prev_value
789
+ int64_t nobs , s , e , num_consecutive_same_value
790
+ int64_t N = len (start), V = len (values), nobs_mean = 0
745
791
ndarray[float64_t] output , values_copy
746
792
bint is_monotonic_increasing_bounds
747
793
@@ -775,6 +821,9 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
775
821
# never removed
776
822
if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1 ]:
777
823
824
+ prev_value = values[s]
825
+ num_consecutive_same_value = 0
826
+
778
827
compensation_xxxx_add = compensation_xxxx_remove = 0
779
828
compensation_xxx_remove = compensation_xxx_add = 0
780
829
compensation_xx_remove = compensation_xx_add = 0
@@ -784,7 +833,8 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
784
833
for j in range (s, e):
785
834
add_kurt(values_copy[j], & nobs, & x, & xx, & xxx, & xxxx,
786
835
& compensation_x_add, & compensation_xx_add,
787
- & compensation_xxx_add, & compensation_xxxx_add)
836
+ & compensation_xxx_add, & compensation_xxxx_add,
837
+ & num_consecutive_same_value, & prev_value)
788
838
789
839
else :
790
840
@@ -800,9 +850,10 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
800
850
for j in range (end[i - 1 ], e):
801
851
add_kurt(values_copy[j], & nobs, & x, & xx, & xxx, & xxxx,
802
852
& compensation_x_add, & compensation_xx_add,
803
- & compensation_xxx_add, & compensation_xxxx_add)
853
+ & compensation_xxx_add, & compensation_xxxx_add,
854
+ & num_consecutive_same_value, & prev_value)
804
855
805
- output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx)
856
+ output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx, num_consecutive_same_value )
806
857
807
858
if not is_monotonic_increasing_bounds:
808
859
nobs = 0
0 commit comments