@@ -409,8 +409,9 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start,
409
409
410
410
411
411
cdef inline float64_t calc_skew(int64_t minp, int64_t nobs,
412
- float64_t x, float64_t xx,
413
- float64_t xxx) nogil:
412
+ float64_t x, float64_t xx, float64_t xxx,
413
+ int64_t num_consecutive_same_value
414
+ ) nogil:
414
415
cdef:
415
416
float64_t result, dnobs
416
417
float64_t A, B, C, R
@@ -421,6 +422,12 @@ cdef inline float64_t calc_skew(int64_t minp, int64_t nobs,
421
422
B = xx / dnobs - A * A
422
423
C = xxx / dnobs - A * A * A - 3 * A * B
423
424
425
+ if nobs < 3 :
426
+ result = NaN
427
+ # GH 42064 46431
428
+ # uniform case, force result to be 0
429
+ elif num_consecutive_same_value >= nobs:
430
+ result = 0.0
424
431
# #18044: with uniform distribution, floating issue will
425
432
# cause B != 0. and cause the result is a very
426
433
# large number.
@@ -430,7 +437,7 @@ cdef inline float64_t calc_skew(int64_t minp, int64_t nobs,
430
437
# if the variance is less than 1e-14, it could be
431
438
# treat as zero, here we follow the original
432
439
# skew/kurt behaviour to check B <= 1e-14
433
- if B <= 1e-14 or nobs < 3 :
440
+ elif B <= 1e-14 :
434
441
result = NaN
435
442
else :
436
443
R = sqrt(B)
@@ -447,7 +454,10 @@ cdef inline void add_skew(float64_t val, int64_t *nobs,
447
454
float64_t * xxx,
448
455
float64_t * compensation_x,
449
456
float64_t * compensation_xx,
450
- float64_t * compensation_xxx) nogil:
457
+ float64_t * compensation_xxx,
458
+ int64_t * num_consecutive_same_value,
459
+ float64_t * prev_value,
460
+ ) nogil:
451
461
""" add a value from the skew calc """
452
462
cdef:
453
463
float64_t y, t
@@ -469,6 +479,14 @@ cdef inline void add_skew(float64_t val, int64_t *nobs,
469
479
compensation_xxx[0 ] = t - xxx[0 ] - y
470
480
xxx[0 ] = t
471
481
482
+ # GH#42064, record num of same values to remove floating point artifacts
483
+ if val == prev_value[0 ]:
484
+ num_consecutive_same_value[0 ] += 1
485
+ else :
486
+ # reset to 1 (include current value itself)
487
+ num_consecutive_same_value[0 ] = 1
488
+ prev_value[0 ] = val
489
+
472
490
473
491
cdef inline void remove_skew(float64_t val, int64_t * nobs,
474
492
float64_t * x, float64_t * xx,
@@ -507,8 +525,9 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
507
525
float64_t compensation_xx_add , compensation_xx_remove
508
526
float64_t compensation_x_add , compensation_x_remove
509
527
float64_t x , xx , xxx
528
+ float64_t prev_value
510
529
int64_t nobs = 0 , N = len (start), V = len (values), nobs_mean = 0
511
- int64_t s , e
530
+ int64_t s , e , num_consecutive_same_value
512
531
ndarray[float64_t] output , mean_array , values_copy
513
532
bint is_monotonic_increasing_bounds
514
533
@@ -542,6 +561,9 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
542
561
# never removed
543
562
if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1 ]:
544
563
564
+ prev_value = values[s]
565
+ num_consecutive_same_value = 0
566
+
545
567
compensation_xxx_add = compensation_xxx_remove = 0
546
568
compensation_xx_add = compensation_xx_remove = 0
547
569
compensation_x_add = compensation_x_remove = 0
@@ -550,7 +572,8 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
550
572
for j in range (s, e):
551
573
val = values_copy[j]
552
574
add_skew(val, & nobs, & x, & xx, & xxx, & compensation_x_add,
553
- & compensation_xx_add, & compensation_xxx_add)
575
+ & compensation_xx_add, & compensation_xxx_add,
576
+ & num_consecutive_same_value, & prev_value)
554
577
555
578
else :
556
579
@@ -566,9 +589,10 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
566
589
for j in range (end[i - 1 ], e):
567
590
val = values_copy[j]
568
591
add_skew(val, & nobs, & x, & xx, & xxx, & compensation_x_add,
569
- & compensation_xx_add, & compensation_xxx_add)
592
+ & compensation_xx_add, & compensation_xxx_add,
593
+ & num_consecutive_same_value, & prev_value)
570
594
571
- output[i] = calc_skew(minp, nobs, x, xx, xxx)
595
+ output[i] = calc_skew(minp, nobs, x, xx, xxx, num_consecutive_same_value )
572
596
573
597
if not is_monotonic_increasing_bounds:
574
598
nobs = 0
@@ -584,35 +608,44 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
584
608
585
609
cdef inline float64_t calc_kurt(int64_t minp, int64_t nobs,
586
610
float64_t x, float64_t xx,
587
- float64_t xxx, float64_t xxxx) nogil:
611
+ float64_t xxx, float64_t xxxx,
612
+ int64_t num_consecutive_same_value,
613
+ ) nogil:
588
614
cdef:
589
615
float64_t result, dnobs
590
616
float64_t A, B, C, D, R, K
591
617
592
618
if nobs >= minp:
593
- dnobs = < float64_t> nobs
594
- A = x / dnobs
595
- R = A * A
596
- B = xx / dnobs - R
597
- R = R * A
598
- C = xxx / dnobs - R - 3 * A * B
599
- R = R * A
600
- D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A
601
-
602
- # #18044: with uniform distribution, floating issue will
603
- # cause B != 0. and cause the result is a very
604
- # large number.
605
- #
606
- # in core/nanops.py nanskew/nankurt call the function
607
- # _zero_out_fperr(m2) to fix floating error.
608
- # if the variance is less than 1e-14, it could be
609
- # treat as zero, here we follow the original
610
- # skew/kurt behaviour to check B <= 1e-14
611
- if B <= 1e-14 or nobs < 4 :
619
+ if nobs < 4 :
612
620
result = NaN
621
+ # GH 42064 46431
622
+ # uniform case, force result to be -3.
623
+ elif num_consecutive_same_value >= nobs:
624
+ result = - 3.
613
625
else :
614
- K = (dnobs * dnobs - 1. ) * D / (B * B) - 3 * ((dnobs - 1. ) ** 2 )
615
- result = K / ((dnobs - 2. ) * (dnobs - 3. ))
626
+ dnobs = < float64_t> nobs
627
+ A = x / dnobs
628
+ R = A * A
629
+ B = xx / dnobs - R
630
+ R = R * A
631
+ C = xxx / dnobs - R - 3 * A * B
632
+ R = R * A
633
+ D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A
634
+
635
+ # #18044: with uniform distribution, floating issue will
636
+ # cause B != 0. and cause the result is a very
637
+ # large number.
638
+ #
639
+ # in core/nanops.py nanskew/nankurt call the function
640
+ # _zero_out_fperr(m2) to fix floating error.
641
+ # if the variance is less than 1e-14, it could be
642
+ # treat as zero, here we follow the original
643
+ # skew/kurt behaviour to check B <= 1e-14
644
+ if B <= 1e-14 :
645
+ result = NaN
646
+ else :
647
+ K = (dnobs * dnobs - 1. ) * D / (B * B) - 3 * ((dnobs - 1. ) ** 2 )
648
+ result = K / ((dnobs - 2. ) * (dnobs - 3. ))
616
649
else :
617
650
result = NaN
618
651
@@ -625,7 +658,10 @@ cdef inline void add_kurt(float64_t val, int64_t *nobs,
625
658
float64_t * compensation_x,
626
659
float64_t * compensation_xx,
627
660
float64_t * compensation_xxx,
628
- float64_t * compensation_xxxx) nogil:
661
+ float64_t * compensation_xxxx,
662
+ int64_t * num_consecutive_same_value,
663
+ float64_t * prev_value
664
+ ) nogil:
629
665
""" add a value from the kurotic calc """
630
666
cdef:
631
667
float64_t y, t
@@ -651,6 +687,14 @@ cdef inline void add_kurt(float64_t val, int64_t *nobs,
651
687
compensation_xxxx[0 ] = t - xxxx[0 ] - y
652
688
xxxx[0 ] = t
653
689
690
+ # GH#42064, record num of same values to remove floating point artifacts
691
+ if val == prev_value[0 ]:
692
+ num_consecutive_same_value[0 ] += 1
693
+ else :
694
+ # reset to 1 (include current value itself)
695
+ num_consecutive_same_value[0 ] = 1
696
+ prev_value[0 ] = val
697
+
654
698
655
699
cdef inline void remove_kurt(float64_t val, int64_t * nobs,
656
700
float64_t * x, float64_t * xx,
@@ -695,7 +739,9 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
695
739
float64_t compensation_xx_remove , compensation_xx_add
696
740
float64_t compensation_x_remove , compensation_x_add
697
741
float64_t x , xx , xxx , xxxx
698
- int64_t nobs , s , e , N = len (start), V = len (values), nobs_mean = 0
742
+ float64_t prev_value
743
+ int64_t nobs , s , e , num_consecutive_same_value
744
+ int64_t N = len (start), V = len (values), nobs_mean = 0
699
745
ndarray[float64_t] output , values_copy
700
746
bint is_monotonic_increasing_bounds
701
747
@@ -729,6 +775,9 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
729
775
# never removed
730
776
if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1 ]:
731
777
778
+ prev_value = values[s]
779
+ num_consecutive_same_value = 0
780
+
732
781
compensation_xxxx_add = compensation_xxxx_remove = 0
733
782
compensation_xxx_remove = compensation_xxx_add = 0
734
783
compensation_xx_remove = compensation_xx_add = 0
@@ -738,7 +787,8 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
738
787
for j in range (s, e):
739
788
add_kurt(values_copy[j], & nobs, & x, & xx, & xxx, & xxxx,
740
789
& compensation_x_add, & compensation_xx_add,
741
- & compensation_xxx_add, & compensation_xxxx_add)
790
+ & compensation_xxx_add, & compensation_xxxx_add,
791
+ & num_consecutive_same_value, & prev_value)
742
792
743
793
else :
744
794
@@ -754,9 +804,10 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
754
804
for j in range (end[i - 1 ], e):
755
805
add_kurt(values_copy[j], & nobs, & x, & xx, & xxx, & xxxx,
756
806
& compensation_x_add, & compensation_xx_add,
757
- & compensation_xxx_add, & compensation_xxxx_add)
807
+ & compensation_xxx_add, & compensation_xxxx_add,
808
+ & num_consecutive_same_value, & prev_value)
758
809
759
- output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx)
810
+ output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx, num_consecutive_same_value )
760
811
761
812
if not is_monotonic_increasing_bounds:
762
813
nobs = 0
0 commit comments