@@ -474,6 +474,13 @@ def nanargmin(values, axis=None, skipna=True):
474
474
475
475
@disallow ('M8' , 'm8' )
476
476
def nanskew (values , axis = None , skipna = True ):
477
+ """ Compute the sample skewness.
478
+
479
+ The statistic computed here is the adjusted Fisher-Pearson standardized
480
+ moment coefficient G1. The algorithm computes this coefficient directly
481
+ from the second and third central moment.
482
+
483
+ """
477
484
478
485
mask = isnull (values )
479
486
if not is_float_dtype (values .dtype ):
@@ -486,32 +493,48 @@ def nanskew(values, axis=None, skipna=True):
486
493
values = values .copy ()
487
494
np .putmask (values , mask , 0 )
488
495
489
- typ = values .dtype .type
490
- A = values .sum (axis ) / count
491
- B = (values ** 2 ).sum (axis ) / count - A ** typ (2 )
492
- C = (values ** 3 ).sum (axis ) / count - A ** typ (3 ) - typ (3 ) * A * B
496
+ mean = values .sum (axis , dtype = np .float64 ) / count
497
+ if axis is not None :
498
+ mean = np .expand_dims (mean , axis )
499
+
500
+ adjusted = values - mean
501
+ if skipna :
502
+ np .putmask (adjusted , mask , 0 )
503
+ adjusted2 = adjusted ** 2
504
+ adjusted3 = adjusted2 * adjusted
505
+ m2 = adjusted2 .sum (axis , dtype = np .float64 )
506
+ m3 = adjusted3 .sum (axis , dtype = np .float64 )
493
507
494
508
# floating point error
495
- B = _zero_out_fperr (B )
496
- C = _zero_out_fperr (C )
509
+ m2 = _zero_out_fperr (m2 )
510
+ m3 = _zero_out_fperr (m3 )
497
511
498
- result = ((np .sqrt (count * count - count ) * C ) /
499
- ((count - typ (2 )) * np .sqrt (B )** typ (3 )))
512
+ result = (count * (count - 1 ) ** 0.5 / (count - 2 )) * (m3 / m2 ** 1.5 )
513
+
514
+ dtype = values .dtype
515
+ if is_float_dtype (dtype ):
516
+ result = result .astype (dtype )
500
517
501
518
if isinstance (result , np .ndarray ):
502
- result = np .where (B == 0 , 0 , result )
519
+ result = np .where (m2 == 0 , 0 , result )
503
520
result [count < 3 ] = np .nan
504
521
return result
505
522
else :
506
- result = 0 if B == 0 else result
523
+ result = 0 if m2 == 0 else result
507
524
if count < 3 :
508
525
return np .nan
509
526
return result
510
527
511
528
512
529
@disallow ('M8' , 'm8' )
513
530
def nankurt (values , axis = None , skipna = True ):
531
+ """ Compute the sample skewness.
532
+
533
+ The statistic computed here is the adjusted Fisher-Pearson standardized
534
+ moment coefficient G2, computed directly from the second and fourth
535
+ central moment.
514
536
537
+ """
515
538
mask = isnull (values )
516
539
if not is_float_dtype (values .dtype ):
517
540
values = values .astype ('f8' )
@@ -523,30 +546,43 @@ def nankurt(values, axis=None, skipna=True):
523
546
values = values .copy ()
524
547
np .putmask (values , mask , 0 )
525
548
526
- typ = values .dtype .type
527
- A = values .sum (axis ) / count
528
- B = (values ** 2 ).sum (axis ) / count - A ** typ (2 )
529
- C = (values ** 3 ).sum (axis ) / count - A ** typ (3 ) - typ (3 ) * A * B
530
- D = ((values ** 4 ).sum (axis ) / count - A ** typ (4 ) -
531
- typ (6 ) * B * A * A - typ (4 ) * C * A )
549
+ mean = values .sum (axis , dtype = np .float64 ) / count
550
+ if axis is not None :
551
+ mean = np .expand_dims (mean , axis )
552
+
553
+ adjusted = values - mean
554
+ if skipna :
555
+ np .putmask (adjusted , mask , 0 )
556
+ adjusted2 = adjusted ** 2
557
+ adjusted4 = adjusted2 ** 2
558
+ m2 = adjusted2 .sum (axis , dtype = np .float64 )
559
+ m4 = adjusted4 .sum (axis , dtype = np .float64 )
560
+
561
+ adj = 3 * (count - 1 ) ** 2 / ((count - 2 ) * (count - 3 ))
562
+ numer = count * (count + 1 ) * (count - 1 ) * m4
563
+ denom = (count - 2 ) * (count - 3 ) * m2 ** 2
564
+ result = numer / denom - adj
532
565
533
- B = _zero_out_fperr (B )
534
- D = _zero_out_fperr (D )
566
+ # floating point error
567
+ numer = _zero_out_fperr (numer )
568
+ denom = _zero_out_fperr (denom )
535
569
536
- if not isinstance (B , np .ndarray ):
537
- # if B is a scalar, check these corner cases first before doing
538
- # division
570
+ if not isinstance (denom , np .ndarray ):
571
+ # if ``denom`` is a scalar, check these corner cases first before
572
+ # doing division
539
573
if count < 4 :
540
574
return np .nan
541
- if B == 0 :
575
+ if denom == 0 :
542
576
return 0
543
577
544
- result = (((count * count - typ (1 )) * D / (B * B ) - typ (3 ) *
545
- ((count - typ (1 ))** typ (2 ))) / ((count - typ (2 )) *
546
- (count - typ (3 ))))
578
+ result = numer / denom - adj
579
+
580
+ dtype = values .dtype
581
+ if is_float_dtype (dtype ):
582
+ result = result .astype (dtype )
547
583
548
584
if isinstance (result , np .ndarray ):
549
- result = np .where (B == 0 , 0 , result )
585
+ result = np .where (denom == 0 , 0 , result )
550
586
result [count < 4 ] = np .nan
551
587
552
588
return result
0 commit comments