1
1
import itertools
2
2
import functools
3
3
import numpy as np
4
+ import operator
4
5
5
6
try :
6
7
import bottleneck as bn
10
11
11
12
import pandas .hashtable as _hash
12
13
from pandas import compat , lib , algos , tslib
13
- from pandas .compat import builtins
14
14
from pandas .core .common import (isnull , notnull , _values_from_object ,
15
- _maybe_upcast_putmask ,
16
- ensure_float , _ensure_float64 ,
17
- _ensure_int64 , _ensure_object ,
18
- is_float , is_integer , is_complex ,
19
- is_float_dtype ,
15
+ _maybe_upcast_putmask , _ensure_float64 ,
16
+ _ensure_int64 , _ensure_object , is_float ,
17
+ is_integer , is_complex , is_float_dtype ,
20
18
is_complex_dtype , is_integer_dtype ,
21
19
is_bool_dtype , is_object_dtype ,
22
20
is_datetime64_dtype , is_timedelta64_dtype ,
26
24
27
25
28
26
class disallow (object ):
29
-
30
27
def __init__ (self , * dtypes ):
31
28
super (disallow , self ).__init__ ()
32
29
self .dtypes = tuple (np .dtype (dtype ).type for dtype in dtypes )
@@ -41,8 +38,8 @@ def _f(*args, **kwargs):
41
38
obj_iter = itertools .chain (args , compat .itervalues (kwargs ))
42
39
if any (self .check (obj ) for obj in obj_iter ):
43
40
raise TypeError ('reduction operation {0!r} not allowed for '
44
- 'this dtype' .format (f . __name__ . replace ( 'nan' ,
45
- '' )))
41
+ 'this dtype' .format (
42
+ f . __name__ . replace ( 'nan' , '' )))
46
43
try :
47
44
return f (* args , ** kwargs )
48
45
except ValueError as e :
@@ -53,11 +50,11 @@ def _f(*args, **kwargs):
53
50
if is_object_dtype (args [0 ]):
54
51
raise TypeError (e )
55
52
raise
53
+
56
54
return _f
57
55
58
56
59
57
class bottleneck_switch (object ):
60
-
61
58
def __init__ (self , zero_value = None , ** kwargs ):
62
59
self .zero_value = zero_value
63
60
self .kwargs = kwargs
@@ -91,8 +88,8 @@ def f(values, axis=None, skipna=True, **kwds):
91
88
result .fill (0 )
92
89
return result
93
90
94
- if _USE_BOTTLENECK and skipna and _bn_ok_dtype ( values . dtype ,
95
- bn_name ):
91
+ if ( _USE_BOTTLENECK and skipna and
92
+ _bn_ok_dtype ( values . dtype , bn_name ) ):
96
93
result = bn_func (values , axis = axis , ** kwds )
97
94
98
95
# prefer to treat inf/-inf as NA, but must compute the func
@@ -121,8 +118,7 @@ def f(values, axis=None, skipna=True, **kwds):
121
118
122
119
def _bn_ok_dtype (dt , name ):
123
120
# Bottleneck chokes on datetime64
124
- if (not is_object_dtype (dt ) and
125
- not is_datetime_or_timedelta_dtype (dt )):
121
+ if (not is_object_dtype (dt ) and not is_datetime_or_timedelta_dtype (dt )):
126
122
127
123
# bottleneck does not properly upcast during the sum
128
124
# so can overflow
@@ -142,7 +138,7 @@ def _has_infs(result):
142
138
return lib .has_infs_f4 (result .ravel ())
143
139
try :
144
140
return np .isinf (result ).any ()
145
- except (TypeError , NotImplementedError ) as e :
141
+ except (TypeError , NotImplementedError ):
146
142
# if it doesn't support infs, then it can't have infs
147
143
return False
148
144
@@ -173,8 +169,9 @@ def _get_fill_value(dtype, fill_value=None, fill_value_typ=None):
173
169
def _get_values (values , skipna , fill_value = None , fill_value_typ = None ,
174
170
isfinite = False , copy = True ):
175
171
""" utility to get the values view, mask, dtype
176
- if necessary copy and mask using the specified fill_value
177
- copy = True will force the copy """
172
+ if necessary copy and mask using the specified fill_value
173
+ copy = True will force the copy
174
+ """
178
175
values = _values_from_object (values )
179
176
if isfinite :
180
177
mask = _isfinite (values )
@@ -331,7 +328,8 @@ def get_median(x):
331
328
if values .ndim > 1 :
332
329
# there's a non-empty array to apply over otherwise numpy raises
333
330
if notempty :
334
- return _wrap_results (np .apply_along_axis (get_median , axis , values ), dtype )
331
+ return _wrap_results (
332
+ np .apply_along_axis (get_median , axis , values ), dtype )
335
333
336
334
# must return the correct shape, but median is not defined for the
337
335
# empty set so return nans of shape "everything but the passed axis"
@@ -400,7 +398,7 @@ def nanvar(values, axis=None, skipna=True, ddof=1):
400
398
avg = _ensure_numeric (values .sum (axis = axis , dtype = np .float64 )) / count
401
399
if axis is not None :
402
400
avg = np .expand_dims (avg , axis )
403
- sqr = _ensure_numeric ((avg - values ) ** 2 )
401
+ sqr = _ensure_numeric ((avg - values )** 2 )
404
402
np .putmask (sqr , mask , 0 )
405
403
result = sqr .sum (axis = axis , dtype = np .float64 ) / d
406
404
@@ -429,13 +427,10 @@ def _nanminmax(meth, fill_value_typ):
429
427
@bottleneck_switch ()
430
428
def reduction (values , axis = None , skipna = True ):
431
429
values , mask , dtype , dtype_max = _get_values (
432
- values ,
433
- skipna ,
434
- fill_value_typ = fill_value_typ ,
435
- )
430
+ values , skipna , fill_value_typ = fill_value_typ , )
436
431
437
- if ((axis is not None and values .shape [axis ] == 0 )
438
- or values .size == 0 ):
432
+ if ((axis is not None and values .shape [axis ] == 0 ) or
433
+ values .size == 0 ):
439
434
try :
440
435
result = getattr (values , meth )(axis , dtype = dtype_max )
441
436
result .fill (np .nan )
@@ -477,7 +472,7 @@ def nanargmin(values, axis=None, skipna=True):
477
472
return result
478
473
479
474
480
- @disallow ('M8' ,'m8' )
475
+ @disallow ('M8' , 'm8' )
481
476
def nanskew (values , axis = None , skipna = True ):
482
477
483
478
mask = isnull (values )
@@ -493,15 +488,15 @@ def nanskew(values, axis=None, skipna=True):
493
488
494
489
typ = values .dtype .type
495
490
A = values .sum (axis ) / count
496
- B = (values ** 2 ).sum (axis ) / count - A ** typ (2 )
497
- C = (values ** 3 ).sum (axis ) / count - A ** typ (3 ) - typ (3 ) * A * B
491
+ B = (values ** 2 ).sum (axis ) / count - A ** typ (2 )
492
+ C = (values ** 3 ).sum (axis ) / count - A ** typ (3 ) - typ (3 ) * A * B
498
493
499
494
# floating point error
500
495
B = _zero_out_fperr (B )
501
496
C = _zero_out_fperr (C )
502
497
503
498
result = ((np .sqrt (count * count - count ) * C ) /
504
- ((count - typ (2 )) * np .sqrt (B ) ** typ (3 )))
499
+ ((count - typ (2 )) * np .sqrt (B )** typ (3 )))
505
500
506
501
if isinstance (result , np .ndarray ):
507
502
result = np .where (B == 0 , 0 , result )
@@ -514,7 +509,7 @@ def nanskew(values, axis=None, skipna=True):
514
509
return result
515
510
516
511
517
- @disallow ('M8' ,'m8' )
512
+ @disallow ('M8' , 'm8' )
518
513
def nankurt (values , axis = None , skipna = True ):
519
514
520
515
mask = isnull (values )
@@ -530,22 +525,25 @@ def nankurt(values, axis=None, skipna=True):
530
525
531
526
typ = values .dtype .type
532
527
A = values .sum (axis ) / count
533
- B = (values ** 2 ).sum (axis ) / count - A ** typ (2 )
534
- C = (values ** 3 ).sum (axis ) / count - A ** typ (3 ) - typ (3 ) * A * B
535
- D = (values ** 4 ).sum (axis ) / count - A ** typ (4 ) - typ (6 ) * B * A * A - typ (4 ) * C * A
528
+ B = (values ** 2 ).sum (axis ) / count - A ** typ (2 )
529
+ C = (values ** 3 ).sum (axis ) / count - A ** typ (3 ) - typ (3 ) * A * B
530
+ D = ((values ** 4 ).sum (axis ) / count - A ** typ (4 ) -
531
+ typ (6 ) * B * A * A - typ (4 ) * C * A )
536
532
537
533
B = _zero_out_fperr (B )
538
534
D = _zero_out_fperr (D )
539
535
540
536
if not isinstance (B , np .ndarray ):
541
- # if B is a scalar, check these corner cases first before doing division
537
+ # if B is a scalar, check these corner cases first before doing
538
+ # division
542
539
if count < 4 :
543
540
return np .nan
544
541
if B == 0 :
545
542
return 0
546
543
547
- result = (((count * count - typ (1 )) * D / (B * B ) - typ (3 ) * ((count - typ (1 )) ** typ (2 ))) /
548
- ((count - typ (2 )) * (count - typ (3 ))))
544
+ result = (((count * count - typ (1 )) * D / (B * B ) - typ (3 ) *
545
+ ((count - typ (1 ))** typ (2 ))) / ((count - typ (2 )) *
546
+ (count - typ (3 ))))
549
547
550
548
if isinstance (result , np .ndarray ):
551
549
result = np .where (B == 0 , 0 , result )
@@ -554,7 +552,7 @@ def nankurt(values, axis=None, skipna=True):
554
552
return result
555
553
556
554
557
- @disallow ('M8' ,'m8' )
555
+ @disallow ('M8' , 'm8' )
558
556
def nanprod (values , axis = None , skipna = True ):
559
557
mask = isnull (values )
560
558
if skipna and not is_any_int_dtype (values ):
@@ -621,7 +619,7 @@ def _zero_out_fperr(arg):
621
619
return arg .dtype .type (0 ) if np .abs (arg ) < 1e-14 else arg
622
620
623
621
624
- @disallow ('M8' ,'m8' )
622
+ @disallow ('M8' , 'm8' )
625
623
def nancorr (a , b , method = 'pearson' , min_periods = None ):
626
624
"""
627
625
a, b: ndarrays
@@ -668,7 +666,7 @@ def _spearman(a, b):
668
666
return _cor_methods [method ]
669
667
670
668
671
- @disallow ('M8' ,'m8' )
669
+ @disallow ('M8' , 'm8' )
672
670
def nancov (a , b , min_periods = None ):
673
671
if len (a ) != len (b ):
674
672
raise AssertionError ('Operands to nancov must have same size' )
@@ -711,8 +709,6 @@ def _ensure_numeric(x):
711
709
712
710
# NA-friendly array comparisons
713
711
714
- import operator
715
-
716
712
717
713
def make_nancomp (op ):
718
714
def f (x , y ):
@@ -728,8 +724,10 @@ def f(x, y):
728
724
np .putmask (result , mask , np .nan )
729
725
730
726
return result
727
+
731
728
return f
732
729
730
+
733
731
nangt = make_nancomp (operator .gt )
734
732
nange = make_nancomp (operator .ge )
735
733
nanlt = make_nancomp (operator .lt )
0 commit comments