Merge pull request #7547 from toddrjen/nanopsclean

jreback · jreback · commit f8b101c5323a · 2014-06-26T06:26:02.000-04:00
Minor cleanups for nanops
diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -2175,94 +2175,98 @@ def is_number(obj):
     return isinstance(obj, (numbers.Number, np.number))
 
 
-def is_integer_dtype(arr_or_dtype):
+def _get_dtype(arr_or_dtype):  # return the dtype for the argument
     if isinstance(arr_or_dtype, np.dtype):
-        tipo = arr_or_dtype.type
-    else:
-        tipo = arr_or_dtype.dtype.type
-    return (issubclass(tipo, np.integer) and not
-            (issubclass(tipo, np.datetime64) or
-             issubclass(tipo, np.timedelta64)))
+        return arr_or_dtype  # already a dtype: pass through
+    if isinstance(arr_or_dtype, type):
+        return np.dtype(arr_or_dtype)  # a class: build its dtype
+    return arr_or_dtype.dtype  # otherwise rely on a .dtype attribute
 
 
-def _is_int_or_datetime_dtype(arr_or_dtype):
-    # also timedelta64
+def _get_dtype_type(arr_or_dtype):  # return the dtype's scalar class
     if isinstance(arr_or_dtype, np.dtype):
-        tipo = arr_or_dtype.type
-    else:
-        tipo = arr_or_dtype.dtype.type
+        return arr_or_dtype.type
+    if isinstance(arr_or_dtype, type):
+        return np.dtype(arr_or_dtype).type  # class -> dtype -> scalar class
+    return arr_or_dtype.dtype.type  # otherwise rely on a .dtype attribute
+
+
+def _is_any_int_dtype(arr_or_dtype):  # plain np.integer subclass check
+    tipo = _get_dtype_type(arr_or_dtype)  # scalar class of the dtype
     return issubclass(tipo, np.integer)
 
 
+def is_integer_dtype(arr_or_dtype):  # integer, minus datetime/timedelta
+    tipo = _get_dtype_type(arr_or_dtype)  # scalar class of the dtype
+    return (issubclass(tipo, np.integer) and
+            not issubclass(tipo, (np.datetime64, np.timedelta64)))
+
+
+def _is_int_or_datetime_dtype(arr_or_dtype):  # also matches timedelta64
+    tipo = _get_dtype_type(arr_or_dtype)  # scalar class of the dtype
+    return (issubclass(tipo, np.integer) or
+            issubclass(tipo, (np.datetime64, np.timedelta64)))
+
+
 def is_datetime64_dtype(arr_or_dtype):
-    if isinstance(arr_or_dtype, np.dtype):
-        tipo = arr_or_dtype.type
-    elif isinstance(arr_or_dtype, type):
-        tipo = np.dtype(arr_or_dtype).type
-    else:
-        tipo = arr_or_dtype.dtype.type
+    tipo = _get_dtype_type(arr_or_dtype)
     return issubclass(tipo, np.datetime64)
 
 
 def is_datetime64_ns_dtype(arr_or_dtype):
-    if isinstance(arr_or_dtype, np.dtype):
-        tipo = arr_or_dtype
-    elif isinstance(arr_or_dtype, type):
-        tipo = np.dtype(arr_or_dtype)
-    else:
-        tipo = arr_or_dtype.dtype
+    tipo = _get_dtype(arr_or_dtype)
     return tipo == _NS_DTYPE
 
 
 def is_timedelta64_dtype(arr_or_dtype):
-    if isinstance(arr_or_dtype, np.dtype):
-        tipo = arr_or_dtype.type
-    elif isinstance(arr_or_dtype, type):
-        tipo = np.dtype(arr_or_dtype).type
-    else:
-        tipo = arr_or_dtype.dtype.type
+    tipo = _get_dtype_type(arr_or_dtype)
     return issubclass(tipo, np.timedelta64)
 
 
 def is_timedelta64_ns_dtype(arr_or_dtype):
-    if isinstance(arr_or_dtype, np.dtype):
-        tipo = arr_or_dtype.type
-    elif isinstance(arr_or_dtype, type):
-        tipo = np.dtype(arr_or_dtype).type
-    else:
-        tipo = arr_or_dtype.dtype.type
+    tipo = _get_dtype(arr_or_dtype)  # dtype, not .type (cf. datetime64_ns)
     return tipo == _TD_DTYPE
 
 
-def needs_i8_conversion(arr_or_dtype):
-    return (is_datetime64_dtype(arr_or_dtype) or
-            is_timedelta64_dtype(arr_or_dtype))
+def _is_datetime_or_timedelta_dtype(arr_or_dtype):
+    tipo = _get_dtype_type(arr_or_dtype)  # scalar class of the dtype
+    return issubclass(tipo, (np.datetime64, np.timedelta64))  # either kind
+
+
+needs_i8_conversion = _is_datetime_or_timedelta_dtype
 
 
 def is_numeric_dtype(arr_or_dtype):
-    if isinstance(arr_or_dtype, np.dtype):
-        tipo = arr_or_dtype.type
-    else:
-        tipo = arr_or_dtype.dtype.type
+    tipo = _get_dtype_type(arr_or_dtype)
     return (issubclass(tipo, (np.number, np.bool_))
             and not issubclass(tipo, (np.datetime64, np.timedelta64)))
 
+
 def is_float_dtype(arr_or_dtype):
-    if isinstance(arr_or_dtype, np.dtype):
-        tipo = arr_or_dtype.type
-    else:
-        tipo = arr_or_dtype.dtype.type
+    tipo = _get_dtype_type(arr_or_dtype)
     return issubclass(tipo, np.floating)
 
 
+def _is_floating_dtype(arr_or_dtype):
+    tipo = _get_dtype_type(arr_or_dtype)  # a scalar class, not an instance
+    return issubclass(tipo, np.floating)
+
+
+def is_bool_dtype(arr_or_dtype):
+    tipo = _get_dtype_type(arr_or_dtype)  # scalar class of the dtype
+    return issubclass(tipo, np.bool_)
+
+
 def is_complex_dtype(arr_or_dtype):
-    if isinstance(arr_or_dtype, np.dtype):
-        tipo = arr_or_dtype.type
-    else:
-        tipo = arr_or_dtype.dtype.type
+    tipo = _get_dtype_type(arr_or_dtype)
     return issubclass(tipo, np.complexfloating)
 
 
+def is_object_dtype(arr_or_dtype):
+    tipo = _get_dtype_type(arr_or_dtype)  # scalar class of the dtype
+    return issubclass(tipo, np.object_)
+
+
 def is_re(obj):
     return isinstance(obj, re._pattern_type)
 
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
@@ -14,7 +14,17 @@
 import pandas.hashtable as _hash
 from pandas import compat, lib, algos, tslib
 from pandas.compat import builtins
-from pandas.core.common import isnull, notnull, _values_from_object, is_float
+from pandas.core.common import (isnull, notnull, _values_from_object,
+                                _maybe_upcast_putmask,
+                                ensure_float, _ensure_float64,
+                                _ensure_int64, _ensure_object,
+                                is_float, is_integer, is_complex,
+                                is_float_dtype, _is_floating_dtype,
+                                is_complex_dtype, is_integer_dtype,
+                                is_bool_dtype, is_object_dtype,
+                                is_datetime64_dtype, is_timedelta64_dtype,
+                                _is_datetime_or_timedelta_dtype,
+                                _is_int_or_datetime_dtype, _is_any_int_dtype)
 
 
 class disallow(object):
@@ -90,8 +100,8 @@ def f(values, axis=None, skipna=True, **kwds):
 
 def _bn_ok_dtype(dt, name):
     # Bottleneck chokes on datetime64
-    if dt != np.object_ and not issubclass(dt.type, (np.datetime64,
-                                                     np.timedelta64)):
+    if (not is_object_dtype(dt) and
+            not _is_datetime_or_timedelta_dtype(dt)):
 
         # bottleneck does not properly upcast during the sum
         # so can overflow
@@ -166,8 +176,7 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
 
         # promote if needed
         else:
-            values, changed = com._maybe_upcast_putmask(values, mask,
-                                                        fill_value)
+            values, changed = _maybe_upcast_putmask(values, mask, fill_value)
 
     elif copy:
         values = values.copy()
@@ -176,47 +185,42 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
 
     # return a platform independent precision dtype
     dtype_max = dtype
-    if dtype.kind == 'i' and not issubclass(dtype.type, (np.bool,
-                                                         np.datetime64,
-                                                         np.timedelta64)):
+    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
         dtype_max = np.int64
-    elif dtype.kind in ['b'] or issubclass(dtype.type, np.bool):
-        dtype_max = np.int64
-    elif dtype.kind in ['f']:
+    elif is_float_dtype(dtype):
         dtype_max = np.float64
 
     return values, mask, dtype, dtype_max
 
 
 def _isfinite(values):
-    if issubclass(values.dtype.type, (np.timedelta64, np.datetime64)):
+    if _is_datetime_or_timedelta_dtype(values):
         return isnull(values)
-    elif isinstance(values.dtype, object):
-        return ~np.isfinite(values.astype('float64'))
-
-    return ~np.isfinite(values)
+    if (is_complex_dtype(values) or is_float_dtype(values) or
+            is_integer_dtype(values) or is_bool_dtype(values)):
+        return ~np.isfinite(values)  # note the ~: marks NON-finite entries
+    return ~np.isfinite(values.astype('float64'))  # fallback: cast first
 
 
 def _na_ok_dtype(dtype):
-    return not issubclass(dtype.type, (np.integer, np.datetime64,
-                                       np.timedelta64))
+    return not _is_int_or_datetime_dtype(dtype)  # not int/datetime/timedelta
 
 
 def _view_if_needed(values):
-    if issubclass(values.dtype.type, (np.datetime64, np.timedelta64)):
+    if _is_datetime_or_timedelta_dtype(values):  # view these as int64
         return values.view(np.int64)
     return values
 
 
 def _wrap_results(result, dtype):
     """ wrap our results if needed """
 
-    if issubclass(dtype.type, np.datetime64):
+    if is_datetime64_dtype(dtype):
         if not isinstance(result, np.ndarray):
             result = lib.Timestamp(result)
         else:
             result = result.view(dtype)
-    elif issubclass(dtype.type, np.timedelta64):
+    elif is_timedelta64_dtype(dtype):
         if not isinstance(result, np.ndarray):
 
             # this is a scalar timedelta result!
@@ -334,7 +338,7 @@ def _get_counts_nanvar(mask, axis, ddof):
 @disallow('M8')
 @bottleneck_switch(ddof=1)
 def nanvar(values, axis=None, skipna=True, ddof=1):
-    if not isinstance(values.dtype.type, np.floating):
+    if not _is_floating_dtype(values):
         values = values.astype('f8')
 
     mask = isnull(values)
@@ -353,7 +357,7 @@ def nanvar(values, axis=None, skipna=True, ddof=1):
 def nansem(values, axis=None, skipna=True, ddof=1):
     var = nanvar(values, axis, skipna, ddof=ddof)
 
-    if not isinstance(values.dtype.type, np.floating):
+    if not _is_floating_dtype(values):
         values = values.astype('f8')
     mask = isnull(values)
     count, _ = _get_counts_nanvar(mask, axis, ddof)
@@ -367,7 +371,7 @@ def nanmin(values, axis=None, skipna=True):
                                                  fill_value_typ='+inf')
 
     # numpy 1.6.1 workaround in Python 3.x
-    if (values.dtype == np.object_ and compat.PY3):
+    if is_object_dtype(values) and compat.PY3:
         if values.ndim > 1:
             apply_ax = axis if axis is not None else 0
             result = np.apply_along_axis(builtins.min, apply_ax, values)
@@ -380,7 +384,7 @@ def nanmin(values, axis=None, skipna=True):
         if ((axis is not None and values.shape[axis] == 0)
                 or values.size == 0):
             try:
-                result = com.ensure_float(values.sum(axis, dtype=dtype_max))
+                result = ensure_float(values.sum(axis, dtype=dtype_max))
                 result.fill(np.nan)
             except:
                 result = np.nan
@@ -397,7 +401,7 @@ def nanmax(values, axis=None, skipna=True):
                                                  fill_value_typ='-inf')
 
     # numpy 1.6.1 workaround in Python 3.x
-    if (values.dtype == np.object_ and compat.PY3):
+    if is_object_dtype(values) and compat.PY3:
 
         if values.ndim > 1:
             apply_ax = axis if axis is not None else 0
@@ -411,7 +415,7 @@ def nanmax(values, axis=None, skipna=True):
         if ((axis is not None and values.shape[axis] == 0)
                 or values.size == 0):
             try:
-                result = com.ensure_float(values.sum(axis, dtype=dtype_max))
+                result = ensure_float(values.sum(axis, dtype=dtype_max))
                 result.fill(np.nan)
             except:
                 result = np.nan
@@ -446,7 +450,7 @@ def nanargmin(values, axis=None, skipna=True):
 
 @disallow('M8')
 def nanskew(values, axis=None, skipna=True):
-    if not isinstance(values.dtype.type, np.floating):
+    if not _is_floating_dtype(values):
         values = values.astype('f8')
 
     mask = isnull(values)
@@ -480,7 +484,7 @@ def nanskew(values, axis=None, skipna=True):
 
 @disallow('M8')
 def nankurt(values, axis=None, skipna=True):
-    if not isinstance(values.dtype.type, np.floating):
+    if not _is_floating_dtype(values):
         values = values.astype('f8')
 
     mask = isnull(values)
@@ -515,7 +519,7 @@ def nankurt(values, axis=None, skipna=True):
 @disallow('M8')
 def nanprod(values, axis=None, skipna=True):
     mask = isnull(values)
-    if skipna and not issubclass(values.dtype.type, np.integer):
+    if skipna and not _is_any_int_dtype(values):
         values = values.copy()
         values[mask] = 1
     result = values.prod(axis)
@@ -644,17 +648,17 @@ def nancov(a, b, min_periods=None):
 
 def _ensure_numeric(x):
     if isinstance(x, np.ndarray):
-        if x.dtype.kind in ['i', 'b']:
+        if is_integer_dtype(x) or is_bool_dtype(x):
             x = x.astype(np.float64)
-        elif x.dtype == np.object_:
+        elif is_object_dtype(x):
             try:
                 x = x.astype(np.complex128)
             except:
                 x = x.astype(np.float64)
             else:
                 if not np.any(x.imag):
                     x = x.real
-    elif not (com.is_float(x) or com.is_integer(x) or com.is_complex(x)):
+    elif not (is_float(x) or is_integer(x) or is_complex(x)):
         try:
             x = float(x)
         except Exception:
@@ -678,7 +682,7 @@ def f(x, y):
         result = op(x, y)
 
         if mask.any():
-            if result.dtype == np.bool_:
+            if is_bool_dtype(result):
                 result = result.astype('O')
             np.putmask(result, mask, np.nan)
 
@@ -699,16 +703,16 @@ def unique1d(values):
     """
     if np.issubdtype(values.dtype, np.floating):
         table = _hash.Float64HashTable(len(values))
-        uniques = np.array(table.unique(com._ensure_float64(values)),
+        uniques = np.array(table.unique(_ensure_float64(values)),
                            dtype=np.float64)
     elif np.issubdtype(values.dtype, np.datetime64):
         table = _hash.Int64HashTable(len(values))
-        uniques = table.unique(com._ensure_int64(values))
+        uniques = table.unique(_ensure_int64(values))
         uniques = uniques.view('M8[ns]')
     elif np.issubdtype(values.dtype, np.integer):
         table = _hash.Int64HashTable(len(values))
-        uniques = table.unique(com._ensure_int64(values))
+        uniques = table.unique(_ensure_int64(values))
     else:
         table = _hash.PyObjectHashTable(len(values))
-        uniques = table.unique(com._ensure_object(values))
+        uniques = table.unique(_ensure_object(values))
     return uniques