Merge pull request #11356 from jreback/warnings

jreback · jreback · commit 5d953e3fba42 · 2015-10-18T19:52:37.000-04:00
WIP: avoid some numpy warnings #8537
diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt
@@ -41,6 +41,7 @@ API changes
 - Regression from 0.16.2 for output formatting of long floats/nan, restored in (:issue:`11302`)
 - Prettyprinting sets (e.g. in DataFrame cells) now uses set literal syntax (``{x, y}``) instead of
   Legacy Python syntax (``set([x, y])``) (:issue:`11215`)
+- Indexing with a null key will raise a ``TypeError``, instead of a ``ValueError`` (:issue:`11356`)
 
 .. _whatsnew_0171.deprecations:
 
diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -444,14 +444,24 @@ def mask_missing(arr, values_to_mask):
     mask = None
     for x in nonna:
         if mask is None:
-            mask = arr == x
+
+            # numpy elementwise comparison warning
+            if is_numeric_v_string_like(arr, x):
+                mask = False
+            else:
+                mask = arr == x
 
             # if x is a string and arr is not, then we get False and we must
             # expand the mask to size arr.shape
             if np.isscalar(mask):
                 mask = np.zeros(arr.shape, dtype=bool)
         else:
-            mask |= arr == x
+
+            # numpy elementwise comparison warning
+            if is_numeric_v_string_like(arr, x):
+                mask |= False
+            else:
+                mask |= arr == x
 
     if na_mask.any():
         if mask is None:
@@ -2382,6 +2392,9 @@ def _maybe_make_list(obj):
 is_complex = lib.is_complex
 
 
+def is_string_like(obj):
+    return isinstance(obj, (compat.text_type, compat.string_types))
+
 def is_iterator(obj):
     # python 3 generators have __next__ instead of next
     return hasattr(obj, 'next') or hasattr(obj, '__next__')
@@ -2525,6 +2538,27 @@ def is_datetime_or_timedelta_dtype(arr_or_dtype):
     return issubclass(tipo, (np.datetime64, np.timedelta64))
 
 
+def is_numeric_v_string_like(a, b):
+    """
+    numpy doesn't like to compare numeric arrays vs scalar string-likes
+
+    return True if this is the case for (a, b) or for (b, a), otherwise False
+
+    """
+    is_a_array = isinstance(a, np.ndarray)
+    is_b_array = isinstance(b, np.ndarray)
+
+    is_a_numeric_array = is_a_array and is_numeric_dtype(a)
+    is_b_numeric_array = is_b_array and is_numeric_dtype(b)
+
+    is_a_scalar_string_like = not is_a_array and is_string_like(a)
+    is_b_scalar_string_like = not is_b_array and is_string_like(b)
+
+    return (
+        is_a_numeric_array and is_b_scalar_string_like) or (
+        is_b_numeric_array and is_a_scalar_string_like
+        )
+
 def is_datetimelike_v_numeric(a, b):
     # return if we have an i8 convertible and numeric comparision
     if not hasattr(a,'dtype'):
diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -862,9 +862,10 @@ def to_int():
             return self._invalid_indexer('label', key)
 
         if is_float(key):
-            if not self.is_floating():
-                warnings.warn("scalar indexers for index type {0} should be integers and not floating point".format(
-                    type(self).__name__), FutureWarning, stacklevel=3)
+            if isnull(key):
+                return self._invalid_indexer('label', key)
+            warnings.warn("scalar indexers for index type {0} should be integers and not floating point".format(
+                type(self).__name__), FutureWarning, stacklevel=3)
             return to_int()
 
         return key
@@ -3721,9 +3722,23 @@ def astype(self, dtype):
         return Index(self._values, name=self.name, dtype=dtype)
 
     def _convert_scalar_indexer(self, key, kind=None):
+        """
+        convert a scalar indexer
+
+        Parameters
+        ----------
+        key : label of the slice bound
+        kind : optional, type of the indexing operation (loc/ix/iloc/None)
+
+        right now we are converting
+        floats -> ints if the index supports it
+        """
+
         if kind == 'iloc':
-            return super(Float64Index, self)._convert_scalar_indexer(key,
-                                                                     kind=kind)
+            if is_integer(key):
+                return key
+            return super(Float64Index, self)._convert_scalar_indexer(key, kind=kind)
+
         return key
 
     def _convert_slice_indexer(self, key, kind=None):
@@ -4276,7 +4291,7 @@ def _reference_duplicate_name(self, name):
         Returns True if the name refered to in self.names is duplicated.
         """
         # count the times name equals an element in self.names.
-        return np.sum(name == np.asarray(self.names)) > 1
+        return sum(name == n for n in self.names) > 1
 
     def _format_native_types(self, **kwargs):
         return self.values
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -1285,7 +1285,7 @@ def _has_valid_type(self, key, axis):
 
             def error():
                 if isnull(key):
-                    raise ValueError(
+                    raise TypeError(
                         "cannot use label indexing with a null key")
                 raise KeyError("the label [%s] is not in the [%s]" %
                                (key, self.obj._get_axis_name(axis)))
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -17,7 +17,7 @@
                                 is_datetime64tz_dtype, is_datetimetz, is_sparse,
                                 array_equivalent, _maybe_convert_string_to_object,
                                 is_categorical, needs_i8_conversion, is_datetimelike_v_numeric,
-                                is_internal_type)
+                                is_numeric_v_string_like, is_internal_type)
 from pandas.core.dtypes import DatetimeTZDtype
 
 from pandas.core.index import Index, MultiIndex, _ensure_index
@@ -1082,8 +1082,16 @@ def eval(self, func, other, raise_on_error=True, try_cast=False, mgr=None):
         # get the result, may need to transpose the other
         def get_result(other):
 
-            # compute
-            result = func(values, other)
+            # avoid numpy warning of comparisons against None
+            if other is None:
+                result = not func.__name__ == 'eq'
+
+            # avoid numpy warning of elementwise numeric vs string-like comparisons
+            elif is_numeric_v_string_like(values, other):
+                result = False
+
+            else:
+                result = func(values, other)
 
             # mask if needed
             if isinstance(values_mask, np.ndarray) and values_mask.any():
@@ -3214,7 +3222,7 @@ def get(self, item, fastpath=True):
         else:
 
             if isnull(item):
-                raise ValueError("cannot label index with a null key")
+                raise TypeError("cannot label index with a null key")
 
             indexer = self.items.get_indexer_for([item])
             return self.reindex_indexer(new_axis=self.items[indexer],
@@ -4251,11 +4259,16 @@ def _possibly_compare(a, b, op):
 
     # numpy deprecation warning to have i8 vs integer comparisions
     if is_datetimelike_v_numeric(a, b):
-        res = False
+        result = False
+
+    # numpy deprecation warning if comparing numeric vs string-like
+    elif is_numeric_v_string_like(a, b):
+        result = False
+
     else:
-        res = op(a, b)
+        result = op(a, b)
 
-    if np.isscalar(res) and (is_a_array or is_b_array):
+    if lib.isscalar(result) and (is_a_array or is_b_array):
         type_names = [type(a).__name__, type(b).__name__]
 
         if is_a_array:
@@ -4265,7 +4278,7 @@ def _possibly_compare(a, b, op):
             type_names[1] = 'ndarray(dtype=%s)' % b.dtype
 
         raise TypeError("Cannot compare types %r and %r" % tuple(type_names))
-    return res
+    return result
 
 
 def _concat_indexes(indexes):
diff --git a/pandas/io/data.py b/pandas/io/data.py
@@ -1024,7 +1024,7 @@ def _validate_expiry(self, expiry):
         if expiry in expiry_dates:
             return expiry
         else:
-            index = DatetimeIndex(expiry_dates).order()
+            index = DatetimeIndex(expiry_dates).sort_values()
             return index[index.date >= expiry][0].date()
 
     def get_forward_data(self, months, call=True, put=False, near=False,
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -5839,7 +5839,7 @@ def check(df):
 
                 def f():
                     df.loc[:,np.nan]
-                self.assertRaises(ValueError, f)
+                self.assertRaises(TypeError, f)
 
 
         df = DataFrame([[1,2,3],[4,5,6]], index=[1,np.nan])