Skip to content

Commit 5d953e3

Browse files
committed
Merge pull request #11356 from jreback/warnings
WIP: avoid some numpy warnings #8537
2 parents b602676 + 59103db commit 5d953e3

File tree

7 files changed

+82
-19
lines changed

7 files changed

+82
-19
lines changed

doc/source/whatsnew/v0.17.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ API changes
4141
- Regression from 0.16.2 for output formatting of long floats/nan, restored in (:issue:`11302`)
4242
- Prettyprinting sets (e.g. in DataFrame cells) now uses set literal syntax (``{x, y}``) instead of
4343
Legacy Python syntax (``set([x, y])``) (:issue:`11215`)
44+
- Indexing with a null key will raise a ``TypeError``, instead of a ``ValueError`` (:issue:`11356`)
4445

4546
.. _whatsnew_0171.deprecations:
4647

pandas/core/common.py

+36-2
Original file line numberDiff line numberDiff line change
@@ -444,14 +444,24 @@ def mask_missing(arr, values_to_mask):
444444
mask = None
445445
for x in nonna:
446446
if mask is None:
447-
mask = arr == x
447+
448+
# numpy elementwise comparison warning
449+
if is_numeric_v_string_like(arr, x):
450+
mask = False
451+
else:
452+
mask = arr == x
448453

449454
# if x is a string and arr is not, then we get False and we must
450455
# expand the mask to size arr.shape
451456
if np.isscalar(mask):
452457
mask = np.zeros(arr.shape, dtype=bool)
453458
else:
454-
mask |= arr == x
459+
460+
# numpy elementwise comparison warning
461+
if is_numeric_v_string_like(arr, x):
462+
mask |= False
463+
else:
464+
mask |= arr == x
455465

456466
if na_mask.any():
457467
if mask is None:
@@ -2382,6 +2392,9 @@ def _maybe_make_list(obj):
23822392
is_complex = lib.is_complex
23832393

23842394

2395+
def is_string_like(obj):
2396+
return isinstance(obj, (compat.text_type, compat.string_types))
2397+
23852398
def is_iterator(obj):
23862399
# python 3 generators have __next__ instead of next
23872400
return hasattr(obj, 'next') or hasattr(obj, '__next__')
@@ -2525,6 +2538,27 @@ def is_datetime_or_timedelta_dtype(arr_or_dtype):
25252538
return issubclass(tipo, (np.datetime64, np.timedelta64))
25262539

25272540

2541+
def is_numeric_v_string_like(a, b):
2542+
"""
2543+
numpy doesn't like to compare numeric arrays vs scalar string-likes
2544+
2545+
return a boolean result if this is the case for a,b or b,a
2546+
2547+
"""
2548+
is_a_array = isinstance(a, np.ndarray)
2549+
is_b_array = isinstance(b, np.ndarray)
2550+
2551+
is_a_numeric_array = is_a_array and is_numeric_dtype(a)
2552+
is_b_numeric_array = is_b_array and is_numeric_dtype(b)
2553+
2554+
is_a_scalar_string_like = not is_a_array and is_string_like(a)
2555+
is_b_scalar_string_like = not is_b_array and is_string_like(b)
2556+
2557+
return (
2558+
is_a_numeric_array and is_b_scalar_string_like) or (
2559+
is_b_numeric_array and is_a_scalar_string_like
2560+
)
2561+
25282562
def is_datetimelike_v_numeric(a, b):
25292563
# return if we have an i8 convertible and numeric comparison
25302564
if not hasattr(a,'dtype'):

pandas/core/index.py

+21-6
Original file line numberDiff line numberDiff line change
@@ -862,9 +862,10 @@ def to_int():
862862
return self._invalid_indexer('label', key)
863863

864864
if is_float(key):
865-
if not self.is_floating():
866-
warnings.warn("scalar indexers for index type {0} should be integers and not floating point".format(
867-
type(self).__name__), FutureWarning, stacklevel=3)
865+
if isnull(key):
866+
return self._invalid_indexer('label', key)
867+
warnings.warn("scalar indexers for index type {0} should be integers and not floating point".format(
868+
type(self).__name__), FutureWarning, stacklevel=3)
868869
return to_int()
869870

870871
return key
@@ -3721,9 +3722,23 @@ def astype(self, dtype):
37213722
return Index(self._values, name=self.name, dtype=dtype)
37223723

37233724
def _convert_scalar_indexer(self, key, kind=None):
3725+
"""
3726+
convert a scalar indexer
3727+
3728+
Parameters
3729+
----------
3730+
key : the scalar indexer (label or position) to convert
3731+
kind : optional, type of the indexing operation (loc/ix/iloc/None)
3732+
3733+
right now we are converting
3734+
floats -> ints if the index supports it
3735+
"""
3736+
37243737
if kind == 'iloc':
3725-
return super(Float64Index, self)._convert_scalar_indexer(key,
3726-
kind=kind)
3738+
if is_integer(key):
3739+
return key
3740+
return super(Float64Index, self)._convert_scalar_indexer(key, kind=kind)
3741+
37273742
return key
37283743

37293744
def _convert_slice_indexer(self, key, kind=None):
@@ -4276,7 +4291,7 @@ def _reference_duplicate_name(self, name):
42764291
Returns True if the name referred to in self.names is duplicated.
42774292
"""
42784293
# count the times name equals an element in self.names.
4279-
return np.sum(name == np.asarray(self.names)) > 1
4294+
return sum(name == n for n in self.names) > 1
42804295

42814296
def _format_native_types(self, **kwargs):
42824297
return self.values

pandas/core/indexing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1285,7 +1285,7 @@ def _has_valid_type(self, key, axis):
12851285

12861286
def error():
12871287
if isnull(key):
1288-
raise ValueError(
1288+
raise TypeError(
12891289
"cannot use label indexing with a null key")
12901290
raise KeyError("the label [%s] is not in the [%s]" %
12911291
(key, self.obj._get_axis_name(axis)))

pandas/core/internals.py

+21-8
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
is_datetime64tz_dtype, is_datetimetz, is_sparse,
1818
array_equivalent, _maybe_convert_string_to_object,
1919
is_categorical, needs_i8_conversion, is_datetimelike_v_numeric,
20-
is_internal_type)
20+
is_numeric_v_string_like, is_internal_type)
2121
from pandas.core.dtypes import DatetimeTZDtype
2222

2323
from pandas.core.index import Index, MultiIndex, _ensure_index
@@ -1082,8 +1082,16 @@ def eval(self, func, other, raise_on_error=True, try_cast=False, mgr=None):
10821082
# get the result, may need to transpose the other
10831083
def get_result(other):
10841084

1085-
# compute
1086-
result = func(values, other)
1085+
# avoid numpy warning of comparisons against None
1086+
if other is None:
1087+
result = not func.__name__ == 'eq'
1088+
1089+
# avoid numpy warning of elementwise comparisons to object
1090+
elif is_numeric_v_string_like(values, other):
1091+
result = False
1092+
1093+
else:
1094+
result = func(values, other)
10871095

10881096
# mask if needed
10891097
if isinstance(values_mask, np.ndarray) and values_mask.any():
@@ -3214,7 +3222,7 @@ def get(self, item, fastpath=True):
32143222
else:
32153223

32163224
if isnull(item):
3217-
raise ValueError("cannot label index with a null key")
3225+
raise TypeError("cannot label index with a null key")
32183226

32193227
indexer = self.items.get_indexer_for([item])
32203228
return self.reindex_indexer(new_axis=self.items[indexer],
@@ -4251,11 +4259,16 @@ def _possibly_compare(a, b, op):
42514259

42524260
# numpy deprecation warning to have i8 vs integer comparisons
42534261
if is_datetimelike_v_numeric(a, b):
4254-
res = False
4262+
result = False
4263+
4264+
# numpy deprecation warning if comparing numeric vs string-like
4265+
elif is_numeric_v_string_like(a, b):
4266+
result = False
4267+
42554268
else:
4256-
res = op(a, b)
4269+
result = op(a, b)
42574270

4258-
if np.isscalar(res) and (is_a_array or is_b_array):
4271+
if lib.isscalar(result) and (is_a_array or is_b_array):
42594272
type_names = [type(a).__name__, type(b).__name__]
42604273

42614274
if is_a_array:
@@ -4265,7 +4278,7 @@ def _possibly_compare(a, b, op):
42654278
type_names[1] = 'ndarray(dtype=%s)' % b.dtype
42664279

42674280
raise TypeError("Cannot compare types %r and %r" % tuple(type_names))
4268-
return res
4281+
return result
42694282

42704283

42714284
def _concat_indexes(indexes):

pandas/io/data.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1024,7 +1024,7 @@ def _validate_expiry(self, expiry):
10241024
if expiry in expiry_dates:
10251025
return expiry
10261026
else:
1027-
index = DatetimeIndex(expiry_dates).order()
1027+
index = DatetimeIndex(expiry_dates).sort_values()
10281028
return index[index.date >= expiry][0].date()
10291029

10301030
def get_forward_data(self, months, call=True, put=False, near=False,

pandas/tests/test_frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -5839,7 +5839,7 @@ def check(df):
58395839

58405840
def f():
58415841
df.loc[:,np.nan]
5842-
self.assertRaises(ValueError, f)
5842+
self.assertRaises(TypeError, f)
58435843

58445844

58455845
df = DataFrame([[1,2,3],[4,5,6]], index=[1,np.nan])

0 commit comments

Comments
 (0)