From 53b40679897b088a65a829249b19a62f06784586 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 16 Jan 2018 22:41:09 -0800 Subject: [PATCH 1/4] dispatch some Series comparison methods to Index subclasses --- pandas/core/indexes/datetimes.py | 5 +- pandas/core/ops.py | 103 +++++++++++++++++-------------- pandas/tests/indexes/common.py | 15 ++--- 3 files changed, 69 insertions(+), 54 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index d83d2d2c93ec8..978674b9d2a8d 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -666,7 +666,10 @@ def _assert_tzawareness_compat(self, other): if is_datetime64tz_dtype(other): # Get tzinfo from Series dtype other_tz = other.dtype.tz - if self.tz is None: + if other is libts.NaT: + # pd.NaT quacks both aware and naive + pass + elif self.tz is None: if other_tz is not None: raise TypeError('Cannot compare tz-naive and tz-aware ' 'datetime-like objects.') diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 99f7e7309d463..0db71c628d884 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -39,7 +39,7 @@ from pandas.core.dtypes.generic import ( ABCSeries, ABCDataFrame, - ABCIndex, ABCDatetimeIndex, + ABCIndex, ABCDatetimeIndex, ABCIndexClass, ABCPeriodIndex) # ----------------------------------------------------------------------------- @@ -715,7 +715,7 @@ def _get_series_op_result_name(left, right): def _comp_method_OBJECT_ARRAY(op, x, y): if isinstance(y, list): y = construct_1d_object_array_from_listlike(y) - if isinstance(y, (np.ndarray, ABCSeries, ABCIndex)): + if isinstance(y, (np.ndarray, ABCSeries, ABCIndexClass)): if not is_object_dtype(y.dtype): y = y.astype(np.object_) @@ -742,8 +742,9 @@ def na_op(x, y): return op(x, y) elif is_categorical_dtype(y) and not is_scalar(y): return op(y, x) + # TODO: Does this make sense or should op be reversed? - if is_object_dtype(x.dtype): + elif is_object_dtype(x.dtype): result = _comp_method_OBJECT_ARRAY(op, x, y) else: @@ -764,15 +765,9 @@ def na_op(x, y): # we have a datetime/timedelta and may need to convert mask = None - if (needs_i8_conversion(x) or - (not is_scalar(y) and needs_i8_conversion(y))): - - if is_scalar(y): - mask = isna(x) - y = libindex.convert_scalar(x, _values_from_object(y)) - else: - mask = isna(x) | isna(y) - y = y.view('i8') + if not is_scalar(y) and needs_i8_conversion(y): + mask = isna(x) | isna(y) + y = y.view('i8') x = x.view('i8') try: @@ -788,20 +783,36 @@ def na_op(x, y): return result - def wrapper(self, other, axis=None): - # Validate the axis parameter - if axis is not None: - self._get_axis_number(axis) - - if isinstance(other, ABCSeries): - name = _maybe_match_name(self, other) - if not self._indexed_same(other): - msg = 'Can only compare identically-labeled Series objects' - raise ValueError(msg) - return self._constructor(na_op(self.values, other.values), - index=self.index, name=name) - elif isinstance(other, ABCDataFrame): # pragma: no cover + def wrapper(self, other): + if isinstance(other, ABCDataFrame): # pragma: no cover return NotImplemented + + elif isinstance(other, ABCSeries) and not self._indexed_same(other): + msg = 'Can only compare identically-labeled Series objects' + raise ValueError(msg) + + res_name = _get_series_op_result_name(self, other) + + if is_timedelta64_dtype(self): + res = op(pd.TimedeltaIndex(self), other) + return self._constructor(res, index=self.index, name=res_name) + + elif is_datetime64_dtype(self) or is_datetime64tz_dtype(self): + # kludge; DatetimeIndex refuses to compare against None or + # datetime.date; until the "right" behavior is resolved, we cast + # these types here to types that DatetimeIndex understand. + if type(other) is datetime.date: + other = datetime.datetime(other.year, other.month, other.day) + elif other is None: + other = pd.NaT + res = op(pd.DatetimeIndex(self), other) + return self._constructor(res, index=self.index, name=res_name) + + elif isinstance(other, ABCSeries): + res = na_op(self.values, other.values) + return self._constructor(res, + index=self.index, name=res_name) + elif isinstance(other, (np.ndarray, pd.Index)): # do not check length of zerodim array # as it will broadcast @@ -809,33 +820,32 @@ def wrapper(self, other, axis=None): len(self) != len(other)): raise ValueError('Lengths must match to compare') - if isinstance(other, ABCPeriodIndex): - # temp workaround until fixing GH 13637 - # tested in test_nat_comparisons - # (pandas.tests.series.test_operators.TestSeriesOperators) - return self._constructor(na_op(self.values, - other.astype(object).values), - index=self.index) - - return self._constructor(na_op(self.values, np.asarray(other)), + res = na_op(self.values, np.asarray(other)) + return self._constructor(res, index=self.index).__finalize__(self) + # TODO: Why __finalize__ here but not elsewhere? - elif isinstance(other, pd.Categorical): - if not is_categorical_dtype(self): - msg = ("Cannot compare a Categorical for op {op} with Series " - "of dtype {typ}.\nIf you want to compare values, use " - "'series np.asarray(other)'.") - raise TypeError(msg.format(op=op, typ=self.dtype)) - - if is_categorical_dtype(self): + elif is_categorical_dtype(self): # cats are a special case as get_values() would return an ndarray, # which would then not take categories ordering into account # we can go directly to op, as the na_op would just test again and # dispatch to it. with np.errstate(all='ignore'): res = op(self.values, other) + return self._constructor(res, index=self.index, + name=res_name, dtype='bool') + + elif isinstance(other, pd.Categorical): + # Ordering of conditions here matters; we know at this point + # that not is_categorical_dtype(self) + msg = ("Cannot compare a Categorical for op {op} with Series " + "of dtype {typ}.\nIf you want to compare values, use " + "'series np.asarray(other)'.") + raise TypeError(msg.format(op=op, typ=self.dtype)) + else: values = self.get_values() + # TODO: why get_values() here and just values elsewhere? if isinstance(other, (list, np.ndarray)): other = np.asarray(other) @@ -848,9 +858,10 @@ def wrapper(self, other, axis=None): # always return a full value series here res = _values_from_object(res) - res = pd.Series(res, index=self.index, name=self.name, dtype='bool') - return res - + res = pd.Series(res, index=self.index, name=res_name, dtype='bool') + # TODO: Why not use self._constructor here? + # TODO: pass dtype='bool' in other locations? + return res return wrapper @@ -868,7 +879,7 @@ def na_op(x, y): y = construct_1d_object_array_from_listlike(y) if isinstance(y, (np.ndarray, ABCSeries)): - if (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype)): + if is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype): result = op(x, y) # when would this be hit? else: x = _ensure_object(x) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 8948c5f79900d..bfba134541164 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -760,11 +760,12 @@ def test_equals_op(self): if isinstance(index_a, PeriodIndex): return + len_msg = "Lengths must match|different lengths" n = len(index_a) index_b = index_a[0:-1] index_c = index_a[0:-1].append(index_a[-2:-1]) index_d = index_a[0:1] - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, len_msg): index_a == index_b expected1 = np.array([True] * n) expected2 = np.array([True] * (n - 1) + [False]) @@ -776,7 +777,7 @@ def test_equals_op(self): array_b = np.array(index_a[0:-1]) array_c = np.array(index_a[0:-1].append(index_a[-2:-1])) array_d = np.array(index_a[0:1]) - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, len_msg): index_a == array_b tm.assert_numpy_array_equal(index_a == array_a, expected1) tm.assert_numpy_array_equal(index_a == array_c, expected2) @@ -786,22 +787,22 @@ def test_equals_op(self): series_b = Series(array_b) series_c = Series(array_c) series_d = Series(array_d) - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, len_msg): index_a == series_b tm.assert_numpy_array_equal(index_a == series_a, expected1) tm.assert_numpy_array_equal(index_a == series_c, expected2) # cases where length is 1 for one of them - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, len_msg): index_a == index_d - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, len_msg): index_a == series_d - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, len_msg): index_a == array_d msg = "Can only compare identically-labeled Series objects" with tm.assert_raises_regex(ValueError, msg): series_a == series_d - with tm.assert_raises_regex(ValueError, "Lengths must match"): + with tm.assert_raises_regex(ValueError, len_msg): series_a == array_d # comparing with a scalar should broadcast; note that we are excluding From e93e879cfe740268fc6fbe6505df268f4ee51b28 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 17 Jan 2018 10:23:00 -0800 Subject: [PATCH 2/4] Move ndarray and Index cases to the end --- pandas/core/ops.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 0db71c628d884..8109e1145b01b 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -809,22 +809,12 @@ def wrapper(self, other): return self._constructor(res, index=self.index, name=res_name) elif isinstance(other, ABCSeries): + # Note: Ordering matters; this needs to go before + # is_categorical_dtype(self) branch. res = na_op(self.values, other.values) return self._constructor(res, index=self.index, name=res_name) - elif isinstance(other, (np.ndarray, pd.Index)): - # do not check length of zerodim array - # as it will broadcast - if (not is_scalar(lib.item_from_zerodim(other)) and - len(self) != len(other)): - raise ValueError('Lengths must match to compare') - - res = na_op(self.values, np.asarray(other)) - return self._constructor(res, - index=self.index).__finalize__(self) - # TODO: Why __finalize__ here but not elsewhere? - elif is_categorical_dtype(self): # cats are a special case as get_values() would return an ndarray, # which would then not take categories ordering into account @@ -832,6 +822,7 @@ def wrapper(self, other): # dispatch to it. with np.errstate(all='ignore'): res = op(self.values, other) + # Note: self.values is a pd.Categorical object return self._constructor(res, index=self.index, name=res_name, dtype='bool') @@ -843,6 +834,18 @@ def wrapper(self, other): "'series np.asarray(other)'.") raise TypeError(msg.format(op=op, typ=self.dtype)) + elif isinstance(other, (np.ndarray, pd.Index)): + # do not check length of zerodim array + # as it will broadcast + if (not is_scalar(lib.item_from_zerodim(other)) and + len(self) != len(other)): + raise ValueError('Lengths must match to compare') + + res = na_op(self.values, np.asarray(other)) + return self._constructor(res, + index=self.index).__finalize__(self) + # TODO: Why __finalize__ here but not elsewhere? + else: values = self.get_values() # TODO: why get_values() here and just values elsewhere? From f8c36053a49bc84c18fe94097e69ae5f93c9e25d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 17 Jan 2018 10:28:55 -0800 Subject: [PATCH 3/4] revert unrelated edit --- pandas/core/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 8109e1145b01b..b7166a49a0ffa 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -882,7 +882,7 @@ def na_op(x, y): y = construct_1d_object_array_from_listlike(y) if isinstance(y, (np.ndarray, ABCSeries)): - if is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype): + if (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype)): result = op(x, y) # when would this be hit? else: x = _ensure_object(x) From 947c81a62f339d63ef49f2e0b9c03a800e2b795f Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 17 Jan 2018 22:50:38 -0800 Subject: [PATCH 4/4] flake8 unused imports --- pandas/core/ops.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 3b4c9869edcd0..cfd75201fcd53 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -11,7 +11,7 @@ import pandas as pd import datetime -from pandas._libs import (lib, index as libindex, +from pandas._libs import (lib, tslib as libts, algos as libalgos, iNaT) from pandas import compat @@ -39,8 +39,7 @@ from pandas.core.dtypes.generic import ( ABCSeries, ABCDataFrame, - ABCIndex, ABCDatetimeIndex, ABCIndexClass, - ABCPeriodIndex) + ABCIndex, ABCDatetimeIndex, ABCIndexClass) # ----------------------------------------------------------------------------- # Functions that add arithmetic methods to objects, given arithmetic factory