diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index ab57f1fb6ea10..2cba928be5ab7 100644 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -116,3 +116,7 @@ Bug Fixes - Bug in ``read_csv`` and ``read_table`` when using ``skip_rows`` parameter if blank lines are present. (:issue:`9832`) - Bug in ``read_csv()`` interprets ``index_col=True`` as ``1`` (:issue:`9798`) + +- Bug in unequal comparisons between a ``Series`` of dtype ``"category"`` and a scalar (e.g. + ``Series(Categorical(list("abc"), categories=list("cba"), ordered=True)) > "b"``), which + wouldn't use the order of the categories but would instead use the lexicographical order. (:issue:`9848`) \ No newline at end of file diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 954d2c8a77326..2af9cd43faaef 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -594,20 +594,26 @@ def wrapper(self, other): mask = isnull(self) - values = self.get_values() - other = _index.convert_scalar(values,_values_from_object(other)) + if com.is_categorical_dtype(self): + # cats are a special case as get_values() would return an ndarray, which would then + # not take categories ordering into account + # we can go directly to op, as the na_op would just test again and dispatch to it. 
+ res = op(self.values, other) + else: + values = self.get_values() + other = _index.convert_scalar(values,_values_from_object(other)) - if issubclass(values.dtype.type, (np.datetime64, np.timedelta64)): - values = values.view('i8') + if issubclass(values.dtype.type, (np.datetime64, np.timedelta64)): + values = values.view('i8') - # scalars - res = na_op(values, other) - if np.isscalar(res): - raise TypeError('Could not compare %s type with Series' - % type(other)) + # scalars + res = na_op(values, other) + if np.isscalar(res): + raise TypeError('Could not compare %s type with Series' + % type(other)) - # always return a full value series here - res = _values_from_object(res) + # always return a full value series here + res = _values_from_object(res) res = pd.Series(res, index=self.index, name=self.name, dtype='bool') diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 7f4b3fcb94dfa..4c5678bf6633f 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -114,6 +114,9 @@ def f(): Categorical([1,2], [1,2,np.nan, np.nan]) self.assertRaises(ValueError, f) + # The default should be unordered + c1 = Categorical(["a", "b", "c", "a"]) + self.assertFalse(c1.ordered) # Categorical as input c1 = Categorical(["a", "b", "c", "a"]) @@ -367,6 +370,13 @@ def f(): self.assertRaises(TypeError, lambda: a < cat) self.assertRaises(TypeError, lambda: a < cat_rev) + # Make sure that unequal comparisons take the categories order into account + cat_rev = pd.Categorical(list("abc"), categories=list("cba"), ordered=True) + exp = np.array([True, False, False]) + res = cat_rev > "b" + self.assert_numpy_array_equal(res, exp) + + def test_na_flags_int_categories(self): # #1457 @@ -2390,6 +2400,18 @@ def test_comparisons(self): exp = Series([False, False, True]) tm.assert_series_equal(res, exp) + scalar = base[1] + res = cat > scalar + exp = Series([False, False, True]) + exp2 = cat.values > scalar + tm.assert_series_equal(res, exp) + 
tm.assert_numpy_array_equal(res.values, exp2) + res_rev = cat_rev > scalar + exp_rev = Series([True, False, False]) + exp_rev2 = cat_rev.values > scalar + tm.assert_series_equal(res_rev, exp_rev) + tm.assert_numpy_array_equal(res_rev.values, exp_rev2) + # Only categories with same categories can be compared def f(): cat > cat_rev @@ -2408,9 +2430,16 @@ def f(): self.assertRaises(TypeError, lambda: a < cat) self.assertRaises(TypeError, lambda: a < cat_rev) - # Categoricals can be compared to scalar values - res = cat_rev > base[0] - tm.assert_series_equal(res, exp) + # unequal comparison should raise for unordered cats + cat = Series(Categorical(list("abc"))) + def f(): + cat > "b" + self.assertRaises(TypeError, f) + cat = Series(Categorical(list("abc"), ordered=False)) + def f(): + cat > "b" + self.assertRaises(TypeError, f) + # And test NaN handling... cat = Series(Categorical(["a","b","c", np.nan]))