Skip to content

Commit 1a6da8d

Browse files
committed
BUG: fix reflected comparison operations for Categorical
By the time the comparison gets dispatched to _cat_compare_op, the first argument has become a zerodim array and no longer passes the isscalar test. ENH: add pd.lib.item_from_zerodim to extract values from zerodim arrays. ENH: make pd.lib.isscalar detect pd.Period, datetime.date and datetime.time.
1 parent 3579304 commit 1a6da8d

File tree

9 files changed

+155
-5
lines changed

9 files changed

+155
-5
lines changed

doc/source/whatsnew/v0.15.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ Bug Fixes
186186
- Bug in selecting from a ``Categorical`` with ``.iloc`` (:issue:`8623`)
187187
- Bug in groupby-transform with a Categorical (:issue:`8623`)
188188
- Bug in duplicated/drop_duplicates with a Categorical (:issue:`8623`)
189+
- Bug in ``Categorical`` reflected comparison operator raising if the first argument was a numpy array scalar (e.g. np.int64) (:issue:`8658`)
189190

190191

191192

pandas/core/categorical.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,16 @@ def f(self, other):
4242
# In other series, this leads to False, so do that here too
4343
ret[na_mask] = False
4444
return ret
45-
elif lib.isscalar(other):
45+
46+
# Numpy-1.9 and earlier may convert a scalar to a zerodim array during
47+
# comparison operation when second arg has higher priority, e.g.
48+
#
49+
# cat[0] < cat
50+
#
51+
# Here cat[0], which is, for example, ``np.int64(1)``, will have become
52+
# ``np.array(1)`` by the time it gets into this function.
53+
other = lib.item_from_zerodim(other)
54+
if lib.isscalar(other):
4655
if other in self.categories:
4756
i = self.categories.get_loc(other)
4857
return getattr(self._codes, op)(i)

pandas/core/common.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ def _check(cls, inst):
8484
ABCSparseArray = create_pandas_abc_type("ABCSparseArray", "_subtyp",
8585
('sparse_array', 'sparse_series'))
8686
ABCCategorical = create_pandas_abc_type("ABCCategorical","_typ",("categorical"))
87-
87+
ABCPeriod = create_pandas_abc_type("ABCPeriod", "_typ", ("period",))
8888

8989
class _ABCGeneric(type):
9090

pandas/lib.pyx

+48-1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import numpy as np
44

55
from numpy cimport *
66

7+
np.import_array()
78

89
cdef extern from "numpy/arrayobject.h":
910
cdef enum NPY_TYPES:
@@ -234,8 +235,54 @@ cpdef checknull_old(object val):
234235
else:
235236
return util._checknull(val)
236237

238+
# ABCPeriod cannot be imported right away from pandas.core.common.
239+
ABCPeriod = None
237240
def isscalar(object val):
238-
return np.isscalar(val) or val is None or PyDateTime_Check(val) or PyDelta_Check(val)
241+
"""
242+
Return True if given value is scalar.
243+
244+
This includes:
245+
- numpy array scalar (e.g. np.int64)
246+
- Python builtin numerics
247+
- Python builtin bytes and strings
248+
- None
249+
- instances of datetime.date, datetime.datetime and datetime.time
250+
- instances of datetime.timedelta
251+
- instances of pandas.Period
252+
253+
"""
254+
global ABCPeriod
255+
if ABCPeriod is None:
256+
from pandas.core.common import ABCPeriod as _ABCPeriod
257+
ABCPeriod = _ABCPeriod
258+
259+
return (np.PyArray_IsAnyScalar(val)
260+
# As of numpy-1.9, PyArray_IsAnyScalar misses bytes objects on Py3.
261+
or PyBytes_Check(val)
262+
or val is None
263+
or PyDate_Check(val)
264+
or PyDelta_Check(val)
265+
or PyTime_Check(val)
266+
or isinstance(val, ABCPeriod))
267+
268+
269+
def item_from_zerodim(object val):
270+
"""
271+
If the value is a zerodim array, return the item it contains.
272+
273+
Examples
274+
--------
275+
>>> item_from_zerodim(1)
276+
1
277+
>>> item_from_zerodim('foobar')
278+
'foobar'
279+
>>> item_from_zerodim(np.array(1))
280+
1
281+
>>> item_from_zerodim(np.array([1]))
282+
array([1])
283+
284+
"""
285+
return util.unbox_if_zerodim(val)
239286

240287

241288
@cython.wraparound(False)

pandas/src/numpy_helper.h

+15
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,21 @@ void set_array_not_contiguous(PyArrayObject *ao) {
167167
}
168168

169169

170+
// If arr is a zerodim array, return a proper array scalar (e.g. np.int64).
171+
// Otherwise, return arr as is.
172+
PANDAS_INLINE PyObject*
173+
unbox_if_zerodim(PyObject* arr) {
174+
if (PyArray_IsZeroDim(arr)) {
175+
PyObject *ret;
176+
ret = PyArray_ToScalar(PyArray_DATA(arr), arr);
177+
return ret;
178+
} else {
179+
Py_INCREF(arr);
180+
return arr;
181+
}
182+
}
183+
184+
170185
// PANDAS_INLINE PyObject*
171186
// get_base_ndarray(PyObject* ap) {
172187
// // if (!ap || (NULL == ap)) {

pandas/src/util.pxd

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ cdef extern from "numpy_helper.h":
2222
inline void transfer_object_column(char *dst, char *src, size_t stride,
2323
size_t length)
2424
object sarr_from_data(cnp.dtype, int length, void* data)
25+
inline object unbox_if_zerodim(object arr)
2526

2627
cdef inline object get_value_at(ndarray arr, object loc):
2728
cdef:
@@ -64,7 +65,6 @@ cdef inline int is_contiguous(ndarray arr):
6465
cdef inline is_array(object o):
6566
return cnp.PyArray_Check(o)
6667

67-
6868
cdef inline bint _checknull(object val):
6969
try:
7070
return val is None or (cpython.PyFloat_Check(val) and val != val)

pandas/tests/test_categorical.py

+6
Original file line numberDiff line numberDiff line change
@@ -917,6 +917,12 @@ def test_datetime_categorical_comparison(self):
917917
self.assert_numpy_array_equal(dt_cat > dt_cat[0], [False, True, True])
918918
self.assert_numpy_array_equal(dt_cat[0] < dt_cat, [False, True, True])
919919

920+
def test_reflected_comparison_with_scalars(self):
921+
# GH8658
922+
cat = pd.Categorical([1, 2, 3])
923+
self.assert_numpy_array_equal(cat > cat[0], [False, True, True])
924+
self.assert_numpy_array_equal(cat[0] < cat, [False, True, True])
925+
920926

921927
class TestCategoricalAsBlock(tm.TestCase):
922928
_multiprocess_can_split_ = True

pandas/tests/test_lib.py

+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
from datetime import datetime, timedelta, date, time
2+
3+
import numpy as np
4+
5+
import pandas as pd
6+
from pandas.lib import isscalar, item_from_zerodim
7+
import pandas.util.testing as tm
8+
9+
10+
class TestIsscalar(tm.TestCase):
11+
def test_isscalar_builtin_scalars(self):
12+
self.assertTrue(isscalar(None))
13+
self.assertTrue(isscalar(True))
14+
self.assertTrue(isscalar(False))
15+
self.assertTrue(isscalar(0.))
16+
self.assertTrue(isscalar(np.nan))
17+
self.assertTrue(isscalar('foobar'))
18+
self.assertTrue(isscalar(b'foobar'))
19+
self.assertTrue(isscalar(u'foobar'))
20+
self.assertTrue(isscalar(datetime(2014, 1, 1)))
21+
self.assertTrue(isscalar(date(2014, 1, 1)))
22+
self.assertTrue(isscalar(time(12, 0)))
23+
self.assertTrue(isscalar(timedelta(hours=1)))
24+
self.assertTrue(isscalar(pd.NaT))
25+
26+
def test_isscalar_builtin_nonscalars(self):
27+
self.assertFalse(isscalar({}))
28+
self.assertFalse(isscalar([]))
29+
self.assertFalse(isscalar([1]))
30+
self.assertFalse(isscalar(()))
31+
self.assertFalse(isscalar((1,)))
32+
self.assertFalse(isscalar(slice(None)))
33+
self.assertFalse(isscalar(Ellipsis))
34+
35+
def test_isscalar_numpy_array_scalars(self):
36+
self.assertTrue(isscalar(np.int64(1)))
37+
self.assertTrue(isscalar(np.float64(1.)))
38+
self.assertTrue(isscalar(np.int32(1)))
39+
self.assertTrue(isscalar(np.object_('foobar')))
40+
self.assertTrue(isscalar(np.str_('foobar')))
41+
self.assertTrue(isscalar(np.unicode_(u'foobar')))
42+
self.assertTrue(isscalar(np.bytes_(b'foobar')))
43+
self.assertTrue(isscalar(np.datetime64('2014-01-01')))
44+
self.assertTrue(isscalar(np.timedelta64(1, 'h')))
45+
46+
def test_isscalar_numpy_zerodim_arrays(self):
47+
for zerodim in [np.array(1),
48+
np.array('foobar'),
49+
np.array(np.datetime64('2014-01-01')),
50+
np.array(np.timedelta64(1, 'h'))]:
51+
self.assertFalse(isscalar(zerodim))
52+
self.assertTrue(isscalar(item_from_zerodim(zerodim)))
53+
54+
def test_isscalar_numpy_arrays(self):
55+
self.assertFalse(isscalar(np.array([])))
56+
self.assertFalse(isscalar(np.array([[]])))
57+
self.assertFalse(isscalar(np.matrix('1; 2')))
58+
59+
def test_isscalar_pandas_scalars(self):
60+
self.assertTrue(isscalar(pd.Timestamp('2014-01-01')))
61+
self.assertTrue(isscalar(pd.Timedelta(hours=1)))
62+
self.assertTrue(isscalar(pd.Period('2014-01-01')))
63+
64+
def test_isscalar_pandas_containers(self):
65+
self.assertFalse(isscalar(pd.Series()))
66+
self.assertFalse(isscalar(pd.Series([1])))
67+
self.assertFalse(isscalar(pd.DataFrame()))
68+
self.assertFalse(isscalar(pd.DataFrame([[1]])))
69+
self.assertFalse(isscalar(pd.Panel()))
70+
self.assertFalse(isscalar(pd.Panel([[[1]]])))
71+
self.assertFalse(isscalar(pd.Index([])))
72+
self.assertFalse(isscalar(pd.Index([1])))

pandas/tseries/period.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ class Period(PandasObject):
6363
"""
6464
__slots__ = ['freq', 'ordinal']
6565
_comparables = ['name','freqstr']
66+
_typ = 'period'
6667

6768
@classmethod
6869
def _from_ordinal(cls, ordinal, freq):
@@ -498,7 +499,6 @@ def strftime(self, fmt):
498499
base, mult = _gfc(self.freq)
499500
return tslib.period_format(self.ordinal, base, fmt)
500501

501-
502502
def _get_ordinals(data, freq):
503503
f = lambda x: Period(x, freq=freq).ordinal
504504
if isinstance(data[0], Period):

0 commit comments

Comments
 (0)