Skip to content

Commit 02de853

Browse files
committed
Merge branch 'add-basic-zerodim-array-support' of https://github.com/immerrr/pandas into immerrr-add-basic-zerodim-array-support
2 parents 1746df2 + 1a6da8d commit 02de853

File tree

10 files changed

+157
-9
lines changed

10 files changed

+157
-9
lines changed

doc/source/whatsnew/v0.15.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ Bug Fixes
187187
- Bug in selecting from a ``Categorical`` with ``.iloc`` (:issue:`8623`)
188188
- Bug in groupby-transform with a Categorical (:issue:`8623`)
189189
- Bug in duplicated/drop_duplicates with a Categorical (:issue:`8623`)
190+
- Bug in ``Categorical`` reflected comparison operator raising if the first argument was a numpy array scalar (e.g. ``np.int64``) (:issue:`8658`)
190191

191192

192193

pandas/core/categorical.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,16 @@ def f(self, other):
4242
# In other series, this leads to False, so do that here too
4343
ret[na_mask] = False
4444
return ret
45-
elif lib.isscalar(other):
45+
46+
# Numpy-1.9 and earlier may convert a scalar to a zerodim array during
47+
# comparison operation when second arg has higher priority, e.g.
48+
#
49+
# cat[0] < cat
50+
#
51+
# With cat[0], for example, being ``np.int64(1)`` by the time it gets
52+
# into this function would become ``np.array(1)``.
53+
other = lib.item_from_zerodim(other)
54+
if lib.isscalar(other):
4655
if other in self.categories:
4756
i = self.categories.get_loc(other)
4857
return getattr(self._codes, op)(i)

pandas/core/common.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,7 @@ class AmbiguousIndexError(PandasError, KeyError):
5858
def create_pandas_abc_type(name, attr, comp):
5959
@classmethod
6060
def _check(cls, inst):
61-
result = getattr(inst, attr, None)
62-
if result is None:
63-
return False
64-
return result in comp
61+
return getattr(inst, attr, '_typ') in comp
6562
dct = dict(__instancecheck__=_check,
6663
__subclasscheck__=_check)
6764
meta = type("ABCBase", (type,), dct)
@@ -84,7 +81,7 @@ def _check(cls, inst):
8481
ABCSparseArray = create_pandas_abc_type("ABCSparseArray", "_subtyp",
8582
('sparse_array', 'sparse_series'))
8683
ABCCategorical = create_pandas_abc_type("ABCCategorical","_typ",("categorical"))
87-
84+
ABCPeriod = create_pandas_abc_type("ABCPeriod", "_typ", ("period",))
8885

8986
class _ABCGeneric(type):
9087

pandas/lib.pyx

+42-1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import numpy as np
44

55
from numpy cimport *
66

7+
np.import_array()
78

89
cdef extern from "numpy/arrayobject.h":
910
cdef enum NPY_TYPES:
@@ -235,7 +236,47 @@ cpdef checknull_old(object val):
235236
return util._checknull(val)
236237

237238
def isscalar(object val):
238-
return np.isscalar(val) or val is None or PyDateTime_Check(val) or PyDelta_Check(val)
239+
"""
240+
Return True if given value is scalar.
241+
242+
This includes:
243+
- numpy array scalar (e.g. np.int64)
244+
- Python builtin numerics
245+
- Python builtin byte arrays and strings
246+
- None
247+
- instances of datetime.date, datetime.datetime and datetime.time
248+
- instances of datetime.timedelta
249+
- Period
250+
251+
"""
252+
253+
return (np.PyArray_IsAnyScalar(val)
254+
# As of numpy-1.9, PyArray_IsAnyScalar misses bytes objects on Py3.
255+
or PyBytes_Check(val)
256+
or val is None
257+
or PyDate_Check(val)
258+
or PyDelta_Check(val)
259+
or PyTime_Check(val)
260+
or util.is_period_object(val))
261+
262+
263+
def item_from_zerodim(object val):
264+
"""
265+
If the value is a zerodim array, return the item it contains.
266+
267+
Examples
268+
--------
269+
>>> item_from_zerodim(1)
270+
1
271+
>>> item_from_zerodim('foobar')
272+
'foobar'
273+
>>> item_from_zerodim(np.array(1))
274+
1
275+
>>> item_from_zerodim(np.array([1]))
276+
array([1])
277+
278+
"""
279+
return util.unbox_if_zerodim(val)
239280

240281

241282
@cython.wraparound(False)

pandas/src/inference.pyx

+4
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ def is_bool(object obj):
1616
def is_complex(object obj):
1717
return util.is_complex_object(obj)
1818

19+
def is_period(object val):
20+
""" Return a boolean if this is a Period object """
21+
return util.is_period_object(val)
22+
1923
_TYPE_MAP = {
2024
'int8': 'integer',
2125
'int16': 'integer',

pandas/src/numpy_helper.h

+15
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,21 @@ void set_array_not_contiguous(PyArrayObject *ao) {
167167
}
168168

169169

170+
// If arr is zerodim array, return a proper array scalar (e.g. np.int64).
171+
// Otherwise, return arr as is.
172+
PANDAS_INLINE PyObject*
173+
unbox_if_zerodim(PyObject* arr) {
174+
if (PyArray_IsZeroDim(arr)) {
175+
PyObject *ret;
176+
ret = PyArray_ToScalar(PyArray_DATA(arr), arr);
177+
return ret;
178+
} else {
179+
Py_INCREF(arr);
180+
return arr;
181+
}
182+
}
183+
184+
170185
// PANDAS_INLINE PyObject*
171186
// get_base_ndarray(PyObject* ap) {
172187
// // if (!ap || (NULL == ap)) {

pandas/src/util.pxd

+4-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ cdef extern from "numpy_helper.h":
2222
inline void transfer_object_column(char *dst, char *src, size_t stride,
2323
size_t length)
2424
object sarr_from_data(cnp.dtype, int length, void* data)
25+
inline object unbox_if_zerodim(object arr)
2526

2627
cdef inline object get_value_at(ndarray arr, object loc):
2728
cdef:
@@ -64,7 +65,6 @@ cdef inline int is_contiguous(ndarray arr):
6465
cdef inline is_array(object o):
6566
return cnp.PyArray_Check(o)
6667

67-
6868
cdef inline bint _checknull(object val):
6969
try:
7070
return val is None or (cpython.PyFloat_Check(val) and val != val)
@@ -82,3 +82,6 @@ cdef inline bint _checknull_old(object val):
8282

8383
cdef inline bint _checknan(object val):
8484
return not cnp.PyArray_Check(val) and val != val
85+
86+
cdef inline bint is_period_object(object val):
87+
return getattr(val,'_typ','_typ') == 'period'

pandas/tests/test_categorical.py

+6
Original file line numberDiff line numberDiff line change
@@ -917,6 +917,12 @@ def test_datetime_categorical_comparison(self):
917917
self.assert_numpy_array_equal(dt_cat > dt_cat[0], [False, True, True])
918918
self.assert_numpy_array_equal(dt_cat[0] < dt_cat, [False, True, True])
919919

920+
def test_reflected_comparison_with_scalars(self):
921+
# GH8658
922+
cat = pd.Categorical([1, 2, 3])
923+
self.assert_numpy_array_equal(cat > cat[0], [False, True, True])
924+
self.assert_numpy_array_equal(cat[0] < cat, [False, True, True])
925+
920926

921927
class TestCategoricalAsBlock(tm.TestCase):
922928
_multiprocess_can_split_ = True

pandas/tests/test_lib.py

+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
from datetime import datetime, timedelta, date, time
2+
3+
import numpy as np
4+
5+
import pandas as pd
6+
from pandas.lib import isscalar, item_from_zerodim
7+
import pandas.util.testing as tm
8+
9+
10+
class TestIsscalar(tm.TestCase):
11+
def test_isscalar_builtin_scalars(self):
12+
self.assertTrue(isscalar(None))
13+
self.assertTrue(isscalar(True))
14+
self.assertTrue(isscalar(False))
15+
self.assertTrue(isscalar(0.))
16+
self.assertTrue(isscalar(np.nan))
17+
self.assertTrue(isscalar('foobar'))
18+
self.assertTrue(isscalar(b'foobar'))
19+
self.assertTrue(isscalar(u'foobar'))
20+
self.assertTrue(isscalar(datetime(2014, 1, 1)))
21+
self.assertTrue(isscalar(date(2014, 1, 1)))
22+
self.assertTrue(isscalar(time(12, 0)))
23+
self.assertTrue(isscalar(timedelta(hours=1)))
24+
self.assertTrue(isscalar(pd.NaT))
25+
26+
def test_isscalar_builtin_nonscalars(self):
27+
self.assertFalse(isscalar({}))
28+
self.assertFalse(isscalar([]))
29+
self.assertFalse(isscalar([1]))
30+
self.assertFalse(isscalar(()))
31+
self.assertFalse(isscalar((1,)))
32+
self.assertFalse(isscalar(slice(None)))
33+
self.assertFalse(isscalar(Ellipsis))
34+
35+
def test_isscalar_numpy_array_scalars(self):
36+
self.assertTrue(isscalar(np.int64(1)))
37+
self.assertTrue(isscalar(np.float64(1.)))
38+
self.assertTrue(isscalar(np.int32(1)))
39+
self.assertTrue(isscalar(np.object_('foobar')))
40+
self.assertTrue(isscalar(np.str_('foobar')))
41+
self.assertTrue(isscalar(np.unicode_(u'foobar')))
42+
self.assertTrue(isscalar(np.bytes_(b'foobar')))
43+
self.assertTrue(isscalar(np.datetime64('2014-01-01')))
44+
self.assertTrue(isscalar(np.timedelta64(1, 'h')))
45+
46+
def test_isscalar_numpy_zerodim_arrays(self):
47+
for zerodim in [np.array(1),
48+
np.array('foobar'),
49+
np.array(np.datetime64('2014-01-01')),
50+
np.array(np.timedelta64(1, 'h'))]:
51+
self.assertFalse(isscalar(zerodim))
52+
self.assertTrue(isscalar(item_from_zerodim(zerodim)))
53+
54+
def test_isscalar_numpy_arrays(self):
55+
self.assertFalse(isscalar(np.array([])))
56+
self.assertFalse(isscalar(np.array([[]])))
57+
self.assertFalse(isscalar(np.matrix('1; 2')))
58+
59+
def test_isscalar_pandas_scalars(self):
60+
self.assertTrue(isscalar(pd.Timestamp('2014-01-01')))
61+
self.assertTrue(isscalar(pd.Timedelta(hours=1)))
62+
self.assertTrue(isscalar(pd.Period('2014-01-01')))
63+
64+
def test_isscalar_pandas_containers(self):
65+
self.assertFalse(isscalar(pd.Series()))
66+
self.assertFalse(isscalar(pd.Series([1])))
67+
self.assertFalse(isscalar(pd.DataFrame()))
68+
self.assertFalse(isscalar(pd.DataFrame([[1]])))
69+
self.assertFalse(isscalar(pd.Panel()))
70+
self.assertFalse(isscalar(pd.Panel([[[1]]])))
71+
self.assertFalse(isscalar(pd.Index([])))
72+
self.assertFalse(isscalar(pd.Index([1])))

pandas/tseries/period.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ class Period(PandasObject):
6363
"""
6464
__slots__ = ['freq', 'ordinal']
6565
_comparables = ['name','freqstr']
66+
_typ = 'period'
6667

6768
@classmethod
6869
def _from_ordinal(cls, ordinal, freq):
@@ -498,7 +499,6 @@ def strftime(self, fmt):
498499
base, mult = _gfc(self.freq)
499500
return tslib.period_format(self.ordinal, base, fmt)
500501

501-
502502
def _get_ordinals(data, freq):
503503
f = lambda x: Period(x, freq=freq).ordinal
504504
if isinstance(data[0], Period):

0 commit comments

Comments
 (0)