From 6d7dbc9ee7bff71524cd0cc8dc74d17977866991 Mon Sep 17 00:00:00 2001 From: Takafumi Arakaki Date: Fri, 20 Apr 2012 23:38:04 +0200 Subject: [PATCH 1/6] ENH: treat complex number in maybe_convert_objects --- pandas/src/inference.pyx | 28 +++++++++++++++++++++++----- pandas/src/numpy_helper.h | 4 ++++ pandas/src/util.pxd | 1 + pandas/tests/test_tseries.py | 7 +++++++ 4 files changed, 35 insertions(+), 5 deletions(-) diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index 9b3ac87691017..524dbf7514e16 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -281,9 +281,11 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, cdef: Py_ssize_t i, n ndarray[float64_t] floats + ndarray[complex64_t] complexes ndarray[int64_t] ints ndarray[uint8_t] bools bint seen_float = 0 + bint seen_complex = 0 bint seen_int = 0 bint seen_bool = 0 bint seen_object = 0 @@ -294,6 +296,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, n = len(objects) floats = np.empty(n, dtype='f8') + complexes = np.empty(n, dtype='c8') ints = np.empty(n, dtype='i8') bools = np.empty(n, dtype=np.uint8) @@ -305,7 +308,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, if val is None: seen_null = 1 - floats[i] = fnan + floats[i] = complexes[i] = fnan elif util.is_bool_object(val): seen_bool = 1 bools[i] = val @@ -316,15 +319,20 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, elif util.is_integer_object(val): seen_int = 1 floats[i] = val + complexes[i] = val if not seen_null: ints[i] = val elif util.is_float_object(val): - floats[i] = val + floats[i] = complexes[i] = val seen_float = 1 + elif util.is_complex_object(val): + complexes[i] = val + seen_complex = 1 elif try_float and not util.is_string_object(val): # this will convert Decimal objects try: floats[i] = float(val) + complexes[i] = complex(val) seen_float = 1 except Exception: seen_object = 1 @@ -334,14 +342,19 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, if not safe: if seen_null: if (seen_float or seen_int) and not seen_object: - return floats + if seen_complex: + return complexes + else: + return floats else: return objects else: if seen_object: return objects elif not seen_bool: - if seen_float: + if seen_complex: + return complexes + elif seen_float: return floats elif seen_int: return ints @@ -354,7 +367,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, # don't cast int to float, etc. if seen_null: if (seen_float or seen_int) and not seen_object: - return floats + if seen_complex: + return complexes + else: + return floats else: return objects else: @@ -363,6 +379,8 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, elif not seen_bool: if seen_int and seen_float: return objects + elif seen_complex: + return complexes elif seen_float: return floats elif seen_int: diff --git a/pandas/src/numpy_helper.h b/pandas/src/numpy_helper.h index 1cea82d42a2c5..0efd210047222 100644 --- a/pandas/src/numpy_helper.h +++ b/pandas/src/numpy_helper.h @@ -57,6 +57,10 @@ PANDAS_INLINE int is_float_object(PyObject* obj) { return (PyFloat_Check(obj) || PyArray_IsScalar(obj, Floating)); } +PANDAS_INLINE int +is_complex_object(PyObject* obj) { + return (PyComplex_Check(obj) || PyArray_IsScalar(obj, ComplexFloating)); +} PANDAS_INLINE int is_bool_object(PyObject* obj) { diff --git a/pandas/src/util.pxd b/pandas/src/util.pxd index c1c76b726a6d7..823f50a9fc0fb 100644 --- a/pandas/src/util.pxd +++ b/pandas/src/util.pxd @@ -4,6 +4,7 @@ cimport numpy as cnp cdef extern from "numpy_helper.h": inline int is_integer_object(object) inline int is_float_object(object) + inline int is_complex_object(object) inline int is_bool_object(object) inline int is_string_object(object) inline int is_datetime64_object(object) diff --git a/pandas/tests/test_tseries.py b/pandas/tests/test_tseries.py index cdeada76957c4..82739d9426a3d 100644 --- a/pandas/tests/test_tseries.py +++ b/pandas/tests/test_tseries.py @@ -200,6 +200,13 @@ def test_convert_objects_ints(): result = lib.maybe_convert_objects(arr) assert(issubclass(result.dtype.type, np.integer)) +def test_convert_objects_complex_number(): + for dtype in np.sctypes['complex']: + arr = np.array(list(1j * np.arange(20, dtype=dtype)), dtype='O') + assert(arr[0].dtype == np.dtype(dtype)) + result = lib.maybe_convert_objects(arr) + assert(issubclass(result.dtype.type, np.complexfloating)) + def test_rank(): from scipy.stats import rankdata from numpy import nan From ec1470156e724c714c8ac8a916d091570073b71c Mon Sep 17 00:00:00 2001 From: Takafumi Arakaki Date: Sat, 21 Apr 2012 00:00:13 +0200 Subject: [PATCH 2/6] ENH: treat complex number in maybe_convert_objects --- pandas/src/inference.pyx | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index 524dbf7514e16..beb36c158a077 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -234,31 +234,37 @@ def maybe_convert_numeric(ndarray[object] values, set na_values): cdef: Py_ssize_t i, n ndarray[float64_t] floats + ndarray[complex64_t] complexes ndarray[int64_t] ints bint seen_float = 0 + bint seen_complex = 0 object val float64_t fval n = len(values) floats = np.empty(n, dtype='f8') + complexes = np.empty(n, dtype='c8') ints = np.empty(n, dtype='i8') for i from 0 <= i < n: val = values[i] if util.is_float_object(val): - floats[i] = val + floats[i] = complexes[i] = val seen_float = 1 elif val in na_values: - floats[i] = nan + floats[i] = complexes[i] = nan seen_float = 1 elif val is None: - floats[i] = nan + floats[i] = complexes[i] = nan seen_float = 1 elif len(val) == 0: - floats[i] = nan + floats[i] = complexes[i] = nan seen_float = 1 + elif util.is_complex_object(val): + complexes[i] = val + seen_complex = 1 else: fval = util.floatify(val) floats[i] = fval @@ -268,7 +274,9 @@ def maybe_convert_numeric(ndarray[object] values, set na_values): else: ints[i] = fval - if seen_float: + if seen_complex: + return complexes + elif seen_float: return floats else: return ints From ba08846063f3e176d22adc8a0c727ed178b699df Mon Sep 17 00:00:00 2001 From: Takafumi Arakaki Date: Sat, 21 Apr 2012 00:01:26 +0200 Subject: [PATCH 3/6] TST: Add complex number in test_constructor_scalar_inference --- pandas/tests/test_frame.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 8837fb3a3888a..3ad31f6baba40 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1510,12 +1510,13 @@ def test_constructor_corner(self): def test_constructor_scalar_inference(self): data = {'int' : 1, 'bool' : True, - 'float' : 3., 'object' : 'foo'} + 'float' : 3., 'complex': 4j, 'object' : 'foo'} df = DataFrame(data, index=np.arange(10)) self.assert_(df['int'].dtype == np.int64) self.assert_(df['bool'].dtype == np.bool_) self.assert_(df['float'].dtype == np.float64) + self.assert_(df['complex'].dtype == np.complex64) self.assert_(df['object'].dtype == np.object_) def test_constructor_DataFrame(self): From a351e9435208b99ee1ba6e9aa0c5863613ab3177 Mon Sep 17 00:00:00 2001 From: Takafumi Arakaki Date: Sat, 21 Apr 2012 00:26:48 +0200 Subject: [PATCH 4/6] ENH: treat complex number in internals.form_blocks --- pandas/core/internals.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index c962dcc9f4c05..37653a21ceb03 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -980,6 +980,7 @@ def form_blocks(data, axes): # put "leftover" items in float bucket, where else? # generalize? float_dict = {} + complex_dict = {} int_dict = {} bool_dict = {} object_dict = {} @@ -987,6 +988,8 @@ def form_blocks(data, axes): for k, v in data.iteritems(): if issubclass(v.dtype.type, np.floating): float_dict[k] = v + elif issubclass(v.dtype.type, np.complexfloating): + complex_dict[k] = v elif issubclass(v.dtype.type, np.datetime64): datetime_dict[k] = v elif issubclass(v.dtype.type, np.integer): @@ -1001,6 +1004,10 @@ def form_blocks(data, axes): float_block = _simple_blockify(float_dict, items, np.float64) blocks.append(float_block) + if len(complex_dict): + complex_block = _simple_blockify(complex_dict, items, np.complex64) + blocks.append(complex_block) + if len(int_dict): int_block = _simple_blockify(int_dict, items, np.int64) blocks.append(int_block) From 824cb1045a21527b7515a7d0338fd4af36c7d656 Mon Sep 17 00:00:00 2001 From: Takafumi Arakaki Date: Sat, 21 Apr 2012 04:49:47 +0200 Subject: [PATCH 5/6] ENH: add internals.ComplexBlock --- pandas/core/internals.py | 22 +++++++++++++++++----- pandas/src/tseries.pyx | 2 +- pandas/tests/test_internals.py | 16 ++++++++++++---- 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 37653a21ceb03..6ec614f284dfd 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -243,6 +243,12 @@ def should_store(self, value): # unnecessarily return issubclass(value.dtype.type, np.floating) +class ComplexBlock(Block): + _can_hold_na = True + + def should_store(self, value): + return issubclass(value.dtype.type, np.complexfloating) + class IntBlock(Block): _can_hold_na = False @@ -260,7 +266,8 @@ class ObjectBlock(Block): def should_store(self, value): return not issubclass(value.dtype.type, - (np.integer, np.floating, np.bool_)) + (np.integer, np.floating, np.complexfloating, + np.bool_)) class DatetimeBlock(IntBlock): _can_hold_na = True @@ -272,6 +279,8 @@ def make_block(values, items, ref_items, do_integrity_check=False): if issubclass(vtype, np.floating): klass = FloatBlock + elif issubclass(vtype, np.complexfloating): + klass = ComplexBlock elif issubclass(vtype, np.datetime64): klass = DatetimeBlock elif issubclass(vtype, np.integer): @@ -416,7 +425,7 @@ def is_consolidated(self): def get_numeric_data(self, copy=False): num_blocks = [b for b in self.blocks - if isinstance(b, (IntBlock, FloatBlock))] + if isinstance(b, (IntBlock, FloatBlock, ComplexBlock))] indexer = np.sort(np.concatenate([b.ref_locs for b in num_blocks])) new_items = self.items.take(indexer) @@ -1094,8 +1103,9 @@ def _interleaved_dtype(blocks): have_bool = counts[BoolBlock] > 0 have_object = counts[ObjectBlock] > 0 have_float = counts[FloatBlock] > 0 + have_complex = counts[ComplexBlock] > 0 have_dt64 = counts[DatetimeBlock] > 0 - have_numeric = have_float or have_int + have_numeric = have_float or have_complex or have_int if have_object: return np.object_ @@ -1103,10 +1113,12 @@ def _interleaved_dtype(blocks): return np.object_ elif have_bool: return np.bool_ - elif have_int and not have_float: + elif have_int and not have_float and not have_complex: return np.int64 - elif have_dt64 and not have_float: + elif have_dt64 and not have_float and not have_complex: return np.datetime64 + elif have_complex: + return np.complex64 else: return np.float64 diff --git a/pandas/src/tseries.pyx b/pandas/src/tseries.pyx index 609f0686dc50a..0156bafb7d78e 100644 --- a/pandas/src/tseries.pyx +++ b/pandas/src/tseries.pyx @@ -156,7 +156,7 @@ cdef double INF = np.inf cdef double NEGINF = -INF cpdef checknull(object val): - if util.is_float_object(val): + if util.is_float_object(val) or util.is_complex_object(val): return val != val or val == INF or val == NEGINF elif util.is_datetime64_object(val): return val.view('i8') == NaT diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 84a0589443249..976b4439fffdf 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -19,13 +19,17 @@ def assert_block_equal(left, right): def get_float_mat(n, k): return np.repeat(np.atleast_2d(np.arange(k, dtype=float)), n, axis=0) -TEST_COLS = ['a', 'b', 'c', 'd', 'e', 'f', 'g'] +TEST_COLS = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'] N = 10 def get_float_ex(cols=['a', 'c', 'e']): floats = get_float_mat(N, 3).T return make_block(floats, cols, TEST_COLS) +def get_complex_ex(cols=['h']): + complexes = (get_float_mat(N, 1).T * 1j).astype(np.complex64) + return make_block(complexes, cols, TEST_COLS) + def get_obj_ex(cols=['b', 'd']): mat = np.empty((N, 2), dtype=object) mat[:, 0] = 'foo' @@ -44,6 +48,7 @@ class TestBlock(unittest.TestCase): def setUp(self): self.fblock = get_float_ex() + self.cblock = get_complex_ex() self.oblock = get_obj_ex() self.bool_block = get_bool_ex() self.int_block = get_int_ex() @@ -60,6 +65,7 @@ def _check(blk): assert_block_equal(blk, unpickled) _check(self.fblock) + _check(self.cblock) _check(self.oblock) _check(self.bool_block) @@ -175,7 +181,8 @@ def setUp(self): self.blocks = [get_float_ex(), get_obj_ex(), get_bool_ex(), - get_int_ex()] + get_int_ex(), + get_complex_ex()] self.mgr = BlockManager.from_blocks(self.blocks, np.arange(N)) def test_constructor_corner(self): @@ -198,13 +205,13 @@ def test_is_indexed_like(self): self.assert_(not self.mgr._is_indexed_like(mgr2)) def test_block_id_vector_item_dtypes(self): - expected = [0, 1, 0, 1, 0, 2, 3] + expected = [0, 1, 0, 1, 0, 2, 3, 4] result = self.mgr.block_id_vector assert_almost_equal(expected, result) result = self.mgr.item_dtypes expected = ['float64', 'object', 'float64', 'object', 'float64', - 'bool', 'int64'] + 'bool', 'int64', 'complex64'] self.assert_(np.array_equal(result, expected)) def test_union_block_items(self): @@ -298,6 +305,7 @@ def test_consolidate_ordering_issues(self): self.mgr.set('d', randn(N)) self.mgr.set('b', randn(N)) self.mgr.set('g', randn(N)) + self.mgr.set('h', randn(N)) cons = self.mgr.consolidate() self.assertEquals(cons.nblocks, 1) From bae81555be2c16cb191b5c9f2b9f9f305eea53d5 Mon Sep 17 00:00:00 2001 From: Takafumi Arakaki Date: Sat, 21 Apr 2012 04:57:53 +0200 Subject: [PATCH 6/6] BUG: fix max recursion error in test_reindex_items It looks like sorting by dtype itself does not work. To see that, try this snippet: >>> from numpy import dtype >>> sorted([dtype('bool'), dtype('float64'), dtype('complex64'), ... dtype('float64'), dtype('object')]) [dtype('bool'), dtype('float64'), dtype('complex64'), dtype('float64'), dtype('object')] --- pandas/core/internals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 6ec614f284dfd..d7bdcd5942dd9 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1126,7 +1126,7 @@ def _consolidate(blocks, items): """ Merge blocks having same dtype """ - get_dtype = lambda x: x.dtype + get_dtype = lambda x: x.dtype.name # sort by dtype grouper = itertools.groupby(sorted(blocks, key=get_dtype),