Skip to content

Better treatment for complex numbers #1098

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 25 additions & 6 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,12 @@ def should_store(self, value):
# unnecessarily
return issubclass(value.dtype.type, np.floating)

class ComplexBlock(Block):
    """Block subclass used for values with a complex floating-point dtype."""

    # Declares that this block type can hold NA values.
    _can_hold_na = True

    def should_store(self, value):
        """Return True when ``value``'s dtype is a complex floating type.

        ``value`` must expose ``dtype.type`` (e.g. a NumPy array).
        """
        return issubclass(value.dtype.type, np.complexfloating)

class IntBlock(Block):
_can_hold_na = False

Expand All @@ -260,7 +266,8 @@ class ObjectBlock(Block):

def should_store(self, value):
return not issubclass(value.dtype.type,
(np.integer, np.floating, np.bool_))
(np.integer, np.floating, np.complexfloating,
np.bool_))

class DatetimeBlock(IntBlock):
_can_hold_na = True
Expand All @@ -272,6 +279,8 @@ def make_block(values, items, ref_items, do_integrity_check=False):

if issubclass(vtype, np.floating):
klass = FloatBlock
elif issubclass(vtype, np.complexfloating):
klass = ComplexBlock
elif issubclass(vtype, np.datetime64):
klass = DatetimeBlock
elif issubclass(vtype, np.integer):
Expand Down Expand Up @@ -416,7 +425,7 @@ def is_consolidated(self):

def get_numeric_data(self, copy=False):
num_blocks = [b for b in self.blocks
if isinstance(b, (IntBlock, FloatBlock))]
if isinstance(b, (IntBlock, FloatBlock, ComplexBlock))]

indexer = np.sort(np.concatenate([b.ref_locs for b in num_blocks]))
new_items = self.items.take(indexer)
Expand Down Expand Up @@ -980,13 +989,16 @@ def form_blocks(data, axes):
# put "leftover" items in float bucket, where else?
# generalize?
float_dict = {}
complex_dict = {}
int_dict = {}
bool_dict = {}
object_dict = {}
datetime_dict = {}
for k, v in data.iteritems():
if issubclass(v.dtype.type, np.floating):
float_dict[k] = v
elif issubclass(v.dtype.type, np.complexfloating):
complex_dict[k] = v
elif issubclass(v.dtype.type, np.datetime64):
datetime_dict[k] = v
elif issubclass(v.dtype.type, np.integer):
Expand All @@ -1001,6 +1013,10 @@ def form_blocks(data, axes):
float_block = _simple_blockify(float_dict, items, np.float64)
blocks.append(float_block)

if len(complex_dict):
complex_block = _simple_blockify(complex_dict, items, np.complex64)
blocks.append(complex_block)

if len(int_dict):
int_block = _simple_blockify(int_dict, items, np.int64)
blocks.append(int_block)
Expand Down Expand Up @@ -1087,27 +1103,30 @@ def _interleaved_dtype(blocks):
have_bool = counts[BoolBlock] > 0
have_object = counts[ObjectBlock] > 0
have_float = counts[FloatBlock] > 0
have_complex = counts[ComplexBlock] > 0
have_dt64 = counts[DatetimeBlock] > 0
have_numeric = have_float or have_int
have_numeric = have_float or have_complex or have_int

if have_object:
return np.object_
elif have_bool and have_numeric:
return np.object_
elif have_bool:
return np.bool_
elif have_int and not have_float:
elif have_int and not have_float and not have_complex:
return np.int64
elif have_dt64 and not have_float:
elif have_dt64 and not have_float and not have_complex:
return np.datetime64
elif have_complex:
return np.complex64
else:
return np.float64

def _consolidate(blocks, items):
"""
Merge blocks having same dtype
"""
get_dtype = lambda x: x.dtype
get_dtype = lambda x: x.dtype.name

# sort by dtype
grouper = itertools.groupby(sorted(blocks, key=get_dtype),
Expand Down
46 changes: 36 additions & 10 deletions pandas/src/inference.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -234,31 +234,37 @@ def maybe_convert_numeric(ndarray[object] values, set na_values):
cdef:
Py_ssize_t i, n
ndarray[float64_t] floats
ndarray[complex64_t] complexes
ndarray[int64_t] ints
bint seen_float = 0
bint seen_complex = 0
object val
float64_t fval

n = len(values)

floats = np.empty(n, dtype='f8')
complexes = np.empty(n, dtype='c8')
ints = np.empty(n, dtype='i8')

for i from 0 <= i < n:
val = values[i]

if util.is_float_object(val):
floats[i] = val
floats[i] = complexes[i] = val
seen_float = 1
elif val in na_values:
floats[i] = nan
floats[i] = complexes[i] = nan
seen_float = 1
elif val is None:
floats[i] = nan
floats[i] = complexes[i] = nan
seen_float = 1
elif len(val) == 0:
floats[i] = nan
floats[i] = complexes[i] = nan
seen_float = 1
elif util.is_complex_object(val):
complexes[i] = val
seen_complex = 1
else:
fval = util.floatify(val)
floats[i] = fval
Expand All @@ -268,7 +274,9 @@ def maybe_convert_numeric(ndarray[object] values, set na_values):
else:
ints[i] = <int64_t> fval

if seen_float:
if seen_complex:
return complexes
elif seen_float:
return floats
else:
return ints
Expand All @@ -281,9 +289,11 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
cdef:
Py_ssize_t i, n
ndarray[float64_t] floats
ndarray[complex64_t] complexes
ndarray[int64_t] ints
ndarray[uint8_t] bools
bint seen_float = 0
bint seen_complex = 0
bint seen_int = 0
bint seen_bool = 0
bint seen_object = 0
Expand All @@ -294,6 +304,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
n = len(objects)

floats = np.empty(n, dtype='f8')
complexes = np.empty(n, dtype='c8')
ints = np.empty(n, dtype='i8')
bools = np.empty(n, dtype=np.uint8)

Expand All @@ -305,7 +316,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,

if val is None:
seen_null = 1
floats[i] = fnan
floats[i] = complexes[i] = fnan
elif util.is_bool_object(val):
seen_bool = 1
bools[i] = val
Expand All @@ -316,15 +327,20 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
elif util.is_integer_object(val):
seen_int = 1
floats[i] = <float64_t> val
complexes[i] = <double complex> val
if not seen_null:
ints[i] = val
elif util.is_float_object(val):
floats[i] = val
floats[i] = complexes[i] = val
seen_float = 1
elif util.is_complex_object(val):
complexes[i] = val
seen_complex = 1
elif try_float and not util.is_string_object(val):
# this will convert Decimal objects
try:
floats[i] = float(val)
complexes[i] = complex(val)
seen_float = 1
except Exception:
seen_object = 1
Expand All @@ -334,14 +350,19 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
if not safe:
if seen_null:
if (seen_float or seen_int) and not seen_object:
return floats
if seen_complex:
return complexes
else:
return floats
else:
return objects
else:
if seen_object:
return objects
elif not seen_bool:
if seen_float:
if seen_complex:
return complexes
elif seen_float:
return floats
elif seen_int:
return ints
Expand All @@ -354,7 +375,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
# don't cast int to float, etc.
if seen_null:
if (seen_float or seen_int) and not seen_object:
return floats
if seen_complex:
return complexes
else:
return floats
else:
return objects
else:
Expand All @@ -363,6 +387,8 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
elif not seen_bool:
if seen_int and seen_float:
return objects
elif seen_complex:
return complexes
elif seen_float:
return floats
elif seen_int:
Expand Down
4 changes: 4 additions & 0 deletions pandas/src/numpy_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ PANDAS_INLINE int
is_float_object(PyObject* obj) {
return (PyFloat_Check(obj) || PyArray_IsScalar(obj, Floating));
}
/*
 * Return 1 when obj is a Python complex or a NumPy complex-floating
 * scalar, 0 otherwise.
 */
PANDAS_INLINE int
is_complex_object(PyObject* obj) {
  if (PyComplex_Check(obj)) {
    return 1;
  }
  return PyArray_IsScalar(obj, ComplexFloating) ? 1 : 0;
}

PANDAS_INLINE int
is_bool_object(PyObject* obj) {
Expand Down
2 changes: 1 addition & 1 deletion pandas/src/tseries.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ cdef double INF = <double> np.inf
cdef double NEGINF = -INF

cpdef checknull(object val):
if util.is_float_object(val):
if util.is_float_object(val) or util.is_complex_object(val):
return val != val or val == INF or val == NEGINF
elif util.is_datetime64_object(val):
return val.view('i8') == NaT
Expand Down
1 change: 1 addition & 0 deletions pandas/src/util.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ cimport numpy as cnp
cdef extern from "numpy_helper.h":
inline int is_integer_object(object)
inline int is_float_object(object)
inline int is_complex_object(object)
inline int is_bool_object(object)
inline int is_string_object(object)
inline int is_datetime64_object(object)
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1510,12 +1510,13 @@ def test_constructor_corner(self):

def test_constructor_scalar_inference(self):
data = {'int' : 1, 'bool' : True,
'float' : 3., 'object' : 'foo'}
'float' : 3., 'complex': 4j, 'object' : 'foo'}
df = DataFrame(data, index=np.arange(10))

self.assert_(df['int'].dtype == np.int64)
self.assert_(df['bool'].dtype == np.bool_)
self.assert_(df['float'].dtype == np.float64)
self.assert_(df['complex'].dtype == np.complex64)
self.assert_(df['object'].dtype == np.object_)

def test_constructor_DataFrame(self):
Expand Down
16 changes: 12 additions & 4 deletions pandas/tests/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,17 @@ def assert_block_equal(left, right):
def get_float_mat(n, k):
    """Return an ``(n, k)`` float array whose every row is ``0.0 .. k-1``."""
    # Build one row, promote it to 2-D, then stack n copies along axis 0.
    row = np.atleast_2d(np.arange(k, dtype=float))
    return np.repeat(row, n, axis=0)

TEST_COLS = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
TEST_COLS = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
N = 10

def get_float_ex(cols=None):
    """Build a float block fixture via ``make_block``.

    ``cols`` are the item labels for the block; when omitted they default to
    ``['a', 'c', 'e']``. ``TEST_COLS`` is passed as the reference items.
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    if cols is None:
        cols = ['a', 'c', 'e']
    # Transposed so the three items run along the first axis.
    floats = get_float_mat(N, 3).T
    return make_block(floats, cols, TEST_COLS)

def get_complex_ex(cols=None):
    """Build a complex64 block fixture via ``make_block``.

    ``cols`` are the item labels for the block; when omitted they default to
    ``['h']``. ``TEST_COLS`` is passed as the reference items.
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    if cols is None:
        cols = ['h']
    # Scale the float fixture by 1j, then cast down to complex64.
    complexes = (get_float_mat(N, 1).T * 1j).astype(np.complex64)
    return make_block(complexes, cols, TEST_COLS)

def get_obj_ex(cols=['b', 'd']):
mat = np.empty((N, 2), dtype=object)
mat[:, 0] = 'foo'
Expand All @@ -44,6 +48,7 @@ class TestBlock(unittest.TestCase):

def setUp(self):
self.fblock = get_float_ex()
self.cblock = get_complex_ex()
self.oblock = get_obj_ex()
self.bool_block = get_bool_ex()
self.int_block = get_int_ex()
Expand All @@ -60,6 +65,7 @@ def _check(blk):
assert_block_equal(blk, unpickled)

_check(self.fblock)
_check(self.cblock)
_check(self.oblock)
_check(self.bool_block)

Expand Down Expand Up @@ -175,7 +181,8 @@ def setUp(self):
self.blocks = [get_float_ex(),
get_obj_ex(),
get_bool_ex(),
get_int_ex()]
get_int_ex(),
get_complex_ex()]
self.mgr = BlockManager.from_blocks(self.blocks, np.arange(N))

def test_constructor_corner(self):
Expand All @@ -198,13 +205,13 @@ def test_is_indexed_like(self):
self.assert_(not self.mgr._is_indexed_like(mgr2))

def test_block_id_vector_item_dtypes(self):
expected = [0, 1, 0, 1, 0, 2, 3]
expected = [0, 1, 0, 1, 0, 2, 3, 4]
result = self.mgr.block_id_vector
assert_almost_equal(expected, result)

result = self.mgr.item_dtypes
expected = ['float64', 'object', 'float64', 'object', 'float64',
'bool', 'int64']
'bool', 'int64', 'complex64']
self.assert_(np.array_equal(result, expected))

def test_union_block_items(self):
Expand Down Expand Up @@ -298,6 +305,7 @@ def test_consolidate_ordering_issues(self):
self.mgr.set('d', randn(N))
self.mgr.set('b', randn(N))
self.mgr.set('g', randn(N))
self.mgr.set('h', randn(N))

cons = self.mgr.consolidate()
self.assertEquals(cons.nblocks, 1)
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/test_tseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,13 @@ def test_convert_objects_ints():
result = lib.maybe_convert_objects(arr)
assert(issubclass(result.dtype.type, np.integer))

def test_convert_objects_complex_number():
    """Check ``maybe_convert_objects`` yields a complex dtype for complex input.

    Runs once for every complex scalar type listed in ``np.sctypes``.
    """
    for dtype in np.sctypes['complex']:
        values = 1j * np.arange(20, dtype=dtype)
        arr = np.array(list(values), dtype='O')
        # Sanity check: the boxed scalars kept the dtype under test.
        assert arr[0].dtype == np.dtype(dtype)
        result = lib.maybe_convert_objects(arr)
        assert issubclass(result.dtype.type, np.complexfloating)

def test_rank():
from scipy.stats import rankdata
from numpy import nan
Expand Down