Skip to content

[WIP] Quick fix to provide complex data type support for hashmap based algorithms #27599

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 10 commits into from
10 changes: 2 additions & 8 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"""
from textwrap import dedent
from typing import Dict
from warnings import catch_warnings, simplefilter, warn
from warnings import warn

import numpy as np

Expand Down Expand Up @@ -101,13 +101,7 @@ def _ensure_data(values, dtype=None):
elif is_object_dtype(values) and dtype is None:
return ensure_object(np.asarray(values)), "object", "object"
elif is_complex_dtype(values) or is_complex_dtype(dtype):

# ignore the fact that we are casting to float
# which discards complex parts
with catch_warnings():
simplefilter("ignore", np.ComplexWarning)
values = ensure_float64(values)
return values, "float64", "float64"
raise TypeError("Complex data types not supported...Coercing to object")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better to do the object coercion here than to fall back to the except branch; it is clearer.


except (TypeError, ValueError, OverflowError):
# if we are trying to coerce to a dtype
Expand Down
125 changes: 125 additions & 0 deletions pandas/tests/test_complex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import numpy as np
import pytest

import pandas as pd
from pandas import DataFrame, Index, Series
import pandas.core.algorithms as algos
import pandas.util.testing as tm


class TestBasicComplexSupport:
    """Checks of hashmap-based operations on data holding complex numbers.

    Each test feeds a list (or frame) that mixes real and complex values
    into one operation and compares the outcome with a hand-written
    expectation.
    """

    @pytest.mark.parametrize(
        "array,expected",
        [
            (
                [1 + 1j, 0, 1, 1j, 1 + 2j],
                Series([1] * 5, index=[1 + 2j, 1 + 1j, 1j, 1, 0]),
            ),
            (
                [1 + 2j, 0, 1j, 1, 1j, 1 + 1j],
                # index is sorted by value counts in descending order by default
                Series([2, 1, 1, 1, 1], index=[1j, 1 + 2j, 1 + 1j, 1, 0]),
            ),
        ],
    )
    def test_value_counts(self, array, expected):
        """``algos.value_counts`` output matches the expected Series."""
        counts = algos.value_counts(array)
        tm.assert_series_equal(counts, expected)

    @pytest.mark.parametrize(
        "array,expected",
        [
            (
                [1 + 1j, 0, 1, 1j, 1 + 2j, 1 + 2j],
                np.array([1 + 1j, 0j, 1 + 0j, 1j, 1 + 2j], dtype=object),
            )
        ],
    )
    def test_unique(self, array, expected):
        """``algos.unique`` output matches the expected object array."""
        uniques = algos.unique(array)
        tm.assert_numpy_array_equal(uniques, expected)

    @pytest.mark.parametrize(
        "array,expected",
        [
            (
                [0, 1j, 1j, 1, 1 + 1j, 1 + 2j, 1 + 1j],
                Series([False, False, True, False, False, False, True], dtype=bool),
            )
        ],
    )
    def test_duplicated(self, array, expected):
        """``Series.duplicated`` output matches the expected boolean mask."""
        mask = Series(array).duplicated()
        tm.assert_series_equal(mask, expected)

    @pytest.mark.parametrize(
        "array,expected",
        [
            (
                [0, 1j, 1j, 1, 1 + 1j, 1 + 2j, 1 + 1j],
                Series([False, True, True, False, True, True, True], dtype=bool),
            )
        ],
    )
    def test_isin(self, array, expected):
        """``Series.isin`` against three complex candidates matches the mask."""
        candidates = [1j, 1 + 1j, 1 + 2j]
        mask = Series(array).isin(candidates)
        tm.assert_series_equal(mask, expected)

    def test_factorize(self):
        """``pd.factorize`` returns the expected codes and object uniques."""
        codes, uniques = pd.factorize([1, 2, 2 + 1j])

        expected_codes = np.array([0, 1, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(codes, expected_codes)

        expected_uniques = np.array([1 + 0j, 2 + 0j, 2 + 1j], dtype=object)
        tm.assert_numpy_array_equal(uniques, expected_uniques)

    @pytest.mark.parametrize(
        "frame,expected",
        [
            (
                DataFrame([{"a": 1, "b": 1 + 1j}, {"a": 1, "b": 1 + 2j}]),
                DataFrame(
                    np.array([1, 1], dtype=np.int64),
                    index=Index([1 + 1j, 1 + 2j], dtype="object", name="b"),
                    columns=Index(["a"], dtype="object"),
                ),
            )
        ],
    )
    def test_groupby(self, frame, expected):
        """Unsorted groupby on a complex column counts rows; sorted raises."""
        counted = frame.groupby("b", sort=False).count()
        tm.assert_frame_equal(counted, expected)

        # sorting of the index should fail since complex numbers are unordered
        with pytest.raises(TypeError):
            frame.groupby("b", sort=True).count()

    @pytest.mark.parametrize(
        "array,expected",
        [
            ([0, 1j, 1, 1, 1 + 1j, 1 + 2j], Series([1], dtype=np.complex128)),
            ([1 + 1j, 2j, 1 + 1j], Series([1 + 1j], dtype=np.complex128)),
        ],
    )
    def test_unimode(self, array, expected):
        """``Series.mode`` with one most-frequent value matches the expectation."""
        modes = Series(array).mode()
        tm.assert_series_equal(modes, expected)

    # mode tries to sort multimodal series.
    # A warning will be raised since complex numbers
    # are not ordered.
    @pytest.mark.parametrize(
        "array,expected",
        [
            (
                # no modes
                [0, 1j, 1, 1 + 1j, 1 + 2j],
                Series([0, 1, 1j, 1 + 1j, 1 + 2j], dtype=np.complex128),
            ),
            ([1 + 1j, 2j, 1 + 1j, 2j, 3], Series([1 + 1j, 2j], dtype=np.complex128)),
        ],
    )
    def test_multimode(self, array, expected):
        """``Series.mode`` with several modes emits ``UserWarning`` and matches."""
        with tm.assert_produces_warning(UserWarning):
            modes = Series(array).mode()
        tm.assert_series_equal(modes, expected)