Skip to content

Complex Dtype Support for Hashmap Algos #36482

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 44 commits into from
Sep 4, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
1c6b786
Merge master
alimcmaster1 Jan 3, 2020
42a46d7
Fix test failures ignore FutureWarning
alimcmaster1 Jan 4, 2020
8331d06
Filter warning correctly
alimcmaster1 Jan 4, 2020
3ba4169
Fix imports
alimcmaster1 Jan 4, 2020
8302589
Merge remote-tracking branch 'remotes/upstream/master' into lucaiones…
alimcmaster1 Jan 4, 2020
5068771
Add warning annotation
alimcmaster1 Jan 4, 2020
8d65aa7
Remove unrequired annotation
alimcmaster1 Jan 4, 2020
8b7ac7d
Merge remote-tracking branch 'remotes/upstream/master' into lucaiones…
alimcmaster1 Jan 4, 2020
45c8237
Merge remote-tracking branch 'upstream/master' into lucaionescu-mcmali
alimcmaster1 Jan 4, 2020
cb74fe3
Update docs
alimcmaster1 Jan 5, 2020
b29404e
Create deepsource.toml
alimcmaster1 Jan 16, 2020
f983f4f
Commit Complex handling
alimcmaster1 Sep 16, 2020
c2e4e82
run black
alimcmaster1 Sep 19, 2020
7c42495
Use pandas.testing
alimcmaster1 Sep 19, 2020
41b1faf
Use pandas.testing
alimcmaster1 Sep 19, 2020
da53f38
Clean ups
alimcmaster1 Sep 19, 2020
32262e7
Merge remote-tracking branch 'upstream/master' into mcmali-complex
alimcmaster1 Nov 26, 2020
f4932d9
Move test to sep files
alimcmaster1 Nov 26, 2020
328e242
Refactor Tests
alimcmaster1 Nov 28, 2020
8d5d517
Merge remote-tracking branch 'upstream/master' into mcmali-complex
alimcmaster1 Nov 28, 2020
38e0dc7
Merge remote-tracking branch 'origin/master' into mcmali-complex
alimcmaster1 Jan 12, 2021
2008239
Merge master
alimcmaster1 Jan 12, 2021
574be58
Complex 128 support
alimcmaster1 Jan 12, 2021
e0c3e44
Remove deepsource.toml
alimcmaster1 Jan 12, 2021
1b487b8
run black
alimcmaster1 Jan 12, 2021
b393a08
Fix tests
alimcmaster1 Jan 13, 2021
554090f
Merge remote-tracking branch 'upstream/master' into mcmali-complex
alimcmaster1 Jan 18, 2021
ab38ad9
Add ReadMe
alimcmaster1 Jan 18, 2021
a28c495
Add ReadMe
alimcmaster1 Aug 25, 2021
7a9f960
Merge Master
alimcmaster1 Aug 26, 2021
9e558ce
Merge remote-tracking branch 'upstream/master' into mcmali-complex
alimcmaster1 Aug 26, 2021
1d11a90
Add np complex64 and np.nan tests
alimcmaster1 Aug 26, 2021
737ea96
complex 64 and 128 testing
alimcmaster1 Aug 26, 2021
ae7674b
complex 64 and 128 testing
alimcmaster1 Aug 26, 2021
d1e00b7
Pep8
alimcmaster1 Aug 26, 2021
df28514
isort
alimcmaster1 Aug 26, 2021
6b4c10e
More tests
alimcmaster1 Aug 26, 2021
9afed5f
Add type info
alimcmaster1 Aug 27, 2021
96d5a58
Merge Master
alimcmaster1 Aug 31, 2021
e9a4ca2
Fix whatsnew
alimcmaster1 Aug 31, 2021
e882b4e
Merge remote-tracking branch 'upstream/master' into mcmali-complex
alimcmaster1 Sep 1, 2021
6bf72a0
Updates as per comments
alimcmaster1 Sep 1, 2021
e53417d
Fix tests
alimcmaster1 Sep 1, 2021
fdf45b1
Merge Master
alimcmaster1 Sep 2, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,8 @@ def _ensure_data(
elif is_float_dtype(values) or is_float_dtype(dtype):
return ensure_float64(values), np.dtype("float64")
elif is_complex_dtype(values) or is_complex_dtype(dtype):

# ignore the fact that we are casting to float
# which discards complex parts
with catch_warnings():
simplefilter("ignore", np.ComplexWarning)
values = ensure_float64(values)
return values, np.dtype("float64")
# Complex dtype is not supported coerce to object
return ensure_object(values), np.dtype("complex64")

except (TypeError, ValueError, OverflowError):
# if we are trying to coerce to a dtype
Expand Down
129 changes: 129 additions & 0 deletions pandas/tests/test_complex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import numpy as np
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any better locations for this test file?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you split the tests into the appropriate files? For example: pandas/tests/series/methods/test_value_counts.py

import pytest

import pandas as pd
from pandas import DataFrame, Index, Series
import pandas.testing as tm


class TestBasicComplexSupport:
    """Exercise value_counts, unique, duplicated, isin, factorize, groupby
    and mode on complex-valued input."""

    @pytest.mark.parametrize(
        "array,expected",
        [
            (
                [1 + 1j, 0, 1, 1j, 1 + 2j],
                Series([1, 1, 1, 1, 1], index=[1 + 2j, 1 + 1j, 1j, 1, 0]),
            ),
            (
                [1 + 2j, 0, 1j, 1, 1j, 1 + 1j],
                # index is sorted by value counts in descending order by default
                Series([2, 1, 1, 1, 1], index=[1j, 1 + 2j, 1 + 1j, 1, 0]),
            ),
        ],
    )
    def test_value_counts(self, array, expected):
        """Compare ``pd.value_counts`` of a complex list with the expected Series."""
        counts = pd.value_counts(array)
        tm.assert_series_equal(counts, expected)

    @pytest.mark.parametrize(
        "array,expected",
        [
            (
                [1 + 1j, 0, 1, 1j, 1 + 2j, 1 + 2j],
                np.array([(1 + 1j), 0j, (1 + 0j), 1j, (1 + 2j)]),
            )
        ],
    )
    def test_unique(self, array, expected):
        """Compare ``pd.unique`` of a complex list with the expected array."""
        distinct = pd.unique(array)
        np.testing.assert_array_equal(distinct, expected)

    @pytest.mark.parametrize(
        "array,expected",
        [
            (
                [0, 1j, 1j, 1, 1 + 1j, 1 + 2j, 1 + 1j],
                Series([False, False, True, False, False, False, True], dtype=bool),
            )
        ],
    )
    def test_duplicated(self, array, expected):
        """Compare ``Series.duplicated`` on a complex64 Series with the expected mask."""
        series = Series(array, dtype=np.complex64)
        mask = series.duplicated()
        tm.assert_series_equal(mask, expected)

    @pytest.mark.parametrize(
        "array,expected",
        [
            (
                [0, 1j, 1j, 1, 1 + 1j, 1 + 2j, 1 + 1j],
                Series([False, True, True, False, True, True, True], dtype=bool),
            )
        ],
    )
    def test_isin(self, array, expected):
        """Compare ``Series.isin`` against a list of complex candidates."""
        candidates = [1j, 1 + 1j, 1 + 2j]
        mask = Series(array).isin(candidates)
        tm.assert_series_equal(mask, expected)

    def test_factorize(self):
        """Check both the codes and the uniques returned by ``pd.factorize``."""
        values = [1, 2, 2 + 1j]
        codes, distinct = pd.factorize(values)

        np.testing.assert_array_equal(codes, np.array([0, 1, 2], dtype=np.intp))

        np.testing.assert_array_equal(
            distinct,
            np.array([(1 + 0j), (2 + 0j), (2 + 1j)], dtype=np.complex64),
        )

    @pytest.mark.parametrize(
        "frame,expected",
        [
            (
                DataFrame([{"a": 1, "b": 1 + 1j}, {"a": 1, "b": 1 + 2j}]),
                DataFrame(
                    np.array([1, 1], dtype=np.int64),
                    index=Index([(1 + 1j), (1 + 2j)], dtype="object", name="b"),
                    columns=Index(["a"], dtype="object"),
                ),
            )
        ],
    )
    def test_groupby(self, frame, expected):
        """Group by a complex column unsorted, then expect sorting to raise."""
        unsorted_counts = frame.groupby("b", sort=False).count()
        tm.assert_frame_equal(unsorted_counts, expected)

        # sorting of the index should fail since complex numbers are unordered
        unordered_msg = "'<' not supported between instances of 'complex' and 'complex'"
        with pytest.raises(TypeError, match=unordered_msg):
            frame.groupby("b", sort=True).count()

    @pytest.mark.parametrize(
        "array,expected",
        [
            ([0, 1j, 1, 1, 1 + 1j, 1 + 2j], Series([1], dtype=np.complex128)),
            ([1 + 1j, 2j, 1 + 1j], Series([1 + 1j], dtype=np.complex128)),
        ],
    )
    def test_unimode(self, array, expected):
        """Compare ``Series.mode`` with the expected single-mode Series."""
        modes = Series(array).mode()
        tm.assert_series_equal(modes, expected)

    # mode tries to sort multimodal series.
    # A warning will be raised since complex numbers
    # are not ordered.
    @pytest.mark.parametrize(
        "array,expected",
        [
            (
                # no modes
                [0, 1j, 1, 1 + 1j, 1 + 2j],
                Series([0, 1, 1j, 1 + 1j, 1 + 2j], dtype=np.complex128),
            ),
            ([1 + 1j, 2j, 1 + 1j, 2j, 3], Series([1 + 1j, 2j], dtype=np.complex128)),
        ],
    )
    def test_multimode(self, array, expected):
        """Expect a ``UserWarning`` from ``Series.mode`` and compare the result."""
        series = Series(array)
        with pytest.warns(UserWarning):
            modes = series.mode()
        tm.assert_series_equal(modes, expected)